Calculate weights based on current policy. Normally run after an optimal policy has been found.

Usage

runCalcWeights(
  mdp,
  wLbl,
  criterion = "expected",
  durLbl = NULL,
  rate = 0,
  rateBase = 1,
  discountFactor = NULL,
  termValues = NULL,
  discountMethod = "continuous"
)

Arguments

mdp: The MDP loaded using loadMDP().
wLbl: The label of the weight we consider.
criterion: The Bellman operator shortcut. One of "expected" (use expected weights), "discount" (use discounted expected weights), "average" (use average expected weights), "min" (use minimum-successor weights), "max" (use maximum-successor weights), "secondMoment" (use the second moment of total accumulated weight) or "variance" (use the law-of-total-variance recursion under the current policy).
durLbl: The label of the duration/time such that discount rates can be calculated.
rate: The interest rate.
rateBase: The time-horizon the rate is valid over.
discountFactor: The discount factor for one time unit. If specified, rate and rateBase are not used to calculate the discount factor.
termValues: The terminal values used (values of the last stage in the MDP).
discountMethod: Either 'continuous' or 'discrete', corresponding to discount factor exp(-rate/rateBase) or 1/(1 + rate/rateBase), respectively. Only used if discountFactor is NULL.

Value

Nothing.

Examples

## Set working dir
## Work in the session's temporary directory. setwd() returns the previous
## working directory, which is kept in `wd` so it can be restored at the end.
wd <- setwd(tempdir())

# Create the small machine replacement problem used as an example in L.R. Nielsen and A.R.
# Kristensen. Finding the K best policies in a finite-horizon Markov decision process. European
# Journal of Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011.

## Create the MDP using a dummy replacement node
## The model is written to binary files whose names start with `prefix`.
## Each w$action() gives the action label, its weight ("Net reward") and its
## transitions in `prob` as consecutive triples. Judging from the loaded model
## printed further down, a triple (1, idx, pr) is a transition with probability
## pr to the state with index idx at the next stage -- confirm the exact
## meaning of the first element against the binaryMDPWriter documentation.
## The v=(n,i) comments give (stage, state index within the stage).
prefix<-"machine1_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))   # a single weight per action
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         # buying costs 100; the machine starts as good (0.7) or average (0.3)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         # mt = maintain: next state is 'good' with certainty
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         # nmt = no maintenance: the machine stays or drops one condition level
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         # rep = replace: go to the dummy 'replaced' state (index 3) at the next stage
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
      w$state(label="replaced")       # v=(3,3)
         # dummy state: a single zero-weight action leading to 'replaced' at stage 4
         w$action(label="Dummy", weights=0, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      # last stage: states without actions; their values are supplied later
      # through the termValues argument (the scrap values)
      w$state(label="good", end=TRUE)        # v=(4,0)
      w$state(label="average", end=TRUE)     # v=(4,1)
      w$state(label="not working", end=TRUE) # v=(4,2)
      w$state(label="replaced", end=TRUE)    # v=(4,3)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 14 
#>     actions: 18 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Load the model into memory
## (reads the binary files with prefix "machine1_" written above)
mdp<-loadMDP(prefix)
#> Read binary files (0.000158504 sec.)
#> Build the HMDP (3.6338e-05 sec.)
#> Checking MDP and found no errors (1.112e-06 sec.)
mdp
#> $binNames
#>  [1] "machine1_stateIdx.bin"          "machine1_stateIdxLbl.bin"      
#>  [3] "machine1_actionIdx.bin"         "machine1_actionIdxLbl.bin"     
#>  [5] "machine1_actionWeight.bin"      "machine1_actionWeightLbl.bin"  
#>  [7] "machine1_transProb.bin"         "machine1_externalProcesses.bin"
#>  [9] "machine1_transWeight.bin"       "machine1_transWeightLbl.bin"   
#> 
#> $timeHorizon
#> [1] 5
#> 
#> $states
#> [1] 14
#> 
#> $founderStatesLast
#> [1] 4
#> 
#> $actions
#> [1] 18
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $weightActionNames
#> [1] "Net reward"
#> 
#> $weightTransNames
#> character(0)
#> 
#> $ptr
#> C++ object <0x559e249acb50> of class 'HMDP' <0x559e23190c70>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 14 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 4,0      good        <NULL>    
#>  2     1 4,1      average     <NULL>    
#>  3     2 4,2      not working <NULL>    
#>  4     3 4,3      replaced    <NULL>    
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 3,3      replaced    <list [1]>
#>  9     8 2,0      good        <list [2]>
#> 10     9 2,1      average     <list [2]>
#> 11    10 2,2      not working <list [2]>
#> 12    11 1,0      good        <list [2]>
#> 13    12 1,1      average     <list [2]>
#> 14    13 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 18 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>        <chr>   <lgl>        <chr> <chr>
#>  1     4 3,0      good           0 mt           55      NA           0     1    
#>  2     4 3,0      good           1 nmt          70      NA           0,1   0.2,…
#>  3     5 3,1      average        0 mt           40      NA           0     1    
#>  4     5 3,1      average        1 nmt          50      NA           1,2   0.2,…
#>  5     6 3,2      not worki…     0 mt           30      NA           0     1    
#>  6     6 3,2      not worki…     1 rep          5       NA           3     1    
#>  7     7 3,3      replaced       0 Dummy        0       NA           3     1    
#>  8     8 2,0      good           0 mt           55      NA           4     1    
#>  9     8 2,0      good           1 nmt          70      NA           4,5   0.5,…
#> 10     9 2,1      average        0 mt           40      NA           4     1    
#> 11     9 2,1      average        1 nmt          50      NA           5,6   0.5,…
#> 12    10 2,2      not worki…     0 mt           30      NA           4     1    
#> 13    10 2,2      not worki…     1 rep          5       NA           7     1    
#> 14    11 1,0      good           0 mt           55      NA           8     1    
#> 15    11 1,0      good           1 nmt          70      NA           8,9   0.6,…
#> 16    12 1,1      average        0 mt           40      NA           8     1    
#> 17    12 1,1      average        1 nmt          50      NA           9,10  0.6,…
#> 18    13 0,0      Dummy          0 buy          -100    NA           11,12 0.7,…
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 18 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>          <dbl> <lgl>        <lis> <lis>
#>  1     4 3,0      good           0 mt                55 NA           <dbl> <dbl>
#>  2     4 3,0      good           1 nmt               70 NA           <dbl> <dbl>
#>  3     5 3,1      average        0 mt                40 NA           <dbl> <dbl>
#>  4     5 3,1      average        1 nmt               50 NA           <dbl> <dbl>
#>  5     6 3,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#>  6     6 3,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#>  7     7 3,3      replaced       0 Dummy              0 NA           <dbl> <dbl>
#>  8     8 2,0      good           0 mt                55 NA           <dbl> <dbl>
#>  9     8 2,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 10     9 2,1      average        0 mt                40 NA           <dbl> <dbl>
#> 11     9 2,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 12    10 2,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#> 13    10 2,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 14    11 1,0      good           0 mt                55 NA           <dbl> <dbl>
#> 15    11 1,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 16    12 1,1      average        0 mt                40 NA           <dbl> <dbl>
#> 17    12 1,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 18    13 0,0      Dummy          0 buy             -100 NA           <dbl> <dbl>
#> 

## Find the optimal policy by value iteration
# Scrap values = terminal values of the 4 states at stage 4, given in state
# order (good, average, not working, replaced).
scrapValues <- c(30, 10, 5, 0)
# Label of the weight we want to optimize; from here on `w` holds this label
# and no longer refers to the writer object.
w <- "Net reward"
runValueIte(mdp, w, termValues = scrapValues)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using weight 'Net reward' under expected-weight Bellman operator.
#>  Finished. Cpu time 7.193e-06 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""             30 
#>  2     1 4,1      average        -1 ""             10 
#>  3     2 4,2      not working    -1 ""              5 
#>  4     3 4,3      replaced       -1 ""              0 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 3,3      replaced        0 "Dummy"         0 
#>  9     8 2,0      good            1 "nmt"         148.
#> 10     9 2,1      average         0 "mt"          125 
#> 11    10 2,2      not working     0 "mt"          115 
#> 12    11 1,0      good            1 "nmt"         208.
#> 13    12 1,1      average         0 "mt"          188.
#> 14    13 0,0      Dummy           0 "buy"         102.

## Evaluate the fixed policy "always maintain"
# Build a (sId, aIdx) table that selects action 'mt' in every state offering
# it; states without an 'mt' action are not part of the table.
library(magrittr)
policy <- mdp %>%
   getInfo(withList = FALSE, dfLevel = "action") %>%
   getElement("df") %>%
   dplyr::filter(label_action == "mt") %>%
   dplyr::select(sId, aIdx)
# Install the policy, recompute the weights under it and print the result.
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues = scrapValues)
getPolicy(mdp)
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""            30  
#>  2     1 4,1      average        -1 ""            10  
#>  3     2 4,2      not working    -1 ""             5  
#>  4     3 4,3      replaced       -1 ""             0  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 3,3      replaced        0 "Dummy"        0  
#>  9     8 2,0      good            0 "mt"         140  
#> 10     9 2,1      average         0 "mt"         125  
#> 11    10 2,2      not working     0 "mt"         115  
#> 12    11 1,0      good            0 "mt"         195  
#> 13    12 1,1      average         0 "mt"         180  
#> 14    13 0,0      Dummy           0 "buy"         90.5



# The example given in L.R. Nielsen and A.R. Kristensen. Finding the K best
# policies in a finite-horizon Markov decision process. European Journal of
# Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011,
# does not actually have a dummy replacement node as in the MDP above. The same
# model can be created using a single dummy node at the end of the process.

## Create the MDP using a single dummy node
## Same machine replacement problem, but every replacement leads directly to
## one terminal Dummy state at stage 5. The scrap values (30, 10, 5) are the
## weights of the 'rep' actions at stage 4, so the terminal Dummy state itself
## carries no value.
## `prob` holds consecutive triples; a triple (1, idx, pr) is a transition with
## probability pr to the state with index idx at the next stage (as seen in the
## loaded model printed further down). The 'rep' actions at stages 2 and 3 use
## a first element of 3 and address the target by state id instead --
## confirm the exact semantics against the binaryMDPWriter documentation.
prefix<-"machine2_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))   # a single weight per action
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         # buying costs 100; the machine starts as good (0.7) or average (0.3)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         # sId=12 is the final Dummy state in the writer's numbering; after
         # loading it is listed as sId 0 (see the getInfo output below)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE) # transition to sId=12 (Dummy)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE) # transition to sId=12 (Dummy)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      # the machine is always replaced here; the action weight is the scrap value
      w$state(label="good")        # v=(4,0)
         w$action(label="rep", weights=30, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="average")     # v=(4,1)
         w$action(label="rep", weights=10, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="not working") # v=(4,2)
         w$action(label="rep", weights=5, prob=c(1,0,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=5
      # the single terminal state of the process (no actions)
      w$state(label="Dummy", end=TRUE)        # v=(5,0)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 13 
#>     actions: 20 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Have a look at the state-expanded hypergraph
## First load the model (binary files with prefix "machine2_") into memory;
## it is printed and plotted below.
mdp<-loadMDP(prefix)
#> Read binary files (0.000145259 sec.)
#> Build the HMDP (3.797e-05 sec.)
#> Checking MDP and found no errors (1.563e-06 sec.)
mdp
#> $binNames
#>  [1] "machine2_stateIdx.bin"          "machine2_stateIdxLbl.bin"      
#>  [3] "machine2_actionIdx.bin"         "machine2_actionIdxLbl.bin"     
#>  [5] "machine2_actionWeight.bin"      "machine2_actionWeightLbl.bin"  
#>  [7] "machine2_transProb.bin"         "machine2_externalProcesses.bin"
#>  [9] "machine2_transWeight.bin"       "machine2_transWeightLbl.bin"   
#> 
#> $timeHorizon
#> [1] 6
#> 
#> $states
#> [1] 13
#> 
#> $founderStatesLast
#> [1] 1
#> 
#> $actions
#> [1] 20
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $weightActionNames
#> [1] "Net reward"
#> 
#> $weightTransNames
#> character(0)
#> 
#> $ptr
#> C++ object <0x559e2a04ce40> of class 'HMDP' <0x559e23190c70>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 13 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 5,0      Dummy       <NULL>    
#>  2     1 4,0      good        <list [1]>
#>  3     2 4,1      average     <list [1]>
#>  4     3 4,2      not working <list [1]>
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 2,0      good        <list [2]>
#>  9     8 2,1      average     <list [2]>
#> 10     9 2,2      not working <list [2]>
#> 11    10 1,0      good        <list [2]>
#> 12    11 1,1      average     <list [2]>
#> 13    12 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 20 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>        <chr>   <lgl>        <chr> <chr>
#>  1     1 4,0      good           0 rep          30      NA           0     1    
#>  2     2 4,1      average        0 rep          10      NA           0     1    
#>  3     3 4,2      not worki…     0 rep          5       NA           0     1    
#>  4     4 3,0      good           0 mt           55      NA           1     1    
#>  5     4 3,0      good           1 nmt          70      NA           1,2   0.2,…
#>  6     5 3,1      average        0 mt           40      NA           1     1    
#>  7     5 3,1      average        1 nmt          50      NA           2,3   0.2,…
#>  8     6 3,2      not worki…     0 mt           30      NA           1     1    
#>  9     6 3,2      not worki…     1 rep          5       NA           0     1    
#> 10     7 2,0      good           0 mt           55      NA           4     1    
#> 11     7 2,0      good           1 nmt          70      NA           4,5   0.5,…
#> 12     8 2,1      average        0 mt           40      NA           4     1    
#> 13     8 2,1      average        1 nmt          50      NA           5,6   0.5,…
#> 14     9 2,2      not worki…     0 mt           30      NA           4     1    
#> 15     9 2,2      not worki…     1 rep          5       NA           0     1    
#> 16    10 1,0      good           0 mt           55      NA           7     1    
#> 17    10 1,0      good           1 nmt          70      NA           7,8   0.6,…
#> 18    11 1,1      average        0 mt           40      NA           7     1    
#> 19    11 1,1      average        1 nmt          50      NA           8,9   0.6,…
#> 20    12 0,0      Dummy          0 buy          -100    NA           10,11 0.7,…
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 20 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>          <dbl> <lgl>        <lis> <lis>
#>  1     1 4,0      good           0 rep               30 NA           <dbl> <dbl>
#>  2     2 4,1      average        0 rep               10 NA           <dbl> <dbl>
#>  3     3 4,2      not worki…     0 rep                5 NA           <dbl> <dbl>
#>  4     4 3,0      good           0 mt                55 NA           <dbl> <dbl>
#>  5     4 3,0      good           1 nmt               70 NA           <dbl> <dbl>
#>  6     5 3,1      average        0 mt                40 NA           <dbl> <dbl>
#>  7     5 3,1      average        1 nmt               50 NA           <dbl> <dbl>
#>  8     6 3,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#>  9     6 3,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 10     7 2,0      good           0 mt                55 NA           <dbl> <dbl>
#> 11     7 2,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 12     8 2,1      average        0 mt                40 NA           <dbl> <dbl>
#> 13     8 2,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 14     9 2,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#> 15     9 2,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 16    10 1,0      good           0 mt                55 NA           <dbl> <dbl>
#> 17    10 1,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 18    11 1,1      average        0 mt                40 NA           <dbl> <dbl>
#> 19    11 1,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 20    12 0,0      Dummy          0 buy             -100 NA           <dbl> <dbl>
#> 

## Find the optimal policy by value iteration
# The only terminal state is the Dummy node at stage 5. The scrap values are
# already the weights of the stage-4 'rep' actions, so its terminal value is 0.
w <- "Net reward"   # label of the weight to optimize (replaces the writer in `w`)
runValueIte(mdp, w, termValues = 0)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using weight 'Net reward' under expected-weight Bellman operator.
#>  Finished. Cpu time 7.374e-06 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""              0 
#>  2     1 4,0      good            0 "rep"          30 
#>  3     2 4,1      average         0 "rep"          10 
#>  4     3 4,2      not working     0 "rep"           5 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 2,0      good            1 "nmt"         148.
#>  9     8 2,1      average         0 "mt"          125 
#> 10     9 2,2      not working     0 "mt"          115 
#> 11    10 1,0      good            1 "nmt"         208.
#> 12    11 1,1      average         0 "mt"          188.
#> 13    12 0,0      Dummy           0 "buy"         102.

## Calculate the weights of the policy always to maintain
## This model has a single terminal state (the Dummy node at stage 5) and the
## scrap values are already the weights of the stage-4 'rep' actions. The
## terminal value is therefore 0, exactly as in the value iteration for this
## model -- not the four-element scrapValues vector of the first model.
library(magrittr)
# (sId, aIdx) of action 'mt' for every state that offers it; states without
# an 'mt' action are not part of the table
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues = 0)
getPolicy(mdp)  
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""             0  
#>  2     1 4,0      good            0 "rep"         30  
#>  3     2 4,1      average         0 "rep"         10  
#>  4     3 4,2      not working     0 "rep"          5  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 2,0      good            0 "mt"         140  
#>  9     8 2,1      average         0 "mt"         125  
#> 10     9 2,2      not working     0 "mt"         115  
#> 11    10 1,0      good            0 "mt"         195  
#> 12    11 1,1      average         0 "mt"         180  
#> 13    12 0,0      Dummy           0 "buy"         90.5


## Reset working dir
## Restore the directory that was active before the examples ran (saved in
## `wd` by the setwd() call at the top).
setwd(wd)