Skip to contents

Information about the MDP

Usage

getInfo(
  mdp,
  sId = 1:ifelse(mdp$timeHorizon < Inf, mdp$states, mdp$states + mdp$founderStatesLast) -
    1,
  stateStr = NULL,
  stageStr = NULL,
  withList = TRUE,
  withDF = TRUE,
  dfLevel = "state",
  asStringsState = TRUE,
  asStringsActions = FALSE,
  withHarc = FALSE
)

Arguments

mdp

The MDP loaded using loadMDP().

sId

The id of the state(s) considered.

stateStr

A character vector containing the index of the state(s) (e.g. "n0,s0,a0,n1,s1"). Parameter sId are ignored if not NULL.

stageStr

A character vector containing the index of the stage(s) (e.g. "n0,s0,a0,n1"). Parameter sId and idxS are ignored if not NULL.

withList

Output info as a list lst.

withDF

Output the info as a data frame.

dfLevel

If withDF and equal "state" the data frame contains a row for each state. If equal "action" the data frame contains a row for each action.

asStringsState

Write state vector as a string; otherwise, output it as columns.

asStringsActions

Write action vectors (weights, transitions and probabilities) as strings; otherwise, output it as columns.

withHarc

Output a hyperarcs data frame. Each row contains a hyperarc with the first column denoting the head (sId), the tails (sId) and the label.

Value

A list containing the list, data frame(s).

Examples

## Set working dir
wd <- setwd(tempdir())

# Create the small machine repleacement problem used as an example in L.R. Nielsen and A.R.
# Kristensen. Finding the K best policies in a finite-horizon Markov decision process. European
# Journal of Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011.

## Create the MDP using a dummy replacement node
prefix<-"machine1_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
      w$state(label="replaced")       # v=(3,3)
         w$action(label="Dummy", weights=0, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good", end=TRUE)        # v=(4,0)
      w$state(label="average", end=TRUE)     # v=(4,1)
      w$state(label="not working", end=TRUE) # v=(4,2)
      w$state(label="replaced", end=TRUE)    # v=(4,3)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 14 
#>     actions: 18 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Load the model into memory
mdp<-loadMDP(prefix)
#> Read binary files (0.000127017 sec.)
#> Build the HMDP (3.4455e-05 sec.)
#> Checking MDP and found no errors (1.172e-06 sec.)
mdp
#> $binNames
#>  [1] "machine1_stateIdx.bin"          "machine1_stateIdxLbl.bin"      
#>  [3] "machine1_actionIdx.bin"         "machine1_actionIdxLbl.bin"     
#>  [5] "machine1_actionWeight.bin"      "machine1_actionWeightLbl.bin"  
#>  [7] "machine1_transProb.bin"         "machine1_externalProcesses.bin"
#>  [9] "machine1_transWeight.bin"       "machine1_transWeightLbl.bin"   
#> 
#> $timeHorizon
#> [1] 5
#> 
#> $states
#> [1] 14
#> 
#> $founderStatesLast
#> [1] 4
#> 
#> $actions
#> [1] 18
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $weightActionNames
#> [1] "Net reward"
#> 
#> $weightTransNames
#> character(0)
#> 
#> $ptr
#> C++ object <0x559e293a4b50> of class 'HMDP' <0x559e23190c70>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 14 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 4,0      good        <NULL>    
#>  2     1 4,1      average     <NULL>    
#>  3     2 4,2      not working <NULL>    
#>  4     3 4,3      replaced    <NULL>    
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 3,3      replaced    <list [1]>
#>  9     8 2,0      good        <list [2]>
#> 10     9 2,1      average     <list [2]>
#> 11    10 2,2      not working <list [2]>
#> 12    11 1,0      good        <list [2]>
#> 13    12 1,1      average     <list [2]>
#> 14    13 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 18 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>        <chr>   <lgl>        <chr> <chr>
#>  1     4 3,0      good           0 mt           55      NA           0     1    
#>  2     4 3,0      good           1 nmt          70      NA           0,1   0.2,…
#>  3     5 3,1      average        0 mt           40      NA           0     1    
#>  4     5 3,1      average        1 nmt          50      NA           1,2   0.2,…
#>  5     6 3,2      not worki…     0 mt           30      NA           0     1    
#>  6     6 3,2      not worki…     1 rep          5       NA           3     1    
#>  7     7 3,3      replaced       0 Dummy        0       NA           3     1    
#>  8     8 2,0      good           0 mt           55      NA           4     1    
#>  9     8 2,0      good           1 nmt          70      NA           4,5   0.5,…
#> 10     9 2,1      average        0 mt           40      NA           4     1    
#> 11     9 2,1      average        1 nmt          50      NA           5,6   0.5,…
#> 12    10 2,2      not worki…     0 mt           30      NA           4     1    
#> 13    10 2,2      not worki…     1 rep          5       NA           7     1    
#> 14    11 1,0      good           0 mt           55      NA           8     1    
#> 15    11 1,0      good           1 nmt          70      NA           8,9   0.6,…
#> 16    12 1,1      average        0 mt           40      NA           8     1    
#> 17    12 1,1      average        1 nmt          50      NA           9,10  0.6,…
#> 18    13 0,0      Dummy          0 buy          -100    NA           11,12 0.7,…
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 18 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>          <dbl> <lgl>        <lis> <lis>
#>  1     4 3,0      good           0 mt                55 NA           <dbl> <dbl>
#>  2     4 3,0      good           1 nmt               70 NA           <dbl> <dbl>
#>  3     5 3,1      average        0 mt                40 NA           <dbl> <dbl>
#>  4     5 3,1      average        1 nmt               50 NA           <dbl> <dbl>
#>  5     6 3,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#>  6     6 3,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#>  7     7 3,3      replaced       0 Dummy              0 NA           <dbl> <dbl>
#>  8     8 2,0      good           0 mt                55 NA           <dbl> <dbl>
#>  9     8 2,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 10     9 2,1      average        0 mt                40 NA           <dbl> <dbl>
#> 11     9 2,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 12    10 2,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#> 13    10 2,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 14    11 1,0      good           0 mt                55 NA           <dbl> <dbl>
#> 15    11 1,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 16    12 1,1      average        0 mt                40 NA           <dbl> <dbl>
#> 17    12 1,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 18    13 0,0      Dummy          0 buy             -100 NA           <dbl> <dbl>
#> 

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
scrapValues<-c(30,10,5,0)   # scrap values (the values of the 4 states at stage 4)
runValueIte(mdp, w, termValues=scrapValues)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using weight 'Net reward' under expected-weight Bellman operator.
#>  Finished. Cpu time 7.133e-06 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""             30 
#>  2     1 4,1      average        -1 ""             10 
#>  3     2 4,2      not working    -1 ""              5 
#>  4     3 4,3      replaced       -1 ""              0 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 3,3      replaced        0 "Dummy"         0 
#>  9     8 2,0      good            1 "nmt"         148.
#> 10     9 2,1      average         0 "mt"          125 
#> 11    10 2,2      not working     0 "mt"          115 
#> 12    11 1,0      good            1 "nmt"         208.
#> 13    12 1,1      average         0 "mt"          188.
#> 14    13 0,0      Dummy           0 "buy"         102.

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""            30  
#>  2     1 4,1      average        -1 ""            10  
#>  3     2 4,2      not working    -1 ""             5  
#>  4     3 4,3      replaced       -1 ""             0  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 3,3      replaced        0 "Dummy"        0  
#>  9     8 2,0      good            0 "mt"         140  
#> 10     9 2,1      average         0 "mt"         125  
#> 11    10 2,2      not working     0 "mt"         115  
#> 12    11 1,0      good            0 "mt"         195  
#> 13    12 1,1      average         0 "mt"         180  
#> 14    13 0,0      Dummy           0 "buy"         90.5



# The example given in L.R. Nielsen and A.R. Kristensen. Finding the K best
# policies in a finite-horizon Markov decision process. European Journal of
# Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011,
# does actually not have any dummy replacement node as in the MDP above. The same
# model can be created using a single dummy node at the end of the process.

## Create the MDP using a single dummy node
prefix<-"machine2_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE) # transition to sId=12 (Dummy)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good")        # v=(4,0)
         w$action(label="rep", weights=30, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="average")     # v=(4,1)
         w$action(label="rep", weights=10, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="not working") # v=(4,2)
         w$action(label="rep", weights=5, prob=c(1,0,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=5
      w$state(label="Dummy", end=TRUE)        # v=(5,0)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 13 
#>     actions: 20 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Have a look at the state-expanded hypergraph
mdp<-loadMDP(prefix)
#> Read binary files (0.000143137 sec.)
#> Build the HMDP (3.747e-05 sec.)
#> Checking MDP and found no errors (3.046e-06 sec.)
mdp
#> $binNames
#>  [1] "machine2_stateIdx.bin"          "machine2_stateIdxLbl.bin"      
#>  [3] "machine2_actionIdx.bin"         "machine2_actionIdxLbl.bin"     
#>  [5] "machine2_actionWeight.bin"      "machine2_actionWeightLbl.bin"  
#>  [7] "machine2_transProb.bin"         "machine2_externalProcesses.bin"
#>  [9] "machine2_transWeight.bin"       "machine2_transWeightLbl.bin"   
#> 
#> $timeHorizon
#> [1] 6
#> 
#> $states
#> [1] 13
#> 
#> $founderStatesLast
#> [1] 1
#> 
#> $actions
#> [1] 20
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $weightActionNames
#> [1] "Net reward"
#> 
#> $weightTransNames
#> character(0)
#> 
#> $ptr
#> C++ object <0x559e2917b820> of class 'HMDP' <0x559e23190c70>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 13 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 5,0      Dummy       <NULL>    
#>  2     1 4,0      good        <list [1]>
#>  3     2 4,1      average     <list [1]>
#>  4     3 4,2      not working <list [1]>
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 2,0      good        <list [2]>
#>  9     8 2,1      average     <list [2]>
#> 10     9 2,2      not working <list [2]>
#> 11    10 1,0      good        <list [2]>
#> 12    11 1,1      average     <list [2]>
#> 13    12 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 20 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>        <chr>   <lgl>        <chr> <chr>
#>  1     1 4,0      good           0 rep          30      NA           0     1    
#>  2     2 4,1      average        0 rep          10      NA           0     1    
#>  3     3 4,2      not worki…     0 rep          5       NA           0     1    
#>  4     4 3,0      good           0 mt           55      NA           1     1    
#>  5     4 3,0      good           1 nmt          70      NA           1,2   0.2,…
#>  6     5 3,1      average        0 mt           40      NA           1     1    
#>  7     5 3,1      average        1 nmt          50      NA           2,3   0.2,…
#>  8     6 3,2      not worki…     0 mt           30      NA           1     1    
#>  9     6 3,2      not worki…     1 rep          5       NA           0     1    
#> 10     7 2,0      good           0 mt           55      NA           4     1    
#> 11     7 2,0      good           1 nmt          70      NA           4,5   0.5,…
#> 12     8 2,1      average        0 mt           40      NA           4     1    
#> 13     8 2,1      average        1 nmt          50      NA           5,6   0.5,…
#> 14     9 2,2      not worki…     0 mt           30      NA           4     1    
#> 15     9 2,2      not worki…     1 rep          5       NA           0     1    
#> 16    10 1,0      good           0 mt           55      NA           7     1    
#> 17    10 1,0      good           1 nmt          70      NA           7,8   0.6,…
#> 18    11 1,1      average        0 mt           40      NA           7     1    
#> 19    11 1,1      average        1 nmt          50      NA           8,9   0.6,…
#> 20    12 0,0      Dummy          0 buy          -100    NA           10,11 0.7,…
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 20 × 9
#>      sId stateStr label       aIdx label_action weights transWeights trans pr   
#>    <dbl> <chr>    <chr>      <dbl> <chr>          <dbl> <lgl>        <lis> <lis>
#>  1     1 4,0      good           0 rep               30 NA           <dbl> <dbl>
#>  2     2 4,1      average        0 rep               10 NA           <dbl> <dbl>
#>  3     3 4,2      not worki…     0 rep                5 NA           <dbl> <dbl>
#>  4     4 3,0      good           0 mt                55 NA           <dbl> <dbl>
#>  5     4 3,0      good           1 nmt               70 NA           <dbl> <dbl>
#>  6     5 3,1      average        0 mt                40 NA           <dbl> <dbl>
#>  7     5 3,1      average        1 nmt               50 NA           <dbl> <dbl>
#>  8     6 3,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#>  9     6 3,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 10     7 2,0      good           0 mt                55 NA           <dbl> <dbl>
#> 11     7 2,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 12     8 2,1      average        0 mt                40 NA           <dbl> <dbl>
#> 13     8 2,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 14     9 2,2      not worki…     0 mt                30 NA           <dbl> <dbl>
#> 15     9 2,2      not worki…     1 rep                5 NA           <dbl> <dbl>
#> 16    10 1,0      good           0 mt                55 NA           <dbl> <dbl>
#> 17    10 1,0      good           1 nmt               70 NA           <dbl> <dbl>
#> 18    11 1,1      average        0 mt                40 NA           <dbl> <dbl>
#> 19    11 1,1      average        1 nmt               50 NA           <dbl> <dbl>
#> 20    12 0,0      Dummy          0 buy             -100 NA           <dbl> <dbl>
#> 

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
runValueIte(mdp, w, termValues = 0)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using weight 'Net reward' under expected-weight Bellman operator.
#>  Finished. Cpu time 7.334e-06 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""              0 
#>  2     1 4,0      good            0 "rep"          30 
#>  3     2 4,1      average         0 "rep"          10 
#>  4     3 4,2      not working     0 "rep"           5 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 2,0      good            1 "nmt"         148.
#>  9     8 2,1      average         0 "mt"          125 
#> 10     9 2,2      not working     0 "mt"          115 
#> 11    10 1,0      good            1 "nmt"         208.
#> 12    11 1,1      average         0 "mt"          188.
#> 13    12 0,0      Dummy           0 "buy"         102.

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""             0  
#>  2     1 4,0      good            0 "rep"         30  
#>  3     2 4,1      average         0 "rep"         10  
#>  4     3 4,2      not working     0 "rep"          5  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 2,0      good            0 "mt"         140  
#>  9     8 2,1      average         0 "mt"         125  
#> 10     9 2,2      not working     0 "mt"         115  
#> 11    10 1,0      good            0 "mt"         195  
#> 12    11 1,1      average         0 "mt"         180  
#> 13    12 0,0      Dummy           0 "buy"         90.5


## Reset working dir
setwd(wd)