Skip to contents

If the policy does not contain all states then the actions from the previous optimal policy are used.

Usage

setPolicy(mdp, policy)

Arguments

mdp

The MDP loaded using loadMDP().

policy

A data frame with two columns state id sId and action index aIdx.

Value

NULL (invisible)

Examples

## Set working dir
wd <- setwd(tempdir())

# Create the small machine repleacement problem used as an example in L.R. Nielsen and A.R.
# Kristensen. Finding the K best policies in a finite-horizon Markov decision process. European
# Journal of Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011.

## Create the MDP using a dummy replacement node
prefix<-"machine1_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(1,3,1), end=TRUE)
      w$endState()
      w$state(label="replaced")       # v=(3,3)
         w$action(label="Dummy", weights=0, prob=c(1,3,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good", end=TRUE)        # v=(4,0)
      w$state(label="average", end=TRUE)     # v=(4,1)
      w$state(label="not working", end=TRUE) # v=(4,2)
      w$state(label="replaced", end=TRUE)    # v=(4,3)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 14 
#>     actions: 18 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Load the model into memory
mdp<-loadMDP(prefix)
#> Read binary files (0.000151503 sec.)
#> Build the HMDP (4.0501e-05 sec.)
#> Checking MDP and found no errors (2.2e-06 sec.)
mdp
#> $binNames
#> [1] "machine1_stateIdx.bin"          "machine1_stateIdxLbl.bin"      
#> [3] "machine1_actionIdx.bin"         "machine1_actionIdxLbl.bin"     
#> [5] "machine1_actionWeight.bin"      "machine1_actionWeightLbl.bin"  
#> [7] "machine1_transProb.bin"         "machine1_externalProcesses.bin"
#> 
#> $timeHorizon
#> [1] 5
#> 
#> $states
#> [1] 14
#> 
#> $founderStatesLast
#> [1] 4
#> 
#> $actions
#> [1] 18
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $ptr
#> C++ object <0x55c72a907ec0> of class 'HMDP' <0x55c727d5e910>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 14 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 4,0      good        <NULL>    
#>  2     1 4,1      average     <NULL>    
#>  3     2 4,2      not working <NULL>    
#>  4     3 4,3      replaced    <NULL>    
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 3,3      replaced    <list [1]>
#>  9     8 2,0      good        <list [2]>
#> 10     9 2,1      average     <list [2]>
#> 11    10 2,2      not working <list [2]>
#> 12    11 1,0      good        <list [2]>
#> 13    12 1,1      average     <list [2]>
#> 14    13 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 18 × 8
#>      sId stateStr label        aIdx label_action weights trans pr     
#>    <dbl> <chr>    <chr>       <dbl> <chr>        <chr>   <chr> <chr>  
#>  1     4 3,0      good            0 mt           55      0     1      
#>  2     4 3,0      good            1 nmt          70      0,1   0.2,0.8
#>  3     5 3,1      average         0 mt           40      0     1      
#>  4     5 3,1      average         1 nmt          50      1,2   0.2,0.8
#>  5     6 3,2      not working     0 mt           30      0     1      
#>  6     6 3,2      not working     1 rep          5       3     1      
#>  7     7 3,3      replaced        0 Dummy        0       3     1      
#>  8     8 2,0      good            0 mt           55      4     1      
#>  9     8 2,0      good            1 nmt          70      4,5   0.5,0.5
#> 10     9 2,1      average         0 mt           40      4     1      
#> 11     9 2,1      average         1 nmt          50      5,6   0.5,0.5
#> 12    10 2,2      not working     0 mt           30      4     1      
#> 13    10 2,2      not working     1 rep          5       7     1      
#> 14    11 1,0      good            0 mt           55      8     1      
#> 15    11 1,0      good            1 nmt          70      8,9   0.6,0.4
#> 16    12 1,1      average         0 mt           40      8     1      
#> 17    12 1,1      average         1 nmt          50      9,10  0.6,0.4
#> 18    13 0,0      Dummy           0 buy          -100    11,12 0.7,0.3
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 18 × 8
#>      sId stateStr label        aIdx label_action weights trans     pr       
#>    <dbl> <chr>    <chr>       <dbl> <chr>          <dbl> <list>    <list>   
#>  1     4 3,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#>  2     4 3,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#>  3     5 3,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#>  4     5 3,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#>  5     6 3,2      not working     0 mt                30 <dbl [1]> <dbl [1]>
#>  6     6 3,2      not working     1 rep                5 <dbl [1]> <dbl [1]>
#>  7     7 3,3      replaced        0 Dummy              0 <dbl [1]> <dbl [1]>
#>  8     8 2,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#>  9     8 2,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#> 10     9 2,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#> 11     9 2,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#> 12    10 2,2      not working     0 mt                30 <dbl [1]> <dbl [1]>
#> 13    10 2,2      not working     1 rep                5 <dbl [1]> <dbl [1]>
#> 14    11 1,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#> 15    11 1,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#> 16    12 1,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#> 17    12 1,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#> 18    13 0,0      Dummy           0 buy             -100 <dbl [2]> <dbl [2]>
#> 

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
scrapValues<-c(30,10,5,0)   # scrap values (the values of the 4 states at stage 4)
runValueIte(mdp, w, termValues=scrapValues)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using quantity 'Net reward' under reward criterion.
#>  Finished. Cpu time 8.5e-06 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""             30 
#>  2     1 4,1      average        -1 ""             10 
#>  3     2 4,2      not working    -1 ""              5 
#>  4     3 4,3      replaced       -1 ""              0 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 3,3      replaced        0 "Dummy"         0 
#>  9     8 2,0      good            1 "nmt"         148.
#> 10     9 2,1      average         0 "mt"          125 
#> 11    10 2,2      not working     0 "mt"          115 
#> 12    11 1,0      good            1 "nmt"         208.
#> 13    12 1,1      average         0 "mt"          188.
#> 14    13 0,0      Dummy           0 "buy"         102.

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  
#> # A tibble: 14 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 4,0      good           -1 ""            30  
#>  2     1 4,1      average        -1 ""            10  
#>  3     2 4,2      not working    -1 ""             5  
#>  4     3 4,3      replaced       -1 ""             0  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 3,3      replaced        0 "Dummy"        0  
#>  9     8 2,0      good            0 "mt"         140  
#> 10     9 2,1      average         0 "mt"         125  
#> 11    10 2,2      not working     0 "mt"         115  
#> 12    11 1,0      good            0 "mt"         195  
#> 13    12 1,1      average         0 "mt"         180  
#> 14    13 0,0      Dummy           0 "buy"         90.5



# The example given in L.R. Nielsen and A.R. Kristensen. Finding the K best
# policies in a finite-horizon Markov decision process. European Journal of
# Operational Research, 175(2):1164-1179, 2006. doi:10.1016/j.ejor.2005.06.011,
# does actually not have any dummy replacement node as in the MDP above. The same
# model can be created using a single dummy node at the end of the process.

## Create the MDP using a single dummy node
prefix<-"machine2_"
w <- binaryMDPWriter(prefix)
w$setWeights(c("Net reward"))
w$process()
   w$stage()   # stage n=0
      w$state(label="Dummy")          # v=(0,0)
         w$action(label="buy", weights=-100, prob=c(1,0,0.7, 1,1,0.3), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=1
      w$state(label="good")           # v=(1,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.6, 1,1,0.4), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(1,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.6, 1,2,0.4), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=2
      w$state(label="good")           # v=(2,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.5, 1,1,0.5), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(2,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.5, 1,2,0.5), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(2,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE) # transition to sId=12 (Dummy)
      w$endState()
   w$endStage()
   w$stage()   # stage n=3
      w$state(label="good")           # v=(3,0)
         w$action(label="mt", weights=55, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=70, prob=c(1,0,0.2, 1,1,0.8), end=TRUE)
      w$endState()
      w$state(label="average")        # v=(3,1)
         w$action(label="mt", weights=40, prob=c(1,0,1), end=TRUE)
         w$action(label="nmt", weights=50, prob=c(1,1,0.2, 1,2,0.8), end=TRUE)
      w$endState()
      w$state(label="not working")    # v=(3,2)
         w$action(label="mt", weights=30, prob=c(1,0,1), end=TRUE)
         w$action(label="rep", weights=5, prob=c(3,12,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=4
      w$state(label="good")        # v=(4,0)
         w$action(label="rep", weights=30, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="average")     # v=(4,1)
         w$action(label="rep", weights=10, prob=c(1,0,1), end=TRUE)
      w$endState()
      w$state(label="not working") # v=(4,2)
         w$action(label="rep", weights=5, prob=c(1,0,1), end=TRUE)
      w$endState()
   w$endStage()
   w$stage()   # stage n=5
      w$state(label="Dummy", end=TRUE)        # v=(5,0)
   w$endStage()
w$endProcess()
w$closeWriter()
#> 
#>   Statistics:
#>     states : 13 
#>     actions: 20 
#>     weights: 1 
#> 
#>   Closing binary MDP writer.
#> 

## Have a look at the state-expanded hypergraph
mdp<-loadMDP(prefix)
#> Read binary files (0.000161504 sec.)
#> Build the HMDP (4.0401e-05 sec.)
#> Checking MDP and found no errors (2.2e-06 sec.)
mdp
#> $binNames
#> [1] "machine2_stateIdx.bin"          "machine2_stateIdxLbl.bin"      
#> [3] "machine2_actionIdx.bin"         "machine2_actionIdxLbl.bin"     
#> [5] "machine2_actionWeight.bin"      "machine2_actionWeightLbl.bin"  
#> [7] "machine2_transProb.bin"         "machine2_externalProcesses.bin"
#> 
#> $timeHorizon
#> [1] 6
#> 
#> $states
#> [1] 13
#> 
#> $founderStatesLast
#> [1] 1
#> 
#> $actions
#> [1] 20
#> 
#> $levels
#> [1] 1
#> 
#> $weightNames
#> [1] "Net reward"
#> 
#> $ptr
#> C++ object <0x55c729dd34f0> of class 'HMDP' <0x55c727d5e910>
#> 
#> attr(,"class")
#> [1] "HMDP" "list"
plot(mdp)


getInfo(mdp, withList = FALSE)
#> $df
#> # A tibble: 13 × 4
#>      sId stateStr label       actions   
#>    <dbl> <chr>    <chr>       <list>    
#>  1     0 5,0      Dummy       <NULL>    
#>  2     1 4,0      good        <list [1]>
#>  3     2 4,1      average     <list [1]>
#>  4     3 4,2      not working <list [1]>
#>  5     4 3,0      good        <list [2]>
#>  6     5 3,1      average     <list [2]>
#>  7     6 3,2      not working <list [2]>
#>  8     7 2,0      good        <list [2]>
#>  9     8 2,1      average     <list [2]>
#> 10     9 2,2      not working <list [2]>
#> 11    10 1,0      good        <list [2]>
#> 12    11 1,1      average     <list [2]>
#> 13    12 0,0      Dummy       <list [1]>
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = TRUE)
#> $df
#> # A tibble: 20 × 8
#>      sId stateStr label        aIdx label_action weights trans pr     
#>    <dbl> <chr>    <chr>       <dbl> <chr>        <chr>   <chr> <chr>  
#>  1     1 4,0      good            0 rep          30      0     1      
#>  2     2 4,1      average         0 rep          10      0     1      
#>  3     3 4,2      not working     0 rep          5       0     1      
#>  4     4 3,0      good            0 mt           55      1     1      
#>  5     4 3,0      good            1 nmt          70      1,2   0.2,0.8
#>  6     5 3,1      average         0 mt           40      1     1      
#>  7     5 3,1      average         1 nmt          50      2,3   0.2,0.8
#>  8     6 3,2      not working     0 mt           30      1     1      
#>  9     6 3,2      not working     1 rep          5       0     1      
#> 10     7 2,0      good            0 mt           55      4     1      
#> 11     7 2,0      good            1 nmt          70      4,5   0.5,0.5
#> 12     8 2,1      average         0 mt           40      4     1      
#> 13     8 2,1      average         1 nmt          50      5,6   0.5,0.5
#> 14     9 2,2      not working     0 mt           30      4     1      
#> 15     9 2,2      not working     1 rep          5       0     1      
#> 16    10 1,0      good            0 mt           55      7     1      
#> 17    10 1,0      good            1 nmt          70      7,8   0.6,0.4
#> 18    11 1,1      average         0 mt           40      7     1      
#> 19    11 1,1      average         1 nmt          50      8,9   0.6,0.4
#> 20    12 0,0      Dummy           0 buy          -100    10,11 0.7,0.3
#> 
getInfo(mdp, withList = FALSE, dfLevel = "action", asStringsActions = FALSE)
#> $df
#> # A tibble: 20 × 8
#>      sId stateStr label        aIdx label_action weights trans     pr       
#>    <dbl> <chr>    <chr>       <dbl> <chr>          <dbl> <list>    <list>   
#>  1     1 4,0      good            0 rep               30 <dbl [1]> <dbl [1]>
#>  2     2 4,1      average         0 rep               10 <dbl [1]> <dbl [1]>
#>  3     3 4,2      not working     0 rep                5 <dbl [1]> <dbl [1]>
#>  4     4 3,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#>  5     4 3,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#>  6     5 3,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#>  7     5 3,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#>  8     6 3,2      not working     0 mt                30 <dbl [1]> <dbl [1]>
#>  9     6 3,2      not working     1 rep                5 <dbl [1]> <dbl [1]>
#> 10     7 2,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#> 11     7 2,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#> 12     8 2,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#> 13     8 2,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#> 14     9 2,2      not working     0 mt                30 <dbl [1]> <dbl [1]>
#> 15     9 2,2      not working     1 rep                5 <dbl [1]> <dbl [1]>
#> 16    10 1,0      good            0 mt                55 <dbl [1]> <dbl [1]>
#> 17    10 1,0      good            1 nmt               70 <dbl [2]> <dbl [2]>
#> 18    11 1,1      average         0 mt                40 <dbl [1]> <dbl [1]>
#> 19    11 1,1      average         1 nmt               50 <dbl [2]> <dbl [2]>
#> 20    12 0,0      Dummy           0 buy             -100 <dbl [2]> <dbl [2]>
#> 

## Perform value iteration
w<-"Net reward"             # label of the weight we want to optimize
runValueIte(mdp, w, termValues = 0)
#> Run value iteration with epsilon = 0 at most 1 time(s)
#> using quantity 'Net reward' under reward criterion.
#>  Finished. Cpu time 1.0001e-05 sec.
getPolicy(mdp)     # optimal policy
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""              0 
#>  2     1 4,0      good            0 "rep"          30 
#>  3     2 4,1      average         0 "rep"          10 
#>  4     3 4,2      not working     0 "rep"           5 
#>  5     4 3,0      good            0 "mt"           85 
#>  6     5 3,1      average         0 "mt"           70 
#>  7     6 3,2      not working     0 "mt"           60 
#>  8     7 2,0      good            1 "nmt"         148.
#>  9     8 2,1      average         0 "mt"          125 
#> 10     9 2,2      not working     0 "mt"          115 
#> 11    10 1,0      good            1 "nmt"         208.
#> 12    11 1,1      average         0 "mt"          188.
#> 13    12 0,0      Dummy           0 "buy"         102.

## Calculate the weights of the policy always to maintain
library(magrittr)
policy <- getInfo(mdp, withList = FALSE, dfLevel = "action")$df %>% 
   dplyr::filter(label_action == "mt") %>% 
   dplyr::select(sId, aIdx)
setPolicy(mdp, policy)
runCalcWeights(mdp, w, termValues=scrapValues)
getPolicy(mdp)  
#> # A tibble: 13 × 6
#>      sId stateStr stateLabel   aIdx actionLabel weight
#>    <dbl> <chr>    <chr>       <int> <chr>        <dbl>
#>  1     0 5,0      Dummy          -1 ""             0  
#>  2     1 4,0      good            0 "rep"         30  
#>  3     2 4,1      average         0 "rep"         10  
#>  4     3 4,2      not working     0 "rep"          5  
#>  5     4 3,0      good            0 "mt"          85  
#>  6     5 3,1      average         0 "mt"          70  
#>  7     6 3,2      not working     0 "mt"          60  
#>  8     7 2,0      good            0 "mt"         140  
#>  9     8 2,1      average         0 "mt"         125  
#> 10     9 2,2      not working     0 "mt"         115  
#> 11    10 1,0      good            0 "mt"         195  
#> 12    11 1,1      average         0 "mt"         180  
#> 13    12 0,0      Dummy           0 "buy"         90.5


## Reset working dir
setwd(wd)