@@ -734,10 +734,11 @@ begin
734
734
if isterm (i_s)
735
735
if continuing
736
736
state_transition_map[:, i_s] .= i_start
737
+ reward_transition_map[:, i_s] .= stepreward
737
738
else
738
739
state_transition_map[:, i_s] .= i_s
740
+ reward_transition_map[:, i_s] .= 0f0
739
741
end
740
- reward_transition_map[:, i_s] .= 0f0
741
742
else
742
743
for a in actions
743
744
i_a = action_index[a] # get index for action
@@ -1191,6 +1192,16 @@ State values for the random policy. Notice that at a discount rate of $\gamma=1
1191
1192
md"""Select Discount Rate for State Policy Evaluation: $(@bind γ_gridworld_policy_evaluation Slider(0.01f0:0.01f0:1f0; show_value=true, default = 1f0))"""
1192
1193
╠═╡ =#
1193
1194
1195
+ # ╔═╡ e8fb7296-ecaf-48a4-a15c-cb994e399387
1196
+ #= ╠═╡
1197
# UI cell: binds `ex_3_5_params` to a combined PlutoUI widget with two named
# children: `:γ` (a Slider over 0.1f0:0.1f0:1.f0, default 0.9f0, value shown)
# and `:c` (a NumberField over 0:100).
# NOTE(review): the bound value is later splatted into ex_3_5_grid(γ, c), so it
# is presumably an ordered collection with γ first and c second — confirm
# against PlutoUI.combine's documented return type.
+ @bind ex_3_5_params PlutoUI.combine() do Child
1198
+ md"""
1199
+ Discount Rate: $(Child(:γ, Slider(0.1f0:0.1f0:1.f0; default = 0.9f0, show_value=true)))
1200
+ Reward Boost: $(Child(:c, NumberField(0:100)))
1201
+ """
1202
+ end
1203
+ ╠═╡ =#
1204
+
1194
1205
# ╔═╡ cb96b24a-65aa-4832-bc7d-093f0c951f83
1195
1206
# ╠═╡ skip_as_script = true
1196
1207
#= ╠═╡
@@ -5236,6 +5247,39 @@ show_grid_value(windy_gridworld, windy_gridworld_random_policy_evaluation.value_
5236
5247
show_grid_value(stochastic_gridworld, stochastic_gridworld_random_policy_evaluation.value_function, "gridworld_random_values"; square_pixels = 50)
5237
5248
╠═╡ =#
5238
5249
5250
+ # ╔═╡ 75513920-f739-4d9d-b2e7-598a7905c854
5251
+ #= ╠═╡
5252
# ex_3_5_grid(γ, c)
#
# Builds two deterministic gridworlds that differ only in the `continuing`
# flag (false for mdp1, true for mdp2), evaluates
# `example_gridworld_random_policy` on each with `policy_evaluation_v` at
# discount rate γ, and returns an @htl fragment showing the two value grids
# side by side.
#
# Arguments:
#   γ — discount rate, passed straight through to policy_evaluation_v.
#   c — constant added to both rewards: stepreward becomes Float32(c) and
#       termreward becomes 10f0 + c.
#
# Both grids are shifted so they can be compared by relative value: the
# episodic grid is shifted by its minimum over non-terminal states, the
# continuing grid by its overall minimum.
+ function ex_3_5_grid(γ, c)
5253
+ mdp1 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=false)
5254
+ mdp2 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=true)
5255
+
5256
+ solution1 = policy_evaluation_v(mdp1, example_gridworld_random_policy, γ)
5257
+ solution2 = policy_evaluation_v(mdp2, example_gridworld_random_policy, γ)
5258
+
5259
# Copy so the shift below does not mutate the solver's own value_function.
+ v1 = copy(solution1.value_function)
5260
# Subtract the smallest value found among non-terminal states only.
+ v1 .-= minimum(solution1.value_function[findall(.!mdp1.terminal_states)])
5261
# Terminal entries are overwritten with 0 after the shift, so they are not
# displayed relative to that minimum.
+ v1[findall(mdp1.terminal_states)] .= 0f0
5262
+
5263
+ @htl("""
5264
+ <div style = "display: flex; justify-content: space-around;">
5265
+ <div>
5266
+ Episodic Values Relative to Minimum Non-Terminal Value
5267
+ $(show_grid_value(mdp1, v1, "solution_values_351"; square_pixels = 50))
5268
+ </div>
5269
+ <div>
5270
+ Continuing Values Relative to Minimum
5271
+ $(show_grid_value(mdp2, solution2.value_function .- minimum(solution2.value_function), "solution_values_352"; square_pixels = 50))
5272
+ </div>
5273
+ </div>
5274
+ """)
5275
+ end
5276
+ ╠═╡ =#
5277
+
5278
+ # ╔═╡ 822d6b66-5c1f-4abe-87d0-a10db0f309f7
5279
+ #= ╠═╡
5280
# Display cell: renders the episodic/continuing comparison for the current
# widget values by splatting ex_3_5_params into ex_3_5_grid's (γ, c) arguments.
# NOTE(review): relies on the splat yielding γ first, then c — confirm.
+ ex_3_5_grid(ex_3_5_params...)
5281
+ ╠═╡ =#
5282
+
5239
5283
# ╔═╡ f856ecc7-53e9-47e4-9869-abca0f19a98b
5240
5284
#= ╠═╡
5241
5285
show_grid_value(differential_gridworld, differential_policy_evaluation_v(differential_gridworld, π_list_differential[3]; θ = 1f-6).value_function, "fdfs", square_pixels = 40)
@@ -6581,8 +6625,11 @@ version = "17.4.0+2"
6581
6625
# ╟─900a2ece-9638-49fc-afbe-e012f9520b48
6582
6626
# ╟─0f6cc7a9-4184-471f-86d5-4ad0c0e495ce
6583
6627
# ╟─91ca282d-e857-41d7-b99d-d9449b82da09
6584
- # ╠═5b53ef57-12d1-45e2-ad1e-28c490c336a6
6585
- # ╟─966eae0d-7556-4ff9-b9f7-d47a736524a4
6628
+ # ╟─5b53ef57-12d1-45e2-ad1e-28c490c336a6
6629
+ # ╟─e8fb7296-ecaf-48a4-a15c-cb994e399387
6630
+ # ╟─822d6b66-5c1f-4abe-87d0-a10db0f309f7
6631
+ # ╠═75513920-f739-4d9d-b2e7-598a7905c854
6632
+ # ╠═966eae0d-7556-4ff9-b9f7-d47a736524a4
6586
6633
# ╟─cb96b24a-65aa-4832-bc7d-093f0c951f83
6587
6634
# ╟─7df4fcbb-2f5f-4d59-ba0c-c7e635bb0503
6588
6635
# ╟─4f0f052d-b461-4040-b5ff-46aac74a24de
0 commit comments