Skip to content

Commit 1f742bd

Browse files
committed
Add example 3.5 to summary
1 parent d9e7d5a commit 1f742bd

File tree

1 file changed

+50
-3
lines changed

1 file changed

+50
-3
lines changed

TabularRL.jl/src/tabular_methods_overview_notebook.jl

Lines changed: 50 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -734,10 +734,11 @@ begin
734734
if isterm(i_s)
735735
if continuing
736736
state_transition_map[:, i_s] .= i_start
737+
reward_transition_map[:, i_s] .= stepreward
737738
else
738739
state_transition_map[:, i_s] .= i_s
740+
reward_transition_map[:, i_s] .= 0f0
739741
end
740-
reward_transition_map[:, i_s] .= 0f0
741742
else
742743
for a in actions
743744
i_a = action_index[a] #get index for action
@@ -1191,6 +1192,16 @@ State values for the random policy. Notice that at a discount rate of $\gamma=1
11911192
md"""Select Discount Rate for State Policy Evaluation: $(@bind γ_gridworld_policy_evaluation Slider(0.01f0:0.01f0:1f0; show_value=true, default = 1f0))"""
11921193
╠═╡ =#
11931194

1195+
# ╔═╡ e8fb7296-ecaf-48a4-a15c-cb994e399387
1196+
#=╠═╡
1197+
# Bind `ex_3_5_params` to a combined widget with two named children:
#   :γ — slider over 0.1f0:0.1f0:1.f0, default 0.9f0 (labelled "Discount Rate")
#   :c — number field over 0:100 (labelled "Reward Boost")
# NOTE(review): presumably the bound value is a NamedTuple with fields γ and c — confirm against PlutoUI.combine.
@bind ex_3_5_params PlutoUI.combine() do Child
1198+
md"""
1199+
Discount Rate: $(Child(:γ, Slider(0.1f0:0.1f0:1.f0; default = 0.9f0, show_value=true)))
1200+
Reward Boost: $(Child(:c, NumberField(0:100)))
1201+
"""
1202+
end
1203+
╠═╡ =#
1204+
11941205
# ╔═╡ cb96b24a-65aa-4832-bc7d-093f0c951f83
11951206
# ╠═╡ skip_as_script = true
11961207
#=╠═╡
@@ -5236,6 +5247,39 @@ show_grid_value(windy_gridworld, windy_gridworld_random_policy_evaluation.value_
52365247
show_grid_value(stochastic_gridworld, stochastic_gridworld_random_policy_evaluation.value_function, "gridworld_random_values"; square_pixels = 50)
52375248
╠═╡ =#
52385249

5250+
# ╔═╡ 75513920-f739-4d9d-b2e7-598a7905c854
5251+
#=╠═╡
5252+
# ex_3_5_grid(γ, c)
#
# Build an episodic and a continuing deterministic gridworld that share the same
# rewards, evaluate `example_gridworld_random_policy` on both, and return an HTML
# fragment showing the two value grids side by side.
#
# Arguments:
#   γ — passed as the third positional argument to `policy_evaluation_v`
#       (the discount rate).
#   c — constant added to both the step reward (0 + c) and the terminal reward (10 + c).
#
# Returns the value of the `@htl` expression containing both grids.
function ex_3_5_grid(γ, c)
5253+
# mdp1 is the episodic variant (continuing=false); mdp2 is the continuing variant.
mdp1 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=false)
5254+
mdp2 = make_deterministic_gridworld(;stepreward = 0f0 + Float32(c), termreward=10f0+c, continuing=true)
5255+
5256+
# Evaluate the same policy with the same γ on both problems.
solution1 = policy_evaluation_v(mdp1, example_gridworld_random_policy, γ)
5257+
solution2 = policy_evaluation_v(mdp2, example_gridworld_random_policy, γ)
5258+
5259+
# Work on a copy so the solver's value function is not mutated.
v1 = copy(solution1.value_function)
5260+
# Shift episodic values so the smallest non-terminal value becomes zero.
v1 .-= minimum(solution1.value_function[findall(.!mdp1.terminal_states)])
5261+
# Terminal states are excluded from the shift and displayed as zero.
v1[findall(mdp1.terminal_states)] .= 0f0
5262+
5263+
@htl("""
5264+
<div style = "display: flex; justify-content: space-around;">
5265+
<div>
5266+
Episodic Values Relative to Minimum Non-Terminal Value
5267+
$(show_grid_value(mdp1, v1, "solution_values_351"; square_pixels = 50))
5268+
</div>
5269+
<div>
5270+
Continuing Values Relative to Minimum
5271+
$(show_grid_value(mdp2, solution2.value_function .- minimum(solution2.value_function), "solution_values_352"; square_pixels = 50))
5272+
</div>
5273+
</div>
5274+
""")
5275+
end
5276+
╠═╡ =#
5277+
5278+
# ╔═╡ 822d6b66-5c1f-4abe-87d0-a10db0f309f7
5279+
#=╠═╡
5280+
# Splat the bound widget values into the positional arguments (γ, c).
# NOTE(review): this relies on ex_3_5_params iterating its values in the order γ, c — confirm that PlutoUI.combine preserves Child declaration order.
ex_3_5_grid(ex_3_5_params...)
5281+
╠═╡ =#
5282+
52395283
# ╔═╡ f856ecc7-53e9-47e4-9869-abca0f19a98b
52405284
#=╠═╡
52415285
show_grid_value(differential_gridworld, differential_policy_evaluation_v(differential_gridworld, π_list_differential[3]; θ = 1f-6).value_function, "fdfs", square_pixels = 40)
@@ -6581,8 +6625,11 @@ version = "17.4.0+2"
65816625
# ╟─900a2ece-9638-49fc-afbe-e012f9520b48
65826626
# ╟─0f6cc7a9-4184-471f-86d5-4ad0c0e495ce
65836627
# ╟─91ca282d-e857-41d7-b99d-d9449b82da09
6584-
# ╠═5b53ef57-12d1-45e2-ad1e-28c490c336a6
6585-
# ╟─966eae0d-7556-4ff9-b9f7-d47a736524a4
6628+
# ╟─5b53ef57-12d1-45e2-ad1e-28c490c336a6
6629+
# ╟─e8fb7296-ecaf-48a4-a15c-cb994e399387
6630+
# ╟─822d6b66-5c1f-4abe-87d0-a10db0f309f7
6631+
# ╠═75513920-f739-4d9d-b2e7-598a7905c854
6632+
# ╠═966eae0d-7556-4ff9-b9f7-d47a736524a4
65866633
# ╟─cb96b24a-65aa-4832-bc7d-093f0c951f83
65876634
# ╟─7df4fcbb-2f5f-4d59-ba0c-c7e635bb0503
65886635
# ╟─4f0f052d-b461-4040-b5ff-46aac74a24de

0 commit comments

Comments
 (0)