Skip to content

Commit b22ba48

Browse files
committed
Streamline mountaincar parameter studies
1 parent c5dcb36 commit b22ba48

File tree

1 file changed

+110
-78
lines changed

1 file changed

+110
-78
lines changed

Chapter-12/Chapter_12_Eligibility_Traces.jl

Lines changed: 110 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -2060,56 +2060,81 @@ end
20602060
# ╔═╡ 8b6b5084-3972-4bd4-9ca2-423f1c627788
20612061
md"""
20622062
### *Example: Mountain Car Sarsa(λ) Variations*
2063-
2064-
Show similar parameter study for sarsa(λ) with different methods of traces and the true online version. Can also add implementations of Sarsa(λ) that use q-learning and use the distribution function over output to bypass the action value estimation
20652063
"""
20662064

20672065
# ╔═╡ 5bc128ec-2934-4aa5-a922-9017f647e1b3
20682066
md"""
20692067
#### Sarsa(λ) Parameter Studies With Mountain Car Tile Coding
20702068
"""
20712069

2072-
# ╔═╡ 5652f3fd-ec23-4dfb-a171-1e1ed0de275a
2073-
#add button to run all parameter studies
2074-
2075-
# ╔═╡ 251a762a-0d78-419f-b38d-8000d1c072af
2070+
# ╔═╡ c19209dc-bddf-4390-95a9-fc1d1d836a8a
20762071
md"""
2077-
##### Sarsa$(λ)$ with $\epsilon = 0.01$
2072+
##### Sarsa$$(λ)$$ with $$\epsilon = 0.01$$
20782073
"""
20792074

2080-
# ╔═╡ 54a335f3-672d-4897-b181-e1ee31ba11e1
2075+
# ╔═╡ 5652f3fd-ec23-4dfb-a171-1e1ed0de275a
2076+
#=╠═╡
2077+
@bind run_mountaincar_λ_study1 CounterButton("Run Parameter Study (could take several minutes)")
2078+
╠═╡ =#
2079+
2080+
# ╔═╡ 2c425a9a-49ae-48d3-8ab7-f3c12b081180
20812081
md"""
2082-
##### Expected Sarsa$(λ)$ with $\epsilon = 0.01$
2082+
##### Expected Sarsa$$(λ)$$ with $$\epsilon = 0.01$$
20832083
"""
20842084

2085-
# ╔═╡ 3086d674-49e4-48b9-ae98-9dede3e98fc8
2085+
# ╔═╡ d7c7316d-aac3-4500-ac3c-0c21b9cf5215
2086+
#=╠═╡
2087+
@bind run_mountaincar_λ_study2 CounterButton("Run Parameter Study (could take several minutes)")
2088+
╠═╡ =#
2089+
2090+
# ╔═╡ aea15e6d-9873-406b-993b-04717dad01c6
20862091
md"""
2087-
##### DP$(λ)$ with $\epsilon = 0.01$
2092+
##### DP$$(λ)$$ with $$\epsilon = 0.01$$
20882093
20892094
In this method the full transition distribution is used and only state values are estimated.
20902095
"""
20912096

2092-
# ╔═╡ 978bb3cd-2b9f-4c73-9d1e-897efbc56f9d
2097+
# ╔═╡ c57b4792-928a-4450-9364-786e9f186cc8
2098+
#=╠═╡
2099+
@bind run_mountaincar_λ_study3 CounterButton("Run Parameter Study (could take several minutes)")
2100+
╠═╡ =#
2101+
2102+
# ╔═╡ b28f47cc-eda7-4961-b6b3-569753386249
20932103
md"""
2094-
##### True Online Sarsa$(λ)$ with $ϵ = 0.01$
2104+
##### True Online Sarsa$$(λ)$$ with $$ϵ = 0.01$$
20952105
20962106
Notice that here a slightly lower value of $\lambda$ is optimal, which increases the degree of bootstrapping compared to Sarsa$(\lambda)$.
20972107
"""
20982108

2099-
# ╔═╡ e7beffa8-cea1-497f-80d5-278c3be17802
2109+
# ╔═╡ 31633123-0249-4d15-b6fe-59480d3038eb
2110+
#=╠═╡
2111+
@bind run_mountaincar_λ_study4 CounterButton("Run Parameter Study (could take several minutes)")
2112+
╠═╡ =#
2113+
2114+
# ╔═╡ 48c87368-6f11-4330-9a29-3ecbf60cd146
21002115
md"""
2101-
##### True Online Expected Sarsa$(λ)$ with $ϵ = 0.01$
2116+
##### True Online Expected Sarsa$$(λ)$$ with $$ϵ = 0.01$$
21022117
21032118
Similar results to the above, as we'd expect for such a small value of $\epsilon$.
21042119
"""
21052120

2106-
# ╔═╡ 0385d4b6-9e60-4e0a-83dd-a9989bdb5cc8
2121+
# ╔═╡ 831b925f-9f76-48e2-9de0-32724215c568
2122+
#=╠═╡
2123+
@bind run_mountaincar_λ_study5 CounterButton("Run Parameter Study (could take several minutes)")
2124+
╠═╡ =#
2125+
2126+
# ╔═╡ 438726e5-f9a1-4bf7-abda-e5bb0eb30c39
21072127
md"""
2108-
##### True Online DP$(λ)$ with $ϵ = 0.01$
2128+
##### True Online DP$$(λ)$$ with $$ϵ = 0.01$$
21092129
21102130
Best results so far, which also favor a higher value of $\lambda$, indicating less reliance on bootstrapping.
21112131
"""
21122132

2133+
# ╔═╡ 4d00dfcc-7b01-4335-95ba-0b31fa0e62ad
2134+
#=╠═╡
2135+
@bind run_mountaincar_λ_study6 CounterButton("Run Parameter Study (could take several minutes)")
2136+
╠═╡ =#
2137+
21132138
# ╔═╡ 0a5bec4a-0e65-4753-a1e8-f7b3c6a061df
21142139
md"""
21152140
##### Results Visualization for Best Training Parameters
@@ -2282,20 +2307,16 @@ function tile_coding_setup(min_value::S, max_value::S, tile_size::S, num_tilings
22822307
(num_features = num_features, get_active_features = f)
22832308
end
22842309

2285-
# ╔═╡ 66112956-63a3-4629-8fba-958ff04f59e2
2286-
function run_mountaincar_dp_λ(num_steps, num_tiles, num_tilings, α, λ; kwargs...)
2287-
tile_coding = tile_coding_setup((-1.2f0, -0.07f0), (0.5f0, 0.07f0), (1f0/num_tiles, 1f0/num_tiles), num_tilings, (1, 3))
2288-
output = dp_λ(MountainCarTask.dist_mdp, 1f0, λ, typemax(Int64), num_steps, tile_coding...; α = α, kwargs...)
2289-
end
2290-
22912310
# ╔═╡ 0324b4e2-2544-4bd6-b310-8a330b5a92c5
22922311
#=╠═╡
2293-
function run_mountaincar_sarsa_λ(num_steps::Integer, num_tiles::Integer, num_tilings::Integer, num_trials::Integer, α_list, λ_list; kwargs...)
2312+
function run_mountaincar_λ_parameter_study(num_steps::Integer, num_tiles::Integer, num_tilings::Integer, num_trials::Integer, α_list, λ_list; algo = sarsa_λ, seed = rand(UInt64), ymin = 100, ymax = 400, kwargs...)
22942313
tile_coding = tile_coding_setup((-1.2f0, -0.07f0), (0.5f0, 0.07f0), (1f0/num_tiles, 1f0/num_tiles), num_tilings, (1, 3))
2314+
Random.seed!(seed)
2315+
mdp = algo == sarsa_λ ? MountainCarTask.mdp : MountainCarTask.dist_mdp
22952316
traces = [begin
22962317
y = [begin
22972318
1:num_trials |> Map() do _
2298-
output = sarsa_λ(MountainCarTask.mdp, 1f0, λ, typemax(Int64), num_steps, tile_coding...; α = α, save_episode_steps = true, kwargs...)
2319+
output = algo(mdp, 1f0, λ, typemax(Int64), num_steps, tile_coding...; α = α, save_episode_steps = true, kwargs...)
22992320
step_history = output.history.episode_steps
23002321
isempty(step_history) && return NaN
23012322
step_history[end] / length(step_history)
@@ -2305,50 +2326,45 @@ function run_mountaincar_sarsa_λ(num_steps::Integer, num_tiles::Integer, num_ti
23052326
scatter(x = α_list, y = y, name = "λ = $λ")
23062327
end
23072328
for λ in λ_list]
2308-
plot(traces, Layout(xaxis_title = "Learning Rate", yaxis_title = "Average Steps Per Episode Averaged <br> Over the First $num_steps Steps and $num_trials Runs"))
2329+
plot(traces, Layout(xaxis_title = "Learning Rate", yaxis_title = "Average Steps Per Episode Averaged <br> Over the First $num_steps Steps and $num_trials Runs", yaxis_range = [ymin, ymax], xaxis_type = "log"))
23092330
end
23102331
╠═╡ =#
23112332

2312-
# ╔═╡ 2fcbe12b-aed2-4815-ac20-307f23e41465
2333+
# ╔═╡ 111cda26-bd25-49ed-9ba7-4ee8f71b063f
23132334
#=╠═╡
2314-
run_mountaincar_sarsa_λ(50_000, 12, 8, 40, Base.LogRange(0.01f0, 0.1f0, 8), [0f0, 0.5f0, 0.8f0, 0.92f0, 0.96f0, 0.98f0, 0.99f0]; ϵ = 0.01f0)
2335+
if run_mountaincar_λ_study1 > 0
2336+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.025f0, 0.15f0, 6), [0f0, 0.5f0, 0.8f0, 0.9f0, 0.95f0, 0.99f0]; ϵ = 0.01f0, seed = 45, ymin = 150)
2337+
else
2338+
md"""Waiting to run parameter study"""
2339+
end
23152340
╠═╡ =#
23162341

2317-
# ╔═╡ 890e46ac-7cf5-43a9-8bb6-db3ee308212a
2342+
# ╔═╡ f1a8df55-a5ef-475e-a0c4-ed31b1c6c9f5
23182343
#=╠═╡
2319-
function run_mountaincar_dp_λ(num_steps::Integer, num_tiles::Integer, num_tilings::Integer, num_trials::Integer, α_list, λ_list; kwargs...)
2320-
tile_coding = tile_coding_setup((-1.2f0, -0.07f0), (0.5f0, 0.07f0), (1f0/num_tiles, 1f0/num_tiles), num_tilings, (1, 3))
2321-
traces = [begin
2322-
y = [begin
2323-
1:num_trials |> Map() do _
2324-
output = dp_λ(MountainCarTask.dist_mdp, 1f0, λ, typemax(Int64), num_steps, tile_coding...; α = α, save_episode_steps = true, kwargs...)
2325-
step_history = output.history.episode_steps
2326-
isempty(step_history) && return NaN
2327-
step_history[end] / length(step_history)
2328-
end |> foldxt(+) |> x -> x/num_trials
2329-
end
2330-
for α in α_list]
2331-
scatter(x = α_list, y = y, name = "λ = $λ")
2332-
end
2333-
for λ in λ_list]
2334-
plot(traces)
2344+
if run_mountaincar_λ_study3 > 0
2345+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.005f0, 0.07f0, 6), [0f0, 0.5f0, 0.8f0, 0.9f0, 0.95f0, 0.99f0]; ϵ = 0.01f0, seed = 45, algo = dp_λ, ymin = 140, ymax = 200)
2346+
else
2347+
md"""Waiting to run parameter study"""
23352348
end
23362349
╠═╡ =#
23372350

2338-
# ╔═╡ 73d4314e-e34e-4f80-a800-9198a375465e
2351+
# ╔═╡ cc14f0a2-d0bc-40fa-83fa-b99e62351282
23392352
#=╠═╡
2340-
run_mountaincar_dp_λ(50_000, 12, 8, 40, Base.LogRange(0.005f0, 0.05f0, 8), [0f0, 0.5f0, 0.9f0, 0.92f0, 0.96f0, 0.99f0]; ϵ = 0.01f0)
2353+
if run_mountaincar_λ_study6 > 0
2354+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.001f0, 0.02f0, 6), [0.8f0, 0.9f0, 0.95f0, 0.99f0]; ϵ = 0.01f0, seed = 45, algo = dp_λ, algo! = true_online_dp_λ!, ymin = 130, ymax = 200)
2355+
else
2356+
md"""Waiting to run parameter study"""
2357+
end
23412358
╠═╡ =#
23422359

2343-
# ╔═╡ c3a8fe6b-ed40-42dd-9cfd-bd9d857682a8
2344-
#=╠═╡
2345-
run_mountaincar_dp_λ(50_000, 12, 8, 40, Base.LogRange(0.001f0, 0.02f0, 8), [0f0, 0.5f0, 0.8f0, 0.90f0, 0.96f0, 0.98f0, 0.99f0]; ϵ = 0.01f0, algo! = true_online_dp_λ!)
2346-
╠═╡ =#
2360+
# ╔═╡ 66112956-63a3-4629-8fba-958ff04f59e2
2361+
function run_mountaincar_dp_λ(num_steps, num_tiles, num_tilings, α, λ; kwargs...)
2362+
tile_coding = tile_coding_setup((-1.2f0, -0.07f0), (0.5f0, 0.07f0), (1f0/num_tiles, 1f0/num_tiles), num_tilings, (1, 3))
2363+
output = dp_λ(MountainCarTask.dist_mdp, 1f0, λ, typemax(Int64), num_steps, tile_coding...; α = α, kwargs...)
2364+
end
23472365

23482366
# ╔═╡ 7a0f8a69-467b-4059-b717-97d8e7a7a5fd
2349-
#=╠═╡
2350-
const mountaincar_test_output = run_mountaincar_dp_λ(1_000_000, 12, 8, 0.003f0, 0.98f0, ϵ = 0.01f0, algo! = true_online_dp_λ!)
2351-
╠═╡ =#
2367+
const mountaincar_test_output = run_mountaincar_dp_λ(100_000, 12, 8, 0.001f0, 0.99f0, ϵ = 0.01f0, algo! = true_online_dp_λ!)
23522368

23532369
# ╔═╡ fbe8691b-6d71-4cba-90e4-5de63421f634
23542370
md"""
@@ -2540,9 +2556,13 @@ begin
25402556
end
25412557
end
25422558

2543-
# ╔═╡ d0188d56-7acd-47da-bb2b-0bd08c7453f3
2559+
# ╔═╡ fb1bde32-35e4-4985-ad88-6b5408f3c7f7
25442560
#=╠═╡
2545-
run_mountaincar_sarsa_λ(50_000, 12, 8, 40, Base.LogRange(0.01f0, 0.1f0, 8), [0f0, 0.5f0, 0.8f0, 0.92f0, 0.96f0, 0.98f0, 0.99f0]; ϵ = 0.01f0, algo! = expected_sarsa_λ!)
2561+
if run_mountaincar_λ_study2 > 0
2562+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.025f0, 0.15f0, 6), [0f0, 0.5f0, 0.8f0, 0.9f0, 0.95f0, 0.99f0]; ϵ = 0.01f0, algo! = expected_sarsa_λ!, seed = 45, ymin = 150)
2563+
else
2564+
md"""Waiting to run parameter study"""
2565+
end
25462566
╠═╡ =#
25472567

25482568
# ╔═╡ 771cca22-d61d-498a-98be-90fa59e09571
@@ -2765,9 +2785,13 @@ begin
27652785
end
27662786
end
27672787

2768-
# ╔═╡ 46fb8d4f-ec4a-49e7-b2c1-7b21feda4df1
2788+
# ╔═╡ 6b449c6c-249e-4193-96ea-caccee683de0
27692789
#=╠═╡
2770-
run_mountaincar_sarsa_λ(50_000, 12, 8, 40, Base.LogRange(0.01f0, .1f0, 8), [0f0, 0.5f0, 0.8f0, 0.92f0, 0.96f0, 0.98f0, 0.99f0]; algo! = true_online_sarsa_λ!, ϵ = 0.01f0)
2790+
if run_mountaincar_λ_study4 > 0
2791+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.01f0, 0.1f0, 6), [0f0, 0.5f0, 0.8f0, 0.9f0, 0.95f0]; ϵ = 0.01f0, seed = 45, algo! = true_online_sarsa_λ!, ymin = 150)
2792+
else
2793+
md"""Waiting to run parameter study"""
2794+
end
27712795
╠═╡ =#
27722796

27732797
# ╔═╡ b6d67598-b020-4626-a572-adfb9e75edba
@@ -2935,9 +2959,13 @@ begin
29352959
end
29362960
end
29372961

2938-
# ╔═╡ 6dd6d055-8882-48a8-a0a6-e5d36405e587
2962+
# ╔═╡ d01f8b48-06c1-4dc7-afae-3a2e1b3ba751
29392963
#=╠═╡
2940-
run_mountaincar_sarsa_λ(50_000, 12, 8, 40, Base.LogRange(0.01f0, .1f0, 8), [0f0, 0.5f0, 0.8f0, 0.92f0, 0.96f0, 0.98f0, 0.99f0]; algo! = true_online_expected_sarsa_λ!, ϵ = 0.01f0)
2964+
if run_mountaincar_λ_study5 > 0
2965+
run_mountaincar_λ_parameter_study(50_000, 12, 8, 40, Base.LogRange(0.01f0, 0.1f0, 6), [0f0, 0.5f0, 0.8f0, 0.9f0, 0.95f0]; ϵ = 0.01f0, seed = 45, algo! = true_online_expected_sarsa_λ!, ymin = 150)
2966+
else
2967+
md"""Waiting to run parameter study"""
2968+
end
29412969
╠═╡ =#
29422970

29432971
# ╔═╡ 5062690c-96b9-450a-9927-6a6707dfc511
@@ -4289,7 +4317,7 @@ function plot_mountaincar_values(v̂_mountain_car, π; n1 = 100, n2 = 100)
42894317
p1 = plot(heatmap(x = xvals, y = vvals, z = values), Layout(xaxis_title = "position", yaxis_title = "velocity", title = "Learned Value Function"))
42904318
p2 = plot(heatmap(x = xvals, y = vvals, z = actions, colorscale = "rb", showscale = false), Layout(xaxis_title = "position", yaxis_title = "velocity", title = "Policy (blue = accelerate left, <br>red = accelerate right, gray = no acceleration)"))
42914319
@htl("""
4292-
<div style = "display:flex;">
4320+
<div style = "display:flex; height: 400px;">
42934321
$p1
42944322
$p2
42954323
</div>
@@ -5236,33 +5264,37 @@ version = "17.4.0+2"
52365264
# ╠═e323cbc2-1396-43fb-969a-1837bb60c5b5
52375265
# ╟─4fa824ba-51a3-4f5a-a990-dd05bbf2526a
52385266
# ╟─7aa62007-0685-40d2-88ab-9c03add8e75a
5239-
# ╠═e047cce1-11a5-4bcb-8668-a767628da140
5240-
# ╠═f5a8cc64-f7a3-44ef-b925-d11df6a414f6
5267+
# ╟─e047cce1-11a5-4bcb-8668-a767628da140
5268+
# ╟─f5a8cc64-f7a3-44ef-b925-d11df6a414f6
52415269
# ╠═771cca22-d61d-498a-98be-90fa59e09571
52425270
# ╠═b6d67598-b020-4626-a572-adfb9e75edba
52435271
# ╠═ded7c8e0-f44c-44c9-afad-070d325c180b
52445272
# ╟─8b6b5084-3972-4bd4-9ca2-423f1c627788
52455273
# ╟─5bc128ec-2934-4aa5-a922-9017f647e1b3
5246-
# ╠═5652f3fd-ec23-4dfb-a171-1e1ed0de275a
5247-
# ╟─251a762a-0d78-419f-b38d-8000d1c072af
5248-
# ╠═2fcbe12b-aed2-4815-ac20-307f23e41465
5249-
# ╟─54a335f3-672d-4897-b181-e1ee31ba11e1
5250-
# ╠═d0188d56-7acd-47da-bb2b-0bd08c7453f3
5251-
# ╟─3086d674-49e4-48b9-ae98-9dede3e98fc8
5252-
# ╠═73d4314e-e34e-4f80-a800-9198a375465e
5253-
# ╟─978bb3cd-2b9f-4c73-9d1e-897efbc56f9d
5254-
# ╠═46fb8d4f-ec4a-49e7-b2c1-7b21feda4df1
5255-
# ╟─e7beffa8-cea1-497f-80d5-278c3be17802
5256-
# ╠═6dd6d055-8882-48a8-a0a6-e5d36405e587
5257-
# ╟─0385d4b6-9e60-4e0a-83dd-a9989bdb5cc8
5258-
# ╠═c3a8fe6b-ed40-42dd-9cfd-bd9d857682a8
5274+
# ╠═0324b4e2-2544-4bd6-b310-8a330b5a92c5
5275+
# ╟─c19209dc-bddf-4390-95a9-fc1d1d836a8a
5276+
# ╟─5652f3fd-ec23-4dfb-a171-1e1ed0de275a
5277+
# ╟─111cda26-bd25-49ed-9ba7-4ee8f71b063f
5278+
# ╟─2c425a9a-49ae-48d3-8ab7-f3c12b081180
5279+
# ╟─d7c7316d-aac3-4500-ac3c-0c21b9cf5215
5280+
# ╟─fb1bde32-35e4-4985-ad88-6b5408f3c7f7
5281+
# ╟─aea15e6d-9873-406b-993b-04717dad01c6
5282+
# ╟─c57b4792-928a-4450-9364-786e9f186cc8
5283+
# ╟─f1a8df55-a5ef-475e-a0c4-ed31b1c6c9f5
5284+
# ╟─b28f47cc-eda7-4961-b6b3-569753386249
5285+
# ╟─31633123-0249-4d15-b6fe-59480d3038eb
5286+
# ╟─6b449c6c-249e-4193-96ea-caccee683de0
5287+
# ╟─48c87368-6f11-4330-9a29-3ecbf60cd146
5288+
# ╟─831b925f-9f76-48e2-9de0-32724215c568
5289+
# ╟─d01f8b48-06c1-4dc7-afae-3a2e1b3ba751
5290+
# ╟─438726e5-f9a1-4bf7-abda-e5bb0eb30c39
5291+
# ╟─4d00dfcc-7b01-4335-95ba-0b31fa0e62ad
5292+
# ╟─cc14f0a2-d0bc-40fa-83fa-b99e62351282
52595293
# ╟─0a5bec4a-0e65-4753-a1e8-f7b3c6a061df
52605294
# ╠═66112956-63a3-4629-8fba-958ff04f59e2
52615295
# ╠═7a0f8a69-467b-4059-b717-97d8e7a7a5fd
52625296
# ╠═798544c9-215c-4516-a196-b00350512d48
52635297
# ╠═d4cd0741-1c01-407f-867c-2c804151c6fb
5264-
# ╠═0324b4e2-2544-4bd6-b310-8a330b5a92c5
5265-
# ╠═890e46ac-7cf5-43a9-8bb6-db3ee308212a
52665298
# ╠═3ac75a88-6894-4c48-ae2a-30c822814888
52675299
# ╠═cc263d1a-d098-472b-8a2f-92e1ddedfdc4
52685300
# ╠═a51c4911-8878-4eef-9ed4-4402d380dc4d

0 commit comments

Comments
 (0)