Update LunarLander version in hyperparams (#495)

araffin · web-flow · commit ad1ae184ebfa · 2025-06-16T11:13:28.000+02:00
* Update LunarLander version in hyperparams

* Update CI dependencies

* Ignore mypy `list-item` error in `scripts/create_mujoco_jobs.py`
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -53,6 +53,7 @@ jobs:
         run: |
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
           uv pip install --system "numpy<2"
+          uv pip install --system "ale-py==0.10.1"
         # Only run for python 3.10, downgrade gym to 0.29.1
         if: matrix.gymnasium-version != '1.0.0'
 
diff --git a/.github/workflows/trained_agents.yml b/.github/workflows/trained_agents.yml
@@ -49,14 +49,12 @@ jobs:
           # Use headless version
           uv pip install --system opencv-python-headless
           uv pip install --system -e .[plots,tests]
-          # Downgrade numpy to run pybullet agents
-          # See https://github.com/bulletphysics/bullet3/issues/4649
-          uv pip install --system "numpy<2"
 
       - name: Install specific version of gym
         run: |
           uv pip install --system gymnasium==${{ matrix.gymnasium-version }}
           uv pip install --system "numpy<2"
+          uv pip install --system "ale-py==0.10.1"
         # Only run for python 3.10, downgrade gym to 0.29.1
         if: matrix.gymnasium-version != '1.0.0'
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -3,6 +3,7 @@
 ### Breaking Changes
 - Upgraded to SB3 >= 2.6.1
 - `linear_schedule` now returns a `SimpleLinearSchedule` object for better portability
+- Renamed `LunarLander-v2` to `LunarLander-v3` in hyperparameters and in `MaskVelocityWrapper` (also `LunarLanderContinuous-v2` to `LunarLanderContinuous-v3`)
 
 ### New Features
 
diff --git a/hyperparams/a2c.yml b/hyperparams/a2c.yml
@@ -19,7 +19,7 @@ CartPole-v1:
   policy: 'MlpPolicy'
   ent_coef: 0.0
 
-LunarLander-v2:
+LunarLander-v3:
   n_envs: 8
   n_timesteps: !!float 2e5
   policy: 'MlpPolicy'
diff --git a/hyperparams/ars.yml b/hyperparams/ars.yml
@@ -19,7 +19,7 @@ Pendulum-v1: &pendulum-params
   zero_policy: False
 
 # TO BE Tuned
-LunarLander-v2:
+LunarLander-v3:
   <<: *pendulum-params
   n_delta: 6
   n_top: 1
diff --git a/hyperparams/dqn.yml b/hyperparams/dqn.yml
@@ -50,7 +50,7 @@ MountainCar-v0:
   policy_kwargs: "dict(net_arch=[256, 256])"
 
 # Tuned
-LunarLander-v2:
+LunarLander-v3:
   n_timesteps: !!float 1e5
   policy: 'MlpPolicy'
   learning_rate: !!float 6.3e-4
diff --git a/hyperparams/ppo.yml b/hyperparams/ppo.yml
@@ -111,7 +111,7 @@ BipedalWalkerHardcore-v3:
   learning_rate: lin_2.5e-4
   clip_range: lin_0.2
 
-LunarLander-v2:
+LunarLander-v3:
   n_envs: 16
   n_timesteps: !!float 1e6
   policy: 'MlpPolicy'
diff --git a/hyperparams/qrdqn.yml b/hyperparams/qrdqn.yml
@@ -43,7 +43,7 @@ MountainCar-v0:
   policy_kwargs: "dict(net_arch=[256, 256], n_quantiles=25)"
 
 # Tuned
-LunarLander-v2:
+LunarLander-v3:
   n_timesteps: !!float 1e5
   policy: 'MlpPolicy'
   learning_rate: lin_1.5e-3
diff --git a/hyperparams/trpo.yml b/hyperparams/trpo.yml
@@ -23,7 +23,7 @@ Pendulum-v1:
   sde_sample_freq: 4
 
 # Tuned
-LunarLander-v2:
+LunarLander-v3:
   n_envs: 2
   n_timesteps: !!float 2e5
   policy: 'MlpPolicy'
diff --git a/rl_zoo3/wrappers.py b/rl_zoo3/wrappers.py
@@ -304,8 +304,8 @@ class MaskVelocityWrapper(gym.ObservationWrapper):
         "MountainCar-v0": np.array([1]),
         "MountainCarContinuous-v0": np.array([1]),
         "Pendulum-v1": np.array([2]),
-        "LunarLander-v2": np.array([2, 3, 5]),
-        "LunarLanderContinuous-v2": np.array([2, 3, 5]),
+        "LunarLander-v3": np.array([2, 3, 5]),
+        "LunarLanderContinuous-v3": np.array([2, 3, 5]),
     }
 
     def __init__(self, env: gym.Env):
diff --git a/scripts/create_mujoco_jobs.py b/scripts/create_mujoco_jobs.py
@@ -49,7 +49,7 @@
             ]
             args = list(map(str, args))
 
-            command = " ".join(["python", "-u", "train.py", *args])
+            command = " ".join(["python", "-u", "train.py", *args])  # type: ignore[list-item]
 
             ok = subprocess.call(["sbatch", "cluster_torchy.sh", algo, env_id, "ablation", command])
             time.sleep(0.05)