Use schedule class instead of lambdas (#493)

araffin · web-flow · commit 577616cb9f13 · 2025-05-19T12:42:35.000+02:00
* Use schedule class instead of lambdas

* Add test for linear schedule
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,3 +1,19 @@
+## Release 2.6.1 (WIP)
+
+### Breaking Changes
+- Upgraded to SB3 >= 2.6.1
+- `linear_schedule` now returns a `SimpleLinearSchedule` object for better portability
+
+### New Features
+
+### Bug fixes
+- Docker GPU images are now working again
+- Use `ConstantSchedule` and `SimpleLinearSchedule` instead of `constant_fn` and `linear_schedule`
+
+### Documentation
+
+### Other
+
 ## Release 2.6.0 (2025-03-24)
 
 ### Breaking Changes
diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,5 @@
 gym==0.26.2
-stable-baselines3[extra,tests,docs]>=2.6.0,<3.0
+stable-baselines3[extra,tests,docs]>=2.6.1a1,<3.0
 box2d-py==2.3.8
 pybullet_envs_gymnasium>=0.6.0
 # minigrid
diff --git a/rl_zoo3/exp_manager.py b/rl_zoo3/exp_manager.py
@@ -32,7 +32,7 @@
 from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise
 from stable_baselines3.common.preprocessing import is_image_space, is_image_space_channels_first
 from stable_baselines3.common.sb2_compat.rmsprop_tf_like import RMSpropTFLike  # noqa: F401
-from stable_baselines3.common.utils import constant_fn
+from stable_baselines3.common.utils import ConstantSchedule
 from stable_baselines3.common.vec_env import (
     DummyVecEnv,
     SubprocVecEnv,
@@ -50,7 +50,14 @@
 import rl_zoo3.import_envs  # noqa: F401
 from rl_zoo3.callbacks import SaveVecNormalizeCallback, TrialEvalCallback
 from rl_zoo3.hyperparams_opt import HYPERPARAMS_CONVERTER, HYPERPARAMS_SAMPLER
-from rl_zoo3.utils import ALGOS, get_callback_list, get_class_by_name, get_latest_run_id, get_wrapper_class, linear_schedule
+from rl_zoo3.utils import (
+    ALGOS,
+    SimpleLinearSchedule,
+    get_callback_list,
+    get_class_by_name,
+    get_latest_run_id,
+    get_wrapper_class,
+)
 
 
 class ExperimentManager:
@@ -381,12 +388,12 @@ def _preprocess_schedules(hyperparams: dict[str, Any]) -> dict[str, Any]:
             if isinstance(hyperparams[key], str):
                 schedule, initial_value = hyperparams[key].split("_")
                 initial_value = float(initial_value)
-                hyperparams[key] = linear_schedule(initial_value)
+                hyperparams[key] = SimpleLinearSchedule(initial_value)
             elif isinstance(hyperparams[key], (float, int)):
                 # Negative value: ignore (ex: for clipping)
                 if hyperparams[key] < 0:
                     continue
-                hyperparams[key] = constant_fn(float(hyperparams[key]))
+                hyperparams[key] = ConstantSchedule(float(hyperparams[key]))
             else:
                 raise ValueError(f"Invalid value for {key}: {hyperparams[key]}")
         return hyperparams
diff --git a/rl_zoo3/utils.py b/rl_zoo3/utils.py
@@ -292,25 +292,33 @@ def make_env(**kwargs) -> gym.Env:
     return env
 
 
-def linear_schedule(initial_value: Union[float, str]) -> Callable[[float], float]:
+class SimpleLinearSchedule:
+    """
+    Linear learning rate schedule (from initial value to zero),
+    simpler than sb3 LinearSchedule.
+
+    :param initial_value: (float or str) The initial value for the schedule
+    """
+
+    def __init__(self, initial_value: Union[float, str]) -> None:
+        # Force conversion to float
+        self.initial_value = float(initial_value)
+
+    def __call__(self, progress_remaining: float) -> float:
+        return progress_remaining * self.initial_value
+
+    def __repr__(self) -> str:
+        return f"SimpleLinearSchedule(initial_value={self.initial_value})"
+
+
+def linear_schedule(initial_value: Union[float, str]) -> SimpleLinearSchedule:
     """
     Linear learning rate schedule.
 
     :param initial_value: (float or str)
-    :return: (function)
+    :return: A `SimpleLinearSchedule` object
     """
-    # Force conversion to float
-    initial_value_ = float(initial_value)
-
-    def func(progress_remaining: float) -> float:
-        """
-        Progress will decrease from 1 (beginning) to 0
-        :param progress_remaining: (float)
-        :return: (float)
-        """
-        return progress_remaining * initial_value_
-
-    return func
+    return SimpleLinearSchedule(initial_value)
 
 
 def get_trained_models(log_folder: str) -> dict[str, tuple[str, str]]:
diff --git a/rl_zoo3/version.txt b/rl_zoo3/version.txt
@@ -1 +1 @@
-2.6.1a0
+2.6.1a1
diff --git a/setup.py b/setup.py
@@ -15,7 +15,7 @@
 See https://github.com/DLR-RM/rl-baselines3-zoo
 """
 install_requires = [
-    "sb3_contrib>=2.6.0,<3.0",
+    "sb3_contrib>=2.6.1a1,<3.0",
     "gymnasium>=0.29.1,<1.2.0",
     "huggingface_sb3>=3.0,<4.0",
     "tqdm",
diff --git a/tests/test_wrappers.py b/tests/test_wrappers.py
@@ -1,14 +1,16 @@
 import gymnasium as gym
+import numpy as np
 import pytest
 import stable_baselines3 as sb3
+from sb3_contrib.common.wrappers import TimeFeatureWrapper
 from stable_baselines3 import A2C
 from stable_baselines3.common.env_checker import check_env
 from stable_baselines3.common.env_util import DummyVecEnv
 
 import rl_zoo3.import_envs
 import rl_zoo3.wrappers
-from rl_zoo3.utils import get_wrapper_class
-from rl_zoo3.wrappers import ActionNoiseWrapper, DelayedRewardWrapper, HistoryWrapper, TimeFeatureWrapper
+from rl_zoo3.utils import SimpleLinearSchedule, get_wrapper_class, linear_schedule
+from rl_zoo3.wrappers import ActionNoiseWrapper, DelayedRewardWrapper, HistoryWrapper
 
 
 def test_wrappers():
@@ -55,3 +57,11 @@ def test_get_vec_env_wrapper(vec_env_wrapper):
     if wrapper_class is not None:
         env = wrapper_class(env)
     A2C("MlpPolicy", env).learn(16)
+
+
+def test_linear_schedule():
+    schedule = linear_schedule(100)
+    assert isinstance(schedule, SimpleLinearSchedule)
+    assert np.allclose(schedule(1.0), 100.0)
+    assert np.allclose(schedule(0.5), 50.0)
+    assert np.allclose(schedule(0.0), 0.0)