Move callbacks for setting DQN epsilon values to the library

2024-01-11 14:57:03 +01:00 · 2024-01-11 14:57:03 +01:00 · ff398beed9
commit ff398beed9
parent 63269fe198
5 changed files with 64 additions and 48 deletions
--- a/examples/atari/atari_callbacks.py
+++ b/examples/atari/atari_callbacks.py
@ -1,33 +0,0 @@
 from tianshou.highlevel.trainer import (
    TrainerEpochCallbackTest,
    TrainerEpochCallbackTrain,
    TrainingContext,
 )
 from tianshou.policy import DQNPolicy
 class TestEpochCallbackDQNSetEps(TrainerEpochCallbackTest):
    def __init__(self, eps_test: float):
        self.eps_test = eps_test
    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy: DQNPolicy = context.policy
        policy.set_eps(self.eps_test)
 class TrainEpochCallbackNatureDQNEpsLinearDecay(TrainerEpochCallbackTrain):
    def __init__(self, eps_train: float, eps_train_final: float):
        self.eps_train = eps_train
        self.eps_train_final = eps_train_final
    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy: DQNPolicy = context.policy
        logger = context.logger
        # nature DQN setting, linear decay in the first 1M steps
        if env_step <= 1e6:
            eps = self.eps_train - env_step / 1e6 * (self.eps_train - self.eps_train_final)
        else:
            eps = self.eps_train_final
        policy.set_eps(eps)
        if env_step % 1000 == 0:
            logger.write("train/env_step", env_step, {"train/eps": eps})
--- a/examples/atari/atari_dqn_hl.py
+++ b/examples/atari/atari_dqn_hl.py
@ -2,10 +2,6 @@
 import os
 from examples.atari.atari_callbacks import (
    TestEpochCallbackDQNSetEps,
    TrainEpochCallbackNatureDQNEpsLinearDecay,
 )
 from examples.atari.atari_network import (
    IntermediateModuleFactoryAtariDQN,
    IntermediateModuleFactoryAtariDQNFeatures,
@ -20,6 +16,10 @@ from tianshou.highlevel.params.policy_params import DQNParams
 from tianshou.highlevel.params.policy_wrapper import (
    PolicyWrapperFactoryIntrinsicCuriosity,
 )
 from tianshou.highlevel.trainer import (
    TrainerEpochCallbackTestDQNSetEps,
    TrainerEpochCallbackTrainDQNEpsLinearDecay,
 )
 from tianshou.utils import logging
 from tianshou.utils.logging import datetime_tag
@ -80,9 +80,9 @@ def main(
        )
        .with_model_factory(IntermediateModuleFactoryAtariDQN())
        .with_trainer_epoch_callback_train(
-            TrainEpochCallbackNatureDQNEpsLinearDecay(eps_train, eps_train_final),
+            TrainerEpochCallbackTrainDQNEpsLinearDecay(eps_train, eps_train_final),
        )
-        .with_trainer_epoch_callback_test(TestEpochCallbackDQNSetEps(eps_test))
+        .with_trainer_epoch_callback_test(TrainerEpochCallbackTestDQNSetEps(eps_test))
        .with_trainer_stop_callback(AtariStopCallback(task))
    )
    if icm_lr_scale > 0:
--- a/examples/atari/atari_iqn_hl.py
+++ b/examples/atari/atari_iqn_hl.py
@ -3,10 +3,6 @@
 import os
 from collections.abc import Sequence
 from examples.atari.atari_callbacks import (
    TestEpochCallbackDQNSetEps,
    TrainEpochCallbackNatureDQNEpsLinearDecay,
 )
 from examples.atari.atari_network import (
    IntermediateModuleFactoryAtariDQN,
 )
@ -17,6 +13,10 @@ from tianshou.highlevel.experiment import (
    IQNExperimentBuilder,
 )
 from tianshou.highlevel.params.policy_params import IQNParams
 from tianshou.highlevel.trainer import (
    TrainerEpochCallbackTestDQNSetEps,
    TrainerEpochCallbackTrainDQNEpsLinearDecay,
 )
 from tianshou.utils import logging
 from tianshou.utils.logging import datetime_tag
@ -84,9 +84,9 @@ def main(
        )
        .with_preprocess_network_factory(IntermediateModuleFactoryAtariDQN(features_only=True))
        .with_trainer_epoch_callback_train(
-            TrainEpochCallbackNatureDQNEpsLinearDecay(eps_train, eps_train_final),
+            TrainerEpochCallbackTrainDQNEpsLinearDecay(eps_train, eps_train_final),
        )
-        .with_trainer_epoch_callback_test(TestEpochCallbackDQNSetEps(eps_test))
+        .with_trainer_epoch_callback_test(TrainerEpochCallbackTestDQNSetEps(eps_test))
        .with_trainer_stop_callback(AtariStopCallback(task))
        .build()
    )
--- a/tianshou/highlevel/trainer.py
+++ b/tianshou/highlevel/trainer.py
@ -1,11 +1,11 @@
 from abc import ABC, abstractmethod
 from collections.abc import Callable
 from dataclasses import dataclass
-from typing import TypeVar
+from typing import TypeVar, cast
 from tianshou.highlevel.env import Environments
 from tianshou.highlevel.logger import TLogger
-from tianshou.policy import BasePolicy
+from tianshou.policy import BasePolicy, DQNPolicy
 from tianshou.utils.string import ToStringMixin
 TPolicy = TypeVar("TPolicy", bound=BasePolicy)
@ -72,3 +72,52 @@ class TrainerCallbacks:
    epoch_callback_train: TrainerEpochCallbackTrain | None = None
    epoch_callback_test: TrainerEpochCallbackTest | None = None
    stop_callback: TrainerStopCallback | None = None
 class TrainerEpochCallbackTrainDQNSetEps(TrainerEpochCallbackTrain):
    """Sets the epsilon value for DQN-based policies at the beginning of the training
    stage in each epoch.
    """
    def __init__(self, eps_test: float):
        self.eps_test = eps_test
    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy = cast(DQNPolicy, context.policy)
        policy.set_eps(self.eps_test)
 class TrainerEpochCallbackTrainDQNEpsLinearDecay(TrainerEpochCallbackTrain):
    """Sets the epsilon value for DQN-based policies at the beginning of the training
    stage in each epoch, using a linear decay in the first `decay_steps` steps.
    """
    def __init__(self, eps_train: float, eps_train_final: float, decay_steps: int = 1000000):
        self.eps_train = eps_train
        self.eps_train_final = eps_train_final
        self.decay_steps = decay_steps
    def callback(self, epoch: int, env_step: int, context: TrainingContext) -> None:
        policy = cast(DQNPolicy, context.policy)
        logger = context.logger
        if env_step <= self.decay_steps:
            eps = self.eps_train - env_step / self.decay_steps * (
                self.eps_train - self.eps_train_final
            )
        else:
            eps = self.eps_train_final
        policy.set_eps(eps)
        logger.write("train/env_step", env_step, {"train/eps": eps})
 class TrainerEpochCallbackTestDQNSetEps(TrainerEpochCallbackTest):
    """Sets the epsilon value for DQN-based policies at the beginning of the test
    stage in each epoch.
    """
    def __init__(self, eps_test: float):
        self.eps_test = eps_test
    def callback(self, epoch: int, env_step: int | None, context: TrainingContext) -> None:
        policy = cast(DQNPolicy, context.policy)
        policy.set_eps(self.eps_test)
--- a/tianshou/utils/logger/base.py
+++ b/tianshou/utils/logger/base.py
@ -7,7 +7,7 @@ from typing import Any
 import numpy as np
-VALID_LOG_VALS_TYPE = int | Number | np.number | np.ndarray
+VALID_LOG_VALS_TYPE = int | Number | np.number | np.ndarray | float
 VALID_LOG_VALS = typing.get_args(
    VALID_LOG_VALS_TYPE,
 )  # I know it's stupid, but we can't use Union type in isinstance