Add documentation, improve structure of 'module' package

This commit is contained in:
Dominik Jain 2023-10-16 18:19:31 +02:00
parent 97e21b5ddf
commit 4b270eaa2d
19 changed files with 256 additions and 64 deletions

View File

@@ -8,9 +8,11 @@ from torch import nn
 from tianshou.highlevel.env import Environments
 from tianshou.highlevel.module.actor import ActorFactory
 from tianshou.highlevel.module.core import (
+    TDevice,
+)
+from tianshou.highlevel.module.intermediate import (
     IntermediateModule,
     IntermediateModuleFactory,
-    TDevice,
 )
 from tianshou.utils.net.discrete import Actor, NoisyLinear

View File

@@ -81,6 +81,8 @@ log = logging.getLogger(__name__)
 class AgentFactory(ABC, ToStringMixin):
+    """Factory for the creation of an agent's policy, its trainer as well as collectors."""
+
     def __init__(self, sampling_config: SamplingConfig, optim_factory: OptimizerFactory):
         self.sampling_config = sampling_config
         self.optim_factory = optim_factory

View File

@@ -14,6 +14,8 @@ TObservationShape: TypeAlias = int | Sequence[int]
 class EnvType(Enum):
+    """Enumeration of environment types."""
+
     CONTINUOUS = "continuous"
     DISCRETE = "discrete"
@@ -33,6 +35,8 @@ class EnvType(Enum):
 class Environments(ToStringMixin, ABC):
+    """Represents (vectorized) environments."""
+
     def __init__(self, env: gym.Env, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
         self.env = env
         self.train_envs = train_envs
@@ -52,6 +56,11 @@ class Environments(ToStringMixin, ABC):
         }

     def set_persistence(self, *p: Persistence) -> None:
+        """Associates the given persistence handlers which may persist and restore
+        environment-specific information.
+
+        :param p: persistence handlers
+        """
         self.persistence = p

     @abstractmethod
@@ -74,6 +83,8 @@ class Environments(ToStringMixin, ABC):
 class ContinuousEnvironments(Environments):
+    """Represents (vectorized) continuous environments."""
+
     def __init__(self, env: gym.Env, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
         super().__init__(env, train_envs, test_envs)
         self.state_shape, self.action_shape, self.max_action = self._get_continuous_env_info(env)
@@ -110,6 +121,8 @@ class ContinuousEnvironments(Environments):
 class DiscreteEnvironments(Environments):
+    """Represents (vectorized) discrete environments."""
+
     def __init__(self, env: gym.Env, train_envs: BaseVectorEnv, test_envs: BaseVectorEnv):
         super().__init__(env, train_envs, test_envs)
         self.observation_shape = env.observation_space.shape or env.observation_space.n  # type: ignore

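For illustration, the classes documented above can also be assembled directly; a minimal sketch (not part of this commit, assuming a Gymnasium environment id and tianshou's DummyVectorEnv):

    import gymnasium as gym
    from tianshou.env import DummyVectorEnv
    from tianshou.highlevel.env import DiscreteEnvironments

    # one reference env plus vectorized train/test envs
    env = gym.make("CartPole-v1")
    train_envs = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(4)])
    test_envs = DummyVectorEnv([lambda: gym.make("CartPole-v1") for _ in range(2)])
    envs = DiscreteEnvironments(env, train_envs, test_envs)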
View File

@@ -27,7 +27,7 @@ from tianshou.highlevel.agent import (
 )
 from tianshou.highlevel.config import SamplingConfig
 from tianshou.highlevel.env import EnvFactory, Environments
-from tianshou.highlevel.logger import DefaultLoggerFactory, LoggerFactory, TLogger
+from tianshou.highlevel.logger import LoggerFactory, LoggerFactoryDefault, TLogger
 from tianshou.highlevel.module.actor import (
     ActorFactory,
     ActorFactoryDefault,
@@ -38,8 +38,6 @@ from tianshou.highlevel.module.actor import (
     IntermediateModuleFactoryFromActorFactory,
 )
 from tianshou.highlevel.module.core import (
-    ImplicitQuantileNetworkFactory,
-    IntermediateModuleFactory,
     TDevice,
 )
 from tianshou.highlevel.module.critic import (
@@ -49,6 +47,8 @@ from tianshou.highlevel.module.critic import (
     CriticFactoryDefault,
     CriticFactoryReuseActor,
 )
+from tianshou.highlevel.module.intermediate import IntermediateModuleFactory
+from tianshou.highlevel.module.special import ImplicitQuantileNetworkFactory
 from tianshou.highlevel.optim import OptimizerFactory, OptimizerFactoryAdam
 from tianshou.highlevel.params.policy_params import (
     A2CParams,
@@ -116,8 +116,12 @@ class ExperimentConfig:
 @dataclass
 class ExperimentResult:
+    """Contains the results of an experiment."""
+
     world: World
+    """contains all the essential instances of the experiment"""
     trainer_result: dict[str, Any] | None
+    """dictionary of results as returned by the trainer (if any)"""


 class Experiment(Generic[TPolicy, TTrainer], ToStringMixin):
@@ -140,7 +144,7 @@ class Experiment(Generic[TPolicy, TTrainer], ToStringMixin):
         env_config: PersistableConfigProtocol | None = None,
     ):
         if logger_factory is None:
-            logger_factory = DefaultLoggerFactory()
+            logger_factory = LoggerFactoryDefault()
         self.config = config
         self.env_factory = env_factory
         self.agent_factory = agent_factory
@@ -179,7 +183,9 @@ class Experiment(Generic[TPolicy, TTrainer], ToStringMixin):
             pickle.dump(self, f)

     def run(
-        self, experiment_name: str | None = None, logger_run_id: str | None = None,
+        self,
+        experiment_name: str | None = None,
+        logger_run_id: str | None = None,
     ) -> ExperimentResult:
         """:param experiment_name: the experiment name, which corresponds to the directory (within the logging
             directory) where all results associated with the experiment will be saved.
@@ -317,14 +323,31 @@ class ExperimentBuilder:
         return self

     def with_logger_factory(self, logger_factory: LoggerFactory) -> Self:
+        """Allows customizing the logger factory to use.
+
+        If this method is not called, the default logger factory :class:`LoggerFactoryDefault` will be used.
+
+        :param logger_factory: the factory to use
+        :return: the builder
+        """
         self._logger_factory = logger_factory
         return self

     def with_policy_wrapper_factory(self, policy_wrapper_factory: PolicyWrapperFactory) -> Self:
+        """Allows defining a wrapper around the policy that is created, extending the original policy.
+
+        :param policy_wrapper_factory: the factory for the wrapper
+        :return: the builder
+        """
         self._policy_wrapper_factory = policy_wrapper_factory
         return self

     def with_optim_factory(self, optim_factory: OptimizerFactory) -> Self:
+        """Allows customizing the gradient-based optimizer to use.
+
+        By default, :class:`OptimizerFactoryAdam` will be used with default parameters.
+
+        :param optim_factory: the optimizer factory
+        :return: the builder
+        """
         self._optim_factory = optim_factory
         return self
@@ -345,14 +368,30 @@ class ExperimentBuilder:
         return self

     def with_trainer_epoch_callback_train(self, callback: TrainerEpochCallbackTrain) -> Self:
+        """Allows defining a callback function which is called at the beginning of every epoch during training.
+
+        :param callback: the callback
+        :return: the builder
+        """
         self._trainer_callbacks.epoch_callback_train = callback
         return self

     def with_trainer_epoch_callback_test(self, callback: TrainerEpochCallbackTest) -> Self:
+        """Allows defining a callback function which is called at the beginning of testing in each epoch.
+
+        :param callback: the callback
+        :return: the builder
+        """
         self._trainer_callbacks.epoch_callback_test = callback
         return self

     def with_trainer_stop_callback(self, callback: TrainerStopCallback) -> Self:
+        """Allows defining a callback that decides whether training shall stop early.
+
+        The callback receives the undiscounted returns of the testing result.
+
+        :param callback: the callback
+        :return: the builder
+        """
         self._trainer_callbacks.stop_callback = callback
         return self
@@ -367,6 +406,10 @@ class ExperimentBuilder:
         return self._optim_factory

     def build(self) -> Experiment:
+        """Creates the experiment based on the options specified via this builder.
+
+        :return: the experiment
+        """
         agent_factory = self._create_agent_factory()
         agent_factory.set_trainer_callbacks(self._trainer_callbacks)
         if self._policy_wrapper_factory:
@@ -388,6 +431,12 @@ class _BuilderMixinActorFactory(ActorFutureProviderProtocol):
         self._actor_factory: ActorFactory | None = None

     def with_actor_factory(self, actor_factory: ActorFactory) -> Self:
+        """Allows customizing the actor component via the specification of a factory.
+
+        If this function is not called, a default actor factory (with default parameters) will be used.
+
+        :param actor_factory: the factory to use for the creation of the actor network
+        :return: the builder
+        """
         self._actor_factory = actor_factory
         return self
@@ -397,6 +446,12 @@ class _BuilderMixinActorFactory(ActorFutureProviderProtocol):
         continuous_unbounded: bool = False,
         continuous_conditioned_sigma: bool = False,
     ) -> Self:
+        """:param hidden_sizes: the sequence of hidden dimensions to use in the network structure
+        :param continuous_unbounded: whether, for continuous action spaces, to apply tanh activation on final logits
+        :param continuous_conditioned_sigma: whether, for continuous action spaces, the standard deviation of
+            continuous actions (sigma) shall be computed from the input; if False, sigma is an independent parameter
+        :return: the builder
+        """
         self._actor_factory = ActorFactoryDefault(
             self._continuous_actor_type,
             hidden_sizes,
@@ -406,6 +461,7 @@ class _BuilderMixinActorFactory(ActorFutureProviderProtocol):
         return self

     def get_actor_future(self) -> ActorFuture:
+        """:return: an object which will, in the future, contain the actor instance created for the experiment."""
         return self._actor_future

     def _get_actor_factory(self) -> ActorFactory:
@@ -431,6 +487,15 @@ class _BuilderMixinActorFactory_ContinuousGaussian(_BuilderMixinActorFactory):
         continuous_unbounded: bool = False,
         continuous_conditioned_sigma: bool = False,
     ) -> Self:
+        """Defines use of the default actor factory, allowing its parameters to be customized.
+
+        The default actor factory uses an MLP-style architecture.
+
+        :param hidden_sizes: dimensions of hidden layers used by the network
+        :param continuous_unbounded: whether, for continuous action spaces, to apply tanh activation on final logits
+        :param continuous_conditioned_sigma: whether, for continuous action spaces, the standard deviation of
+            continuous actions (sigma) shall be computed from the input; if False, sigma is an independent parameter
+        :return: the builder
+        """
         return super()._with_actor_factory_default(
             hidden_sizes,
             continuous_unbounded=continuous_unbounded,
@@ -445,6 +510,12 @@ class _BuilderMixinActorFactory_ContinuousDeterministic(_BuilderMixinActorFactory):
         super().__init__(ContinuousActorType.DETERMINISTIC)

     def with_actor_factory_default(self, hidden_sizes: Sequence[int]) -> Self:
+        """Defines use of the default actor factory, allowing its parameters to be customized.
+
+        The default actor factory uses an MLP-style architecture.
+
+        :param hidden_sizes: dimensions of hidden layers used by the network
+        :return: the builder
+        """
         return super()._with_actor_factory_default(hidden_sizes)
@@ -480,6 +551,11 @@ class _BuilderMixinSingleCriticFactory(_BuilderMixinCriticsFactory):
         super().__init__(1, actor_future_provider)

     def with_critic_factory(self, critic_factory: CriticFactory) -> Self:
+        """Specifies that the given factory shall be used for the critic.
+
+        :param critic_factory: the critic factory
+        :return: the builder
+        """
         self._with_critic_factory(0, critic_factory)
         return self
@@ -487,6 +563,11 @@ class _BuilderMixinSingleCriticFactory(_BuilderMixinCriticsFactory):
         self,
         hidden_sizes: Sequence[int] = CriticFactoryDefault.DEFAULT_HIDDEN_SIZES,
     ) -> Self:
+        """Makes the critic use the default, MLP-style architecture with the given parameters.
+
+        :param hidden_sizes: the sequence of dimensions to use in hidden layers of the network
+        :return: the builder
+        """
         self._with_critic_factory_default(0, hidden_sizes)
         return self
@@ -496,7 +577,7 @@ class _BuilderMixinSingleCriticCanUseActorFactory(_BuilderMixinSingleCriticFactory):
         super().__init__(actor_future_provider)

     def with_critic_factory_use_actor(self) -> Self:
-        """Makes the critic use the same network as the actor."""
+        """Makes the first critic reuse the actor's preprocessing network (parameter sharing)."""
         return self._with_critic_factory_use_actor(0)
@@ -505,6 +586,11 @@ class _BuilderMixinDualCriticFactory(_BuilderMixinCriticsFactory):
         super().__init__(2, actor_future_provider)

     def with_common_critic_factory(self, critic_factory: CriticFactory) -> Self:
+        """Specifies that the given factory shall be used for both critics.
+
+        :param critic_factory: the critic factory
+        :return: the builder
+        """
         for i in range(len(self._critic_factories)):
             self._with_critic_factory(i, critic_factory)
         return self
@@ -513,17 +599,27 @@ class _BuilderMixinDualCriticFactory(_BuilderMixinCriticsFactory):
         self,
         hidden_sizes: Sequence[int] = CriticFactoryDefault.DEFAULT_HIDDEN_SIZES,
     ) -> Self:
+        """Makes both critics use the default, MLP-style architecture with the given parameters.
+
+        :param hidden_sizes: the sequence of dimensions to use in hidden layers of the network
+        :return: the builder
+        """
         for i in range(len(self._critic_factories)):
             self._with_critic_factory_default(i, hidden_sizes)
         return self

     def with_common_critic_factory_use_actor(self) -> Self:
-        """Makes all critics use the same network as the actor."""
+        """Makes both critics reuse the actor's preprocessing network (parameter sharing)."""
         for i in range(len(self._critic_factories)):
             self._with_critic_factory_use_actor(i)
         return self

     def with_critic1_factory(self, critic_factory: CriticFactory) -> Self:
+        """Specifies that the given factory shall be used for the first critic.
+
+        :param critic_factory: the critic factory
+        :return: the builder
+        """
         self._with_critic_factory(0, critic_factory)
         return self
@@ -531,14 +627,24 @@ class _BuilderMixinDualCriticFactory(_BuilderMixinCriticsFactory):
         self,
         hidden_sizes: Sequence[int] = CriticFactoryDefault.DEFAULT_HIDDEN_SIZES,
     ) -> Self:
+        """Makes the first critic use the default, MLP-style architecture with the given parameters.
+
+        :param hidden_sizes: the sequence of dimensions to use in hidden layers of the network
+        :return: the builder
+        """
         self._with_critic_factory_default(0, hidden_sizes)
         return self

     def with_critic1_factory_use_actor(self) -> Self:
-        """Makes the critic use the same network as the actor."""
+        """Makes the first critic reuse the actor's preprocessing network (parameter sharing)."""
         return self._with_critic_factory_use_actor(0)

     def with_critic2_factory(self, critic_factory: CriticFactory) -> Self:
+        """Specifies that the given factory shall be used for the second critic.
+
+        :param critic_factory: the critic factory
+        :return: the builder
+        """
         self._with_critic_factory(1, critic_factory)
         return self
@@ -546,11 +652,16 @@ class _BuilderMixinDualCriticFactory(_BuilderMixinCriticsFactory):
         self,
         hidden_sizes: Sequence[int] = CriticFactoryDefault.DEFAULT_HIDDEN_SIZES,
     ) -> Self:
+        """Makes the second critic use the default, MLP-style architecture with the given parameters.
+
+        :param hidden_sizes: the sequence of dimensions to use in hidden layers of the network
+        :return: the builder
+        """
         self._with_critic_factory_default(1, hidden_sizes)
         return self

     def with_critic2_factory_use_actor(self) -> Self:
-        """Makes the second critic use the same network as the actor."""
+        """Makes the second critic reuse the actor's preprocessing network (parameter sharing)."""
         return self._with_critic_factory_use_actor(1)
@@ -559,6 +670,12 @@ class _BuilderMixinCriticEnsembleFactory:
         self.critic_ensemble_factory: CriticEnsembleFactory | None = None

     def with_critic_ensemble_factory(self, factory: CriticEnsembleFactory) -> Self:
+        """Specifies that the given factory shall be used for the critic ensemble.
+
+        If unspecified, the default factory (:class:`CriticEnsembleFactoryDefault`) is used.
+
+        :param factory: the critic ensemble factory
+        :return: the builder
+        """
         self.critic_ensemble_factory = factory
         return self
@@ -566,6 +683,11 @@ class _BuilderMixinCriticEnsembleFactory:
         self,
         hidden_sizes: Sequence[int] = CriticFactoryDefault.DEFAULT_HIDDEN_SIZES,
     ) -> Self:
+        """Allows customizing the parameters of the default critic ensemble factory.
+
+        :param hidden_sizes: the sequence of sizes of hidden layers in the network architecture
+        :return: the builder
+        """
        self.critic_ensemble_factory = CriticEnsembleFactoryDefault(hidden_sizes)
        return self

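To show how the builder methods documented above compose, here is a hypothetical end-to-end sketch (assumes a concrete builder such as PPOExperimentBuilder and an existing env_factory; all values are illustrative):

    from tianshou.highlevel.config import SamplingConfig
    from tianshou.highlevel.experiment import ExperimentConfig, PPOExperimentBuilder
    from tianshou.highlevel.logger import LoggerFactoryDefault
    from tianshou.highlevel.optim import OptimizerFactoryAdam

    experiment = (
        PPOExperimentBuilder(env_factory, ExperimentConfig(), SamplingConfig())
        .with_logger_factory(LoggerFactoryDefault(logger_type="tensorboard"))
        .with_optim_factory(OptimizerFactoryAdam())
        .with_actor_factory_default((64, 64))
        .with_critic_factory_default((64, 64))
        .build()
    )
    result = experiment.run(experiment_name="ppo-example")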
View File

@@ -27,7 +27,7 @@ class LoggerFactory(ToStringMixin, ABC):
         """

-class DefaultLoggerFactory(LoggerFactory):
+class LoggerFactoryDefault(LoggerFactory):
     def __init__(
         self,
         logger_type: Literal["tensorboard", "wandb"] = "tensorboard",

View File

@@ -9,12 +9,14 @@ from torch import nn
 from tianshou.highlevel.env import Environments, EnvType
 from tianshou.highlevel.module.core import (
-    IntermediateModule,
-    IntermediateModuleFactory,
     ModuleFactory,
     TDevice,
     init_linear_orthogonal,
 )
+from tianshou.highlevel.module.intermediate import (
+    IntermediateModule,
+    IntermediateModuleFactory,
+)
 from tianshou.highlevel.module.module_opt import ModuleOpt
 from tianshou.highlevel.optim import OptimizerFactory
 from tianshou.utils.net import continuous, discrete
@@ -157,6 +159,11 @@ class ActorFactoryContinuousGaussianNet(ActorFactoryContinuous):
         unbounded: bool = True,
         conditioned_sigma: bool = False,
     ):
+        """:param hidden_sizes: the sequence of hidden dimensions to use in the network structure
+        :param unbounded: whether to apply tanh activation on final logits
+        :param conditioned_sigma: if True, the standard deviation of continuous actions (sigma) is computed from
+            the input; if False, sigma is an independent parameter
+        """
         self.hidden_sizes = hidden_sizes
         self.unbounded = unbounded
         self.conditioned_sigma = conditioned_sigma

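A usage sketch for the parameters documented above (illustrative values, not part of the commit):

    from tianshou.highlevel.module.actor import ActorFactoryContinuousGaussianNet

    # Gaussian policy head with sigma as an independent parameter
    actor_factory = ActorFactoryContinuousGaussianNet(
        hidden_sizes=(128, 128),
        unbounded=True,
        conditioned_sigma=False,
    )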
View File

@@ -1,14 +1,10 @@
 from abc import ABC, abstractmethod
-from collections.abc import Sequence
-from dataclasses import dataclass
 from typing import TypeAlias

 import numpy as np
 import torch

 from tianshou.highlevel.env import Environments
-from tianshou.utils.net.discrete import ImplicitQuantileNetwork
-from tianshou.utils.string import ToStringMixin

 TDevice: TypeAlias = str | torch.device
@@ -25,44 +21,8 @@ def init_linear_orthogonal(module: torch.nn.Module) -> None:
 class ModuleFactory(ABC):
+    """Represents a factory for the creation of a torch module given an environment and target device."""
+
     @abstractmethod
     def create_module(self, envs: Environments, device: TDevice) -> torch.nn.Module:
         pass
-
-
-@dataclass
-class IntermediateModule:
-    module: torch.nn.Module
-    output_dim: int
-
-
-class IntermediateModuleFactory(ToStringMixin, ModuleFactory, ABC):
-    @abstractmethod
-    def create_intermediate_module(self, envs: Environments, device: TDevice) -> IntermediateModule:
-        pass
-
-    def create_module(self, envs: Environments, device: TDevice) -> torch.nn.Module:
-        return self.create_intermediate_module(envs, device).module
-
-
-class ImplicitQuantileNetworkFactory(ModuleFactory, ToStringMixin):
-    def __init__(
-        self,
-        preprocess_net_factory: IntermediateModuleFactory,
-        hidden_sizes: Sequence[int] = (),
-        num_cosines: int = 64,
-    ):
-        self.preprocess_net_factory = preprocess_net_factory
-        self.hidden_sizes = hidden_sizes
-        self.num_cosines = num_cosines
-
-    def create_module(self, envs: Environments, device: TDevice) -> ImplicitQuantileNetwork:
-        preprocess_net = self.preprocess_net_factory.create_intermediate_module(envs, device)
-        return ImplicitQuantileNetwork(
-            preprocess_net=preprocess_net.module,
-            action_shape=envs.get_action_shape(),
-            hidden_sizes=self.hidden_sizes,
-            num_cosines=self.num_cosines,
-            preprocess_net_output_dim=preprocess_net.output_dim,
-            device=device,
-        ).to(device)

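The ModuleFactory contract that remains in this module can be satisfied with very little code; a hypothetical sketch (assumes Environments exposes an observation-shape accessor such as get_observation_shape()):

    import numpy as np
    import torch

    from tianshou.highlevel.env import Environments
    from tianshou.highlevel.module.core import ModuleFactory, TDevice

    class FlattenedMLPFactory(ModuleFactory):
        """Hypothetical example: an MLP over the flattened observation."""

        def create_module(self, envs: Environments, device: TDevice) -> torch.nn.Module:
            obs_dim = int(np.prod(envs.get_observation_shape()))  # assumed accessor
            return torch.nn.Sequential(
                torch.nn.Linear(obs_dim, 64),
                torch.nn.ReLU(),
                torch.nn.Linear(64, 64),
            ).to(device)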
View File

@@ -15,6 +15,8 @@ from tianshou.utils.string import ToStringMixin
 class CriticFactory(ToStringMixin, ABC):
+    """Represents a factory for the generation of a critic module."""
+
     @abstractmethod
     def create_module(
         self,
@@ -23,9 +25,11 @@ class CriticFactory(ToStringMixin, ABC):
         use_action: bool,
         discrete_last_size_use_action_shape: bool = False,
     ) -> nn.Module:
-        """:param envs: the environments
+        """Creates the critic module.
+
+        :param envs: the environments
         :param device: the torch device
-        :param use_action: whether to (additionally) expect the action as input
+        :param use_action: whether to expect the action as an additional input (in addition to the observations)
         :param discrete_last_size_use_action_shape: whether, for the discrete case, the output dimension shall use the action shape
         :return: the module
         """
@@ -39,6 +43,16 @@ class CriticFactory(ToStringMixin, ABC):
         lr: float,
         discrete_last_size_use_action_shape: bool = False,
     ) -> ModuleOpt:
+        """Creates the critic module along with its optimizer for the given learning rate.
+
+        :param envs: the environments
+        :param device: the torch device
+        :param use_action: whether to expect the action as an additional input (in addition to the observations)
+        :param optim_factory: the optimizer factory
+        :param lr: the learning rate
+        :param discrete_last_size_use_action_shape: whether, for the discrete case, the output dimension shall use the action shape
+        :return: the module along with its optimizer
+        """
         module = self.create_module(
             envs,
             device,

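A usage sketch for the create_module_opt convenience documented above (assumes CriticFactoryDefault accepts the hidden sizes and that envs was created beforehand):

    from tianshou.highlevel.module.critic import CriticFactoryDefault
    from tianshou.highlevel.optim import OptimizerFactoryAdam

    critic_factory = CriticFactoryDefault(hidden_sizes=(64, 64))
    module_opt = critic_factory.create_module_opt(
        envs,
        device="cpu",
        use_action=True,  # critic receives observations and actions
        optim_factory=OptimizerFactoryAdam(),
        lr=1e-3,
    )
    critic, optim = module_opt.module, module_opt.optim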
View File

@@ -0,0 +1,27 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+
+import torch
+
+from tianshou.highlevel.env import Environments
+from tianshou.highlevel.module.core import ModuleFactory, TDevice
+from tianshou.utils.string import ToStringMixin
+
+
+@dataclass
+class IntermediateModule:
+    """Container for a module which computes an intermediate representation (with a known dimension)."""
+
+    module: torch.nn.Module
+    output_dim: int
+
+
+class IntermediateModuleFactory(ToStringMixin, ModuleFactory, ABC):
+    """Factory for the generation of a module which computes an intermediate representation."""
+
+    @abstractmethod
+    def create_intermediate_module(self, envs: Environments, device: TDevice) -> IntermediateModule:
+        pass
+
+    def create_module(self, envs: Environments, device: TDevice) -> torch.nn.Module:
+        return self.create_intermediate_module(envs, device).module

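A hypothetical subclass sketch showing the contract of the new module (the encoder architecture and the observation-shape accessor are illustrative assumptions):

    import numpy as np
    import torch

    from tianshou.highlevel.env import Environments
    from tianshou.highlevel.module.core import TDevice
    from tianshou.highlevel.module.intermediate import (
        IntermediateModule,
        IntermediateModuleFactory,
    )

    class MyEncoderFactory(IntermediateModuleFactory):
        """Produces a fixed-width encoder as the intermediate representation."""

        def create_intermediate_module(self, envs: Environments, device: TDevice) -> IntermediateModule:
            output_dim = 128
            obs_dim = int(np.prod(envs.get_observation_shape()))  # assumed accessor
            encoder = torch.nn.Sequential(
                torch.nn.Linear(obs_dim, output_dim),
                torch.nn.ReLU(),
            )
            return IntermediateModule(encoder.to(device), output_dim)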
View File

@@ -7,12 +7,16 @@ from tianshou.utils.net.common import ActorCritic
 @dataclass
 class ModuleOpt:
+    """Container for a torch module along with its optimizer."""
+
     module: torch.nn.Module
     optim: torch.optim.Optimizer

 @dataclass
 class ActorCriticModuleOpt:
+    """Container for an :class:`ActorCritic` instance along with its optimizer."""
+
     actor_critic_module: ActorCritic
     optim: torch.optim.Optimizer

View File

@@ -0,0 +1,30 @@
+from collections.abc import Sequence
+
+from tianshou.highlevel.env import Environments
+from tianshou.highlevel.module.core import ModuleFactory, TDevice
+from tianshou.highlevel.module.intermediate import IntermediateModuleFactory
+from tianshou.utils.net.discrete import ImplicitQuantileNetwork
+from tianshou.utils.string import ToStringMixin
+
+
+class ImplicitQuantileNetworkFactory(ModuleFactory, ToStringMixin):
+    def __init__(
+        self,
+        preprocess_net_factory: IntermediateModuleFactory,
+        hidden_sizes: Sequence[int] = (),
+        num_cosines: int = 64,
+    ):
+        self.preprocess_net_factory = preprocess_net_factory
+        self.hidden_sizes = hidden_sizes
+        self.num_cosines = num_cosines
+
+    def create_module(self, envs: Environments, device: TDevice) -> ImplicitQuantileNetwork:
+        preprocess_net = self.preprocess_net_factory.create_intermediate_module(envs, device)
+        return ImplicitQuantileNetwork(
+            preprocess_net=preprocess_net.module,
+            action_shape=envs.get_action_shape(),
+            hidden_sizes=self.hidden_sizes,
+            num_cosines=self.num_cosines,
+            preprocess_net_output_dim=preprocess_net.output_dim,
+            device=device,
+        ).to(device)

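A wiring sketch for the relocated factory, reusing the hypothetical MyEncoderFactory from the intermediate-module sketch above (envs is assumed to exist):

    from tianshou.highlevel.module.special import ImplicitQuantileNetworkFactory

    iqn_factory = ImplicitQuantileNetworkFactory(
        preprocess_net_factory=MyEncoderFactory(),
        hidden_sizes=(512,),
        num_cosines=64,
    )
    module = iqn_factory.create_module(envs, device="cpu")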
View File

@@ -13,10 +13,6 @@ class OptimizerWithLearningRateProtocol(Protocol):
 class OptimizerFactory(ABC, ToStringMixin):
-    # TODO: Is it OK to assume that all optimizers have a learning rate argument?
-    #   Right now, the learning rate is typically a configuration parameter.
-    #   If we drop the assumption, we can't have that and will need to move the parameter
-    #   to the optimizer factory, which is inconvenient for the user.
     @abstractmethod
     def create_optimizer(self, module: torch.nn.Module, lr: float) -> torch.optim.Optimizer:
         pass

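With the TODO removed, the contract stays minimal: implementations receive the module and a learning rate. A hypothetical factory for a different optimizer:

    import torch

    from tianshou.highlevel.optim import OptimizerFactory

    class OptimizerFactoryRMSprop(OptimizerFactory):
        """Illustrative example, not part of the commit."""

        def create_optimizer(self, module: torch.nn.Module, lr: float) -> torch.optim.Optimizer:
            return torch.optim.RMSprop(module.parameters(), lr=lr)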
View File

@@ -9,6 +9,8 @@ from tianshou.utils.string import ToStringMixin
 class LRSchedulerFactory(ToStringMixin, ABC):
+    """Factory for the creation of a learning rate scheduler."""
+
     @abstractmethod
     def create_scheduler(self, optim: torch.optim.Optimizer) -> LRScheduler:
         pass

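A hypothetical factory sketch implementing the documented interface with a standard torch scheduler (the module path of LRSchedulerFactory is assumed):

    import torch
    from torch.optim.lr_scheduler import LRScheduler, StepLR

    from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactory

    class LRSchedulerFactoryStepwise(LRSchedulerFactory):
        """Illustrative example: decay the learning rate every step_size steps."""

        def __init__(self, step_size: int, gamma: float = 0.1):
            self.step_size = step_size
            self.gamma = gamma

        def create_scheduler(self, optim: torch.optim.Optimizer) -> LRScheduler:
            return StepLR(optim, step_size=self.step_size, gamma=self.gamma)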
View File

@@ -18,9 +18,12 @@ class NoiseFactoryMaxActionScaledGaussian(NoiseFactory):
     """

     def __init__(self, std_fraction: float):
+        """:param std_fraction: fraction (between 0 and 1) of the maximum action value that shall
+        be used as the standard deviation
+        """
         self.std_fraction = std_fraction

-    def create_noise(self, envs: Environments) -> BaseNoise:
+    def create_noise(self, envs: Environments) -> GaussianNoise:
         envs.get_type().assert_continuous(self)
         envs: ContinuousEnvironments
         return GaussianNoise(sigma=envs.max_action * self.std_fraction)

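A usage sketch (the factory's module path is assumed; envs must represent continuous environments, which the factory itself asserts):

    from tianshou.highlevel.params.noise import NoiseFactoryMaxActionScaledGaussian

    # Gaussian exploration noise with sigma = 10% of the environment's max action
    noise_factory = NoiseFactoryMaxActionScaledGaussian(std_fraction=0.1)
    noise = noise_factory.create_noise(envs)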
View File

@@ -241,7 +241,9 @@ class Params(GetParamTransformersProtocol):
 @dataclass
 class ParamsMixinLearningRateWithScheduler(GetParamTransformersProtocol):
     lr: float = 1e-3
+    """the learning rate to use in the gradient-based optimizer"""
     lr_scheduler_factory: LRSchedulerFactory | None = None
+    """factory for the creation of a learning rate scheduler"""

     def _get_param_transformers(self) -> list[ParamTransformer]:
         return [

View File

@@ -3,7 +3,8 @@ from collections.abc import Sequence
 from typing import Generic, TypeVar

 from tianshou.highlevel.env import Environments
-from tianshou.highlevel.module.core import IntermediateModuleFactory, TDevice
+from tianshou.highlevel.module.core import TDevice
+from tianshou.highlevel.module.intermediate import IntermediateModuleFactory
 from tianshou.highlevel.optim import OptimizerFactory
 from tianshou.policy import BasePolicy, ICMPolicy
 from tianshou.utils.net.discrete import IntrinsicCuriosityModule

View File

@@ -50,6 +50,8 @@ class Persistence(ABC):
 class PersistenceGroup(Persistence):
+    """Groups persistence handlers such that they can be applied collectively."""
+
     def __init__(self, *p: Persistence, enabled: bool = True):
         self.items = p
         self.enabled = enabled
@@ -69,7 +71,7 @@ class PolicyPersistence:
     FILENAME = "policy.dat"

     def __init__(self, additional_persistence: Persistence | None = None, enabled: bool = True):
-        """:param additional_persistence: a persistence instance which is to be envoked whenever
+        """:param additional_persistence: a persistence instance which is to be invoked whenever
             this object is used to persist/restore data
         :param enabled: whether persistence is enabled (restoration is always enabled)
         """

View File

@@ -52,6 +52,7 @@ class TrainerStopCallback(ToStringMixin, ABC):
     @abstractmethod
     def should_stop(self, mean_rewards: float, context: TrainingContext) -> bool:
         """:param mean_rewards: the average undiscounted returns of the testing result
+        :param context: the training context
         :return: True if the goal has been reached and training should stop, False otherwise
         """
@@ -64,6 +65,8 @@ class TrainerStopCallback(ToStringMixin, ABC):
 @dataclass
 class TrainerCallbacks:
+    """Container for callbacks used during training."""
+
     epoch_callback_train: TrainerEpochCallbackTrain | None = None
     epoch_callback_test: TrainerEpochCallbackTest | None = None
     stop_callback: TrainerStopCallback | None = None

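A hypothetical stop callback implementing the documented signature (the import path is assumed):

    from tianshou.highlevel.trainer import TrainerStopCallback, TrainingContext

    class StopOnRewardThreshold(TrainerStopCallback):
        """Illustrative example: stop once the mean test return reaches a threshold."""

        def __init__(self, threshold: float):
            self.threshold = threshold

        def should_stop(self, mean_rewards: float, context: TrainingContext) -> bool:
            return mean_rewards >= self.threshold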
View File

@@ -12,6 +12,8 @@ if TYPE_CHECKING:
 @dataclass
 class World:
+    """Container for instances and configuration items that are relevant to an experiment."""
+
     envs: "Environments"
     policy: "BasePolicy"
     train_collector: "Collector"