Tianshou/examples/mujoco/mujoco_sac_hl.py

#!/usr/bin/env python3

import datetime
import os
from collections.abc import Sequence

from jsonargparse import CLI

from examples.mujoco.mujoco_env import MujocoEnvFactory
from tianshou.highlevel.config import RLSamplingConfig
from tianshou.highlevel.experiment import (
    RLExperimentConfig,
    SACExperimentBuilder,
)
from tianshou.highlevel.params.alpha import AutoAlphaFactoryDefault
from tianshou.highlevel.params.policy_params import SACParams
from tianshou.utils import logging


def main(
    experiment_config: RLExperimentConfig,
    task: str = "Ant-v3",
    buffer_size: int = 1000000,
    hidden_sizes: Sequence[int] = (256, 256),
    actor_lr: float = 1e-3,
    critic_lr: float = 1e-3,
    gamma: float = 0.99,
    tau: float = 0.005,
    alpha: float = 0.2,
    auto_alpha: bool = False,
    alpha_lr: float = 3e-4,
    start_timesteps: int = 10000,
    epoch: int = 200,
    step_per_epoch: int = 5000,
    step_per_collect: int = 1,
    update_per_step: int = 1,
    n_step: int = 1,
    batch_size: int = 256,
    training_num: int = 1,
    test_num: int = 10,
):
    """Assemble a SAC experiment via the high-level builder API and run it.

    The run is logged under ``<task>/sac/<seed>/<timestamp>``, where the seed is
    read from ``experiment_config`` and the timestamp is the local time at call
    time formatted as ``%y%m%d-%H%M%S``.

    :param experiment_config: passed to ``SACExperimentBuilder``; its ``seed``
        attribute is also passed to ``MujocoEnvFactory`` and used in the log name.
    :param task: passed to ``MujocoEnvFactory`` and used as the first component
        of the log name (presumably the environment id -- confirm against the
        factory).
    :param buffer_size: forwarded as ``RLSamplingConfig.buffer_size``.
    :param hidden_sizes: passed to both the default actor factory and the
        default common critic factory.
    :param actor_lr: forwarded as ``SACParams.actor_lr``.
    :param critic_lr: forwarded as both ``SACParams.critic1_lr`` and
        ``SACParams.critic2_lr``.
    :param gamma: forwarded as ``SACParams.gamma``.
    :param tau: forwarded as ``SACParams.tau``.
    :param alpha: forwarded as ``SACParams.alpha`` when ``auto_alpha`` is false;
        ignored otherwise.
    :param auto_alpha: if true, ``SACParams.alpha`` is set to an
        ``AutoAlphaFactoryDefault`` instead of the fixed ``alpha`` value.
    :param alpha_lr: ``lr`` argument of ``AutoAlphaFactoryDefault``; only used
        when ``auto_alpha`` is true.
    :param start_timesteps: forwarded as ``RLSamplingConfig.start_timesteps``
        (``start_timesteps_random`` is always set to ``True``).
    :param epoch: forwarded as ``RLSamplingConfig.num_epochs``.
    :param step_per_epoch: forwarded as ``RLSamplingConfig.step_per_epoch``.
    :param step_per_collect: forwarded as ``RLSamplingConfig.step_per_collect``.
    :param update_per_step: forwarded as ``RLSamplingConfig.update_per_step``.
    :param n_step: forwarded as ``SACParams.estimation_step``.
    :param batch_size: forwarded as ``RLSamplingConfig.batch_size``.
    :param training_num: forwarded as ``RLSamplingConfig.num_train_envs``.
    :param test_num: forwarded as ``RLSamplingConfig.num_test_envs``.
    """
    timestamp = datetime.datetime.now().strftime("%y%m%d-%H%M%S")
    run_name = os.path.join(task, "sac", str(experiment_config.seed), timestamp)

    # Collect the sampling/training-loop settings first, then build the config
    # object from them in a single call.
    sampling_kwargs = {
        "num_epochs": epoch,
        "step_per_epoch": step_per_epoch,
        "num_train_envs": training_num,
        "num_test_envs": test_num,
        "buffer_size": buffer_size,
        "batch_size": batch_size,
        "step_per_collect": step_per_collect,
        "update_per_step": update_per_step,
        "start_timesteps": start_timesteps,
        "start_timesteps_random": True,
    }
    sampling_config = RLSamplingConfig(**sampling_kwargs)

    env_factory = MujocoEnvFactory(task, experiment_config.seed, sampling_config)

    builder = SACExperimentBuilder(experiment_config, env_factory, sampling_config)

    # Either a fixed entropy coefficient or a factory for an automatically
    # tuned one, depending on the ``auto_alpha`` switch.
    if auto_alpha:
        alpha_setting = AutoAlphaFactoryDefault(lr=alpha_lr)
    else:
        alpha_setting = alpha

    sac_params = SACParams(
        tau=tau,
        gamma=gamma,
        alpha=alpha_setting,
        estimation_step=n_step,
        actor_lr=actor_lr,
        # Both critics share the same learning rate.
        critic1_lr=critic_lr,
        critic2_lr=critic_lr,
    )

    # Each builder call is applied to the return value of the previous one,
    # exactly as a fluent chain would do.
    builder = builder.with_sac_params(sac_params)
    builder = builder.with_actor_factory_default(
        hidden_sizes,
        continuous_unbounded=True,
        continuous_conditioned_sigma=True,
    )
    builder = builder.with_common_critic_factory_default(hidden_sizes)

    experiment = builder.build()
    experiment.run(run_name)


# Script entry point: only runs when the file is executed directly, not on import.
if __name__ == "__main__":
    # `logging.run_main` receives a zero-argument callable; that callable hands
    # `main` to jsonargparse's `CLI` (presumably deriving the command-line
    # options from `main`'s signature -- confirm against the jsonargparse docs).
    logging.run_main(lambda: CLI(main))
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`#!/usr/bin/env python3`

			`import datetime`
			`import os`
			`from collections.abc import Sequence`

			`from jsonargparse import CLI`

			`from examples.mujoco.mujoco_env import MujocoEnvFactory`
Unify PPO configuration objects, use experiment-specific configuration in mujoco_ppo_hl 2023-09-20 15:45:09 +02:00			`from tianshou.highlevel.config import RLSamplingConfig`
Refactoring, dropping package config 2023-09-20 13:15:06 +02:00			`from tianshou.highlevel.experiment import (`
			`RLExperimentConfig,`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`SACExperimentBuilder,`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`)`
Adapt class naming scheme * Use prefix convention (subclasses have superclass names as prefix) to facilitate discoverability of relevant classes via IDE autocompletion * Use dual naming, adding an alternative concise name that omits the precise OO semantics and retains only the essential part of the name (which can be more pleasing to users not accustomed to convoluted OO naming) 2023-09-27 17:20:35 +02:00			`from tianshou.highlevel.params.alpha import AutoAlphaFactoryDefault`
Add high-level API support for TD3 * Created mixins for agent factories to reduce code duplication * Further factorised params & mixins for experiment factories * Additional parameter abstractions * Implement high-level MuJoCo TD3 example 2023-09-26 15:35:18 +02:00			`from tianshou.highlevel.params.policy_params import SACParams`
Log full experiment configuration, adding string representations to relevant classes 2023-10-03 21:14:22 +02:00			`from tianshou.utils import logging`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00

			`def main(`
Refactoring, dropping package config 2023-09-20 13:15:06 +02:00			`experiment_config: RLExperimentConfig,`
Use experiment-specific config in mujoco_sac_hl, adding auto-alpha 2023-09-20 15:13:05 +02:00			`task: str = "Ant-v3",`
			`buffer_size: int = 1000000,`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`hidden_sizes: Sequence[int] = (256, 256),`
Use experiment-specific config in mujoco_sac_hl, adding auto-alpha 2023-09-20 15:13:05 +02:00			`actor_lr: float = 1e-3,`
			`critic_lr: float = 1e-3,`
			`gamma: float = 0.99,`
			`tau: float = 0.005,`
			`alpha: float = 0.2,`
			`auto_alpha: bool = False,`
			`alpha_lr: float = 3e-4,`
			`start_timesteps: int = 10000,`
			`epoch: int = 200,`
			`step_per_epoch: int = 5000,`
			`step_per_collect: int = 1,`
			`update_per_step: int = 1,`
			`n_step: int = 1,`
			`batch_size: int = 256,`
			`training_num: int = 1,`
			`test_num: int = 10,`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`):`
			`now = datetime.datetime.now().strftime("%y%m%d-%H%M%S")`
Refactoring, dropping package config 2023-09-20 13:15:06 +02:00			`log_name = os.path.join(task, "sac", str(experiment_config.seed), now)`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00
Use experiment-specific config in mujoco_sac_hl, adding auto-alpha 2023-09-20 15:13:05 +02:00			`sampling_config = RLSamplingConfig(`
			`num_epochs=epoch,`
			`step_per_epoch=step_per_epoch,`
			`num_train_envs=training_num,`
			`num_test_envs=test_num,`
			`buffer_size=buffer_size,`
			`batch_size=batch_size,`
			`step_per_collect=step_per_collect,`
			`update_per_step=update_per_step,`
			`start_timesteps=start_timesteps,`
			`start_timesteps_random=True,`
			`)`

Refactoring, dropping package config 2023-09-20 13:15:06 +02:00			`env_factory = MujocoEnvFactory(task, experiment_config.seed, sampling_config)`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`experiment = (`
			`SACExperimentBuilder(experiment_config, env_factory, sampling_config)`
			`.with_sac_params(`
Improve high-level policy parametrisation Policy objects are now parametrised by converting the parameter dataclass instances to kwargs, using some injectable conversions along the way 2023-09-25 17:56:37 +02:00			`SACParams(`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`tau=tau,`
			`gamma=gamma,`
Adapt class naming scheme * Use prefix convention (subclasses have superclass names as prefix) to facilitate discoverability of relevant classes via IDE autocompletion * Use dual naming, adding an alternative concise name that omits the precise OO semantics and retains only the essential part of the name (which can be more pleasing to users not accustomed to convoluted OO naming) 2023-09-27 17:20:35 +02:00			`alpha=AutoAlphaFactoryDefault(lr=alpha_lr) if auto_alpha else alpha,`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`estimation_step=n_step,`
			`actor_lr=actor_lr,`
			`critic1_lr=critic_lr,`
			`critic2_lr=critic_lr,`
			`),`
			`)`
			`.with_actor_factory_default(`
Add high-level API support for TD3 * Created mixins for agent factories to reduce code duplication * Further factorised params & mixins for experiment factories * Additional parameter abstractions * Implement high-level MuJoCo TD3 example 2023-09-26 15:35:18 +02:00			`hidden_sizes,`
			`continuous_unbounded=True,`
			`continuous_conditioned_sigma=True,`
Add high-level experiment builder interface 2023-09-21 12:36:27 +02:00			`)`
			`.with_common_critic_factory_default(hidden_sizes)`
			`.build()`
Add SAC high-level interface 2023-09-20 09:29:34 +02:00			`)`
			`experiment.run(log_name)`


			`if __name__ == "__main__":`
Log full experiment configuration, adding string representations to relevant classes 2023-10-03 21:14:22 +02:00			`logging.run_main(lambda: CLI(main))`