Tianshou/examples/mujoco/mujoco_reinforce_hl.py

#!/usr/bin/env python3

import os
from collections.abc import Sequence
from typing import Literal

import torch

from examples.mujoco.mujoco_env import MujocoEnvFactory
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.experiment import (
    ExperimentConfig,
    PGExperimentBuilder,
)
from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear
from tianshou.highlevel.params.policy_params import PGParams
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag


def main(
    experiment_config: ExperimentConfig,
    task: str = "Ant-v3",
    buffer_size: int = 4096,
    hidden_sizes: Sequence[int] = (64, 64),
    lr: float = 1e-3,
    gamma: float = 0.99,
    epoch: int = 100,
    step_per_epoch: int = 30000,
    step_per_collect: int = 2048,
    repeat_per_collect: int = 1,
    batch_size: int = 99999,
    training_num: int = 64,
    test_num: int = 10,
    rew_norm: bool = True,
    action_bound_method: Literal["clip", "tanh"] = "tanh",
    lr_decay: bool = True,
):
    """Build and run a policy-gradient (REINFORCE) experiment on a MuJoCo task.

    The experiment is assembled with ``PGExperimentBuilder`` and then run under a
    log name of the form ``<task>/reinforce/<seed>/<datetime tag>``.

    :param experiment_config: general experiment configuration; its ``seed`` is used
        both in the log name and for the environment factory.
    :param task: name of the task handed to ``MujocoEnvFactory``; also the first
        component of the log name.
    :param buffer_size: forwarded to ``SamplingConfig.buffer_size``.
    :param hidden_sizes: forwarded to ``with_actor_factory_default``
        (presumably the hidden layer widths of the default actor network).
    :param lr: forwarded to ``PGParams.lr``.
    :param gamma: forwarded to ``PGParams.discount_factor``.
    :param epoch: forwarded to ``SamplingConfig.num_epochs``.
    :param step_per_epoch: forwarded to ``SamplingConfig.step_per_epoch``.
    :param step_per_collect: forwarded to ``SamplingConfig.step_per_collect``.
    :param repeat_per_collect: forwarded to ``SamplingConfig.repeat_per_collect``.
    :param batch_size: forwarded to ``SamplingConfig.batch_size``.
    :param training_num: forwarded to ``SamplingConfig.num_train_envs``.
    :param test_num: forwarded to ``SamplingConfig.num_test_envs``.
    :param rew_norm: forwarded to ``PGParams.reward_normalization``.
    :param action_bound_method: forwarded to ``PGParams.action_bound_method``.
    :param lr_decay: if true, a ``LRSchedulerFactoryLinear`` created from the sampling
        configuration is passed to ``PGParams``; otherwise no scheduler factory is set.
    """
    seed = experiment_config.seed
    log_name = os.path.join(task, "reinforce", str(seed), datetime_tag())

    # Sampling/collection settings shared by the builder and the LR scheduler factory.
    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        step_per_collect=step_per_collect,
        repeat_per_collect=repeat_per_collect,
        batch_size=batch_size,
        buffer_size=buffer_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
    )

    env_factory = MujocoEnvFactory(task, seed, obs_norm=True)

    builder = PGExperimentBuilder(env_factory, experiment_config, sampling_config)

    # The linear LR schedule is optional; None leaves the scheduler factory unset.
    if lr_decay:
        scheduler_factory = LRSchedulerFactoryLinear(sampling_config)
    else:
        scheduler_factory = None

    pg_params = PGParams(
        discount_factor=gamma,
        action_bound_method=action_bound_method,
        reward_normalization=rew_norm,
        lr=lr,
        lr_scheduler_factory=scheduler_factory,
    )

    builder = builder.with_pg_params(pg_params)
    builder = builder.with_actor_factory_default(
        hidden_sizes,
        torch.nn.Tanh,
        continuous_unbounded=True,
    )

    experiment = builder.build()
    experiment.run(log_name)


if __name__ == "__main__":
    # Script entry point: hand `main` to tianshou's `logging.run_cli` rather than
    # calling it directly (presumably this maps main's parameters to command-line
    # arguments -- confirm against `tianshou.utils.logging.run_cli`).
    logging.run_cli(main)
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`#!/usr/bin/env python3`

			`import os`
			`from collections.abc import Sequence`
			`from typing import Literal`

Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`import torch`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00
			`from examples.mujoco.mujoco_env import MujocoEnvFactory`
			`from tianshou.highlevel.config import SamplingConfig`
			`from tianshou.highlevel.experiment import (`
			`ExperimentConfig,`
			`PGExperimentBuilder,`
			`)`
			`from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear`
			`from tianshou.highlevel.params.policy_params import PGParams`
Revert "Depend on sensAI instead of copying its utils (logging, string)" This reverts commit fdb0eba93d81fa5e698770b4f7088c87fc1238da. 2023-11-07 10:54:22 +01:00			`from tianshou.utils import logging`
			`from tianshou.utils.logging import datetime_tag`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00

			`def main(`
			`experiment_config: ExperimentConfig,`
			`task: str = "Ant-v3",`
			`buffer_size: int = 4096,`
			`hidden_sizes: Sequence[int] = (64, 64),`
			`lr: float = 1e-3,`
			`gamma: float = 0.99,`
			`epoch: int = 100,`
			`step_per_epoch: int = 30000,`
			`step_per_collect: int = 2048,`
			`repeat_per_collect: int = 1,`
			`batch_size: int = 99999,`
			`training_num: int = 64,`
			`test_num: int = 10,`
			`rew_norm: bool = True,`
			`action_bound_method: Literal["clip", "tanh"] = "tanh",`
			`lr_decay: bool = True,`
			`):`
			`log_name = os.path.join(task, "reinforce", str(experiment_config.seed), datetime_tag())`

			`sampling_config = SamplingConfig(`
			`num_epochs=epoch,`
			`step_per_epoch=step_per_epoch,`
			`batch_size=batch_size,`
			`num_train_envs=training_num,`
			`num_test_envs=test_num,`
			`buffer_size=buffer_size,`
			`step_per_collect=step_per_collect,`
			`repeat_per_collect=repeat_per_collect,`
			`)`

Change interface of EnvFactory to ensure that configuration of number of environments in SamplingConfig is used (values are now passed to factory method) This is clearer and removes the need to pass otherwise unnecessary configuration to environment factories at construction 2023-10-18 23:55:23 +02:00			`env_factory = MujocoEnvFactory(task, experiment_config.seed, obs_norm=True)`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00
			`experiment = (`
			`PGExperimentBuilder(env_factory, experiment_config, sampling_config)`
			`.with_pg_params(`
			`PGParams(`
			`discount_factor=gamma,`
			`action_bound_method=action_bound_method,`
			`reward_normalization=rew_norm,`
			`lr=lr,`
			`lr_scheduler_factory=LRSchedulerFactoryLinear(sampling_config)`
			`if lr_decay`
			`else None,`
			`),`
			`)`
Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`.with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True)`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`.build()`
			`)`
			`experiment.run(log_name)`


			`if __name__ == "__main__":`
Fix tianshou.highlevel depending on jsonargparse (should be dev dependency only) by introducing a new place where jsonargparse can be configured: logging.run_cli, which is also slightly more convenient 2023-10-19 11:40:49 +02:00			`logging.run_cli(main)`