Tianshou/examples/mujoco/mujoco_reinforce_hl.py

#!/usr/bin/env python3

import functools
import os
from collections.abc import Sequence
from typing import Literal

import torch

from examples.mujoco.mujoco_env import MujocoEnvFactory
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.experiment import (
    ExperimentConfig,
    PGExperimentBuilder,
)
from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear
from tianshou.highlevel.params.policy_params import PGParams
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag


def main(
    experiment_config: ExperimentConfig,
    task: str = "Ant-v4",
    buffer_size: int = 4096,
    hidden_sizes: Sequence[int] = (64, 64),
    lr: float = 1e-3,
    gamma: float = 0.99,
    epoch: int = 100,
    step_per_epoch: int = 30000,
    step_per_collect: int = 2048,
    repeat_per_collect: int = 1,
    batch_size: int | None = None,
    training_num: int = 10,
    test_num: int = 10,
    rew_norm: bool = True,
    action_bound_method: Literal["clip", "tanh"] = "tanh",
    lr_decay: bool = True,
) -> None:
    """Assemble a policy-gradient experiment for a MuJoCo task and run it.

    The run is logged under ``<task>/reinforce/<seed>/<datetime tag>``.

    :param experiment_config: experiment configuration handed to the builder; its ``seed``
        is also used for the environment factory and the log name.
    :param task: task name passed to ``MujocoEnvFactory``; first component of the log name.
    :param buffer_size: forwarded to ``SamplingConfig.buffer_size``.
    :param hidden_sizes: hidden layer sizes passed to the default actor factory.
    :param lr: forwarded to ``PGParams.lr``.
    :param gamma: forwarded to ``PGParams.discount_factor``.
    :param epoch: forwarded to ``SamplingConfig.num_epochs``.
    :param step_per_epoch: forwarded to ``SamplingConfig.step_per_epoch``.
    :param step_per_collect: forwarded to ``SamplingConfig.step_per_collect``.
    :param repeat_per_collect: forwarded to ``SamplingConfig.repeat_per_collect``.
    :param batch_size: forwarded to ``SamplingConfig.batch_size``; may be None.
    :param training_num: forwarded to ``SamplingConfig.num_train_envs``.
    :param test_num: forwarded to ``SamplingConfig.num_test_envs``.
    :param rew_norm: forwarded to ``PGParams.reward_normalization``.
    :param action_bound_method: forwarded to ``PGParams.action_bound_method``.
    :param lr_decay: if True, a ``LRSchedulerFactoryLinear`` built from the sampling
        configuration is passed to ``PGParams``; otherwise no scheduler factory is set.
    """
    run_name = os.path.join(task, "reinforce", str(experiment_config.seed), datetime_tag())

    sampling = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        step_per_collect=step_per_collect,
        repeat_per_collect=repeat_per_collect,
        batch_size=batch_size,
        buffer_size=buffer_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
    )

    # Observation normalization is always switched on for this example.
    environments = MujocoEnvFactory(task, experiment_config.seed, obs_norm=True)
    builder = PGExperimentBuilder(environments, experiment_config, sampling)

    # The scheduler factory is only created when learning-rate decay is requested.
    scheduler_factory: LRSchedulerFactoryLinear | None = None
    if lr_decay:
        scheduler_factory = LRSchedulerFactoryLinear(sampling)

    pg_params = PGParams(
        discount_factor=gamma,
        action_bound_method=action_bound_method,
        reward_normalization=rew_norm,
        lr=lr,
        lr_scheduler_factory=scheduler_factory,
    )

    # Default actor network with Tanh activations; flagged as continuous and unbounded.
    configured_builder = builder.with_pg_params(pg_params).with_actor_factory_default(
        hidden_sizes,
        torch.nn.Tanh,
        continuous_unbounded=True,
    )
    configured_builder.build().run(run_name)


if __name__ == "__main__":
    # Pre-bind a default ExperimentConfig so that only the remaining keyword
    # parameters of `main` are left open for the CLI runner.
    # NOTE(review): run_cli presumably exposes those parameters as command-line
    # options - confirm against tianshou.utils.logging.run_cli.
    default_config = ExperimentConfig()
    cli_entry_point = functools.partial(main, experiment_config=default_config)
    logging.run_cli(cli_entry_point)
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`#!/usr/bin/env python3`

Refactoring/mypy issues test (#1017) Improves typing in examples and tests, towards mypy passing there. Introduces the SpaceInfo utility 2024-02-06 14:24:30 +01:00			`import functools`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`import os`
			`from collections.abc import Sequence`
			`from typing import Literal`

Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`import torch`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00
			`from examples.mujoco.mujoco_env import MujocoEnvFactory`
			`from tianshou.highlevel.config import SamplingConfig`
			`from tianshou.highlevel.experiment import (`
			`ExperimentConfig,`
			`PGExperimentBuilder,`
			`)`
			`from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear`
			`from tianshou.highlevel.params.policy_params import PGParams`
Revert "Depend on sensAI instead of copying its utils (logging, string)" This reverts commit fdb0eba93d81fa5e698770b4f7088c87fc1238da. 2023-11-07 10:54:22 +01:00			`from tianshou.utils import logging`
			`from tianshou.utils.logging import datetime_tag`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00

			`def main(`
			`experiment_config: ExperimentConfig,`
Update MuJoCo examples to use Ant-v4 instead of Ant-v3 2024-01-10 15:39:53 +01:00			`task: str = "Ant-v4",`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`buffer_size: int = 4096,`
			`hidden_sizes: Sequence[int] = (64, 64),`
			`lr: float = 1e-3,`
			`gamma: float = 0.99,`
			`epoch: int = 100,`
			`step_per_epoch: int = 30000,`
			`step_per_collect: int = 2048,`
			`repeat_per_collect: int = 1,`
Support batch_size=None and use it in various scripts (#993) Closes #986 2023-11-24 19:13:10 +01:00			`batch_size: int | None = None,`
Update and fix dependencies related to mac install (#1044) Addresses part of #1015 ### Dependencies - move jsonargparse and docstring-parser to dependencies to run hl examples without dev - create mujoco-py extra for legacy mujoco envs - updated atari extra - removed atari-py and gym dependencies - added ALE-py, autorom, and shimmy - created robotics extra for HER-DDPG ### Mac specific - only install envpool when not on mac - mujoco-py not working on macOS newer than Monterey (https://github.com/openai/mujoco-py/issues/777) - D4RL also fails due to dependency on mujoco-py (https://github.com/Farama-Foundation/D4RL/issues/232) ### Other - reduced training-num/test-num in example files to a number ≤ 20 (examples with 100 led to too many open files) - rendering for Mujoco envs needs to be fixed on gymnasium side (https://github.com/Farama-Foundation/Gymnasium/issues/749) --------- Co-authored-by: Maximilian Huettenrauch <m.huettenrauch@appliedai.de> Co-authored-by: Michael Panchenko <35432522+MischaPanch@users.noreply.github.com> 2024-02-06 17:06:38 +01:00			`training_num: int = 10,`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`test_num: int = 10,`
			`rew_norm: bool = True,`
			`action_bound_method: Literal["clip", "tanh"] = "tanh",`
			`lr_decay: bool = True,`
Refactoring/mypy issues test (#1017) Improves typing in examples and tests, towards mypy passing there. Introduces the SpaceInfo utility 2024-02-06 14:24:30 +01:00			`) -> None:`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`log_name = os.path.join(task, "reinforce", str(experiment_config.seed), datetime_tag())`

			`sampling_config = SamplingConfig(`
			`num_epochs=epoch,`
			`step_per_epoch=step_per_epoch,`
			`batch_size=batch_size,`
			`num_train_envs=training_num,`
			`num_test_envs=test_num,`
			`buffer_size=buffer_size,`
			`step_per_collect=step_per_collect,`
			`repeat_per_collect=repeat_per_collect,`
			`)`

Change interface of EnvFactory to ensure that configuration of number of environments in SamplingConfig is used (values are now passed to factory method) This is clearer and removes the need to pass otherwise unnecessary configuration to environment factories at construction 2023-10-18 23:55:23 +02:00			`env_factory = MujocoEnvFactory(task, experiment_config.seed, obs_norm=True)`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00
			`experiment = (`
			`PGExperimentBuilder(env_factory, experiment_config, sampling_config)`
			`.with_pg_params(`
			`PGParams(`
			`discount_factor=gamma,`
			`action_bound_method=action_bound_method,`
			`reward_normalization=rew_norm,`
			`lr=lr,`
			`lr_scheduler_factory=LRSchedulerFactoryLinear(sampling_config)`
			`if lr_decay`
			`else None,`
			`),`
			`)`
Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`.with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True)`
Support PG/Reinforce in high-level API * Add example mujoco_reinforce_hl * Extended functionality of ActorFactory to support creation of ModuleOpt 2023-10-10 12:55:25 +02:00			`.build()`
			`)`
			`experiment.run(log_name)`


			`if __name__ == "__main__":`
Refactoring/mypy issues test (#1017) Improves typing in examples and tests, towards mypy passing there. Introduces the SpaceInfo utility 2024-02-06 14:24:30 +01:00			`run_with_default_config = functools.partial(main, experiment_config=ExperimentConfig())`
			`logging.run_cli(run_with_default_config)`