Tianshou/examples/mujoco/mujoco_npg_hl.py

#!/usr/bin/env python3

import os
from collections.abc import Sequence
from typing import Literal

import torch

from examples.mujoco.mujoco_env import MujocoEnvFactory
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.experiment import (
    ExperimentConfig,
    NPGExperimentBuilder,
)
from tianshou.highlevel.params.dist_fn import (
    DistributionFunctionFactoryIndependentGaussians,
)
from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear
from tianshou.highlevel.params.policy_params import NPGParams
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag


def main(
    experiment_config: ExperimentConfig,
    task: str = "Ant-v3",
    buffer_size: int = 4096,
    hidden_sizes: Sequence[int] = (64, 64),
    lr: float = 1e-3,
    gamma: float = 0.99,
    epoch: int = 100,
    step_per_epoch: int = 30000,
    step_per_collect: int = 1024,
    repeat_per_collect: int = 1,
    batch_size: int | None = None,
    training_num: int = 16,
    test_num: int = 10,
    rew_norm: bool = True,
    gae_lambda: float = 0.95,
    bound_action_method: Literal["clip", "tanh"] = "clip",
    lr_decay: bool = True,
    norm_adv: bool = True,
    optim_critic_iters: int = 20,
    actor_step_size: float = 0.1,
):
    """Assemble an NPG experiment with the high-level API and run it.

    The arguments are only forwarded to the configuration objects built here;
    their precise semantics are defined by the receiving classes.

    :param experiment_config: passed to the experiment builder; its ``seed`` is
        also given to the environment factory and embedded in the log name.
    :param task: environment identifier handed to ``MujocoEnvFactory``; also the
        first component of the log name.
    :param buffer_size: forwarded to ``SamplingConfig.buffer_size``.
    :param hidden_sizes: hidden layer sizes given to both the default actor
        factory and the default critic factory.
    :param lr: forwarded to ``NPGParams.lr``.
    :param gamma: forwarded to ``NPGParams.discount_factor``.
    :param epoch: forwarded to ``SamplingConfig.num_epochs``.
    :param step_per_epoch: forwarded to ``SamplingConfig.step_per_epoch``.
    :param step_per_collect: forwarded to ``SamplingConfig.step_per_collect``.
    :param repeat_per_collect: forwarded to ``SamplingConfig.repeat_per_collect``.
    :param batch_size: forwarded to ``SamplingConfig.batch_size`` (may be ``None``).
    :param training_num: forwarded to ``SamplingConfig.num_train_envs``.
    :param test_num: forwarded to ``SamplingConfig.num_test_envs``.
    :param rew_norm: forwarded to ``NPGParams.reward_normalization``.
    :param gae_lambda: forwarded to ``NPGParams.gae_lambda``.
    :param bound_action_method: forwarded to ``NPGParams.action_bound_method``.
    :param lr_decay: if true, a ``LRSchedulerFactoryLinear`` built from the
        sampling configuration is used as ``lr_scheduler_factory``; otherwise
        ``None`` is passed.
    :param norm_adv: forwarded to ``NPGParams.advantage_normalization``.
    :param optim_critic_iters: forwarded to ``NPGParams.optim_critic_iters``.
    :param actor_step_size: forwarded to ``NPGParams.actor_step_size``.
    """
    # Log name layout: <task>/npg/<seed>/<datetime tag>.
    log_name_parts = (task, "npg", str(experiment_config.seed), datetime_tag())
    log_name = os.path.join(*log_name_parts)

    sampling_config = SamplingConfig(
        num_epochs=epoch,
        step_per_epoch=step_per_epoch,
        step_per_collect=step_per_collect,
        repeat_per_collect=repeat_per_collect,
        batch_size=batch_size,
        buffer_size=buffer_size,
        num_train_envs=training_num,
        num_test_envs=test_num,
    )

    env_factory = MujocoEnvFactory(task, experiment_config.seed, obs_norm=True)

    builder = NPGExperimentBuilder(env_factory, experiment_config, sampling_config)

    # The scheduler factory is optional; it is only created when decay is requested.
    if lr_decay:
        lr_scheduler_factory = LRSchedulerFactoryLinear(sampling_config)
    else:
        lr_scheduler_factory = None

    npg_params = NPGParams(
        discount_factor=gamma,
        gae_lambda=gae_lambda,
        action_bound_method=bound_action_method,
        reward_normalization=rew_norm,
        advantage_normalization=norm_adv,
        optim_critic_iters=optim_critic_iters,
        actor_step_size=actor_step_size,
        lr=lr,
        lr_scheduler_factory=lr_scheduler_factory,
        dist_fn=DistributionFunctionFactoryIndependentGaussians(),
    )

    # Apply the configuration steps one at a time, chaining on each returned builder.
    builder = builder.with_npg_params(npg_params)
    builder = builder.with_actor_factory_default(
        hidden_sizes,
        torch.nn.Tanh,
        continuous_unbounded=True,
    )
    builder = builder.with_critic_factory_default(hidden_sizes, torch.nn.Tanh)

    experiment = builder.build()
    experiment.run(log_name)


# Script entry point: only runs when the file is executed directly, not on import.
if __name__ == "__main__":
    # Hand `main` to tianshou's CLI runner. Presumably this exposes main's
    # parameters as command-line options (jsonargparse-based) — confirm in
    # tianshou.utils.logging.run_cli.
    logging.run_cli(main)
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00			`#!/usr/bin/env python3`

			`import os`
			`from collections.abc import Sequence`
			`from typing import Literal`

Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`import torch`
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00
			`from examples.mujoco.mujoco_env import MujocoEnvFactory`
			`from tianshou.highlevel.config import SamplingConfig`
			`from tianshou.highlevel.experiment import (`
			`ExperimentConfig,`
			`NPGExperimentBuilder,`
			`)`
			`from tianshou.highlevel.params.dist_fn import (`
			`DistributionFunctionFactoryIndependentGaussians,`
			`)`
			`from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear`
			`from tianshou.highlevel.params.policy_params import NPGParams`
Revert "Depend on sensAI instead of copying its utils (logging, string)" This reverts commit fdb0eba93d81fa5e698770b4f7088c87fc1238da. 2023-11-07 10:54:22 +01:00			`from tianshou.utils import logging`
			`from tianshou.utils.logging import datetime_tag`
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00

			`def main(`
			`experiment_config: ExperimentConfig,`
			`task: str = "Ant-v3",`
			`buffer_size: int = 4096,`
			`hidden_sizes: Sequence[int] = (64, 64),`
			`lr: float = 1e-3,`
			`gamma: float = 0.99,`
			`epoch: int = 100,`
			`step_per_epoch: int = 30000,`
			`step_per_collect: int = 1024,`
			`repeat_per_collect: int = 1,`
Support batch_size=None and use it in various scripts (#993) Closes #986 2023-11-24 19:13:10 +01:00			`batch_size: int \| None = None,`
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00			`training_num: int = 16,`
			`test_num: int = 10,`
			`rew_norm: bool = True,`
			`gae_lambda: float = 0.95,`
			`bound_action_method: Literal["clip", "tanh"] = "clip",`
			`lr_decay: bool = True,`
			`norm_adv: bool = True,`
			`optim_critic_iters: int = 20,`
			`actor_step_size: float = 0.1,`
			`):`
			`log_name = os.path.join(task, "npg", str(experiment_config.seed), datetime_tag())`

			`sampling_config = SamplingConfig(`
			`num_epochs=epoch,`
			`step_per_epoch=step_per_epoch,`
			`batch_size=batch_size,`
			`num_train_envs=training_num,`
			`num_test_envs=test_num,`
			`buffer_size=buffer_size,`
			`step_per_collect=step_per_collect,`
			`repeat_per_collect=repeat_per_collect,`
			`)`

Change interface of EnvFactory to ensure that configuration of number of environments in SamplingConfig is used (values are now passed to factory method) This is clearer and removes the need to pass otherwise unnecessary configuration to environment factories at construction 2023-10-18 23:55:23 +02:00			`env_factory = MujocoEnvFactory(task, experiment_config.seed, obs_norm=True)`
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00
			`experiment = (`
			`NPGExperimentBuilder(env_factory, experiment_config, sampling_config)`
			`.with_npg_params(`
			`NPGParams(`
			`discount_factor=gamma,`
			`gae_lambda=gae_lambda,`
			`action_bound_method=bound_action_method,`
			`reward_normalization=rew_norm,`
			`advantage_normalization=norm_adv,`
			`optim_critic_iters=optim_critic_iters,`
			`actor_step_size=actor_step_size,`
			`lr=lr,`
			`lr_scheduler_factory=LRSchedulerFactoryLinear(sampling_config)`
			`if lr_decay`
			`else None,`
			`dist_fn=DistributionFunctionFactoryIndependentGaussians(),`
			`),`
			`)`
Allow to configure activation function in default networks * Set ReLU as default in all actor and critic factories * Configure non-default in applicable MuJoCo examples 2023-10-18 13:57:36 +02:00			`.with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True)`
			`.with_critic_factory_default(hidden_sizes, torch.nn.Tanh)`
Support NPG in high-level API and add example mujoco_npg_hl 2023-10-10 13:47:30 +02:00			`.build()`
			`)`
			`experiment.run(log_name)`


			`if __name__ == "__main__":`
Fix tianshou.highlevel depending on jsonargparse (should be dev dependency only) by introducing a new place where jsonargparse can be configured: logging.run_cli, which is also slightly more convenient 2023-10-19 11:40:49 +02:00			`logging.run_cli(main)`