#!/usr/bin/env python3 """The high-level multi experiment script demonstrates how to use the high-level API of TianShou to train a single configuration of an experiment (here a PPO agent on mujoco) with multiple non-intersecting seeds. Thus, the experiment will be repeated `num_experiments` times. For each repetition, a policy seed, train env seeds, and test env seeds are set that are non-intersecting with the seeds of the other experiments. Each experiment's results are stored in a separate subdirectory. The final results are aggregated and turned into useful statistics with the rliable package. The call to `eval_experiments` will load the results from the log directory and create an interp-quantile mean plot for the returns as well as a performance profile plot. These plots are saved in the log directory and displayed in the console. """ import os import torch from examples.mujoco.mujoco_env import MujocoEnvFactory from tianshou.evaluation.launcher import RegisteredExpLauncher from tianshou.evaluation.rliable_evaluation_hl import RLiableExperimentResult from tianshou.highlevel.config import SamplingConfig from tianshou.highlevel.experiment import ( ExperimentConfig, PPOExperimentBuilder, ) from tianshou.highlevel.logger import LoggerFactoryDefault from tianshou.highlevel.params.dist_fn import ( DistributionFunctionFactoryIndependentGaussians, ) from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear from tianshou.highlevel.params.policy_params import PPOParams from tianshou.utils import logging from tianshou.utils.logging import datetime_tag log = logging.getLogger(__name__) def main( num_experiments: int = 5, run_experiments_sequentially: bool = False, ) -> RLiableExperimentResult: """:param num_experiments: the number of experiments to run. The experiments differ exclusively in the seeds. :param run_experiments_sequentially: if True, the experiments are run sequentially, otherwise in parallel. If a single experiment is set to use all available CPU cores, it might be undesired to run multiple experiments in parallel on the same machine, :return: an object containing rliable-based evaluation results """ task = "Ant-v4" persistence_dir = os.path.abspath(os.path.join("log", task, "ppo", datetime_tag())) experiment_config = ExperimentConfig(persistence_base_dir=persistence_dir, watch=False) sampling_config = SamplingConfig( num_epochs=1, step_per_epoch=5000, batch_size=64, num_train_envs=5, num_test_envs=5, num_test_episodes=5, buffer_size=4096, step_per_collect=2048, repeat_per_collect=1, ) env_factory = MujocoEnvFactory( task, train_seed=sampling_config.train_seed, test_seed=sampling_config.test_seed, obs_norm=True, ) hidden_sizes = (64, 64) experiment_collection = ( PPOExperimentBuilder(env_factory, experiment_config, sampling_config) .with_ppo_params( PPOParams( discount_factor=0.99, gae_lambda=0.95, action_bound_method="clip", reward_normalization=True, ent_coef=0.0, vf_coef=0.25, max_grad_norm=0.5, value_clip=False, advantage_normalization=False, eps_clip=0.2, dual_clip=None, recompute_advantage=True, lr=3e-4, lr_scheduler_factory=LRSchedulerFactoryLinear(sampling_config), dist_fn=DistributionFunctionFactoryIndependentGaussians(), ), ) .with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True) .with_critic_factory_default(hidden_sizes, torch.nn.Tanh) .with_logger_factory(LoggerFactoryDefault("tensorboard")) .build_seeded_collection(num_experiments) ) if run_experiments_sequentially: launcher = RegisteredExpLauncher.sequential.create_launcher() else: launcher = RegisteredExpLauncher.joblib.create_launcher() successful_experiment_stats = experiment_collection.run(launcher) log.info(f"Successfully completed {len(successful_experiment_stats)} experiments.") num_successful_experiments = len(successful_experiment_stats) for i, info_stats in enumerate(successful_experiment_stats, start=1): if info_stats is not None: log.info(f"Training stats for successful experiment {i}/{num_successful_experiments}:") log.info(info_stats.pprints_asdict()) else: log.info( f"No training stats available for successful experiment {i}/{num_successful_experiments}.", ) rliable_result = RLiableExperimentResult.load_from_disk(persistence_dir) rliable_result.eval_results(show_plots=True, save_plots=True) return rliable_result if __name__ == "__main__": result = logging.run_cli(main, level=logging.INFO)