Tianshou/examples/mujoco/mujoco_ppo_hl_multi.py
Dominik Jain f8cca8b07c Improve creation of multiple seeded experiments:
* Add class ExperimentCollection to improve usability
  * Remove parameters from ExperimentBuilder.build
  * Renamed ExperimentBuilder.build_default_seeded_experiments to build_seeded_collection,
    changing the return type to ExperimentCollection
  * Replace temp_config_mutation (which was not appropriate for the public API) with
    method copy (which performs a safe deep copy)
2024-04-30 17:22:11 +02:00

120 lines
4.6 KiB
Python

#!/usr/bin/env python3
"""The high-level multi experiment script demonstrates how to use the high-level API of TianShou to train
a single configuration of an experiment (here a PPO agent on mujoco) with multiple non-intersecting seeds.
Thus, the experiment will be repeated `num_experiments` times.
For each repetition, a policy seed, train env seeds, and test env seeds are set that
are non-intersecting with the seeds of the other experiments.
Each experiment's results are stored in a separate subdirectory.
The final results are aggregated and turned into useful statistics with the rliable API.
The call to `eval_experiments` will load the results from the log directory and
create an interp-quantile mean plot for the returns as well as a performance profile plot.
These plots are saved in the log directory and displayed in the console.
"""
import os
import sys
import torch
from examples.mujoco.mujoco_env import MujocoEnvFactory
from tianshou.evaluation.launcher import RegisteredExpLauncher
from tianshou.evaluation.rliable_evaluation_hl import RLiableExperimentResult
from tianshou.highlevel.config import SamplingConfig
from tianshou.highlevel.env import VectorEnvType
from tianshou.highlevel.experiment import (
ExperimentConfig,
PPOExperimentBuilder,
)
from tianshou.highlevel.logger import LoggerFactoryDefault
from tianshou.highlevel.params.dist_fn import (
DistributionFunctionFactoryIndependentGaussians,
)
from tianshou.highlevel.params.lr_scheduler import LRSchedulerFactoryLinear
from tianshou.highlevel.params.policy_params import PPOParams
from tianshou.utils import logging
from tianshou.utils.logging import datetime_tag
log = logging.getLogger(__name__)
def main(
num_experiments: int = 2,
run_experiments_sequentially: bool = True,
) -> RLiableExperimentResult:
""":param run_experiments_sequentially: if True, the experiments are run sequentially, otherwise in parallel.
LIMITATIONS: currently, the parallel execution does not seem to work properly on linux.
It might generally be undesired to run multiple experiments in parallel on the same machine,
as a single experiment already uses all available CPU cores by default.
:return: the directory where the results are stored
"""
task = "Ant-v4"
persistence_dir = os.path.abspath(os.path.join("log", task, "ppo", datetime_tag()))
experiment_config = ExperimentConfig(persistence_base_dir=persistence_dir, watch=False)
sampling_config = SamplingConfig(
num_epochs=1,
step_per_epoch=5000,
batch_size=64,
num_train_envs=10,
num_test_envs=10,
num_test_episodes=10,
buffer_size=4096,
step_per_collect=2048,
repeat_per_collect=10,
)
env_factory = MujocoEnvFactory(
task,
train_seed=sampling_config.train_seed,
test_seed=sampling_config.test_seed,
obs_norm=True,
venv_type=VectorEnvType.SUBPROC_SHARED_MEM_FORK_CONTEXT
if sys.platform == "darwin"
else VectorEnvType.SUBPROC_SHARED_MEM,
)
hidden_sizes = (64, 64)
experiment_collection = (
PPOExperimentBuilder(env_factory, experiment_config, sampling_config)
.with_ppo_params(
PPOParams(
discount_factor=0.99,
gae_lambda=0.95,
action_bound_method="clip",
reward_normalization=True,
ent_coef=0.0,
vf_coef=0.25,
max_grad_norm=0.5,
value_clip=False,
advantage_normalization=False,
eps_clip=0.2,
dual_clip=None,
recompute_advantage=True,
lr=3e-4,
lr_scheduler_factory=LRSchedulerFactoryLinear(sampling_config),
dist_fn=DistributionFunctionFactoryIndependentGaussians(),
),
)
.with_actor_factory_default(hidden_sizes, torch.nn.Tanh, continuous_unbounded=True)
.with_critic_factory_default(hidden_sizes, torch.nn.Tanh)
.with_logger_factory(LoggerFactoryDefault("tensorboard"))
.build_seeded_collection(num_experiments)
)
if run_experiments_sequentially:
launcher = RegisteredExpLauncher.sequential.create_launcher()
else:
launcher = RegisteredExpLauncher.joblib.create_launcher()
experiment_collection.run(launcher)
rliable_result = RLiableExperimentResult.load_from_disk(persistence_dir)
rliable_result.eval_results(show_plots=True, save_plots=True)
return rliable_result
if __name__ == "__main__":
result = logging.run_cli(main, level=logging.INFO)