---
# NOTE: keep one key per line. A YAML comment runs from `#` to the end of the
# line, so a key written on the same line after a `#` is silently ignored.

# General config
logger: "tensorboard"
wandb_project: "mujoco.benchmark"
seed: 24
logdir: "log"
device: "cpu"
watch: false
render: 0.0
resume_path: null
resume_id: null

# Training: NN
# Explicit mantissa point on purpose: YAML 1.1 loaders (e.g. PyYAML) resolve a
# bare `3e-4` as a string, whereas `3.0e-4` is a float under both 1.1 and 1.2.
lr: 3.0e-4
hidden_sizes: [64, 64]
lr_decay: true

# Training: sampling
training_num: 64
test_num: 10
repeat_per_collect: 10
batch_size: 64
epoch: 100
step_per_epoch: 30000
step_per_collect: 2048
buffer_size: 4096

# Training: RL modelling
gamma: 0.99
rew_norm: true
dual_clip: null
value_clip: false
norm_adv: false
recompute_adv: true
gae_lambda: 0.95

# Training: PPO specifics
ent_coef: 0.0
vf_coef: 0.25
bound_action_method: "clip"
max_grad_norm: 0.5
eps_clip: 0.2

# Mujoco
task: "Ant-v3"