So far only for one script (mujoco_ppo_cfg); extension will follow. Conflicts: examples/mujoco/mujoco_env.py, examples/mujoco/mujoco_ppo.py, setup.py
46 lines
678 B
YAML
46 lines
678 B
YAML
# General config
|
|
logger: "tensorboard"
|
|
wandb_project: "mujoco.benchmark"
|
|
seed: 24
|
|
logdir: "log"
|
|
device: "cpu"
|
|
watch: false
|
|
render: 0.0
|
|
resume_path: null
|
|
resume_id: null
|
|
|
|
# Training: neural network (NN)
|
|
lr: 3.0e-4  # explicit decimal point so YAML 1.1 loaders (e.g. PyYAML) read a float, not the string "3e-4"
|
|
hidden_sizes: [64, 64]
|
|
lr_decay: true
|
|
|
|
# Training: sampling
|
|
training_num: 64
|
|
test_num: 10
|
|
repeat_per_collect: 10
|
|
batch_size: 64
|
|
epoch: 100
|
|
step_per_epoch: 30000
|
|
step_per_collect: 2048
|
|
buffer_size: 4096
|
|
|
|
# Training: RL modelling
|
|
gamma: 0.99
|
|
rew_norm: true
|
|
dual_clip: null
|
|
value_clip: false
|
|
norm_adv: false
|
|
recompute_adv: true
|
|
gae_lambda: 0.95
|
|
|
|
# Training: PPO specifics
|
|
ent_coef: 0.0
|
|
vf_coef: 0.25
|
|
bound_action_method: "clip"
|
|
max_grad_norm: 0.5
|
|
eps_clip: 0.2
|
|
|
|
|
|
# MuJoCo task
|
|
task: "Ant-v3"
|