# agent
agent_name: dmc_state_agent

# env setting
env:
  env: DMC
  game: hopper_hop
  base_seed: 0
  n_skip: 2
  n_stack: 1
  max_episode_steps: 1000
  gray_scale: False
  image_based: False
  clip_reward: False
  obs_shape: 10
  episodic: False
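  # Note: with frame skip, each agent action is repeated n_skip times in the
  # underlying environment, rewards summed over the skipped frames. A rough
  # reading of this config (assuming max_episode_steps counts raw DMC frames,
  # as in the standard wrappers):
  #   decision_steps = max_episode_steps / n_skip = 1000 / 2 = 500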

rl:
  discount: 0.997 # prev 0.99
  unroll_steps: 5
  td_steps: 5
  auto_td_steps: 60000 # prev 30000 for 100k env steps
  td_lambda: 0.95
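  # These fields parameterize the usual n-step bootstrapped target; a sketch
  # (notation assumed, not copied from this repo):
  #   z_t = sum_{i=0}^{td_steps-1} discount^i * r_{t+i}
  #         + discount^td_steps * v_{t+td_steps}
  # auto_td_steps sets the training-step scale over which the effective
  # td_steps is shrunk to counter off-policy staleness, and td_lambda is the
  # mixing weight when lambda-style (GAE) targets are enabled in the model block.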

# optimizer
optimizer:
  type: Adam
  lr: 3e-4
  lr_warm_up: 0.01
  lr_decay_type: none
  lr_decay_rate: 0.1
  lr_decay_steps: 300000
  weight_decay: 2e-5
  momentum: 0.9
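  # Sketch of the implied schedule, assuming lr_warm_up is a fraction of
  # training_steps (a common convention; the training code is authoritative):
  #   warm-up: lr ramps linearly to 3e-4 over 0.01 * 100000 = 1000 steps
  #   decay:   'none' then holds lr flat; lr_decay_rate / lr_decay_steps only
  #            apply when lr_decay_type is enabled
  # momentum is presumably read only if type is switched to SGD; Adam ignores it.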

# priority of data
priority:
  use_priority: True
  priority_prob_alpha: 1.0 # prev 0.6
  priority_prob_beta: 1.0 # prev 0.4
  min_prior: 1e-6
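  # Standard prioritized-replay weighting (Schaul et al., 2016):
  #   P(i) = p_i^alpha / sum_k p_k^alpha,   w_i = (N * P(i))^(-beta)
  # With alpha = beta = 1.0, sampling is fully proportional to priority and
  # the importance weights fully correct for the induced bias; min_prior
  # floors p_i so no transition becomes unsampleable.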

# training
train:
  load_model_path: ''
  batch_size: 256
  training_steps: 100000 # 100 * 1000
  offline_training_steps: 20000 # 20 * 1000
  start_transitions: 2000 # 2 * 1000
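  # Data-reuse arithmetic implied by these values: 100000 steps * 256 batch
  # = 25.6M samples drawn against the 100k transitions collected below, i.e.
  # each transition is replayed ~256 times on average; updates start once the
  # buffer holds start_transitions = 2000 transitions.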

  eval_n_episode: 10
  eval_interval: 5000

  self_play_update_interval: 100
  reanalyze_update_interval: 200
  save_ckpt_interval: 10000

  mini_batch_size: 256
  reanalyze_ratio: 1.0

  reward_loss_coeff: 1.0
  value_loss_coeff: 0.5 # prev 0.25
  policy_loss_coeff: 1.0
  consistency_coeff: 2.0
  decorrelation_coeff: 0.01
  off_diag_coeff: 5e-3
  entropy_coeff: 5e-2 # prev 5e-3
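  # How these typically combine in an EfficientZero-style objective (term
  # names illustrative; the exact loss lives in the training code):
  #   L = reward_loss_coeff * L_reward + value_loss_coeff * L_value
  #     + policy_loss_coeff * L_policy + consistency_coeff * L_consistency
  #     - entropy_coeff * H(pi)
  # decorrelation_coeff / off_diag_coeff contribute only when
  # use_decorrelation is enabled (False below).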

  max_grad_norm: 5
  change_temperature: True

  periodic_reset: False
  value_reanalyze: False
  path_consistency: False
  use_decorrelation: False
  value_policy_detach: False
  optimal_Q: False
  v_num: 1
  value_target: 'mixed' # sarsa, search, mixed, or max
  use_IQL: False
  IQL_weight: 0.5
  start_use_mix_training_steps: 4e4
  mixed_value_threshold: 2e4
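  # A hedged reading of the 'mixed' target: before
  # start_use_mix_training_steps (4e4) updates, value targets come from a
  # single source; afterwards sarsa-style bootstrapped targets and
  # search-derived targets are blended, with mixed_value_threshold gating the
  # switch. The precise mixing rule is defined in the training code, not here.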

# self-play data collection
data:
  num_envs: 4
  buffer_size: 100000 # 100 * 1000
  total_transitions: 100000 # 100k
  top_transitions: 2e5
  trajectory_size: 100 # prev 500
  save_video: False
  save_as_dataset: False
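  # Since buffer_size == total_transitions == 100k, the replay buffer never
  # evicts during the run; with num_envs = 4 parallel environments, self-play
  # writes trajectories of up to trajectory_size = 100 transitions apiece.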

# MCTS
mcts:
  language: cython
  num_simulations: 32 # prev 8
  num_top_actions: 16 # prev 4
  num_sampled_actions: 16 # same as Sampled MuZero
  c_visit: 50
  c_scale: 0.1
  value_minmax_delta: 0.01
  mpc_horizon: 1
  vis: ['print']
  use_mppi: False
  std_magnification: 3
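  # num_top_actions / num_simulations follow the Gumbel-MuZero search pattern:
  # 16 sampled candidates are reduced by sequential halving (16 -> 8 -> 4 -> 2)
  # within the 32-simulation budget, and c_visit / c_scale enter the
  # completed-Q transform, roughly
  #   sigma(q) = (c_visit + max_b N(b)) * c_scale * q
  # (Danihelka et al., 2022). value_minmax_delta floors the min-max range used
  # to normalize q values.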

# model architecture
model:
  noisy_net: False
  action_embedding: True
  block_type: resnet # resnet or convnext
  down_sample: True
  state_norm: False
  value_prefix: False
  value_target: bootstrapped # bootstrapped or GAE
  GAE_max_steps: 15 # 10, 15, or 20
  dynamic_type: None # RNN, Transformer, or None
  init_zero: True
  use_bn: True
  use_p_norm: False
  num_blocks: 2 # prev 3

  hidden_shape: 128
  rep_net_shape: 256
  dyn_shape: 256
  act_embed_shape: 64
  rew_net_shape: [ 256, 256 ]
  val_net_shape: [ 256, 256 ]
  pi_net_shape: [ 256, 256 ]
  proj_hid_shape: 512
  pred_hid_shape: 512
  proj_shape: 128
  pred_shape: 128
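  # proj_* / pred_* size the SimSiam-style projector and predictor behind the
  # consistency loss; a sketch assuming the standard EfficientZero setup:
  #   L_consistency = -cos( pred(proj(dynamics(s_t, a_t))),
  #                         stop_grad(proj(s_{t+1})) )
  # with proj: 128 -> 512 -> 128 and pred: 128 -> 512 -> 128 per the shapes above.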

  fc_layers: [ 32 ]
  lstm_hidden_size: 512
  lstm_horizon_len: 5
  value_ensumble: 1

  policy_distribution: squashed_gaussian # beta, squashed_gaussian, or truncated_gaussian
  policy_loss_type: reanalyze # policy_gradient or reanalyze
  policy_action_num: 4
  random_action_num: 12
  random_type: std # std, normal, pink, or OU
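  # squashed_gaussian draws u ~ N(mu, sigma) and emits a = tanh(u), with the
  # usual Jacobian correction to log-probs:
  #   log pi(a) = log N(u; mu, sigma) - sum_i log(1 - tanh(u_i)^2)
  # Note that policy_action_num + random_action_num = 4 + 12 = 16 matches
  # num_sampled_actions in the mcts block, which suggests the candidate set
  # mixes policy samples with exploration-noise samples (random_type); the
  # exact split is defined in the sampling code.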

  reward_support:
    range: [ -2, 2 ]
    scale: 0.01
    env: DMC
    bins: 51
    type: support # support or symlog
  value_support:
    range: [ -299, 299 ]
    scale: 0.5
    env: DMC
    bins: 51
    type: support # support or symlog
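  # With type: support, scalar rewards/values are represented categorically
  # over 51 bins and trained against a two-hot target, typically after the
  # MuZero squashing h(x) = sign(x) * (sqrt(|x| + 1) - 1) + eps * x; 'symlog'
  # would use sign(x) * log(|x| + 1) instead. How range and scale space the
  # bins is fixed by the model code, so read these values alongside it.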

# worker process allocation
actors:
  data_worker: 1
  batch_worker: 10

# wandb
wandb:
  project: 'ez-v2-release'
  tag: 'DMC-state'