# Configuration for the DMC image-based agent (YAML).
# Agent
agent_name: dmc_image_agent

# Environment settings.
# The inner `env` key is a child of the `env` section; it must stay nested,
# otherwise it would collide with the section key itself.
env:
  env: DMC
  game: hopper_hop
  base_seed: 0
  n_skip: 2
  n_stack: 4
  max_episode_steps: 1000
  gray_scale: False
  image_based: True
  clip_reward: False
  obs_shape: [3, 96, 96]
  episodic: False

# RL settings
rl:
  discount: 0.99
  unroll_steps: 5  # prev 5
  td_steps: 5
  auto_td_steps: 30000  # prev 30000 for 100K env steps
  td_lambda: 0.95

# Optimizer
optimizer:
  type: SGD
  lr: 0.2  # prev 0.2
  lr_warm_up: 0.01
  # `none` is a plain string here (YAML null is spelled null/~).
  lr_decay_type: none
  lr_decay_rate: 0.1
  lr_decay_steps: 100000
  # Written with a decimal point so YAML 1.1 loaders (e.g. PyYAML) resolve
  # it as a float; the dot-less form `1e-4` is read as a string there.
  weight_decay: 1.0e-4
  momentum: 0.9

# Priority of data
priority:
  use_priority: True
  priority_prob_alpha: 1.0  # prev 0.6
  priority_prob_beta: 1.0  # prev 0.4
  min_prior: 0.000001

# Training
# Exponent-form numbers below are written with a decimal point and a signed
# exponent so YAML 1.1 loaders (e.g. PyYAML) resolve them as floats; the
# dot-less forms (`5e-3`, `4e4`) are read as strings by such loaders.
train:
  load_model_path: ''
  batch_size: 256
  training_steps: 200000  # 200 * 1000 (comment previously said 100 * 1000)
  offline_training_steps: 20000  # 20 * 1000
  start_transitions: 2000  # 2 * 1000

  eval_n_episode: 10
  eval_interval: 5000

  self_play_update_interval: 100
  reanalyze_update_interval: 200
  save_ckpt_interval: 10000

  mini_batch_size: 256
  reanalyze_ratio: 1.0

  reward_loss_coeff: 1.0
  value_loss_coeff: 0.5  # prev 0.25
  policy_loss_coeff: 1.0
  consistency_coeff: 2.0
  decorrelation_coeff: 0.01
  off_diag_coeff: 5.0e-3
  entropy_coeff: 5.0e-3

  max_grad_norm: 5
  change_temperature: True

  periodic_reset: False
  value_reanalyze: False
  path_consistency: False
  use_decorrelation: False
  value_policy_detach: False
  optimal_Q: False
  v_num: 1
  value_target: 'mixed'  # sarsa or search or mixed or max
  use_IQL: False
  IQL_weight: 0.5
  start_use_mix_training_steps: 4.0e+4
  mixed_value_threshold: 2.0e+4

# Self-play data collection
data:
  num_envs: 4
  buffer_size: 200000  # 200 * 1000 (comment previously said 1 * 1000 * 1000)
  total_transitions: 200000  # 200 * 1000 (comment previously said 1000 * 1000)
  # Decimal point + signed exponent so YAML 1.1 loaders (e.g. PyYAML) read a
  # float; the dot-less form `2e5` is resolved as a string there.
  top_transitions: 2.0e+5
  trajectory_size: 100  # prev 500
  save_video: False
  save_as_dataset: False

# MCTS
mcts:
  language: cython
  num_simulations: 32  # prev 8
  num_top_actions: 16  # prev 4
  num_sampled_actions: 16
  c_visit: 50
  c_scale: 0.1  # prev 1.0
  value_minmax_delta: 0.01
  mpc_horizon: 1
  vis: ['print']
  use_mppi: False
  std_magnification: 3

# Model architecture
# NOTE(review): `reward_support` and `value_support` are placed under `model`
# here; the original nesting level is not recoverable from the flattened
# text -- confirm against the code that reads this file.
model:
  noisy_net: False
  action_embedding: True
  action_embedding_dim: 16
  block_type: resnet  # resnet, convnext
  down_sample: True
  state_norm: False
  value_prefix: False
  value_target: bootstrapped  # bootstrapped or GAE
  GAE_max_steps: 15  # 10 or 15 or 20
  # NOTE(review): YAML reads a bare `None` as the string "None", not null --
  # confirm the consumer compares against the string.
  dynamic_type: None  # RNN or Transformer or None
  init_zero: True
  num_blocks: 1  # prev 1
  num_channels: 64
  reduced_channels: 16

  projection_layers: [1024, 1024]  # hidden dim, output dim
  # NOTE(review): key spelling ("prjection") is kept as-is because it is part
  # of the interface; renaming requires changing the consumer too.
  prjection_head_layers: [256, 1024]  # hidden dim, output dim

  fc_layers: [32]  # prev [32]
  lstm_hidden_size: 512
  lstm_horizon_len: 5
  # NOTE(review): key spelling ("ensumble") is kept as-is for the same reason.
  value_ensumble: 1

  # beta or squashed_gaussian or truncated_gaussian or discretized
  policy_distribution: squashed_gaussian
  policy_loss_type: reanalyze  # policy_gradient or reanalyze
  policy_action_num: 4
  random_action_num: 12
  random_type: std  # std, normal, pink, OU

  reward_support:
    range: [-2, 2]
    scale: 0.01
    env: DMC
    bins: 51
    type: support  # support or symlog
  value_support:
    range: [-299, 299]
    scale: 0.5
    env: DMC
    bins: 51
    type: support  # support or symlog

# Worker process allocation
actors:
  data_worker: 2
  batch_worker: 8

# wandb
wandb:
  project: 'ez-v2-release'
  tag: 'DMC-image'