# agent
agent_name: atari_agent

# env setting
env:
  env: Atari
  game: Asterix
  base_seed: 0
  n_skip: 4
  n_stack: 4
  max_episode_steps: 3000 # 27000 for final test
  gray_scale: False
  image_based: True
  clip_reward: True
  obs_shape: [3, 96, 96]
  episodic: True
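
# NOTE: assuming max_episode_steps counts post-skip agent steps, n_skip: 4 makes
# the training cap of 3000 steps ~12000 raw frames, and the 27000 noted above for
# the final test is 27000 * 4 = 108000 frames, the standard 30-minute Atari limit.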

rl:
  discount: 0.997
  unroll_steps: 5 # prev 5
  td_steps: 5
  auto_td_steps: 30000 # prev 30000
  td_lambda: 0.95
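
# NOTE: assuming the usual MuZero-style n-step return, td_steps: 5 with
# discount: 0.997 gives the value target
#   z_t = r_{t+1} + ... + gamma^4 * r_{t+5} + gamma^5 * v_{t+5},
# and auto_td_steps presumably shortens n for stale off-policy data as training
# progresses (EfficientZero's convention; not verified against this code).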

# optimizer
optimizer:
  type: SGD
  lr: 0.2 # prev 0.2
  lr_decay_type: none
  lr_warm_up: 0.01
  lr_decay_rate: 0.1
  lr_decay_steps: 100000
  weight_decay: 1e-4
  momentum: 0.9
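
# NOTE: SGD with lr 0.2, momentum 0.9 and weight_decay 1e-4 mirrors the original
# EfficientZero recipe; lr_warm_up: 0.01 is presumably the warm-up fraction of
# training_steps (an assumption, not confirmed from the trainer code).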

# priority of data
priority:
  use_priority: True
  priority_prob_alpha: 1.0 # prev 0.6
  priority_prob_beta: 1.0 # prev 0.4
  min_prior: 0.000001
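
# NOTE: assuming standard prioritized replay semantics (Schaul et al., 2016):
# samples are drawn with P(i) = p_i^alpha / sum_k p_k^alpha and reweighted by
# w_i ∝ (N * P(i))^-beta. alpha = beta = 1.0 is full prioritization with full
# importance-sampling correction; min_prior floors p_i so no sample starves.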

# training
train:
  load_model_path: ''
  batch_size: 256
  training_steps: 100000 # 100 * 1000
  offline_training_steps: 20000 # 20 * 1000
  start_transitions: 2000 # 2 * 1000

  eval_n_episode: 10
  eval_interval: 10000

  self_play_update_interval: 100
  reanalyze_update_interval: 200
  save_ckpt_interval: 10000

  mini_batch_size: 256
  reanalyze_ratio: 1.0

  reward_loss_coeff: 1.0
  value_loss_coeff: 0.5 # prev 0.25, 1.5 for test model-free value fitting
  policy_loss_coeff: 1.0 # prev 1.0
  consistency_coeff: 5.0 # prev 2.0
  decorrelation_coeff: 0.01
  off_diag_coeff: 5e-3
  entropy_coeff: 5e-3

  max_grad_norm: 5
  change_temperature: True

  periodic_reset: False
  value_reanalyze: False
  path_consistency: False
  use_decorrelation: False
  value_policy_detach: False
  optimal_Q: False
  v_num: 1
  value_target: 'mixed' # sarsa or search or mixed or max
  use_IQL: False
  IQL_weight: 0.7
  start_use_mix_training_steps: 3e4
  mixed_value_threshold: 5e3
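
# NOTE: the coefficients above suggest a weighted total loss of the form
#   L = 1.0 * L_reward + 0.5 * L_value + 1.0 * L_policy + 5.0 * L_consistency
#       + 5e-3 * L_entropy (+ decorrelation terms when use_decorrelation: True);
# this is a sketch of the convention, not quoted from the training code.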

# self-play data collection
data:
  num_envs: 4
  buffer_size: 1000000 # 1 * 1000 * 1000
  total_transitions: 100000 # 100 * 1000
  top_transitions: 2e5
  trajectory_size: 400
  save_video: False
  save_as_dataset: False
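
# NOTE: total_transitions: 100000 matches the Atari 100k benchmark (400k raw
# frames at frameskip 4); assuming buffer_size counts transitions, the buffer
# holds the entire run, so nothing is evicted during online training.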

# MCTS
mcts:
  language: cython
  num_simulations: 16 # prev 8, ori_mcts 50
  num_top_actions: 4 # prev 4
  c_visit: 50
  c_scale: 0.1 # prev 0.1
  c_base: 19652
  c_init: 1.25
  dirichlet_alpha: 0.3
  explore_frac: 0.25
  value_minmax_delta: 0.01
  vis: ['print']
  mpc_horizon: 1
  use_gumbel: True
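
# NOTE: with use_gumbel: True the search presumably follows Gumbel MuZero:
# 16 simulations spent on the num_top_actions = 4 Gumbel-top-k actions via
# sequential halving, with completed Q-values scaled as
#   sigma(q) = (c_visit + max_b N(b)) * c_scale * q.
# c_base / c_init are the classic pUCT constants, likely used only on the
# non-Gumbel (ori_mcts) path.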

# model architecture
model:
  noisy_net: False
  action_embedding: True
  action_embedding_dim: 16
  down_sample: True
  state_norm: False
  value_prefix: True
  value_target: bootstrapped # bootstrapped or GAE
  GAE_max_steps: 15 # 10 or 15 or 20
  init_zero: True # prev True
  num_blocks: 1 # prev 1
  num_channels: 64
  reduced_channels: 16

  projection_layers: [1024, 1024] # hidden dim, output dim
  prjection_head_layers: [256, 1024] # hidden dim, output dim

  fc_layers: [32] # prev [32]
  lstm_hidden_size: 512
  lstm_horizon_len: 5
  policy_loss_type: reanalyze # policy_gradient or reanalyze

  reward_support:
    range: [-300, 300]
    scale: 1
    env: Atari
    bins: 51
    type: support # support or symlog
  value_support:
    range: [-300, 300]
    scale: 1
    env: Atari
    bins: 51
    type: support # support or symlog
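
# NOTE: under the usual categorical-support scheme, 51 bins span [-300, 300]
# (a step of 600 / 50 = 12) and each scalar target is split between its two
# nearest bins, so value/reward losses are cross-entropy rather than MSE;
# 'symlog' would presumably swap this for symlog-squashed scalar regression.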

# worker process allocation
actors:
  data_worker: 1
  batch_worker: 8 # oriMCTS 16

# wandb
wandb:
  project: 'ez-v2-release'
  tag: 'Atari'