# Experiment configuration (YAML): a `defaults` section followed by named preset sections.
# Baseline value for every option. The named sections later in this file
# repeat a subset of these keys with different values.
# NOTE(review): the code that combines a named section with `defaults` is not
# part of this file - confirm the merge semantics against the loader.
# NOTE(review): exponent literals without a decimal point (1e6, 3e-4, ...) are
# resolved as floats by YAML 1.2 parsers but as strings by YAML 1.1 parsers
# such as PyYAML - confirm which parser reads this file before reformatting.
defaults:

  # Run bookkeeping, logging and hardware
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  deterministic_run: False
  steps: 1e6
  parallel: False
  eval_every: 1e4
  eval_episode_num: 10
  log_every: 1e4
  reset_every: 0
  device: 'cuda:0'
  compile: True
  precision: 32
  debug: False
  expl_gifs: False
  video_pred_log: True

  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  grayscale: False
  prefill: 2500
  eval_noise: 0.0
  reward_EMA: True

  # Model
  dyn_cell: 'gru_layer_norm'
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_rec_depth: 1
  dyn_shared: False
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  dyn_temp_post: True
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  act: 'SiLU'
  norm: 'LayerNorm'
  # NOTE(review): mlp_keys / cnn_keys look like regexes ('$^' would match no
  # key) - confirm against the encoder/decoder code.
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: True
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    symlog_inputs: True
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: True
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    cnn_sigmoid: False
    image_dist: mse
    vector_dist: symlog_mse
    outscale: 1.0
  actor:
    layers: 2
    dist: 'normal'
    entropy: 3e-4
    unimix_ratio: 0.01
    min_std: 0.1
    max_std: 1.0
    temp: 0.1
    lr: 3e-5
    eps: 1e-5
    grad_clip: 100.0
    outscale: 1.0
  critic:
    layers: 2
    dist: 'symlog_disc'
    slow_target: True
    slow_target_update: 1
    slow_target_fraction: 0.02
    lr: 3e-5
    eps: 1e-5
    grad_clip: 100.0
    outscale: 0.0
  reward_head:
    layers: 2
    dist: 'symlog_disc'
    scale: 1.0
    outscale: 0.0
  cont_head:
    layers: 2
    scale: 1.0
    outscale: 1.0
  dyn_scale: 0.5
  rep_scale: 0.1
  kl_free: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  dataset_size: 1000000
  opt: 'adam'

  # Behavior
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  imag_gradient_mix: 0.0
  imag_sample: True
  expl_amount: 0
  eval_state_mean: False
  collect_dyn_sample: True
  behavior_stop_grad: True
  future_entropy: False

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: True
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: False

# Preset `dmc_proprio`. Every key here also exists under `defaults`; compared
# with those baseline values it changes steps (1e6 -> 5e5), envs (1 -> 4),
# video_pred_log (True -> false) and the encoder/decoder key patterns.
dmc_proprio:
  steps: 5e5
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: false
  # NOTE(review): the patterns look like regexes - '.*' would select every
  # key for the MLP path and '$^' none for the CNN path; confirm in the
  # encoder/decoder code.
  encoder:
    mlp_keys: '.*'
    cnn_keys: '$^'
  decoder:
    mlp_keys: '.*'
    cnn_keys: '$^'

# Preset `dmc_vision`. Every key here also exists under `defaults`; the only
# value that differs from the baseline is envs (1 -> 4). The remaining entries
# restate the baseline values explicitly.
dmc_vision:
  steps: 1e6
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: true
  # Same key patterns as `defaults`: 'image' for the CNN path, '$^' for the
  # MLP path.
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'

# Preset `crafter` (task `crafter_reward`). Relative to `defaults` it widens
# the model (dyn_hidden/units 512 -> 1024, dyn_deter 512 -> 4096, cnn_depth
# 32 -> 96), deepens the MLP stacks to 5 layers, switches the actor
# distribution to 'onehot' and imag_gradient to 'reinforce'.
crafter:
  task: crafter_reward
  # The key is `steps`, as under `defaults` and in the other presets; a
  # singular `step` matches no baseline key.
  # NOTE(review): assumes nothing reads a `step` key - confirm in the loader.
  steps: 1e6
  action_repeat: 1
  envs: 1
  train_ratio: 512
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  actor:
    layers: 5
    dist: 'onehot'
  # The value network is configured under `critic` in `defaults`; there is no
  # `value` key there, so the 5-layer setting has to be spelled `critic` to
  # line up with the actor / reward_head / cont_head entries around it.
  # NOTE(review): assumes nothing reads a `value` key - confirm in the loader.
  critic:
    layers: 5
  reward_head:
    layers: 5
  cont_head:
    layers: 5
  imag_gradient: 'reinforce'

# Preset `atari100k`. It sets no `task`, so the task presumably comes from
# elsewhere (e.g. the command line) - confirm against the launcher.
atari100k:
  steps: 4e5
  envs: 1
  action_repeat: 4
  train_ratio: 1024
  video_pred_log: true
  eval_episode_num: 100
  actor:
    dist: 'onehot'
  imag_gradient: 'reinforce'
  # The five keys below have no counterpart under `defaults`.
  # NOTE(review): `stickey` looks like a misspelling of `sticky`; it is kept
  # as-is because the key is presumably looked up by this exact name - rename
  # only together with the code that reads it.
  stickey: False
  lives: unused
  noops: 30
  resize: opencv
  actions: needed
  time_limit: 108000

# Preset `minecraft` (task `minecraft_diamond`). Relative to `defaults` it
# enables `parallel`, uses 16 envs, widens the model (dyn_hidden/units
# 512 -> 1024, dyn_deter 512 -> 4096, cnn_depth 32 -> 96), deepens the MLP
# stacks to 5 layers and switches to a 'onehot' actor with 'reinforce'
# imagination gradients.
minecraft:
  task: minecraft_diamond
  # The key is `steps`, as under `defaults` and in the other presets; a
  # singular `step` matches no baseline key, which would leave the baseline
  # 1e6 in place instead of the 1e8 intended here.
  # NOTE(review): assumes nothing reads a `step` key - confirm in the loader.
  steps: 1e8
  parallel: True
  envs: 16
  # no eval
  eval_episode_num: 0
  eval_every: 1e4
  action_repeat: 1
  train_ratio: 16
  video_pred_log: true
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  # The encoder pattern additionally lists `obs_reward`; the decoder pattern
  # does not.
  encoder:
    mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  decoder:
    mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  actor:
    layers: 5
    dist: 'onehot'
  # The value network is configured under `critic` in `defaults`; there is no
  # `value` key there, so the 5-layer setting has to be spelled `critic` to
  # line up with the actor / reward_head / cont_head entries around it.
  # NOTE(review): assumes nothing reads a `value` key - confirm in the loader.
  critic:
    layers: 5
  reward_head:
    layers: 5
  cont_head:
    layers: 5
  imag_gradient: 'reinforce'
  # `break_speed` has no counterpart under `defaults`.
  break_speed: 100.0
  time_limit: 36000

# Preset `memorymaze` (task 'memorymaze_9x9'). Differs from `defaults` in
# steps (1e6 -> 1e8), actor distribution ('normal' -> 'onehot'),
# imag_gradient ('dynamics' -> 'reinforce') and task; action_repeat restates
# the baseline value.
memorymaze:
  steps: 1e8
  action_repeat: 2
  actor:
    dist: 'onehot'
  imag_gradient: 'reinforce'
  task: 'memorymaze_9x9'

# Preset `debug`: turns on the `debug` flag and shrinks the warm-up and batch
# settings relative to `defaults` (pretrain 100 -> 1, prefill 2500 -> 1,
# batch_size 16 -> 10, batch_length 64 -> 20).
debug:
  debug: True
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20