dreamerv3-torch/configs.yaml

193 lines
3.8 KiB
YAML
Raw Normal View History

2023-02-12 22:35:25 +09:00
# Base configuration section.
# NOTE(review): the other named sections in this file repeat a subset of
# these keys, presumably as overrides layered on top — confirm with the loader.
defaults:
  # Run bookkeeping.
  logdir: null
  traindir: null
  evaldir: null
  offline_traindir: ''
  offline_evaldir: ''
  seed: 0
  # NOTE(review): exponent literals without a decimal point (1e6, 1e4, ...)
  # resolve to floats under YAML 1.2 but to strings under some YAML 1.1
  # loaders; they are kept exactly as written — confirm what the consumer
  # expects before rewriting them.
  steps: 1e6
  eval_every: 1e4
  eval_episode_num: 10
  log_every: 1e4
  reset_every: 0
  device: 'cuda:0'
  compile: true
  precision: 32
  debug: false
  expl_gifs: false
  video_pred_log: true

  # Environment
  task: 'dmc_walker_walk'
  size: [64, 64]
  envs: 1
  action_repeat: 2
  time_limit: 1000
  grayscale: false
  prefill: 2500
  eval_noise: 0.0
  reward_EMA: true

  # Model
  dyn_cell: 'gru_layer_norm'
  dyn_hidden: 512
  dyn_deter: 512
  dyn_stoch: 32
  dyn_discrete: 32
  dyn_input_layers: 1
  dyn_output_layers: 1
  dyn_rec_depth: 1
  dyn_shared: false
  dyn_mean_act: 'none'
  dyn_std_act: 'sigmoid2'
  dyn_min_std: 0.1
  dyn_temp_post: true
  grad_heads: ['decoder', 'reward', 'cont']
  units: 512
  reward_layers: 2
  cont_layers: 2
  value_layers: 2
  actor_layers: 2
  act: 'SiLU'
  norm: 'LayerNorm'
  # NOTE(review): mlp_keys / cnn_keys look like regular expressions
  # ('$^' would match nothing) — confirm how the consumer interprets them.
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: 'LayerNorm'
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    symlog_inputs: true
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    act: 'SiLU'
    norm: 'LayerNorm'
    cnn_depth: 32
    kernel_size: 4
    minres: 4
    mlp_layers: 2
    mlp_units: 512
    cnn_sigmoid: false
    image_dist: mse
    vector_dist: symlog_mse
  value_head: 'symlog_disc'
  reward_head: 'symlog_disc'
  # dyn_scale, rep_scale and kl_free are quoted, so they load as strings
  # rather than numbers; the quoting is preserved as-is.
  dyn_scale: '0.5'
  rep_scale: '0.1'
  kl_free: '1.0'
  cont_scale: 1.0
  reward_scale: 1.0
  weight_decay: 0.0
  unimix_ratio: 0.01
  action_unimix_ratio: 0.01
  initial: 'learned'

  # Training
  batch_size: 16
  batch_length: 64
  train_ratio: 512
  pretrain: 100
  model_lr: 1e-4
  opt_eps: 1e-8
  grad_clip: 1000
  value_lr: 3e-5
  actor_lr: 3e-5
  ac_opt_eps: 1e-5
  value_grad_clip: 100
  actor_grad_clip: 100
  dataset_size: 1000000
  slow_value_target: true
  slow_target_update: 1
  slow_target_fraction: 0.02
  opt: 'adam'

  # Behavior.
  discount: 0.997
  discount_lambda: 0.95
  imag_horizon: 15
  imag_gradient: 'dynamics'
  # imag_gradient_mix and actor_entropy are quoted, so they load as strings.
  imag_gradient_mix: '0.0'
  imag_sample: true
  actor_dist: 'normal'
  actor_entropy: '3e-4'
  actor_state_entropy: 0.0
  actor_init_std: 1.0
  actor_min_std: 0.1
  actor_max_std: 1.0
  actor_temp: 0.1
  expl_amount: 0.0
  eval_state_mean: false
  collect_dyn_sample: true
  behavior_stop_grad: true
  value_decay: 0.0
  future_entropy: false

  # Exploration
  expl_behavior: 'greedy'
  expl_until: 0
  expl_extr_scale: 0.0
  expl_intr_scale: 1.0
  disag_target: 'stoch'
  disag_log: true
  disag_models: 10
  disag_offset: 1
  disag_layers: 4
  disag_units: 400
  disag_action_cond: false
2023-05-14 23:38:46 +09:00
# Named section "dmc_proprio".
# NOTE(review): presumably applied on top of `defaults` — confirm with the loader.
dmc_proprio:
  steps: 5e5
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: false
  # Only mlp_keys / cnn_keys are listed here. '.*' for mlp_keys and '$^' for
  # cnn_keys is the reverse of the patterns used in `defaults`.
  encoder:
    mlp_keys: '.*'
    cnn_keys: '$^'
  decoder:
    mlp_keys: '.*'
    cnn_keys: '$^'
# Named section "dmc_vision".
# NOTE(review): presumably applied on top of `defaults` — confirm with the loader.
dmc_vision:
  steps: 1e6
  action_repeat: 2
  envs: 4
  train_ratio: 512
  video_pred_log: true
  # Same mlp_keys / cnn_keys patterns as the encoder/decoder in `defaults`.
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
2023-06-18 00:02:22 +09:00
# Named section "crafter".
# NOTE(review): presumably applied on top of `defaults` — confirm with the loader.
crafter:
  task: crafter_reward
  # Spelled `steps` to match `defaults` and every other section. The previous
  # spelling `step` matched no existing key, so it could not set the step
  # budget.
  steps: 1e6
  action_repeat: 1
  envs: 1
  train_ratio: 512
  video_pred_log: true
  # Larger widths and depths than the values listed in `defaults`.
  dyn_hidden: 1024
  dyn_deter: 4096
  units: 1024
  reward_layers: 5
  cont_layers: 5
  value_layers: 5
  actor_layers: 5
  encoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  decoder:
    mlp_keys: '$^'
    cnn_keys: 'image'
    cnn_depth: 96
    mlp_layers: 5
    mlp_units: 1024
  actor_dist: 'onehot'
  imag_gradient: 'reinforce'
2023-05-14 23:38:46 +09:00
2023-04-15 23:16:43 +09:00
# Named section "atari100k".
# NOTE(review): presumably applied on top of `defaults` — confirm with the loader.
atari100k:
  steps: 4e5
  envs: 1
  action_repeat: 4
  eval_episode_num: 100
  # NOTE(review): the key is spelled `stickey` (not `sticky`). It is kept
  # unchanged because a consumer may look it up under this exact name —
  # confirm before renaming.
  stickey: false
  # The next four keys do not appear in `defaults`.
  lives: unused
  noops: 30
  resize: opencv
  actions: needed
  actor_dist: 'onehot'
  train_ratio: 1024
  imag_gradient: 'reinforce'
  time_limit: 108000
2023-06-13 09:58:03 +08:00
2023-02-12 22:35:25 +09:00
# Named section "debug": sets the `debug` flag and uses smaller pretrain,
# prefill and batch values than the ones listed in `defaults`.
debug:
  debug: true
  pretrain: 1
  prefill: 1
  batch_size: 10
  batch_length: 20
2023-06-13 09:58:03 +08:00
2023-06-18 09:16:32 +08:00
# Named section "MemoryMaze".
# NOTE(review): the section name is capitalised, unlike the other sections.
# It is kept as-is because it may be referenced by this exact name.
MemoryMaze:
  steps: 1e7
  action_repeat: 2
  actor_dist: 'onehot'
  imag_gradient: 'reinforce'
  task: 'MemoryMaze_9x9'
2023-06-13 09:58:03 +08:00
2023-06-17 23:29:53 +08:00
2023-06-13 09:58:03 +08:00