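Bug fix for onehot distribution: the actor's std mode is now read from the config (`actor.std`, 'learned' by default, 'none' for onehot actors) instead of being hard-coded to "learned".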

This commit is contained in:
NM512 2024-01-14 21:55:34 +09:00
parent 5abaf056ad
commit a4fdfad938
2 changed files with 6 additions and 6 deletions

View File

@@ -47,7 +47,7 @@ defaults:
decoder: decoder:
{mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0} {mlp_keys: '$^', cnn_keys: 'image', act: 'SiLU', norm: True, cnn_depth: 32, kernel_size: 4, minres: 4, mlp_layers: 2, mlp_units: 512, cnn_sigmoid: False, image_dist: mse, vector_dist: symlog_mse, outscale: 1.0}
actor: actor:
{layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0} {layers: 2, dist: 'normal', entropy: 3e-4, unimix_ratio: 0.01, std: 'learned', min_std: 0.1, max_std: 1.0, temp: 0.1, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 1.0}
critic: critic:
{layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0} {layers: 2, dist: 'symlog_disc', slow_target: True, slow_target_update: 1, slow_target_fraction: 0.02, lr: 3e-5, eps: 1e-5, grad_clip: 100.0, outscale: 0.0}
reward_head: reward_head:
@@ -123,7 +123,7 @@ crafter:
units: 1024 units: 1024
encoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024} encoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
decoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024} decoder: {mlp_keys: '$^', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
actor: {layers: 5, dist: 'onehot'} actor: {layers: 5, dist: 'onehot', std: 'none'}
value: {layers: 5} value: {layers: 5}
reward_head: {layers: 5} reward_head: {layers: 5}
cont_head: {layers: 5} cont_head: {layers: 5}
@@ -136,7 +136,7 @@ atari100k:
train_ratio: 1024 train_ratio: 1024
video_pred_log: true video_pred_log: true
eval_episode_num: 100 eval_episode_num: 100
actor: {dist: 'onehot'} actor: {dist: 'onehot', std: 'none'}
imag_gradient: 'reinforce' imag_gradient: 'reinforce'
stickey: False stickey: False
lives: unused lives: unused
@@ -161,7 +161,7 @@ minecraft:
units: 1024 units: 1024
encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024} encoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath|obs_reward', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024} decoder: {mlp_keys: 'inventory|inventory_max|equipped|health|hunger|breath', cnn_keys: 'image', cnn_depth: 96, mlp_layers: 5, mlp_units: 1024}
actor: {layers: 5, dist: 'onehot'} actor: {layers: 5, dist: 'onehot', std: 'none'}
value: {layers: 5} value: {layers: 5}
reward_head: {layers: 5} reward_head: {layers: 5}
cont_head: {layers: 5} cont_head: {layers: 5}
@@ -172,7 +172,7 @@ minecraft:
memorymaze: memorymaze:
steps: 1e8 steps: 1e8
action_repeat: 2 action_repeat: 2
actor: {dist: 'onehot'} actor: {dist: 'onehot', std: 'none'}
imag_gradient: 'reinforce' imag_gradient: 'reinforce'
task: 'memorymaze_9x9' task: 'memorymaze_9x9'

View File

@@ -228,7 +228,7 @@ class ImagBehavior(nn.Module):
config.act, config.act,
config.norm, config.norm,
config.actor["dist"], config.actor["dist"],
"learned", config.actor["std"],
config.actor["min_std"], config.actor["min_std"],
config.actor["max_std"], config.actor["max_std"],
absmax=1.0, absmax=1.0,