# agent
agent_name: dmc_image_agent

# env setting
env:
  env: DMC
  game: hopper_hop
  base_seed: 0
  n_skip: 2
  n_stack: 4
  max_episode_steps: 1000
  gray_scale: False
  image_based: True
  clip_reward: False
  obs_shape: [3, 96, 96]
  episodic: False

rl:
  discount: 0.99
  unroll_steps: 5        # prev 5
  td_steps: 5
  auto_td_steps: 30000   # prev 30000 for 100K env steps
  td_lambda: 0.95

# optimizer
optimizer:
  type: SGD
  lr: 0.2                # prev 0.2
  lr_warm_up: 0.01
  lr_decay_type: none
  lr_decay_rate: 0.1
  lr_decay_steps: 100000
  weight_decay: 1e-4
  momentum: 0.9

# priority of data
priority:
  use_priority: True
  priority_prob_alpha: 1.0   # prev 0.6
  priority_prob_beta: 1.0    # prev 0.4
  min_prior: 0.000001

# training
train:
  load_model_path: ''
  batch_size: 256
  training_steps: 200000          # 200 * 1000
  offline_training_steps: 20000   # 20 * 1000
  start_transitions: 2000         # 2 * 1000
  eval_n_episode: 10
  eval_interval: 5000
  self_play_update_interval: 100
  reanalyze_update_interval: 200
  save_ckpt_interval: 10000
  mini_batch_size: 256
  reanalyze_ratio: 1.0
  reward_loss_coeff: 1.0
  value_loss_coeff: 0.5           # prev 0.25
  policy_loss_coeff: 1.0
  consistency_coeff: 2.0
  decorrelation_coeff: 0.01
  off_diag_coeff: 5e-3
  entropy_coeff: 5e-3
  max_grad_norm: 5
  change_temperature: True
  periodic_reset: False
  value_reanalyze: False
  path_consistency: False
  use_decorrelation: False
  value_policy_detach: False
  optimal_Q: False
  v_num: 1
  value_target: 'mixed'           # sarsa or search or mixed or max
  use_IQL: False
  IQL_weight: 0.5
  start_use_mix_training_steps: 4e4
  mixed_value_threshold: 2e4

# self-play data collection
data:
  num_envs: 4
  buffer_size: 200000         # 200 * 1000
  total_transitions: 200000   # 200 * 1000
  top_transitions: 2e5
  trajectory_size: 100        # prev 500
  save_video: False
  save_as_dataset: False

# MCTS
mcts:
  language: cython
  num_simulations: 32    # prev 8
  num_top_actions: 16    # prev 4
  num_sampled_actions: 16
  c_visit: 50
  c_scale: 0.1           # prev 1.0
  value_minmax_delta: 0.01
  mpc_horizon: 1
  vis: ['print']
  use_mppi: False
  std_magnification: 3

# model architecture
model:
  noisy_net: False
  action_embedding: True
  action_embedding_dim: 16
  block_type: resnet           # resnet, convnext
  down_sample: True
  state_norm: False
  value_prefix: False
  value_target: bootstrapped   # bootstrapped or GAE
  GAE_max_steps: 15            # 10 or 15 or 20
  dynamic_type: None           # RNN or Transformer or None
  init_zero: True
  num_blocks: 1                # prev 1
  num_channels: 64
  reduced_channels: 16
  projection_layers: [1024, 1024]      # hidden dim, output dim
  prjection_head_layers: [256, 1024]   # hidden dim, output dim
  fc_layers: [32]                      # prev [32]
  lstm_hidden_size: 512
  lstm_horizon_len: 5
  value_ensumble: 1
  policy_distribution: squashed_gaussian   # beta or squashed_gaussian or truncated_gaussian or discretized
  policy_loss_type: reanalyze              # policy_gradient or reanalyze
  policy_action_num: 4
  random_action_num: 12
  random_type: std             # std, normal, pink, OU
  reward_support:
    range: [-2, 2]
    scale: 0.01
    env: DMC
    bins: 51
    type: support   # support or symlog
  value_support:
    range: [-299, 299]
    scale: 0.5
    env: DMC
    bins: 51
    type: support   # support or symlog

# worker process allocation
actors:
  data_worker: 2
  batch_worker: 8

# wandb
wandb:
  project: 'ez-v2-release'
  tag: 'DMC-image'
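
For reference, a minimal sketch of how a nested config like this one is typically consumed, assuming plain PyYAML; the repo's actual loader may wrap values in its own config object, and the filename `dmc_image.yaml` is hypothetical:

```python
import yaml  # PyYAML, assumed; the repo may use a custom config wrapper

# Load the nested mapping and read a few fields by section.
with open("dmc_image.yaml") as f:  # hypothetical filename for this file
    cfg = yaml.safe_load(f)

game = cfg["env"]["game"]                     # 'hopper_hop'
unroll = cfg["rl"]["unroll_steps"]            # 5
bins = cfg["model"]["value_support"]["bins"]  # 51

# Caveat: PyYAML follows YAML 1.1, whose float pattern requires a decimal
# point, so bare scientific literals such as 5e-3, 1e-4, 4e4, and 2e5 above
# load as *strings* and need explicit coercion:
entropy_coeff = float(cfg["train"]["entropy_coeff"])
```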
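
The `reward_support` / `value_support` blocks with `type: support` describe a categorical (distributional) head in the MuZero style: scalars are projected onto a fixed set of atoms via a two-hot encoding and recovered by expectation. The sketch below is generic, not this repo's exact code; in particular, even atom spacing via `linspace(range[0], range[1], bins)` and the role of `scale` (e.g. rescaling targets before projection) are assumptions here:

```python
import torch

def scalar_to_support(x: torch.Tensor, support: torch.Tensor) -> torch.Tensor:
    """Two-hot projection of scalars onto fixed support atoms (MuZero-style sketch)."""
    x = x.clamp(float(support[0]), float(support[-1]))
    # Index of the nearest atom at or below each scalar.
    idx = (torch.searchsorted(support, x, right=True) - 1).clamp(0, len(support) - 2)
    lo, hi = support[idx], support[idx + 1]
    w_hi = ((x - lo) / (hi - lo)).unsqueeze(-1)  # linear interpolation weight
    probs = torch.zeros(*x.shape, len(support))
    probs.scatter_(-1, idx.unsqueeze(-1), 1.0 - w_hi)
    probs.scatter_(-1, idx.unsqueeze(-1) + 1, w_hi)
    return probs

def support_to_scalar(probs: torch.Tensor, support: torch.Tensor) -> torch.Tensor:
    """Invert the projection by taking the expectation over atoms."""
    return (probs * support).sum(-1)

# Atoms implied by the value_support block above (even spacing is an assumption):
value_atoms = torch.linspace(-299.0, 299.0, 51)
x = torch.tensor([12.3])
assert torch.allclose(support_to_scalar(scalar_to_support(x, value_atoms), value_atoms), x)
```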
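
The `priority` block matches the standard prioritized experience replay scheme (Schaul et al., 2016): with `priority_prob_alpha: 1.0` sampling is fully proportional to priority, and `priority_prob_beta: 1.0` fully corrects the resulting bias with importance-sampling weights. A minimal sketch, assuming `min_prior` acts as the floor on priorities; the repo's buffer may differ in detail:

```python
import numpy as np

def sample_prioritized(priorities: np.ndarray, batch_size: int,
                       alpha: float = 1.0, beta: float = 1.0,
                       min_prior: float = 1e-6):
    """Sample indices with probability ∝ priority^alpha; return IS weights p^-beta."""
    p = np.maximum(priorities, min_prior) ** alpha
    p = p / p.sum()
    idx = np.random.choice(len(p), size=batch_size, p=p)
    # Importance-sampling weights correct the non-uniform sampling bias;
    # normalizing by the max keeps the loss scale bounded by 1.
    w = (len(p) * p[idx]) ** (-beta)
    return idx, w / w.max()

idx, weights = sample_prioritized(np.random.rand(1000), batch_size=256)
```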