Although this is not in the paper, it would be interesting for each agent (to be extended to the multi-agent case) to consider its own past rewards as part of its state.
This commit is contained in:
parent
187edc1414
commit
c8f75caa40
@@ -1298,6 +1298,7 @@ class DynamicsModel(Module):
|
|||||||
latents = noised_latent_with_context,
|
latents = noised_latent_with_context,
|
||||||
signal_levels = signal_levels_with_context,
|
signal_levels = signal_levels_with_context,
|
||||||
step_sizes = step_size,
|
step_sizes = step_size,
|
||||||
|
rewards = decoded_rewards,
|
||||||
latent_is_noised = True,
|
latent_is_noised = True,
|
||||||
return_pred_only = True,
|
return_pred_only = True,
|
||||||
return_agent_tokens = True
|
return_agent_tokens = True
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user