although not in the paper, it would be interesting for each agent (to be extended to the multi-agent setting) to consider its own past rewards as part of its state
parent 187edc1414
commit c8f75caa40
@@ -1298,6 +1298,7 @@ class DynamicsModel(Module):
             latents = noised_latent_with_context,
             signal_levels = signal_levels_with_context,
             step_sizes = step_size,
+            rewards = decoded_rewards,
             latent_is_noised = True,
             return_pred_only = True,
             return_agent_tokens = True
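The hunk threads the decoded rewards into the DynamicsModel call so each agent's own reward history can be folded into its state. Below is a minimal sketch of one way past rewards could be embedded and merged with an agent's state tokens; the module name PastRewardEmbedder, the tensor shapes, and the projection sizes are assumptions for illustration only, not the repository's actual API.

    # minimal sketch, assuming rewards arrive as a float tensor of shape
    # (batch, agents, time) and agent state tokens as (batch, agents, time, dim);
    # all names below are illustrative, not taken from the repo

    import torch
    from torch import nn

    class PastRewardEmbedder(nn.Module):
        def __init__(self, dim):
            super().__init__()
            # project each scalar past reward into the model dimension
            self.to_reward_token = nn.Sequential(
                nn.Linear(1, dim),
                nn.SiLU(),
                nn.Linear(dim, dim)
            )

        def forward(self, agent_tokens, rewards):
            # agent_tokens: (batch, agents, time, dim)
            # rewards:      (batch, agents, time) - each agent's own past rewards
            reward_tokens = self.to_reward_token(rewards.unsqueeze(-1))
            # add the reward embedding to the agent's state token at the same
            # timestep, so the dynamics model conditions on that agent's reward history
            return agent_tokens + reward_tokens

    # usage sketch
    embed = PastRewardEmbedder(dim = 512)
    agent_tokens = torch.randn(2, 4, 16, 512)   # batch of 2, 4 agents, 16 timesteps
    past_rewards = torch.randn(2, 4, 16)        # each agent's own reward history
    conditioned = embed(agent_tokens, past_rewards)  # (2, 4, 16, 512)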