Although not in the paper, it would be interesting for each agent (this will extend to multi-agent) to consider its own past rewards as part of its state.

This commit is contained in:
lucidrains 2025-10-08 06:40:15 -07:00
parent 187edc1414
commit c8f75caa40

View File

@ -1298,6 +1298,7 @@ class DynamicsModel(Module):
latents = noised_latent_with_context,
signal_levels = signal_levels_with_context,
step_sizes = step_size,
rewards = decoded_rewards,
latent_is_noised = True,
return_pred_only = True,
return_agent_tokens = True