Although not in the paper, it would be interesting for each agent (this will extend to multi-agent) to consider its own past rewards as part of its state.

This commit is contained in:
lucidrains 2025-10-08 06:40:15 -07:00
parent 187edc1414
commit c8f75caa40

View File

@ -1298,6 +1298,7 @@ class DynamicsModel(Module):
latents = noised_latent_with_context,
signal_levels = signal_levels_with_context,
step_sizes = step_size,
rewards = decoded_rewards,
latent_is_noised = True,
return_pred_only = True,
return_agent_tokens = True