From c8f75caa4006937c3358200515f46ae02ded39a0 Mon Sep 17 00:00:00 2001
From: lucidrains
Date: Wed, 8 Oct 2025 06:40:15 -0700
Subject: [PATCH] although not in the paper, it would be interesting for each
 agent (will extend to multi-agent) to consider its own past rewards as state

---
 dreamer4/dreamer4.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/dreamer4/dreamer4.py b/dreamer4/dreamer4.py
index 71dd3c6..f44793e 100644
--- a/dreamer4/dreamer4.py
+++ b/dreamer4/dreamer4.py
@@ -1298,6 +1298,7 @@ class DynamicsModel(Module):
             latents = noised_latent_with_context,
             signal_levels = signal_levels_with_context,
             step_sizes = step_size,
+            rewards = decoded_rewards,
             latent_is_noised = True,
             return_pred_only = True,
             return_agent_tokens = True