tiny change needed to have the world model produce both the video and predicted rewards (after phase 2 finetuning)

lucidrains 2025-10-08 05:52:13 -07:00
parent 0fdb67bafa
commit 4de357b6c2


@@ -1462,7 +1462,10 @@ class DynamicsModel(Module):
         if self.add_reward_embed_to_agent_token:
             reward_embeds = self.reward_encoder.embed(two_hot_encoding)
-            reward_embeds = pad_at_dim(reward_embeds, (1, -1), dim = -2, value = 0.) # shift as each agent token predicts the next reward
+            pop_last_reward = int(reward_embeds.shape[1] == agent_tokens.shape[1]) # the last reward is popped off during training; during inference it is not known yet, so this edge case needs handling
+            reward_embeds = pad_at_dim(reward_embeds, (1, -pop_last_reward), dim = -2, value = 0.) # shift as each agent token predicts the next reward
             agent_tokens = agent_tokens + reward_embeds
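To see why the pop is now conditional, here is a minimal standalone sketch of the shift in both regimes. This is not the repository's code: pad_at_dim below is a local stand-in assumed to behave like the helper used above (positive pads insert zeros, negative pads truncate, along the chosen dim), and the batch/time/dim sizes are made up for illustration.

# minimal sketch, not the repo's code - pad_at_dim is a stand-in assumed
# to match the helper above: positive pads add zeros, negative pads truncate

import torch
import torch.nn.functional as F

def pad_at_dim(t, pad, dim = -1, value = 0.):
    # translate a (left, right) pad on `dim` into F.pad's flat layout,
    # which lists pad amounts from the last dimension inward
    dims_from_right = (- dim - 1) if dim < 0 else (t.ndim - dim - 1)
    zeros = (0, 0) * dims_from_right
    return F.pad(t, (*zeros, *pad), value = value)

batch, time, dim = 2, 5, 8                      # hypothetical sizes
agent_tokens = torch.randn(batch, time, dim)

# training: one reward per timestep, same length as the agent tokens -
# shift right by one and pop the final reward so shapes still line up
reward_embeds = torch.randn(batch, time, dim)
pop_last_reward = int(reward_embeds.shape[1] == agent_tokens.shape[1])   # -> 1
shifted = pad_at_dim(reward_embeds, (1, -pop_last_reward), dim = -2, value = 0.)
assert shifted.shape == agent_tokens.shape

# inference: the reward for the current step is not known yet, so the
# sequence is one step shorter and the shift alone aligns it - no pop
reward_embeds = torch.randn(batch, time - 1, dim)
pop_last_reward = int(reward_embeds.shape[1] == agent_tokens.shape[1])   # -> 0
shifted = pad_at_dim(reward_embeds, (1, -pop_last_reward), dim = -2, value = 0.)
assert shifted.shape == agent_tokens.shape

In both regimes the zero padded at the front means the first agent token carries no reward information, consistent with the comment that each agent token predicts the next reward.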