another consideration before knocking out the RL logic

2025-10-14 11:10:26 -07:00 · 2025-10-14 11:10:26 -07:00 · d28251e9f9
commit d28251e9f9
parent ff81dd761b
3 changed files with 9 additions and 4 deletions
--- a/dreamer4/dreamer4.py
+++ b/dreamer4/dreamer4.py
@ -1817,8 +1817,8 @@ class DynamicsWorldModel(Module):
        latent_gene_ids = None,          # (b)
        tasks = None,                    # (b)
        rewards = None,                  # (b t)
-        discrete_actions = None,         # (b t na)
-        continuous_actions = None,       # (b t na)
+        discrete_actions = None,         # (b t na) | (b t-1 na)
+        continuous_actions = None,       # (b t na) | (b t-1 na)
        discrete_action_types = None,    # (na)
        continuous_action_types = None,  # (na)
        return_pred_only = False,
@ -1980,6 +1980,11 @@ class DynamicsWorldModel(Module):
                continuous_action_types = continuous_action_types
            )

+            # handle first timestep not having an associated past action
+
+            if action_tokens.shape[1] == (time - 1):
+                action_tokens = pad_at_dim(action_tokens, (1, 0), value = 0. , dim = 1)
+
            action_tokens = add('1 d, b t d', self.action_learned_embed, action_tokens)

        else:
--- a/pyproject.toml
+++ b/pyproject.toml
@ -1,6 +1,6 @@
 [project]
 name = "dreamer4"
-version = "0.0.19"
+version = "0.0.20"
 description = "Dreamer 4"
 authors = [
    { name = "Phil Wang", email = "lucidrains@gmail.com" }
--- a/tests/test_dreamer.py
+++ b/tests/test_dreamer.py
@ -88,7 +88,7 @@ def test_e2e(

    actions = None
    if condition_on_actions:
-        actions = torch.randint(0, 4, (2, 4, 1))
+        actions = torch.randint(0, 4, (2, 3, 1))

    flow_loss = dynamics(
        **dynamics_input,