eventually we will need to learn from the outside stream of experience

2025-10-17 08:06:24 -07:00 · 2025-10-17 08:06:24 -07:00 · 61773c8219
commit 61773c8219
parent 0dba734280
2 changed files with 7 additions and 5 deletions
--- a/dreamer4/dreamer4.py
+++ b/dreamer4/dreamer4.py
@@ -65,7 +65,7 @@ TokenizerLosses = namedtuple('TokenizerLosses', ('recon', 'lpips'))
 WorldModelLosses = namedtuple('WorldModelLosses', ('flow', 'reward', 'behavior_clone'))

@dataclass
-class WorldModelGenerations:
+class Experience:
    latents: Tensor
    video: Tensor | None = None
    rewards: Tensor | None = None
@@ -74,6 +74,7 @@ class WorldModelGenerations:
    values: Tensor | None = None
    step_size: int | None = None
    agent_index: int = 0
+    is_from_world_model: bool = True

 # helpers

@@ -1665,7 +1666,7 @@ class DynamicsWorldModel(Module):

    def learn_policy_from_generations(
        self,
-        generation: WorldModelGenerations
+        generation: Experience
    ):
        latents = generation.latents
        actions = generation.actions
@@ -1914,11 +1915,12 @@ class DynamicsWorldModel(Module):

        # returning agent actions, rewards, and log probs + values for policy optimization

-        gen = WorldModelGenerations(
+        gen = Experience(
            latents = latents,
            video = video,
            step_size = step_size,
-            agent_index = agent_index
+            agent_index = agent_index,
+            is_from_world_model = True
        )

        if return_rewards_per_frame:
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dreamer4"
-version = "0.0.27"
+version = "0.0.29"
 description = "Dreamer 4"
 authors = [
    { name = "Phil Wang", email = "lucidrains@gmail.com" }