eventually we will need to learn from the outside stream of experience

lucidrains 2025-10-17 08:06:24 -07:00
parent 0dba734280
commit 61773c8219
2 changed files with 7 additions and 5 deletions

View File

@@ -65,7 +65,7 @@ TokenizerLosses = namedtuple('TokenizerLosses', ('recon', 'lpips'))
 WorldModelLosses = namedtuple('WorldModelLosses', ('flow', 'reward', 'behavior_clone'))

 @dataclass
-class WorldModelGenerations:
+class Experience:
     latents: Tensor
     video: Tensor | None = None
     rewards: Tensor | None = None
@@ -74,6 +74,7 @@ class WorldModelGenerations:
     values: Tensor | None = None
     step_size: int | None = None
     agent_index: int = 0
+    is_from_world_model: bool = True

     # helpers
@@ -1665,7 +1666,7 @@ class DynamicsWorldModel(Module):
     def learn_policy_from_generations(
         self,
-        generation: WorldModelGenerations
+        generation: Experience
     ):
         latents = generation.latents
         actions = generation.actions
@@ -1914,11 +1915,12 @@ class DynamicsWorldModel(Module):
         # returning agent actions, rewards, and log probs + values for policy optimization

-        gen = WorldModelGenerations(
+        gen = Experience(
             latents = latents,
             video = video,
             step_size = step_size,
-            agent_index = agent_index
+            agent_index = agent_index,
+            is_from_world_model = True
         )

         if return_rewards_per_frame:
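
Note: the sketch below is a minimal, self-contained illustration of what this rename enables, not code from the repository. It reproduces only the Experience fields visible in the hunks above (the actual class defines further fields, such as actions, that the hunks elide), and the tensor shapes are made up. The new is_from_world_model flag lets the same container carry either rollouts imagined by the world model (the default) or experience collected from an outside stream such as a real environment.

from dataclasses import dataclass

import torch
from torch import Tensor

@dataclass
class Experience:
    # only the fields visible in the diff above; the real class has more
    latents: Tensor
    video: Tensor | None = None
    rewards: Tensor | None = None
    values: Tensor | None = None
    step_size: int | None = None
    agent_index: int = 0
    is_from_world_model: bool = True

# experience imagined by the world model, as in the generation path above

imagined = Experience(
    latents = torch.randn(1, 16, 512),  # (batch, time, latent dim) - illustrative shape only
    step_size = 4,
    is_from_world_model = True
)

# experience gathered from an outside stream (e.g. a real environment),
# flagged so downstream policy learning can distinguish the two sources

real = Experience(
    latents = torch.randn(1, 16, 512),
    rewards = torch.zeros(1, 16),
    is_from_world_model = False
)

assert imagined.is_from_world_model and not real.is_from_world_model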

View File

@@ -1,6 +1,6 @@
 [project]
 name = "dreamer4"
-version = "0.0.27"
+version = "0.0.29"
 description = "Dreamer 4"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }