eventually we will need to learn from the outside stream of experience
This commit is contained in:
parent
0dba734280
commit
61773c8219
@ -65,7 +65,7 @@ TokenizerLosses = namedtuple('TokenizerLosses', ('recon', 'lpips'))
|
|||||||
WorldModelLosses = namedtuple('WorldModelLosses', ('flow', 'reward', 'behavior_clone'))
|
WorldModelLosses = namedtuple('WorldModelLosses', ('flow', 'reward', 'behavior_clone'))
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class WorldModelGenerations:
|
class Experience:
|
||||||
latents: Tensor
|
latents: Tensor
|
||||||
video: Tensor | None = None
|
video: Tensor | None = None
|
||||||
rewards: Tensor | None = None
|
rewards: Tensor | None = None
|
||||||
@ -74,6 +74,7 @@ class WorldModelGenerations:
|
|||||||
values: Tensor | None = None
|
values: Tensor | None = None
|
||||||
step_size: int | None = None
|
step_size: int | None = None
|
||||||
agent_index: int = 0
|
agent_index: int = 0
|
||||||
|
is_from_world_model: bool = True
|
||||||
|
|
||||||
# helpers
|
# helpers
|
||||||
|
|
||||||
@ -1665,7 +1666,7 @@ class DynamicsWorldModel(Module):
|
|||||||
|
|
||||||
def learn_policy_from_generations(
|
def learn_policy_from_generations(
|
||||||
self,
|
self,
|
||||||
generation: WorldModelGenerations
|
generation: Experience
|
||||||
):
|
):
|
||||||
latents = generation.latents
|
latents = generation.latents
|
||||||
actions = generation.actions
|
actions = generation.actions
|
||||||
@ -1914,11 +1915,12 @@ class DynamicsWorldModel(Module):
|
|||||||
|
|
||||||
# returning agent actions, rewards, and log probs + values for policy optimization
|
# returning agent actions, rewards, and log probs + values for policy optimization
|
||||||
|
|
||||||
gen = WorldModelGenerations(
|
gen = Experience(
|
||||||
latents = latents,
|
latents = latents,
|
||||||
video = video,
|
video = video,
|
||||||
step_size = step_size,
|
step_size = step_size,
|
||||||
agent_index = agent_index
|
agent_index = agent_index,
|
||||||
|
is_from_world_model = True
|
||||||
)
|
)
|
||||||
|
|
||||||
if return_rewards_per_frame:
|
if return_rewards_per_frame:
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "dreamer4"
|
name = "dreamer4"
|
||||||
version = "0.0.27"
|
version = "0.0.29"
|
||||||
description = "Dreamer 4"
|
description = "Dreamer 4"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
|
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user