another consideration before knocking out the RL logic

This commit is contained in:
lucidrains 2025-10-14 11:10:26 -07:00
parent ff81dd761b
commit d28251e9f9
3 changed files with 9 additions and 4 deletions

View File

@@ -1817,8 +1817,8 @@ class DynamicsWorldModel(Module):
latent_gene_ids = None, # (b)
tasks = None, # (b)
rewards = None, # (b t)
discrete_actions = None, # (b t na)
continuous_actions = None, # (b t na)
discrete_actions = None, # (b t na) | (b t-1 na)
continuous_actions = None, # (b t na) | (b t-1 na)
discrete_action_types = None, # (na)
continuous_action_types = None, # (na)
return_pred_only = False,
@@ -1980,6 +1980,11 @@ class DynamicsWorldModel(Module):
continuous_action_types = continuous_action_types
)
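# handle first timestep not having an associated past action:
# when the action tokens span only (time - 1) steps, pad dim 1 with (1, 0) zeros so the length becomes `time`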
if action_tokens.shape[1] == (time - 1):
action_tokens = pad_at_dim(action_tokens, (1, 0), value = 0. , dim = 1)
action_tokens = add('1 d, b t d', self.action_learned_embed, action_tokens)
else:

View File

@@ -1,6 +1,6 @@
[project]
name = "dreamer4"
version = "0.0.19"
version = "0.0.20"
description = "Dreamer 4"
authors = [
{ name = "Phil Wang", email = "lucidrains@gmail.com" }

View File

@@ -88,7 +88,7 @@ def test_e2e(
actions = None
if condition_on_actions:
actions = torch.randint(0, 4, (2, 4, 1))
actions = torch.randint(0, 4, (2, 3, 1))
flow_loss = dynamics(
**dynamics_input,