Fix a few typo bugs. Support info in the return signature of the environment step. Temporarily turn off flex attention when the kv cache is used, to avoid a bug.
commit b0f6b8583d
parent c0a6cd56a1
@@ -1179,10 +1179,11 @@ def special_token_mask(q, k, seq_len, num_tokens, special_attend_only_itself = F
 
 def block_mask_special_tokens_right(
     seq_len,
-    num_tokens
+    num_tokens,
+    special_attend_only_itself = False
 ):
     def inner(b, h, q, k):
-        return special_token_mask(q, k, seq_len, num_tokens)
+        return special_token_mask(q, k, seq_len, num_tokens, special_attend_only_itself)
     return inner
 
 def compose_mask(mask1, mask2):
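For context, a minimal sketch (not the repository's code) of how a mask factory shaped like `block_mask_special_tokens_right` is typically consumed by PyTorch's flex attention: the returned closure is a `mask_mod` for `create_block_mask`, and the resulting block mask is passed to `flex_attention`. The `special_token_mask` body below is a simplified stand-in, since the real implementation lives outside this hunk.

```python
# illustrative sketch only; `special_token_mask` semantics here are assumptions,
# not the repository's implementation
import torch
from torch.nn.attention.flex_attention import create_block_mask, flex_attention

def special_token_mask(q_idx, kv_idx, seq_len, num_tokens, special_attend_only_itself = False):
    # stand-in semantics: the last `num_tokens` positions are "special" tokens on the right;
    # normal keys are visible to everyone, special keys only to themselves
    kv_is_special = kv_idx >= (seq_len - num_tokens)
    q_is_special = q_idx >= (seq_len - num_tokens)

    mask = ~kv_is_special | (q_idx == kv_idx)

    if special_attend_only_itself:
        # special queries additionally attend only to themselves
        mask = mask & (~q_is_special | (q_idx == kv_idx))

    return mask

def block_mask_special_tokens_right(seq_len, num_tokens, special_attend_only_itself = False):
    def inner(b, h, q, k):
        return special_token_mask(q, k, seq_len, num_tokens, special_attend_only_itself)
    return inner

if torch.cuda.is_available():
    seq_len, num_special = 256, 2
    mask_mod = block_mask_special_tokens_right(seq_len, num_special)

    # B = None / H = None broadcasts the mask over batch and heads
    block_mask = create_block_mask(mask_mod, B = None, H = None, Q_LEN = seq_len, KV_LEN = seq_len, device = 'cuda')

    q = k = v = torch.randn(1, 8, seq_len, 64, device = 'cuda')
    out = flex_attention(q, k, v, block_mask = block_mask)  # (1, 8, 256, 64)
```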
@@ -1493,7 +1494,8 @@ class AxialSpaceTimeTransformer(Module):
 
         # attend functions for space and time
 
-        use_flex = exists(flex_attention) and tokens.is_cuda
+        has_kv_cache = exists(kv_cache)
+        use_flex = exists(flex_attention) and tokens.is_cuda and not has_kv_cache # KV cache shape breaks flex attention TODO: Fix
 
         attend_kwargs = dict(use_flex = use_flex, softclamp_value = self.attn_softclamp_value, special_attend_only_itself = self.special_attend_only_itself, device = device)
 
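The hunk above gates flex attention off whenever a kv cache is present. A hypothetical sketch of that fallback pattern (names are illustrative, not the repo's API): during cached decoding only the newest token is fed as the query, so attention is routed through plain scaled dot-product attention instead of the prebuilt flex block mask.

```python
# assumed helper names (`attend`, `kv_cache_present`) for illustration only
import torch
import torch.nn.functional as F

def attend(q, k, v, use_flex = False, block_mask = None):
    if use_flex:
        from torch.nn.attention.flex_attention import flex_attention
        return flex_attention(q, k, v, block_mask = block_mask)

    # fallback while the kv-cache / flex-attention interaction is being fixed
    return F.scaled_dot_product_attention(q, k, v)

kv_cache_present = True
tokens_on_cuda = torch.cuda.is_available()

# mirrors the diff's guard: never use flex attention when decoding from a kv cache
use_flex = tokens_on_cuda and not kv_cache_present

q = torch.randn(1, 8, 1, 64)        # single new query token during cached decoding
k = v = torch.randn(1, 8, 128, 64)  # keys / values include the cached history
out = attend(q, k, v, use_flex = use_flex)
```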
@@ -1505,7 +1507,6 @@ class AxialSpaceTimeTransformer(Module):
 
         time_attn_kv_caches = []
 
-        has_kv_cache = exists(kv_cache)
 
         if has_kv_cache:
             past_tokens, tokens = tokens[:, :-1], tokens[:, -1:]
@@ -1847,7 +1848,7 @@ class VideoTokenizer(Module):
 
         losses = (recon_loss, lpips_loss)
 
-        return total_loss, TokenizerLosses(losses)
+        return total_loss, TokenizerLosses(*losses)
 
 # dynamics model, axial space-time transformer
 
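The `*losses` fix above matters because TokenizerLosses is presumably a NamedTuple-style container with one field per loss, so the tuple must be splatted into positional arguments. A minimal reproduction with a stand-in definition (field names are assumed):

```python
from collections import namedtuple

# stand-in for the repository's TokenizerLosses container
TokenizerLosses = namedtuple('TokenizerLosses', ['recon_loss', 'lpips_loss'])

losses = (0.12, 0.03)

TokenizerLosses(*losses)   # TokenizerLosses(recon_loss=0.12, lpips_loss=0.03)
# TokenizerLosses(losses)  # before the fix: TypeError, missing argument 'lpips_loss'
```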
@@ -2104,7 +2105,7 @@ class DynamicsWorldModel(Module):
 
         self.ppo_eps_clip = ppo_eps_clip
         self.value_clip = value_clip
-        self.policy_entropy_weight = value_clip
+        self.policy_entropy_weight = policy_entropy_weight
 
         # pmpo related
 
@@ -2127,7 +2128,7 @@ class DynamicsWorldModel(Module):
         self.flow_loss_normalizer = LossNormalizer(1)
         self.reward_loss_normalizer = LossNormalizer(multi_token_pred_len)
         self.discrete_actions_loss_normalizer = LossNormalizer(multi_token_pred_len) if num_discrete_actions > 0 else None
-        self.continuous_actions_loss_normalizer = LossNormalizer(multi_token_pred_len) if num_discrete_actions > 0 else None
+        self.continuous_actions_loss_normalizer = LossNormalizer(multi_token_pred_len) if num_continuous_actions > 0 else None
 
         self.latent_flow_loss_weight = latent_flow_loss_weight
 
@@ -2358,6 +2359,9 @@ class DynamicsWorldModel(Module):
            elif len(env_step_out) == 4:
                next_frame, reward, terminated, truncated = env_step_out
 
+           elif len(env_step_out) == 5:
+               next_frame, reward, terminated, truncated, info = env_step_out
+
            # update episode lens
 
            episode_lens = torch.where(is_terminated, episode_lens, episode_lens + 1)
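The new branch above accepts the 5-tuple (frame, reward, terminated, truncated, info) that a Gymnasium-style `env.step` returns. A stand-in sketch of the unpacking logic, not the repository's rollout loop (the helper name is hypothetical):

```python
def unpack_env_step(env_step_out):
    # default info to an empty dict for environments that do not return one
    info = {}

    if len(env_step_out) == 4:
        next_frame, reward, terminated, truncated = env_step_out
    elif len(env_step_out) == 5:
        next_frame, reward, terminated, truncated, info = env_step_out
    else:
        raise ValueError(f'unexpected number of values from env.step: {len(env_step_out)}')

    return next_frame, reward, terminated, truncated, info

# gymnasium-style step output: (obs, reward, terminated, truncated, info)
frame, reward, terminated, truncated, info = unpack_env_step(('frame', 1.0, False, False, {'lives': 3}))
```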
@@ -3085,8 +3089,8 @@ class DynamicsWorldModel(Module):
         if latents.ndim == 4:
             latents = rearrange(latents, 'b t v d -> b t v 1 d') # 1 latent edge case
 
-        assert latents.shape[-2:] == self.latent_shape
-        assert latents.shape[2] == self.num_video_views
+        assert latents.shape[-2:] == self.latent_shape, f'latents must have shape {self.latent_shape}, got {latents.shape[-2:]}'
+        assert latents.shape[2] == self.num_video_views, f'latents must have {self.num_video_views} views, got {latents.shape[2]}'
 
         # variables
 
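For reference, an illustrative snippet of the shape contract the improved assert messages describe, with made-up sizes: latents are (batch, time, views, num latents, dim), and a 4d input is treated as the single-latent edge case.

```python
# made-up sizes for illustration; latent_shape and num_video_views are assumptions
import torch
from einops import rearrange

latent_shape = (1, 32)   # (num latents, dim)
num_video_views = 2

latents = torch.randn(3, 8, num_video_views, 32)   # 4d input -> single latent edge case

if latents.ndim == 4:
    latents = rearrange(latents, 'b t v d -> b t v 1 d')

assert latents.shape[-2:] == latent_shape, f'latents must have shape {latent_shape}, got {latents.shape[-2:]}'
assert latents.shape[2] == num_video_views, f'latents must have {num_video_views} views, got {latents.shape[2]}'
```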