Compare commits


1 commit
main ... 0.1.0

6 changed files with 128 additions and 485 deletions

View File

@@ -4,16 +4,10 @@
Implementation of Danijar's [latest iteration](https://arxiv.org/abs/2509.24527v1) for his [Dreamer](https://danijar.com/project/dreamer4/) line of work
[Discord channel](https://discord.gg/PmGR7KRwxq) for collaborating with other researchers interested in this work
## Appreciation
- [@dirkmcpherson](https://github.com/dirkmcpherson) for fixes to typos and unpassed arguments!
## Install
```bash
$ pip install dreamer4
$ pip install dreamer4-pytorch
```
## Usage
@@ -32,16 +26,9 @@ tokenizer = VideoTokenizer(
image_width = 256
)
video = torch.randn(2, 3, 10, 256, 256)
# learn the tokenizer
loss = tokenizer(video)
loss.backward()
# dynamics world model
world_model = DynamicsWorldModel(
dynamics = DynamicsWorldModel(
dim = 512,
dim_latent = 32,
video_tokenizer = tokenizer,
@@ -57,7 +44,7 @@ rewards = torch.randn(2, 10)
# learn dynamics / behavior cloned model
loss = world_model(
loss = dynamics(
video = video,
rewards = rewards,
discrete_actions = discrete_actions
@@ -69,7 +56,7 @@ loss.backward()
# then generate dreams
dreams = world_model.generate(
dreams = dynamics.generate(
10,
batch_size = 2,
return_decoded_video = True,
@@ -78,19 +65,7 @@ dreams = world_model.generate(
# learn from the dreams
actor_loss, critic_loss = world_model.learn_from_experience(dreams)
(actor_loss + critic_loss).backward()
# learn from environment
from dreamer4.mocks import MockEnv
mock_env = MockEnv((256, 256), vectorized = True, num_envs = 4)
experience = world_model.interact_with_env(mock_env, max_timesteps = 8, env_is_vectorized = True)
actor_loss, critic_loss = world_model.learn_from_experience(experience)
actor_loss, critic_loss = dynamics.learn_from_experience(dreams)
(actor_loss + critic_loss).backward()
```
@@ -109,4 +84,4 @@ actor_loss, critic_loss = world_model.learn_from_experience(experience)
}
```
*the conquest of nature is to be achieved through number and measure - angels to Descartes in a dream*
*the conquest of nature is to be achieved through number and measure* - angels to Descartes, in a dream, the story goes.

View File

@@ -1,7 +1,6 @@
from dreamer4.dreamer4 import (
VideoTokenizer,
DynamicsWorldModel,
AxialSpaceTimeTransformer
DynamicsWorldModel
)

File diff suppressed because it is too large

View File

@@ -528,7 +528,7 @@ class SimTrainer(Module):
total_experience += num_experience
experiences.append(experience.cpu())
experiences.append(experience)
combined_experiences = combine_experiences(experiences)

View File

@@ -1,6 +1,6 @@
[project]
name = "dreamer4"
version = "0.1.24"
version = "0.1.0"
description = "Dreamer 4"
authors = [
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -36,8 +36,7 @@ dependencies = [
"hyper-connections>=0.2.1",
"torch>=2.4",
"torchvision",
"x-mlps-pytorch>=0.0.29",
"vit-pytorch>=1.15.3"
"x-mlps-pytorch>=0.0.29"
]
[project.urls]

View File

@@ -15,8 +15,7 @@ def exists(v):
@param('condition_on_actions', (False, True))
@param('num_residual_streams', (1, 4))
@param('add_reward_embed_to_agent_token', (False, True))
@param('add_state_pred_head', (False, True))
@param('use_time_cache', (False, True))
@param('use_time_kv_cache', (False, True))
@param('var_len', (False, True))
def test_e2e(
pred_orig_latent,
@@ -29,8 +28,7 @@ def test_e2e(
condition_on_actions,
num_residual_streams,
add_reward_embed_to_agent_token,
add_state_pred_head,
use_time_cache,
use_time_kv_cache,
var_len
):
from dreamer4.dreamer4 import VideoTokenizer, DynamicsWorldModel
@@ -43,9 +41,7 @@ def test_e2e(
patch_size = 32,
attn_dim_head = 16,
num_latent_tokens = 4,
num_residual_streams = num_residual_streams,
encoder_add_decor_aux_loss = True,
decorr_sample_frac = 1.
num_residual_streams = num_residual_streams
)
video = torch.randn(2, 3, 4, 256, 256)
@@ -73,13 +69,12 @@ def test_e2e(
pred_orig_latent = pred_orig_latent,
num_discrete_actions = 4,
attn_dim_head = 16,
attn_heads = heads,
attn_kwargs = dict(
heads = heads,
query_heads = query_heads,
),
prob_no_shortcut_train = prob_no_shortcut_train,
add_reward_embed_to_agent_token = add_reward_embed_to_agent_token,
add_state_pred_head = add_state_pred_head,
num_residual_streams = num_residual_streams
)
@@ -126,7 +121,7 @@ def test_e2e(
image_width = 128,
batch_size = 2,
return_rewards_per_frame = True,
use_time_cache = use_time_cache
use_time_kv_cache = use_time_kv_cache
)
assert generations.video.shape == (2, 3, 10, 128, 128)
@@ -620,9 +615,9 @@ def test_cache_generate():
num_residual_streams = 1
)
generated, time_cache = dynamics.generate(1, return_time_cache = True)
generated, time_cache = dynamics.generate(1, time_cache = time_cache, return_time_cache = True)
generated, time_cache = dynamics.generate(1, time_cache = time_cache, return_time_cache = True)
generated, time_kv_cache = dynamics.generate(1, return_time_kv_cache = True)
generated, time_kv_cache = dynamics.generate(1, time_kv_cache = time_kv_cache, return_time_kv_cache = True)
generated, time_kv_cache = dynamics.generate(1, time_kv_cache = time_kv_cache, return_time_kv_cache = True)
@param('vectorized', (False, True))
@param('use_pmpo', (False, True))
@@ -646,9 +641,7 @@ def test_online_rl(
dim_latent = 16,
patch_size = 32,
attn_dim_head = 16,
num_latent_tokens = 1,
image_height = 256,
image_width = 256,
num_latent_tokens = 1
)
world_model_and_policy = DynamicsWorldModel(
@@ -682,18 +675,10 @@ def test_online_rl(
# manually
dream_experience = world_model_and_policy.generate(10, batch_size = 1, store_agent_embed = store_agent_embed, return_for_policy_optimization = True)
one_experience = world_model_and_policy.interact_with_env(mock_env, max_timesteps = 8, env_is_vectorized = vectorized, store_agent_embed = store_agent_embed)
another_experience = world_model_and_policy.interact_with_env(mock_env, max_timesteps = 16, env_is_vectorized = vectorized, store_agent_embed = store_agent_embed)
combined_experience = combine_experiences([dream_experience, one_experience, another_experience])
# quick test moving the experience to different devices
if torch.cuda.is_available():
combined_experience = combined_experience.to(torch.device('cuda'))
combined_experience = combined_experience.to(world_model_and_policy.device)
combined_experience = combine_experiences([one_experience, another_experience])
if store_agent_embed:
assert exists(combined_experience.agent_embed)
@@ -810,22 +795,3 @@ def test_epo():
fitness = torch.randn(16,)
dynamics.evolve_(fitness)
def test_images_to_video_tokenizer():
import torch
from dreamer4 import VideoTokenizer, DynamicsWorldModel, AxialSpaceTimeTransformer
tokenizer = VideoTokenizer(
dim = 512,
dim_latent = 32,
patch_size = 32,
image_height = 256,
image_width = 256,
encoder_add_decor_aux_loss = True
)
images = torch.randn(2, 3, 256, 256)
loss, (losses, recon_images) = tokenizer(images, return_intermediates = True)
loss.backward()
assert images.shape == recon_images.shape