rename
This commit is contained in:
parent
c967404471
commit
22e13c45fc
@ -1700,20 +1700,20 @@ class DynamicsWorldModel(Module):
|
|||||||
|
|
||||||
return list(set(params) - set(self.video_tokenizer.parameters()))
|
return list(set(params) - set(self.video_tokenizer.parameters()))
|
||||||
|
|
||||||
def learn_policy_from_generations(
|
def learn_from_experience(
|
||||||
self,
|
self,
|
||||||
generation: Experience,
|
experience: Experience,
|
||||||
policy_optim: Optimizer | None = None,
|
policy_optim: Optimizer | None = None,
|
||||||
value_optim: Optimizer | None = None
|
value_optim: Optimizer | None = None
|
||||||
):
|
):
|
||||||
latents = generation.latents
|
latents = experience.latents
|
||||||
actions = generation.actions
|
actions = experience.actions
|
||||||
old_log_probs = generation.log_probs
|
old_log_probs = experience.log_probs
|
||||||
old_values = generation.values
|
old_values = experience.values
|
||||||
rewards = generation.rewards
|
rewards = experience.rewards
|
||||||
|
|
||||||
step_size = generation.step_size
|
step_size = experience.step_size
|
||||||
agent_index = generation.agent_index
|
agent_index = experience.agent_index
|
||||||
|
|
||||||
assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
|
assert all([*map(exists, (old_log_probs, actions, old_values, rewards, step_size))]), 'the generations need to contain the log probs, values, and rewards for policy optimization'
|
||||||
|
|
||||||
|
|||||||
@ -1,6 +1,6 @@
|
|||||||
[project]
|
[project]
|
||||||
name = "dreamer4"
|
name = "dreamer4"
|
||||||
version = "0.0.31"
|
version = "0.0.32"
|
||||||
description = "Dreamer 4"
|
description = "Dreamer 4"
|
||||||
authors = [
|
authors = [
|
||||||
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
|
{ name = "Phil Wang", email = "lucidrains@gmail.com" }
|
||||||
|
|||||||
@ -238,7 +238,7 @@ def test_action_with_world_model():
|
|||||||
|
|
||||||
# take a reinforcement learning step
|
# take a reinforcement learning step
|
||||||
|
|
||||||
actor_loss, critic_loss = dynamics.learn_policy_from_generations(gen)
|
actor_loss, critic_loss = dynamics.learn_from_experience(gen)
|
||||||
|
|
||||||
actor_loss.backward(retain_graph = True)
|
actor_loss.backward(retain_graph = True)
|
||||||
critic_loss.backward()
|
critic_loss.backward()
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user