swap the argument order for the discrete kl div, thanks to Dirk for pointing this out on the discord

2025-10-29 11:54:18 -07:00 · 2025-10-29 11:54:18 -07:00 · c4a3cb09d5
commit c4a3cb09d5
parent cb54121ace
2 changed files with 4 additions and 2 deletions
--- a/dreamer4/dreamer4.py
+++ b/dreamer4/dreamer4.py
@@ -2580,7 +2580,9 @@ class DynamicsWorldModel(Module):
            if self.pmpo_kl_div_loss_weight > 0.:
                new_unembedded_actions = self.action_embedder.unembed(policy_embed, pred_head_index = 0)

-                discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(new_unembedded_actions, old_action_unembeds)
+                # kl div arguments are ordered (old, new) - it was mentioned that the "reverse direction for the prior KL" was used
+
+                discrete_kl_div, continuous_kl_div = self.action_embedder.kl_div(old_action_unembeds, new_unembedded_actions)

                # accumulate discrete and continuous kl div

--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dreamer4"
-version = "0.0.96"
+version = "0.0.98"
 description = "Dreamer 4"
 authors = [
    { name = "Phil Wang", email = "lucidrains@gmail.com" }