From 854c932b9729debd339fcec092949ed21e86b755 Mon Sep 17 00:00:00 2001
From: TJU-Lu <lqzx1998@tju.edu.cn>
Date: Fri, 7 Nov 2025 14:55:30 +0800
Subject: [PATCH] Add acceleration cost

---
 YOPO/config/traj_opt.yaml    |  1 +
 YOPO/loss/loss_function.py   | 45 +++++++++++++++++++++---------------
 YOPO/loss/smoothness_loss.py | 14 +++++++----
 YOPO/policy/yopo_trainer.py  | 18 ++++++++-------
 4 files changed, 46 insertions(+), 32 deletions(-)

diff --git a/YOPO/config/traj_opt.yaml b/YOPO/config/traj_opt.yaml
index 1d671ee..6e5b9b2 100644
--- a/YOPO/config/traj_opt.yaml
+++ b/YOPO/config/traj_opt.yaml
@@ -9,6 +9,7 @@ acc_max_train: 6.0
 # IMPORTANT: weight of costs for unit speed (can be visualized in tensorboard)
 wg: 0.12   # guidance
 ws: 10.0   # smoothness
+wa: 1.0    # acceleration
 wc: 0.1    # collision
 
 # dataset: set image_size = primitive_num × downsampling_factor (×32 for ResNet-18) in each axis
diff --git a/YOPO/loss/loss_function.py b/YOPO/loss/loss_function.py
index eb23c36..240b7d8 100644
--- a/YOPO/loss/loss_function.py
+++ b/YOPO/loss/loss_function.py
@@ -18,15 +18,12 @@ class YOPOLoss(nn.Module):
         super(YOPOLoss, self).__init__()
         self.sgm_time = cfg["sgm_time"]
         self.device = th.device("cuda" if th.cuda.is_available() else "cpu")
-        self._C, self._B, self._L, self._R = self.qp_generation()
-        self._R = self._R.to(self.device)
+        self._C, self._B, self._L, self._RJ, self._RA = self.qp_generation()
+        self._RJ = self._RJ.to(self.device)
+        self._RA = self._RA.to(self.device)
         self._L = self._L.to(self.device)
-        vel_scale = cfg["vel_max_train"] / 1.0
-        self.smoothness_weight = cfg["ws"]
-        self.safety_weight = cfg["wc"]
-        self.goal_weight = cfg["wg"]
-        self.denormalize_weight(vel_scale)
-        self.smoothness_loss = SmoothnessLoss(self._R)
+        self.denormalize_weight()
+        self.smoothness_loss = SmoothnessLoss(self._RJ, self._RA)
         self.safety_loss = SafetyLoss(self._L)
         self.goal_loss = GuidanceLoss()
         print("------ Actual Loss ------")
@@ -43,15 +40,21 @@ class YOPOLoss(nn.Module):
             for j in range(i, 6):
                 A[2 * i + 1, j] = math.factorial(j) / math.factorial(j - i) * (self.sgm_time ** (j - i))
 
-        # H海森矩阵，论文中的矩阵Q
+        # H海森矩阵，对应Jerk
         H = th.zeros((6, 6))
         for i in range(3, 6):
             for j in range(3, 6):
                 H[i, j] = i * (i - 1) * (i - 2) * j * (j - 1) * (j - 2) / (i + j - 5) * (self.sgm_time ** (i + j - 5))
 
-        return self.stack_opt_dep(A, H)
+        # Q海森矩阵，对应Accel
+        Q = th.zeros((6, 6))
+        for i in range(2, 6):
+            for j in range(2, 6):
+                Q[i, j] = (i * (i - 1)) * (j * (j - 1)) / (i + j - 3) * (self.sgm_time ** (i + j - 3))
 
-    def stack_opt_dep(self, A, Q):
+        return self.stack_opt_dep(A, H, Q)
+
+    def stack_opt_dep(self, A, H, Q):
         Ct = th.zeros((6, 6))
         Ct[[0, 2, 4, 1, 3, 5], [0, 1, 2, 3, 4, 5]] = 1
 
@@ -63,11 +66,13 @@ class YOPOLoss(nn.Module):
 
         _L = B @ Ct
 
-        _R = _C @ (B_T) @ Q @ B @ Ct
+        _R_Jerk = _C @ (B_T) @ H @ B @ Ct
 
-        return _C, B, _L, _R
+        _R_Acc = _C @ (B_T) @ Q @ B @ Ct
 
-    def denormalize_weight(self, vel_scale):
+        return _C, B, _L, _R_Jerk, _R_Acc
+
+    def denormalize_weight(self):
         """
         Denormalize the cost weight to ensure consistency across different speeds to simplify parameter tuning.
         smoothness cost: time integral of jerk² is used as a smoothness cost.
@@ -77,9 +82,11 @@ class YOPOLoss(nn.Module):
         goal cost:       projection of the trajectory onto goal direction.
                          Independent of speed.
         """
-        self.smoothness_weight = self.smoothness_weight / vel_scale ** 5
-        self.safety_weight = self.safety_weight * vel_scale
-        self.goal_weight = self.goal_weight
+        vel_scale = cfg["vel_max_train"] / 1.0
+        self.smoothness_weight = cfg["ws"] / vel_scale ** 5
+        self.safety_weight = cfg["wc"] * vel_scale
+        self.goal_weight = cfg["wg"]
+        self.accele_weight = cfg["wa"] / vel_scale ** 3
 
     def forward(self, state, prediction, goal, map_id):
         """
@@ -102,10 +109,10 @@ class YOPOLoss(nn.Module):
         goal_cost = th.tensor(0.0, device=self.device, requires_grad=True)
 
         if self.smoothness_weight > 0:
-            smoothness_cost = self.smoothness_loss(Df, Dp)
+            smoothness_cost, acceleration_cost = self.smoothness_loss(Df, Dp)
         if self.safety_weight > 0:
             safety_cost = self.safety_loss(Df, Dp, map_id)
         if self.goal_weight > 0:
             goal_cost = self.goal_loss(Df, Dp, goal)
 
-        return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost
\ No newline at end of file
+        return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost, self.accele_weight * acceleration_cost
\ No newline at end of file
diff --git a/YOPO/loss/smoothness_loss.py b/YOPO/loss/smoothness_loss.py
index b827d4b..34167a9 100644
--- a/YOPO/loss/smoothness_loss.py
+++ b/YOPO/loss/smoothness_loss.py
@@ -3,9 +3,10 @@ import torch as th
 
 
 class SmoothnessLoss(nn.Module):
-    def __init__(self, R):
+    def __init__(self, RJ, RA):
         super(SmoothnessLoss, self).__init__()
-        self._R = R
+        self._RJ = RJ
+        self._RA = RA
 
     def forward(self, Df, Dp):
         """
@@ -15,13 +16,16 @@ class SmoothnessLoss(nn.Module):
         Returns:
             cost_smooth: (batch_size) → smoothness loss
         """
-        R = self._R.unsqueeze(0).expand(Dp.shape[0], -1, -1)
+        RJ = self._RJ.unsqueeze(0).expand(Dp.shape[0], -1, -1)
+        RA = self._RA.unsqueeze(0).expand(Dp.shape[0], -1, -1)
         D_all = th.cat([Df, Dp], dim=2)  # dx, dy, dz will be rows
 
         # Reshape for matmul: (batch, 6, 1)
         dx, dy, dz = D_all[:, 0].unsqueeze(2), D_all[:, 1].unsqueeze(2), D_all[:, 2].unsqueeze(2)
 
         # Compute smoothness loss: dxᵀ R dx + ...
-        cost_smooth = dx.transpose(1, 2) @ R @ dx + dy.transpose(1, 2) @ R @ dy + dz.transpose(1, 2) @ R @ dz
+        jerk_smooth = dx.transpose(1, 2) @ RJ @ dx + dy.transpose(1, 2) @ RJ @ dy + dz.transpose(1, 2) @ RJ @ dz
 
-        return cost_smooth.squeeze()
\ No newline at end of file
+        accel_smooth = dx.transpose(1, 2) @ RA @ dx + dy.transpose(1, 2) @ RA @ dy + dz.transpose(1, 2) @ RA @ dz
+
+        return jerk_smooth.squeeze(), accel_smooth.squeeze()
\ No newline at end of file
diff --git a/YOPO/policy/yopo_trainer.py b/YOPO/policy/yopo_trainer.py
index 2bb5f59..627b16b 100644
--- a/YOPO/policy/yopo_trainer.py
+++ b/YOPO/policy/yopo_trainer.py
@@ -82,13 +82,13 @@ class YopoTrainer:
     def train_one_epoch(self, epoch: int, total_progress):
         one_epoch_progress = self.progress_log.add_task(f"Epoch: {epoch}", total=len(self.train_dataloader))
         inspect_interval = max(1, len(self.train_dataloader) // 16)
-        traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time()
+        traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time()
         for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.train_dataloader):  # obs: body frame
             if depth.shape[0] != self.batch_size:  continue  # batch size == number of env
 
             self.optimizer.zero_grad()
 
-            trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
+            trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost, acc_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
 
             loss = self.loss_weight[0] * trajectory_loss + self.loss_weight[1] * score_loss
 
@@ -101,6 +101,7 @@ class YopoTrainer:
             smooth_losses.append(self.loss_weight[0] * smooth_cost.item())
             safety_losses.append(self.loss_weight[0] * safety_cost.item())
             goal_losses.append(self.loss_weight[0] * goal_cost.item())
+            acc_losses.append(self.loss_weight[0] * acc_cost.item())
 
             if step % inspect_interval == inspect_interval - 1:
                 batch_fps = inspect_interval / (time.time() - start_time)
@@ -112,7 +113,8 @@ class YopoTrainer:
                 self.tensorboard_log.add_scalar("Detail/SmoothLoss", np.mean(smooth_losses), epoch * len(self.train_dataloader) + step)
                 self.tensorboard_log.add_scalar("Detail/SafetyLoss", np.mean(safety_losses), epoch * len(self.train_dataloader) + step)
                 self.tensorboard_log.add_scalar("Detail/GoalLoss", np.mean(goal_losses), epoch * len(self.train_dataloader) + step)
-                traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time()
+                self.tensorboard_log.add_scalar("Detail/AccelLoss", np.mean(acc_losses), epoch * len(self.train_dataloader) + step)
+                traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time()
 
             self.progress_log.update(one_epoch_progress, advance=1)
             self.progress_log.update(total_progress, advance=1 / len(self.train_dataloader))
@@ -126,7 +128,7 @@ class YopoTrainer:
         for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.val_dataloader):  # obs: body frame
             if depth.shape[0] != self.batch_size:  continue  # batch size == num of env
 
-            trajectory_loss, score_loss, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
+            trajectory_loss, score_loss, _, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
 
             traj_losses.append(self.loss_weight[0] * trajectory_loss.item())
             score_losses.append(self.loss_weight[1] * score_loss.item())
@@ -166,12 +168,12 @@ class YopoTrainer:
         # [B*V*H, 3, 3]: [px, py, pz; vx, vy, vz; ax, ay, az]
         end_state_w = torch.stack([end_pos_w, end_vel_w, end_acc_w], dim=1)
 
-        smooth_cost, safety_cost, goal_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id)
-        trajectory_loss = (smooth_cost + safety_cost + goal_cost).mean()
+        smooth_cost, safety_cost, goal_cost, acc_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id)
+        trajectory_loss = (smooth_cost + safety_cost + goal_cost + acc_cost).mean()
 
-        score_label = (smooth_cost + safety_cost + goal_cost).clone().detach()
+        score_label = (smooth_cost + safety_cost + goal_cost + acc_cost).clone().detach()
         score_loss = F.smooth_l1_loss(score_flat, score_label)
-        return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean()
+        return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean(), acc_cost.mean()
 
     def save_model(self):
         if hasattr(self, "epoch_i"):