Add acceleration cost

2025-11-07 14:55:30 +08:00 · 2025-11-07 14:55:30 +08:00 · 854c932b97
commit 854c932b97
parent 3826bf7d34
4 changed files with 46 additions and 32 deletions
--- a/YOPO/config/traj_opt.yaml
+++ b/YOPO/config/traj_opt.yaml
@ -9,6 +9,7 @@ acc_max_train: 6.0
 # IMPORTANT: weight of costs for unit speed (can be visualized in tensorboard)
 wg: 0.12   # guidance
 ws: 10.0   # smoothness
+wa: 1.0    # acceleration
 wc: 0.1    # collision

 # dataset: set image_size = primitive_num × downsampling_factor (×32 for ResNet-18) in each axis
--- a/YOPO/loss/loss_function.py
+++ b/YOPO/loss/loss_function.py
@ -18,15 +18,12 @@ class YOPOLoss(nn.Module):
        super(YOPOLoss, self).__init__()
        self.sgm_time = cfg["sgm_time"]
        self.device = th.device("cuda" if th.cuda.is_available() else "cpu")
-        self._C, self._B, self._L, self._R = self.qp_generation()
-        self._R = self._R.to(self.device)
+        self._C, self._B, self._L, self._RJ, self._RA = self.qp_generation()
+        self._RJ = self._RJ.to(self.device)
+        self._RA = self._RA.to(self.device)
        self._L = self._L.to(self.device)
-        vel_scale = cfg["vel_max_train"] / 1.0
-        self.smoothness_weight = cfg["ws"]
-        self.safety_weight = cfg["wc"]
-        self.goal_weight = cfg["wg"]
-        self.denormalize_weight(vel_scale)
-        self.smoothness_loss = SmoothnessLoss(self._R)
+        self.denormalize_weight()
+        self.smoothness_loss = SmoothnessLoss(self._RJ, self._RA)
        self.safety_loss = SafetyLoss(self._L)
        self.goal_loss = GuidanceLoss()
        print("------ Actual Loss ------")
@ -43,15 +40,21 @@ class YOPOLoss(nn.Module):
            for j in range(i, 6):
                A[2 * i + 1, j] = math.factorial(j) / math.factorial(j - i) * (self.sgm_time ** (j - i))

-        # H海森矩阵，论文中的矩阵Q
+        # H海森矩阵，对应Jerk
        H = th.zeros((6, 6))
        for i in range(3, 6):
            for j in range(3, 6):
                H[i, j] = i * (i - 1) * (i - 2) * j * (j - 1) * (j - 2) / (i + j - 5) * (self.sgm_time ** (i + j - 5))

-        return self.stack_opt_dep(A, H)
+        # Q海森矩阵，对应Accel
+        Q = th.zeros((6, 6))
+        for i in range(2, 6):
+            for j in range(2, 6):
+                Q[i, j] = (i * (i - 1)) * (j * (j - 1)) / (i + j - 3) * (self.sgm_time ** (i + j - 3))

-    def stack_opt_dep(self, A, Q):
+        return self.stack_opt_dep(A, H, Q)
+
+    def stack_opt_dep(self, A, H, Q):
        Ct = th.zeros((6, 6))
        Ct[[0, 2, 4, 1, 3, 5], [0, 1, 2, 3, 4, 5]] = 1

@ -63,11 +66,13 @@ class YOPOLoss(nn.Module):

        _L = B @ Ct

-        _R = _C @ (B_T) @ Q @ B @ Ct
+        _R_Jerk = _C @ (B_T) @ H @ B @ Ct

-        return _C, B, _L, _R
+        _R_Acc = _C @ (B_T) @ Q @ B @ Ct

-    def denormalize_weight(self, vel_scale):
+        return _C, B, _L, _R_Jerk, _R_Acc
+
+    def denormalize_weight(self):
        """
        Denormalize the cost weight to ensure consistency across different speeds to simplify parameter tuning.
        smoothness cost: time integral of jerk² is used as a smoothness cost.
@ -77,9 +82,11 @@ class YOPOLoss(nn.Module):
        goal cost:       projection of the trajectory onto goal direction.
                         Independent of speed.
        """
-        self.smoothness_weight = self.smoothness_weight / vel_scale ** 5
-        self.safety_weight = self.safety_weight * vel_scale
-        self.goal_weight = self.goal_weight
+        vel_scale = cfg["vel_max_train"] / 1.0
+        self.smoothness_weight = cfg["ws"] / vel_scale ** 5
+        self.safety_weight = cfg["wc"] * vel_scale
+        self.goal_weight = cfg["wg"]
+        self.accele_weight = cfg["wa"] / vel_scale ** 3

    def forward(self, state, prediction, goal, map_id):
        """
@ -102,10 +109,10 @@ class YOPOLoss(nn.Module):
        goal_cost = th.tensor(0.0, device=self.device, requires_grad=True)

        if self.smoothness_weight > 0:
-            smoothness_cost = self.smoothness_loss(Df, Dp)
+            smoothness_cost, acceleration_cost = self.smoothness_loss(Df, Dp)
        if self.safety_weight > 0:
            safety_cost = self.safety_loss(Df, Dp, map_id)
        if self.goal_weight > 0:
            goal_cost = self.goal_loss(Df, Dp, goal)

-        return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost
+        return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost, self.accele_weight * acceleration_cost
--- a/YOPO/loss/smoothness_loss.py
+++ b/YOPO/loss/smoothness_loss.py
@ -3,9 +3,10 @@ import torch as th


 class SmoothnessLoss(nn.Module):
-    def __init__(self, R):
+    def __init__(self, RJ, RA):
        super(SmoothnessLoss, self).__init__()
-        self._R = R
+        self._RJ = RJ
+        self._RA = RA

    def forward(self, Df, Dp):
        """
@ -15,13 +16,16 @@ class SmoothnessLoss(nn.Module):
        Returns:
            cost_smooth: (batch_size) → smoothness loss
        """
-        R = self._R.unsqueeze(0).expand(Dp.shape[0], -1, -1)
+        RJ = self._RJ.unsqueeze(0).expand(Dp.shape[0], -1, -1)
+        RA = self._RA.unsqueeze(0).expand(Dp.shape[0], -1, -1)
        D_all = th.cat([Df, Dp], dim=2)  # dx, dy, dz will be rows

        # Reshape for matmul: (batch, 6, 1)
        dx, dy, dz = D_all[:, 0].unsqueeze(2), D_all[:, 1].unsqueeze(2), D_all[:, 2].unsqueeze(2)

        # Compute smoothness loss: dxᵀ R dx + ...
-        cost_smooth = dx.transpose(1, 2) @ R @ dx + dy.transpose(1, 2) @ R @ dy + dz.transpose(1, 2) @ R @ dz
+        jerk_smooth = dx.transpose(1, 2) @ RJ @ dx + dy.transpose(1, 2) @ RJ @ dy + dz.transpose(1, 2) @ RJ @ dz

-        return cost_smooth.squeeze()
+        accel_smooth = dx.transpose(1, 2) @ RA @ dx + dy.transpose(1, 2) @ RA @ dy + dz.transpose(1, 2) @ RA @ dz
+
+        return jerk_smooth.squeeze(), accel_smooth.squeeze()
--- a/YOPO/policy/yopo_trainer.py
+++ b/YOPO/policy/yopo_trainer.py
@ -82,13 +82,13 @@ class YopoTrainer:
    def train_one_epoch(self, epoch: int, total_progress):
        one_epoch_progress = self.progress_log.add_task(f"Epoch: {epoch}", total=len(self.train_dataloader))
        inspect_interval = max(1, len(self.train_dataloader) // 16)
-        traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time()
+        traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time()
        for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.train_dataloader):  # obs: body frame
            if depth.shape[0] != self.batch_size:  continue  # batch size == number of env

            self.optimizer.zero_grad()

-            trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
+            trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost, acc_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)

            loss = self.loss_weight[0] * trajectory_loss + self.loss_weight[1] * score_loss

@ -101,6 +101,7 @@ class YopoTrainer:
            smooth_losses.append(self.loss_weight[0] * smooth_cost.item())
            safety_losses.append(self.loss_weight[0] * safety_cost.item())
            goal_losses.append(self.loss_weight[0] * goal_cost.item())
+            acc_losses.append(self.loss_weight[0] * acc_cost.item())

            if step % inspect_interval == inspect_interval - 1:
                batch_fps = inspect_interval / (time.time() - start_time)
@ -112,7 +113,8 @@ class YopoTrainer:
                self.tensorboard_log.add_scalar("Detail/SmoothLoss", np.mean(smooth_losses), epoch * len(self.train_dataloader) + step)
                self.tensorboard_log.add_scalar("Detail/SafetyLoss", np.mean(safety_losses), epoch * len(self.train_dataloader) + step)
                self.tensorboard_log.add_scalar("Detail/GoalLoss", np.mean(goal_losses), epoch * len(self.train_dataloader) + step)
-                traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time()
+                self.tensorboard_log.add_scalar("Detail/AccelLoss", np.mean(acc_losses), epoch * len(self.train_dataloader) + step)
+                traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time()

            self.progress_log.update(one_epoch_progress, advance=1)
            self.progress_log.update(total_progress, advance=1 / len(self.train_dataloader))
@ -126,7 +128,7 @@ class YopoTrainer:
        for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.val_dataloader):  # obs: body frame
            if depth.shape[0] != self.batch_size:  continue  # batch size == num of env

-            trajectory_loss, score_loss, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)
+            trajectory_loss, score_loss, _, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id)

            traj_losses.append(self.loss_weight[0] * trajectory_loss.item())
            score_losses.append(self.loss_weight[1] * score_loss.item())
@ -166,12 +168,12 @@ class YopoTrainer:
        # [B*V*H, 3, 3]: [px, py, pz; vx, vy, vz; ax, ay, az]
        end_state_w = torch.stack([end_pos_w, end_vel_w, end_acc_w], dim=1)

-        smooth_cost, safety_cost, goal_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id)
-        trajectory_loss = (smooth_cost + safety_cost + goal_cost).mean()
+        smooth_cost, safety_cost, goal_cost, acc_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id)
+        trajectory_loss = (smooth_cost + safety_cost + goal_cost + acc_cost).mean()

-        score_label = (smooth_cost + safety_cost + goal_cost).clone().detach()
+        score_label = (smooth_cost + safety_cost + goal_cost + acc_cost).clone().detach()
        score_loss = F.smooth_l1_loss(score_flat, score_label)
-        return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean()
+        return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean(), acc_cost.mean()

    def save_model(self):
        if hasattr(self, "epoch_i"):