From 854c932b9729debd339fcec092949ed21e86b755 Mon Sep 17 00:00:00 2001 From: TJU-Lu Date: Fri, 7 Nov 2025 14:55:30 +0800 Subject: [PATCH] Add acceleration cost --- YOPO/config/traj_opt.yaml | 1 + YOPO/loss/loss_function.py | 45 +++++++++++++++++++++--------------- YOPO/loss/smoothness_loss.py | 14 +++++++---- YOPO/policy/yopo_trainer.py | 18 ++++++++------- 4 files changed, 46 insertions(+), 32 deletions(-) diff --git a/YOPO/config/traj_opt.yaml b/YOPO/config/traj_opt.yaml index 1d671ee..6e5b9b2 100644 --- a/YOPO/config/traj_opt.yaml +++ b/YOPO/config/traj_opt.yaml @@ -9,6 +9,7 @@ acc_max_train: 6.0 # IMPORTANT: weight of costs for unit speed (can be visualized in tensorboard) wg: 0.12 # guidance ws: 10.0 # smoothness +wa: 1.0 # acceleration wc: 0.1 # collision # dataset: set image_size = primitive_num × downsampling_factor (×32 for ResNet-18) in each axis diff --git a/YOPO/loss/loss_function.py b/YOPO/loss/loss_function.py index eb23c36..240b7d8 100644 --- a/YOPO/loss/loss_function.py +++ b/YOPO/loss/loss_function.py @@ -18,15 +18,12 @@ class YOPOLoss(nn.Module): super(YOPOLoss, self).__init__() self.sgm_time = cfg["sgm_time"] self.device = th.device("cuda" if th.cuda.is_available() else "cpu") - self._C, self._B, self._L, self._R = self.qp_generation() - self._R = self._R.to(self.device) + self._C, self._B, self._L, self._RJ, self._RA = self.qp_generation() + self._RJ = self._RJ.to(self.device) + self._RA = self._RA.to(self.device) self._L = self._L.to(self.device) - vel_scale = cfg["vel_max_train"] / 1.0 - self.smoothness_weight = cfg["ws"] - self.safety_weight = cfg["wc"] - self.goal_weight = cfg["wg"] - self.denormalize_weight(vel_scale) - self.smoothness_loss = SmoothnessLoss(self._R) + self.denormalize_weight() + self.smoothness_loss = SmoothnessLoss(self._RJ, self._RA) self.safety_loss = SafetyLoss(self._L) self.goal_loss = GuidanceLoss() print("------ Actual Loss ------") @@ -43,15 +40,21 @@ class YOPOLoss(nn.Module): for j in range(i, 6): A[2 * i + 1, j] = math.factorial(j) / math.factorial(j - i) * (self.sgm_time ** (j - i)) - # H海森矩阵,论文中的矩阵Q + # H海森矩阵,对应Jerk H = th.zeros((6, 6)) for i in range(3, 6): for j in range(3, 6): H[i, j] = i * (i - 1) * (i - 2) * j * (j - 1) * (j - 2) / (i + j - 5) * (self.sgm_time ** (i + j - 5)) - return self.stack_opt_dep(A, H) + # Q海森矩阵,对应Accel + Q = th.zeros((6, 6)) + for i in range(2, 6): + for j in range(2, 6): + Q[i, j] = (i * (i - 1)) * (j * (j - 1)) / (i + j - 3) * (self.sgm_time ** (i + j - 3)) - def stack_opt_dep(self, A, Q): + return self.stack_opt_dep(A, H, Q) + + def stack_opt_dep(self, A, H, Q): Ct = th.zeros((6, 6)) Ct[[0, 2, 4, 1, 3, 5], [0, 1, 2, 3, 4, 5]] = 1 @@ -63,11 +66,13 @@ class YOPOLoss(nn.Module): _L = B @ Ct - _R = _C @ (B_T) @ Q @ B @ Ct + _R_Jerk = _C @ (B_T) @ H @ B @ Ct - return _C, B, _L, _R + _R_Acc = _C @ (B_T) @ Q @ B @ Ct - def denormalize_weight(self, vel_scale): + return _C, B, _L, _R_Jerk, _R_Acc + + def denormalize_weight(self): """ Denormalize the cost weight to ensure consistency across different speeds to simplify parameter tuning. smoothness cost: time integral of jerk² is used as a smoothness cost. @@ -77,9 +82,11 @@ class YOPOLoss(nn.Module): goal cost: projection of the trajectory onto goal direction. Independent of speed. """ - self.smoothness_weight = self.smoothness_weight / vel_scale ** 5 - self.safety_weight = self.safety_weight * vel_scale - self.goal_weight = self.goal_weight + vel_scale = cfg["vel_max_train"] / 1.0 + self.smoothness_weight = cfg["ws"] / vel_scale ** 5 + self.safety_weight = cfg["wc"] * vel_scale + self.goal_weight = cfg["wg"] + self.accele_weight = cfg["wa"] / vel_scale ** 3 def forward(self, state, prediction, goal, map_id): """ @@ -102,10 +109,10 @@ class YOPOLoss(nn.Module): goal_cost = th.tensor(0.0, device=self.device, requires_grad=True) if self.smoothness_weight > 0: - smoothness_cost = self.smoothness_loss(Df, Dp) + smoothness_cost, acceleration_cost = self.smoothness_loss(Df, Dp) if self.safety_weight > 0: safety_cost = self.safety_loss(Df, Dp, map_id) if self.goal_weight > 0: goal_cost = self.goal_loss(Df, Dp, goal) - return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost \ No newline at end of file + return self.smoothness_weight * smoothness_cost, self.safety_weight * safety_cost, self.goal_weight * goal_cost, self.accele_weight * acceleration_cost \ No newline at end of file diff --git a/YOPO/loss/smoothness_loss.py b/YOPO/loss/smoothness_loss.py index b827d4b..34167a9 100644 --- a/YOPO/loss/smoothness_loss.py +++ b/YOPO/loss/smoothness_loss.py @@ -3,9 +3,10 @@ import torch as th class SmoothnessLoss(nn.Module): - def __init__(self, R): + def __init__(self, RJ, RA): super(SmoothnessLoss, self).__init__() - self._R = R + self._RJ = RJ + self._RA = RA def forward(self, Df, Dp): """ @@ -15,13 +16,16 @@ class SmoothnessLoss(nn.Module): Returns: cost_smooth: (batch_size) → smoothness loss """ - R = self._R.unsqueeze(0).expand(Dp.shape[0], -1, -1) + RJ = self._RJ.unsqueeze(0).expand(Dp.shape[0], -1, -1) + RA = self._RA.unsqueeze(0).expand(Dp.shape[0], -1, -1) D_all = th.cat([Df, Dp], dim=2) # dx, dy, dz will be rows # Reshape for matmul: (batch, 6, 1) dx, dy, dz = D_all[:, 0].unsqueeze(2), D_all[:, 1].unsqueeze(2), D_all[:, 2].unsqueeze(2) # Compute smoothness loss: dxᵀ R dx + ... - cost_smooth = dx.transpose(1, 2) @ R @ dx + dy.transpose(1, 2) @ R @ dy + dz.transpose(1, 2) @ R @ dz + jerk_smooth = dx.transpose(1, 2) @ RJ @ dx + dy.transpose(1, 2) @ RJ @ dy + dz.transpose(1, 2) @ RJ @ dz - return cost_smooth.squeeze() \ No newline at end of file + accel_smooth = dx.transpose(1, 2) @ RA @ dx + dy.transpose(1, 2) @ RA @ dy + dz.transpose(1, 2) @ RA @ dz + + return jerk_smooth.squeeze(), accel_smooth.squeeze() \ No newline at end of file diff --git a/YOPO/policy/yopo_trainer.py b/YOPO/policy/yopo_trainer.py index 2bb5f59..627b16b 100644 --- a/YOPO/policy/yopo_trainer.py +++ b/YOPO/policy/yopo_trainer.py @@ -82,13 +82,13 @@ class YopoTrainer: def train_one_epoch(self, epoch: int, total_progress): one_epoch_progress = self.progress_log.add_task(f"Epoch: {epoch}", total=len(self.train_dataloader)) inspect_interval = max(1, len(self.train_dataloader) // 16) - traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time() + traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time() for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.train_dataloader): # obs: body frame if depth.shape[0] != self.batch_size: continue # batch size == number of env self.optimizer.zero_grad() - trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id) + trajectory_loss, score_loss, smooth_cost, safety_cost, goal_cost, acc_cost = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id) loss = self.loss_weight[0] * trajectory_loss + self.loss_weight[1] * score_loss @@ -101,6 +101,7 @@ class YopoTrainer: smooth_losses.append(self.loss_weight[0] * smooth_cost.item()) safety_losses.append(self.loss_weight[0] * safety_cost.item()) goal_losses.append(self.loss_weight[0] * goal_cost.item()) + acc_losses.append(self.loss_weight[0] * acc_cost.item()) if step % inspect_interval == inspect_interval - 1: batch_fps = inspect_interval / (time.time() - start_time) @@ -112,7 +113,8 @@ class YopoTrainer: self.tensorboard_log.add_scalar("Detail/SmoothLoss", np.mean(smooth_losses), epoch * len(self.train_dataloader) + step) self.tensorboard_log.add_scalar("Detail/SafetyLoss", np.mean(safety_losses), epoch * len(self.train_dataloader) + step) self.tensorboard_log.add_scalar("Detail/GoalLoss", np.mean(goal_losses), epoch * len(self.train_dataloader) + step) - traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, start_time = [], [], [], [], [], time.time() + self.tensorboard_log.add_scalar("Detail/AccelLoss", np.mean(acc_losses), epoch * len(self.train_dataloader) + step) + traj_losses, score_losses, smooth_losses, safety_losses, goal_losses, acc_losses, start_time = [], [], [], [], [], [], time.time() self.progress_log.update(one_epoch_progress, advance=1) self.progress_log.update(total_progress, advance=1 / len(self.train_dataloader)) @@ -126,7 +128,7 @@ class YopoTrainer: for step, (depth, pos, rot, obs_b, map_id) in enumerate(self.val_dataloader): # obs: body frame if depth.shape[0] != self.batch_size: continue # batch size == num of env - trajectory_loss, score_loss, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id) + trajectory_loss, score_loss, _, _, _, _ = self.forward_and_compute_loss(depth, pos, rot, obs_b, map_id) traj_losses.append(self.loss_weight[0] * trajectory_loss.item()) score_losses.append(self.loss_weight[1] * score_loss.item()) @@ -166,12 +168,12 @@ class YopoTrainer: # [B*V*H, 3, 3]: [px, py, pz; vx, vy, vz; ax, ay, az] end_state_w = torch.stack([end_pos_w, end_vel_w, end_acc_w], dim=1) - smooth_cost, safety_cost, goal_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id) - trajectory_loss = (smooth_cost + safety_cost + goal_cost).mean() + smooth_cost, safety_cost, goal_cost, acc_cost = self.yopo_loss(start_state_w, end_state_w, goal_w, map_id) + trajectory_loss = (smooth_cost + safety_cost + goal_cost + acc_cost).mean() - score_label = (smooth_cost + safety_cost + goal_cost).clone().detach() + score_label = (smooth_cost + safety_cost + goal_cost + acc_cost).clone().detach() score_loss = F.smooth_l1_loss(score_flat, score_label) - return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean() + return trajectory_loss, score_loss, smooth_cost.mean(), safety_cost.mean(), goal_cost.mean(), acc_cost.mean() def save_model(self): if hasattr(self, "epoch_i"):