diff --git a/flightpolicy/yopo/yopo_algorithm.py b/flightpolicy/yopo/yopo_algorithm.py index 4239a74..fdd00f1 100644 --- a/flightpolicy/yopo/yopo_algorithm.py +++ b/flightpolicy/yopo/yopo_algorithm.py @@ -157,8 +157,7 @@ class YopoAlgorithm: if log_interval is not None and n_updates % log_interval[0] == 0: self.logger.record("time/epoch", epoch_, exclude="tensorboard") self.logger.record("time/steps", n_updates, exclude="tensorboard") - self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), - exclude="tensorboard") + self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), exclude="tensorboard") self.logger.record("train/trajectory_cost", np.mean(cost_losses)) self.logger.record("train/score_loss", np.mean(score_losses)) self.logger.dump(step=n_updates) @@ -260,6 +259,7 @@ class YopoAlgorithm: costs.append(rew) ep_len += 1 print("round ", n_roll, ", total steps:", len(costs), ", avg cost:", sum(costs) / len(costs)) + self.env.disconnectUnity() def train(self, gradient_steps: int, batch_size: int) -> None: """ @@ -364,25 +364,17 @@ class YopoAlgorithm: convert the observation from body frame to primitive frame, and then concatenate it with the depth features (to ensure the translational invariance) """ - obs_return = np.ones( - (obs.shape[0], self.lattice_space.vertical_num, self.lattice_space.horizon_num, obs.shape[1]), - dtype=np.float32) + obs_return = np.ones((obs.shape[0], obs.shape[1], self.lattice_space.vertical_num, self.lattice_space.horizon_num), dtype=np.float32) id = 0 - v_b = obs[:, 0:3] - a_b = obs[:, 3:6] - g_b = obs[:, 6:9] + v_b, a_b, g_b = obs[:, 0:3], obs[:, 3:6], obs[:, 6:9] for i in range(self.lattice_space.vertical_num - 1, -1, -1): for j in range(self.lattice_space.horizon_num - 1, -1, -1): Rbp = self.lattice_primitive.getRotation(id) - v_p = np.dot(Rbp.T, v_b.T).T - a_p = np.dot(Rbp.T, a_b.T).T - g_p = np.dot(Rbp.T, g_b.T).T - obs_return[:, i, j, 0:3] = v_p - obs_return[:, i, j, 3:6] = a_p 
- obs_return[:, i, j, 6:9] = g_p - # obs_return[:, i, j, 0:6] = self.normalize_obs(obs_return[:, i, j, 0:6]) + obs_return[:, 0:3, i, j] = np.dot(v_b, Rbp) # v_p + obs_return[:, 3:6, i, j] = np.dot(a_b, Rbp) # a_p + obs_return[:, 6:9, i, j] = np.dot(g_b, Rbp) # g_p + # obs_return[:, 0:6, i, j] = self.normalize_obs(obs_return[:, 0:6, i, j]) id = id + 1 - obs_return = np.transpose(obs_return, [0, 3, 1, 2]) return th.from_numpy(obs_return) def unnormalize_obs(self, vel_acc_norm): @@ -421,7 +413,7 @@ class YopoAlgorithm: self._last_obs = self.env.reset() self._last_depth = self.env.getDepthImage() self._last_goal = np.zeros([self.env.num_envs, 3], dtype=np.float32) - for i in range(0, self.env.num_envs): + for i in range(self.env.num_envs): self._last_goal[i] = self.get_random_goal(self._last_obs[i]) self._map_id = np.zeros((self.env.num_envs, 1), dtype=np.float32) @@ -503,7 +495,7 @@ class YopoAlgorithm: self.env.setMapID(-np.ones((self.env.num_envs, 1))) self._last_obs = self.env.reset() self._last_depth = self.env.getDepthImage() - for i in range(0, self.env.num_envs): + for i in range(self.env.num_envs): self._last_goal[i] = self.get_random_goal(self._last_obs[i]) def _convert_train_freq(self) -> None: diff --git a/flightpolicy/yopo/yopo_policy.py b/flightpolicy/yopo/yopo_policy.py index 90520ed..e8c82a9 100644 --- a/flightpolicy/yopo/yopo_policy.py +++ b/flightpolicy/yopo/yopo_policy.py @@ -51,10 +51,9 @@ class YopoPolicy(nn.Module): output_dim = (self.action_dim + 1) * self.lattice_space.vel_num * self.lattice_space.radio_num # input state dim = hidden_state + vel + acc + goal input_dim = self.hidden_state + 9 - self.image_backbone = YopoBackbone(self.hidden_state, - self.lattice_space.horizon_num * self.lattice_space.vertical_num) + self.image_backbone = YopoBackbone(self.hidden_state, self.lattice_space.horizon_num * self.lattice_space.vertical_num) self.state_backbone = nn.Sequential() - self.yopo_header = self.create_header(input_dim, output_dim, 
self.net_arch, self.activation_fn, True) + self.yopo_header = self.create_header(input_dim, output_dim, self.net_arch, self.activation_fn) self.grad_layer = CostAndGradLayer.apply # Setup optimizer with initial learning rate learning_rate = lr_schedule(1) if lr_schedule is not None else 1e-3 @@ -63,26 +62,24 @@ class YopoPolicy(nn.Module): # TenserRT Transfer def forward(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ - forward propagation of neural network, only used for TensorRT conversion. + forward propagation of neural network, separated for TensorRT conversion. """ depth_feature = self.image_backbone(depth) obs_feature = self.state_backbone(obs) input_tensor = th.cat((obs_feature, depth_feature), 1) output = self.yopo_header(input_tensor) - # [batch, endstate+score, lattice_row, lattice_col] - return output + endstate = th.tanh(output[:, :9]) + score = th.relu(output[:, 9:]) + return th.cat((endstate, score), dim=1) # [batch, endstate+score, lattice_row, lattice_col] # Training Policy def inference(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ For network training: - (1) predicted the endstate(end_state) and score + (1) predicted the endstate and score (2) record the gradients and costs of prediction """ - depth_feature = self.image_backbone(depth) - obs_feature = self.state_backbone(obs) - input_tensor = th.cat((obs_feature, depth_feature), 1) - output = self.yopo_header(input_tensor) + output = self.forward(depth, obs) # [batch, endstate+score, lattice_num] batch_size = obs.shape[0] @@ -93,7 +90,7 @@ class YopoPolicy(nn.Module): endstate_score_predictions = th.zeros_like(output).to(self.device) cost_labels = th.zeros((batch_size, self.lattice_space.horizon_num * self.lattice_space.vertical_num)).to(self.device) - for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num): + for i in range(self.lattice_space.horizon_num * self.lattice_space.vertical_num): id = self.lattice_space.horizon_num * 
self.lattice_space.vertical_num - 1 - i ids = id * np.ones((batch_size, 1)) endstate = self.pred_to_endstate(endstate_pred[:, :, i], id) @@ -106,40 +103,30 @@ class YopoPolicy(nn.Module): return endstate_score_predictions, cost_labels # Testing Policy - def predict(self, depth: th.Tensor, obs: th.Tensor, return_all_preds=False) -> th.Tensor: + def predict(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ For network testing: - (1) predicted the endstate(end_state) and score + (1) predicted the endstate and score, and return the optimal """ with th.no_grad(): - depth_feature = self.image_backbone(depth) - obs_feature = self.state_backbone(obs.float()) - input_tensor = th.cat((obs_feature, depth_feature), 1) - output = self.yopo_header(input_tensor) + output = self.forward(depth, obs.float()) + batch_size = obs.shape[0] output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num) endstate_pred = output[:, 0:9, :] score_pred = output[:, 9, :] - if not return_all_preds: - endstate_prediction = th.zeros(batch_size, self.action_dim) - score_prediction = th.zeros(batch_size, 1) - for i in range(0, batch_size): - action_id = th.argmin(score_pred[i]).item() - lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id - endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id) - score_prediction[i] = score_pred[i, action_id] - else: - endstate_prediction = th.zeros_like(endstate_pred) - score_prediction = score_pred - for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num): - lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i - endstate = self.pred_to_endstate(endstate_pred[:, :, i], lattice_id) - endstate_prediction[:, :, i] = endstate + endstate_prediction = th.zeros(batch_size, self.action_dim) + score_prediction = th.zeros(batch_size, 1) + for i in range(batch_size): + 
action_id = th.argmin(score_pred[i]).item() + lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id + endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id) + score_prediction[i] = score_pred[i, action_id] return endstate_prediction, score_prediction - def pred_to_endstate(self, endstate_pred: th.Tensor, id: int): + def pred_to_endstate(self, endstate_pred: th.Tensor, id: int) -> th.Tensor: """ Transform the predicted state to the body frame. """ @@ -154,9 +141,9 @@ class YopoPolicy(nn.Module): endstate_vp = endstate_pred[:, 3:6] * self.lattice_space.vel_max endstate_ap = endstate_pred[:, 6:9] * self.lattice_space.acc_max - Rbp = self.lattice_primitive.getRotation(id) - endstate_vb = th.matmul(th.tensor(Rbp).to(self.device), endstate_vp.t()).t() - endstate_ab = th.matmul(th.tensor(Rbp).to(self.device), endstate_ap.t()).t() + Rpb = th.tensor(self.lattice_primitive.getRotation(id).T).to(self.device) + endstate_vb = th.matmul(endstate_vp, Rpb) + endstate_ab = th.matmul(endstate_ap, Rpb) endstate = th.cat((endstate_p, endstate_vb, endstate_ab), dim=1) endstate[:, [0, 1, 2, 3, 4, 5, 6, 7, 8]] = endstate[:, [0, 3, 6, 1, 4, 7, 2, 5, 8]] return endstate @@ -170,20 +157,18 @@ class YopoPolicy(nn.Module): ) -> nn.Sequential: if len(net_arch) > 0: - modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), - activation_fn()] + modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), activation_fn()] else: modules = [] for idx in range(len(net_arch) - 1): - modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, - padding=0)) + modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, padding=0)) modules.append(activation_fn()) if output_dim > 0: last_layer_dim = net_arch[-1] if 
len(net_arch) > 0 else input_dim - modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, - padding=0)) + modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, padding=0)) + if squash_output: modules.append(nn.Tanh()) return nn.Sequential(*modules) diff --git a/run/yopo_trt_transfer.py b/run/yopo_trt_transfer.py index e86af55..3a2c020 100644 --- a/run/yopo_trt_transfer.py +++ b/run/yopo_trt_transfer.py @@ -1,10 +1,11 @@ """ 将yopo模型转换为Tensorrt prepare: - 1 pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com - 2 git clone https://github.com/NVIDIA-AI-IOT/torch2trt - cd torch2trt - python setup.py install + 0. make sure you have already installed TensorRT + 1. pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com + 2. git clone https://github.com/NVIDIA-AI-IOT/torch2trt + cd torch2trt + python setup.py install """ import argparse @@ -19,28 +20,6 @@ from flightpolicy.envs import vec_env_wrapper as wrapper from flightpolicy.yopo.yopo_algorithm import YopoAlgorithm -def prapare_input_observation(obs, lattice_space, lattice_primitive): - obs_return = np.ones( - (obs.shape[0], lattice_space.vertical_num, lattice_space.horizon_num, obs.shape[1]), - dtype=np.float32) - id = 0 - v_b = obs[:, 0:3] - a_b = obs[:, 3:6] - g_b = obs[:, 6:9] - for i in range(lattice_space.vertical_num - 1, -1, -1): - for j in range(lattice_space.horizon_num - 1, -1, -1): - Rbp = lattice_primitive.getRotation(id) - v_p = np.dot(Rbp.T, v_b.T).T - a_p = np.dot(Rbp.T, a_b.T).T - g_p = np.dot(Rbp.T, g_b.T).T - obs_return[:, i, j, 0:3] = v_p - obs_return[:, i, j, 3:6] = a_p - obs_return[:, i, j, 6:9] = g_p - id = id + 1 - obs_return = np.transpose(obs_return, [0, 3, 1, 2]) - return obs_return - - def parser(): parser = argparse.ArgumentParser() parser.add_argument("--trial", type=int, default=1, help="trial number") @@ -84,10 +63,9 @@ if __name__ == "__main__": # The 
inputs should be consistent with training print("TensorRT Transfer...") depth = np.zeros(shape=[1, 1, 96, 160], dtype=np.float32) - obs = np.zeros(shape=[1, 9], dtype=np.float32) - obs_input = prapare_input_observation(obs, lattice_space, lattice_primitive) + obs = np.zeros(shape=[1, 9, lattice_space.vertical_num, lattice_space.horizon_num], dtype=np.float32) depth_in = torch.from_numpy(depth).cuda() - obs_in = torch.from_numpy(obs_input).cuda() + obs_in = torch.from_numpy(obs).cuda() model_trt = torch2trt(model.policy, [depth_in, obs_in], fp16_mode=args.fp16_mode) torch.save(model_trt.state_dict(), args.filename) print("TensorRT Transfer Finish!")