diff --git a/flightpolicy/yopo/yopo_algorithm.py b/flightpolicy/yopo/yopo_algorithm.py index 4239a74..fdd00f1 100644 --- a/flightpolicy/yopo/yopo_algorithm.py +++ b/flightpolicy/yopo/yopo_algorithm.py @@ -157,8 +157,7 @@ class YopoAlgorithm: if log_interval is not None and n_updates % log_interval[0] == 0: self.logger.record("time/epoch", epoch_, exclude="tensorboard") self.logger.record("time/steps", n_updates, exclude="tensorboard") - self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), - exclude="tensorboard") + self.logger.record("time/batch_fps", log_interval[0] / (time.time() - start_time), exclude="tensorboard") self.logger.record("train/trajectory_cost", np.mean(cost_losses)) self.logger.record("train/score_loss", np.mean(score_losses)) self.logger.dump(step=n_updates) @@ -260,6 +259,7 @@ class YopoAlgorithm: costs.append(rew) ep_len += 1 print("round ", n_roll, ", total steps:", len(costs), ", avg cost:", sum(costs) / len(costs)) + self.env.disconnectUnity() def train(self, gradient_steps: int, batch_size: int) -> None: """ @@ -364,25 +364,17 @@ class YopoAlgorithm: convert the observation from body frame to primitive frame, and then concatenate it with the depth features (to ensure the translational invariance) """ - obs_return = np.ones( - (obs.shape[0], self.lattice_space.vertical_num, self.lattice_space.horizon_num, obs.shape[1]), - dtype=np.float32) + obs_return = np.ones((obs.shape[0], obs.shape[1], self.lattice_space.vertical_num, self.lattice_space.horizon_num), dtype=np.float32) id = 0 - v_b = obs[:, 0:3] - a_b = obs[:, 3:6] - g_b = obs[:, 6:9] + v_b, a_b, g_b = obs[:, 0:3], obs[:, 3:6], obs[:, 6:9] for i in range(self.lattice_space.vertical_num - 1, -1, -1): for j in range(self.lattice_space.horizon_num - 1, -1, -1): Rbp = self.lattice_primitive.getRotation(id) - v_p = np.dot(Rbp.T, v_b.T).T - a_p = np.dot(Rbp.T, a_b.T).T - g_p = np.dot(Rbp.T, g_b.T).T - obs_return[:, i, j, 0:3] = v_p - obs_return[:, i, j, 3:6] = a_p 
- obs_return[:, i, j, 6:9] = g_p - # obs_return[:, i, j, 0:6] = self.normalize_obs(obs_return[:, i, j, 0:6]) + obs_return[:, 0:3, i, j] = np.dot(v_b, Rbp) # v_p + obs_return[:, 3:6, i, j] = np.dot(a_b, Rbp) # a_p + obs_return[:, 6:9, i, j] = np.dot(g_b, Rbp) # g_p + # obs_return[:, 0:6, i, j] = self.normalize_obs(obs_return[:, 0:6, i, j]) id = id + 1 - obs_return = np.transpose(obs_return, [0, 3, 1, 2]) return th.from_numpy(obs_return) def unnormalize_obs(self, vel_acc_norm): @@ -421,7 +413,7 @@ class YopoAlgorithm: self._last_obs = self.env.reset() self._last_depth = self.env.getDepthImage() self._last_goal = np.zeros([self.env.num_envs, 3], dtype=np.float32) - for i in range(0, self.env.num_envs): + for i in range(self.env.num_envs): self._last_goal[i] = self.get_random_goal(self._last_obs[i]) self._map_id = np.zeros((self.env.num_envs, 1), dtype=np.float32) @@ -503,7 +495,7 @@ class YopoAlgorithm: self.env.setMapID(-np.ones((self.env.num_envs, 1))) self._last_obs = self.env.reset() self._last_depth = self.env.getDepthImage() - for i in range(0, self.env.num_envs): + for i in range(self.env.num_envs): self._last_goal[i] = self.get_random_goal(self._last_obs[i]) def _convert_train_freq(self) -> None: diff --git a/flightpolicy/yopo/yopo_policy.py b/flightpolicy/yopo/yopo_policy.py index 90520ed..e8c82a9 100644 --- a/flightpolicy/yopo/yopo_policy.py +++ b/flightpolicy/yopo/yopo_policy.py @@ -51,10 +51,9 @@ class YopoPolicy(nn.Module): output_dim = (self.action_dim + 1) * self.lattice_space.vel_num * self.lattice_space.radio_num # input state dim = hidden_state + vel + acc + goal input_dim = self.hidden_state + 9 - self.image_backbone = YopoBackbone(self.hidden_state, - self.lattice_space.horizon_num * self.lattice_space.vertical_num) + self.image_backbone = YopoBackbone(self.hidden_state, self.lattice_space.horizon_num * self.lattice_space.vertical_num) self.state_backbone = nn.Sequential() - self.yopo_header = self.create_header(input_dim, output_dim, 
self.net_arch, self.activation_fn, True) + self.yopo_header = self.create_header(input_dim, output_dim, self.net_arch, self.activation_fn) self.grad_layer = CostAndGradLayer.apply # Setup optimizer with initial learning rate learning_rate = lr_schedule(1) if lr_schedule is not None else 1e-3 @@ -63,26 +62,24 @@ class YopoPolicy(nn.Module): # TenserRT Transfer def forward(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ - forward propagation of neural network, only used for TensorRT conversion. + forward propagation of neural network, separated for TensorRT conversion. """ depth_feature = self.image_backbone(depth) obs_feature = self.state_backbone(obs) input_tensor = th.cat((obs_feature, depth_feature), 1) output = self.yopo_header(input_tensor) - # [batch, endstate+score, lattice_row, lattice_col] - return output + endstate = th.tanh(output[:, :9]) + score = th.relu(output[:, 9:]) + return th.cat((endstate, score), dim=1) # [batch, endstate+score, lattice_row, lattice_col] # Training Policy def inference(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ For network training: - (1) predicted the endstate(end_state) and score + (1) predicted the endstate and score (2) record the gradients and costs of prediction """ - depth_feature = self.image_backbone(depth) - obs_feature = self.state_backbone(obs) - input_tensor = th.cat((obs_feature, depth_feature), 1) - output = self.yopo_header(input_tensor) + output = self.forward(depth, obs) # [batch, endstate+score, lattice_num] batch_size = obs.shape[0] @@ -93,7 +90,7 @@ class YopoPolicy(nn.Module): endstate_score_predictions = th.zeros_like(output).to(self.device) cost_labels = th.zeros((batch_size, self.lattice_space.horizon_num * self.lattice_space.vertical_num)).to(self.device) - for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num): + for i in range(self.lattice_space.horizon_num * self.lattice_space.vertical_num): id = self.lattice_space.horizon_num * 
self.lattice_space.vertical_num - 1 - i ids = id * np.ones((batch_size, 1)) endstate = self.pred_to_endstate(endstate_pred[:, :, i], id) @@ -106,40 +103,30 @@ class YopoPolicy(nn.Module): return endstate_score_predictions, cost_labels # Testing Policy - def predict(self, depth: th.Tensor, obs: th.Tensor, return_all_preds=False) -> th.Tensor: + def predict(self, depth: th.Tensor, obs: th.Tensor) -> th.Tensor: """ For network testing: - (1) predicted the endstate(end_state) and score + (1) predicted the endstate and score, and return the optimal """ with th.no_grad(): - depth_feature = self.image_backbone(depth) - obs_feature = self.state_backbone(obs.float()) - input_tensor = th.cat((obs_feature, depth_feature), 1) - output = self.yopo_header(input_tensor) + output = self.forward(depth, obs.float()) + batch_size = obs.shape[0] output = output.view(batch_size, 10, self.lattice_space.horizon_num * self.lattice_space.vertical_num) endstate_pred = output[:, 0:9, :] score_pred = output[:, 9, :] - if not return_all_preds: - endstate_prediction = th.zeros(batch_size, self.action_dim) - score_prediction = th.zeros(batch_size, 1) - for i in range(0, batch_size): - action_id = th.argmin(score_pred[i]).item() - lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id - endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id) - score_prediction[i] = score_pred[i, action_id] - else: - endstate_prediction = th.zeros_like(endstate_pred) - score_prediction = score_pred - for i in range(0, self.lattice_space.horizon_num * self.lattice_space.vertical_num): - lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - i - endstate = self.pred_to_endstate(endstate_pred[:, :, i], lattice_id) - endstate_prediction[:, :, i] = endstate + endstate_prediction = th.zeros(batch_size, self.action_dim) + score_prediction = th.zeros(batch_size, 1) + for i in range(batch_size): + 
action_id = th.argmin(score_pred[i]).item() + lattice_id = self.lattice_space.horizon_num * self.lattice_space.vertical_num - 1 - action_id + endstate_prediction[i] = self.pred_to_endstate(th.unsqueeze(endstate_pred[i, :, action_id], 0), lattice_id) + score_prediction[i] = score_pred[i, action_id] return endstate_prediction, score_prediction - def pred_to_endstate(self, endstate_pred: th.Tensor, id: int): + def pred_to_endstate(self, endstate_pred: th.Tensor, id: int) -> th.Tensor: """ Transform the predicted state to the body frame. """ @@ -154,9 +141,9 @@ class YopoPolicy(nn.Module): endstate_vp = endstate_pred[:, 3:6] * self.lattice_space.vel_max endstate_ap = endstate_pred[:, 6:9] * self.lattice_space.acc_max - Rbp = self.lattice_primitive.getRotation(id) - endstate_vb = th.matmul(th.tensor(Rbp).to(self.device), endstate_vp.t()).t() - endstate_ab = th.matmul(th.tensor(Rbp).to(self.device), endstate_ap.t()).t() + Rpb = th.tensor(self.lattice_primitive.getRotation(id).T).to(self.device) + endstate_vb = th.matmul(endstate_vp, Rpb) + endstate_ab = th.matmul(endstate_ap, Rpb) endstate = th.cat((endstate_p, endstate_vb, endstate_ab), dim=1) endstate[:, [0, 1, 2, 3, 4, 5, 6, 7, 8]] = endstate[:, [0, 3, 6, 1, 4, 7, 2, 5, 8]] return endstate @@ -170,20 +157,18 @@ class YopoPolicy(nn.Module): ) -> nn.Sequential: if len(net_arch) > 0: - modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), - activation_fn()] + modules = [nn.Conv2d(in_channels=input_dim, out_channels=net_arch[0], kernel_size=1, stride=1, padding=0), activation_fn()] else: modules = [] for idx in range(len(net_arch) - 1): - modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, - padding=0)) + modules.append(nn.Conv2d(in_channels=net_arch[idx], out_channels=net_arch[idx + 1], kernel_size=1, stride=1, padding=0)) modules.append(activation_fn()) if output_dim > 0: last_layer_dim = net_arch[-1] if 
len(net_arch) > 0 else input_dim - modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, - padding=0)) + modules.append(nn.Conv2d(in_channels=last_layer_dim, out_channels=output_dim, kernel_size=1, stride=1, padding=0)) + if squash_output: modules.append(nn.Tanh()) return nn.Sequential(*modules) diff --git a/run/yopo_trt_transfer.py b/run/yopo_trt_transfer.py index e86af55..3a2c020 100644 --- a/run/yopo_trt_transfer.py +++ b/run/yopo_trt_transfer.py @@ -1,10 +1,11 @@ """ 将yopo模型转换为Tensorrt prepare: - 1 pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com - 2 git clone https://github.com/NVIDIA-AI-IOT/torch2trt - cd torch2trt - python setup.py install + 0. make sure you have already installed TensorRT + 1. pip install -U nvidia-tensorrt --index-url https://pypi.ngc.nvidia.com + 2. git clone https://github.com/NVIDIA-AI-IOT/torch2trt + cd torch2trt + python setup.py install """ import argparse @@ -19,28 +20,6 @@ from flightpolicy.envs import vec_env_wrapper as wrapper from flightpolicy.yopo.yopo_algorithm import YopoAlgorithm -def prapare_input_observation(obs, lattice_space, lattice_primitive): - obs_return = np.ones( - (obs.shape[0], lattice_space.vertical_num, lattice_space.horizon_num, obs.shape[1]), - dtype=np.float32) - id = 0 - v_b = obs[:, 0:3] - a_b = obs[:, 3:6] - g_b = obs[:, 6:9] - for i in range(lattice_space.vertical_num - 1, -1, -1): - for j in range(lattice_space.horizon_num - 1, -1, -1): - Rbp = lattice_primitive.getRotation(id) - v_p = np.dot(Rbp.T, v_b.T).T - a_p = np.dot(Rbp.T, a_b.T).T - g_p = np.dot(Rbp.T, g_b.T).T - obs_return[:, i, j, 0:3] = v_p - obs_return[:, i, j, 3:6] = a_p - obs_return[:, i, j, 6:9] = g_p - id = id + 1 - obs_return = np.transpose(obs_return, [0, 3, 1, 2]) - return obs_return - - def parser(): parser = argparse.ArgumentParser() parser.add_argument("--trial", type=int, default=1, help="trial number") @@ -84,10 +63,9 @@ if __name__ == "__main__": # The 
inputs should be consistent with training print("TensorRT Transfer...") depth = np.zeros(shape=[1, 1, 96, 160], dtype=np.float32) - obs = np.zeros(shape=[1, 9], dtype=np.float32) - obs_input = prapare_input_observation(obs, lattice_space, lattice_primitive) + obs = np.zeros(shape=[1, 9, lattice_space.vertical_num, lattice_space.horizon_num], dtype=np.float32) depth_in = torch.from_numpy(depth).cuda() - obs_in = torch.from_numpy(obs_input).cuda() + obs_in = torch.from_numpy(obs).cuda() model_trt = torch2trt(model.policy, [depth_in, obs_in], fp16_mode=args.fp16_mode) torch.save(model_trt.state_dict(), args.filename) print("TensorRT Transfer Finish!")