import abc
import time

import einops
import numpy as np
import torch
from torch import nn

from mp_baselines.planners.costs.cost_functions import CostGPTrajectory
# CostGP, CostGoalPrior, CostCollision and CostComposite are used by GuideStochGPMP below and
# are assumed to live alongside CostGPTrajectory in the same module
from mp_baselines.planners.costs.cost_functions import CostCollision, CostComposite, CostGoalPrior, CostGP
from mp_baselines.planners.costs.factors.mp_priors_multi import MultiMPPrior
from torch_robotics.torch_planning_objectives.fields.distance_fields import interpolate_points_v1
from torch_robotics.torch_utils.torch_utils import to_torch

# NOTE: SE3_distance, MultiSphere, ObstacleSphere, StochGPMP and the commented-out
# smoothen_trajectory are referenced below but not imported here; they are assumed to be
# provided by the surrounding mp_baselines / torch_robotics packages.


class GuideManagerTrajectories(nn.Module):

    def __init__(
            self, dataset, cost,
            clip_grad=False, clip_grad_rule='norm', max_grad_norm=1., max_grad_value=0.1,
            interpolate_trajectories_for_collision=False, num_interpolated_points_for_collision=128,
            use_velocity_from_finite_difference=False,
            start_state_pos=None, goal_state_pos=None, num_steps=100,
            robot=None, n_samples=1,
            tensor_args=None,
            **kwargs):
        super().__init__()
        self.cost = cost
        self.dataset = dataset

        self.interpolate_trajectories_for_collision = interpolate_trajectories_for_collision
        self.num_interpolated_points_for_collision = num_interpolated_points_for_collision

        self.clip_grad = clip_grad
        self.clip_grad_rule = clip_grad_rule
        self.max_grad_norm = max_grad_norm
        self.max_grad_value = max_grad_value

        # velocity
        self.use_velocity_from_finite_difference = use_velocity_from_finite_difference
        self.robot = robot
        self.start_state_pos = start_state_pos
        self.goal_state_pos = goal_state_pos

        # initialize velocity trajectory with a constant velocity
        self.velocity = self.robot.get_velocity(
            MultiMPPrior.const_vel_trajectory(
                start_state_pos, goal_state_pos, robot.dt, num_steps, self.robot.q_dim,
                set_initial_final_vel_to_zero=True, tensor_args=tensor_args)
        )
        self.velocity = to_torch(self.velocity, **tensor_args)
        self.velocity = einops.repeat(self.velocity, "H D -> B H D", B=n_samples)

    def forward(self, x_pos_normalized):
        x_pos = x_pos_normalized.clone()
        with torch.enable_grad():
            x_pos.requires_grad_(True)
            self.velocity.requires_grad_(True)

            # unnormalize x
            # x is normalized, but the guides are defined on unnormalized trajectory space
            x_pos = self.dataset.unnormalize_trajectories(x_pos)

            if self.interpolate_trajectories_for_collision:
                # finer interpolation of trajectory for better collision avoidance
                x_interpolated = interpolate_points_v1(
                    x_pos, num_interpolated_points=self.num_interpolated_points_for_collision)
            else:
                x_interpolated = x_pos

            # compute costs
            # append the current velocity trajectory to the position trajectory only for non-interpolated trajectories
            if self.use_velocity_from_finite_difference:
                x_vel = self.robot.get_velocity(x_pos)
                x_pos_vel = torch.cat((x_pos, x_vel), dim=-1)
            else:
                x_pos_vel = torch.cat((x_pos, self.velocity), dim=-1)

            cost_l, weight_grad_cost_l = self.cost(
                x_pos_vel, x_interpolated=x_interpolated, return_invidual_costs_and_weights=True)

            grad = 0
            grad_velocity = 0
            for cost, weight_grad_cost in zip(cost_l, weight_grad_cost_l):
                if torch.is_tensor(cost):
                    # y.sum() is a surrogate to compute gradients of independent quantities over the batch dimension
                    # x are the support points. Compute gradients wrt x, not x_interpolated
                    if self.use_velocity_from_finite_difference:
                        grad_cost = torch.autograd.grad([cost.sum()], [x_pos], retain_graph=True)[0]
                    else:
                        # retain the graph, since several costs can share it
                        grad_cost, grad_cost_velocity = torch.autograd.grad(
                            [cost.sum()], [x_pos, self.velocity], retain_graph=True)

                    # clip gradients
                    grad_cost_clipped = self.clip_gradient(grad_cost)
                    if not self.use_velocity_from_finite_difference:
                        grad_cost_velocity_clipped = self.clip_gradient(grad_cost_velocity)

                    # zeroing gradients at start and goal
                    grad_cost_clipped[..., 0, :] = 0.
                    grad_cost_clipped[..., -1, :] = 0.
                    if not self.use_velocity_from_finite_difference:
                        grad_cost_velocity_clipped[..., 0, :] = 0.
                        grad_cost_velocity_clipped[..., -1, :] = 0.

                    # combine gradients
                    grad_cost_clipped_weighted = weight_grad_cost * grad_cost_clipped
                    grad += grad_cost_clipped_weighted

                    if not self.use_velocity_from_finite_difference:
                        grad_cost_velocity_clipped_weighted = weight_grad_cost * grad_cost_velocity_clipped
                        grad_velocity += grad_cost_velocity_clipped_weighted

        # Update the velocity
        if not self.use_velocity_from_finite_difference:
            self.velocity = self.velocity - grad_velocity

        # gradient ascent
        grad = -1. * grad
        return grad

    def clip_gradient(self, grad):
        if self.clip_grad:
            if self.clip_grad_rule == 'norm':
                return self.clip_grad_by_norm(grad)
            elif self.clip_grad_rule == 'value':
                return self.clip_grad_by_value(grad)
            else:
                raise NotImplementedError
        else:
            return grad

    def clip_grad_by_norm(self, grad):
        # clip gradient by norm
        if self.clip_grad:
            grad_norm = torch.linalg.norm(grad + 1e-6, dim=-1, keepdim=True)
            scale_ratio = torch.clip(grad_norm, 0., self.max_grad_norm) / grad_norm
            grad = scale_ratio * grad
        return grad

    def clip_grad_by_value(self, grad):
        # clip gradient by value
        if self.clip_grad:
            grad = torch.clip(grad, -self.max_grad_value, self.max_grad_value)
        return grad

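# Usage sketch for GuideManagerTrajectories (illustrative only; `dataset`, `cost`, `robot`,
# the start/goal positions and `tensor_args` are assumed to come from the surrounding
# planning pipeline and are not defined in this module):
#
#   guide = GuideManagerTrajectories(
#       dataset, cost, clip_grad=True, robot=robot,
#       start_state_pos=start_state_pos, goal_state_pos=goal_state_pos,
#       num_steps=num_steps, n_samples=n_samples, tensor_args=tensor_args,
#   )
#   grad = guide(x_pos_normalized)  # same shape as x_pos_normalized, usable as a guidance gradient
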
class GuideManagerTrajectoriesWithVelocity(nn.Module):

    def __init__(
            self, dataset, cost,
            clip_grad=False, clip_grad_rule='norm', max_grad_norm=1., max_grad_value=0.1,
            interpolate_trajectories_for_collision=False, num_interpolated_points_for_collision=128,
            start_state_pos=None, goal_state_pos=None, num_steps=100,
            robot=None, n_samples=1,
            tensor_args=None,
            **kwargs):
        super().__init__()
        self.cost = cost
        self.dataset = dataset

        self.interpolate_trajectories_for_collision = interpolate_trajectories_for_collision
        self.num_interpolated_points_for_collision = num_interpolated_points_for_collision

        self.clip_grad = clip_grad
        self.clip_grad_rule = clip_grad_rule
        self.max_grad_norm = max_grad_norm
        self.max_grad_value = max_grad_value

    def forward(self, x_normalized):
        x = x_normalized.clone()
        with torch.enable_grad():
            x.requires_grad_(True)

            # unnormalize x
            # x is normalized, but the guides are defined on unnormalized trajectory space
            x = self.dataset.unnormalize_trajectories(x)

            if self.interpolate_trajectories_for_collision:
                # finer interpolation of trajectory for better collision avoidance
                x_interpolated = interpolate_points_v1(
                    x, num_interpolated_points=self.num_interpolated_points_for_collision)
            else:
                x_interpolated = x

            # compute costs
            cost_l, weight_grad_cost_l = self.cost(
                x, x_interpolated=x_interpolated, return_invidual_costs_and_weights=True)

            grad = 0
            for cost, weight_grad_cost in zip(cost_l, weight_grad_cost_l):
                if torch.is_tensor(cost):
                    # y.sum() is a surrogate to compute gradients of independent quantities over the batch dimension
                    # x are the support points. Compute gradients wrt x, not x_interpolated
                    grad_cost = torch.autograd.grad([cost.sum()], [x], retain_graph=True)[0]

                    # clip gradients
                    grad_cost_clipped = self.clip_gradient(grad_cost)

                    # zeroing gradients at start and goal
                    grad_cost_clipped[..., 0, :] = 0.
                    grad_cost_clipped[..., -1, :] = 0.

                    # combine gradients
                    grad_cost_clipped_weighted = weight_grad_cost * grad_cost_clipped
                    grad += grad_cost_clipped_weighted

        # gradient ascent
        grad = -1. * grad
        return grad

    def clip_gradient(self, grad):
        if self.clip_grad:
            if self.clip_grad_rule == 'norm':
                return self.clip_grad_by_norm(grad)
            elif self.clip_grad_rule == 'value':
                return self.clip_grad_by_value(grad)
            else:
                raise NotImplementedError
        else:
            return grad

    def clip_grad_by_norm(self, grad):
        # clip gradient by norm
        if self.clip_grad:
            grad_norm = torch.linalg.norm(grad + 1e-6, dim=-1, keepdim=True)
            scale_ratio = torch.clip(grad_norm, 0., self.max_grad_norm) / grad_norm
            grad = scale_ratio * grad
        return grad

    def clip_grad_by_value(self, grad):
        # clip gradient by value
        if self.clip_grad:
            grad = torch.clip(grad, -self.max_grad_value, self.max_grad_value)
        return grad

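# Usage sketch for GuideManagerTrajectoriesWithVelocity (illustrative; `x_normalized` is assumed
# to already be the full state trajectory expected by `cost`, so no velocity is appended here):
#
#   guide = GuideManagerTrajectoriesWithVelocity(dataset, cost, clip_grad=True)
#   grad = guide(x_normalized)  # same shape as x_normalized
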
class GuideBase(nn.Module, abc.ABC):

    def __init__(self, scale=1e-3, tensor_args=None, **kwargs):
        super().__init__()
        self.tensor_args = tensor_args
        self.scale = scale

    @abc.abstractmethod
    def forward(self, x):
        raise NotImplementedError

    def gradients(self, x):
        x.requires_grad_()
        y = self(x)
        # y.sum() is a surrogate to compute gradients of independent quantities over the batch dimension
        grad = torch.autograd.grad([y.sum()], [x])[0]
        x.detach()
        return y, grad


class GuideCollisionAvoidance(GuideBase):
    """
    Computes the collision sdf for all points in a trajectory
    """
    def __init__(self, env, **kwargs):
        super().__init__(**kwargs)
        self.env = env

    def forward(self, x):
        collision_cost = self.env.compute_collision_cost(x, field_type='sdf')
        cost = collision_cost.sum(-1)
        return -1 * cost  # maximize


class GuideSmoothnessFiniteDifferenceVelocity(GuideBase):
    """
    Smoothness cost as the central finite difference of velocity, aka acceleration
    """
    def __init__(self, env, method='central', **kwargs):
        super().__init__(**kwargs)
        self.env = env
        self.method = method

    def forward(self, x):
        if self.method == 'central':
            vel = self.env.get_q_velocity(x)
            acc = 0.5 * (vel[..., 1:, :] - vel[..., :-1, :])
            # minimize sum of accelerations along trajectory
            cost = torch.linalg.norm(acc, dim=-1).sum(-1)
        else:
            raise NotImplementedError
        return -1 * cost  # maximize


class GuideSmoothnessGPPrior(GuideBase):
    """
    Smoothness cost as a GP Prior
    """
    def __init__(self, n_dofs, n_support_points_des, start_state, dt, cost_sigmas, **kwargs):
        super().__init__(**kwargs)
        # self.cost_prior = CostGP(n_dofs, n_support_points_des, start_state, dt, cost_sigmas, **kwargs)
        self.cost_prior = CostGPTrajectory(n_dofs, n_support_points_des, start_state, dt, cost_sigmas, **kwargs)

    def forward(self, x):
        cost = self.cost_prior(x)
        return -1 * cost  # maximize


class GuideStateGoal(GuideBase):
    """
    State desired cost
    """
    def __init__(self, state_des, idx, **kwargs):
        super().__init__(**kwargs)
        self.state_des = state_des
        self.idx = idx

    def forward(self, x):
        assert x.ndim >= 3
        cost = torch.linalg.norm(x[..., self.idx, :] - self.state_des, dim=-1)
        return -1 * cost  # maximize


class GuideTrajectorySmoothnessFirstOrder(GuideBase):
    """
    Computes the first-order smoothness of a trajectory
    d(q_0, ..., q_{H-1}) = sum_{i=1}^{H-1} ||q_i - q_{i-1}||
    """
    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def forward(self, x):
        # x: [batch, horizon, dim]
        position_diff = torch.diff(x, dim=1)
        distance = torch.linalg.norm(position_diff, dim=-1).sum(-1)
        return -1. * distance  # maximize the negative distance = minimize distance

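# Minimal example of calling one of the guides above through GuideBase.gradients
# (illustrative; the tensor shapes are assumptions):
#
#   guide = GuideTrajectorySmoothnessFirstOrder()
#   x = torch.randn(8, 64, 2)     # [batch, horizon, dim]
#   y, grad = guide.gradients(x)  # y: [batch], grad: same shape as x
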
class GuideSE3OrientationGoal(GuideBase):
    """
    End effector orientation goal
    """
    def __init__(self, env, rot_des=None, **kwargs):
        super().__init__(**kwargs)
        self.env = env
        if rot_des is not None:
            self.rot_des = rot_des.to(**self.tensor_args)
        else:
            # default to the identity rotation
            self.rot_des = torch.eye(3).to(**self.tensor_args)
        self.w_rot = 1.

    def forward(self, q):
        b = 1
        h = 1
        if q.ndim == 1:
            q = q.unsqueeze(0).unsqueeze(0)
        elif q.ndim == 2:
            b = q.shape[0]
            q = q.unsqueeze(0)
        elif q.ndim == 3:
            b = q.shape[0]
            h = q.shape[1]
        elif q.ndim > 3:
            raise NotImplementedError

        # batch, trajectory length, q dimension
        q = einops.rearrange(q, 'b h d -> (b h) d')
        # link_tensor = self.diff_panda.compute_forward_kinematics_all_links(q)
        link_tensor_EE = self.env.diff_panda.compute_forward_kinematics_link_list(
            q, link_list=[self.env.link_name_ee])
        # reshape to batch, trajectory, link poses
        link_tensor_EE = einops.rearrange(link_tensor_EE, '(b h) 1 d1 d2 -> b h d1 d2', b=b, h=h)

        H_des = link_tensor_EE.clone()
        H_des[..., :3, :3] = self.rot_des

        cost = SE3_distance(link_tensor_EE, H_des, w_rot=self.w_rot)
        cost = cost.sum(-1)
        return -1 * cost  # maximize


class GuideTrajectoryLastPoint(GuideBase):
    """
    https://arxiv.org/pdf/2301.06015.pdf
    """
    def __init__(self, goal_state, **kwargs):
        super().__init__(**kwargs)
        self._goal_state = goal_state

    def forward(self, x):
        # x: [batch, horizon, dim]
        l1_dist_goal_state = torch.abs(self._goal_state - x).sum(-1)
        # Equation A5
        # objective = torch.exp(1/l1_dist_goal_state).sum(-1)
        # Equation A8
        objective = -1. * l1_dist_goal_state.sum(-1)
        return objective


class GuideTrajectoryObstacleAvoidanceMultiSphere(GuideBase):
    """
    Computes the sdf for all points in a trajectory
    """
    def __init__(self, obstacles, tensor_args=None, **kwargs):
        super().__init__(**kwargs)
        obst_params = dict(obst_type='sdf')
        self.shape = MultiSphere(tensor_args, **obst_params)
        self.convert_obstacles_to_shape(obstacles)
        self._max_sdf = 0.02

    def convert_obstacles_to_shape(self, obstacles):
        centers = []
        radii = []
        for obstacle in obstacles:
            assert isinstance(obstacle, ObstacleSphere), "Only accepts circles for now"
            centers.append(obstacle.get_center())
            radii.append(obstacle.radius)
        centers = np.array(centers)
        radii = np.array(radii)
        self.shape.set_obst(centers=centers, radii=radii)

    def forward(self, x):
        sdf_points = self.shape.compute_cost(x)
        cost_points = torch.relu(self._max_sdf - sdf_points)
        # cost_points = smooth_distance_penalty(sdf_points)
        cost_trajectory = cost_points.sum(-1)
        return -1 * cost_trajectory  # maximize


def smooth_distance_penalty(d, max_sdf=0.1):
    # TODO this does not allow the gradient to backpropagate because of the torch.where operator
    raise NotImplementedError
    # https://www.ri.cmu.edu/pub_files/2009/5/icra09-chomp.pdf
    # Figure 2
    d_new = torch.zeros_like(d)
    idxs1 = torch.where(d < 0)
    d_new[idxs1] = -d[idxs1] + 0.5 * max_sdf
    idxs2 = torch.where(torch.logical_and(d >= 0, d <= max_sdf))
    d_new[idxs2] = 1 / (2 * max_sdf) * (d[idxs2] - max_sdf) ** 2
    return d_new

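# The guide below runs one (by default) local StochGPMP optimization step to obtain costs and
# gradients for a batch of prior trajectories. Usage sketch (illustrative; `env` is assumed to
# expose `n_dofs` and `obstacle_map` as used by `plan_sgpmp`):
#
#   guide = GuideStochGPMP(env, start_state, goal_state, tensor_args=tensor_args)
#   costs, grad = guide.gradients(prior_traj)  # prior_traj: [batch, horizon, dim]
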
class GuideStochGPMP(GuideBase):

    def __init__(self, env, start_state, goal_state, tensor_args, **kwargs):
        super().__init__(**kwargs)
        self.env = env
        self.start_state = start_state
        self.goal_state = goal_state
        self.tensor_args = tensor_args

    def forward(self, x):
        pass

    def gradients(self, x):
        # One step of local StochGPMP
        y, grad = self.plan_sgpmp(x)
        return y, grad

    def plan_sgpmp(self, prior_traj, sgpmp_opt_iters=1, step_size=0.05, return_trajs=False):
        prior_traj = to_torch(prior_traj, **self.tensor_args)
        b, h, d = prior_traj.shape
        n_support_points_des = h

        # SGPMP planner
        n_dofs = self.env.n_dofs
        # Add velocities to state
        start_state = torch.cat((self.start_state, torch.zeros(n_dofs, **self.tensor_args)))
        multi_goal_states = einops.repeat(
            torch.cat((self.goal_state, torch.zeros(n_dofs, **self.tensor_args))), 'd -> b d', b=b)

        dt = 0.001 * 64 / n_support_points_des
        num_particles_per_goal = 1
        num_samples = 50

        # Construct cost functions
        cost_sigmas = dict(
            sigma_start=0.0001,
            sigma_gp=0.3,
        )
        cost_prior = CostGP(
            n_dofs, n_support_points_des, start_state, dt, cost_sigmas, self.tensor_args
        )

        sigma_goal_prior = 0.0001
        cost_goal_prior = CostGoalPrior(
            n_dofs, n_support_points_des, multi_goal_states=multi_goal_states,
            num_particles_per_goal=num_particles_per_goal,
            num_samples=num_samples,
            sigma_goal_prior=sigma_goal_prior,
            tensor_args=self.tensor_args
        )

        sigma_coll = 1e-5
        cost_obst_2D = CostCollision(
            n_dofs, n_support_points_des,
            field=self.env.obstacle_map,
            sigma_coll=sigma_coll,
            tensor_args=self.tensor_args
        )

        cost_func_list = [cost_prior, cost_goal_prior, cost_obst_2D]
        cost_composite = CostComposite(n_dofs, n_support_points_des, cost_func_list)

        # Prior mean trajectory
        initial_particle_means = None
        if prior_traj is not None:
            # Interpolate and smooth to desired trajectory length
            # prior_traj = smoothen_trajectory(prior_traj, n_support_points_des, self.tensor_args, smooth_factor=0.01)
            # Reshape for sgpmp interface
            prior_traj = einops.repeat(prior_traj, 'b h d -> b n h d', n=num_particles_per_goal)
            # Add velocities to state
            initial_particle_means = torch.cat((prior_traj, torch.zeros_like(prior_traj)), dim=-1)

        sgpmp_params = dict(
            start_state=start_state,
            multi_goal_states=multi_goal_states,
            cost=cost_composite,
            initial_particle_means=initial_particle_means,
            num_particles_per_goal=num_particles_per_goal,
            num_samples=num_samples,
            n_support_points=n_support_points_des,
            dt=dt,
            n_dof=n_dofs,
            opt_iters=sgpmp_opt_iters,
            temp=0.1,
            step_size=step_size,
            sigma_start_init=1e-3,
            sigma_goal_init=1e-3,
            sigma_gp_init=5.,
            sigma_start_sample=1e-3,
            sigma_goal_sample=1e-3,
            sigma_gp_sample=5.,
            tensor_args=self.tensor_args,
        )
        sgpmp_planner = StochGPMP(**sgpmp_params)
        obs = {}

        # Optimize
        sgpmp_time_start = time.time()
        _, _, _, _, costs, grad = sgpmp_planner.optimize(**obs)
        # costs of all particles
        costs = costs.sum(1)
        # gradient includes velocities
        grad = grad[..., :2]
        sgpmp_time_finish = time.time()

        controls, _, trajectories, trajectory_means, weights = sgpmp_planner.get_recent_samples()
        # print(f'SGPMP time: {sgpmp_time_finish - sgpmp_time_start:.4f} sec')

        if return_trajs:
            return trajectories

        return costs, grad