From 068c4068ec735d4a0c77c545ca63233ed8262f1c Mon Sep 17 00:00:00 2001 From: Minghao Zhang <37171450+Mehooz@users.noreply.github.com> Date: Sat, 28 Mar 2020 12:03:49 +0800 Subject: [PATCH] fix atari/mujoco env (#7) * update atari.py * fix setup.py pass the pytest * fix setup.py pass the pytest * add args "render" * change the tensorboard writter * change the tensorboard writter * change device, render, tensorboard log location * change device, render, tensorboard log location * remove some wrong local files * fix some tab mistakes and the envs name in continuous/test_xx.py * add examples and point robot maze environment * fix some bugs during testing examples * add dqn network and fix some args * change back the tensorboard writter's frequency to ensure ppo and a2c can write things normally * add a warning to collector * rm some unrelated files * reformat * fix a bug in test_dqn due to the model wrong selection * change atari frame skip and observation to improve performance * readd some files * change import * modified readme * rm tensorboard log * update atari and mujoco which are ignored * rm the wrong lines --- .gitignore | 3 + README.md | 2 +- examples/discrete_net.py | 10 +- test/discrete/net.py | 11 +- tianshou/env/atari.py | 124 ++++++ tianshou/env/mujoco/__init__.py | 26 ++ tianshou/env/mujoco/assets/point.xml | 34 ++ tianshou/env/mujoco/maze_env_utils.py | 196 +++++++++ tianshou/env/mujoco/point.py | 93 +++++ tianshou/env/mujoco/point_maze_env.py | 563 ++++++++++++++++++++++++++ 10 files changed, 1053 insertions(+), 9 deletions(-) create mode 100644 tianshou/env/atari.py create mode 100644 tianshou/env/mujoco/__init__.py create mode 100644 tianshou/env/mujoco/assets/point.xml create mode 100644 tianshou/env/mujoco/maze_env_utils.py create mode 100644 tianshou/env/mujoco/point.py create mode 100644 tianshou/env/mujoco/point_maze_env.py diff --git a/.gitignore b/.gitignore index 28b6ccc..4d6d8b9 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,9 @@ __pycache__/ # C extensions *.so +# .idea folder +.idea/ + # Distribution / packaging .Python build/ diff --git a/README.md b/README.md index 458d08d..6b7e913 100644 --- a/README.md +++ b/README.md @@ -212,7 +212,7 @@ If you find Tianshou useful, please cite it in your publications. ```latex @misc{tianshou, - author = {Jiayi Weng}, + author = {Jiayi Weng, Minghao Zhang}, title = {Tianshou}, year = {2020}, publisher = {GitHub}, diff --git a/examples/discrete_net.py b/examples/discrete_net.py index 14d2c6e..9c7a8a4 100644 --- a/examples/discrete_net.py +++ b/examples/discrete_net.py @@ -34,7 +34,7 @@ class Actor(nn.Module): def forward(self, s, state=None, info={}): logits, h = self.preprocess(s, state) - logits = F.softmax(self.last(logits), dim=-1) + logits = F.softmax(logits, dim=-1) return logits, h @@ -56,7 +56,7 @@ class DQN(nn.Module): super(DQN, self).__init__() self.device = device - self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2) + self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2) self.bn1 = nn.BatchNorm2d(16) self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2) self.bn2 = nn.BatchNorm2d(32) @@ -69,7 +69,8 @@ class DQN(nn.Module): convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w))) convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h))) linear_input_size = convw * convh * 32 - self.head = nn.Linear(linear_input_size, action_shape) + self.fc = nn.Linear(linear_input_size, 512) + self.head = nn.Linear(512, action_shape) def forward(self, x, state=None, info={}): if not isinstance(x, torch.Tensor): @@ -78,4 +79,5 @@ class DQN(nn.Module): x = F.relu(self.bn1(self.conv1(x))) x = F.relu(self.bn2(self.conv2(x))) x = F.relu(self.bn3(self.conv3(x))) - return self.head(x.view(x.size(0), -1)), state + x = self.fc(x.reshape(x.size(0), -1)) + return self.head(x), state diff --git a/test/discrete/net.py b/test/discrete/net.py index dbbb382..e3d9a4a 100644 --- a/test/discrete/net.py +++ b/test/discrete/net.py @@ -51,11 +51,12 @@ class Critic(nn.Module): class DQN(nn.Module): + def __init__(self, h, w, action_shape, device='cpu'): super(DQN, self).__init__() self.device = device - self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2) + self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2) self.bn1 = nn.BatchNorm2d(16) self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2) self.bn2 = nn.BatchNorm2d(32) @@ -68,12 +69,14 @@ class DQN(nn.Module): convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w))) convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h))) linear_input_size = convw * convh * 32 - self.head = nn.Linear(linear_input_size, action_shape) + self.fc = nn.Linear(linear_input_size, 512) + self.head = nn.Linear(512, action_shape) def forward(self, x, state=None, info={}): if not isinstance(x, torch.Tensor): - x = torch.tensor(x, device=self.device, dtype=torch.float) + s = torch.tensor(x, device=self.device, dtype=torch.float) x = F.relu(self.bn1(self.conv1(x))) x = F.relu(self.bn2(self.conv2(x))) x = F.relu(self.bn3(self.conv3(x))) - return self.head(x.view(x.size(0), -1)), state + x = self.fc(x.reshape(x.size(0), -1)) + return self.head(x), state diff --git a/tianshou/env/atari.py b/tianshou/env/atari.py new file mode 100644 index 0000000..4e243cd --- /dev/null +++ b/tianshou/env/atari.py @@ -0,0 +1,124 @@ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +import cv2 +import gym +import numpy as np +from gym.spaces.box import Box + + +def create_atari_environment(name=None, sticky_actions=True, max_episode_steps=2000): + game_version = 'v0' if sticky_actions else 'v4' + name = '{}NoFrameskip-{}'.format(name, game_version) + env = gym.make(name) + env = env.env + env = preprocessing(env, max_episode_steps=max_episode_steps) + return env + + +class preprocessing(object): + + def __init__(self, env, frame_skip=4, terminal_on_life_loss=False, + size=84, max_episode_steps=2000): + self.max_episode_steps = max_episode_steps + self.env = env + self.terminal_on_life_loss = terminal_on_life_loss + self.frame_skip = frame_skip + self.size = size + self.count = 0 + obs_dims = self.env.observation_space + + self.screen_buffer = [ + np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8), + np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8) + ] + + self.game_over = False + self.lives = 0 + + @property + def observation_space(self): + + return Box(low=0, high=255, shape=(self.size, self.size, 4), + dtype=np.uint8) + + def action_space(self): + return self.env.action_space + + def reward_range(self): + return self.env.reward_range + + def metadata(self): + return self.env.metadata + + def close(self): + return self.env.close() + + def reset(self): + self.count = 0 + self.env.reset() + self.lives = self.env.ale.lives() + self._grayscale_obs(self.screen_buffer[0]) + self.screen_buffer[1].fill(0) + + return np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1) + + def render(self, mode): + + return self.env.render(mode) + + def step(self, action): + + total_reward = 0. + observation = [] + for t in range(self.frame_skip): + self.count += 1 + _, reward, terminal, info = self.env.step(action) + total_reward += reward + + if self.terminal_on_life_loss: + lives = self.env.ale.lives() + is_terminal = terminal or lives < self.lives + self.lives = lives + else: + is_terminal = terminal + + if is_terminal: + break + elif t >= self.frame_skip - 2: + t_ = t - (self.frame_skip - 2) + self._grayscale_obs(self.screen_buffer[t_]) + + observation.append(self._pool_and_resize()) + while len(observation) > 0 and len(observation) < self.frame_skip: + observation.append(observation[-1]) + if len(observation) > 0: + observation = np.stack(observation, axis=-1) + else: + observation = np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1) + if self.count >= self.max_episode_steps: + terminal = True + self.terminal = terminal + return observation, total_reward, is_terminal, info + + def _grayscale_obs(self, output): + + self.env.ale.getScreenGrayscale(output) + return output + + def _pool_and_resize(self): + + if self.frame_skip > 1: + np.maximum(self.screen_buffer[0], self.screen_buffer[1], + out=self.screen_buffer[0]) + + transformed_image = cv2.resize(self.screen_buffer[0], + (self.size, self.size), + interpolation=cv2.INTER_AREA) + int_image = np.asarray(transformed_image, dtype=np.uint8) + # return np.expand_dims(int_image, axis=2) + return int_image + + +if __name__ == '__main__': + create_atari_environment() diff --git a/tianshou/env/mujoco/__init__.py b/tianshou/env/mujoco/__init__.py new file mode 100644 index 0000000..5f53fd2 --- /dev/null +++ b/tianshou/env/mujoco/__init__.py @@ -0,0 +1,26 @@ +from gym.envs.registration import register +import gym + +register( + id='PointMaze-v0', + entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv', + kwargs={ + "maze_size_scaling": 4, + "maze_id": "Maze2", + "maze_height": 0.5, + "manual_collision": True, + "goal": (1, 3), + } +) + +register( + id='PointMaze-v1', + entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv', + kwargs={ + "maze_size_scaling": 2, + "maze_id": "Maze2", + "maze_height": 0.5, + "manual_collision": True, + "goal": (1, 3), + } +) diff --git a/tianshou/env/mujoco/assets/point.xml b/tianshou/env/mujoco/assets/point.xml new file mode 100644 index 0000000..38cc644 --- /dev/null +++ b/tianshou/env/mujoco/assets/point.xml @@ -0,0 +1,34 @@ + + + diff --git a/tianshou/env/mujoco/maze_env_utils.py b/tianshou/env/mujoco/maze_env_utils.py new file mode 100644 index 0000000..059432e --- /dev/null +++ b/tianshou/env/mujoco/maze_env_utils.py @@ -0,0 +1,196 @@ +"""Adapted from rllab maze_env_utils.py.""" +import numpy as np +import math + + +class Move(object): + X = 11 + Y = 12 + Z = 13 + XY = 14 + XZ = 15 + YZ = 16 + XYZ = 17 + SpinXY = 18 + + +def can_move_x(movable): + return movable in [Move.X, Move.XY, Move.XZ, Move.XYZ, + Move.SpinXY] + + +def can_move_y(movable): + return movable in [Move.Y, Move.XY, Move.YZ, Move.XYZ, + Move.SpinXY] + + +def can_move_z(movable): + return movable in [Move.Z, Move.XZ, Move.YZ, Move.XYZ] + + +def can_spin(movable): + return movable in [Move.SpinXY] + + +def can_move(movable): + return can_move_x(movable) or can_move_y(movable) or can_move_z(movable) + + +def construct_maze(maze_id='Maze'): + if maze_id == 'Maze': + structure = [ + [1, 1, 1, 1, 1], + [1, 'r', 0, 0, 1], + [1, 1, 1, 0, 1], + [1, 'g', 0, 0, 1], + [1, 1, 1, 1, 1], + ] + elif maze_id == 'Maze1': + structure = [ + [1, 1, 1, 1, 1, 1, 1, 1], + [1, 'r', 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 1, 1, 0, 1], + [1, 1, 1, 1, 1, 0, 0, 1], + [1, 0, 0, 0, 1, 0, 1, 1], + [1, 0, 0, 0, 1, 0, 1, 1], + [1, 0, 1, 0, 0, 0, 0, 1], + [1, 1, 1, 1, 1, 1, 1, 1], + ] + elif maze_id == 'Maze2': + structure = [ + [0, 0, 0, 0, 0], + [0, 'r', 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + [0, 0, 0, 0, 0], + ] + # transfer maze + elif maze_id == 'Maze3': + structure = [ + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1], + [1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + ] + elif maze_id == 'Maze4': + structure = [ + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], + [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1], + [1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1], + [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], + ] + elif maze_id == 'Push': + structure = [ + [1, 1, 1, 1, 1], + [1, 0, 'r', 1, 1], + [1, 0, Move.XY, 0, 1], + [1, 1, 0, 1, 1], + [1, 1, 1, 1, 1], + ] + elif maze_id == 'Fall': + structure = [ + [1, 1, 1, 1], + [1, 'r', 0, 1], + [1, 0, Move.YZ, 1], + [1, -1, -1, 1], + [1, 0, 0, 1], + [1, 1, 1, 1], + ] + elif maze_id == 'Block': + O = 'r' + structure = [ + [1, 1, 1, 1, 1], + [1, O, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 1, 1, 1, 1], + ] + elif maze_id == 'BlockMaze': + O = 'r' + structure = [ + [1, 1, 1, 1], + [1, O, 0, 1], + [1, 1, 0, 1], + [1, 0, 0, 1], + [1, 1, 1, 1], + ] + else: + raise NotImplementedError('The provided MazeId %s is not recognized' % maze_id) + + return structure + + +def line_intersect(pt1, pt2, ptA, ptB): + """ + Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html + this returns the intersection of Line(pt1,pt2) and Line(ptA,ptB) + """ + + DET_TOLERANCE = 0.00000001 + + # the first line is pt1 + r*(pt2-pt1) + # in component form: + x1, y1 = pt1 + x2, y2 = pt2 + dx1 = x2 - x1 + dy1 = y2 - y1 + + # the second line is ptA + s*(ptB-ptA) + x, y = ptA + xB, yB = ptB + dx = xB - x + dy = yB - y + + DET = (-dx1 * dy + dy1 * dx) + + if math.fabs(DET) < DET_TOLERANCE: return (0, 0, 0, 0, 0) + + # now, the determinant should be OK + DETinv = 1.0 / DET + + # find the scalar amount along the "self" segment + r = DETinv * (-dy * (x - x1) + dx * (y - y1)) + + # find the scalar amount along the input line + s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1)) + + # return the average of the two descriptions + xi = (x1 + r * dx1 + x + s * dx) / 2.0 + yi = (y1 + r * dy1 + y + s * dy) / 2.0 + return (xi, yi, 1, r, s) + + +def ray_segment_intersect(ray, segment): + """ + Check if the ray originated from (x, y) with direction theta intersects the line segment (x1, y1) -- (x2, y2), + and return the intersection point if there is one + """ + (x, y), theta = ray + # (x1, y1), (x2, y2) = segment + pt1 = (x, y) + len = 1 + pt2 = (x + len * math.cos(theta), y + len * math.sin(theta)) + xo, yo, valid, r, s = line_intersect(pt1, pt2, *segment) + if valid and r >= 0 and 0 <= s <= 1: + return (xo, yo) + return None + + +def point_distance(p1, p2): + x1, y1 = p1 + x2, y2 = p2 + return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5 diff --git a/tianshou/env/mujoco/point.py b/tianshou/env/mujoco/point.py new file mode 100644 index 0000000..2a6a08d --- /dev/null +++ b/tianshou/env/mujoco/point.py @@ -0,0 +1,93 @@ +"""Wrapper for creating the ant environment in gym_mujoco.""" + +import math +import numpy as np +from gym import utils +from gym.envs.mujoco import mujoco_env + + +class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle): + FILE = "point.xml" + ORI_IND = 2 + + def __init__(self, file_path=None, expose_all_qpos=True, noisy_init=False): + self._expose_all_qpos = expose_all_qpos + self.noisy_init = noisy_init + mujoco_env.MujocoEnv.__init__(self, file_path, 1) + utils.EzPickle.__init__(self) + + @property + def physics(self): + return self.model + + def _step(self, a): + return self.step(a) + + def step(self, action): + # action[0] is velocity, action[1] is direction + action[0] = 0.2 * action[0] + qpos = np.copy(self.data.qpos) + qpos[2] += action[1] + ori = qpos[2] + # compute increment in each direction + dx = math.cos(ori) * action[0] + dy = math.sin(ori) * action[0] + # ensure that the robot is within reasonable range + qpos[0] = np.clip(qpos[0] + dx, -100, 100) + qpos[1] = np.clip(qpos[1] + dy, -100, 100) + qvel = np.squeeze(self.data.qvel) + self.set_state(qpos, qvel) + for _ in range(0, self.frame_skip): + # self.physics.step() + self.sim.step() + next_obs = self._get_obs() + reward = 0 + done = False + info = {} + return next_obs, reward, done, info + + def _get_obs(self): + if self._expose_all_qpos: + return np.concatenate([ + self.data.qpos.flat[:3], # Only point-relevant coords. + self.data.qvel.flat[:3]]) + return np.concatenate([ + self.data.qpos.flat[2:3], + self.data.qvel.flat[:3]]) + + def reset_model(self): + if self.noisy_init: + qpos = self.init_qpos + self.np_random.uniform( + size=self.model.nq, low=-.1, high=.1) + qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1 + + else: + qpos = self.init_qpos + qvel = self.init_qvel + + # Set everything other than point to original position and 0 velocity. + qpos[3:] = self.init_qpos[3:] + qvel[3:] = 0. + self.set_state(qpos, qvel) + return self._get_obs() + + def get_ori(self): + return self.data.qpos[self.__class__.ORI_IND] + + def set_xy(self, xy): + qpos = np.copy(self.data.qpos) + qpos[0] = xy[0] + qpos[1] = xy[1] + + qvel = self.data.qvel + self.set_state(qpos, qvel) + + def get_xy(self): + qpos = np.copy(self.data.qpos) + return qpos[:2] + + def viewer_setup(self): + + self.viewer.cam.trackbodyid = -1 + self.viewer.cam.distance = 80 + self.viewer.cam.elevation = -90 diff --git a/tianshou/env/mujoco/point_maze_env.py b/tianshou/env/mujoco/point_maze_env.py new file mode 100644 index 0000000..9eb495b --- /dev/null +++ b/tianshou/env/mujoco/point_maze_env.py @@ -0,0 +1,563 @@ +"""Adapted from rllab maze_env.py.""" + +import os +import tempfile +import xml.etree.ElementTree as ET +import math +import numpy as np +import gym +from . import maze_env_utils +from .point import PointEnv +from gym.utils import seeding + +# Directory that contains mujoco xml files. +MODEL_DIR = os.path.join(os.path.dirname(__file__), 'assets') + + +class PointMazeEnv(gym.Env): + MODEL_CLASS = PointEnv + + MAZE_HEIGHT = None + MAZE_SIZE_SCALING = None + + def __init__( + self, + maze_id=None, + maze_height=0.5, + maze_size_scaling=8, + n_bins=0, + sensor_range=3., + sensor_span=2 * math.pi, + observe_blocks=False, + put_spin_near_agent=False, + top_down_view=False, + manual_collision=False, + goal=None, + EPS=0.25, + max_episode_steps=2000, + *args, + **kwargs): + self._maze_id = maze_id + + model_cls = self.__class__.MODEL_CLASS + if model_cls is None: + raise "MODEL_CLASS unspecified!" + xml_path = os.path.join(MODEL_DIR, model_cls.FILE) + self.tree = tree = ET.parse(xml_path) + self.worldbody = worldbody = tree.find(".//worldbody") + self.visualize_goal = False + self.max_episode_steps = max_episode_steps + self.t = 0 + self.MAZE_HEIGHT = height = maze_height + self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling + self._n_bins = n_bins + self._sensor_range = sensor_range * size_scaling + self._sensor_span = sensor_span + self._observe_blocks = observe_blocks + self._put_spin_near_agent = put_spin_near_agent + self._top_down_view = top_down_view + self._manual_collision = manual_collision + + self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze( + maze_id=self._maze_id) + # Elevate the maze to allow for falling. + self.elevated = any(-1 in row for row in structure) + self.blocks = any( + any(maze_env_utils.can_move(r) for r in row) + for row in structure) # Are there any movable blocks? + + torso_x, torso_y = self._find_robot() # x, y coordinates + self._init_torso_x = torso_x + self._init_torso_y = torso_y + self._init_positions = [ + (x - torso_x, y - torso_y) + for x, y in self._find_all_robots()] + + self._view = np.zeros([5, 5, 3]) + + height_offset = 0. + if self.elevated: + height_offset = height * size_scaling + torso = tree.find(".//body[@name='torso']") + torso.set('pos', '0 0 %.2f' % (0.75 + height_offset)) + if self.blocks: + default = tree.find(".//default") + default.find('.//geom').set('solimp', '.995 .995 .01') + + self.movable_blocks = [] + for i in range(len(structure)): + for j in range(len(structure[0])): + struct = structure[i][j] + if struct == 'r' and self._put_spin_near_agent: + struct = maze_env_utils.Move.SpinXY + if self.elevated and struct not in [-1]: + # Create elevated platform. + ET.SubElement( + worldbody, "geom", + name="elevated_%d_%d" % (i, j), + pos="%f %f %f" % (j * size_scaling - torso_x, + i * size_scaling - torso_y, + height / 2 * size_scaling), + size="%f %f %f" % (0.5 * size_scaling, + 0.5 * size_scaling, + height / 2 * size_scaling), + type="box", + material="", + contype="1", + conaffinity="1", + rgba="0.9 0.9 0.9 1", + ) + if struct == 1: # Unmovable block. + # Offset all coordinates so that robot starts at the origin. + ET.SubElement( + worldbody, "geom", + name="block_%d_%d" % (i, j), + pos="%f %f %f" % (j * size_scaling - torso_x, + i * size_scaling - torso_y, + height_offset + + height / 2 * size_scaling), + size="%f %f %f" % (0.5 * size_scaling, + 0.5 * size_scaling, + height / 2 * size_scaling), + type="box", + material="", + contype="1", + conaffinity="1", + rgba="0.4 0.4 0.4 1", + ) + elif maze_env_utils.can_move(struct): + name = "movable_%d_%d" % (i, j) + self.movable_blocks.append((name, struct)) + falling = maze_env_utils.can_move_z(struct) + spinning = maze_env_utils.can_spin(struct) + x_offset = 0.25 * size_scaling if spinning else 0.0 + y_offset = 0.0 + shrink = 0.1 if spinning else 0.99 if falling else 1.0 + height_shrink = 0.1 if spinning else 1.0 + movable_body = ET.SubElement( + worldbody, "body", + name=name, + pos="%f %f %f" % (j * size_scaling - torso_x + x_offset, + i * size_scaling - torso_y + y_offset, + height_offset + + height / 2 * size_scaling * height_shrink), + ) + ET.SubElement( + movable_body, "geom", + name="block_%d_%d" % (i, j), + pos="0 0 0", + size="%f %f %f" % (0.5 * size_scaling * shrink, + 0.5 * size_scaling * shrink, + height / 2 * size_scaling * height_shrink), + type="box", + material="", + mass="0.001" if falling else "0.0002", + contype="1", + conaffinity="1", + rgba="0.9 0.1 0.1 1" + ) + if maze_env_utils.can_move_x(struct): + ET.SubElement( + movable_body, "joint", + armature="0", + axis="1 0 0", + damping="0.0", + limited="true" if falling else "false", + range="%f %f" % (-size_scaling, size_scaling), + margin="0.01", + name="movable_x_%d_%d" % (i, j), + pos="0 0 0", + type="slide" + ) + if maze_env_utils.can_move_y(struct): + ET.SubElement( + movable_body, "joint", + armature="0", + axis="0 1 0", + damping="0.0", + limited="true" if falling else "false", + range="%f %f" % (-size_scaling, size_scaling), + margin="0.01", + name="movable_y_%d_%d" % (i, j), + pos="0 0 0", + type="slide" + ) + if maze_env_utils.can_move_z(struct): + ET.SubElement( + movable_body, "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="true", + range="%f 0" % (-height_offset), + margin="0.01", + name="movable_z_%d_%d" % (i, j), + pos="0 0 0", + type="slide" + ) + if maze_env_utils.can_spin(struct): + ET.SubElement( + movable_body, "joint", + armature="0", + axis="0 0 1", + damping="0.0", + limited="false", + name="spinable_%d_%d" % (i, j), + pos="0 0 0", + type="ball" + ) + + torso = tree.find(".//body[@name='torso']") + geoms = torso.findall(".//geom") + for geom in geoms: + if 'name' not in geom.attrib: + raise Exception("Every geom of the torso must have a name " + "defined") + + _, file_path = tempfile.mkstemp(text=True, suffix='.xml') + tree.write(file_path) + + self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs) + self.args = args + self.kwargs = kwargs + self.GOAL = goal + if self.GOAL is not None: + self.GOAL = self.unwrapped._rowcol_to_xy(*self.GOAL) + self.EPS = EPS + + def get_ori(self): + return self.wrapped_env.get_ori() + + def get_top_down_view(self): + self._view = np.zeros_like(self._view) + + def valid(row, col): + return self._view.shape[0] > row >= 0 and self._view.shape[1] > col >= 0 + + def update_view(x, y, d, row=None, col=None): + if row is None or col is None: + x = x - self._robot_x + y = y - self._robot_y + th = self._robot_ori + + row, col = self._xy_to_rowcol(x, y) + update_view(x, y, d, row=row, col=col) + return + + row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1 + if row_frac < 0: + row_frac += 1 + if col_frac < 0: + col_frac += 1 + + if valid(row, col): + self._view[row, col, d] += ( + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + if valid(row - 1, col): + self._view[row - 1, col, d] += ( + (max(0., 0.5 - row_frac)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + if valid(row + 1, col): + self._view[row + 1, col, d] += ( + (max(0., row_frac - 0.5)) * + (min(1., col_frac + 0.5) - max(0., col_frac - 0.5))) + if valid(row, col - 1): + self._view[row, col - 1, d] += ( + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (max(0., 0.5 - col_frac))) + if valid(row, col + 1): + self._view[row, col + 1, d] += ( + (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) * + (max(0., col_frac - 0.5))) + if valid(row - 1, col - 1): + self._view[row - 1, col - 1, d] += ( + (max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac)) + if valid(row - 1, col + 1): + self._view[row - 1, col + 1, d] += ( + (max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5)) + if valid(row + 1, col + 1): + self._view[row + 1, col + 1, d] += ( + (max(0., row_frac - 0.5)) * max(0., col_frac - 0.5)) + if valid(row + 1, col - 1): + self._view[row + 1, col - 1, d] += ( + (max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac)) + + # Draw ant. + robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2] + self._robot_x = robot_x + self._robot_y = robot_y + self._robot_ori = self.get_ori() + + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + height = self.MAZE_HEIGHT + + # Draw immovable blocks and chasms. + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == 1: # Wall. + update_view(j * size_scaling - self._init_torso_x, + i * size_scaling - self._init_torso_y, + 0) + if structure[i][j] == -1: # Chasm. + update_view(j * size_scaling - self._init_torso_x, + i * size_scaling - self._init_torso_y, + 1) + + # Draw movable blocks. + for block_name, block_type in self.movable_blocks: + block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2] + update_view(block_x, block_y, 2) + + import cv2 + cv2.imshow('x.jpg', cv2.resize(np.uint8(self._view * 255), (512, 512), interpolation=cv2.INTER_CUBIC)) + cv2.waitKey(0) + + return self._view + + def get_range_sensor_obs(self): + """Returns egocentric range sensor observations of maze.""" + robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3] + ori = self.get_ori() + + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + height = self.MAZE_HEIGHT + + segments = [] + # Get line segments (corresponding to outer boundary) of each immovable + # block or drop-off. + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] in [1, -1]: # There's a wall or drop-off. + cx = j * size_scaling - self._init_torso_x + cy = i * size_scaling - self._init_torso_y + x1 = cx - 0.5 * size_scaling + x2 = cx + 0.5 * size_scaling + y1 = cy - 0.5 * size_scaling + y2 = cy + 0.5 * size_scaling + struct_segments = [ + ((x1, y1), (x2, y1)), + ((x2, y1), (x2, y2)), + ((x2, y2), (x1, y2)), + ((x1, y2), (x1, y1)), + ] + for seg in struct_segments: + segments.append(dict( + segment=seg, + type=structure[i][j], + )) + + for block_name, block_type in self.movable_blocks: + block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[ + :3] + if (block_z + height * size_scaling / 2 >= robot_z and + robot_z >= block_z - height * size_scaling / 2): # Block in view. + x1 = block_x - 0.5 * size_scaling + x2 = block_x + 0.5 * size_scaling + y1 = block_y - 0.5 * size_scaling + y2 = block_y + 0.5 * size_scaling + struct_segments = [ + ((x1, y1), (x2, y1)), + ((x2, y1), (x2, y2)), + ((x2, y2), (x1, y2)), + ((x1, y2), (x1, y1)), + ] + for seg in struct_segments: + segments.append(dict( + segment=seg, + type=block_type, + )) + + # 3 for wall, drop-off, block + sensor_readings = np.zeros((self._n_bins, 3)) + for ray_idx in range(self._n_bins): + ray_ori = (ori - self._sensor_span * 0.5 + + (2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span) + ray_segments = [] + # Get all segments that intersect with ray. + for seg in segments: + p = maze_env_utils.ray_segment_intersect( + ray=((robot_x, robot_y), ray_ori), + segment=seg["segment"]) + if p is not None: + ray_segments.append(dict( + segment=seg["segment"], + type=seg["type"], + ray_ori=ray_ori, + distance=maze_env_utils.point_distance( + p, (robot_x, robot_y)), + )) + if len(ray_segments) > 0: + # Find out which segment is intersected first. + first_seg = sorted( + ray_segments, key=lambda x: x["distance"])[0] + seg_type = first_seg["type"] + idx = (0 if seg_type == 1 else # Wall. + 1 if seg_type == -1 else # Drop-off. + 2 if maze_env_utils.can_move(seg_type) else # Block. + None) + if first_seg["distance"] <= self._sensor_range: + sensor_readings[ray_idx][idx] = ( + self._sensor_range - first_seg[ + "distance"]) / self._sensor_range + return sensor_readings + + def _get_obs(self): + wrapped_obs = self.wrapped_env._get_obs() + # print("ant obs", wrapped_obs) + if self._top_down_view: + view = [self.get_top_down_view().flat] + else: + view = [] + + if self._observe_blocks: + additional_obs = [] + for block_name, block_type in self.movable_blocks: + additional_obs.append( + self.wrapped_env.get_body_com(block_name)) + wrapped_obs = np.concatenate([wrapped_obs[:3]] + additional_obs + + [wrapped_obs[3:]]) + + range_sensor_obs = self.get_range_sensor_obs() + return wrapped_obs + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def reset(self, goal=None): + self.goal = goal + + if self.visualize_goal: # remove the prev goal and add a new goal + goal_x, goal_y = goal[0], goal[1] + size_scaling = self.MAZE_SIZE_SCALING + # remove the original goal + try: + self.worldbody.remove(self.goal_element) + except AttributeError: + pass + # offset all coordinates so that robot starts at the origin + self.goal_element = \ + ET.SubElement( + self.worldbody, "geom", + name="goal_%d_%d" % (goal_x, goal_y), + pos="%f %f %f" % (goal_x, + goal_y, + self.MAZE_HEIGHT / 2 * size_scaling), + size="%f %f %f" % (0.1 * size_scaling, # smaller than the block to prevent collision + 0.1 * size_scaling, + self.MAZE_HEIGHT / 2 * size_scaling), + type="box", + material="", + contype="1", + conaffinity="1", + rgba="1.0 0.0 0.0 0.5" + ) + # Note: running the lines below will make the robot position wrong! (because the graph is rebuilt) + torso = self.tree.find(".//body[@name='torso']") + geoms = torso.findall(".//geom") + for geom in geoms: + if 'name' not in geom.attrib: + raise Exception("Every geom of the torso must have a name " + "defined") + _, file_path = tempfile.mkstemp(text=True, suffix='.xml') + self.tree.write( + file_path) # here we write a temporal file with the robot specifications. Why not the original one?? + + model_cls = self.__class__.MODEL_CLASS + self.wrapped_env = model_cls(*self.args, file_path=file_path, + **self.kwargs) # file to the robot specifications; model_cls is AntEnv + + self.t = 0 + self.trajectory = [] + self.wrapped_env.reset() + if len(self._init_positions) > 1: + xy = self._init_positions[self.np_random.randint(len(self._init_positions))] + self.wrapped_env.set_xy(xy) + return self._get_obs() + + @property + def viewer(self): + return self.wrapped_env.viewer + + def render(self, *args, **kwargs): + return self.wrapped_env.render(*args, **kwargs) + + @property + def observation_space(self): + shape = self._get_obs().shape + high = np.inf * np.ones(shape) + low = -high + return gym.spaces.Box(low, high) + + @property + def action_space(self): + return self.wrapped_env.action_space + + def _find_robot(self): + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == 'r': + return j * size_scaling, i * size_scaling + assert False, 'No robot in maze specification.' + + def _find_all_robots(self): + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + coords = [] + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == 'r': + coords.append((j * size_scaling, i * size_scaling)) + return coords + + def _is_in_collision(self, pos): + x, y = pos + structure = self.MAZE_STRUCTURE + size_scaling = self.MAZE_SIZE_SCALING + for i in range(len(structure)): + for j in range(len(structure[0])): + if structure[i][j] == 1: + minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x + maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x + miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y + maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y + if minx <= x <= maxx and miny <= y <= maxy: + return True + return False + + def _rowcol_to_xy(self, j, i): + size_scaling = self.MAZE_SIZE_SCALING + minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x + maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x + miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y + maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y + return (minx + maxx) / 2, (miny + maxy) / 2 + + def step(self, action): + self.t += 1 + if self._manual_collision: + old_pos = self.wrapped_env.get_xy() + inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step( + action) + new_pos = self.wrapped_env.get_xy() + if self._is_in_collision(new_pos): + self.wrapped_env.set_xy(old_pos) + else: + inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step( + action) + next_obs = self._get_obs() + done = False + if self.goal is not None: + done = bool(((next_obs[:2] - self.goal[:2]) ** 2).sum() < self.EPS) + + new_pos = self.wrapped_env.get_xy() + if self._is_in_collision(new_pos) or inner_done: + done = True + if self.t >= self.max_episode_steps: + done = True + return next_obs, inner_reward, done, info