From 068c4068ec735d4a0c77c545ca63233ed8262f1c Mon Sep 17 00:00:00 2001
From: Minghao Zhang <37171450+Mehooz@users.noreply.github.com>
Date: Sat, 28 Mar 2020 12:03:49 +0800
Subject: [PATCH] fix atari/mujoco env (#7)

* update atari.py

* fix setup.py
pass the pytest

* fix setup.py
pass the pytest

* add args "render"

* change the tensorboard writter

* change the tensorboard writter

* change device, render, tensorboard log location

* change device, render, tensorboard log location

* remove some wrong local files

* fix some tab mistakes and the envs name in continuous/test_xx.py

* add examples and point robot maze environment

* fix some bugs during testing examples

* add dqn network and fix some args

* change back the tensorboard writter's frequency to ensure ppo and a2c can write things normally

* add a warning to collector

* rm some unrelated files

* reformat

* fix a bug in test_dqn due to the model wrong selection

* change atari frame skip and observation to improve performance

* readd some files

* change import

* modified readme

* rm tensorboard log

* update atari and mujoco which are ignored

* rm the wrong lines
---
 .gitignore                            |   3 +
 README.md                             |   2 +-
 examples/discrete_net.py              |  10 +-
 test/discrete/net.py                  |  11 +-
 tianshou/env/atari.py                 | 124 ++++++
 tianshou/env/mujoco/__init__.py       |  26 ++
 tianshou/env/mujoco/assets/point.xml  |  34 ++
 tianshou/env/mujoco/maze_env_utils.py | 196 +++++++++
 tianshou/env/mujoco/point.py          |  93 +++++
 tianshou/env/mujoco/point_maze_env.py | 563 ++++++++++++++++++++++++++
 10 files changed, 1053 insertions(+), 9 deletions(-)
 create mode 100644 tianshou/env/atari.py
 create mode 100644 tianshou/env/mujoco/__init__.py
 create mode 100644 tianshou/env/mujoco/assets/point.xml
 create mode 100644 tianshou/env/mujoco/maze_env_utils.py
 create mode 100644 tianshou/env/mujoco/point.py
 create mode 100644 tianshou/env/mujoco/point_maze_env.py

diff --git a/.gitignore b/.gitignore
index 28b6ccc..4d6d8b9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,9 @@ __pycache__/
 # C extensions
 *.so
 
+# .idea folder
+.idea/
+
 # Distribution / packaging
 .Python
 build/
diff --git a/README.md b/README.md
index 458d08d..6b7e913 100644
--- a/README.md
+++ b/README.md
@@ -212,7 +212,7 @@ If you find Tianshou useful, please cite it in your publications.
 
 ```latex
 @misc{tianshou,
-  author = {Jiayi Weng},
+  author = {Jiayi Weng, Minghao Zhang},
   title = {Tianshou},
   year = {2020},
   publisher = {GitHub},
diff --git a/examples/discrete_net.py b/examples/discrete_net.py
index 14d2c6e..9c7a8a4 100644
--- a/examples/discrete_net.py
+++ b/examples/discrete_net.py
@@ -34,7 +34,7 @@ class Actor(nn.Module):
 
     def forward(self, s, state=None, info={}):
         logits, h = self.preprocess(s, state)
-        logits = F.softmax(self.last(logits), dim=-1)
+        logits = F.softmax(logits, dim=-1)
         return logits, h
 
 
@@ -56,7 +56,7 @@ class DQN(nn.Module):
         super(DQN, self).__init__()
         self.device = device
 
-        self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2)
+        self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2)
         self.bn1 = nn.BatchNorm2d(16)
         self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
         self.bn2 = nn.BatchNorm2d(32)
@@ -69,7 +69,8 @@ class DQN(nn.Module):
         convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
         convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
         linear_input_size = convw * convh * 32
-        self.head = nn.Linear(linear_input_size, action_shape)
+        self.fc = nn.Linear(linear_input_size, 512)
+        self.head = nn.Linear(512, action_shape)
 
     def forward(self, x, state=None, info={}):
         if not isinstance(x, torch.Tensor):
@@ -78,4 +79,5 @@ class DQN(nn.Module):
         x = F.relu(self.bn1(self.conv1(x)))
         x = F.relu(self.bn2(self.conv2(x)))
         x = F.relu(self.bn3(self.conv3(x)))
-        return self.head(x.view(x.size(0), -1)), state
+        x = self.fc(x.reshape(x.size(0), -1))
+        return self.head(x), state
diff --git a/test/discrete/net.py b/test/discrete/net.py
index dbbb382..e3d9a4a 100644
--- a/test/discrete/net.py
+++ b/test/discrete/net.py
@@ -51,11 +51,12 @@ class Critic(nn.Module):
 
 
 class DQN(nn.Module):
+
     def __init__(self, h, w, action_shape, device='cpu'):
         super(DQN, self).__init__()
         self.device = device
 
-        self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
+        self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2)
         self.bn1 = nn.BatchNorm2d(16)
         self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
         self.bn2 = nn.BatchNorm2d(32)
@@ -68,12 +69,14 @@ class DQN(nn.Module):
         convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
         convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
         linear_input_size = convw * convh * 32
-        self.head = nn.Linear(linear_input_size, action_shape)
+        self.fc = nn.Linear(linear_input_size, 512)
+        self.head = nn.Linear(512, action_shape)
 
     def forward(self, x, state=None, info={}):
         if not isinstance(x, torch.Tensor):
-            x = torch.tensor(x, device=self.device, dtype=torch.float)
+            s = torch.tensor(x, device=self.device, dtype=torch.float)
         x = F.relu(self.bn1(self.conv1(x)))
         x = F.relu(self.bn2(self.conv2(x)))
         x = F.relu(self.bn3(self.conv3(x)))
-        return self.head(x.view(x.size(0), -1)), state
+        x = self.fc(x.reshape(x.size(0), -1))
+        return self.head(x), state
diff --git a/tianshou/env/atari.py b/tianshou/env/atari.py
new file mode 100644
index 0000000..4e243cd
--- /dev/null
+++ b/tianshou/env/atari.py
@@ -0,0 +1,124 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import cv2
+import gym
+import numpy as np
+from gym.spaces.box import Box
+
+
+def create_atari_environment(name=None, sticky_actions=True, max_episode_steps=2000):
+    game_version = 'v0' if sticky_actions else 'v4'
+    name = '{}NoFrameskip-{}'.format(name, game_version)
+    env = gym.make(name)
+    env = env.env
+    env = preprocessing(env, max_episode_steps=max_episode_steps)
+    return env
+
+
+class preprocessing(object):
+
+    def __init__(self, env, frame_skip=4, terminal_on_life_loss=False,
+                 size=84, max_episode_steps=2000):
+        self.max_episode_steps = max_episode_steps
+        self.env = env
+        self.terminal_on_life_loss = terminal_on_life_loss
+        self.frame_skip = frame_skip
+        self.size = size
+        self.count = 0
+        obs_dims = self.env.observation_space
+
+        self.screen_buffer = [
+            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8),
+            np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8)
+        ]
+
+        self.game_over = False
+        self.lives = 0
+
+    @property
+    def observation_space(self):
+
+        return Box(low=0, high=255, shape=(self.size, self.size, 4),
+                   dtype=np.uint8)
+
+    def action_space(self):
+        return self.env.action_space
+
+    def reward_range(self):
+        return self.env.reward_range
+
+    def metadata(self):
+        return self.env.metadata
+
+    def close(self):
+        return self.env.close()
+
+    def reset(self):
+        self.count = 0
+        self.env.reset()
+        self.lives = self.env.ale.lives()
+        self._grayscale_obs(self.screen_buffer[0])
+        self.screen_buffer[1].fill(0)
+
+        return np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1)
+
+    def render(self, mode):
+
+        return self.env.render(mode)
+
+    def step(self, action):
+
+        total_reward = 0.
+        observation = []
+        for t in range(self.frame_skip):
+            self.count += 1
+            _, reward, terminal, info = self.env.step(action)
+            total_reward += reward
+
+            if self.terminal_on_life_loss:
+                lives = self.env.ale.lives()
+                is_terminal = terminal or lives < self.lives
+                self.lives = lives
+            else:
+                is_terminal = terminal
+
+            if is_terminal:
+                break
+            elif t >= self.frame_skip - 2:
+                t_ = t - (self.frame_skip - 2)
+                self._grayscale_obs(self.screen_buffer[t_])
+
+            observation.append(self._pool_and_resize())
+        while len(observation) > 0 and len(observation) < self.frame_skip:
+            observation.append(observation[-1])
+        if len(observation) > 0:
+            observation = np.stack(observation, axis=-1)
+        else:
+            observation = np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1)
+        if self.count >= self.max_episode_steps:
+            terminal = True
+        self.terminal = terminal
+        return observation, total_reward, is_terminal, info
+
+    def _grayscale_obs(self, output):
+
+        self.env.ale.getScreenGrayscale(output)
+        return output
+
+    def _pool_and_resize(self):
+
+        if self.frame_skip > 1:
+            np.maximum(self.screen_buffer[0], self.screen_buffer[1],
+                       out=self.screen_buffer[0])
+
+        transformed_image = cv2.resize(self.screen_buffer[0],
+                                       (self.size, self.size),
+                                       interpolation=cv2.INTER_AREA)
+        int_image = np.asarray(transformed_image, dtype=np.uint8)
+        # return np.expand_dims(int_image, axis=2)
+        return int_image
+
+
+if __name__ == '__main__':
+    create_atari_environment()
diff --git a/tianshou/env/mujoco/__init__.py b/tianshou/env/mujoco/__init__.py
new file mode 100644
index 0000000..5f53fd2
--- /dev/null
+++ b/tianshou/env/mujoco/__init__.py
@@ -0,0 +1,26 @@
+from gym.envs.registration import register
+import gym
+
+register(
+    id='PointMaze-v0',
+    entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv',
+    kwargs={
+        "maze_size_scaling": 4,
+        "maze_id": "Maze2",
+        "maze_height": 0.5,
+        "manual_collision": True,
+        "goal": (1, 3),
+    }
+)
+
+register(
+    id='PointMaze-v1',
+    entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv',
+    kwargs={
+        "maze_size_scaling": 2,
+        "maze_id": "Maze2",
+        "maze_height": 0.5,
+        "manual_collision": True,
+        "goal": (1, 3),
+    }
+)
diff --git a/tianshou/env/mujoco/assets/point.xml b/tianshou/env/mujoco/assets/point.xml
new file mode 100644
index 0000000..38cc644
--- /dev/null
+++ b/tianshou/env/mujoco/assets/point.xml
@@ -0,0 +1,34 @@
+<mujoco>
+    <compiler inertiafromgeom="true" angle="degree" coordinate="local"/>
+    <option timestep="0.02" integrator="RK4"/>
+    <default>
+        <joint limited="false" armature="0" damping="0"/>
+        <geom condim="3" conaffinity="0" margin="0" friction="1 0.5 0.5" rgba="0.8 0.6 0.4 1" density="100"/>
+    </default>
+    <asset>
+        <texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0"/>
+        <texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4"
+                 rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01"/>
+        <texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100"/>
+        <material name='MatPlane' texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5"/>
+        <material name='geom' texture="texgeom" texuniform="true"/>
+    </asset>
+    <worldbody>
+        <light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3"
+               dir="-0 0 -1.3"/>
+        <geom name='floor' material="MatPlane" pos='0 0 0' size='40 40 40' type='plane' conaffinity='1'
+              rgba='0.8 0.9 0.8 1' condim='3'/>
+        <body name="torso" pos="0 0 0">
+            <geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5"/>
+            <geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5"/>
+            <joint name='ballx' type='slide' axis='1 0 0' pos='0 0 0'/>
+            <joint name='bally' type='slide' axis='0 1 0' pos='0 0 0'/>
+            <joint name='rot' type='hinge' axis='0 0 1' pos='0 0 0' limited="false"/>
+        </body>
+    </worldbody>
+    <actuator>
+        <!-- Those are just dummy actuators for providing ranges -->
+        <motor joint='ballx' ctrlrange="-1 1" ctrllimited="true"/>
+        <motor joint='rot' ctrlrange="-0.25 0.25" ctrllimited="true"/>
+    </actuator>
+</mujoco>
diff --git a/tianshou/env/mujoco/maze_env_utils.py b/tianshou/env/mujoco/maze_env_utils.py
new file mode 100644
index 0000000..059432e
--- /dev/null
+++ b/tianshou/env/mujoco/maze_env_utils.py
@@ -0,0 +1,196 @@
+"""Adapted from rllab maze_env_utils.py."""
+import numpy as np
+import math
+
+
+class Move(object):
+    X = 11
+    Y = 12
+    Z = 13
+    XY = 14
+    XZ = 15
+    YZ = 16
+    XYZ = 17
+    SpinXY = 18
+
+
+def can_move_x(movable):
+    return movable in [Move.X, Move.XY, Move.XZ, Move.XYZ,
+                       Move.SpinXY]
+
+
+def can_move_y(movable):
+    return movable in [Move.Y, Move.XY, Move.YZ, Move.XYZ,
+                       Move.SpinXY]
+
+
+def can_move_z(movable):
+    return movable in [Move.Z, Move.XZ, Move.YZ, Move.XYZ]
+
+
+def can_spin(movable):
+    return movable in [Move.SpinXY]
+
+
+def can_move(movable):
+    return can_move_x(movable) or can_move_y(movable) or can_move_z(movable)
+
+
+def construct_maze(maze_id='Maze'):
+    if maze_id == 'Maze':
+        structure = [
+            [1, 1, 1, 1, 1],
+            [1, 'r', 0, 0, 1],
+            [1, 1, 1, 0, 1],
+            [1, 'g', 0, 0, 1],
+            [1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'Maze1':
+        structure = [
+            [1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 'r', 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 1, 1, 0, 1],
+            [1, 1, 1, 1, 1, 0, 0, 1],
+            [1, 0, 0, 0, 1, 0, 1, 1],
+            [1, 0, 0, 0, 1, 0, 1, 1],
+            [1, 0, 1, 0, 0, 0, 0, 1],
+            [1, 1, 1, 1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'Maze2':
+        structure = [
+            [0, 0, 0, 0, 0],
+            [0, 'r', 0, 0, 0],
+            [0, 0, 0, 0, 0],
+            [0, 0, 0, 0, 0],
+            [0, 0, 0, 0, 0],
+        ]
+    # transfer maze
+    elif maze_id == 'Maze3':
+        structure = [
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1],
+            [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
+            [1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1],
+            [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'Maze4':
+        structure = [
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
+            [1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1],
+            [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'Push':
+        structure = [
+            [1, 1, 1, 1, 1],
+            [1, 0, 'r', 1, 1],
+            [1, 0, Move.XY, 0, 1],
+            [1, 1, 0, 1, 1],
+            [1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'Fall':
+        structure = [
+            [1, 1, 1, 1],
+            [1, 'r', 0, 1],
+            [1, 0, Move.YZ, 1],
+            [1, -1, -1, 1],
+            [1, 0, 0, 1],
+            [1, 1, 1, 1],
+        ]
+    elif maze_id == 'Block':
+        O = 'r'
+        structure = [
+            [1, 1, 1, 1, 1],
+            [1, O, 0, 0, 1],
+            [1, 0, 0, 0, 1],
+            [1, 0, 0, 0, 1],
+            [1, 1, 1, 1, 1],
+        ]
+    elif maze_id == 'BlockMaze':
+        O = 'r'
+        structure = [
+            [1, 1, 1, 1],
+            [1, O, 0, 1],
+            [1, 1, 0, 1],
+            [1, 0, 0, 1],
+            [1, 1, 1, 1],
+        ]
+    else:
+        raise NotImplementedError('The provided MazeId %s is not recognized' % maze_id)
+
+    return structure
+
+
+def line_intersect(pt1, pt2, ptA, ptB):
+    """
+    Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html
+    this returns the intersection of Line(pt1,pt2) and Line(ptA,ptB)
+    """
+
+    DET_TOLERANCE = 0.00000001
+
+    # the first line is pt1 + r*(pt2-pt1)
+    # in component form:
+    x1, y1 = pt1
+    x2, y2 = pt2
+    dx1 = x2 - x1
+    dy1 = y2 - y1
+
+    # the second line is ptA + s*(ptB-ptA)
+    x, y = ptA
+    xB, yB = ptB
+    dx = xB - x
+    dy = yB - y
+
+    DET = (-dx1 * dy + dy1 * dx)
+
+    if math.fabs(DET) < DET_TOLERANCE: return (0, 0, 0, 0, 0)
+
+    # now, the determinant should be OK
+    DETinv = 1.0 / DET
+
+    # find the scalar amount along the "self" segment
+    r = DETinv * (-dy * (x - x1) + dx * (y - y1))
+
+    # find the scalar amount along the input line
+    s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1))
+
+    # return the average of the two descriptions
+    xi = (x1 + r * dx1 + x + s * dx) / 2.0
+    yi = (y1 + r * dy1 + y + s * dy) / 2.0
+    return (xi, yi, 1, r, s)
+
+
+def ray_segment_intersect(ray, segment):
+    """
+    Check if the ray originated from (x, y) with direction theta intersects the line segment (x1, y1) -- (x2, y2),
+    and return the intersection point if there is one
+    """
+    (x, y), theta = ray
+    # (x1, y1), (x2, y2) = segment
+    pt1 = (x, y)
+    len = 1
+    pt2 = (x + len * math.cos(theta), y + len * math.sin(theta))
+    xo, yo, valid, r, s = line_intersect(pt1, pt2, *segment)
+    if valid and r >= 0 and 0 <= s <= 1:
+        return (xo, yo)
+    return None
+
+
+def point_distance(p1, p2):
+    x1, y1 = p1
+    x2, y2 = p2
+    return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
diff --git a/tianshou/env/mujoco/point.py b/tianshou/env/mujoco/point.py
new file mode 100644
index 0000000..2a6a08d
--- /dev/null
+++ b/tianshou/env/mujoco/point.py
@@ -0,0 +1,93 @@
+"""Wrapper for creating the ant environment in gym_mujoco."""
+
+import math
+import numpy as np
+from gym import utils
+from gym.envs.mujoco import mujoco_env
+
+
+class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):
+    FILE = "point.xml"
+    ORI_IND = 2
+
+    def __init__(self, file_path=None, expose_all_qpos=True, noisy_init=False):
+        self._expose_all_qpos = expose_all_qpos
+        self.noisy_init = noisy_init
+        mujoco_env.MujocoEnv.__init__(self, file_path, 1)
+        utils.EzPickle.__init__(self)
+
+    @property
+    def physics(self):
+        return self.model
+
+    def _step(self, a):
+        return self.step(a)
+
+    def step(self, action):
+        # action[0] is velocity, action[1] is direction
+        action[0] = 0.2 * action[0]
+        qpos = np.copy(self.data.qpos)
+        qpos[2] += action[1]
+        ori = qpos[2]
+        # compute increment in each direction
+        dx = math.cos(ori) * action[0]
+        dy = math.sin(ori) * action[0]
+        # ensure that the robot is within reasonable range
+        qpos[0] = np.clip(qpos[0] + dx, -100, 100)
+        qpos[1] = np.clip(qpos[1] + dy, -100, 100)
+        qvel = np.squeeze(self.data.qvel)
+        self.set_state(qpos, qvel)
+        for _ in range(0, self.frame_skip):
+            # self.physics.step()
+            self.sim.step()
+        next_obs = self._get_obs()
+        reward = 0
+        done = False
+        info = {}
+        return next_obs, reward, done, info
+
+    def _get_obs(self):
+        if self._expose_all_qpos:
+            return np.concatenate([
+                self.data.qpos.flat[:3],  # Only point-relevant coords.
+                self.data.qvel.flat[:3]])
+        return np.concatenate([
+            self.data.qpos.flat[2:3],
+            self.data.qvel.flat[:3]])
+
+    def reset_model(self):
+        if self.noisy_init:
+            qpos = self.init_qpos + self.np_random.uniform(
+                size=self.model.nq, low=-.1, high=.1)
+            qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
+
+        else:
+            qpos = self.init_qpos
+            qvel = self.init_qvel
+
+        # Set everything other than point to original position and 0 velocity.
+        qpos[3:] = self.init_qpos[3:]
+        qvel[3:] = 0.
+        self.set_state(qpos, qvel)
+        return self._get_obs()
+
+    def get_ori(self):
+        return self.data.qpos[self.__class__.ORI_IND]
+
+    def set_xy(self, xy):
+        qpos = np.copy(self.data.qpos)
+        qpos[0] = xy[0]
+        qpos[1] = xy[1]
+
+        qvel = self.data.qvel
+        self.set_state(qpos, qvel)
+
+    def get_xy(self):
+        qpos = np.copy(self.data.qpos)
+        return qpos[:2]
+
+    def viewer_setup(self):
+
+        self.viewer.cam.trackbodyid = -1
+        self.viewer.cam.distance = 80
+        self.viewer.cam.elevation = -90
diff --git a/tianshou/env/mujoco/point_maze_env.py b/tianshou/env/mujoco/point_maze_env.py
new file mode 100644
index 0000000..9eb495b
--- /dev/null
+++ b/tianshou/env/mujoco/point_maze_env.py
@@ -0,0 +1,563 @@
+"""Adapted from rllab maze_env.py."""
+
+import os
+import tempfile
+import xml.etree.ElementTree as ET
+import math
+import numpy as np
+import gym
+from . import maze_env_utils
+from .point import PointEnv
+from gym.utils import seeding
+
+# Directory that contains mujoco xml files.
+MODEL_DIR = os.path.join(os.path.dirname(__file__), 'assets')
+
+
+class PointMazeEnv(gym.Env):
+    MODEL_CLASS = PointEnv
+
+    MAZE_HEIGHT = None
+    MAZE_SIZE_SCALING = None
+
+    def __init__(
+            self,
+            maze_id=None,
+            maze_height=0.5,
+            maze_size_scaling=8,
+            n_bins=0,
+            sensor_range=3.,
+            sensor_span=2 * math.pi,
+            observe_blocks=False,
+            put_spin_near_agent=False,
+            top_down_view=False,
+            manual_collision=False,
+            goal=None,
+            EPS=0.25,
+            max_episode_steps=2000,
+            *args,
+            **kwargs):
+        self._maze_id = maze_id
+
+        model_cls = self.__class__.MODEL_CLASS
+        if model_cls is None:
+            raise "MODEL_CLASS unspecified!"
+        xml_path = os.path.join(MODEL_DIR, model_cls.FILE)
+        self.tree = tree = ET.parse(xml_path)
+        self.worldbody = worldbody = tree.find(".//worldbody")
+        self.visualize_goal = False
+        self.max_episode_steps = max_episode_steps
+        self.t = 0
+        self.MAZE_HEIGHT = height = maze_height
+        self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
+        self._n_bins = n_bins
+        self._sensor_range = sensor_range * size_scaling
+        self._sensor_span = sensor_span
+        self._observe_blocks = observe_blocks
+        self._put_spin_near_agent = put_spin_near_agent
+        self._top_down_view = top_down_view
+        self._manual_collision = manual_collision
+
+        self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(
+            maze_id=self._maze_id)
+        # Elevate the maze to allow for falling.
+        self.elevated = any(-1 in row for row in structure)
+        self.blocks = any(
+            any(maze_env_utils.can_move(r) for r in row)
+            for row in structure)  # Are there any movable blocks?
+
+        torso_x, torso_y = self._find_robot()  # x, y coordinates
+        self._init_torso_x = torso_x
+        self._init_torso_y = torso_y
+        self._init_positions = [
+            (x - torso_x, y - torso_y)
+            for x, y in self._find_all_robots()]
+
+        self._view = np.zeros([5, 5, 3])
+
+        height_offset = 0.
+        if self.elevated:
+            height_offset = height * size_scaling
+            torso = tree.find(".//body[@name='torso']")
+            torso.set('pos', '0 0 %.2f' % (0.75 + height_offset))
+        if self.blocks:
+            default = tree.find(".//default")
+            default.find('.//geom').set('solimp', '.995 .995 .01')
+
+        self.movable_blocks = []
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                struct = structure[i][j]
+                if struct == 'r' and self._put_spin_near_agent:
+                    struct = maze_env_utils.Move.SpinXY
+                if self.elevated and struct not in [-1]:
+                    # Create elevated platform.
+                    ET.SubElement(
+                        worldbody, "geom",
+                        name="elevated_%d_%d" % (i, j),
+                        pos="%f %f %f" % (j * size_scaling - torso_x,
+                                          i * size_scaling - torso_y,
+                                          height / 2 * size_scaling),
+                        size="%f %f %f" % (0.5 * size_scaling,
+                                           0.5 * size_scaling,
+                                           height / 2 * size_scaling),
+                        type="box",
+                        material="",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.9 0.9 0.9 1",
+                    )
+                if struct == 1:  # Unmovable block.
+                    # Offset all coordinates so that robot starts at the origin.
+                    ET.SubElement(
+                        worldbody, "geom",
+                        name="block_%d_%d" % (i, j),
+                        pos="%f %f %f" % (j * size_scaling - torso_x,
+                                          i * size_scaling - torso_y,
+                                          height_offset +
+                                          height / 2 * size_scaling),
+                        size="%f %f %f" % (0.5 * size_scaling,
+                                           0.5 * size_scaling,
+                                           height / 2 * size_scaling),
+                        type="box",
+                        material="",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.4 0.4 0.4 1",
+                    )
+                elif maze_env_utils.can_move(struct):
+                    name = "movable_%d_%d" % (i, j)
+                    self.movable_blocks.append((name, struct))
+                    falling = maze_env_utils.can_move_z(struct)
+                    spinning = maze_env_utils.can_spin(struct)
+                    x_offset = 0.25 * size_scaling if spinning else 0.0
+                    y_offset = 0.0
+                    shrink = 0.1 if spinning else 0.99 if falling else 1.0
+                    height_shrink = 0.1 if spinning else 1.0
+                    movable_body = ET.SubElement(
+                        worldbody, "body",
+                        name=name,
+                        pos="%f %f %f" % (j * size_scaling - torso_x + x_offset,
+                                          i * size_scaling - torso_y + y_offset,
+                                          height_offset +
+                                          height / 2 * size_scaling * height_shrink),
+                    )
+                    ET.SubElement(
+                        movable_body, "geom",
+                        name="block_%d_%d" % (i, j),
+                        pos="0 0 0",
+                        size="%f %f %f" % (0.5 * size_scaling * shrink,
+                                           0.5 * size_scaling * shrink,
+                                           height / 2 * size_scaling * height_shrink),
+                        type="box",
+                        material="",
+                        mass="0.001" if falling else "0.0002",
+                        contype="1",
+                        conaffinity="1",
+                        rgba="0.9 0.1 0.1 1"
+                    )
+                    if maze_env_utils.can_move_x(struct):
+                        ET.SubElement(
+                            movable_body, "joint",
+                            armature="0",
+                            axis="1 0 0",
+                            damping="0.0",
+                            limited="true" if falling else "false",
+                            range="%f %f" % (-size_scaling, size_scaling),
+                            margin="0.01",
+                            name="movable_x_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide"
+                        )
+                    if maze_env_utils.can_move_y(struct):
+                        ET.SubElement(
+                            movable_body, "joint",
+                            armature="0",
+                            axis="0 1 0",
+                            damping="0.0",
+                            limited="true" if falling else "false",
+                            range="%f %f" % (-size_scaling, size_scaling),
+                            margin="0.01",
+                            name="movable_y_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide"
+                        )
+                    if maze_env_utils.can_move_z(struct):
+                        ET.SubElement(
+                            movable_body, "joint",
+                            armature="0",
+                            axis="0 0 1",
+                            damping="0.0",
+                            limited="true",
+                            range="%f 0" % (-height_offset),
+                            margin="0.01",
+                            name="movable_z_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="slide"
+                        )
+                    if maze_env_utils.can_spin(struct):
+                        ET.SubElement(
+                            movable_body, "joint",
+                            armature="0",
+                            axis="0 0 1",
+                            damping="0.0",
+                            limited="false",
+                            name="spinable_%d_%d" % (i, j),
+                            pos="0 0 0",
+                            type="ball"
+                        )
+
+        torso = tree.find(".//body[@name='torso']")
+        geoms = torso.findall(".//geom")
+        for geom in geoms:
+            if 'name' not in geom.attrib:
+                raise Exception("Every geom of the torso must have a name "
+                                "defined")
+
+        _, file_path = tempfile.mkstemp(text=True, suffix='.xml')
+        tree.write(file_path)
+
+        self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)
+        self.args = args
+        self.kwargs = kwargs
+        self.GOAL = goal
+        if self.GOAL is not None:
+            self.GOAL = self.unwrapped._rowcol_to_xy(*self.GOAL)
+        self.EPS = EPS
+
+    def get_ori(self):
+        return self.wrapped_env.get_ori()
+
+    def get_top_down_view(self):
+        self._view = np.zeros_like(self._view)
+
+        def valid(row, col):
+            return self._view.shape[0] > row >= 0 and self._view.shape[1] > col >= 0
+
+        def update_view(x, y, d, row=None, col=None):
+            if row is None or col is None:
+                x = x - self._robot_x
+                y = y - self._robot_y
+                th = self._robot_ori
+
+                row, col = self._xy_to_rowcol(x, y)
+                update_view(x, y, d, row=row, col=col)
+                return
+
+            row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1
+            if row_frac < 0:
+                row_frac += 1
+            if col_frac < 0:
+                col_frac += 1
+
+            if valid(row, col):
+                self._view[row, col, d] += (
+                        (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
+                        (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
+            if valid(row - 1, col):
+                self._view[row - 1, col, d] += (
+                        (max(0., 0.5 - row_frac)) *
+                        (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
+            if valid(row + 1, col):
+                self._view[row + 1, col, d] += (
+                        (max(0., row_frac - 0.5)) *
+                        (min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
+            if valid(row, col - 1):
+                self._view[row, col - 1, d] += (
+                        (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
+                        (max(0., 0.5 - col_frac)))
+            if valid(row, col + 1):
+                self._view[row, col + 1, d] += (
+                        (min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
+                        (max(0., col_frac - 0.5)))
+            if valid(row - 1, col - 1):
+                self._view[row - 1, col - 1, d] += (
+                        (max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac))
+            if valid(row - 1, col + 1):
+                self._view[row - 1, col + 1, d] += (
+                        (max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5))
+            if valid(row + 1, col + 1):
+                self._view[row + 1, col + 1, d] += (
+                        (max(0., row_frac - 0.5)) * max(0., col_frac - 0.5))
+            if valid(row + 1, col - 1):
+                self._view[row + 1, col - 1, d] += (
+                        (max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac))
+
+        # Draw ant.
+        robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
+        self._robot_x = robot_x
+        self._robot_y = robot_y
+        self._robot_ori = self.get_ori()
+
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        height = self.MAZE_HEIGHT
+
+        # Draw immovable blocks and chasms.
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] == 1:  # Wall.
+                    update_view(j * size_scaling - self._init_torso_x,
+                                i * size_scaling - self._init_torso_y,
+                                0)
+                if structure[i][j] == -1:  # Chasm.
+                    update_view(j * size_scaling - self._init_torso_x,
+                                i * size_scaling - self._init_torso_y,
+                                1)
+
+        # Draw movable blocks.
+        for block_name, block_type in self.movable_blocks:
+            block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
+            update_view(block_x, block_y, 2)
+
+        import cv2
+        cv2.imshow('x.jpg', cv2.resize(np.uint8(self._view * 255), (512, 512), interpolation=cv2.INTER_CUBIC))
+        cv2.waitKey(0)
+
+        return self._view
+
+    def get_range_sensor_obs(self):
+        """Returns egocentric range sensor observations of maze."""
+        robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3]
+        ori = self.get_ori()
+
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        height = self.MAZE_HEIGHT
+
+        segments = []
+        # Get line segments (corresponding to outer boundary) of each immovable
+        # block or drop-off.
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] in [1, -1]:  # There's a wall or drop-off.
+                    cx = j * size_scaling - self._init_torso_x
+                    cy = i * size_scaling - self._init_torso_y
+                    x1 = cx - 0.5 * size_scaling
+                    x2 = cx + 0.5 * size_scaling
+                    y1 = cy - 0.5 * size_scaling
+                    y2 = cy + 0.5 * size_scaling
+                    struct_segments = [
+                        ((x1, y1), (x2, y1)),
+                        ((x2, y1), (x2, y2)),
+                        ((x2, y2), (x1, y2)),
+                        ((x1, y2), (x1, y1)),
+                    ]
+                    for seg in struct_segments:
+                        segments.append(dict(
+                            segment=seg,
+                            type=structure[i][j],
+                        ))
+
+        for block_name, block_type in self.movable_blocks:
+            block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[
+                                        :3]
+            if (block_z + height * size_scaling / 2 >= robot_z and
+                    robot_z >= block_z - height * size_scaling / 2):  # Block in view.
+                x1 = block_x - 0.5 * size_scaling
+                x2 = block_x + 0.5 * size_scaling
+                y1 = block_y - 0.5 * size_scaling
+                y2 = block_y + 0.5 * size_scaling
+                struct_segments = [
+                    ((x1, y1), (x2, y1)),
+                    ((x2, y1), (x2, y2)),
+                    ((x2, y2), (x1, y2)),
+                    ((x1, y2), (x1, y1)),
+                ]
+                for seg in struct_segments:
+                    segments.append(dict(
+                        segment=seg,
+                        type=block_type,
+                    ))
+
+        # 3 for wall, drop-off, block
+        sensor_readings = np.zeros((self._n_bins, 3))
+        for ray_idx in range(self._n_bins):
+            ray_ori = (ori - self._sensor_span * 0.5 +
+                       (2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span)
+            ray_segments = []
+            # Get all segments that intersect with ray.
+            for seg in segments:
+                p = maze_env_utils.ray_segment_intersect(
+                    ray=((robot_x, robot_y), ray_ori),
+                    segment=seg["segment"])
+                if p is not None:
+                    ray_segments.append(dict(
+                        segment=seg["segment"],
+                        type=seg["type"],
+                        ray_ori=ray_ori,
+                        distance=maze_env_utils.point_distance(
+                            p, (robot_x, robot_y)),
+                    ))
+            if len(ray_segments) > 0:
+                # Find out which segment is intersected first.
+                first_seg = sorted(
+                    ray_segments, key=lambda x: x["distance"])[0]
+                seg_type = first_seg["type"]
+                idx = (0 if seg_type == 1 else  # Wall.
+                       1 if seg_type == -1 else  # Drop-off.
+                       2 if maze_env_utils.can_move(seg_type) else  # Block.
+                       None)
+                if first_seg["distance"] <= self._sensor_range:
+                    sensor_readings[ray_idx][idx] = (
+                                                            self._sensor_range - first_seg[
+                                                        "distance"]) / self._sensor_range
+        return sensor_readings
+
+    def _get_obs(self):
+        wrapped_obs = self.wrapped_env._get_obs()
+        # print("ant obs", wrapped_obs)
+        if self._top_down_view:
+            view = [self.get_top_down_view().flat]
+        else:
+            view = []
+
+        if self._observe_blocks:
+            additional_obs = []
+            for block_name, block_type in self.movable_blocks:
+                additional_obs.append(
+                    self.wrapped_env.get_body_com(block_name))
+            wrapped_obs = np.concatenate([wrapped_obs[:3]] + additional_obs +
+                                         [wrapped_obs[3:]])
+
+        range_sensor_obs = self.get_range_sensor_obs()
+        return wrapped_obs
+
+    def seed(self, seed=None):
+        self.np_random, seed = seeding.np_random(seed)
+        return [seed]
+
+    def reset(self, goal=None):
+        self.goal = goal
+
+        if self.visualize_goal:  # remove the prev goal and add a new goal
+            goal_x, goal_y = goal[0], goal[1]
+            size_scaling = self.MAZE_SIZE_SCALING
+            # remove the original goal
+            try:
+                self.worldbody.remove(self.goal_element)
+            except AttributeError:
+                pass
+            # offset all coordinates so that robot starts at the origin
+            self.goal_element = \
+                ET.SubElement(
+                    self.worldbody, "geom",
+                    name="goal_%d_%d" % (goal_x, goal_y),
+                    pos="%f %f %f" % (goal_x,
+                                      goal_y,
+                                      self.MAZE_HEIGHT / 2 * size_scaling),
+                    size="%f %f %f" % (0.1 * size_scaling,  # smaller than the block to prevent collision
+                                       0.1 * size_scaling,
+                                       self.MAZE_HEIGHT / 2 * size_scaling),
+                    type="box",
+                    material="",
+                    contype="1",
+                    conaffinity="1",
+                    rgba="1.0 0.0 0.0 0.5"
+                )
+            # Note: running the lines below will make the robot position wrong! (because the graph is rebuilt)
+            torso = self.tree.find(".//body[@name='torso']")
+            geoms = torso.findall(".//geom")
+            for geom in geoms:
+                if 'name' not in geom.attrib:
+                    raise Exception("Every geom of the torso must have a name "
+                                    "defined")
+            _, file_path = tempfile.mkstemp(text=True, suffix='.xml')
+            self.tree.write(
+                file_path)  # here we write a temporal file with the robot specifications. Why not the original one??
+
+            model_cls = self.__class__.MODEL_CLASS
+            self.wrapped_env = model_cls(*self.args, file_path=file_path,
+                                         **self.kwargs)  # file to the robot specifications; model_cls is AntEnv
+
+        self.t = 0
+        self.trajectory = []
+        self.wrapped_env.reset()
+        if len(self._init_positions) > 1:
+            xy = self._init_positions[self.np_random.randint(len(self._init_positions))]
+            self.wrapped_env.set_xy(xy)
+        return self._get_obs()
+
+    @property
+    def viewer(self):
+        return self.wrapped_env.viewer
+
+    def render(self, *args, **kwargs):
+        return self.wrapped_env.render(*args, **kwargs)
+
+    @property
+    def observation_space(self):
+        shape = self._get_obs().shape
+        high = np.inf * np.ones(shape)
+        low = -high
+        return gym.spaces.Box(low, high)
+
+    @property
+    def action_space(self):
+        return self.wrapped_env.action_space
+
+    def _find_robot(self):
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] == 'r':
+                    return j * size_scaling, i * size_scaling
+        assert False, 'No robot in maze specification.'
+
+    def _find_all_robots(self):
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        coords = []
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] == 'r':
+                    coords.append((j * size_scaling, i * size_scaling))
+        return coords
+
+    def _is_in_collision(self, pos):
+        x, y = pos
+        structure = self.MAZE_STRUCTURE
+        size_scaling = self.MAZE_SIZE_SCALING
+        for i in range(len(structure)):
+            for j in range(len(structure[0])):
+                if structure[i][j] == 1:
+                    minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x
+                    maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x
+                    miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y
+                    maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y
+                    if minx <= x <= maxx and miny <= y <= maxy:
+                        return True
+        return False
+
+    def _rowcol_to_xy(self, j, i):
+        size_scaling = self.MAZE_SIZE_SCALING
+        minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x
+        maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x
+        miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y
+        maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y
+        return (minx + maxx) / 2, (miny + maxy) / 2
+
+    def step(self, action):
+        self.t += 1
+        if self._manual_collision:
+            old_pos = self.wrapped_env.get_xy()
+            inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step(
+                action)
+            new_pos = self.wrapped_env.get_xy()
+            if self._is_in_collision(new_pos):
+                self.wrapped_env.set_xy(old_pos)
+        else:
+            inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step(
+                action)
+        next_obs = self._get_obs()
+        done = False
+        if self.goal is not None:
+            done = bool(((next_obs[:2] - self.goal[:2]) ** 2).sum() < self.EPS)
+
+        new_pos = self.wrapped_env.get_xy()
+        if self._is_in_collision(new_pos) or inner_done:
+            done = True
+        if self.t >= self.max_episode_steps:
+            done = True
+        return next_obs, inner_reward, done, info