fix atari/mujoco env (#7)
* update atari.py * fix setup.py pass the pytest * fix setup.py pass the pytest * add args "render" * change the tensorboard writter * change the tensorboard writter * change device, render, tensorboard log location * change device, render, tensorboard log location * remove some wrong local files * fix some tab mistakes and the envs name in continuous/test_xx.py * add examples and point robot maze environment * fix some bugs during testing examples * add dqn network and fix some args * change back the tensorboard writter's frequency to ensure ppo and a2c can write things normally * add a warning to collector * rm some unrelated files * reformat * fix a bug in test_dqn due to the model wrong selection * change atari frame skip and observation to improve performance * readd some files * change import * modified readme * rm tensorboard log * update atari and mujoco which are ignored * rm the wrong lines
This commit is contained in:
parent
c42990c725
commit
068c4068ec
3
.gitignore
vendored
3
.gitignore
vendored
@ -6,6 +6,9 @@ __pycache__/
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# .idea folder
|
||||
.idea/
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
|
@ -212,7 +212,7 @@ If you find Tianshou useful, please cite it in your publications.
|
||||
|
||||
```latex
|
||||
@misc{tianshou,
|
||||
author = {Jiayi Weng},
|
||||
author = {Jiayi Weng, Minghao Zhang},
|
||||
title = {Tianshou},
|
||||
year = {2020},
|
||||
publisher = {GitHub},
|
||||
|
@ -34,7 +34,7 @@ class Actor(nn.Module):
|
||||
|
||||
def forward(self, s, state=None, info={}):
|
||||
logits, h = self.preprocess(s, state)
|
||||
logits = F.softmax(self.last(logits), dim=-1)
|
||||
logits = F.softmax(logits, dim=-1)
|
||||
return logits, h
|
||||
|
||||
|
||||
@ -56,7 +56,7 @@ class DQN(nn.Module):
|
||||
super(DQN, self).__init__()
|
||||
self.device = device
|
||||
|
||||
self.conv1 = nn.Conv2d(1, 16, kernel_size=5, stride=2)
|
||||
self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2)
|
||||
self.bn1 = nn.BatchNorm2d(16)
|
||||
self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
|
||||
self.bn2 = nn.BatchNorm2d(32)
|
||||
@ -69,7 +69,8 @@ class DQN(nn.Module):
|
||||
convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
|
||||
convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
|
||||
linear_input_size = convw * convh * 32
|
||||
self.head = nn.Linear(linear_input_size, action_shape)
|
||||
self.fc = nn.Linear(linear_input_size, 512)
|
||||
self.head = nn.Linear(512, action_shape)
|
||||
|
||||
def forward(self, x, state=None, info={}):
|
||||
if not isinstance(x, torch.Tensor):
|
||||
@ -78,4 +79,5 @@ class DQN(nn.Module):
|
||||
x = F.relu(self.bn1(self.conv1(x)))
|
||||
x = F.relu(self.bn2(self.conv2(x)))
|
||||
x = F.relu(self.bn3(self.conv3(x)))
|
||||
return self.head(x.view(x.size(0), -1)), state
|
||||
x = self.fc(x.reshape(x.size(0), -1))
|
||||
return self.head(x), state
|
||||
|
@ -51,11 +51,12 @@ class Critic(nn.Module):
|
||||
|
||||
|
||||
class DQN(nn.Module):
|
||||
|
||||
def __init__(self, h, w, action_shape, device='cpu'):
|
||||
super(DQN, self).__init__()
|
||||
self.device = device
|
||||
|
||||
self.conv1 = nn.Conv2d(3, 16, kernel_size=5, stride=2)
|
||||
self.conv1 = nn.Conv2d(4, 16, kernel_size=5, stride=2)
|
||||
self.bn1 = nn.BatchNorm2d(16)
|
||||
self.conv2 = nn.Conv2d(16, 32, kernel_size=5, stride=2)
|
||||
self.bn2 = nn.BatchNorm2d(32)
|
||||
@ -68,12 +69,14 @@ class DQN(nn.Module):
|
||||
convw = conv2d_size_out(conv2d_size_out(conv2d_size_out(w)))
|
||||
convh = conv2d_size_out(conv2d_size_out(conv2d_size_out(h)))
|
||||
linear_input_size = convw * convh * 32
|
||||
self.head = nn.Linear(linear_input_size, action_shape)
|
||||
self.fc = nn.Linear(linear_input_size, 512)
|
||||
self.head = nn.Linear(512, action_shape)
|
||||
|
||||
def forward(self, x, state=None, info={}):
|
||||
if not isinstance(x, torch.Tensor):
|
||||
x = torch.tensor(x, device=self.device, dtype=torch.float)
|
||||
s = torch.tensor(x, device=self.device, dtype=torch.float)
|
||||
x = F.relu(self.bn1(self.conv1(x)))
|
||||
x = F.relu(self.bn2(self.conv2(x)))
|
||||
x = F.relu(self.bn3(self.conv3(x)))
|
||||
return self.head(x.view(x.size(0), -1)), state
|
||||
x = self.fc(x.reshape(x.size(0), -1))
|
||||
return self.head(x), state
|
||||
|
124
tianshou/env/atari.py
vendored
Normal file
124
tianshou/env/atari.py
vendored
Normal file
@ -0,0 +1,124 @@
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import cv2
|
||||
import gym
|
||||
import numpy as np
|
||||
from gym.spaces.box import Box
|
||||
|
||||
|
||||
def create_atari_environment(name=None, sticky_actions=True, max_episode_steps=2000):
|
||||
game_version = 'v0' if sticky_actions else 'v4'
|
||||
name = '{}NoFrameskip-{}'.format(name, game_version)
|
||||
env = gym.make(name)
|
||||
env = env.env
|
||||
env = preprocessing(env, max_episode_steps=max_episode_steps)
|
||||
return env
|
||||
|
||||
|
||||
class preprocessing(object):
|
||||
|
||||
def __init__(self, env, frame_skip=4, terminal_on_life_loss=False,
|
||||
size=84, max_episode_steps=2000):
|
||||
self.max_episode_steps = max_episode_steps
|
||||
self.env = env
|
||||
self.terminal_on_life_loss = terminal_on_life_loss
|
||||
self.frame_skip = frame_skip
|
||||
self.size = size
|
||||
self.count = 0
|
||||
obs_dims = self.env.observation_space
|
||||
|
||||
self.screen_buffer = [
|
||||
np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8),
|
||||
np.empty((obs_dims.shape[0], obs_dims.shape[1]), dtype=np.uint8)
|
||||
]
|
||||
|
||||
self.game_over = False
|
||||
self.lives = 0
|
||||
|
||||
@property
|
||||
def observation_space(self):
|
||||
|
||||
return Box(low=0, high=255, shape=(self.size, self.size, 4),
|
||||
dtype=np.uint8)
|
||||
|
||||
def action_space(self):
|
||||
return self.env.action_space
|
||||
|
||||
def reward_range(self):
|
||||
return self.env.reward_range
|
||||
|
||||
def metadata(self):
|
||||
return self.env.metadata
|
||||
|
||||
def close(self):
|
||||
return self.env.close()
|
||||
|
||||
def reset(self):
|
||||
self.count = 0
|
||||
self.env.reset()
|
||||
self.lives = self.env.ale.lives()
|
||||
self._grayscale_obs(self.screen_buffer[0])
|
||||
self.screen_buffer[1].fill(0)
|
||||
|
||||
return np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1)
|
||||
|
||||
def render(self, mode):
|
||||
|
||||
return self.env.render(mode)
|
||||
|
||||
def step(self, action):
|
||||
|
||||
total_reward = 0.
|
||||
observation = []
|
||||
for t in range(self.frame_skip):
|
||||
self.count += 1
|
||||
_, reward, terminal, info = self.env.step(action)
|
||||
total_reward += reward
|
||||
|
||||
if self.terminal_on_life_loss:
|
||||
lives = self.env.ale.lives()
|
||||
is_terminal = terminal or lives < self.lives
|
||||
self.lives = lives
|
||||
else:
|
||||
is_terminal = terminal
|
||||
|
||||
if is_terminal:
|
||||
break
|
||||
elif t >= self.frame_skip - 2:
|
||||
t_ = t - (self.frame_skip - 2)
|
||||
self._grayscale_obs(self.screen_buffer[t_])
|
||||
|
||||
observation.append(self._pool_and_resize())
|
||||
while len(observation) > 0 and len(observation) < self.frame_skip:
|
||||
observation.append(observation[-1])
|
||||
if len(observation) > 0:
|
||||
observation = np.stack(observation, axis=-1)
|
||||
else:
|
||||
observation = np.stack([self._pool_and_resize() for _ in range(self.frame_skip)], axis=-1)
|
||||
if self.count >= self.max_episode_steps:
|
||||
terminal = True
|
||||
self.terminal = terminal
|
||||
return observation, total_reward, is_terminal, info
|
||||
|
||||
def _grayscale_obs(self, output):
|
||||
|
||||
self.env.ale.getScreenGrayscale(output)
|
||||
return output
|
||||
|
||||
def _pool_and_resize(self):
|
||||
|
||||
if self.frame_skip > 1:
|
||||
np.maximum(self.screen_buffer[0], self.screen_buffer[1],
|
||||
out=self.screen_buffer[0])
|
||||
|
||||
transformed_image = cv2.resize(self.screen_buffer[0],
|
||||
(self.size, self.size),
|
||||
interpolation=cv2.INTER_AREA)
|
||||
int_image = np.asarray(transformed_image, dtype=np.uint8)
|
||||
# return np.expand_dims(int_image, axis=2)
|
||||
return int_image
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
create_atari_environment()
|
26
tianshou/env/mujoco/__init__.py
vendored
Normal file
26
tianshou/env/mujoco/__init__.py
vendored
Normal file
@ -0,0 +1,26 @@
|
||||
from gym.envs.registration import register
|
||||
import gym
|
||||
|
||||
register(
|
||||
id='PointMaze-v0',
|
||||
entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv',
|
||||
kwargs={
|
||||
"maze_size_scaling": 4,
|
||||
"maze_id": "Maze2",
|
||||
"maze_height": 0.5,
|
||||
"manual_collision": True,
|
||||
"goal": (1, 3),
|
||||
}
|
||||
)
|
||||
|
||||
register(
|
||||
id='PointMaze-v1',
|
||||
entry_point='tianshou.env.mujoco.point_maze_env:PointMazeEnv',
|
||||
kwargs={
|
||||
"maze_size_scaling": 2,
|
||||
"maze_id": "Maze2",
|
||||
"maze_height": 0.5,
|
||||
"manual_collision": True,
|
||||
"goal": (1, 3),
|
||||
}
|
||||
)
|
34
tianshou/env/mujoco/assets/point.xml
vendored
Normal file
34
tianshou/env/mujoco/assets/point.xml
vendored
Normal file
@ -0,0 +1,34 @@
|
||||
<mujoco>
|
||||
<compiler inertiafromgeom="true" angle="degree" coordinate="local"/>
|
||||
<option timestep="0.02" integrator="RK4"/>
|
||||
<default>
|
||||
<joint limited="false" armature="0" damping="0"/>
|
||||
<geom condim="3" conaffinity="0" margin="0" friction="1 0.5 0.5" rgba="0.8 0.6 0.4 1" density="100"/>
|
||||
</default>
|
||||
<asset>
|
||||
<texture type="skybox" builtin="gradient" width="100" height="100" rgb1="1 1 1" rgb2="0 0 0"/>
|
||||
<texture name="texgeom" type="cube" builtin="flat" mark="cross" width="127" height="1278" rgb1="0.8 0.6 0.4"
|
||||
rgb2="0.8 0.6 0.4" markrgb="1 1 1" random="0.01"/>
|
||||
<texture name="texplane" type="2d" builtin="checker" rgb1="0 0 0" rgb2="0.8 0.8 0.8" width="100" height="100"/>
|
||||
<material name='MatPlane' texture="texplane" shininess="1" texrepeat="30 30" specular="1" reflectance="0.5"/>
|
||||
<material name='geom' texture="texgeom" texuniform="true"/>
|
||||
</asset>
|
||||
<worldbody>
|
||||
<light directional="true" cutoff="100" exponent="1" diffuse="1 1 1" specular=".1 .1 .1" pos="0 0 1.3"
|
||||
dir="-0 0 -1.3"/>
|
||||
<geom name='floor' material="MatPlane" pos='0 0 0' size='40 40 40' type='plane' conaffinity='1'
|
||||
rgba='0.8 0.9 0.8 1' condim='3'/>
|
||||
<body name="torso" pos="0 0 0">
|
||||
<geom name="pointbody" type="sphere" size="0.5" pos="0 0 0.5"/>
|
||||
<geom name="pointarrow" type="box" size="0.5 0.1 0.1" pos="0.6 0 0.5"/>
|
||||
<joint name='ballx' type='slide' axis='1 0 0' pos='0 0 0'/>
|
||||
<joint name='bally' type='slide' axis='0 1 0' pos='0 0 0'/>
|
||||
<joint name='rot' type='hinge' axis='0 0 1' pos='0 0 0' limited="false"/>
|
||||
</body>
|
||||
</worldbody>
|
||||
<actuator>
|
||||
<!-- Those are just dummy actuators for providing ranges -->
|
||||
<motor joint='ballx' ctrlrange="-1 1" ctrllimited="true"/>
|
||||
<motor joint='rot' ctrlrange="-0.25 0.25" ctrllimited="true"/>
|
||||
</actuator>
|
||||
</mujoco>
|
196
tianshou/env/mujoco/maze_env_utils.py
vendored
Normal file
196
tianshou/env/mujoco/maze_env_utils.py
vendored
Normal file
@ -0,0 +1,196 @@
|
||||
"""Adapted from rllab maze_env_utils.py."""
|
||||
import numpy as np
|
||||
import math
|
||||
|
||||
|
||||
class Move(object):
|
||||
X = 11
|
||||
Y = 12
|
||||
Z = 13
|
||||
XY = 14
|
||||
XZ = 15
|
||||
YZ = 16
|
||||
XYZ = 17
|
||||
SpinXY = 18
|
||||
|
||||
|
||||
def can_move_x(movable):
|
||||
return movable in [Move.X, Move.XY, Move.XZ, Move.XYZ,
|
||||
Move.SpinXY]
|
||||
|
||||
|
||||
def can_move_y(movable):
|
||||
return movable in [Move.Y, Move.XY, Move.YZ, Move.XYZ,
|
||||
Move.SpinXY]
|
||||
|
||||
|
||||
def can_move_z(movable):
|
||||
return movable in [Move.Z, Move.XZ, Move.YZ, Move.XYZ]
|
||||
|
||||
|
||||
def can_spin(movable):
|
||||
return movable in [Move.SpinXY]
|
||||
|
||||
|
||||
def can_move(movable):
|
||||
return can_move_x(movable) or can_move_y(movable) or can_move_z(movable)
|
||||
|
||||
|
||||
def construct_maze(maze_id='Maze'):
|
||||
if maze_id == 'Maze':
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1],
|
||||
[1, 'r', 0, 0, 1],
|
||||
[1, 1, 1, 0, 1],
|
||||
[1, 'g', 0, 0, 1],
|
||||
[1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Maze1':
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1, 1, 1, 1],
|
||||
[1, 'r', 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 1, 1, 0, 1],
|
||||
[1, 1, 1, 1, 1, 0, 0, 1],
|
||||
[1, 0, 0, 0, 1, 0, 1, 1],
|
||||
[1, 0, 0, 0, 1, 0, 1, 1],
|
||||
[1, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 1, 1, 1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Maze2':
|
||||
structure = [
|
||||
[0, 0, 0, 0, 0],
|
||||
[0, 'r', 0, 0, 0],
|
||||
[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0],
|
||||
[0, 0, 0, 0, 0],
|
||||
]
|
||||
# transfer maze
|
||||
elif maze_id == 'Maze3':
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1],
|
||||
[1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
|
||||
[1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1],
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Maze4':
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 'r', 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 0, 1, 0, 0, 0, 'g', 1],
|
||||
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Push':
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1],
|
||||
[1, 0, 'r', 1, 1],
|
||||
[1, 0, Move.XY, 0, 1],
|
||||
[1, 1, 0, 1, 1],
|
||||
[1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Fall':
|
||||
structure = [
|
||||
[1, 1, 1, 1],
|
||||
[1, 'r', 0, 1],
|
||||
[1, 0, Move.YZ, 1],
|
||||
[1, -1, -1, 1],
|
||||
[1, 0, 0, 1],
|
||||
[1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'Block':
|
||||
O = 'r'
|
||||
structure = [
|
||||
[1, 1, 1, 1, 1],
|
||||
[1, O, 0, 0, 1],
|
||||
[1, 0, 0, 0, 1],
|
||||
[1, 0, 0, 0, 1],
|
||||
[1, 1, 1, 1, 1],
|
||||
]
|
||||
elif maze_id == 'BlockMaze':
|
||||
O = 'r'
|
||||
structure = [
|
||||
[1, 1, 1, 1],
|
||||
[1, O, 0, 1],
|
||||
[1, 1, 0, 1],
|
||||
[1, 0, 0, 1],
|
||||
[1, 1, 1, 1],
|
||||
]
|
||||
else:
|
||||
raise NotImplementedError('The provided MazeId %s is not recognized' % maze_id)
|
||||
|
||||
return structure
|
||||
|
||||
|
||||
def line_intersect(pt1, pt2, ptA, ptB):
|
||||
"""
|
||||
Taken from https://www.cs.hmc.edu/ACM/lectures/intersections.html
|
||||
this returns the intersection of Line(pt1,pt2) and Line(ptA,ptB)
|
||||
"""
|
||||
|
||||
DET_TOLERANCE = 0.00000001
|
||||
|
||||
# the first line is pt1 + r*(pt2-pt1)
|
||||
# in component form:
|
||||
x1, y1 = pt1
|
||||
x2, y2 = pt2
|
||||
dx1 = x2 - x1
|
||||
dy1 = y2 - y1
|
||||
|
||||
# the second line is ptA + s*(ptB-ptA)
|
||||
x, y = ptA
|
||||
xB, yB = ptB
|
||||
dx = xB - x
|
||||
dy = yB - y
|
||||
|
||||
DET = (-dx1 * dy + dy1 * dx)
|
||||
|
||||
if math.fabs(DET) < DET_TOLERANCE: return (0, 0, 0, 0, 0)
|
||||
|
||||
# now, the determinant should be OK
|
||||
DETinv = 1.0 / DET
|
||||
|
||||
# find the scalar amount along the "self" segment
|
||||
r = DETinv * (-dy * (x - x1) + dx * (y - y1))
|
||||
|
||||
# find the scalar amount along the input line
|
||||
s = DETinv * (-dy1 * (x - x1) + dx1 * (y - y1))
|
||||
|
||||
# return the average of the two descriptions
|
||||
xi = (x1 + r * dx1 + x + s * dx) / 2.0
|
||||
yi = (y1 + r * dy1 + y + s * dy) / 2.0
|
||||
return (xi, yi, 1, r, s)
|
||||
|
||||
|
||||
def ray_segment_intersect(ray, segment):
|
||||
"""
|
||||
Check if the ray originated from (x, y) with direction theta intersects the line segment (x1, y1) -- (x2, y2),
|
||||
and return the intersection point if there is one
|
||||
"""
|
||||
(x, y), theta = ray
|
||||
# (x1, y1), (x2, y2) = segment
|
||||
pt1 = (x, y)
|
||||
len = 1
|
||||
pt2 = (x + len * math.cos(theta), y + len * math.sin(theta))
|
||||
xo, yo, valid, r, s = line_intersect(pt1, pt2, *segment)
|
||||
if valid and r >= 0 and 0 <= s <= 1:
|
||||
return (xo, yo)
|
||||
return None
|
||||
|
||||
|
||||
def point_distance(p1, p2):
|
||||
x1, y1 = p1
|
||||
x2, y2 = p2
|
||||
return ((x1 - x2) ** 2 + (y1 - y2) ** 2) ** 0.5
|
93
tianshou/env/mujoco/point.py
vendored
Normal file
93
tianshou/env/mujoco/point.py
vendored
Normal file
@ -0,0 +1,93 @@
|
||||
"""Wrapper for creating the ant environment in gym_mujoco."""
|
||||
|
||||
import math
|
||||
import numpy as np
|
||||
from gym import utils
|
||||
from gym.envs.mujoco import mujoco_env
|
||||
|
||||
|
||||
class PointEnv(mujoco_env.MujocoEnv, utils.EzPickle):
|
||||
FILE = "point.xml"
|
||||
ORI_IND = 2
|
||||
|
||||
def __init__(self, file_path=None, expose_all_qpos=True, noisy_init=False):
|
||||
self._expose_all_qpos = expose_all_qpos
|
||||
self.noisy_init = noisy_init
|
||||
mujoco_env.MujocoEnv.__init__(self, file_path, 1)
|
||||
utils.EzPickle.__init__(self)
|
||||
|
||||
@property
|
||||
def physics(self):
|
||||
return self.model
|
||||
|
||||
def _step(self, a):
|
||||
return self.step(a)
|
||||
|
||||
def step(self, action):
|
||||
# action[0] is velocity, action[1] is direction
|
||||
action[0] = 0.2 * action[0]
|
||||
qpos = np.copy(self.data.qpos)
|
||||
qpos[2] += action[1]
|
||||
ori = qpos[2]
|
||||
# compute increment in each direction
|
||||
dx = math.cos(ori) * action[0]
|
||||
dy = math.sin(ori) * action[0]
|
||||
# ensure that the robot is within reasonable range
|
||||
qpos[0] = np.clip(qpos[0] + dx, -100, 100)
|
||||
qpos[1] = np.clip(qpos[1] + dy, -100, 100)
|
||||
qvel = np.squeeze(self.data.qvel)
|
||||
self.set_state(qpos, qvel)
|
||||
for _ in range(0, self.frame_skip):
|
||||
# self.physics.step()
|
||||
self.sim.step()
|
||||
next_obs = self._get_obs()
|
||||
reward = 0
|
||||
done = False
|
||||
info = {}
|
||||
return next_obs, reward, done, info
|
||||
|
||||
def _get_obs(self):
|
||||
if self._expose_all_qpos:
|
||||
return np.concatenate([
|
||||
self.data.qpos.flat[:3], # Only point-relevant coords.
|
||||
self.data.qvel.flat[:3]])
|
||||
return np.concatenate([
|
||||
self.data.qpos.flat[2:3],
|
||||
self.data.qvel.flat[:3]])
|
||||
|
||||
def reset_model(self):
|
||||
if self.noisy_init:
|
||||
qpos = self.init_qpos + self.np_random.uniform(
|
||||
size=self.model.nq, low=-.1, high=.1)
|
||||
qvel = self.init_qvel + self.np_random.randn(self.model.nv) * .1
|
||||
|
||||
else:
|
||||
qpos = self.init_qpos
|
||||
qvel = self.init_qvel
|
||||
|
||||
# Set everything other than point to original position and 0 velocity.
|
||||
qpos[3:] = self.init_qpos[3:]
|
||||
qvel[3:] = 0.
|
||||
self.set_state(qpos, qvel)
|
||||
return self._get_obs()
|
||||
|
||||
def get_ori(self):
|
||||
return self.data.qpos[self.__class__.ORI_IND]
|
||||
|
||||
def set_xy(self, xy):
|
||||
qpos = np.copy(self.data.qpos)
|
||||
qpos[0] = xy[0]
|
||||
qpos[1] = xy[1]
|
||||
|
||||
qvel = self.data.qvel
|
||||
self.set_state(qpos, qvel)
|
||||
|
||||
def get_xy(self):
|
||||
qpos = np.copy(self.data.qpos)
|
||||
return qpos[:2]
|
||||
|
||||
def viewer_setup(self):
|
||||
|
||||
self.viewer.cam.trackbodyid = -1
|
||||
self.viewer.cam.distance = 80
|
||||
self.viewer.cam.elevation = -90
|
563
tianshou/env/mujoco/point_maze_env.py
vendored
Normal file
563
tianshou/env/mujoco/point_maze_env.py
vendored
Normal file
@ -0,0 +1,563 @@
|
||||
"""Adapted from rllab maze_env.py."""
|
||||
|
||||
import os
|
||||
import tempfile
|
||||
import xml.etree.ElementTree as ET
|
||||
import math
|
||||
import numpy as np
|
||||
import gym
|
||||
from . import maze_env_utils
|
||||
from .point import PointEnv
|
||||
from gym.utils import seeding
|
||||
|
||||
# Directory that contains mujoco xml files.
|
||||
MODEL_DIR = os.path.join(os.path.dirname(__file__), 'assets')
|
||||
|
||||
|
||||
class PointMazeEnv(gym.Env):
|
||||
MODEL_CLASS = PointEnv
|
||||
|
||||
MAZE_HEIGHT = None
|
||||
MAZE_SIZE_SCALING = None
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
maze_id=None,
|
||||
maze_height=0.5,
|
||||
maze_size_scaling=8,
|
||||
n_bins=0,
|
||||
sensor_range=3.,
|
||||
sensor_span=2 * math.pi,
|
||||
observe_blocks=False,
|
||||
put_spin_near_agent=False,
|
||||
top_down_view=False,
|
||||
manual_collision=False,
|
||||
goal=None,
|
||||
EPS=0.25,
|
||||
max_episode_steps=2000,
|
||||
*args,
|
||||
**kwargs):
|
||||
self._maze_id = maze_id
|
||||
|
||||
model_cls = self.__class__.MODEL_CLASS
|
||||
if model_cls is None:
|
||||
raise "MODEL_CLASS unspecified!"
|
||||
xml_path = os.path.join(MODEL_DIR, model_cls.FILE)
|
||||
self.tree = tree = ET.parse(xml_path)
|
||||
self.worldbody = worldbody = tree.find(".//worldbody")
|
||||
self.visualize_goal = False
|
||||
self.max_episode_steps = max_episode_steps
|
||||
self.t = 0
|
||||
self.MAZE_HEIGHT = height = maze_height
|
||||
self.MAZE_SIZE_SCALING = size_scaling = maze_size_scaling
|
||||
self._n_bins = n_bins
|
||||
self._sensor_range = sensor_range * size_scaling
|
||||
self._sensor_span = sensor_span
|
||||
self._observe_blocks = observe_blocks
|
||||
self._put_spin_near_agent = put_spin_near_agent
|
||||
self._top_down_view = top_down_view
|
||||
self._manual_collision = manual_collision
|
||||
|
||||
self.MAZE_STRUCTURE = structure = maze_env_utils.construct_maze(
|
||||
maze_id=self._maze_id)
|
||||
# Elevate the maze to allow for falling.
|
||||
self.elevated = any(-1 in row for row in structure)
|
||||
self.blocks = any(
|
||||
any(maze_env_utils.can_move(r) for r in row)
|
||||
for row in structure) # Are there any movable blocks?
|
||||
|
||||
torso_x, torso_y = self._find_robot() # x, y coordinates
|
||||
self._init_torso_x = torso_x
|
||||
self._init_torso_y = torso_y
|
||||
self._init_positions = [
|
||||
(x - torso_x, y - torso_y)
|
||||
for x, y in self._find_all_robots()]
|
||||
|
||||
self._view = np.zeros([5, 5, 3])
|
||||
|
||||
height_offset = 0.
|
||||
if self.elevated:
|
||||
height_offset = height * size_scaling
|
||||
torso = tree.find(".//body[@name='torso']")
|
||||
torso.set('pos', '0 0 %.2f' % (0.75 + height_offset))
|
||||
if self.blocks:
|
||||
default = tree.find(".//default")
|
||||
default.find('.//geom').set('solimp', '.995 .995 .01')
|
||||
|
||||
self.movable_blocks = []
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
struct = structure[i][j]
|
||||
if struct == 'r' and self._put_spin_near_agent:
|
||||
struct = maze_env_utils.Move.SpinXY
|
||||
if self.elevated and struct not in [-1]:
|
||||
# Create elevated platform.
|
||||
ET.SubElement(
|
||||
worldbody, "geom",
|
||||
name="elevated_%d_%d" % (i, j),
|
||||
pos="%f %f %f" % (j * size_scaling - torso_x,
|
||||
i * size_scaling - torso_y,
|
||||
height / 2 * size_scaling),
|
||||
size="%f %f %f" % (0.5 * size_scaling,
|
||||
0.5 * size_scaling,
|
||||
height / 2 * size_scaling),
|
||||
type="box",
|
||||
material="",
|
||||
contype="1",
|
||||
conaffinity="1",
|
||||
rgba="0.9 0.9 0.9 1",
|
||||
)
|
||||
if struct == 1: # Unmovable block.
|
||||
# Offset all coordinates so that robot starts at the origin.
|
||||
ET.SubElement(
|
||||
worldbody, "geom",
|
||||
name="block_%d_%d" % (i, j),
|
||||
pos="%f %f %f" % (j * size_scaling - torso_x,
|
||||
i * size_scaling - torso_y,
|
||||
height_offset +
|
||||
height / 2 * size_scaling),
|
||||
size="%f %f %f" % (0.5 * size_scaling,
|
||||
0.5 * size_scaling,
|
||||
height / 2 * size_scaling),
|
||||
type="box",
|
||||
material="",
|
||||
contype="1",
|
||||
conaffinity="1",
|
||||
rgba="0.4 0.4 0.4 1",
|
||||
)
|
||||
elif maze_env_utils.can_move(struct):
|
||||
name = "movable_%d_%d" % (i, j)
|
||||
self.movable_blocks.append((name, struct))
|
||||
falling = maze_env_utils.can_move_z(struct)
|
||||
spinning = maze_env_utils.can_spin(struct)
|
||||
x_offset = 0.25 * size_scaling if spinning else 0.0
|
||||
y_offset = 0.0
|
||||
shrink = 0.1 if spinning else 0.99 if falling else 1.0
|
||||
height_shrink = 0.1 if spinning else 1.0
|
||||
movable_body = ET.SubElement(
|
||||
worldbody, "body",
|
||||
name=name,
|
||||
pos="%f %f %f" % (j * size_scaling - torso_x + x_offset,
|
||||
i * size_scaling - torso_y + y_offset,
|
||||
height_offset +
|
||||
height / 2 * size_scaling * height_shrink),
|
||||
)
|
||||
ET.SubElement(
|
||||
movable_body, "geom",
|
||||
name="block_%d_%d" % (i, j),
|
||||
pos="0 0 0",
|
||||
size="%f %f %f" % (0.5 * size_scaling * shrink,
|
||||
0.5 * size_scaling * shrink,
|
||||
height / 2 * size_scaling * height_shrink),
|
||||
type="box",
|
||||
material="",
|
||||
mass="0.001" if falling else "0.0002",
|
||||
contype="1",
|
||||
conaffinity="1",
|
||||
rgba="0.9 0.1 0.1 1"
|
||||
)
|
||||
if maze_env_utils.can_move_x(struct):
|
||||
ET.SubElement(
|
||||
movable_body, "joint",
|
||||
armature="0",
|
||||
axis="1 0 0",
|
||||
damping="0.0",
|
||||
limited="true" if falling else "false",
|
||||
range="%f %f" % (-size_scaling, size_scaling),
|
||||
margin="0.01",
|
||||
name="movable_x_%d_%d" % (i, j),
|
||||
pos="0 0 0",
|
||||
type="slide"
|
||||
)
|
||||
if maze_env_utils.can_move_y(struct):
|
||||
ET.SubElement(
|
||||
movable_body, "joint",
|
||||
armature="0",
|
||||
axis="0 1 0",
|
||||
damping="0.0",
|
||||
limited="true" if falling else "false",
|
||||
range="%f %f" % (-size_scaling, size_scaling),
|
||||
margin="0.01",
|
||||
name="movable_y_%d_%d" % (i, j),
|
||||
pos="0 0 0",
|
||||
type="slide"
|
||||
)
|
||||
if maze_env_utils.can_move_z(struct):
|
||||
ET.SubElement(
|
||||
movable_body, "joint",
|
||||
armature="0",
|
||||
axis="0 0 1",
|
||||
damping="0.0",
|
||||
limited="true",
|
||||
range="%f 0" % (-height_offset),
|
||||
margin="0.01",
|
||||
name="movable_z_%d_%d" % (i, j),
|
||||
pos="0 0 0",
|
||||
type="slide"
|
||||
)
|
||||
if maze_env_utils.can_spin(struct):
|
||||
ET.SubElement(
|
||||
movable_body, "joint",
|
||||
armature="0",
|
||||
axis="0 0 1",
|
||||
damping="0.0",
|
||||
limited="false",
|
||||
name="spinable_%d_%d" % (i, j),
|
||||
pos="0 0 0",
|
||||
type="ball"
|
||||
)
|
||||
|
||||
torso = tree.find(".//body[@name='torso']")
|
||||
geoms = torso.findall(".//geom")
|
||||
for geom in geoms:
|
||||
if 'name' not in geom.attrib:
|
||||
raise Exception("Every geom of the torso must have a name "
|
||||
"defined")
|
||||
|
||||
_, file_path = tempfile.mkstemp(text=True, suffix='.xml')
|
||||
tree.write(file_path)
|
||||
|
||||
self.wrapped_env = model_cls(*args, file_path=file_path, **kwargs)
|
||||
self.args = args
|
||||
self.kwargs = kwargs
|
||||
self.GOAL = goal
|
||||
if self.GOAL is not None:
|
||||
self.GOAL = self.unwrapped._rowcol_to_xy(*self.GOAL)
|
||||
self.EPS = EPS
|
||||
|
||||
def get_ori(self):
|
||||
return self.wrapped_env.get_ori()
|
||||
|
||||
def get_top_down_view(self):
|
||||
self._view = np.zeros_like(self._view)
|
||||
|
||||
def valid(row, col):
|
||||
return self._view.shape[0] > row >= 0 and self._view.shape[1] > col >= 0
|
||||
|
||||
def update_view(x, y, d, row=None, col=None):
|
||||
if row is None or col is None:
|
||||
x = x - self._robot_x
|
||||
y = y - self._robot_y
|
||||
th = self._robot_ori
|
||||
|
||||
row, col = self._xy_to_rowcol(x, y)
|
||||
update_view(x, y, d, row=row, col=col)
|
||||
return
|
||||
|
||||
row, row_frac, col, col_frac = int(row), row % 1, int(col), col % 1
|
||||
if row_frac < 0:
|
||||
row_frac += 1
|
||||
if col_frac < 0:
|
||||
col_frac += 1
|
||||
|
||||
if valid(row, col):
|
||||
self._view[row, col, d] += (
|
||||
(min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
|
||||
(min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
|
||||
if valid(row - 1, col):
|
||||
self._view[row - 1, col, d] += (
|
||||
(max(0., 0.5 - row_frac)) *
|
||||
(min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
|
||||
if valid(row + 1, col):
|
||||
self._view[row + 1, col, d] += (
|
||||
(max(0., row_frac - 0.5)) *
|
||||
(min(1., col_frac + 0.5) - max(0., col_frac - 0.5)))
|
||||
if valid(row, col - 1):
|
||||
self._view[row, col - 1, d] += (
|
||||
(min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
|
||||
(max(0., 0.5 - col_frac)))
|
||||
if valid(row, col + 1):
|
||||
self._view[row, col + 1, d] += (
|
||||
(min(1., row_frac + 0.5) - max(0., row_frac - 0.5)) *
|
||||
(max(0., col_frac - 0.5)))
|
||||
if valid(row - 1, col - 1):
|
||||
self._view[row - 1, col - 1, d] += (
|
||||
(max(0., 0.5 - row_frac)) * max(0., 0.5 - col_frac))
|
||||
if valid(row - 1, col + 1):
|
||||
self._view[row - 1, col + 1, d] += (
|
||||
(max(0., 0.5 - row_frac)) * max(0., col_frac - 0.5))
|
||||
if valid(row + 1, col + 1):
|
||||
self._view[row + 1, col + 1, d] += (
|
||||
(max(0., row_frac - 0.5)) * max(0., col_frac - 0.5))
|
||||
if valid(row + 1, col - 1):
|
||||
self._view[row + 1, col - 1, d] += (
|
||||
(max(0., row_frac - 0.5)) * max(0., 0.5 - col_frac))
|
||||
|
||||
# Draw ant.
|
||||
robot_x, robot_y = self.wrapped_env.get_body_com("torso")[:2]
|
||||
self._robot_x = robot_x
|
||||
self._robot_y = robot_y
|
||||
self._robot_ori = self.get_ori()
|
||||
|
||||
structure = self.MAZE_STRUCTURE
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
height = self.MAZE_HEIGHT
|
||||
|
||||
# Draw immovable blocks and chasms.
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
if structure[i][j] == 1: # Wall.
|
||||
update_view(j * size_scaling - self._init_torso_x,
|
||||
i * size_scaling - self._init_torso_y,
|
||||
0)
|
||||
if structure[i][j] == -1: # Chasm.
|
||||
update_view(j * size_scaling - self._init_torso_x,
|
||||
i * size_scaling - self._init_torso_y,
|
||||
1)
|
||||
|
||||
# Draw movable blocks.
|
||||
for block_name, block_type in self.movable_blocks:
|
||||
block_x, block_y = self.wrapped_env.get_body_com(block_name)[:2]
|
||||
update_view(block_x, block_y, 2)
|
||||
|
||||
import cv2
|
||||
cv2.imshow('x.jpg', cv2.resize(np.uint8(self._view * 255), (512, 512), interpolation=cv2.INTER_CUBIC))
|
||||
cv2.waitKey(0)
|
||||
|
||||
return self._view
|
||||
|
||||
def get_range_sensor_obs(self):
|
||||
"""Returns egocentric range sensor observations of maze."""
|
||||
robot_x, robot_y, robot_z = self.wrapped_env.get_body_com("torso")[:3]
|
||||
ori = self.get_ori()
|
||||
|
||||
structure = self.MAZE_STRUCTURE
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
height = self.MAZE_HEIGHT
|
||||
|
||||
segments = []
|
||||
# Get line segments (corresponding to outer boundary) of each immovable
|
||||
# block or drop-off.
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
if structure[i][j] in [1, -1]: # There's a wall or drop-off.
|
||||
cx = j * size_scaling - self._init_torso_x
|
||||
cy = i * size_scaling - self._init_torso_y
|
||||
x1 = cx - 0.5 * size_scaling
|
||||
x2 = cx + 0.5 * size_scaling
|
||||
y1 = cy - 0.5 * size_scaling
|
||||
y2 = cy + 0.5 * size_scaling
|
||||
struct_segments = [
|
||||
((x1, y1), (x2, y1)),
|
||||
((x2, y1), (x2, y2)),
|
||||
((x2, y2), (x1, y2)),
|
||||
((x1, y2), (x1, y1)),
|
||||
]
|
||||
for seg in struct_segments:
|
||||
segments.append(dict(
|
||||
segment=seg,
|
||||
type=structure[i][j],
|
||||
))
|
||||
|
||||
for block_name, block_type in self.movable_blocks:
|
||||
block_x, block_y, block_z = self.wrapped_env.get_body_com(block_name)[
|
||||
:3]
|
||||
if (block_z + height * size_scaling / 2 >= robot_z and
|
||||
robot_z >= block_z - height * size_scaling / 2): # Block in view.
|
||||
x1 = block_x - 0.5 * size_scaling
|
||||
x2 = block_x + 0.5 * size_scaling
|
||||
y1 = block_y - 0.5 * size_scaling
|
||||
y2 = block_y + 0.5 * size_scaling
|
||||
struct_segments = [
|
||||
((x1, y1), (x2, y1)),
|
||||
((x2, y1), (x2, y2)),
|
||||
((x2, y2), (x1, y2)),
|
||||
((x1, y2), (x1, y1)),
|
||||
]
|
||||
for seg in struct_segments:
|
||||
segments.append(dict(
|
||||
segment=seg,
|
||||
type=block_type,
|
||||
))
|
||||
|
||||
# 3 for wall, drop-off, block
|
||||
sensor_readings = np.zeros((self._n_bins, 3))
|
||||
for ray_idx in range(self._n_bins):
|
||||
ray_ori = (ori - self._sensor_span * 0.5 +
|
||||
(2 * ray_idx + 1.0) / (2 * self._n_bins) * self._sensor_span)
|
||||
ray_segments = []
|
||||
# Get all segments that intersect with ray.
|
||||
for seg in segments:
|
||||
p = maze_env_utils.ray_segment_intersect(
|
||||
ray=((robot_x, robot_y), ray_ori),
|
||||
segment=seg["segment"])
|
||||
if p is not None:
|
||||
ray_segments.append(dict(
|
||||
segment=seg["segment"],
|
||||
type=seg["type"],
|
||||
ray_ori=ray_ori,
|
||||
distance=maze_env_utils.point_distance(
|
||||
p, (robot_x, robot_y)),
|
||||
))
|
||||
if len(ray_segments) > 0:
|
||||
# Find out which segment is intersected first.
|
||||
first_seg = sorted(
|
||||
ray_segments, key=lambda x: x["distance"])[0]
|
||||
seg_type = first_seg["type"]
|
||||
idx = (0 if seg_type == 1 else # Wall.
|
||||
1 if seg_type == -1 else # Drop-off.
|
||||
2 if maze_env_utils.can_move(seg_type) else # Block.
|
||||
None)
|
||||
if first_seg["distance"] <= self._sensor_range:
|
||||
sensor_readings[ray_idx][idx] = (
|
||||
self._sensor_range - first_seg[
|
||||
"distance"]) / self._sensor_range
|
||||
return sensor_readings
|
||||
|
||||
def _get_obs(self):
|
||||
wrapped_obs = self.wrapped_env._get_obs()
|
||||
# print("ant obs", wrapped_obs)
|
||||
if self._top_down_view:
|
||||
view = [self.get_top_down_view().flat]
|
||||
else:
|
||||
view = []
|
||||
|
||||
if self._observe_blocks:
|
||||
additional_obs = []
|
||||
for block_name, block_type in self.movable_blocks:
|
||||
additional_obs.append(
|
||||
self.wrapped_env.get_body_com(block_name))
|
||||
wrapped_obs = np.concatenate([wrapped_obs[:3]] + additional_obs +
|
||||
[wrapped_obs[3:]])
|
||||
|
||||
range_sensor_obs = self.get_range_sensor_obs()
|
||||
return wrapped_obs
|
||||
|
||||
def seed(self, seed=None):
|
||||
self.np_random, seed = seeding.np_random(seed)
|
||||
return [seed]
|
||||
|
||||
def reset(self, goal=None):
|
||||
self.goal = goal
|
||||
|
||||
if self.visualize_goal: # remove the prev goal and add a new goal
|
||||
goal_x, goal_y = goal[0], goal[1]
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
# remove the original goal
|
||||
try:
|
||||
self.worldbody.remove(self.goal_element)
|
||||
except AttributeError:
|
||||
pass
|
||||
# offset all coordinates so that robot starts at the origin
|
||||
self.goal_element = \
|
||||
ET.SubElement(
|
||||
self.worldbody, "geom",
|
||||
name="goal_%d_%d" % (goal_x, goal_y),
|
||||
pos="%f %f %f" % (goal_x,
|
||||
goal_y,
|
||||
self.MAZE_HEIGHT / 2 * size_scaling),
|
||||
size="%f %f %f" % (0.1 * size_scaling, # smaller than the block to prevent collision
|
||||
0.1 * size_scaling,
|
||||
self.MAZE_HEIGHT / 2 * size_scaling),
|
||||
type="box",
|
||||
material="",
|
||||
contype="1",
|
||||
conaffinity="1",
|
||||
rgba="1.0 0.0 0.0 0.5"
|
||||
)
|
||||
# Note: running the lines below will make the robot position wrong! (because the graph is rebuilt)
|
||||
torso = self.tree.find(".//body[@name='torso']")
|
||||
geoms = torso.findall(".//geom")
|
||||
for geom in geoms:
|
||||
if 'name' not in geom.attrib:
|
||||
raise Exception("Every geom of the torso must have a name "
|
||||
"defined")
|
||||
_, file_path = tempfile.mkstemp(text=True, suffix='.xml')
|
||||
self.tree.write(
|
||||
file_path) # here we write a temporal file with the robot specifications. Why not the original one??
|
||||
|
||||
model_cls = self.__class__.MODEL_CLASS
|
||||
self.wrapped_env = model_cls(*self.args, file_path=file_path,
|
||||
**self.kwargs) # file to the robot specifications; model_cls is AntEnv
|
||||
|
||||
self.t = 0
|
||||
self.trajectory = []
|
||||
self.wrapped_env.reset()
|
||||
if len(self._init_positions) > 1:
|
||||
xy = self._init_positions[self.np_random.randint(len(self._init_positions))]
|
||||
self.wrapped_env.set_xy(xy)
|
||||
return self._get_obs()
|
||||
|
||||
@property
|
||||
def viewer(self):
|
||||
return self.wrapped_env.viewer
|
||||
|
||||
def render(self, *args, **kwargs):
|
||||
return self.wrapped_env.render(*args, **kwargs)
|
||||
|
||||
@property
|
||||
def observation_space(self):
|
||||
shape = self._get_obs().shape
|
||||
high = np.inf * np.ones(shape)
|
||||
low = -high
|
||||
return gym.spaces.Box(low, high)
|
||||
|
||||
@property
|
||||
def action_space(self):
|
||||
return self.wrapped_env.action_space
|
||||
|
||||
def _find_robot(self):
|
||||
structure = self.MAZE_STRUCTURE
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
if structure[i][j] == 'r':
|
||||
return j * size_scaling, i * size_scaling
|
||||
assert False, 'No robot in maze specification.'
|
||||
|
||||
def _find_all_robots(self):
|
||||
structure = self.MAZE_STRUCTURE
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
coords = []
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
if structure[i][j] == 'r':
|
||||
coords.append((j * size_scaling, i * size_scaling))
|
||||
return coords
|
||||
|
||||
def _is_in_collision(self, pos):
|
||||
x, y = pos
|
||||
structure = self.MAZE_STRUCTURE
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
for i in range(len(structure)):
|
||||
for j in range(len(structure[0])):
|
||||
if structure[i][j] == 1:
|
||||
minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x
|
||||
maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x
|
||||
miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y
|
||||
maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y
|
||||
if minx <= x <= maxx and miny <= y <= maxy:
|
||||
return True
|
||||
return False
|
||||
|
||||
def _rowcol_to_xy(self, j, i):
|
||||
size_scaling = self.MAZE_SIZE_SCALING
|
||||
minx = j * size_scaling - size_scaling * 0.5 - self._init_torso_x
|
||||
maxx = j * size_scaling + size_scaling * 0.5 - self._init_torso_x
|
||||
miny = i * size_scaling - size_scaling * 0.5 - self._init_torso_y
|
||||
maxy = i * size_scaling + size_scaling * 0.5 - self._init_torso_y
|
||||
return (minx + maxx) / 2, (miny + maxy) / 2
|
||||
|
||||
def step(self, action):
|
||||
self.t += 1
|
||||
if self._manual_collision:
|
||||
old_pos = self.wrapped_env.get_xy()
|
||||
inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step(
|
||||
action)
|
||||
new_pos = self.wrapped_env.get_xy()
|
||||
if self._is_in_collision(new_pos):
|
||||
self.wrapped_env.set_xy(old_pos)
|
||||
else:
|
||||
inner_next_obs, inner_reward, inner_done, info = self.wrapped_env.step(
|
||||
action)
|
||||
next_obs = self._get_obs()
|
||||
done = False
|
||||
if self.goal is not None:
|
||||
done = bool(((next_obs[:2] - self.goal[:2]) ** 2).sum() < self.EPS)
|
||||
|
||||
new_pos = self.wrapped_env.get_xy()
|
||||
if self._is_in_collision(new_pos) or inner_done:
|
||||
done = True
|
||||
if self.t >= self.max_episode_steps:
|
||||
done = True
|
||||
return next_obs, inner_reward, done, info
|
Loading…
x
Reference in New Issue
Block a user