2020-03-11 10:56:38 +08:00
|
|
|
import gym
|
|
|
|
import time
|
2020-03-16 11:11:29 +08:00
|
|
|
import pytest
|
2020-03-11 10:56:38 +08:00
|
|
|
import numpy as np
|
|
|
|
from tianshou.env import FrameStack, VectorEnv, SubprocVectorEnv, RayVectorEnv
|
|
|
|
|
|
|
|
|
|
|
|
class MyTestEnv(gym.Env):
    """Deterministic counter environment used by the tests in this file.

    The observation is an integer ``index`` that starts at 0. Action ``1``
    increments it and action ``0`` decrements it (never below 0). The
    episode ends when ``index`` reaches ``size``; the step that reaches
    ``size`` through action ``1`` yields reward 1, every other step yields
    reward 0.
    """

    def __init__(self, size, sleep=0):
        """Store the configuration and reset the environment.

        :param size: index value at which the episode terminates.
        :param sleep: seconds to sleep inside each accepted ``step`` call;
            values ``<= 0`` disable sleeping.
        """
        self.size = size
        self.sleep = sleep
        self.reset()

    def reset(self):
        """Clear the done flag, rewind the index to 0 and return it."""
        self.done = False
        self.index = 0
        return self.index

    def step(self, action):
        """Advance the environment by one action.

        :param action: ``0`` to decrement the index, ``1`` to increment it.
        :return: a tuple ``(index, reward, done, info)`` where ``info`` is
            an empty dict.
        :raises ValueError: if called after the episode has finished, or if
            ``action`` is neither ``0`` nor ``1``.
        """
        if self.done:
            raise ValueError('step after done !!!')
        if self.sleep > 0:
            time.sleep(self.sleep)
        # Already sitting on the terminal index (e.g. ``size == 0``):
        # finish immediately regardless of the action.
        if self.index == self.size:
            self.done = True
            return self.index, 0, True, {}
        if action == 0:
            self.index = max(self.index - 1, 0)
            return self.index, 0, False, {}
        if action == 1:
            self.index += 1
            self.done = self.index == self.size
            return self.index, int(self.done), self.done, {}
        # Previously an unknown action fell through and returned ``None``,
        # which only failed later when the caller unpacked the result.
        raise ValueError(f'unsupported action: {action!r}')
|
2020-03-11 10:56:38 +08:00
|
|
|
|
2020-03-11 16:14:53 +08:00
|
|
|
|
|
|
|
def test_framestack(k=4, size=10):
    """Drive a ``FrameStack``-wrapped ``MyTestEnv`` through a fixed script.

    The assertions compare each returned observation with the four most
    recent indices of the underlying counter, check the reward/done pair
    around the terminal step, and require a ``ValueError`` when stepping
    after the episode is done.

    :param k: stack depth passed to ``FrameStack`` (the expected arrays
        below are written for the default of 4).
    :param size: terminal index of the wrapped ``MyTestEnv``.
    """

    def frames_match(frames, expected):
        # Zero summed absolute difference means every element agrees.
        return abs(frames - np.array(expected)).sum() == 0

    env = MyTestEnv(size=size)
    fsenv = FrameStack(env, k)
    fsenv.seed()

    obs = fsenv.reset()
    assert frames_match(obs, [0, 0, 0, 0])

    # Five increments: the counter reaches 5.
    for _ in range(5):
        obs, rew, done, info = fsenv.step(1)
    assert frames_match(obs, [2, 3, 4, 5])

    # Ten decrements: the counter is clamped at 0.
    for _ in range(10):
        obs, rew, done, info = fsenv.step(0)
    assert frames_match(obs, [0, 0, 0, 0])

    # Nine increments: one step short of the terminal index.
    for _ in range(9):
        obs, rew, done, info = fsenv.step(1)
    assert frames_match(obs, [6, 7, 8, 9])
    assert (rew, done) == (0, False)

    # The tenth increment finishes the episode with reward 1.
    obs, rew, done, info = fsenv.step(1)
    assert frames_match(obs, [7, 8, 9, 10])
    assert (rew, done) == (1, True)

    # Stepping a finished episode must be rejected rather than producing
    # another observation.
    with pytest.raises(ValueError):
        obs, rew, done, info = fsenv.step(0)

    fsenv.close()
|
|
|
|
|
2020-03-11 16:14:53 +08:00
|
|
|
|
2020-03-16 11:11:29 +08:00
|
|
|
def test_vecenv(size=10, num=8, sleep=0.001):
    """Check that the vectorized env implementations agree with each other.

    Under a test runner, ``VectorEnv`` and ``SubprocVectorEnv`` are stepped
    in lockstep with the same action script and every element of their step
    results is asserted equal. When the file is executed as a script,
    ``RayVectorEnv`` is added as well and, instead of comparing results,
    the wall-clock time of each implementation is printed.

    :param size: terminal index of every ``MyTestEnv`` instance.
    :param num: number of environments per vectorized env.
    :param sleep: per-step sleep of each ``MyTestEnv`` in seconds.
    """
    verbose = __name__ == '__main__'
    env_fns = [lambda: MyTestEnv(size=size, sleep=sleep) for _ in range(num)]
    venv = [
        VectorEnv(env_fns, reset_after_done=True),
        SubprocVectorEnv(env_fns, reset_after_done=True),
    ]
    # Everything after construction runs under try/finally so the envs
    # (including worker processes) are closed even if a step or an
    # assertion fails.
    try:
        if verbose:
            venv.append(RayVectorEnv(env_fns, reset_after_done=True))
        for v in venv:
            v.seed()
        action_list = [1] * 5 + [0] * 10 + [1] * 15
        if not verbose:
            for v in venv:
                v.reset()
            for a in action_list:
                results = [v.step([a] * num) for v in venv]
                # zip(*results) groups the same field of the step result
                # across all implementations.
                for reference, *others in zip(*results):
                    for other in others:
                        assert (reference == other).all()
        else:
            elapsed = []
            for e in venv:
                # perf_counter is the clock intended for measuring
                # short intervals.
                start = time.perf_counter()
                e.reset()
                for a in action_list:
                    e.step([a] * num)
                elapsed.append(time.perf_counter() - start)
            labels = ('VectorEnv', 'SubprocVectorEnv', 'RayVectorEnv')
            for label, seconds in zip(labels, elapsed):
                print(f'{label}: {seconds:.6f}s')
    finally:
        for v in venv:
            v.close()
|
2020-03-11 16:14:53 +08:00
|
|
|
|
|
|
|
|
2020-03-16 11:11:29 +08:00
|
|
|
def test_vecenv2():
    """Compare vectorized envs when episodes end at different times.

    Four ``MyTestEnv`` instances with sizes 1 to 4 are wrapped with
    ``reset_after_done=False`` and stepped six times with action ``1``.
    After every step, each element of the step result must be equal across
    all implementations. When executed as a script, ``RayVectorEnv`` is
    included and the raw step results are printed.
    """
    verbose = __name__ == '__main__'
    env_fns = [
        lambda: MyTestEnv(size=1),
        lambda: MyTestEnv(size=2),
        lambda: MyTestEnv(size=3),
        lambda: MyTestEnv(size=4),
    ]
    num = len(env_fns)
    venv = [
        VectorEnv(env_fns, reset_after_done=False),
        SubprocVectorEnv(env_fns, reset_after_done=False),
    ]
    # Everything after construction runs under try/finally so the envs
    # (including worker processes) are closed even if a step or an
    # assertion fails.
    try:
        if verbose:
            venv.append(RayVectorEnv(env_fns, reset_after_done=False))
        for v in venv:
            v.seed()
        for v in venv:
            v.reset()
        action_list = [1] * 6
        for a in action_list:
            results = [v.step([a] * num) for v in venv]
            if verbose:
                for result in results:
                    print(result)
                print('---')
            # zip(*results) groups the same field of the step result
            # across all implementations.
            for reference, *others in zip(*results):
                for other in others:
                    assert (reference == other).all()
    finally:
        for v in venv:
            v.close()
|
|
|
|
|
|
|
|
|
2020-03-11 10:56:38 +08:00
|
|
|
if __name__ == '__main__':
    # Script mode: run every test in definition order. The tests detect
    # this mode themselves via ``__name__`` and switch to verbose output.
    for run_test in (test_framestack, test_vecenv, test_vecenv2):
        run_test()
|