Tianshou/test/base/env.py
youkaichao 26fb87433d Improve collector (#125)
* remove multibuf

* reward_metric

* make fileds with empty Batch rather than None after reset

* many fixes and refactor
Co-authored-by: Trinkle23897 <463003665@qq.com>
2020-07-13 17:33:01 +08:00

51 lines
1.6 KiB
Python

import gym
import time
from gym.spaces.discrete import Discrete
class MyTestEnv(gym.Env):
"""This is a "going right" task. The task is to go right ``size`` steps.
"""
def __init__(self, size, sleep=0, dict_state=False, ma_rew=0):
self.size = size
self.sleep = sleep
self.dict_state = dict_state
self.ma_rew = ma_rew
self.action_space = Discrete(2)
self.reset()
def reset(self, state=0):
self.done = False
self.index = state
return self._get_dict_state()
def _get_reward(self):
"""Generate a non-scalar reward if ma_rew is True."""
x = int(self.done)
if self.ma_rew > 0:
return [x] * self.ma_rew
return x
def _get_dict_state(self):
"""Generate a dict_state if dict_state is True."""
return {'index': self.index} if self.dict_state else self.index
def step(self, action):
if self.done:
raise ValueError('step after done !!!')
if self.sleep > 0:
time.sleep(self.sleep)
if self.index == self.size:
self.done = True
return self._get_dict_state(), self._get_reward(), self.done, {}
if action == 0:
self.index = max(self.index - 1, 0)
return self._get_dict_state(), self._get_reward(), self.done, \
{'key': 1, 'env': self} if self.dict_state else {}
elif action == 1:
self.index += 1
self.done = self.index == self.size
return self._get_dict_state(), self._get_reward(), \
self.done, {'key': 1, 'env': self}