2020-06-05 11:17:43 +02:00
|
|
|
import gym
|
2020-07-13 00:24:31 +08:00
|
|
|
import time
|
2020-06-05 11:17:43 +02:00
|
|
|
from gym.spaces.discrete import Discrete
|
2020-03-21 10:58:01 +08:00
|
|
|
|
|
|
|
|
|
|
|
class MyTestEnv(gym.Env):
    """This is a "going right" task. The task is to go right ``size`` steps.

    The environment tracks a single integer position ``index``. Action ``1``
    moves one step to the right, action ``0`` moves one step to the left
    (never below 0). The episode is done once ``index`` equals ``size``.

    :param size: the target position; the episode ends when
        ``index == size``.
    :param sleep: number of seconds to sleep inside every ``step`` call;
        no sleep happens when it is not greater than 0.
    :param dict_state: if true, observations are returned as
        ``{'index': index}`` instead of the bare ``index``.
    :param ma_rew: if greater than 0, the reward is a list of that length
        instead of a scalar.
    """

    def __init__(self, size, sleep=0, dict_state=False, ma_rew=0):
        self.size = size
        self.sleep = sleep
        self.dict_state = dict_state
        self.ma_rew = ma_rew
        self.action_space = Discrete(2)
        self.reset()

    def reset(self, state=0):
        """Restart the episode at position ``state`` and return the
        observation for it.
        """
        self.done = False
        self.index = state
        return self._get_dict_state()

    def _get_reward(self):
        """Return ``int(self.done)``, repeated ``ma_rew`` times in a list
        when ``ma_rew`` is greater than 0.
        """
        x = int(self.done)
        if self.ma_rew > 0:
            return [x] * self.ma_rew
        return x

    def _get_dict_state(self):
        """Return ``{'index': index}`` if ``dict_state`` is set, otherwise
        the bare ``index``.
        """
        return {'index': self.index} if self.dict_state else self.index

    def step(self, action):
        """Apply ``action`` and return ``(obs, reward, done, info)``.

        :param action: ``0`` to move left (clamped at 0) or ``1`` to move
            right.
        :return: a 4-tuple of the observation, the reward, the done flag
            and an info dict.
        :raises ValueError: if called after the episode is done, or if
            ``action`` is neither ``0`` nor ``1``.
        """
        if self.done:
            raise ValueError('step after done !!!')
        if self.sleep > 0:
            time.sleep(self.sleep)
        if self.index == self.size:
            # Already standing on the goal (e.g. after ``reset(state=size)``):
            # finish the episode without moving, whatever the action is.
            self.done = True
            return self._get_dict_state(), self._get_reward(), self.done, {}
        if action == 0:
            self.index = max(self.index - 1, 0)
            # Moving left only carries extra info in dict-state mode.
            info = {'key': 1, 'env': self} if self.dict_state else {}
            return self._get_dict_state(), self._get_reward(), self.done, info
        elif action == 1:
            self.index += 1
            self.done = self.index == self.size
            return self._get_dict_state(), self._get_reward(), \
                self.done, {'key': 1, 'env': self}
        # Without this, an unsupported action would fall through and return
        # None, which only fails later when the caller unpacks the result.
        raise ValueError(
            'invalid action {!r}, expected 0 or 1'.format(action))
|