diff --git a/test/base/__init__.py b/test/base/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/base/env.py b/test/base/env.py new file mode 100644 index 0000000..d2fd671 --- /dev/null +++ b/test/base/env.py @@ -0,0 +1,30 @@ +import gym +import time + + +class MyTestEnv(gym.Env): + def __init__(self, size, sleep=0): + self.size = size + self.sleep = sleep + self.reset() + + def reset(self): + self.done = False + self.index = 0 + return self.index + + def step(self, action): + if self.done: + raise ValueError('step after done !!!') + if self.sleep > 0: + time.sleep(self.sleep) + if self.index == self.size: + self.done = True + return self.index, 0, True, {} + if action == 0: + self.index = max(self.index - 1, 0) + return self.index, 0, False, {} + elif action == 1: + self.index += 1 + self.done = self.index == self.size + return self.index, int(self.done), self.done, {} diff --git a/test/test_batch.py b/test/base/test_batch.py similarity index 100% rename from test/test_batch.py rename to test/base/test_batch.py diff --git a/test/test_buffer.py b/test/base/test_buffer.py similarity index 92% rename from test/test_buffer.py rename to test/base/test_buffer.py index cd6d59a..b2bfe12 100644 --- a/test/test_buffer.py +++ b/test/base/test_buffer.py @@ -1,8 +1,8 @@ from tianshou.data import ReplayBuffer if __name__ == '__main__': - from test_env import MyTestEnv + from env import MyTestEnv else: # pytest - from test.test_env import MyTestEnv + from test.base.env import MyTestEnv def test_replaybuffer(size=10, bufsize=20): diff --git a/test/test_env.py b/test/base/test_env.py similarity index 79% rename from test/test_env.py rename to test/base/test_env.py index 9f38606..4e7b8d0 100644 --- a/test/test_env.py +++ b/test/base/test_env.py @@ -1,36 +1,12 @@ -import gym import time import pytest import numpy as np from tianshou.env import FrameStack, VectorEnv, SubprocVectorEnv, RayVectorEnv - -class MyTestEnv(gym.Env): - def __init__(self, size, sleep=0): - self.size = size - self.sleep = sleep - self.reset() - - def reset(self): - self.done = False - self.index = 0 - return self.index - - def step(self, action): - if self.done: - raise ValueError('step after done !!!') - if self.sleep > 0: - time.sleep(self.sleep) - if self.index == self.size: - self.done = True - return self.index, 0, True, {} - if action == 0: - self.index = max(self.index - 1, 0) - return self.index, 0, False, {} - elif action == 1: - self.index += 1 - self.done = self.index == self.size - return self.index, int(self.done), self.done, {} +if __name__ == '__main__': + from env import MyTestEnv +else: # pytest + from test.base.env import MyTestEnv def test_framestack(k=4, size=10): diff --git a/test/continuous/__init__.py b/test/continuous/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/continuous/net.py b/test/continuous/net.py new file mode 100644 index 0000000..2b12420 --- /dev/null +++ b/test/continuous/net.py @@ -0,0 +1,49 @@ +import torch +import numpy as np +from torch import nn + + +class Actor(nn.Module): + def __init__(self, layer_num, state_shape, action_shape, + max_action, device='cpu'): + super().__init__() + self.device = device + self.model = [ + nn.Linear(np.prod(state_shape), 128), + nn.ReLU(inplace=True)] + for i in range(layer_num): + self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] + self.model += [nn.Linear(128, np.prod(action_shape))] + self.model = nn.Sequential(*self.model) + self._max = max_action + + def forward(self, s, **kwargs): + s = torch.tensor(s, device=self.device, dtype=torch.float) + batch = s.shape[0] + s = s.view(batch, -1) + logits = self.model(s) + logits = self._max * torch.tanh(logits) + return logits, None + + +class Critic(nn.Module): + def __init__(self, layer_num, state_shape, action_shape, device='cpu'): + super().__init__() + self.device = device + self.model = [ + nn.Linear(np.prod(state_shape) + np.prod(action_shape), 128), + nn.ReLU(inplace=True)] + for i in range(layer_num): + self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] + self.model += [nn.Linear(128, 1)] + self.model = nn.Sequential(*self.model) + + def forward(self, s, a): + s = torch.tensor(s, device=self.device, dtype=torch.float) + if isinstance(a, np.ndarray): + a = torch.tensor(a, device=self.device, dtype=torch.float) + batch = s.shape[0] + s = s.view(batch, -1) + a = a.view(batch, -1) + logits = self.model(torch.cat([s, a], dim=1)) + return logits diff --git a/test/test_ddpg.py b/test/continuous/test_ddpg.py similarity index 71% rename from test/test_ddpg.py rename to test/continuous/test_ddpg.py index d24eb41..4fcea0c 100644 --- a/test/test_ddpg.py +++ b/test/continuous/test_ddpg.py @@ -3,7 +3,6 @@ import torch import pprint import argparse import numpy as np -from torch import nn from torch.utils.tensorboard import SummaryWriter from tianshou.policy import DDPGPolicy @@ -11,51 +10,10 @@ from tianshou.trainer import offpolicy_trainer from tianshou.data import Collector, ReplayBuffer from tianshou.env import VectorEnv, SubprocVectorEnv - -class Actor(nn.Module): - def __init__(self, layer_num, state_shape, action_shape, - max_action, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model += [nn.Linear(128, np.prod(action_shape))] - self.model = nn.Sequential(*self.model) - self._max = max_action - - def forward(self, s, **kwargs): - s = torch.tensor(s, device=self.device, dtype=torch.float) - batch = s.shape[0] - s = s.view(batch, -1) - logits = self.model(s) - logits = self._max * torch.tanh(logits) - return logits, None - - -class Critic(nn.Module): - def __init__(self, layer_num, state_shape, action_shape, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape) + np.prod(action_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model += [nn.Linear(128, 1)] - self.model = nn.Sequential(*self.model) - - def forward(self, s, a): - s = torch.tensor(s, device=self.device, dtype=torch.float) - if isinstance(a, np.ndarray): - a = torch.tensor(a, device=self.device, dtype=torch.float) - batch = s.shape[0] - s = s.view(batch, -1) - a = a.view(batch, -1) - logits = self.model(torch.cat([s, a], dim=1)) - return logits +if __name__ == '__main__': + from net import Actor, Critic +else: # pytest + from test.continuous.net import Actor, Critic def get_args(): diff --git a/test/discrete/__init__.py b/test/discrete/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/test/discrete/net.py b/test/discrete/net.py new file mode 100644 index 0000000..123ef43 --- /dev/null +++ b/test/discrete/net.py @@ -0,0 +1,49 @@ +import torch +import numpy as np +from torch import nn +import torch.nn.functional as F + + +class Net(nn.Module): + def __init__(self, layer_num, state_shape, action_shape=0, device='cpu'): + super().__init__() + self.device = device + self.model = [ + nn.Linear(np.prod(state_shape), 128), + nn.ReLU(inplace=True)] + for i in range(layer_num): + self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] + if action_shape: + self.model += [nn.Linear(128, np.prod(action_shape))] + self.model = nn.Sequential(*self.model) + + def forward(self, s, state=None, info={}): + s = torch.tensor(s, device=self.device, dtype=torch.float) + batch = s.shape[0] + s = s.view(batch, -1) + logits = self.model(s) + return logits, state + + +class Actor(nn.Module): + def __init__(self, preprocess_net, action_shape): + super().__init__() + self.preprocess = preprocess_net + self.last = nn.Linear(128, np.prod(action_shape)) + + def forward(self, s, state=None, info={}): + logits, h = self.preprocess(s, state) + logits = F.softmax(self.last(logits), dim=-1) + return logits, h + + +class Critic(nn.Module): + def __init__(self, preprocess_net): + super().__init__() + self.preprocess = preprocess_net + self.last = nn.Linear(128, 1) + + def forward(self, s): + logits, h = self.preprocess(s, None) + logits = self.last(logits) + return logits diff --git a/test/test_a2c.py b/test/discrete/test_a2c.py similarity index 73% rename from test/test_a2c.py rename to test/discrete/test_a2c.py index 584892f..3fa07af 100644 --- a/test/test_a2c.py +++ b/test/discrete/test_a2c.py @@ -3,8 +3,6 @@ import torch import pprint import argparse import numpy as np -from torch import nn -import torch.nn.functional as F from torch.utils.tensorboard import SummaryWriter from tianshou.policy import A2CPolicy @@ -12,50 +10,10 @@ from tianshou.env import SubprocVectorEnv from tianshou.trainer import onpolicy_trainer from tianshou.data import Collector, ReplayBuffer - -class Net(nn.Module): - def __init__(self, layer_num, state_shape, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model = nn.Sequential(*self.model) - - def forward(self, s): - s = torch.tensor(s, device=self.device, dtype=torch.float) - batch = s.shape[0] - s = s.view(batch, -1) - logits = self.model(s) - return logits - - -class Actor(nn.Module): - def __init__(self, preprocess_net, action_shape): - super().__init__() - self.model = nn.Sequential(*[ - preprocess_net, - nn.Linear(128, np.prod(action_shape)), - ]) - - def forward(self, s, **kwargs): - logits = F.softmax(self.model(s), dim=-1) - return logits, None - - -class Critic(nn.Module): - def __init__(self, preprocess_net): - super().__init__() - self.model = nn.Sequential(*[ - preprocess_net, - nn.Linear(128, 1), - ]) - - def forward(self, s): - logits = self.model(s) - return logits +if __name__ == '__main__': + from net import Net, Actor, Critic +else: # pytest + from test.discrete.net import Net, Actor, Critic def get_args(): @@ -103,7 +61,7 @@ def test_a2c(args=get_args()): train_envs.seed(args.seed) test_envs.seed(args.seed) # model - net = Net(args.layer_num, args.state_shape, args.device) + net = Net(args.layer_num, args.state_shape, device=args.device) actor = Actor(net, args.action_shape).to(args.device) critic = Critic(net).to(args.device) optim = torch.optim.Adam(list( diff --git a/test/test_dqn.py b/test/discrete/test_dqn.py similarity index 84% rename from test/test_dqn.py rename to test/discrete/test_dqn.py index 80053ed..5f047ba 100644 --- a/test/test_dqn.py +++ b/test/discrete/test_dqn.py @@ -3,7 +3,6 @@ import torch import pprint import argparse import numpy as np -from torch import nn from torch.utils.tensorboard import SummaryWriter from tianshou.policy import DQNPolicy @@ -11,24 +10,10 @@ from tianshou.env import SubprocVectorEnv from tianshou.trainer import offpolicy_trainer from tianshou.data import Collector, ReplayBuffer - -class Net(nn.Module): - def __init__(self, layer_num, state_shape, action_shape, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model += [nn.Linear(128, np.prod(action_shape))] - self.model = nn.Sequential(*self.model) - - def forward(self, s, **kwargs): - s = torch.tensor(s, device=self.device, dtype=torch.float) - batch = s.shape[0] - q = self.model(s.view(batch, -1)) - return q, None +if __name__ == '__main__': + from net import Net +else: # pytest + from test.discrete.net import Net def get_args(): diff --git a/test/test_pg.py b/test/discrete/test_pg.py similarity index 86% rename from test/test_pg.py rename to test/discrete/test_pg.py index 42a97c7..ae62709 100644 --- a/test/test_pg.py +++ b/test/discrete/test_pg.py @@ -4,7 +4,6 @@ import torch import pprint import argparse import numpy as np -from torch import nn from torch.utils.tensorboard import SummaryWriter from tianshou.policy import PGPolicy @@ -12,6 +11,11 @@ from tianshou.env import SubprocVectorEnv from tianshou.trainer import onpolicy_trainer from tianshou.data import Batch, Collector, ReplayBuffer +if __name__ == '__main__': + from net import Net +else: # pytest + from test.discrete.net import Net + def compute_return_base(batch, aa=None, bb=None, gamma=0.1): returns = np.zeros_like(batch.rew) @@ -66,25 +70,6 @@ def test_fn(size=2560): print(f'policy: {(time.time() - t) / cnt}') -class Net(nn.Module): - def __init__(self, layer_num, state_shape, action_shape, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model += [nn.Linear(128, np.prod(action_shape))] - self.model = nn.Sequential(*self.model) - - def forward(self, s, **kwargs): - s = torch.tensor(s, device=self.device, dtype=torch.float) - batch = s.shape[0] - logits = self.model(s.view(batch, -1)) - return logits, None - - def get_args(): parser = argparse.ArgumentParser() parser.add_argument('--task', type=str, default='CartPole-v0') @@ -126,7 +111,9 @@ def test_pg(args=get_args()): train_envs.seed(args.seed) test_envs.seed(args.seed) # model - net = Net(args.layer_num, args.state_shape, args.action_shape, args.device) + net = Net( + args.layer_num, args.state_shape, args.action_shape, + device=args.device) net = net.to(args.device) optim = torch.optim.Adam(net.parameters(), lr=args.lr) dist = torch.distributions.Categorical diff --git a/test/test_ppo.py b/test/discrete/test_ppo.py similarity index 74% rename from test/test_ppo.py rename to test/discrete/test_ppo.py index c5816b2..5f9cec0 100644 --- a/test/test_ppo.py +++ b/test/discrete/test_ppo.py @@ -3,8 +3,6 @@ import torch import pprint import argparse import numpy as np -from torch import nn -import torch.nn.functional as F from torch.utils.tensorboard import SummaryWriter from tianshou.policy import PPOPolicy @@ -12,50 +10,10 @@ from tianshou.env import SubprocVectorEnv from tianshou.trainer import onpolicy_trainer from tianshou.data import Collector, ReplayBuffer - -class Net(nn.Module): - def __init__(self, layer_num, state_shape, device='cpu'): - super().__init__() - self.device = device - self.model = [ - nn.Linear(np.prod(state_shape), 128), - nn.ReLU(inplace=True)] - for i in range(layer_num): - self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)] - self.model = nn.Sequential(*self.model) - - def forward(self, s): - s = torch.tensor(s, device=self.device, dtype=torch.float) - batch = s.shape[0] - s = s.view(batch, -1) - logits = self.model(s) - return logits - - -class Actor(nn.Module): - def __init__(self, preprocess_net, action_shape): - super().__init__() - self.model = nn.Sequential(*[ - preprocess_net, - nn.Linear(128, np.prod(action_shape)), - ]) - - def forward(self, s, **kwargs): - logits = F.softmax(self.model(s), dim=-1) - return logits, None - - -class Critic(nn.Module): - def __init__(self, preprocess_net): - super().__init__() - self.model = nn.Sequential(*[ - preprocess_net, - nn.Linear(128, 1), - ]) - - def forward(self, s): - logits = self.model(s) - return logits +if __name__ == '__main__': + from net import Net, Actor, Critic +else: # pytest + from test.discrete.net import Net, Actor, Critic def get_args(): @@ -104,7 +62,7 @@ def test_ppo(args=get_args()): train_envs.seed(args.seed) test_envs.seed(args.seed) # model - net = Net(args.layer_num, args.state_shape, args.device) + net = Net(args.layer_num, args.state_shape, device=args.device) actor = Actor(net, args.action_shape).to(args.device) critic = Critic(net).to(args.device) optim = torch.optim.Adam(list(