refactor test code

This commit is contained in:
Trinkle23897 2020-03-21 10:58:01 +08:00
parent d64d78d769
commit 8bd8246b16
14 changed files with 160 additions and 210 deletions

0
test/base/__init__.py Normal file
View File

30
test/base/env.py Normal file
View File

@ -0,0 +1,30 @@
import gym
import time
class MyTestEnv(gym.Env):
    """Deterministic 1-D corridor environment for the test suite.

    The agent starts at cell 0 of a track ``size`` cells long.  Action 1
    steps right, action 0 steps left (clamped at 0).  A reward of 1 is
    granted exactly once, on the step that first reaches ``size``; every
    other step yields 0.  ``sleep`` optionally delays each step (used to
    exercise timing in the vectorized-env tests).
    """

    def __init__(self, size, sleep=0):
        self.size = size
        self.sleep = sleep
        self.reset()

    def reset(self):
        """Move back to the left edge and clear the terminal flag."""
        self.done = False
        self.index = 0
        return self.index

    def step(self, action):
        """Advance one step; return ``(obs, reward, done, info)``."""
        if self.done:
            raise ValueError('step after done !!!')
        if self.sleep > 0:
            time.sleep(self.sleep)
        if self.index == self.size:
            # Already at the goal: report terminal again without moving.
            self.done = True
            return self.index, 0, True, {}
        if action == 0:
            self.index = max(0, self.index - 1)
            return self.index, 0, False, {}
        if action == 1:
            self.index += 1
            self.done = (self.index == self.size)
            reward = 1 if self.done else 0
            return self.index, reward, self.done, {}

View File

@ -1,8 +1,8 @@
from tianshou.data import ReplayBuffer
if __name__ == '__main__':
from test_env import MyTestEnv
from env import MyTestEnv
else: # pytest
from test.test_env import MyTestEnv
from test.base.env import MyTestEnv
def test_replaybuffer(size=10, bufsize=20):

View File

@ -1,36 +1,12 @@
import gym
import time
import pytest
import numpy as np
from tianshou.env import FrameStack, VectorEnv, SubprocVectorEnv, RayVectorEnv
# NOTE(review): this is a deletion hunk — the class below was removed in
# this commit and moved verbatim to test/base/env.py so all test modules
# can share one copy.
class MyTestEnv(gym.Env):
    # 1-D walk over `size` cells; reward 1 only on the step that first
    # reaches `size`; `sleep` delays every step for timing tests.
    def __init__(self, size, sleep=0):
        self.size = size
        self.sleep = sleep
        self.reset()

    def reset(self):
        # Back to cell 0, not terminal.
        self.done = False
        self.index = 0
        return self.index

    def step(self, action):
        # Returns (obs, reward, done, info); raises if stepped after done.
        if self.done:
            raise ValueError('step after done !!!')
        if self.sleep > 0:
            time.sleep(self.sleep)
        if self.index == self.size:
            self.done = True
            return self.index, 0, True, {}
        if action == 0:
            # Move left, clamped at the left edge.
            self.index = max(self.index - 1, 0)
            return self.index, 0, False, {}
        elif action == 1:
            self.index += 1
            self.done = self.index == self.size
            return self.index, int(self.done), self.done, {}
if __name__ == '__main__':
from env import MyTestEnv
else: # pytest
from test.base.env import MyTestEnv
def test_framestack(k=4, size=10):

View File

49
test/continuous/net.py Normal file
View File

@ -0,0 +1,49 @@
import torch
import numpy as np
from torch import nn
class Actor(nn.Module):
    """MLP policy network for continuous control.

    Flattens the observation, applies ``layer_num`` hidden layers of
    width 128, and squashes the output into
    ``[-max_action, max_action]`` with tanh.
    """

    def __init__(self, layer_num, state_shape, action_shape,
                 max_action, device='cpu'):
        super().__init__()
        self.device = device
        layers = [nn.Linear(np.prod(state_shape), 128),
                  nn.ReLU(inplace=True)]
        for _ in range(layer_num):
            layers.extend([nn.Linear(128, 128), nn.ReLU(inplace=True)])
        layers.append(nn.Linear(128, np.prod(action_shape)))
        self.model = nn.Sequential(*layers)
        self._max = max_action

    def forward(self, s, **kwargs):
        """Return ``(action, state)``; the state slot is always ``None``."""
        obs = torch.tensor(s, device=self.device, dtype=torch.float)
        obs = obs.view(obs.shape[0], -1)
        action = self._max * torch.tanh(self.model(obs))
        return action, None
class Critic(nn.Module):
    """MLP Q-network mapping ``(observation, action)`` to a scalar value."""

    def __init__(self, layer_num, state_shape, action_shape, device='cpu'):
        super().__init__()
        self.device = device
        layers = [
            nn.Linear(np.prod(state_shape) + np.prod(action_shape), 128),
            nn.ReLU(inplace=True),
        ]
        for _ in range(layer_num):
            layers.extend([nn.Linear(128, 128), nn.ReLU(inplace=True)])
        layers.append(nn.Linear(128, 1))
        self.model = nn.Sequential(*layers)

    def forward(self, s, a):
        """Return ``Q(s, a)`` with shape ``(batch, 1)``."""
        obs = torch.tensor(s, device=self.device, dtype=torch.float)
        act = a
        if isinstance(act, np.ndarray):
            act = torch.tensor(act, device=self.device, dtype=torch.float)
        batch = obs.shape[0]
        joint = torch.cat([obs.view(batch, -1), act.view(batch, -1)], dim=1)
        return self.model(joint)

View File

@ -3,7 +3,6 @@ import torch
import pprint
import argparse
import numpy as np
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from tianshou.policy import DDPGPolicy
@ -11,51 +10,10 @@ from tianshou.trainer import offpolicy_trainer
from tianshou.data import Collector, ReplayBuffer
from tianshou.env import VectorEnv, SubprocVectorEnv
# NOTE(review): deletion hunk — this class was removed from the DDPG test
# in this commit; the identical implementation now lives in
# test/continuous/net.py.
class Actor(nn.Module):
    # MLP policy: flatten obs -> 128-wide hidden layers -> tanh-scaled action.
    def __init__(self, layer_num, state_shape, action_shape,
                 max_action, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model += [nn.Linear(128, np.prod(action_shape))]
        self.model = nn.Sequential(*self.model)
        self._max = max_action

    def forward(self, s, **kwargs):
        # Returns (action in [-max, max], None).
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        s = s.view(batch, -1)
        logits = self.model(s)
        logits = self._max * torch.tanh(logits)
        return logits, None
# NOTE(review): deletion hunk — this class was removed from the DDPG test
# in this commit; the identical implementation now lives in
# test/continuous/net.py.
class Critic(nn.Module):
    # MLP Q-network: concatenated (obs, action) -> scalar Q-value.
    def __init__(self, layer_num, state_shape, action_shape, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape) + np.prod(action_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model += [nn.Linear(128, 1)]
        self.model = nn.Sequential(*self.model)

    def forward(self, s, a):
        # Returns Q(s, a) with shape (batch, 1).
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        if isinstance(a, np.ndarray):
            a = torch.tensor(a, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        s = s.view(batch, -1)
        a = a.view(batch, -1)
        logits = self.model(torch.cat([s, a], dim=1))
        return logits
if __name__ == '__main__':
from net import Actor, Critic
else: # pytest
from test.continuous.net import Actor, Critic
def get_args():

View File

49
test/discrete/net.py Normal file
View File

@ -0,0 +1,49 @@
import torch
import numpy as np
from torch import nn
import torch.nn.functional as F
class Net(nn.Module):
    """Shared MLP body for the discrete-action tests.

    Flattens the observation and applies ``layer_num`` hidden layers of
    width 128.  If ``action_shape`` is non-zero, a final projection to
    ``prod(action_shape)`` outputs is appended, so the same class serves
    both as a feature extractor (for Actor/Critic heads) and as a full
    Q/logits network.
    """

    def __init__(self, layer_num, state_shape, action_shape=0, device='cpu'):
        super().__init__()
        self.device = device
        layers = [nn.Linear(np.prod(state_shape), 128),
                  nn.ReLU(inplace=True)]
        for _ in range(layer_num):
            layers += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        if action_shape:
            layers += [nn.Linear(128, np.prod(action_shape))]
        self.model = nn.Sequential(*layers)

    def forward(self, s, state=None, info=None):
        """Return ``(logits, state)``; ``state`` is passed through untouched.

        ``info`` is accepted for interface compatibility and ignored.
        Fixed: the default was previously a mutable ``{}`` (shared across
        calls); ``None`` is the safe, equivalent default since ``info``
        is never read.
        """
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        logits = self.model(s.view(batch, -1))
        return logits, state
class Actor(nn.Module):
    """Policy head: softmax action distribution on top of a shared
    128-dim feature extractor (``preprocess_net``)."""

    def __init__(self, preprocess_net, action_shape):
        super().__init__()
        self.preprocess = preprocess_net
        self.last = nn.Linear(128, np.prod(action_shape))

    def forward(self, s, state=None, info=None):
        """Return ``(action probabilities, hidden state)``.

        ``info`` is accepted for interface compatibility and ignored.
        Fixed: the default was previously a mutable ``{}`` (shared across
        calls); ``None`` is the safe, equivalent default since ``info``
        is never read.
        """
        logits, h = self.preprocess(s, state)
        probs = F.softmax(self.last(logits), dim=-1)
        return probs, h
class Critic(nn.Module):
    """Value head: scalar state-value on top of a shared 128-dim
    feature extractor (``preprocess_net``)."""

    def __init__(self, preprocess_net):
        super().__init__()
        self.preprocess = preprocess_net
        self.last = nn.Linear(128, 1)

    def forward(self, s):
        """Return ``V(s)`` with shape ``(batch, 1)``."""
        feats, _ = self.preprocess(s, None)
        return self.last(feats)

View File

@ -3,8 +3,6 @@ import torch
import pprint
import argparse
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from tianshou.policy import A2CPolicy
@ -12,50 +10,10 @@ from tianshou.env import SubprocVectorEnv
from tianshou.trainer import onpolicy_trainer
from tianshou.data import Collector, ReplayBuffer
# NOTE(review): deletion hunk — removed from the A2C test in this commit;
# superseded by the shared test/discrete/net.py version (which also
# threads `state` through forward and can append an action head).
class Net(nn.Module):
    # Feature extractor: flatten obs -> 128-wide hidden layers.
    def __init__(self, layer_num, state_shape, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model = nn.Sequential(*self.model)

    def forward(self, s):
        # Returns a (batch, 128) feature tensor.
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        s = s.view(batch, -1)
        logits = self.model(s)
        return logits
# NOTE(review): deletion hunk — removed from the A2C test in this commit;
# superseded by test/discrete/net.py.
class Actor(nn.Module):
    # Softmax policy head stacked directly on the preprocess network.
    def __init__(self, preprocess_net, action_shape):
        super().__init__()
        self.model = nn.Sequential(*[
            preprocess_net,
            nn.Linear(128, np.prod(action_shape)),
        ])

    def forward(self, s, **kwargs):
        # Returns (action probabilities, None).
        logits = F.softmax(self.model(s), dim=-1)
        return logits, None
# NOTE(review): deletion hunk — removed from the A2C test in this commit;
# superseded by test/discrete/net.py.
class Critic(nn.Module):
    # Scalar value head stacked directly on the preprocess network.
    def __init__(self, preprocess_net):
        super().__init__()
        self.model = nn.Sequential(*[
            preprocess_net,
            nn.Linear(128, 1),
        ])

    def forward(self, s):
        # Returns V(s) with shape (batch, 1).
        logits = self.model(s)
        return logits
if __name__ == '__main__':
from net import Net, Actor, Critic
else: # pytest
from test.discrete.net import Net, Actor, Critic
def get_args():
@ -103,7 +61,7 @@ def test_a2c(args=get_args()):
train_envs.seed(args.seed)
test_envs.seed(args.seed)
# model
net = Net(args.layer_num, args.state_shape, args.device)
net = Net(args.layer_num, args.state_shape, device=args.device)
actor = Actor(net, args.action_shape).to(args.device)
critic = Critic(net).to(args.device)
optim = torch.optim.Adam(list(

View File

@ -3,7 +3,6 @@ import torch
import pprint
import argparse
import numpy as np
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from tianshou.policy import DQNPolicy
@ -11,24 +10,10 @@ from tianshou.env import SubprocVectorEnv
from tianshou.trainer import offpolicy_trainer
from tianshou.data import Collector, ReplayBuffer
# NOTE(review): deletion hunk — removed from the DQN test in this commit;
# superseded by test/discrete/net.py.
class Net(nn.Module):
    # Q-network: flatten obs -> MLP -> one Q-value per action.
    def __init__(self, layer_num, state_shape, action_shape, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model += [nn.Linear(128, np.prod(action_shape))]
        self.model = nn.Sequential(*self.model)

    def forward(self, s, **kwargs):
        # Returns (Q-values, None).
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        q = self.model(s.view(batch, -1))
        return q, None
if __name__ == '__main__':
from net import Net
else: # pytest
from test.discrete.net import Net
def get_args():

View File

@ -4,7 +4,6 @@ import torch
import pprint
import argparse
import numpy as np
from torch import nn
from torch.utils.tensorboard import SummaryWriter
from tianshou.policy import PGPolicy
@ -12,6 +11,11 @@ from tianshou.env import SubprocVectorEnv
from tianshou.trainer import onpolicy_trainer
from tianshou.data import Batch, Collector, ReplayBuffer
if __name__ == '__main__':
from net import Net
else: # pytest
from test.discrete.net import Net
def compute_return_base(batch, aa=None, bb=None, gamma=0.1):
returns = np.zeros_like(batch.rew)
@ -66,25 +70,6 @@ def test_fn(size=2560):
print(f'policy: {(time.time() - t) / cnt}')
# NOTE(review): deletion hunk — removed from the PG test in this commit;
# superseded by test/discrete/net.py.
class Net(nn.Module):
    # Logits network: flatten obs -> MLP -> one logit per action.
    def __init__(self, layer_num, state_shape, action_shape, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model += [nn.Linear(128, np.prod(action_shape))]
        self.model = nn.Sequential(*self.model)

    def forward(self, s, **kwargs):
        # Returns (logits, None).
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        logits = self.model(s.view(batch, -1))
        return logits, None
def get_args():
parser = argparse.ArgumentParser()
parser.add_argument('--task', type=str, default='CartPole-v0')
@ -126,7 +111,9 @@ def test_pg(args=get_args()):
train_envs.seed(args.seed)
test_envs.seed(args.seed)
# model
net = Net(args.layer_num, args.state_shape, args.action_shape, args.device)
net = Net(
args.layer_num, args.state_shape, args.action_shape,
device=args.device)
net = net.to(args.device)
optim = torch.optim.Adam(net.parameters(), lr=args.lr)
dist = torch.distributions.Categorical

View File

@ -3,8 +3,6 @@ import torch
import pprint
import argparse
import numpy as np
from torch import nn
import torch.nn.functional as F
from torch.utils.tensorboard import SummaryWriter
from tianshou.policy import PPOPolicy
@ -12,50 +10,10 @@ from tianshou.env import SubprocVectorEnv
from tianshou.trainer import onpolicy_trainer
from tianshou.data import Collector, ReplayBuffer
# NOTE(review): deletion hunk — removed from the PPO test in this commit;
# superseded by test/discrete/net.py.
class Net(nn.Module):
    # Feature extractor: flatten obs -> 128-wide hidden layers.
    def __init__(self, layer_num, state_shape, device='cpu'):
        super().__init__()
        self.device = device
        self.model = [
            nn.Linear(np.prod(state_shape), 128),
            nn.ReLU(inplace=True)]
        for i in range(layer_num):
            self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        self.model = nn.Sequential(*self.model)

    def forward(self, s):
        # Returns a (batch, 128) feature tensor.
        s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        s = s.view(batch, -1)
        logits = self.model(s)
        return logits
# NOTE(review): deletion hunk — removed from the PPO test in this commit;
# superseded by test/discrete/net.py.
class Actor(nn.Module):
    # Softmax policy head stacked directly on the preprocess network.
    def __init__(self, preprocess_net, action_shape):
        super().__init__()
        self.model = nn.Sequential(*[
            preprocess_net,
            nn.Linear(128, np.prod(action_shape)),
        ])

    def forward(self, s, **kwargs):
        # Returns (action probabilities, None).
        logits = F.softmax(self.model(s), dim=-1)
        return logits, None
# NOTE(review): deletion hunk — removed from the PPO test in this commit;
# superseded by test/discrete/net.py.
class Critic(nn.Module):
    # Scalar value head stacked directly on the preprocess network.
    def __init__(self, preprocess_net):
        super().__init__()
        self.model = nn.Sequential(*[
            preprocess_net,
            nn.Linear(128, 1),
        ])

    def forward(self, s):
        # Returns V(s) with shape (batch, 1).
        logits = self.model(s)
        return logits
if __name__ == '__main__':
from net import Net, Actor, Critic
else: # pytest
from test.discrete.net import Net, Actor, Critic
def get_args():
@ -104,7 +62,7 @@ def test_ppo(args=get_args()):
train_envs.seed(args.seed)
test_envs.seed(args.seed)
# model
net = Net(args.layer_num, args.state_shape, args.device)
net = Net(args.layer_num, args.state_shape, device=args.device)
actor = Actor(net, args.action_shape).to(args.device)
critic = Critic(net).to(args.device)
optim = torch.optim.Adam(list(