Tianshou/test/discrete/net.py

import torch
import numpy as np
from torch import nn
import torch.nn.functional as F


class Net(nn.Module):
    """Fully connected network built from 128-unit hidden layers.

    The stack is ``Linear(prod(state_shape), 128) -> ReLU``, followed by
    ``layer_num`` repetitions of ``Linear(128, 128) -> ReLU`` and, when
    ``action_shape`` is truthy, a final ``Linear(128, prod(action_shape))``.

    :param layer_num: number of extra ``Linear(128, 128) + ReLU`` pairs
        appended after the input layer.
    :param state_shape: shape (int or sequence of ints) of one observation;
        its product is the width of the input layer.
    :param action_shape: shape (int or sequence of ints) of the output; a
        falsy value (the default ``0``) omits the output layer so the
        network returns 128-dimensional features.
    :param device: device on which non-tensor inputs are materialised in
        :meth:`forward`.
    """

    def __init__(self, layer_num, state_shape, action_shape=0, device='cpu'):
        super().__init__()
        self.device = device
        # np.prod returns a NumPy scalar (and the float 1.0 for an empty
        # shape), whereas nn.Linear needs plain Python ints.
        layers = [
            nn.Linear(int(np.prod(state_shape)), 128),
            nn.ReLU(inplace=True)]
        for _ in range(layer_num):
            layers += [nn.Linear(128, 128), nn.ReLU(inplace=True)]
        if action_shape:
            layers += [nn.Linear(128, int(np.prod(action_shape)))]
        self.model = nn.Sequential(*layers)

    def forward(self, s, state=None, info=None):
        """Flatten ``s`` per sample and run it through the layer stack.

        :param s: batch of observations; anything that is not already a
            ``torch.Tensor`` is converted to a float tensor on
            ``self.device``. Tensors are used as given.
        :param state: passed through untouched and returned as-is.
        :param info: unused; accepted only so callers may supply it.
        :return: ``(logits, state)`` where ``logits`` is the output of the
            layer stack for the flattened batch.
        """
        if not isinstance(s, torch.Tensor):
            s = torch.tensor(s, device=self.device, dtype=torch.float)
        batch = s.shape[0]
        # reshape (rather than view) also accepts non-contiguous tensors.
        s = s.reshape(batch, -1)
        logits = self.model(s)
        return logits, state


class Actor(nn.Module):
    """Policy head that turns preprocessed features into probabilities.

    :param preprocess_net: callable invoked as ``preprocess_net(s, state)``
        that returns a ``(features, hidden)`` pair; the features are fed to
        a ``Linear(128, ...)`` layer, so their last dimension must be 128.
    :param action_shape: shape (int or sequence of ints) of the action
        space; its product is the number of outputs.
    """

    def __init__(self, preprocess_net, action_shape):
        super().__init__()
        self.preprocess = preprocess_net
        # np.prod returns a NumPy scalar (and the float 1.0 for an empty
        # shape), whereas nn.Linear needs a plain Python int.
        self.last = nn.Linear(128, int(np.prod(action_shape)))

    def forward(self, s, state=None, info=None):
        """Return softmax probabilities over the last dimension.

        :param s: input forwarded to ``preprocess_net``.
        :param state: forwarded to ``preprocess_net`` as its second argument.
        :param info: unused; accepted only so callers may supply it.
        :return: ``(probs, h)`` where ``probs`` is the softmax of the output
            layer and ``h`` is the second value returned by
            ``preprocess_net``.
        """
        logits, h = self.preprocess(s, state)
        logits = F.softmax(self.last(logits), dim=-1)
        return logits, h


class Critic(nn.Module):
    """Single-output head applied on top of preprocessed features.

    :param preprocess_net: callable invoked as ``preprocess_net(s, None)``
        that returns a ``(features, hidden)`` pair; the features are fed to
        a ``Linear(128, 1)`` layer, so their last dimension must be 128.
    """

    def __init__(self, preprocess_net):
        super().__init__()
        self.preprocess = preprocess_net
        self.last = nn.Linear(128, 1)

    def forward(self, s):
        """Return the output of the final linear layer for input ``s``.

        The second value returned by ``preprocess_net`` is discarded.
        """
        features, _ = self.preprocess(s, None)
        return self.last(features)
refract test code 2020-03-21 10:58:01 +08:00			`import torch`
			`import numpy as np`
			`from torch import nn`
			`import torch.nn.functional as F`


			`class Net(nn.Module):`
			`def __init__(self, layer_num, state_shape, action_shape=0, device='cpu'):`
			`super().__init__()`
			`self.device = device`
			`self.model = [`
			`nn.Linear(np.prod(state_shape), 128),`
			`nn.ReLU(inplace=True)]`
			`for i in range(layer_num):`
			`self.model += [nn.Linear(128, 128), nn.ReLU(inplace=True)]`
			`if action_shape:`
			`self.model += [nn.Linear(128, np.prod(action_shape))]`
			`self.model = nn.Sequential(*self.model)`

			`def forward(self, s, state=None, info={}):`
fix collector 2020-03-25 14:08:28 +08:00			`if not isinstance(s, torch.Tensor):`
			`s = torch.tensor(s, device=self.device, dtype=torch.float)`
refract test code 2020-03-21 10:58:01 +08:00			`batch = s.shape[0]`
			`s = s.view(batch, -1)`
			`logits = self.model(s)`
			`return logits, state`


			`class Actor(nn.Module):`
			`def __init__(self, preprocess_net, action_shape):`
			`super().__init__()`
			`self.preprocess = preprocess_net`
			`self.last = nn.Linear(128, np.prod(action_shape))`

			`def forward(self, s, state=None, info={}):`
			`logits, h = self.preprocess(s, state)`
			`logits = F.softmax(self.last(logits), dim=-1)`
			`return logits, h`


			`class Critic(nn.Module):`
			`def __init__(self, preprocess_net):`
			`super().__init__()`
			`self.preprocess = preprocess_net`
			`self.last = nn.Linear(128, 1)`

			`def forward(self, s):`
			`logits, h = self.preprocess(s, None)`
			`logits = self.last(logits)`
			`return logits`