minor reformat (#2)

* update atari.py
* fix setup.py to pass the pytest
This commit is contained in:
parent fdc969b830
commit 3c0a09fefd
setup.py
@@ -37,11 +37,19 @@ setup(
         'examples', 'examples.*',
         'docs', 'docs.*']),
     install_requires=[
-        'gym',
+        'gym>=0.15.0',
         'tqdm',
         'numpy',
         'cloudpickle',
         'tensorboard',
         'torch>=1.4.0',
     ],
+    extras_require={
+        'atari': [
+            'atari_py',
+        ],
+        'mujoco': [
+            'mujoco_py',
+        ]
+    },
 )
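Note on the new extras_require block: atari_py and mujoco_py stay out of the default dependency set and are pulled in on demand through the standard setuptools extras syntax, e.g. pip install tianshou[atari] or pip install tianshou[mujoco].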
@@ -1,4 +1,5 @@
 from tianshou.data import ReplayBuffer
+
 if __name__ == '__main__':
     from env import MyTestEnv
 else:  # pytest
@@ -11,6 +11,7 @@ else:  # pytest

 class MyPolicy(BasePolicy):
     """docstring for MyPolicy"""
+
     def __init__(self):
         super().__init__()

@@ -1,4 +1,4 @@
-from tianshou import data, env, utils, policy, trainer,\
+from tianshou import data, env, utils, policy, trainer, \
     exploration

 __version__ = '0.2.0'
@@ -37,7 +37,7 @@ class Batch(object):
         else:
             raise TypeError(
                 'No support for append with type {} in class Batch.'
                 .format(type(batch.__dict__[k])))

     def split(self, size=None, permute=True):
         length = min([
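For context on split(): the on-policy learners further down iterate minibatches with for b in batch.split(batch_size). A minimal sketch of the idea, independent of tianshou's actual implementation (names and details here are illustrative only):

import numpy as np

def split_indices(length, size=None, permute=True):
    # Yield minibatch index arrays covering [0, length).
    # With permute=True the order is reshuffled, as an
    # on-policy learner would want between epochs.
    size = length if size is None else size
    order = np.random.permutation(length) if permute else np.arange(length)
    for i in range(0, length, size):
        yield order[i:i + size]

# Example: minibatches of at most 4 over 10 samples (sizes 4, 4, 2).
for idx in split_indices(10, size=4):
    print(idx)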
@@ -47,7 +47,7 @@ class ReplayBuffer(object):
         '''
         weight: importance weights, disabled here
         '''
-        assert isinstance(info, dict),\
+        assert isinstance(info, dict), \
             'You should return a dict in the last argument of env.step().'
         self._add_to_buffer('obs', obs)
         self._add_to_buffer('act', act)
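The recurring change in this commit is the same one-character style fix: adding a space between the comma and the trailing line-continuation backslash. This is presumably what lets the lint-on-pytest run pass, since a comma immediately followed by a backslash trips pycodestyle-style checks (E231, missing whitespace after comma); the exact failing rule is not shown in this capture.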
@@ -31,8 +31,8 @@ class Collector(object):
         if self._multi_env:
             self.env_num = len(env)
             if isinstance(self.buffer, list):
-                assert len(self.buffer) == self.env_num,\
-                    'The number of data buffer does not match the number of '\
+                assert len(self.buffer) == self.env_num, \
+                    'The number of data buffer does not match the number of ' \
                     'input env.'
                 self._multi_buf = True
             elif isinstance(self.buffer, ReplayBuffer):
@@ -87,7 +87,7 @@ class Collector(object):
         if not self._multi_env:
             n_episode = np.sum(n_episode)
         start_time = time.time()
-        assert sum([(n_step != 0), (n_episode != 0)]) == 1,\
+        assert sum([(n_step != 0), (n_episode != 0)]) == 1, \
             "One and only one collection number specification permitted!"
         cur_step = 0
         cur_episode = np.zeros(self.env_num) if self._multi_env else 0
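The assert being rewrapped here encodes "exactly one of n_step / n_episode may be non-zero" by summing booleans (True counts as 1). A standalone check of the pattern:

def exactly_one_set(n_step, n_episode):
    # True == 1 and False == 0 in Python, so summing the two
    # comparisons counts how many arguments are non-zero.
    return sum([(n_step != 0), (n_episode != 0)]) == 1

assert exactly_one_set(100, 0)       # only n_step given: ok
assert exactly_one_set(0, 5)         # only n_episode given: ok
assert not exactly_one_set(100, 5)   # both given: rejected
assert not exactly_one_set(0, 0)     # neither given: rejected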
tianshou/env/__init__.py
@@ -1,6 +1,6 @@
 from tianshou.env.utils import CloudpickleWrapper
 from tianshou.env.common import EnvWrapper, FrameStack
-from tianshou.env.vecenv import BaseVectorEnv, VectorEnv,\
+from tianshou.env.vecenv import BaseVectorEnv, VectorEnv, \
     SubprocVectorEnv, RayVectorEnv

 __all__ = [
tianshou/env/vecenv.py
@@ -1,6 +1,7 @@
 import numpy as np
 from abc import ABC, abstractmethod
 from multiprocessing import Process, Pipe
+
 try:
     import ray
 except ImportError:
@@ -122,7 +123,7 @@ class SubprocVectorEnv(BaseVectorEnv):
             zip(*[Pipe() for _ in range(self.env_num)])
         self.processes = [
             Process(target=worker, args=(
                 parent, child, CloudpickleWrapper(env_fn)), daemon=True)
             for (parent, child, env_fn) in zip(
                 self.parent_remote, self.child_remote, env_fns)
         ]
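The zip(*[Pipe() for _ in range(...)]) idiom in this hunk splits a list of (parent, child) connection pairs into two parallel tuples. A self-contained sketch of the same pipe-per-worker pattern; the echoing worker below is made up purely for illustration:

from multiprocessing import Process, Pipe

def worker(parent, child):
    # Runs in the subprocess: close the inherited parent end,
    # then echo back whatever command arrives on the child end.
    parent.close()
    while True:
        cmd = child.recv()
        if cmd == 'close':
            child.close()
            break
        child.send(('echo', cmd))

if __name__ == '__main__':
    n = 3
    # One Pipe() per env; zip(*...) turns the pairs into
    # a tuple of parent ends and a tuple of child ends.
    parents, children = zip(*[Pipe() for _ in range(n)])
    procs = [Process(target=worker, args=(p, c), daemon=True)
             for p, c in zip(parents, children)]
    for proc in procs:
        proc.start()
    for i, p in enumerate(parents):
        p.send(f'step-{i}')
    print([p.recv() for p in parents])
    for p in parents:
        p.send('close')
    for proc in procs:
        proc.join()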
@@ -14,7 +14,7 @@ class OUNoise(object):
         if self.x is None or self.x.shape != size:
             self.x = 0
         self.x = self.x + self.alpha * (mu - self.x) + \
             self.beta * np.random.normal(size=size)
         return self.x

     def reset(self):
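The rewrapped pair of lines is an Ornstein-Uhlenbeck-style update, x <- x + alpha * (mu - x) + beta * N(0, 1), giving temporally correlated exploration noise for continuous actions. A minimal standalone version (the parameter values are illustrative, not tianshou's defaults):

import numpy as np

class SimpleOUNoise:
    # x drifts toward mu at rate alpha and is perturbed by
    # Gaussian noise scaled by beta, so successive samples
    # are correlated rather than independent.
    def __init__(self, alpha=0.15, beta=0.3):
        self.alpha, self.beta = alpha, beta
        self.x = None

    def __call__(self, size, mu=0.0):
        if self.x is None or self.x.shape != size:
            self.x = np.zeros(size)
        self.x = self.x + self.alpha * (mu - self.x) \
            + self.beta * np.random.normal(size=size)
        return self.x

    def reset(self):
        self.x = None

noise = SimpleOUNoise()
print(noise(size=(2,)))  # first correlated sample, shape (2,)
print(noise(size=(2,)))  # next sample drifts from the previous one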
@@ -40,8 +40,8 @@ class A2CPolicy(PGPolicy):
         vf_loss = F.mse_loss(r[:, None], v)
         ent_loss = dist.entropy().mean()
         loss = actor_loss \
             + self._w_vf * vf_loss \
             - self._w_ent * ent_loss
         loss.backward()
         if self._grad_norm:
             nn.utils.clip_grad_norm_(
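The expression being rewrapped is the usual A2C objective: actor (policy-gradient) loss, plus a weighted value-function loss, minus a weighted entropy bonus. A toy computation with stand-in numbers (the coefficients are illustrative, not the policy's defaults):

import torch

# Stand-in values; in A2CPolicy these come from the sampled batch.
actor_loss = torch.tensor(0.52)
vf_loss = torch.tensor(1.30)
ent_loss = torch.tensor(1.61)
w_vf, w_ent = 0.5, 0.01  # illustrative coefficients

# Same combination as in the diff: minimize actor and value losses
# while maximizing entropy (hence the minus sign).
loss = actor_loss + w_vf * vf_loss - w_ent * ent_loss
print(loss)  # ~ tensor(1.1539)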
@@ -5,6 +5,8 @@ import torch.nn.functional as F

 from tianshou.data import Batch
 from tianshou.policy import BasePolicy
+
+
 # from tianshou.exploration import OUNoise


@@ -35,7 +35,7 @@ class PGPolicy(BasePolicy):
     def learn(self, batch, batch_size=None, repeat=1):
         losses = []
         batch.returns = (batch.returns - batch.returns.mean()) \
             / (batch.returns.std() + self._eps)
         for _ in range(repeat):
             for b in batch.split(batch_size):
                 self.optim.zero_grad()
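The wrapped expression standardizes the returns to roughly zero mean and unit variance before the update, with self._eps guarding against division by a zero standard deviation. In isolation (the eps value here is assumed; the diff does not show it):

import numpy as np

returns = np.array([1.0, 3.0, 5.0, 7.0])
eps = np.finfo(np.float32).eps.item()  # tiny guard constant (assumed)
normalized = (returns - returns.mean()) / (returns.std() + eps)
print(normalized.mean())  # ~0.0
print(normalized.std())   # ~1.0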
@@ -59,7 +59,7 @@ class PPOPolicy(PGPolicy):
     def learn(self, batch, batch_size=None, repeat=1):
         losses, clip_losses, vf_losses, ent_losses = [], [], [], []
         batch.returns = (batch.returns - batch.returns.mean()) \
             / (batch.returns.std() + self._eps)
         batch.act = torch.tensor(batch.act)
         batch.returns = torch.tensor(batch.returns)[:, None]
         for _ in range(repeat):
@@ -82,13 +82,13 @@ class PPOPolicy(PGPolicy):
                 ent_loss = dist.entropy().mean()
                 ent_losses.append(ent_loss.detach().cpu().numpy())
                 loss = clip_loss \
                     + self._w_vf * vf_loss - self._w_ent * ent_loss
                 losses.append(loss.detach().cpu().numpy())
                 self.optim.zero_grad()
                 loss.backward()
                 nn.utils.clip_grad_norm_(list(
                     self.actor.parameters()) + list(self.critic.parameters()),
                     self._max_grad_norm)
                 self.optim.step()
                 self.sync_weight()
             return {
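The clip_grad_norm_ call above concatenates the actor and critic parameter lists so both networks are clipped under a single global gradient norm. A small sketch of the same pattern; the two Linear layers stand in for the real networks:

import torch
import torch.nn as nn

actor = nn.Linear(4, 2)   # placeholder for the real actor network
critic = nn.Linear(4, 1)  # placeholder for the real critic network
max_grad_norm = 0.5       # illustrative clip threshold

loss = actor(torch.randn(8, 4)).pow(2).mean() \
    + critic(torch.randn(8, 4)).pow(2).mean()
loss.backward()

# Clip the concatenated parameter list so the *joint* gradient
# norm is at most max_grad_norm, as in PPOPolicy.learn above.
nn.utils.clip_grad_norm_(
    list(actor.parameters()) + list(critic.parameters()),
    max_grad_norm)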