n+e 94bfb32cc1
optimize training procedure and improve code coverage (#189)
1. add policy.eval() in all test scripts' "watch performance"
2. remove dict return support for collector preprocess_fn
3. add `__contains__` and `pop` in batch: `key in batch`, `batch.pop(key, deft)`
4. enforce exact episode counts when n_episode is given as a list of limits, and save fake data in cache_buffer when self.buffer is None (#184)
5. fix tensorboard logging: the horizontal axis now stands for env step instead of gradient step; add test results into tensorboard
6. add test_returns (both GAE and nstep)
7. change the type-checking order in batch.py and converter.py so that the most frequent case is checked first
8. fix shape inconsistency for torch.Tensor in replay buffer
9. remove `**kwargs` in ReplayBuffer
10. remove default value in batch.split() and add merge_last argument (#185)
11. improve nstep efficiency
12. add max_batchsize in onpolicy algorithms
13. potential bugfix for subproc.wait
14. fix RecurrentActorProb
15. improve the code-coverage (from 90% to 95%) and remove the dead code
16. fix some incorrect type annotation

The above improvements also increase the training FPS: on my computer, the previous version reached only ~1800 FPS, while this version reaches ~2050 FPS (faster than v0.2.4.post1).
2020-08-27 12:15:18 +08:00

84 lines
2.4 KiB
Python

import numpy as np
from typing import Union, Optional
from abc import ABC, abstractmethod
class BaseNoise(ABC):
    """The action noise base class.

    Subclasses must implement :meth:`__call__` to produce a noise sample.
    They may override :meth:`reset` when they keep state between calls.
    """

    def __init__(self, **kwargs) -> None:
        # Keyword arguments are accepted for subclass convenience but are
        # not used by the base class itself.
        super().__init__()

    @abstractmethod
    def __call__(self, **kwargs) -> np.ndarray:
        """Generate new noise."""
        raise NotImplementedError

    def reset(self) -> None:
        """Reset to the initial state.

        The base implementation does nothing.
        """
class GaussianNoise(BaseNoise):
    """Vanilla gaussian noise, used for exploration in DDPG by default.

    Every call draws fresh samples from a normal distribution with mean
    ``mu`` and standard deviation ``sigma``; no state is kept between calls.
    """

    def __init__(self,
                 mu: float = 0.0,
                 sigma: float = 1.0):
        super().__init__()
        # A negative standard deviation is meaningless, reject it up front.
        assert sigma >= 0, 'noise std should not be negative'
        self._mu, self._sigma = mu, sigma

    def __call__(self, size: tuple) -> np.ndarray:
        """Return gaussian samples arranged in an array of shape ``size``."""
        return np.random.normal(loc=self._mu, scale=self._sigma, size=size)
class OUNoise(BaseNoise):
    """Class for Ornstein-Uhlenbeck process, as used for exploration in DDPG.

    Each call advances the internal state ``x`` by one step::

        x <- x + theta * dt * (mu - x) + sigma * sqrt(dt) * N(0, 1)

    and returns the new state, so consecutive samples are correlated.

    Usage:
    ::

        # init
        self.noise = OUNoise()
        # generate noise (an optional second argument overrides ``mu``)
        noise = self.noise(logits.shape)

    For required parameters, you can refer to the stackoverflow page. However,
    our experiment result shows that (similar to OpenAI SpinningUp) using
    vanilla gaussian process has little difference from using the
    Ornstein-Uhlenbeck process.

    :param float mu: value the state is pulled toward, defaults to 0.0.
    :param float sigma: scale of the gaussian increment (multiplied by
        ``sqrt(dt)``), defaults to 0.3.
    :param float theta: strength of the pull toward ``mu`` (multiplied by
        ``dt``), defaults to 0.15.
    :param float dt: step size of the process, defaults to 1e-2.
    :param x0: initial state restored by :meth:`reset`. ``None`` (default)
        starts from zero; a scalar is broadcast to the requested shape; an
        array whose shape differs from the requested one is replaced by zero.
    """

    def __init__(self,
                 mu: float = 0.0,
                 sigma: float = 0.3,
                 theta: float = 0.15,
                 dt: float = 1e-2,
                 x0: Optional[Union[float, np.ndarray]] = None
                 ) -> None:
        # Zero-argument super() starts the lookup right after OUNoise, so
        # BaseNoise.__init__ is actually executed.
        super().__init__()
        self._mu = mu
        self._alpha = theta * dt
        self._beta = sigma * np.sqrt(dt)
        self._x0 = x0
        self.reset()

    def __call__(self, size: tuple, mu: Optional[float] = None) -> np.ndarray:
        """Generate new noise. Return a ``numpy.ndarray`` which size is equal
        to ``size``.

        :param size: shape of the requested noise, in any form accepted by
            ``numpy.random.normal``.
        :param mu: value the state is pulled toward for this step only;
            ``None`` (default) uses the ``mu`` given at construction.
        """
        if mu is None:
            mu = self._mu
        r = self._beta * np.random.normal(size=size)
        x = self._x
        # Only an array state can have a stale shape. Comparing against the
        # shape of the drawn sample (always a tuple) keeps the state alive
        # when ``size`` is passed as a list or an int, and scalar states
        # (e.g. a float ``x0``) are simply broadcast.
        if x is None or (
                isinstance(x, np.ndarray) and x.shape != np.shape(r)):
            x = 0.0
        self._x = x + self._alpha * (mu - x) + r
        return self._x

    def reset(self) -> None:
        """Reset to the initial state."""
        self._x = self._x0