| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  | from abc import ABC, abstractmethod | 
					
						
							| 
									
										
										
										
											2021-09-03 05:05:04 +08:00
										 |  |  | from typing import Optional, Sequence, Union | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | import numpy as np | 
					
						
							| 
									
										
										
										
											2020-03-18 21:45:41 +08:00
										 |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  | class BaseNoise(ABC, object): | 
					
						
							|  |  |  |     """The action noise base class.""" | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |     def __init__(self) -> None: | 
					
						
							| 
									
										
										
										
											2020-08-19 15:00:24 +08:00
										 |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-13 19:31:50 +08:00
										 |  |  |     def reset(self) -> None: | 
					
						
							|  |  |  |         """Reset to the initial state.""" | 
					
						
							|  |  |  |         pass | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |     @abstractmethod | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |     def __call__(self, size: Sequence[int]) -> np.ndarray: | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         """Generate new noise.""" | 
					
						
							|  |  |  |         raise NotImplementedError | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class GaussianNoise(BaseNoise): | 
					
						
							| 
									
										
										
										
											2021-09-03 05:05:04 +08:00
										 |  |  |     """The vanilla Gaussian process, for exploration in DDPG by default.""" | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |     def __init__(self, mu: float = 0.0, sigma: float = 1.0) -> None: | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         super().__init__() | 
					
						
							|  |  |  |         self._mu = mu | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |         assert 0 <= sigma, "Noise std should not be negative." | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         self._sigma = sigma | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |     def __call__(self, size: Sequence[int]) -> np.ndarray: | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         return np.random.normal(self._mu, self._sigma, size) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  | class OUNoise(BaseNoise): | 
					
						
							| 
									
										
										
										
											2020-04-05 18:34:45 +08:00
										 |  |  |     """Class for Ornstein-Uhlenbeck process, as used for exploration in DDPG.
 | 
					
						
							| 
									
										
										
										
											2020-09-11 07:55:37 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-04-03 21:28:12 +08:00
										 |  |  |     Usage: | 
					
						
							|  |  |  |     :: | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |         # init | 
					
						
							|  |  |  |         self.noise = OUNoise() | 
					
						
							|  |  |  |         # generate noise | 
					
						
							|  |  |  |         noise = self.noise(logits.shape, eps) | 
					
						
							|  |  |  | 
 | 
					
						
							|  |  |  |     For required parameters, you can refer to the stackoverflow page. However, | 
					
						
							|  |  |  |     our experiment result shows that (similar to OpenAI SpinningUp) using | 
					
						
							| 
									
										
										
										
											2021-09-03 05:05:04 +08:00
										 |  |  |     vanilla Gaussian process has little difference from using the | 
					
						
							| 
									
										
										
										
											2020-04-03 21:28:12 +08:00
										 |  |  |     Ornstein-Uhlenbeck process. | 
					
						
							|  |  |  |     """
 | 
					
						
							| 
									
										
										
										
											2020-03-18 21:45:41 +08:00
										 |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |     def __init__( | 
					
						
							|  |  |  |         self, | 
					
						
							|  |  |  |         mu: float = 0.0, | 
					
						
							|  |  |  |         sigma: float = 0.3, | 
					
						
							|  |  |  |         theta: float = 0.15, | 
					
						
							|  |  |  |         dt: float = 1e-2, | 
					
						
							|  |  |  |         x0: Optional[Union[float, np.ndarray]] = None, | 
					
						
							|  |  |  |     ) -> None: | 
					
						
							|  |  |  |         super().__init__() | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         self._mu = mu | 
					
						
							|  |  |  |         self._alpha = theta * dt | 
					
						
							|  |  |  |         self._beta = sigma * np.sqrt(dt) | 
					
						
							|  |  |  |         self._x0 = x0 | 
					
						
							| 
									
										
										
										
											2020-03-18 21:45:41 +08:00
										 |  |  |         self.reset() | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-13 19:31:50 +08:00
										 |  |  |     def reset(self) -> None: | 
					
						
							|  |  |  |         """Reset to the initial state.""" | 
					
						
							|  |  |  |         self._x = self._x0 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2021-03-30 16:06:03 +08:00
										 |  |  |     def __call__(self, size: Sequence[int], mu: Optional[float] = None) -> np.ndarray: | 
					
						
							| 
									
										
										
										
											2020-09-11 07:55:37 +08:00
										 |  |  |         """Generate new noise.
 | 
					
						
							|  |  |  | 
 | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |         Return an numpy array which size is equal to ``size``. | 
					
						
							| 
									
										
										
										
											2020-04-03 21:28:12 +08:00
										 |  |  |         """
 | 
					
						
							| 
									
										
										
										
											2020-09-13 19:31:50 +08:00
										 |  |  |         if self._x is None or isinstance( | 
					
						
							| 
									
										
										
										
											2021-09-03 05:05:04 +08:00
										 |  |  |             self._x, np.ndarray | 
					
						
							|  |  |  |         ) and self._x.shape != size: | 
					
						
							| 
									
										
										
										
											2020-09-12 15:39:01 +08:00
										 |  |  |             self._x = 0.0 | 
					
						
							| 
									
										
										
										
											2020-06-16 22:17:28 +08:00
										 |  |  |         if mu is None: | 
					
						
							|  |  |  |             mu = self._mu | 
					
						
							|  |  |  |         r = self._beta * np.random.normal(size=size) | 
					
						
							|  |  |  |         self._x = self._x + self._alpha * (mu - self._x) + r | 
					
						
							| 
									
										
										
										
											2021-03-30 16:06:03 +08:00
										 |  |  |         return self._x  # type: ignore |