From 09692c84fefda1dc451c1816817d72bcd2361759 Mon Sep 17 00:00:00 2001
From: n+e <trinkle23897@gmail.com>
Date: Tue, 30 Mar 2021 16:06:03 +0800
Subject: [PATCH] fix numpy>=1.20 typing check (#323)

Change the behavior of to_numpy and to_torch: from now on, dict is automatically converted to Batch and list is automatically converted to np.ndarray (if an error occurs, raise the exception instead of converting each element in the list).
---
 setup.py                                  |  2 +-
 test/base/test_batch.py                   | 40 ++++++++-------
 test/base/test_buffer.py                  | 14 +++---
 tianshou/data/batch.py                    | 52 ++++++++------------
 tianshou/data/buffer/base.py              | 51 +++++++++----------
 tianshou/data/buffer/cached.py            | 10 ++--
 tianshou/data/buffer/manager.py           | 16 +++---
 tianshou/data/buffer/prio.py              | 19 ++++---
 tianshou/data/collector.py                |  8 +--
 tianshou/data/utils/converter.py          | 60 +++++++----------------
 tianshou/env/venvs.py                     | 10 ++--
 tianshou/env/worker/base.py               |  8 +--
 tianshou/env/worker/dummy.py              |  4 +-
 tianshou/env/worker/ray.py                |  4 +-
 tianshou/env/worker/subproc.py            |  6 +--
 tianshou/exploration/random.py            |  6 +--
 tianshou/policy/base.py                   | 15 +++---
 tianshou/policy/imitation/discrete_bcq.py | 11 +----
 tianshou/policy/modelbase/psrl.py         | 10 ++--
 tianshou/policy/modelfree/a2c.py          |  6 +--
 tianshou/policy/modelfree/ddpg.py         | 12 +++--
 tianshou/policy/modelfree/dqn.py          |  6 ++-
 tianshou/policy/multiagent/mapolicy.py    |  9 ++--
 tianshou/utils/log_tools.py               | 22 ++-------
 tianshou/utils/net/common.py              | 15 +++---
 tianshou/utils/net/continuous.py          | 16 +++---
 tianshou/utils/net/discrete.py            |  4 +-
 tianshou/utils/statistics.py              | 41 ++++++++--------
 28 files changed, 212 insertions(+), 265 deletions(-)

diff --git a/setup.py b/setup.py
index f8736fa..24220c2 100644
--- a/setup.py
+++ b/setup.py
@@ -47,7 +47,7 @@ setup(
     install_requires=[
         "gym>=0.15.4",
         "tqdm",
-        "numpy!=1.16.0,<1.20.0",  # https://github.com/numpy/numpy/issues/12793
+        "numpy>1.16.0",  # https://github.com/numpy/numpy/issues/12793
         "tensorboard",
         "torch>=1.4.0",
         "numba>=0.51.0",
diff --git a/test/base/test_batch.py b/test/base/test_batch.py
index 0898e15..09b280e 100644
--- a/test/base/test_batch.py
+++ b/test/base/test_batch.py
@@ -20,9 +20,9 @@ def test_batch():
     assert len(Batch(a=[1, 2, 3], b={'c': {}})) == 3
     assert not Batch(a=[1, 2, 3]).is_empty()
     b = Batch({'a': [4, 4], 'b': [5, 5]}, c=[None, None])
-    assert b.c.dtype == np.object
+    assert b.c.dtype == object
     b = Batch(d=[None], e=[starmap], f=Batch)
-    assert b.d.dtype == b.e.dtype == np.object and b.f == Batch
+    assert b.d.dtype == b.e.dtype == object and b.f == Batch
     b = Batch()
     b.update()
     assert b.is_empty()
@@ -153,10 +153,10 @@ def test_batch():
         batch3[0] = Batch(a={"c": 2, "e": 1})
     # auto convert
     batch4 = Batch(a=np.array(['a', 'b']))
-    assert batch4.a.dtype == np.object  # auto convert to np.object
+    assert batch4.a.dtype == object  # auto convert to object
     batch4.update(a=np.array(['c', 'd']))
     assert list(batch4.a) == ['c', 'd']
-    assert batch4.a.dtype == np.object  # auto convert to np.object
+    assert batch4.a.dtype == object  # auto convert to object
     batch5 = Batch(a=np.array([{'index': 0}]))
     assert isinstance(batch5.a, Batch)
     assert np.allclose(batch5.a.index, [0])
@@ -405,21 +405,23 @@ def test_utils_to_torch_numpy():
     assert data_list_2_torch.shape == (2, 3, 3)
     assert np.allclose(to_numpy(to_torch(data_list_2)), data_list_2)
     data_list_3 = [np.zeros((3, 2)), np.zeros((3, 3))]
-    data_list_3_torch = to_torch(data_list_3)
-    assert isinstance(data_list_3_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_3_torch)
-    assert all(starmap(np.allclose,
-                       zip(to_numpy(to_torch(data_list_3)), data_list_3)))
+    data_list_3_torch = [torch.zeros((3, 2)), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_3)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_3_torch)
     data_list_4 = [np.zeros((2, 3)), np.zeros((3, 3))]
-    data_list_4_torch = to_torch(data_list_4)
-    assert isinstance(data_list_4_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_4_torch)
-    assert all(starmap(np.allclose,
-                       zip(to_numpy(to_torch(data_list_4)), data_list_4)))
+    data_list_4_torch = [torch.zeros((2, 3)), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_4)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_4_torch)
     data_list_5 = [np.zeros(2), np.zeros((3, 3))]
-    data_list_5_torch = to_torch(data_list_5)
-    assert isinstance(data_list_5_torch, list)
-    assert all(isinstance(e, torch.Tensor) for e in data_list_5_torch)
+    data_list_5_torch = [torch.zeros(2), torch.zeros((3, 3))]
+    with pytest.raises(TypeError):
+        to_torch(data_list_5)
+    with pytest.raises(TypeError):
+        to_numpy(data_list_5_torch)
     data_array = np.random.rand(3, 2, 2)
     data_empty_tensor = to_torch(data_array[[]])
     assert isinstance(data_empty_tensor, torch.Tensor)
@@ -508,10 +510,10 @@ def test_batch_empty():
     assert np.allclose(b5.b.c, [2, 0])
     assert np.allclose(b5.b.d, [1, 0])
     data = Batch(a=[False, True],
-                 b={'c': np.array([2., 'st'], dtype=np.object),
+                 b={'c': np.array([2., 'st'], dtype=object),
                     'd': [1, None],
                     'e': [2., float('nan')]},
-                 c=np.array([1, 3, 4], dtype=np.int),
+                 c=np.array([1, 3, 4], dtype=int),
                  t=torch.tensor([4, 5, 6, 7.]))
     data[-1] = Batch.empty(data[1])
     assert np.allclose(data.c, [1, 3, 0])
diff --git a/test/base/test_buffer.py b/test/base/test_buffer.py
index 225375d..04348cd 100644
--- a/test/base/test_buffer.py
+++ b/test/base/test_buffer.py
@@ -33,7 +33,7 @@ def test_replaybuffer(size=10, bufsize=20):
                       done=done, obs_next=obs_next, info=info))
         obs = obs_next
         assert len(buf) == min(bufsize, i + 1)
-    assert buf.act.dtype == np.int
+    assert buf.act.dtype == int
     assert buf.act.shape == (bufsize, 1)
     data, indice = buf.sample(bufsize * 2)
     assert (indice < len(buf)).all()
@@ -50,9 +50,9 @@ def test_replaybuffer(size=10, bufsize=20):
     assert b.obs_next[0] == 'str'
     assert np.all(b.obs[1:] == 0)
     assert np.all(b.obs_next[1:] == np.array(None))
-    assert b.info.a[0] == 3 and b.info.a.dtype == np.integer
+    assert b.info.a[0] == 3 and b.info.a.dtype == int
     assert np.all(b.info.a[1:] == 0)
-    assert b.info.b.c[0] == 5.0 and b.info.b.c.dtype == np.inexact
+    assert b.info.b.c[0] == 5.0 and b.info.b.c.dtype == float
     assert np.all(b.info.b.c[1:] == 0.0)
     assert ptr.shape == (1,) and ptr[0] == 0
     assert ep_rew.shape == (1,) and ep_rew[0] == 1
@@ -180,8 +180,8 @@ def test_priortized_replaybuffer(size=32, bufsize=15):
         assert len(buf2) == min(bufsize, 3 * (i + 1))
     # check single buffer's data
     assert buf.info.key.shape == (buf.maxsize,)
-    assert buf.rew.dtype == np.float
-    assert buf.done.dtype == np.bool_
+    assert buf.rew.dtype == float
+    assert buf.done.dtype == bool
     data, indice = buf.sample(len(buf) // 2)
     buf.update_weight(indice, -data.weight / 2)
     assert np.allclose(buf.weight[indice], np.abs(-data.weight / 2) ** buf._alpha)
@@ -273,7 +273,7 @@ def test_segtree():
         index = tree.get_prefix_sum_idx(scalar)
         assert naive[:index].sum() <= scalar <= naive[:index + 1].sum()
     # corner case here
-    naive = np.ones(actual_len, np.int)
+    naive = np.ones(actual_len, int)
     tree[np.arange(actual_len)] = naive
     for scalar in range(actual_len):
         index = tree.get_prefix_sum_idx(scalar * 1.)
@@ -485,7 +485,7 @@ def test_replaybuffermanager():
     buf.set_batch(batch)
     assert np.allclose(buf.buffers[-1].info, [1] * 5)
     assert buf.sample_index(-1).tolist() == []
-    assert np.array([ReplayBuffer(0, ignore_obs_next=True)]).dtype == np.object
+    assert np.array([ReplayBuffer(0, ignore_obs_next=True)]).dtype == object
 
 
 def test_cachedbuffer():
diff --git a/tianshou/data/batch.py b/tianshou/data/batch.py
index a07ad67..ae907f0 100644
--- a/tianshou/data/batch.py
+++ b/tianshou/data/batch.py
@@ -7,16 +7,18 @@ from numbers import Number
 from collections.abc import Collection
 from typing import Any, List, Dict, Union, Iterator, Optional, Iterable, Sequence
 
+IndexType = Union[slice, int, np.ndarray, List[int]]
+
 
 def _is_batch_set(data: Any) -> bool:
     # Batch set is a list/tuple of dict/Batch objects,
-    # or 1-D np.ndarray with np.object type,
+    # or 1-D np.ndarray with object type,
     # where each element is a dict/Batch object
     if isinstance(data, np.ndarray):  # most often case
         # "for e in data" will just unpack the first dimension,
         # but data.tolist() will flatten ndarray of objects
         # so do not use data.tolist()
-        return data.dtype == np.object and all(
+        return data.dtype == object and all(
             isinstance(e, (dict, Batch)) for e in data)
     elif isinstance(data, (list, tuple)):
         if len(data) > 0 and all(isinstance(e, (dict, Batch)) for e in data):
@@ -50,13 +52,13 @@ def _to_array_with_correct_type(v: Any) -> np.ndarray:
     if isinstance(v, np.ndarray) and issubclass(v.dtype.type, (np.bool_, np.number)):
         return v  # most often case
     # convert the value to np.ndarray
-    # convert to np.object data type if neither bool nor number
+    # convert to object data type if neither bool nor number
     # raises an exception if array's elements are tensors themself
     v = np.asanyarray(v)
     if not issubclass(v.dtype.type, (np.bool_, np.number)):
-        v = v.astype(np.object)
-    if v.dtype == np.object:
-        # scalar ndarray with np.object data type is very annoying
+        v = v.astype(object)
+    if v.dtype == object:
+        # scalar ndarray with object data type is very annoying
         # a=np.array([np.array({}, dtype=object), np.array({}, dtype=object)])
         # a is not array([{}, {}], dtype=object), and a[0]={} results in
         # something very strange:
@@ -87,13 +89,11 @@ def _create_value(
     if has_shape:
         shape = (size, *inst.shape) if stack else (size, *inst.shape[1:])
     if isinstance(inst, np.ndarray):
-        if issubclass(inst.dtype.type, (np.bool_, np.number)):
-            target_type = inst.dtype.type
-        else:
-            target_type = np.object
+        target_type = inst.dtype.type if issubclass(
+            inst.dtype.type, (np.bool_, np.number)) else object
         return np.full(
             shape,
-            fill_value=None if target_type == np.object else 0,
+            fill_value=None if target_type == object else 0,
             dtype=target_type
         )
     elif isinstance(inst, torch.Tensor):
@@ -105,8 +105,8 @@ def _create_value(
         return zero_batch
     elif is_scalar:
         return _create_value(np.asarray(inst), size, stack=stack)
-    else:  # fall back to np.object
-        return np.array([None for _ in range(size)])
+    else:  # fall back to object
+        return np.array([None for _ in range(size)], object)
 
 
 def _assert_type_keys(keys: Iterable[str]) -> None:
@@ -187,7 +187,7 @@ class Batch:
                 for k, v in batch_dict.items():
                     self.__dict__[k] = _parse_value(v)
             elif _is_batch_set(batch_dict):
-                self.stack_(batch_dict)
+                self.stack_(batch_dict)  # type: ignore
         if len(kwargs) > 0:
             self.__init__(kwargs, copy=copy)  # type: ignore
 
@@ -223,9 +223,7 @@ class Batch:
         """
         self.__init__(**state)  # type: ignore
 
-    def __getitem__(
-        self, index: Union[str, slice, int, np.integer, np.ndarray, List[int]]
-    ) -> Any:
+    def __getitem__(self, index: Union[str, IndexType]) -> Any:
         """Return self[index]."""
         if isinstance(index, str):
             return self.__dict__[index]
@@ -241,11 +239,7 @@ class Batch:
         else:
             raise IndexError("Cannot access item from empty Batch object.")
 
-    def __setitem__(
-        self,
-        index: Union[str, slice, int, np.integer, np.ndarray, List[int]],
-        value: Any,
-    ) -> None:
+    def __setitem__(self, index: Union[str, IndexType], value: Any) -> None:
         """Assign value to self[index]."""
         value = _parse_value(value)
         if isinstance(index, str):
@@ -530,8 +524,7 @@ class Batch:
             elif all(isinstance(e, (Batch, dict)) for e in v):  # third often
                 self.__dict__[k] = Batch.stack(v, axis)
             else:  # most often case is np.ndarray
-                v = np.stack(v, axis)
-                self.__dict__[k] = _to_array_with_correct_type(v)
+                self.__dict__[k] = _to_array_with_correct_type(np.stack(v, axis))
         # all the keys
         keys_total = set.union(*[set(b.keys()) for b in batches])
         # keys that are reserved in all batches
@@ -587,9 +580,7 @@ class Batch:
         batch.stack_(batches, axis)
         return batch
 
-    def empty_(
-        self, index: Union[str, slice, int, np.integer, np.ndarray, List[int]] = None
-    ) -> "Batch":
+    def empty_(self, index: Optional[Union[slice, IndexType]] = None) -> "Batch":
         """Return an empty Batch object with 0 or None filled.
 
         If "index" is specified, it will only reset the specific indexed-data.
@@ -620,7 +611,7 @@ class Batch:
             elif v is None:
                 continue
             elif isinstance(v, np.ndarray):
-                if v.dtype == np.object:
+                if v.dtype == object:
                     self.__dict__[k][index] = None
                 else:
                     self.__dict__[k][index] = 0
@@ -636,10 +627,7 @@ class Batch:
         return self
 
     @staticmethod
-    def empty(
-        batch: "Batch",
-        index: Union[str, slice, int, np.integer, np.ndarray, List[int]] = None,
-    ) -> "Batch":
+    def empty(batch: "Batch", index: Optional[IndexType] = None) -> "Batch":
         """Return an empty Batch object with 0 or None filled.
 
         The shape is the same as the given Batch.
diff --git a/tianshou/data/buffer/base.py b/tianshou/data/buffer/base.py
index 54c9f1c..4189207 100644
--- a/tianshou/data/buffer/base.py
+++ b/tianshou/data/buffer/base.py
@@ -115,9 +115,9 @@ class ReplayBuffer:
     def unfinished_index(self) -> np.ndarray:
         """Return the index of unfinished episode."""
         last = (self._index - 1) % self._size if self._size else 0
-        return np.array([last] if not self.done[last] and self._size else [], np.int)
+        return np.array([last] if not self.done[last] and self._size else [], int)
 
-    def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def prev(self, index: Union[int, np.ndarray]) -> np.ndarray:
         """Return the index of previous transition.
 
         The index won't be modified if it is the beginning of an episode.
@@ -126,7 +126,7 @@ class ReplayBuffer:
         end_flag = self.done[index] | (index == self.last_index[0])
         return (index + end_flag) % self._size
 
-    def next(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def next(self, index: Union[int, np.ndarray]) -> np.ndarray:
         """Return the index of next transition.
 
         The index won't be modified if it is the end of an episode.
@@ -140,12 +140,12 @@ class ReplayBuffer:
         Return the updated indices. If update fails, return an empty array.
         """
         if len(buffer) == 0 or self.maxsize == 0:
-            return np.array([], np.int)
+            return np.array([], int)
         stack_num, buffer.stack_num = buffer.stack_num, 1
         from_indices = buffer.sample_index(0)  # get all available indices
         buffer.stack_num = stack_num
         if len(from_indices) == 0:
-            return np.array([], np.int)
+            return np.array([], int)
         to_indices = []
         for _ in range(len(from_indices)):
             to_indices.append(self._index)
@@ -224,8 +224,8 @@ class ReplayBuffer:
             self._meta[ptr] = batch
         except ValueError:
             stack = not stacked_batch
-            batch.rew = batch.rew.astype(np.float)
-            batch.done = batch.done.astype(np.bool_)
+            batch.rew = batch.rew.astype(float)
+            batch.done = batch.done.astype(bool)
             if self._meta.is_empty():
                 self._meta = _create_value(  # type: ignore
                     batch, self.maxsize, stack)
@@ -248,10 +248,10 @@ class ReplayBuffer:
                     [np.arange(self._index, self._size), np.arange(self._index)]
                 )
             else:
-                return np.array([], np.int)
+                return np.array([], int)
         else:
             if batch_size < 0:
-                return np.array([], np.int)
+                return np.array([], int)
             all_indices = prev_indices = np.concatenate(
                 [np.arange(self._index, self._size), np.arange(self._index)]
             )
@@ -275,9 +275,9 @@ class ReplayBuffer:
 
     def get(
         self,
-        index: Union[int, np.integer, np.ndarray],
+        index: Union[int, List[int], np.ndarray],
         key: str,
-        default_value: Optional[Any] = None,
+        default_value: Any = None,
         stack_num: Optional[int] = None,
     ) -> Union[Batch, np.ndarray]:
         """Return the stacked result.
@@ -303,7 +303,7 @@ class ReplayBuffer:
             if isinstance(index, list):
                 indice = np.array(index)
             else:
-                indice = index
+                indice = index  # type: ignore
             for _ in range(stack_num):
                 stack = [val[indice]] + stack
                 indice = self.prev(indice)
@@ -316,30 +316,31 @@ class ReplayBuffer:
                 raise e  # val != Batch()
             return Batch()
 
-    def __getitem__(self, index: Union[slice, int, np.integer, np.ndarray]) -> Batch:
+    def __getitem__(self, index: Union[slice, int, List[int], np.ndarray]) -> Batch:
         """Return a data batch: self[index].
 
         If stack_num is larger than 1, return the stacked obs and obs_next with shape
         (batch, len, ...).
         """
         if isinstance(index, slice):  # change slice to np array
-            if index == slice(None):  # buffer[:] will get all available data
-                index = self.sample_index(0)
-            else:
-                index = self._indices[:len(self)][index]
+            # buffer[:] will get all available data
+            indice = self.sample_index(0) if index == slice(None) \
+                else self._indices[:len(self)][index]
+        else:
+            indice = index
         # raise KeyError first instead of AttributeError,
         # to support np.array([ReplayBuffer()])
-        obs = self.get(index, "obs")
+        obs = self.get(indice, "obs")
         if self._save_obs_next:
-            obs_next = self.get(index, "obs_next", Batch())
+            obs_next = self.get(indice, "obs_next", Batch())
         else:
-            obs_next = self.get(self.next(index), "obs", Batch())
+            obs_next = self.get(self.next(indice), "obs", Batch())
         return Batch(
             obs=obs,
-            act=self.act[index],
-            rew=self.rew[index],
-            done=self.done[index],
+            act=self.act[indice],
+            rew=self.rew[indice],
+            done=self.done[indice],
             obs_next=obs_next,
-            info=self.get(index, "info", Batch()),
-            policy=self.get(index, "policy", Batch()),
+            info=self.get(indice, "info", Batch()),
+            policy=self.get(indice, "policy", Batch()),
         )
diff --git a/tianshou/data/buffer/cached.py b/tianshou/data/buffer/cached.py
index acbae6f..49bb33b 100644
--- a/tianshou/data/buffer/cached.py
+++ b/tianshou/data/buffer/cached.py
@@ -58,14 +58,14 @@ class CachedReplayBuffer(ReplayBufferManager):
         cached_buffer_ids[i]th cached buffer's corresponding episode result.
         """
         if buffer_ids is None:
-            buffer_ids = np.arange(1, 1 + self.cached_buffer_num)
+            buf_arr = np.arange(1, 1 + self.cached_buffer_num)
         else:  # make sure it is np.ndarray
-            buffer_ids = np.asarray(buffer_ids) + 1
-        ptr, ep_rew, ep_len, ep_idx = super().add(batch, buffer_ids=buffer_ids)
+            buf_arr = np.asarray(buffer_ids) + 1
+        ptr, ep_rew, ep_len, ep_idx = super().add(batch, buffer_ids=buf_arr)
         # find the terminated episode, move data from cached buf to main buf
         updated_ptr, updated_ep_idx = [], []
-        done = batch.done.astype(np.bool_)
-        for buffer_idx in buffer_ids[done]:
+        done = batch.done.astype(bool)
+        for buffer_idx in buf_arr[done]:
             index = self.main_buffer.update(self.buffers[buffer_idx])
             if len(index) == 0:  # unsuccessful move, replace with -1
                 index = [-1]
diff --git a/tianshou/data/buffer/manager.py b/tianshou/data/buffer/manager.py
index fa9db25..3258b12 100644
--- a/tianshou/data/buffer/manager.py
+++ b/tianshou/data/buffer/manager.py
@@ -22,7 +22,7 @@ class ReplayBufferManager(ReplayBuffer):
 
     def __init__(self, buffer_list: List[ReplayBuffer]) -> None:
         self.buffer_num = len(buffer_list)
-        self.buffers = np.array(buffer_list, dtype=np.object)
+        self.buffers = np.array(buffer_list, dtype=object)
         offset, size = [], 0
         buffer_type = type(self.buffers[0])
         kwargs = self.buffers[0].options
@@ -46,7 +46,7 @@ class ReplayBufferManager(ReplayBuffer):
         _next_index(index, offset, done, last, lens)
 
     def __len__(self) -> int:
-        return self._lengths.sum()
+        return int(self._lengths.sum())
 
     def reset(self, keep_statistics: bool = False) -> None:
         self.last_index = self._offset.copy()
@@ -68,7 +68,7 @@ class ReplayBufferManager(ReplayBuffer):
             for offset, buf in zip(self._offset, self.buffers)
         ])
 
-    def prev(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def prev(self, index: Union[int, np.ndarray]) -> np.ndarray:
         if isinstance(index, (list, np.ndarray)):
             return _prev_index(np.asarray(index), self._extend_offset,
                                self.done, self.last_index, self._lengths)
@@ -76,7 +76,7 @@ class ReplayBufferManager(ReplayBuffer):
             return _prev_index(np.array([index]), self._extend_offset,
                                self.done, self.last_index, self._lengths)[0]
 
-    def next(self, index: Union[int, np.integer, np.ndarray]) -> np.ndarray:
+    def next(self, index: Union[int, np.ndarray]) -> np.ndarray:
         if isinstance(index, (list, np.ndarray)):
             return _next_index(np.asarray(index), self._extend_offset,
                                self.done, self.last_index, self._lengths)
@@ -130,8 +130,8 @@ class ReplayBufferManager(ReplayBuffer):
         try:
             self._meta[ptrs] = batch
         except ValueError:
-            batch.rew = batch.rew.astype(np.float)
-            batch.done = batch.done.astype(np.bool_)
+            batch.rew = batch.rew.astype(float)
+            batch.done = batch.done.astype(bool)
             if self._meta.is_empty():
                 self._meta = _create_value(  # type: ignore
                     batch, self.maxsize, stack=False)
@@ -143,7 +143,7 @@ class ReplayBufferManager(ReplayBuffer):
 
     def sample_index(self, batch_size: int) -> np.ndarray:
         if batch_size < 0:
-            return np.array([], np.int)
+            return np.array([], int)
         if self._sample_avail and self.stack_num > 1:
             all_indices = np.concatenate([
                 buf.sample_index(0) + offset
@@ -154,7 +154,7 @@ class ReplayBufferManager(ReplayBuffer):
             else:
                 return np.random.choice(all_indices, batch_size)
         if batch_size == 0:  # get all available indices
-            sample_num = np.zeros(self.buffer_num, np.int)
+            sample_num = np.zeros(self.buffer_num, int)
         else:
             buffer_idx = np.random.choice(
                 self.buffer_num, batch_size, p=self._lengths / self._lengths.sum()
diff --git a/tianshou/data/buffer/prio.py b/tianshou/data/buffer/prio.py
index 46c0be5..e5c4909 100644
--- a/tianshou/data/buffer/prio.py
+++ b/tianshou/data/buffer/prio.py
@@ -34,6 +34,7 @@ class PrioritizedReplayBuffer(ReplayBuffer):
     def update(self, buffer: ReplayBuffer) -> np.ndarray:
         indices = super().update(buffer)
         self.init_weight(indices)
+        return indices
 
     def add(
         self, batch: Batch, buffer_ids: Optional[Union[np.ndarray, List[int]]] = None
@@ -45,13 +46,11 @@ class PrioritizedReplayBuffer(ReplayBuffer):
     def sample_index(self, batch_size: int) -> np.ndarray:
         if batch_size > 0 and len(self) > 0:
             scalar = np.random.rand(batch_size) * self.weight.reduce()
-            return self.weight.get_prefix_sum_idx(scalar)
+            return self.weight.get_prefix_sum_idx(scalar)  # type: ignore
         else:
             return super().sample_index(batch_size)
 
-    def get_weight(
-        self, index: Union[slice, int, np.integer, np.ndarray]
-    ) -> np.ndarray:
+    def get_weight(self, index: Union[int, np.ndarray]) -> Union[float, np.ndarray]:
         """Get the importance sampling weight.
 
         The "weight" in the returned Batch is the weight on loss function to de-bias
@@ -76,7 +75,13 @@ class PrioritizedReplayBuffer(ReplayBuffer):
         self._max_prio = max(self._max_prio, weight.max())
         self._min_prio = min(self._min_prio, weight.min())
 
-    def __getitem__(self, index: Union[slice, int, np.integer, np.ndarray]) -> Batch:
-        batch = super().__getitem__(index)
-        batch.weight = self.get_weight(index)
+    def __getitem__(self, index: Union[slice, int, List[int], np.ndarray]) -> Batch:
+        if isinstance(index, slice):  # change slice to np array
+            # buffer[:] will get all available data
+            indice = self.sample_index(0) if index == slice(None) \
+                else self._indices[:len(self)][index]
+        else:
+            indice = index
+        batch = super().__getitem__(indice)
+        batch.weight = self.get_weight(indice)
         return batch
diff --git a/tianshou/data/collector.py b/tianshou/data/collector.py
index bf73990..37ddabf 100644
--- a/tianshou/data/collector.py
+++ b/tianshou/data/collector.py
@@ -123,7 +123,7 @@ class Collector(object):
             if isinstance(state, torch.Tensor):
                 state[id].zero_()
             elif isinstance(state, np.ndarray):
-                state[id] = None if state.dtype == np.object else 0
+                state[id] = None if state.dtype == object else 0
             elif isinstance(state, Batch):
                 state.empty_(id)
 
@@ -266,7 +266,7 @@ class Collector(object):
                 if n_episode:
                     surplus_env_num = len(ready_env_ids) - (n_episode - episode_count)
                     if surplus_env_num > 0:
-                        mask = np.ones_like(ready_env_ids, np.bool)
+                        mask = np.ones_like(ready_env_ids, dtype=bool)
                         mask[env_ind_local[:surplus_env_num]] = False
                         ready_env_ids = ready_env_ids[mask]
                         self.data = self.data[mask]
@@ -291,7 +291,7 @@ class Collector(object):
             rews, lens, idxs = list(map(
                 np.concatenate, [episode_rews, episode_lens, episode_start_indices]))
         else:
-            rews, lens, idxs = np.array([]), np.array([], np.int), np.array([], np.int)
+            rews, lens, idxs = np.array([]), np.array([], int), np.array([], int)
 
         return {
             "n/ep": episode_count,
@@ -493,7 +493,7 @@ class AsyncCollector(Collector):
             rews, lens, idxs = list(map(
                 np.concatenate, [episode_rews, episode_lens, episode_start_indices]))
         else:
-            rews, lens, idxs = np.array([]), np.array([], np.int), np.array([], np.int)
+            rews, lens, idxs = np.array([]), np.array([], int), np.array([], int)
 
         return {
             "n/ep": episode_count,
diff --git a/tianshou/data/utils/converter.py b/tianshou/data/utils/converter.py
index 52b0744..9f7d88a 100644
--- a/tianshou/data/utils/converter.py
+++ b/tianshou/data/utils/converter.py
@@ -4,15 +4,12 @@ import pickle
 import numpy as np
 from copy import deepcopy
 from numbers import Number
-from typing import Dict, Union, Optional
+from typing import Any, Dict, Union, Optional
 
 from tianshou.data.batch import _parse_value, Batch
 
 
-def to_numpy(
-    x: Optional[Union[Batch, dict, list, tuple, np.number, np.bool_, Number,
-                      np.ndarray, torch.Tensor]]
-) -> Union[Batch, dict, list, tuple, np.ndarray]:
+def to_numpy(x: Any) -> Union[Batch, np.ndarray]:
     """Return an object without torch.Tensor."""
     if isinstance(x, torch.Tensor):  # most often case
         return x.detach().cpu().numpy()
@@ -21,28 +18,22 @@ def to_numpy(
     elif isinstance(x, (np.number, np.bool_, Number)):
         return np.asanyarray(x)
     elif x is None:
-        return np.array(None, dtype=np.object)
-    elif isinstance(x, Batch):
-        x = deepcopy(x)
+        return np.array(None, dtype=object)
+    elif isinstance(x, (dict, Batch)):
+        x = Batch(x) if isinstance(x, dict) else deepcopy(x)
         x.to_numpy()
         return x
-    elif isinstance(x, dict):
-        return {k: to_numpy(v) for k, v in x.items()}
     elif isinstance(x, (list, tuple)):
-        try:
-            return to_numpy(_parse_value(x))
-        except TypeError:
-            return [to_numpy(e) for e in x]
+        return to_numpy(_parse_value(x))
     else:  # fallback
         return np.asanyarray(x)
 
 
 def to_torch(
-    x: Union[Batch, dict, list, tuple, np.number, np.bool_, Number, np.ndarray,
-             torch.Tensor],
+    x: Any,
     dtype: Optional[torch.dtype] = None,
     device: Union[str, int, torch.device] = "cpu",
-) -> Union[Batch, dict, list, tuple, torch.Tensor]:
+) -> Union[Batch, torch.Tensor]:
     """Return an object without np.ndarray."""
     if isinstance(x, np.ndarray) and issubclass(
         x.dtype.type, (np.bool_, np.number)
@@ -57,25 +48,17 @@ def to_torch(
         return x.to(device)  # type: ignore
     elif isinstance(x, (np.number, np.bool_, Number)):
         return to_torch(np.asanyarray(x), dtype, device)
-    elif isinstance(x, dict):
-        return {k: to_torch(v, dtype, device) for k, v in x.items()}
-    elif isinstance(x, Batch):
-        x = deepcopy(x)
+    elif isinstance(x, (dict, Batch)):
+        x = Batch(x, copy=True) if isinstance(x, dict) else deepcopy(x)
         x.to_torch(dtype, device)
         return x
     elif isinstance(x, (list, tuple)):
-        try:
-            return to_torch(_parse_value(x), dtype, device)
-        except TypeError:
-            return [to_torch(e, dtype, device) for e in x]
+        return to_torch(_parse_value(x), dtype, device)
     else:  # fallback
         raise TypeError(f"object {x} cannot be converted to torch.")
 
 
-def to_torch_as(
-    x: Union[Batch, dict, list, tuple, np.ndarray, torch.Tensor],
-    y: torch.Tensor,
-) -> Union[Batch, dict, list, tuple, torch.Tensor]:
+def to_torch_as(x: Any, y: torch.Tensor) -> Union[Batch, torch.Tensor]:
     """Return an object without np.ndarray.
 
     Same as ``to_torch(x, dtype=y.dtype, device=y.device)``.
@@ -147,25 +130,20 @@ def to_hdf5(x: Hdf5ConvertibleType, y: h5py.Group) -> None:
             y[k].attrs["__data_type__"] = v.__class__.__name__
 
 
-def from_hdf5(
-    x: h5py.Group, device: Optional[str] = None
-) -> Hdf5ConvertibleType:
+def from_hdf5(x: h5py.Group, device: Optional[str] = None) -> Hdf5ConvertibleValues:
     """Restore object from HDF5 group."""
     if isinstance(x, h5py.Dataset):
         # handle datasets
         if x.attrs["__data_type__"] == "ndarray":
-            y = np.array(x)
+            return np.array(x)
         elif x.attrs["__data_type__"] == "Tensor":
-            y = torch.tensor(x, device=device)
+            return torch.tensor(x, device=device)
         else:
-            y = pickle.loads(x[()])
+            return pickle.loads(x[()])
     else:
         # handle groups representing a dict or a Batch
-        y = {k: v for k, v in x.attrs.items() if k != "__data_type__"}
+        y = dict(x.attrs.items())
+        data_type = y.pop("__data_type__", None)
         for k, v in x.items():
             y[k] = from_hdf5(v, device)
-        if "__data_type__" in x.attrs:
-            # if dictionary represents Batch, convert to Batch
-            if x.attrs["__data_type__"] == "Batch":
-                y = Batch(y)
-    return y
+        return Batch(y) if data_type == "Batch" else y
diff --git a/tianshou/env/venvs.py b/tianshou/env/venvs.py
index a15a4e2..b2fc73b 100644
--- a/tianshou/env/venvs.py
+++ b/tianshou/env/venvs.py
@@ -140,12 +140,10 @@ class BaseVectorEnv(gym.Env):
         self, id: Optional[Union[int, List[int], np.ndarray]] = None
     ) -> Union[List[int], np.ndarray]:
         if id is None:
-            id = list(range(self.env_num))
-        elif np.isscalar(id):
-            id = [id]
-        return id
+            return list(range(self.env_num))
+        return [id] if np.isscalar(id) else id  # type: ignore
 
-    def _assert_id(self, id: List[int]) -> None:
+    def _assert_id(self, id: Union[List[int], np.ndarray]) -> None:
         for i in id:
             assert i not in self.waiting_id, \
                 f"Cannot interact with environment {i} which is stepping now."
@@ -291,7 +289,7 @@ class BaseVectorEnv(gym.Env):
             clip_max = 10.0  # this magic number is from openai baselines
             # see baselines/common/vec_env/vec_normalize.py#L10
             obs = (obs - self.obs_rms.mean) / np.sqrt(self.obs_rms.var + self.__eps)
-            obs = np.clip(obs, -clip_max, clip_max)
+            obs = np.clip(obs, -clip_max, clip_max)  # type: ignore
         return obs
 
     def __del__(self) -> None:
diff --git a/tianshou/env/worker/base.py b/tianshou/env/worker/base.py
index d22d60b..dbf350a 100644
--- a/tianshou/env/worker/base.py
+++ b/tianshou/env/worker/base.py
@@ -25,9 +25,7 @@ class EnvWorker(ABC):
     def send_action(self, action: np.ndarray) -> None:
         pass
 
-    def get_result(
-        self,
-    ) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    def get_result(self) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
         return self.result
 
     def step(
@@ -45,9 +43,7 @@ class EnvWorker(ABC):
 
     @staticmethod
     def wait(
-        workers: List["EnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["EnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["EnvWorker"]:
         """Given a list of workers, return those ready ones."""
         raise NotImplementedError
diff --git a/tianshou/env/worker/dummy.py b/tianshou/env/worker/dummy.py
index eafa690..d0579d1 100644
--- a/tianshou/env/worker/dummy.py
+++ b/tianshou/env/worker/dummy.py
@@ -20,9 +20,7 @@ class DummyEnvWorker(EnvWorker):
 
     @staticmethod
     def wait(  # type: ignore
-        workers: List["DummyEnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["DummyEnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["DummyEnvWorker"]:
         # Sequential EnvWorker objects are always ready
         return workers
diff --git a/tianshou/env/worker/ray.py b/tianshou/env/worker/ray.py
index 8139ed9..af7285b 100644
--- a/tianshou/env/worker/ray.py
+++ b/tianshou/env/worker/ray.py
@@ -25,9 +25,7 @@ class RayEnvWorker(EnvWorker):
 
     @staticmethod
     def wait(  # type: ignore
-        workers: List["RayEnvWorker"],
-        wait_num: int,
-        timeout: Optional[float] = None,
+        workers: List["RayEnvWorker"], wait_num: int, timeout: Optional[float] = None
     ) -> List["RayEnvWorker"]:
         results = [x.result for x in workers]
         ready_results, _ = ray.wait(results, num_returns=wait_num, timeout=timeout)
diff --git a/tianshou/env/worker/subproc.py b/tianshou/env/worker/subproc.py
index 822d65c..8b89b6c 100644
--- a/tianshou/env/worker/subproc.py
+++ b/tianshou/env/worker/subproc.py
@@ -12,7 +12,6 @@ from tianshou.env.utils import CloudpickleWrapper
 
 
 _NP_TO_CT = {
-    np.bool: ctypes.c_bool,
     np.bool_: ctypes.c_bool,
     np.uint8: ctypes.c_uint8,
     np.uint16: ctypes.c_uint16,
@@ -31,7 +30,7 @@ class ShArray:
     """Wrapper of multiprocessing Array."""
 
     def __init__(self, dtype: np.generic, shape: Tuple[int]) -> None:
-        self.arr = Array(_NP_TO_CT[dtype.type], int(np.prod(shape)))
+        self.arr = Array(_NP_TO_CT[dtype.type], int(np.prod(shape)))  # type: ignore
         self.dtype = dtype
         self.shape = shape
 
@@ -64,8 +63,7 @@ def _worker(
     obs_bufs: Optional[Union[dict, tuple, ShArray]] = None,
 ) -> None:
     def _encode_obs(
-        obs: Union[dict, tuple, np.ndarray],
-        buffer: Union[dict, tuple, ShArray],
+        obs: Union[dict, tuple, np.ndarray], buffer: Union[dict, tuple, ShArray]
     ) -> None:
         if isinstance(obs, np.ndarray) and isinstance(buffer, ShArray):
             buffer.save(obs)
diff --git a/tianshou/exploration/random.py b/tianshou/exploration/random.py
index 2e495dc..a590858 100644
--- a/tianshou/exploration/random.py
+++ b/tianshou/exploration/random.py
@@ -68,9 +68,7 @@ class OUNoise(BaseNoise):
         """Reset to the initial state."""
         self._x = self._x0
 
-    def __call__(
-        self, size: Sequence[int], mu: Optional[float] = None
-    ) -> np.ndarray:
+    def __call__(self, size: Sequence[int], mu: Optional[float] = None) -> np.ndarray:
         """Generate new noise.
 
         Return an numpy array which size is equal to ``size``.
@@ -82,4 +80,4 @@ class OUNoise(BaseNoise):
             mu = self._mu
         r = self._beta * np.random.normal(size=size)
         self._x = self._x + self._alpha * (mu - self._x) + r
-        return self._x
+        return self._x  # type: ignore
diff --git a/tianshou/policy/base.py b/tianshou/policy/base.py
index b297065..238aace 100644
--- a/tianshou/policy/base.py
+++ b/tianshou/policy/base.py
@@ -142,14 +142,14 @@ class BasePolicy(ABC, nn.Module):
                 isinstance(act, np.ndarray):
             # currently this action mapping only supports np.ndarray action
             if self.action_bound_method == "clip":
-                act = np.clip(act, -1.0, 1.0)
+                act = np.clip(act, -1.0, 1.0)  # type: ignore
             elif self.action_bound_method == "tanh":
                 act = np.tanh(act)
             if self.action_scaling:
-                assert np.all(act >= -1.0) and np.all(act <= 1.0), \
+                assert np.min(act) >= -1.0 and np.max(act) <= 1.0, \
                     "action scaling only accepts raw action range = [-1, 1]"
                 low, high = self.action_space.low, self.action_space.high
-                act = low + (high - low) * (act + 1.0) / 2.0
+                act = low + (high - low) * (act + 1.0) / 2.0  # type: ignore
         return act
 
     def process_fn(
@@ -241,9 +241,9 @@ class BasePolicy(ABC, nn.Module):
         :return: A bool type numpy.ndarray in the same shape with indice. "True" means
             "obs_next" of that buffer[indice] is valid.
         """
-        mask = ~buffer.done[indice].astype(np.bool)
-        # info['TimeLimit.truncated'] will be set to True if 'done' flag is generated
-        # because of timelimit of environments. Checkout gym.wrappers.TimeLimit.
+        mask = ~buffer.done[indice]
+        # info["TimeLimit.truncated"] will be True if "done" flag is generated by
+        # timelimit of environments. Checkout gym.wrappers.TimeLimit.
         if hasattr(buffer, 'info') and 'TimeLimit.truncated' in buffer.info:
             mask = mask | buffer.info['TimeLimit.truncated'][indice]
         return mask
@@ -281,7 +281,8 @@ class BasePolicy(ABC, nn.Module):
             assert np.isclose(gae_lambda, 1.0)
             v_s_ = np.zeros_like(rew)
         else:
-            v_s_ = to_numpy(v_s_.flatten()) * BasePolicy.value_mask(buffer, indice)
+            v_s_ = to_numpy(v_s_.flatten())  # type: ignore
+            v_s_ = v_s_ * BasePolicy.value_mask(buffer, indice)
         v_s = np.roll(v_s_, 1) if v_s is None else to_numpy(v_s.flatten())
 
         end_flag = batch.done.copy()
diff --git a/tianshou/policy/imitation/discrete_bcq.py b/tianshou/policy/imitation/discrete_bcq.py
index 5d70822..38ae0c4 100644
--- a/tianshou/policy/imitation/discrete_bcq.py
+++ b/tianshou/policy/imitation/discrete_bcq.py
@@ -58,7 +58,7 @@ class DiscreteBCQPolicy(DQNPolicy):
         else:
             self._log_tau = -np.inf
         assert 0.0 <= eval_eps < 1.0
-        self._eps = eval_eps
+        self.eps = eval_eps
         self._weight_reg = imitation_logits_penalty
 
     def train(self, mode: bool = True) -> "DiscreteBCQPolicy":
@@ -96,15 +96,6 @@ class DiscreteBCQPolicy(DQNPolicy):
         return Batch(act=action, state=state, q_value=q_value,
                      imitation_logits=imitation_logits)
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        # add eps to act
-        if not np.isclose(self._eps, 0.0):
-            bsz = len(act)
-            mask = np.random.rand(bsz) < self._eps
-            act_rand = np.random.randint(self.max_action_num, size=[bsz])
-            act[mask] = act_rand[mask]
-        return act
-
     def learn(self, batch: Batch, **kwargs: Any) -> Dict[str, float]:
         if self._iter % self._freq == 0:
             self.sync_weight()
diff --git a/tianshou/policy/modelbase/psrl.py b/tianshou/policy/modelbase/psrl.py
index 4a56597..b438dbc 100644
--- a/tianshou/policy/modelbase/psrl.py
+++ b/tianshou/policy/modelbase/psrl.py
@@ -1,6 +1,6 @@
 import torch
 import numpy as np
-from typing import Any, Dict, Union, Optional
+from typing import Any, Dict, Tuple, Union, Optional
 
 from tianshou.data import Batch
 from tianshou.policy import BasePolicy
@@ -100,7 +100,7 @@ class PSRLModel(object):
         discount_factor: float,
         eps: float,
         value: np.ndarray,
-    ) -> np.ndarray:
+    ) -> Tuple[np.ndarray, np.ndarray]:
         """Value iteration solver for MDPs.
 
         :param np.ndarray trans_prob: transition probabilities, with shape
@@ -126,7 +126,7 @@ class PSRLModel(object):
     def __call__(
         self,
         obs: np.ndarray,
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> np.ndarray:
         if not self.updated:
@@ -215,6 +215,6 @@ class PSRLPolicy(BasePolicy):
                 rew_count[obs_next, :] += 1
         self.model.observe(trans_count, rew_sum, rew_square_sum, rew_count)
         return {
-            "psrl/rew_mean": self.model.rew_mean.mean(),
-            "psrl/rew_std": self.model.rew_std.mean(),
+            "psrl/rew_mean": float(self.model.rew_mean.mean()),
+            "psrl/rew_std": float(self.model.rew_std.mean()),
         }
diff --git a/tianshou/policy/modelfree/a2c.py b/tianshou/policy/modelfree/a2c.py
index 9396971..433810d 100644
--- a/tianshou/policy/modelfree/a2c.py
+++ b/tianshou/policy/modelfree/a2c.py
@@ -5,7 +5,7 @@ import torch.nn.functional as F
 from typing import Any, Dict, List, Type, Optional
 
 from tianshou.policy import PGPolicy
-from tianshou.data import Batch, ReplayBuffer, to_numpy, to_torch_as
+from tianshou.data import Batch, ReplayBuffer, to_torch_as
 
 
 class A2CPolicy(PGPolicy):
@@ -84,8 +84,8 @@ class A2CPolicy(PGPolicy):
                 v_s.append(self.critic(b.obs))
                 v_s_.append(self.critic(b.obs_next))
         batch.v_s = torch.cat(v_s, dim=0).flatten()  # old value
-        v_s = to_numpy(batch.v_s)
-        v_s_ = to_numpy(torch.cat(v_s_, dim=0).flatten())
+        v_s = batch.v_s.cpu().numpy()
+        v_s_ = torch.cat(v_s_, dim=0).flatten().cpu().numpy()
         # when normalizing values, we do not minus self.ret_rms.mean to be numerically
         # consistent with OPENAI baselines' value normalization pipeline. Emperical
         # study also shows that "minus mean" will harm performances a tiny little bit
diff --git a/tianshou/policy/modelfree/ddpg.py b/tianshou/policy/modelfree/ddpg.py
index 7d582fb..324467f 100644
--- a/tianshou/policy/modelfree/ddpg.py
+++ b/tianshou/policy/modelfree/ddpg.py
@@ -1,4 +1,5 @@
 import torch
+import warnings
 import numpy as np
 from copy import deepcopy
 from typing import Any, Dict, Tuple, Union, Optional
@@ -167,7 +168,12 @@ class DDPGPolicy(BasePolicy):
             "loss/critic": critic_loss.item(),
         }
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        if self._noise:
-            act = act + self._noise(act.shape)
+    def exploration_noise(
+        self, act: Union[np.ndarray, Batch], batch: Batch
+    ) -> Union[np.ndarray, Batch]:
+        if self._noise is None:
+            return act
+        if isinstance(act, np.ndarray):
+            return act + self._noise(act.shape)
+        warnings.warn("Cannot add exploration noise to non-numpy_array action.")
         return act
diff --git a/tianshou/policy/modelfree/dqn.py b/tianshou/policy/modelfree/dqn.py
index 5b9f463..5a4f663 100644
--- a/tianshou/policy/modelfree/dqn.py
+++ b/tianshou/policy/modelfree/dqn.py
@@ -168,8 +168,10 @@ class DQNPolicy(BasePolicy):
         self._iter += 1
         return {"loss": loss.item()}
 
-    def exploration_noise(self, act: np.ndarray, batch: Batch) -> np.ndarray:
-        if not np.isclose(self.eps, 0.0):
+    def exploration_noise(
+        self, act: Union[np.ndarray, Batch], batch: Batch
+    ) -> Union[np.ndarray, Batch]:
+        if isinstance(act, np.ndarray) and not np.isclose(self.eps, 0.0):
             bsz = len(act)
             rand_mask = np.random.rand(bsz) < self.eps
             q = np.random.rand(bsz, self.max_action_num)  # [0, 1]
diff --git a/tianshou/policy/multiagent/mapolicy.py b/tianshou/policy/multiagent/mapolicy.py
index 7aa1f66..176f86a 100644
--- a/tianshou/policy/multiagent/mapolicy.py
+++ b/tianshou/policy/multiagent/mapolicy.py
@@ -1,5 +1,5 @@
 import numpy as np
-from typing import Any, Dict, List, Union, Optional
+from typing import Any, Dict, List, Tuple, Union, Optional
 
 from tianshou.policy import BasePolicy
 from tianshou.data import Batch, ReplayBuffer
@@ -71,7 +71,7 @@ class MultiAgentPolicyManager(BasePolicy):
                 act[agent_index], batch[agent_index])
         return act
 
-    def forward(
+    def forward(  # type: ignore
         self,
         batch: Batch,
         state: Optional[Union[dict, Batch]] = None,
@@ -100,7 +100,8 @@ class MultiAgentPolicyManager(BasePolicy):
                     "agent_n": xxx}
             }
         """
-        results = []
+        results: List[Tuple[bool, np.ndarray, Batch,
+                            Union[np.ndarray, Batch], Batch]] = []
         for policy in self.policies:
             # This part of code is difficult to understand.
             # Let's follow an example with two agents
@@ -112,7 +113,7 @@ class MultiAgentPolicyManager(BasePolicy):
             agent_index = np.nonzero(batch.obs.agent_id == policy.agent_id)[0]
             if len(agent_index) == 0:
                 # (has_data, agent_index, out, act, state)
-                results.append((False, None, Batch(), None, Batch()))
+                results.append((False, np.array([-1]), Batch(), Batch(), Batch()))
                 continue
             tmp_batch = batch[agent_index]
             if isinstance(tmp_batch.rew, np.ndarray):
diff --git a/tianshou/utils/log_tools.py b/tianshou/utils/log_tools.py
index c50c8eb..fcd1d55 100644
--- a/tianshou/utils/log_tools.py
+++ b/tianshou/utils/log_tools.py
@@ -14,16 +14,12 @@ class BaseLogger(ABC):
 
     @abstractmethod
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         """Specify how the writer is used to log data.
 
-        :param key: namespace which the input data tuple belongs to.
-        :param x: stands for the ordinate of the input data tuple.
+        :param str key: namespace which the input data tuple belongs to.
+        :param int x: stands for the ordinate of the input data tuple.
         :param y: stands for the abscissa of the input data tuple.
         """
         pass
@@ -84,11 +80,7 @@ class BasicLogger(BaseLogger):
         self.last_log_update_step = -1
 
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         self.writer.add_scalar(key, y, global_step=x)
 
@@ -149,11 +141,7 @@ class LazyLogger(BasicLogger):
         super().__init__(None)  # type: ignore
 
     def write(
-        self,
-        key: str,
-        x: Union[Number, np.number, np.ndarray],
-        y: Union[Number, np.number, np.ndarray],
-        **kwargs: Any,
+        self, key: str, x: int, y: Union[Number, np.number, np.ndarray], **kwargs: Any
     ) -> None:
         """The LazyLogger writes nothing."""
         pass
diff --git a/tianshou/utils/net/common.py b/tianshou/utils/net/common.py
index b41346e..664b488 100644
--- a/tianshou/utils/net/common.py
+++ b/tianshou/utils/net/common.py
@@ -50,8 +50,7 @@ class MLP(nn.Module):
         output_dim: int = 0,
         hidden_sizes: Sequence[int] = (),
         norm_layer: Optional[Union[ModuleType, Sequence[ModuleType]]] = None,
-        activation: Optional[Union[ModuleType, Sequence[ModuleType]]]
-        = nn.ReLU,
+        activation: Optional[Union[ModuleType, Sequence[ModuleType]]] = nn.ReLU,
         device: Optional[Union[str, int, torch.device]] = None,
     ) -> None:
         super().__init__()
@@ -139,7 +138,7 @@ class Net(nn.Module):
     def __init__(
         self,
         state_shape: Union[int, Sequence[int]],
-        action_shape: Optional[Union[int, Sequence[int]]] = 0,
+        action_shape: Union[int, Sequence[int]] = 0,
         hidden_sizes: Sequence[int] = (),
         norm_layer: Optional[ModuleType] = None,
         activation: Optional[ModuleType] = nn.ReLU,
@@ -153,8 +152,8 @@ class Net(nn.Module):
         self.device = device
         self.softmax = softmax
         self.num_atoms = num_atoms
-        input_dim = np.prod(state_shape)
-        action_dim = np.prod(action_shape) * num_atoms
+        input_dim = int(np.prod(state_shape))
+        action_dim = int(np.prod(action_shape)) * num_atoms
         if concat:
             input_dim += action_dim
         self.use_dueling = dueling_param is not None
@@ -179,7 +178,7 @@ class Net(nn.Module):
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         """Mapping: s -> flatten (inside MLP)-> logits."""
@@ -221,8 +220,8 @@ class Recurrent(nn.Module):
             num_layers=layer_num,
             batch_first=True,
         )
-        self.fc1 = nn.Linear(np.prod(state_shape), hidden_layer_size)
-        self.fc2 = nn.Linear(hidden_layer_size, np.prod(action_shape))
+        self.fc1 = nn.Linear(int(np.prod(state_shape)), hidden_layer_size)
+        self.fc2 = nn.Linear(hidden_layer_size, int(np.prod(action_shape)))
 
     def forward(
         self,
diff --git a/tianshou/utils/net/continuous.py b/tianshou/utils/net/continuous.py
index a8f6675..36c1786 100644
--- a/tianshou/utils/net/continuous.py
+++ b/tianshou/utils/net/continuous.py
@@ -46,7 +46,7 @@ class Actor(nn.Module):
         super().__init__()
         self.device = device
         self.preprocess = preprocess_net
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.last = MLP(input_dim, self.output_dim,
@@ -56,7 +56,7 @@ class Actor(nn.Module):
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         """Mapping: s -> logits -> action."""
@@ -162,7 +162,7 @@ class ActorProb(nn.Module):
         super().__init__()
         self.preprocess = preprocess_net
         self.device = device
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.mu = MLP(input_dim, self.output_dim,
@@ -179,7 +179,7 @@ class ActorProb(nn.Module):
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[Tuple[torch.Tensor, torch.Tensor], Any]:
         """Mapping: s -> logits -> (mu, sigma)."""
@@ -219,12 +219,12 @@ class RecurrentActorProb(nn.Module):
         super().__init__()
         self.device = device
         self.nn = nn.LSTM(
-            input_size=np.prod(state_shape),
+            input_size=int(np.prod(state_shape)),
             hidden_size=hidden_layer_size,
             num_layers=layer_num,
             batch_first=True,
         )
-        output_dim = np.prod(action_shape)
+        output_dim = int(np.prod(action_shape))
         self.mu = nn.Linear(hidden_layer_size, output_dim)
         self._c_sigma = conditioned_sigma
         if conditioned_sigma:
@@ -293,12 +293,12 @@ class RecurrentCritic(nn.Module):
         self.action_shape = action_shape
         self.device = device
         self.nn = nn.LSTM(
-            input_size=np.prod(state_shape),
+            input_size=int(np.prod(state_shape)),
             hidden_size=hidden_layer_size,
             num_layers=layer_num,
             batch_first=True,
         )
-        self.fc2 = nn.Linear(hidden_layer_size + np.prod(action_shape), 1)
+        self.fc2 = nn.Linear(hidden_layer_size + int(np.prod(action_shape)), 1)
 
     def forward(
         self,
diff --git a/tianshou/utils/net/discrete.py b/tianshou/utils/net/discrete.py
index fc7c9b0..ee1294f 100644
--- a/tianshou/utils/net/discrete.py
+++ b/tianshou/utils/net/discrete.py
@@ -45,7 +45,7 @@ class Actor(nn.Module):
         super().__init__()
         self.device = device
         self.preprocess = preprocess_net
-        self.output_dim = np.prod(action_shape)
+        self.output_dim = int(np.prod(action_shape))
         input_dim = getattr(preprocess_net, "output_dim",
                             preprocess_net_output_dim)
         self.last = MLP(input_dim, self.output_dim,
@@ -55,7 +55,7 @@ class Actor(nn.Module):
     def forward(
         self,
         s: Union[np.ndarray, torch.Tensor],
-        state: Optional[Any] = None,
+        state: Any = None,
         info: Dict[str, Any] = {},
     ) -> Tuple[torch.Tensor, Any]:
         r"""Mapping: s -> Q(s, \*)."""
diff --git a/tianshou/utils/statistics.py b/tianshou/utils/statistics.py
index 009ad4d..1ff1e00 100644
--- a/tianshou/utils/statistics.py
+++ b/tianshou/utils/statistics.py
@@ -3,8 +3,6 @@ import numpy as np
 from numbers import Number
 from typing import List, Union
 
-from tianshou.data import to_numpy
-
 
 class MovAvg(object):
     """Class for moving average.
@@ -28,44 +26,43 @@ class MovAvg(object):
     def __init__(self, size: int = 100) -> None:
         super().__init__()
         self.size = size
-        self.cache: List[Union[Number, np.number]] = []
+        self.cache: List[np.number] = []
         self.banned = [np.inf, np.nan, -np.inf]
 
     def add(
         self, x: Union[Number, np.number, list, np.ndarray, torch.Tensor]
-    ) -> np.number:
+    ) -> float:
         """Add a scalar into :class:`MovAvg`.
 
         You can add ``torch.Tensor`` with only one element, a python scalar, or
         a list of python scalar.
         """
         if isinstance(x, torch.Tensor):
-            x = to_numpy(x.flatten())
-        if isinstance(x, list) or isinstance(x, np.ndarray):
-            for i in x:
-                if i not in self.banned:
-                    self.cache.append(i)
-        elif x not in self.banned:
-            self.cache.append(x)
+            x = x.flatten().cpu().numpy()
+        if np.isscalar(x):
+            x = [x]
+        for i in x:  # type: ignore
+            if i not in self.banned:
+                self.cache.append(i)
         if self.size > 0 and len(self.cache) > self.size:
             self.cache = self.cache[-self.size:]
         return self.get()
 
-    def get(self) -> np.number:
+    def get(self) -> float:
         """Get the average."""
         if len(self.cache) == 0:
-            return 0
-        return np.mean(self.cache)
+            return 0.0
+        return float(np.mean(self.cache))
 
-    def mean(self) -> np.number:
+    def mean(self) -> float:
         """Get the average. Same as :meth:`get`."""
         return self.get()
 
-    def std(self) -> np.number:
+    def std(self) -> float:
         """Get the standard deviation."""
         if len(self.cache) == 0:
-            return 0
-        return np.std(self.cache)
+            return 0.0
+        return float(np.std(self.cache))
 
 
 class RunningMeanStd(object):
@@ -74,8 +71,10 @@ class RunningMeanStd(object):
     https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
     """
 
-    def __init__(self) -> None:
-        self.mean, self.var = 0.0, 1.0
+    def __init__(
+        self, mean: Union[float, np.ndarray] = 0.0, std: Union[float, np.ndarray] = 1.0
+    ) -> None:
+        self.mean, self.var = mean, std
         self.count = 0
 
     def update(self, x: np.ndarray) -> None:
@@ -92,5 +91,5 @@ class RunningMeanStd(object):
         m_2 = m_a + m_b + delta ** 2 * self.count * batch_count / total_count
         new_var = m_2 / total_count
 
-        self.mean, self.var = new_mean, new_var
+        self.mean, self.var = new_mean, new_var  # type: ignore
         self.count = total_count