Saving and loading replay buffer with HDF5 (#261)

As mentioned in #260, this pull request implements saving and loading the replay buffer with HDF5.

This commit is contained in:
parent cd481423dc
commit 5d13d8a453
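
For readers skimming the diff, here is a minimal usage sketch of the new API on top of this branch; the buffer contents and file name are illustrative, not part of the change:

import numpy as np
from tianshou.data import Batch, ReplayBuffer

# fill a small buffer with dummy transitions
buf = ReplayBuffer(size=10)
for i in range(3):
    buf.add(obs=Batch(index=np.array([i])), act=i, rew=i, done=0,
            info={"number": {"n": i}})

buf.save_hdf5("buf.hdf5")                  # persist the whole buffer
buf2 = ReplayBuffer.load_hdf5("buf.hdf5")  # restore a fresh instance
assert len(buf2) == len(buf)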
.gitignore (vendored, 1 change)

@@ -144,3 +144,4 @@ MUJOCO_LOG.TXT
 .DS_Store
 *.zip
 *.pstats
+*.swp
docs/bibtex.json (new file, 9 changes)

@@ -0,0 +1,9 @@
+{
+    "cited": {
+        "tutorials/dqn": [
+            "DQN",
+            "DDPG",
+            "PPO"
+        ]
+    }
+}
@@ -70,6 +70,7 @@ autodoc_default_options = {
     ]
 )
 }
+bibtex_bibfiles = ['refs.bib']
 
 # -- Options for HTML output -------------------------------------------------
 
setup.py (3 changes)

@@ -47,10 +47,11 @@ setup(
     install_requires=[
         "gym>=0.15.4",
         "tqdm",
-        "numpy",
+        "numpy!=1.16.0",  # https://github.com/numpy/numpy/issues/12793
         "tensorboard",
         "torch>=1.4.0",
         "numba>=0.51.0",
+        "h5py>=3.1.0"
     ],
     extras_require={
         "dev": [
@@ -1,11 +1,15 @@
+import os
 import torch
 import pickle
 import pytest
+import tempfile
+import h5py
 import numpy as np
 from timeit import timeit
 
 from tianshou.data import Batch, SegmentTree, \
     ReplayBuffer, ListReplayBuffer, PrioritizedReplayBuffer
+from tianshou.data.utils.converter import to_hdf5
 
 if __name__ == '__main__':
     from env import MyTestEnv
@@ -278,7 +282,73 @@ def test_pickle():
         pbuf.weight[np.arange(len(pbuf))])
 
 
+def test_hdf5():
+    size = 100
+    buffers = {
+        "array": ReplayBuffer(size, stack_num=2),
+        "list": ListReplayBuffer(),
+        "prioritized": PrioritizedReplayBuffer(size, 0.6, 0.4)
+    }
+    buffer_types = {k: b.__class__ for k, b in buffers.items()}
+    device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    rew = torch.tensor([1.]).to(device)
+    for i in range(4):
+        kwargs = {
+            'obs': Batch(index=np.array([i])),
+            'act': i,
+            'rew': rew,
+            'done': 0,
+            'info': {"number": {"n": i}, 'extra': None},
+        }
+        buffers["array"].add(**kwargs)
+        buffers["list"].add(**kwargs)
+        buffers["prioritized"].add(weight=np.random.rand(), **kwargs)
+
+    # save
+    paths = {}
+    for k, buf in buffers.items():
+        f, path = tempfile.mkstemp(suffix='.hdf5')
+        os.close(f)
+        buf.save_hdf5(path)
+        paths[k] = path
+
+    # load replay buffer
+    _buffers = {k: buffer_types[k].load_hdf5(paths[k]) for k in paths.keys()}
+
+    # compare
+    for k in buffers.keys():
+        assert len(_buffers[k]) == len(buffers[k])
+        assert np.allclose(_buffers[k].act, buffers[k].act)
+        assert _buffers[k].stack_num == buffers[k].stack_num
+        assert _buffers[k]._maxsize == buffers[k]._maxsize
+        assert _buffers[k]._index == buffers[k]._index
+        assert np.all(_buffers[k]._indices == buffers[k]._indices)
+    for k in ["array", "prioritized"]:
+        assert isinstance(buffers[k].get(0, "info"), Batch)
+        assert isinstance(_buffers[k].get(0, "info"), Batch)
+    for k in ["array"]:
+        assert np.all(
+            buffers[k][:].info.number.n == _buffers[k][:].info.number.n)
+        assert np.all(
+            buffers[k][:].info.extra == _buffers[k][:].info.extra)
+
+    for path in paths.values():
+        os.remove(path)
+
+    # raise exception when value cannot be pickled
+    data = {"not_supported": lambda x: x*x}
+    grp = h5py.Group
+    with pytest.raises(NotImplementedError):
+        to_hdf5(data, grp)
+    # ndarray with data type not supported by HDF5 that cannot be pickled
+    data = {"not_supported": np.array(lambda x: x*x)}
+    grp = h5py.Group
+    with pytest.raises(RuntimeError):
+        to_hdf5(data, grp)
+
+
 if __name__ == '__main__':
+    test_hdf5()
     test_replaybuffer()
     test_ignore_obs_next()
     test_stack()
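
The two pytest.raises blocks above pin down the converter's error contract: a value that can neither be mapped to an HDF5 type nor pickled raises NotImplementedError, while an object-dtype ndarray whose contents cannot be pickled raises RuntimeError. A standalone sketch of the same contract against a real file (the path is illustrative):

import h5py
import numpy as np
import pytest
from tianshou.data.utils.converter import to_hdf5

with h5py.File("scratch.hdf5", "w") as f:
    # lambdas cannot be pickled -> NotImplementedError
    with pytest.raises(NotImplementedError):
        to_hdf5({"bad": lambda x: x * x}, f)
    # an object array wrapping a lambda cannot be pickled either -> RuntimeError
    with pytest.raises(RuntimeError):
        to_hdf5({"bad": np.array(lambda x: x * x)}, f)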
@@ -1,10 +1,12 @@
+import h5py
 import torch
 import numpy as np
 from numbers import Number
 from typing import Any, Dict, List, Tuple, Union, Optional
 
-from tianshou.data import Batch, SegmentTree, to_numpy
 from tianshou.data.batch import _create_value
+from tianshou.data import Batch, SegmentTree, to_numpy
+from tianshou.data.utils.converter import to_hdf5, from_hdf5
 
 
 class ReplayBuffer:
@@ -38,7 +40,10 @@ class ReplayBuffer:
     >>> # but there are only three valid items, so len(buf) == 3.
     >>> len(buf)
     3
-    >>> pickle.dump(buf, open('buf.pkl', 'wb'))  # save to file "buf.pkl"
+    >>> # save to file "buf.pkl"
+    >>> pickle.dump(buf, open('buf.pkl', 'wb'))
+    >>> # save to HDF5 file
+    >>> buf.save_hdf5('buf.hdf5')
     >>> buf2 = ReplayBuffer(size=10)
     >>> for i in range(15):
     ...     buf2.add(obs=i, act=i, rew=i, done=i, obs_next=i + 1, info={})
@@ -54,7 +59,7 @@ class ReplayBuffer:
            0., 0., 0., 0., 0., 0., 0.])
 
     >>> # get a random sample from buffer
-    >>> # the batch_data is equal to buf[incide].
+    >>> # the batch_data is equal to buf[indice].
     >>> batch_data, indice = buf.sample(batch_size=4)
     >>> batch_data.obs == buf[indice].obs
     array([ True,  True,  True,  True])
@@ -63,6 +68,15 @@ class ReplayBuffer:
     >>> buf = pickle.load(open('buf.pkl', 'rb'))  # load from "buf.pkl"
     >>> len(buf)
     3
+    >>> # load complete buffer from HDF5 file
+    >>> buf = ReplayBuffer.load_hdf5('buf.hdf5')
+    >>> len(buf)
+    3
+    >>> # load contents of HDF5 file into existing buffer
+    >>> # (only possible if size of buffer and data in file match)
+    >>> buf.load_contents_hdf5('buf.hdf5')
+    >>> len(buf)
+    3
 
     :class:`~tianshou.data.ReplayBuffer` also supports frame_stack sampling
     (typically for RNN usage, see issue#19), ignoring storing the next
@@ -167,8 +181,14 @@ class ReplayBuffer:
         We need it because pickling buffer does not work out-of-the-box
         ("buffer.__getattr__" is customized).
         """
+        self._indices = np.arange(state["_maxsize"])
         self.__dict__.update(state)
 
+    def __getstate__(self) -> dict:
+        exclude = {"_indices"}
+        state = {k: v for k, v in self.__dict__.items() if k not in exclude}
+        return state
+
     def _add_to_buffer(self, name: str, inst: Any) -> None:
         try:
             value = self._meta.__dict__[name]
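
The __getstate__/__setstate__ pair above keeps the large _indices array out of the serialized state and rebuilds it from "_maxsize" on restore, which keeps both pickle and HDF5 files compact. A hedged check of that behavior (it touches private attributes, for illustration only):

import pickle
import numpy as np
from tianshou.data import ReplayBuffer

buf = ReplayBuffer(size=10)
state = buf.__getstate__()
assert "_indices" not in state          # excluded from the saved state
buf2 = pickle.loads(pickle.dumps(buf))  # round-trips via __setstate__
assert np.all(buf2._indices == np.arange(buf2._maxsize))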
@@ -359,6 +379,21 @@ class ReplayBuffer:
             policy=self.get(index, "policy"),
         )
 
+    def save_hdf5(self, path: str) -> None:
+        """Save replay buffer to HDF5 file."""
+        with h5py.File(path, "w") as f:
+            to_hdf5(self.__getstate__(), f)
+
+    @classmethod
+    def load_hdf5(
+        cls, path: str, device: Optional[str] = None
+    ) -> "ReplayBuffer":
+        """Load replay buffer from HDF5 file."""
+        with h5py.File(path, "r") as f:
+            buf = cls.__new__(cls)
+            buf.__setstate__(from_hdf5(f, device=device))
+        return buf
+
 
 class ListReplayBuffer(ReplayBuffer):
     """List-based replay buffer.
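
Because load_hdf5 forwards a device argument to from_hdf5, tensors stored in the file can be mapped onto a target device at load time. A small sketch (the file name is illustrative):

import torch
from tianshou.data import ReplayBuffer

# restore on CPU even if the buffer was saved with CUDA tensors
buf = ReplayBuffer.load_hdf5("buf.hdf5", device="cpu")
# or map onto the GPU when one is available
if torch.cuda.is_available():
    buf_gpu = ReplayBuffer.load_hdf5("buf.hdf5", device="cuda")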
@@ -1,8 +1,10 @@
+import h5py
 import torch
+import pickle
 import numpy as np
 from copy import deepcopy
 from numbers import Number
-from typing import Union, Optional
+from typing import Dict, Union, Optional
 
 from tianshou.data.batch import _parse_value, Batch
 
@@ -80,3 +82,90 @@ def to_torch_as(
     """
     assert isinstance(y, torch.Tensor)
     return to_torch(x, dtype=y.dtype, device=y.device)
+
+
+# Note: object is used as a proxy for objects that can be pickled
+# Note: mypy does not support cyclic definition currently
+Hdf5ConvertibleValues = Union[  # type: ignore
+    int, float, Batch, np.ndarray, torch.Tensor, object,
+    'Hdf5ConvertibleType',  # type: ignore
+]
+
+Hdf5ConvertibleType = Dict[str, Hdf5ConvertibleValues]  # type: ignore
+
+
+def to_hdf5(x: Hdf5ConvertibleType, y: h5py.Group) -> None:
+    """Copy object into HDF5 group."""
+
+    def to_hdf5_via_pickle(x: object, y: h5py.Group, key: str) -> None:
+        """Pickle, convert to numpy array and write to HDF5 dataset."""
+        data = np.frombuffer(pickle.dumps(x), dtype=np.byte)
+        y.create_dataset(key, data=data)
+
+    for k, v in x.items():
+        if isinstance(v, (Batch, dict)):
+            # dicts and batches are both represented by groups
+            subgrp = y.create_group(k)
+            if isinstance(v, Batch):
+                subgrp_data = v.__getstate__()
+                subgrp.attrs["__data_type__"] = "Batch"
+            else:
+                subgrp_data = v
+            to_hdf5(subgrp_data, subgrp)
+        elif isinstance(v, torch.Tensor):
+            # PyTorch tensors are written to datasets
+            y.create_dataset(k, data=to_numpy(v))
+            y[k].attrs["__data_type__"] = "Tensor"
+        elif isinstance(v, np.ndarray):
+            try:
+                # NumPy arrays are written to datasets
+                y.create_dataset(k, data=v)
+                y[k].attrs["__data_type__"] = "ndarray"
+            except TypeError:
+                # If data type is not supported by HDF5 fall back to pickle.
+                # This happens if dtype=object (e.g. due to entries being None)
+                # and possibly in other cases like structured arrays.
+                try:
+                    to_hdf5_via_pickle(v, y, k)
+                except Exception as e:
+                    raise RuntimeError(
+                        f"Attempted to pickle {v.__class__.__name__} due to "
+                        "data type not supported by HDF5 and failed."
+                    ) from e
+                y[k].attrs["__data_type__"] = "pickled_ndarray"
+        elif isinstance(v, (int, float)):
+            # ints and floats are stored as attributes of groups
+            y.attrs[k] = v
+        else:  # resort to pickle for any other type of object
+            try:
+                to_hdf5_via_pickle(v, y, k)
+            except Exception as e:
+                raise NotImplementedError(
+                    f"No conversion to HDF5 for object of type '{type(v)}' "
+                    "implemented and fallback to pickle failed."
+                ) from e
+            y[k].attrs["__data_type__"] = v.__class__.__name__
+
+
+def from_hdf5(
+    x: h5py.Group, device: Optional[str] = None
+) -> Hdf5ConvertibleType:
+    """Restore object from HDF5 group."""
+    if isinstance(x, h5py.Dataset):
+        # handle datasets
+        if x.attrs["__data_type__"] == "ndarray":
+            y = np.array(x)
+        elif x.attrs["__data_type__"] == "Tensor":
+            y = torch.tensor(x, device=device)
+        else:
+            y = pickle.loads(x[()])
+    else:
+        # handle groups representing a dict or a Batch
+        y = {k: v for k, v in x.attrs.items() if k != "__data_type__"}
+        for k, v in x.items():
+            y[k] = from_hdf5(v, device)
+        if "__data_type__" in x.attrs:
+            # if dictionary represents Batch, convert to Batch
+            if x.attrs["__data_type__"] == "Batch":
+                y = Batch(y)
+    return y
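
To make the resulting file layout concrete, here is a hedged round-trip through to_hdf5/from_hdf5 directly (the file name and sample data are made up): ints and floats land in group attributes, arrays and tensors in datasets, and dicts or Batch objects in subgroups tagged via the "__data_type__" attribute.

import h5py
import numpy as np
from tianshou.data import Batch
from tianshou.data.utils.converter import to_hdf5, from_hdf5

data = {
    "count": 3,                     # int -> group attribute
    "obs": np.zeros((4, 2)),        # ndarray -> dataset
    "info": Batch(n=np.arange(4)),  # Batch -> subgroup tagged "Batch"
}
with h5py.File("demo.hdf5", "w") as f:
    to_hdf5(data, f)
with h5py.File("demo.hdf5", "r") as f:
    restored = from_hdf5(f)
assert restored["count"] == 3
assert np.allclose(restored["obs"], data["obs"])
assert isinstance(restored["info"], Batch)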