Training FPS improvement (base commit is 94bfb32): test_pdqn: 1660 (without numba) -> 1930; discrete/test_ppo: 5100 -> 5170. Since nstep has little impact on overall performance, the unit test results are: GAE: 4.1s -> 0.057s; nstep: 0.3s -> 0.15s (little improvement). Others: fix a bug in ttt set_eps; keep only sumtree in the segment tree implementation; dirty fix for the asyncVenv check_id test.
27 lines
995 B
Python
27 lines
995 B
Python
import numpy as np
|
|
|
|
# functions that need to be pre-compiled before producing benchmark results
|
|
from tianshou.policy.base import _episodic_return, _nstep_return
|
|
from tianshou.data.utils.segtree import _reduce, _setitem, _get_prefix_sum_idx
|
|
|
|
|
|
def pre_compile():
    """Warm up the Numba-accelerated helpers with tiny placeholder inputs.

    Numba compiles a function the first time it is called, so each helper
    is invoked once here with the commonly used argument types. Without
    this warm-up, the compilation cost would be counted in the measured
    training speed, which then could not be compared with earlier results.
    """
    # Two-element dummy arrays, one per dtype used in the calls below.
    float64_arr = np.array([0, 1], dtype=np.float64)
    float32_arr = np.array([0, 1], dtype=np.float32)
    bool_arr = np.array([False, True], dtype=np.bool_)
    int64_arr = np.array([0, 1], dtype=np.int64)

    # NOTE(review): the same arrays are shared by every call below and the
    # helpers may modify them in place -- keep the call order unchanged.

    # Return computation helpers.
    _episodic_return(float64_arr, float64_arr, bool_arr, 0.1, 0.1)
    _episodic_return(float32_arr, float64_arr, bool_arr, 0.1, 0.1)
    _nstep_return(
        float64_arr, bool_arr, float32_arr, int64_arr, 0.1, 1, 4, 1.0, 0.0
    )

    # Segment tree helpers.
    _setitem(float64_arr, int64_arr, float64_arr)
    _setitem(float64_arr, int64_arr, float32_arr)
    _reduce(float64_arr, 0, 1)
    _get_prefix_sum_idx(float64_arr, 1, float64_arr)
    _get_prefix_sum_idx(float32_arr, 1, float64_arr)
|