2017-12-08 21:09:23 +08:00
|
|
|
from __future__ import absolute_import
|
|
|
|
from __future__ import division
|
|
|
|
|
|
|
|
import tensorflow as tf
|
|
|
|
|
2017-12-15 14:24:08 +08:00
|
|
|
|
2017-12-23 15:36:10 +08:00
|
|
|
class PolicyBase(object):
    """
    Base class for policies. It only declares the interface: an `act` method
    (with exploration) and a `reset` hook.
    """

    def act(self, observation, my_feed_dict):
        """
        Act on the given observation. This base class provides no
        implementation and always raises.

        :param observation: the observation to act on; its format is not
            fixed by this base class.
        :param my_feed_dict: presumably extra feed-dict entries for the
            TensorFlow graph -- TODO confirm against concrete policies.
        :raises NotImplementedError: always, in this base class.
        """
        raise NotImplementedError

    def reset(self):
        """
        Hook for temporally correlated random-process exploration, as in DDPG.
        The base implementation does nothing.

        :return: None
        """
        return None
|