# Tianshou/tianshou/core/value_function/base.py

from __future__ import absolute_import

import tensorflow as tf

# TODO: linear feature baseline also in tf?
class ValueFunctionBase(object):
    """
    Base class of value functions. Children include state values V(s) and
    action values Q(s, a).

    :param value_tensor: tensor of values. Size-1 dimensions other than the
        leading (batch) one are squeezed away so that the stored tensor has
        the canonical shape (batchsize, ).
    :param observation_placeholder: placeholder for the observations. It is
        stored unchanged; this base class itself never reads it.
    """
    def __init__(self, value_tensor, observation_placeholder):
        self._observation_placeholder = observation_placeholder
        # canonical values have shape (batchsize, )
        self._value_tensor = self._squeeze_keep_batch(value_tensor)

    @staticmethod
    def _squeeze_keep_batch(value_tensor):
        """
        Remove size-1 dimensions from ``value_tensor`` without touching the
        leading batch dimension.

        A plain ``tf.squeeze(x)`` drops *every* size-1 dimension, so a batch
        holding a single sample would lose its batch axis and become a
        scalar. Here only the non-batch axes that are statically known to be
        of size 1 are squeezed.

        :param value_tensor: tensor (or tensor-like) of values
        :return: tensor with the non-batch size-1 dimensions removed
        """
        value_tensor = tf.convert_to_tensor(value_tensor)
        static_shape = value_tensor.shape

        # Rank unknown at graph-construction time: the axes cannot be chosen
        # statically, so keep the previous unrestricted squeeze.
        if static_shape.ndims is None:
            return tf.squeeze(value_tensor)

        dims = static_shape.as_list()
        # Some non-batch dimension is unknown: it might be 1 at run time, so
        # keep the previous unrestricted squeeze to stay backward compatible.
        if any(dim is None for dim in dims[1:]):
            return tf.squeeze(value_tensor)

        squeeze_axes = [axis for axis, dim in enumerate(dims)
                        if axis > 0 and dim == 1]
        if not squeeze_axes:
            # Nothing to remove. An empty axis list must not be passed on,
            # because tf.squeeze treats it as "squeeze everything".
            return value_tensor
        return tf.squeeze(value_tensor, axis=squeeze_axes)

    def eval_value(self, **kwargs):
        """
        Evaluate the value function. This base implementation always raises;
        subclasses are expected to override it.

        :param kwargs: inputs of the evaluation; not interpreted here
        :return: batch of corresponding values in numpy array
        :raises NotImplementedError: always, in this base class
        """
        raise NotImplementedError()

    @property
    def value_tensor(self):
        """
        Read-only access to the tensor stored at construction time.

        :return: tensor of the corresponding values
        """
        return self._value_tensor
fix imports to support both python2 and python3. move contents from __init__.py to leave for work after major development. 2017-12-23 15:36:10 +08:00			`from __future__ import absolute_import`

			`import tensorflow as tf`
add value_function (critic). value_function and policy not finished yet. 2017-12-22 00:22:23 +08:00
			`# TODO: linear feature baseline also in tf?`
			`class ValueFunctionBase(object):`
			`"""`
			`base class of value functions. Children include state values V(s) and action values Q(s, a)`
			`"""`
			`def __init__(self, value_tensor, observation_placeholder):`
			`self._observation_placeholder = observation_placeholder`
fix imports to support both python2 and python3. move contents from __init__.py to leave for work after major development. 2017-12-23 15:36:10 +08:00			`self._value_tensor = tf.squeeze(value_tensor) # canonical values has shape (batchsize, )`
add value_function (critic). value_function and policy not finished yet. 2017-12-22 00:22:23 +08:00
towards policy/value refactor 2017-12-23 17:25:16 +08:00			`def eval_value(self, **kwargs):`
add value_function (critic). value_function and policy not finished yet. 2017-12-22 00:22:23 +08:00			`"""`

			`:return: batch of corresponding values in numpy array`
			`"""`
			`raise NotImplementedError()`

towards policy/value refactor 2017-12-23 17:25:16 +08:00			`@property`
			`def value_tensor(self):`
add value_function (critic). value_function and policy not finished yet. 2017-12-22 00:22:23 +08:00			`"""`

			`:return: tensor of the corresponding values`
			`"""`
			`return self._value_tensor`