2018-04-15 17:41:43 +08:00

55 lines
2.7 KiB
Python

import tensorflow as tf
def identify_dependent_variables(tensor, candidate_variables):
"""
Identify and return the variables in ``candidate_variables`` that ``tensor`` depends on.
:param tensor: A Tensor. The target Tensor to identify dependency.
:param candidate_variables: A list of :class:`tf.Variable` s. The candidate Variables to identify dependency.
:return: A list of :class:`tf.Variable` s in ``candidate variables`` that has effect on ``tensor``.
"""
grads = tf.gradients(tensor, candidate_variables)
return [var for var, grad in zip(candidate_variables, grads) if grad is not None]
def get_soft_update_op(update_fraction, including_nets, excluding_nets=None):
"""
Builds the graph op to softly update the "old net" of policies and value_functions, as suggested in
`DDPG <https://arxiv.org/pdf/1509.02971.pdf>`_. It updates the :class:`tf.Variable` s in the old net,
:math:`\\theta'` with the :class:`tf.Variable` s in the current network, :math:`\\theta` as
:math:`\\theta' = \\tau \\theta + (1 - \\tau) \\theta'`.
:param update_fraction: A float in range :math:`[0, 1]`. Corresponding to the :math:`\\tau` in the update equation.
:param including_nets: A list of policies and/or value_functions. All :class:`tf.Variable` s in these networks
are included for update. Shared Variables are updated only once in case of layer sharing among the networks.
:param excluding_nets: Optional. A list of policies and/or value_functions defaulting to ``None``.
All :class:`tf.Variable` s in these networks
are excluded from the update determined by ``including nets``. This is useful in existence of layer sharing
among networks and we only want to update the Variables in ``including_nets`` that are not shared.
:return: A list of ops :func:`tf.assign` specifying the soft update.
"""
assert 0 < update_fraction < 1, 'Unrecommended update_fraction <=0 or >=1!'
variables = []
variables_old = []
for net in including_nets:
for var, var_old in zip(net.network_weights, net.network_old_weights):
if var not in variables:
variables.append(var)
variables_old.append(var_old)
if excluding_nets:
excluding_variables = []
for net in excluding_nets:
excluding_variables += net.network_weights
for var, var_old in zip(variables, variables_old):
if var in excluding_variables:
variables.remove(var)
variables_old.remove(var_old)
return [tf.assign(var_old, update_fraction * var + (1 - update_fraction) * var_old)
for var_old, var in zip(variables_old, variables)]