delete .pyc

parent f09ebc2124
commit 3569f2aa49

.gitignore (+1 line)
@@ -1,2 +1,3 @@
 .idea
 leela-zero
+.pyc
@ -1,110 +0,0 @@
|
|||||||
import tensorflow as tf
|
|
||||||
import numpy as np
|
|
||||||
import time
|
|
||||||
import multi_gpu
|
|
||||||
import tensorflow.contrib.layers as layers
|
|
||||||
|
|
||||||
def residual_block(input, is_training):
|
|
||||||
normalizer_params = {'is_training': is_training,
|
|
||||||
'updates_collections': None}
|
|
||||||
h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
|
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
|
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
h = h + residual
|
|
||||||
return tf.nn.relu(h)
|
|
||||||
|
|
||||||
def policy_heads(input, is_training):
|
|
||||||
normalizer_params = {'is_training': is_training,
|
|
||||||
'updates_collections': None}
|
|
||||||
h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
|
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
h = layers.flatten(h)
|
|
||||||
h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
return h
|
|
||||||
|
|
||||||
def value_heads(input, is_training):
|
|
||||||
normalizer_params = {'is_training': is_training,
|
|
||||||
'updates_collections': None}
|
|
||||||
h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
|
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
h = layers.flatten(h)
|
|
||||||
h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
|
|
||||||
return h
|
|
||||||
|
|
||||||
x = tf.placeholder(tf.float32,shape=[None,19,19,17])
|
|
||||||
is_training = tf.placeholder(tf.bool, shape=[])
|
|
||||||
z = tf.placeholder(tf.float32, shape=[None, 1])
|
|
||||||
pi = tf.placeholder(tf.float32, shape=[None, 362])
|
|
||||||
|
|
||||||
h = residual_block(x, is_training)
|
|
||||||
for i in range(18):
|
|
||||||
h = residual_block(h, is_training)
|
|
||||||
v = value_heads(h, is_training)
|
|
||||||
p = policy_heads(h, is_training)
|
|
||||||
loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
|
|
||||||
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
|
|
||||||
total_loss = loss + reg
|
|
||||||
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
|
|
||||||
|
|
||||||
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
|
|
||||||
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
|
|
||||||
def train():
|
|
||||||
data = np.load("data.npz")
|
|
||||||
boards = data["boards"]
|
|
||||||
wins = data["wins"]
|
|
||||||
ps = data["ps"]
|
|
||||||
print (boards.shape)
|
|
||||||
print (wins.shape)
|
|
||||||
print (ps.shape)
|
|
||||||
epochs = 100
|
|
||||||
batch_size = 32
|
|
||||||
batch_num = boards.shape[0] // batch_size
|
|
||||||
result_path = "./results/"
|
|
||||||
with multi_gpu.create_session() as sess:
|
|
||||||
sess.run(tf.global_variables_initializer())
|
|
||||||
ckpt_file = tf.train.latest_checkpoint(result_path)
|
|
||||||
if ckpt_file is not None:
|
|
||||||
print('Restoring model from {}...'.format(ckpt_file))
|
|
||||||
saver.restore(sess, ckpt_file)
|
|
||||||
for epoch in range(epochs):
|
|
||||||
time_train = -time.time()
|
|
||||||
index = np.arange(boards.shape[0])
|
|
||||||
np.random.shuffle(index)
|
|
||||||
losses = []
|
|
||||||
regs = []
|
|
||||||
for iter in range(batch_num):
|
|
||||||
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
|
|
||||||
z:wins[index[iter*batch_size:(iter+1)*batch_size]],
|
|
||||||
pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
|
|
||||||
is_training:True})
|
|
||||||
losses.append(l)
|
|
||||||
regs.append(r)
|
|
||||||
if iter % 1 == 0:
|
|
||||||
print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
|
|
||||||
time_train=-time.time()
|
|
||||||
losses = []
|
|
||||||
regs = []
|
|
||||||
if iter % 20 == 0:
|
|
||||||
save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
|
|
||||||
saver.save(sess, result_path + save_path)
|
|
||||||
|
|
||||||
def forward(board):
|
|
||||||
result_path = "./results/"
|
|
||||||
with multi_gpu.create_session() as sess:
|
|
||||||
sess.run(tf.global_variables_initializer())
|
|
||||||
ckpt_file = tf.train.latest_checkpoint(result_path)
|
|
||||||
if ckpt_file is not None:
|
|
||||||
print('Restoring model from {}...'.format(ckpt_file))
|
|
||||||
saver.restore(sess, ckpt_file)
|
|
||||||
else:
|
|
||||||
raise ValueError("No model loaded")
|
|
||||||
return sess.run([p,v], feed_dict={x:board})
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
train()
|
|
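Note: the deleted script minimizes an AlphaGo Zero-style objective, the mean squared error between the game outcome z and the value head v, plus the cross-entropy between the search probabilities pi and the softmax of the policy logits p (with the L2 terms collected in reg added on top). A minimal NumPy sketch of that loss term for reference; the function name and arguments here are illustrative, not part of the repository:

import numpy as np

def reference_loss(z, v, pi, p_logits):
    # Mirrors tf.reduce_mean(tf.square(z - v))
    #   - tf.reduce_mean(pi * tf.log(tf.nn.softmax(p_logits, 1)))
    # from the deleted script; both terms are means over all elements.
    value_term = np.mean((z - v) ** 2)
    shifted = p_logits - p_logits.max(axis=1, keepdims=True)  # stable log-softmax
    log_softmax = shifted - np.log(np.exp(shifted).sum(axis=1, keepdims=True))
    policy_term = -np.mean(pi * log_softmax)
    return value_term + policy_term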
@@ -1,75 +0,0 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from __future__ import absolute_import
from __future__ import print_function
from __future__ import division

import tensorflow as tf
from six.moves import zip


tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
tf.flags.DEFINE_boolean('log_device_placement', False,
                        """Whether to log device placement.""")
FLAGS = tf.flags.FLAGS


def create_session():
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    return tf.Session(config=config)


def average_gradients(tower_grads):
    """
    Calculate the average gradient for each shared variable across all towers.

    Note that this function provides a synchronization point across all towers.

    :param tower_grads: List of lists of (gradient, variable) tuples.
        The outer list is over individual gradients. The inner list is over
        the gradient calculation for each tower.
    :return: List of pairs of (gradient, variable) where the gradient has
        been averaged across all towers.
    """
    average_grads = []
    for grad_and_vars in zip(*tower_grads):
        # Note that each grad_and_vars looks like the following:
        # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
        if grad_and_vars[0][0] is None:
            continue
        grads = []
        for g, _ in grad_and_vars:
            # Add 0 dimension to the gradients to represent the tower.
            expanded_g = tf.expand_dims(g, 0)

            # Append on a 'tower' dimension which we will average over below.
            grads.append(expanded_g)

        # Average over the 'tower' dimension.
        grad = tf.concat(grads, 0)
        grad = tf.reduce_mean(grad, 0)

        # Keep in mind that the Variables are redundant because they are shared
        # across towers. So .. we will just return the first tower's pointer to
        # the Variable.
        v = grad_and_vars[0][1]
        grad_and_var = (grad, v)
        average_grads.append(grad_and_var)
    return average_grads


def average_losses(tower_losses):
    """
    Calculate the average loss or other quantity for all towers.

    :param tower_losses: A list of lists of quantities. The outer list is over
        towers. The inner list is over losses or other quantities for each
        tower.
    :return: A list of quantities that have been averaged over all towers.
    """
    ret = []
    for quantities in zip(*tower_losses):
        ret.append(tf.add_n(quantities) / len(quantities))
    return ret
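Note: for context, a minimal sketch of how helpers like these are typically used in TF1 multi-GPU training: one loss per GPU tower, per-tower gradients collected with compute_gradients, averaged with average_gradients, and applied once. The tower-building function below is a self-contained placeholder, not code from this repository:

import tensorflow as tf
import multi_gpu  # the helper module removed by this commit

def build_tower_loss():
    # Placeholder graph so the sketch runs on its own; a real tower would
    # build the full model on its own slice of the input batch.
    w = tf.get_variable('w', shape=[1], initializer=tf.zeros_initializer())
    return tf.reduce_sum(tf.square(w - 1.0))

opt = tf.train.RMSPropOptimizer(1e-4)
tower_grads = []
for i in range(multi_gpu.FLAGS.num_gpus):
    # Variables are created on the first tower and reused on the others.
    with tf.device('/gpu:%d' % i), tf.variable_scope('model', reuse=(i > 0)):
        tower_grads.append(opt.compute_gradients(build_tower_loss()))

# Average per-variable gradients across towers and apply them in one step.
train_op = opt.apply_gradients(multi_gpu.average_gradients(tower_grads))

with multi_gpu.create_session() as sess:
    sess.run(tf.global_variables_initializer())
    sess.run(train_op)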