From 3569f2aa493363c3f4fe38b8a66f155a51308c8c Mon Sep 17 00:00:00 2001
From: Tongzheng Ren
Date: Sun, 5 Nov 2017 16:38:20 +0800
Subject: [PATCH] delete .pyc

---
 .gitignore           |   1 +
 AlphaGo/Network.py   | 110 ------------------------------------------
 AlphaGo/multi_gpu.py |  75 -----------------------------
 3 files changed, 1 insertion(+), 185 deletions(-)
 delete mode 100644 AlphaGo/Network.py
 delete mode 100644 AlphaGo/multi_gpu.py

diff --git a/.gitignore b/.gitignore
index b9ae745..85c32a8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 .idea
 leela-zero
+.pyc
diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
deleted file mode 100644
index d350247..0000000
--- a/AlphaGo/Network.py
+++ /dev/null
@@ -1,110 +0,0 @@
-import tensorflow as tf
-import numpy as np
-import time
-import multi_gpu
-import tensorflow.contrib.layers as layers
-
-def residual_block(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
-                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                             weights_regularizer=layers.l2_regularizer(1e-4))
-    h = h + residual
-    return tf.nn.relu(h)
-
-def policy_heads(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.flatten(h)
-    h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
-    return h
-
-def value_heads(input, is_training):
-    normalizer_params = {'is_training': is_training,
-                         'updates_collections': None}
-    h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
-                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                      weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.flatten(h)
-    h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))
-    h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
-    return h
-
-x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
-is_training = tf.placeholder(tf.bool, shape=[])
-z = tf.placeholder(tf.float32, shape=[None, 1])
-pi = tf.placeholder(tf.float32, shape=[None, 362])
-
-h = residual_block(x, is_training)
-for i in range(18):
-    h = residual_block(h, is_training)
-v = value_heads(h, is_training)
-p = policy_heads(h, is_training)
-loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
-
-var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
-saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
-def train():
-    data = np.load("data.npz")
-    boards = data["boards"]
-    wins = data["wins"]
-    ps = data["ps"]
-    print (boards.shape)
-    print (wins.shape)
-    print (ps.shape)
-    epochs = 100
-    batch_size = 32
-    batch_num = boards.shape[0] // batch_size
-    result_path = "./results/"
-    with multi_gpu.create_session() as sess:
-        sess.run(tf.global_variables_initializer())
-        ckpt_file = tf.train.latest_checkpoint(result_path)
-        if ckpt_file is not None:
-            print('Restoring model from {}...'.format(ckpt_file))
-            saver.restore(sess, ckpt_file)
-        for epoch in range(epochs):
-            time_train = -time.time()
-            index = np.arange(boards.shape[0])
-            np.random.shuffle(index)
-            losses = []
-            regs = []
-            for iter in range(batch_num):
-                _, l, r, value, prob = sess.run([train_op, loss, reg, v, p],
-                                                feed_dict={x: boards[index[iter*batch_size:(iter+1)*batch_size]],
-                                                           z: wins[index[iter*batch_size:(iter+1)*batch_size]],
-                                                           pi: ps[index[iter*batch_size:(iter+1)*batch_size]],
-                                                           is_training: True})
-                losses.append(l)
-                regs.append(r)
-                if iter % 1 == 0:
-                    print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-                    time_train = -time.time()
-                    losses = []
-                    regs = []
-                if iter % 20 == 0:
-                    save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
-                    saver.save(sess, result_path + save_path)
-
-def forward(board):
-    result_path = "./results/"
-    with multi_gpu.create_session() as sess:
-        sess.run(tf.global_variables_initializer())
-        ckpt_file = tf.train.latest_checkpoint(result_path)
-        if ckpt_file is not None:
-            print('Restoring model from {}...'.format(ckpt_file))
-            saver.restore(sess, ckpt_file)
-        else:
-            raise ValueError("No model loaded")
-        return sess.run([p, v], feed_dict={x: board})
-
-if __name__ == "__main__":
-    train()
\ No newline at end of file
diff --git a/AlphaGo/multi_gpu.py b/AlphaGo/multi_gpu.py
deleted file mode 100644
index 55dfaa8..0000000
--- a/AlphaGo/multi_gpu.py
+++ /dev/null
@@ -1,75 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-
-from __future__ import absolute_import
-from __future__ import print_function
-from __future__ import division
-
-import tensorflow as tf
-from six.moves import zip
-
-
-tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
-tf.flags.DEFINE_boolean('log_device_placement', False,
-                        """Whether to log device placement.""")
-FLAGS = tf.flags.FLAGS
-
-
-def create_session():
-    config = tf.ConfigProto(allow_soft_placement=True,
-                            log_device_placement=FLAGS.log_device_placement)
-    return tf.Session(config=config)
-
-
-def average_gradients(tower_grads):
-    """
-    Calculate the average gradient for each shared variable across all towers.
-
-    Note that this function provides a synchronization point across all towers.
-
-    :param tower_grads: List of lists of (gradient, variable) tuples.
-        The outer list is over individual gradients. The inner list is over
-        the gradient calculation for each tower.
-    :return: List of pairs of (gradient, variable) where the gradient has
-        been averaged across all towers.
-    """
-    average_grads = []
-    for grad_and_vars in zip(*tower_grads):
-        # Note that each grad_and_vars looks like the following:
-        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
-        if grad_and_vars[0][0] is None:
-            continue
-        grads = []
-        for g, _ in grad_and_vars:
-            # Add 0 dimension to the gradients to represent the tower.
-            expanded_g = tf.expand_dims(g, 0)
-
-            # Append on a 'tower' dimension which we will average over below.
-            grads.append(expanded_g)
-
-        # Average over the 'tower' dimension.
-        grad = tf.concat(grads, 0)
-        grad = tf.reduce_mean(grad, 0)
-
-        # Keep in mind that the Variables are redundant because they are shared
-        # across towers. So .. we will just return the first tower's pointer to
-        # the Variable.
-        v = grad_and_vars[0][1]
-        grad_and_var = (grad, v)
-        average_grads.append(grad_and_var)
-    return average_grads
-
-
-def average_losses(tower_losses):
-    """
-    Calculate the average loss or other quantity for all towers.
-
-    :param tower_losses: A list of lists of quantities. The outer list is over
-        towers. The inner list is over losses or other quantities for each
-        tower.
-    :return: A list of quantities that have been averaged over all towers.
-    """
-    ret = []
-    for quantities in zip(*tower_losses):
-        ret.append(tf.add_n(quantities) / len(quantities))
-    return ret