diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
new file mode 100644
index 0000000..c98c6e2
--- /dev/null
+++ b/AlphaGo/Network.py
@@ -0,0 +1,142 @@
+import os
+import time
+
+import numpy as np
+import tensorflow as tf
+import tensorflow.contrib.layers as layers
+
+import multi_gpu
+
+# Training arrays are read from data.npz in the working directory (data.py
+# in this change writes a file with the same three keys).
+data = np.load("data.npz")
+boards = data["boards"]
+wins = data["wins"]
+ps = data["ps"]
+print(boards.shape)
+print(wins.shape)
+print(ps.shape)
+
+
+def _conv_bn(inputs, num_outputs, kernel_size, activation_fn, is_training):
+    """Build a stride-1 conv2d layer with batch_norm and L2 weight decay.
+
+    All convolutions in this file share these settings; only the filter
+    count, kernel size and activation differ.
+    """
+    normalizer_params = {'is_training': is_training,
+                         'updates_collections': None}
+    return layers.conv2d(inputs, num_outputs, kernel_size=kernel_size, stride=1,
+                         activation_fn=activation_fn,
+                         normalizer_fn=layers.batch_norm,
+                         normalizer_params=normalizer_params,
+                         weights_regularizer=layers.l2_regularizer(1e-4))
+
+
+def residual_block(input, is_training):
+    """Stack two 3x3, 256-filter convolutions and add a skip connection.
+
+    :param input: Feature tensor fed to the first convolution.
+    :param is_training: Boolean tensor forwarded to batch normalization.
+    :return: ReLU of the sum of both convolutions' outputs.
+    """
+    h = _conv_bn(input, 256, 3, tf.nn.relu, is_training)
+    residual = _conv_bn(h, 256, 3, tf.identity, is_training)
+    # NOTE(review): the skip connection starts after the first convolution
+    # rather than at `input`. Left as is: the first block receives the
+    # 17-channel board tensor, which cannot be added to 256 channels.
+    h = h + residual
+    return tf.nn.relu(h)
+
+
+def policy_heads(input, is_training):
+    """Map trunk features to 362 unnormalized move logits.
+
+    :param input: Feature tensor produced by the residual blocks.
+    :param is_training: Boolean tensor forwarded to batch normalization.
+    :return: Tensor of logits with 362 entries per example.
+    """
+    h = _conv_bn(input, 2, 1, tf.nn.relu, is_training)
+    h = layers.flatten(h)
+    h = layers.fully_connected(h, 362, activation_fn=tf.identity,
+                               weights_regularizer=layers.l2_regularizer(1e-4))
+    return h
+
+
+def value_heads(input, is_training):
+    """Map trunk features to one tanh-squashed value per example.
+
+    :param input: Feature tensor produced by the residual blocks.
+    :param is_training: Boolean tensor forwarded to batch normalization.
+    :return: Tensor with a single value in [-1, 1] per example.
+    """
+    h = _conv_bn(input, 2, 1, tf.nn.relu, is_training)
+    h = layers.flatten(h)
+    h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu,
+                               weights_regularizer=layers.l2_regularizer(1e-4))
+    h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh,
+                               weights_regularizer=layers.l2_regularizer(1e-4))
+    return h
+
+
+x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
+is_training = tf.placeholder(tf.bool, shape=[])
+z = tf.placeholder(tf.float32, shape=[None, 1])
+pi = tf.placeholder(tf.float32, shape=[None, 362])
+
+h = residual_block(x, is_training)
+for i in range(18):
+    h = residual_block(h, is_training)
+v = value_heads(h, is_training)
+p = policy_heads(h, is_training)
+# log_softmax replaces log(softmax(p)): the two-step form yields log(0) = -inf
+# once a probability underflows, and 0 * -inf then turns the loss into NaN.
+loss = tf.reduce_mean(tf.square(z - v)) - tf.reduce_mean(tf.multiply(pi, tf.nn.log_softmax(p)))
+reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+total_loss = loss + reg
+train_op = tf.train.RMSPropOptimizer(1e-2).minimize(total_loss)
+
+# NOTE(review): only trainable variables are checkpointed; confirm that
+# non-trainable state (e.g. batch-norm statistics) is not needed on restore.
+var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
+saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
+epochs = 100
+batch_size = 32
+batch_num = boards.shape[0] // batch_size
+result_path = "./results/"
+log_interval = 1
+save_interval = 20
+# Saver.save needs the target directory to exist already.
+if not os.path.isdir(result_path):
+    os.makedirs(result_path)
+with multi_gpu.create_session() as sess:
+    sess.run(tf.global_variables_initializer())
+    ckpt_file = tf.train.latest_checkpoint(result_path)
+    if ckpt_file is not None:
+        print('Restoring model from {}...'.format(ckpt_file))
+        saver.restore(sess, ckpt_file)
+    for epoch in range(epochs):
+        time_train = -time.time()
+        index = np.random.permutation(boards.shape[0])
+        losses = []
+        regs = []
+        # The trailing partial batch of each epoch is skipped.
+        for step in range(batch_num):
+            batch = index[step * batch_size:(step + 1) * batch_size]
+            _, l, r = sess.run([train_op, loss, reg],
+                               feed_dict={x: boards[batch],
+                                          z: wins[batch],
+                                          pi: ps[batch],
+                                          is_training: True})
+            losses.append(l)
+            regs.append(r)
+            if step % log_interval == 0:
+                print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(
+                    epoch, step, time.time() + time_train,
+                    np.mean(np.array(losses)), np.mean(np.array(regs))))
+                time_train = -time.time()
+                losses = []
+                regs = []
+            if step % save_interval == 0:
+                save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, step)
+                saver.save(sess, result_path + save_path)
diff --git a/AlphaGo/data.py b/AlphaGo/data.py
new file mode 100644
index 0000000..3785577
--- /dev/null
+++ b/AlphaGo/data.py
@@ -0,0 +1,61 @@
+import os
+
+import numpy as np
+
+path = "/raid/tongzheng/AG/self_play_204/"
+name = os.listdir(path)
+
+
+def transform_policy(policy, transform):
+    """Apply a board transform to a batch of 362-entry policy vectors.
+
+    The first 361 entries are viewed as a 19x19 board and transformed; the
+    last entry is not a board point and is carried over unchanged.
+    """
+    moves = transform(policy[:, :-1].reshape(-1, 19, 19)).reshape(-1, 361)
+    return np.concatenate([moves, policy[:, -1].reshape(-1, 1)], axis=1)
+
+
+# Gather every file's arrays and concatenate once instead of re-copying the
+# growing result per file. The leading empty float arrays keep the dtype
+# promotion of the incremental concatenation this replaces.
+board_chunks = [np.zeros([0, 19, 19, 17])]
+win_chunks = [np.zeros([0, 1])]
+p_chunks = [np.zeros([0, 362])]
+for n in name:
+    # Close each archive once its arrays have been read into memory.
+    with np.load(os.path.join(path, n)) as data:
+        board_chunks.append(data["boards"])
+        win_chunks.append(data["win"])
+        p_chunks.append(data["p"])
+    print("Finish " + n)
+
+board_ori = np.concatenate(board_chunks, axis=0)
+win_ori = np.concatenate(win_chunks, axis=0)
+p_ori = np.concatenate(p_chunks, axis=0)
+
+# The seven non-identity symmetries of the square board, acting on axes 1
+# and 2. Rotations cover 90, 180 and 270 degrees; the previous range(1, 3)
+# stopped at 180 and left the 270 degree rotation out of the data set.
+transforms = [lambda a, k=k: np.rot90(a, k, (1, 2)) for k in range(1, 4)]
+transforms += [
+    lambda a: a[:, ::-1],
+    lambda a: a[:, :, ::-1],
+    lambda a: np.rot90(a[:, ::-1], 1, (1, 2)),
+    lambda a: np.rot90(a[:, :, ::-1], 1, (1, 2)),
+]
+
+board_chunks = [board_ori]
+win_chunks = [win_ori]
+p_chunks = [p_ori]
+for transform in transforms:
+    board_chunks.append(transform(board_ori))
+    # A symmetry does not change the game outcome.
+    win_chunks.append(win_ori)
+    p_chunks.append(transform_policy(p_ori, transform))
+
+boards = np.concatenate(board_chunks, axis=0)
+wins = np.concatenate(win_chunks, axis=0)
+ps = np.concatenate(p_chunks, axis=0)
+
+np.savez("data", boards=boards, wins=wins, ps=ps)
diff --git a/AlphaGo/multi_gpu.py b/AlphaGo/multi_gpu.py
new file mode 100644
index 0000000..55dfaa8
--- /dev/null
+++ b/AlphaGo/multi_gpu.py
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+from __future__ import absolute_import
+from __future__ import print_function
+from __future__ import division
+
+import tensorflow as tf
+from six.moves import zip
+
+
+tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
+tf.flags.DEFINE_boolean('log_device_placement', False,
+                        """Whether to log device placement.""")
+FLAGS = tf.flags.FLAGS
+
+
+def create_session():
+    """
+    Create a session that allows soft device placement.
+
+    :return: A tf.Session whose device-placement logging follows the
+        log_device_placement flag.
+    """
+    config = tf.ConfigProto(allow_soft_placement=True,
+                            log_device_placement=FLAGS.log_device_placement)
+    return tf.Session(config=config)
+
+
+def average_gradients(tower_grads):
+    """
+    Calculate the average gradient for each shared variable across all towers.
+
+    Note that this function provides a synchronization point across all towers.
+
+    :param tower_grads: List of lists of (gradient, variable) tuples. The
+        outer list is over towers. The inner list is over the variables of
+        one tower, in the same order for every tower.
+    :return: List of pairs of (gradient, variable) where the gradient has
+        been averaged across all towers. Variables whose gradient on the
+        first tower is None are left out.
+    """
+    average_grads = []
+    for grad_and_vars in zip(*tower_grads):
+        # Note that each grad_and_vars looks like the following:
+        #   ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+        if grad_and_vars[0][0] is None:
+            continue
+        # Stack the per-tower gradients along a new leading 'tower' axis
+        # and average over it.
+        grad = tf.reduce_mean(tf.stack([g for g, _ in grad_and_vars], 0), 0)
+
+        # Keep in mind that the Variables are redundant because they are shared
+        # across towers. So .. we will just return the first tower's pointer to
+        # the Variable.
+        v = grad_and_vars[0][1]
+        average_grads.append((grad, v))
+    return average_grads
+
+
+def average_losses(tower_losses):
+    """
+    Calculate the average loss or other quantity for all towers.
+
+    :param tower_losses: A list of lists of quantities. The outer list is over
+        towers. The inner list is over losses or other quantities for each
+        tower.
+    :return: A list of quantities that have been averaged over all towers.
+    """
+    return [tf.add_n(quantities) / len(quantities)
+            for quantities in zip(*tower_losses)]