Implement the network of AlphaGo

Tongzheng Ren 2017-11-04 22:16:43 +08:00
parent 889e5c2fb4
commit 5f923f565e
3 changed files with 235 additions and 0 deletions

AlphaGo/Network.py (new file)

@@ -0,0 +1,95 @@
import tensorflow as tf
import numpy as np
import time
import multi_gpu
import tensorflow.contrib.layers as layers

# Load the self-play dataset produced by AlphaGo/data.py.
data = np.load("data.npz")
boards = data["boards"]  # [N, 19, 19, 17] input feature planes
wins = data["wins"]      # [N, 1] game outcome z for each position
ps = data["ps"]          # [N, 362] target move probabilities (361 board points plus pass)
print(boards.shape)
print(wins.shape)
print(ps.shape)

def residual_block(inputs, is_training):
    """Residual block: two 3x3 convolutions with 256 filters and batch norm; the output
    of the first convolution is added back to the output of the second before a final ReLU."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                             weights_regularizer=layers.l2_regularizer(1e-4))
    return tf.nn.relu(h + residual)

def policy_heads(inputs, is_training):
    """Policy head: 1x1 convolution (2 filters), then a fully connected layer
    producing logits over the 362 moves (361 board points plus pass)."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.flatten(h)
    # The softmax over these logits is applied in the loss and at evaluation time.
    h = layers.fully_connected(h, 362, activation_fn=tf.identity,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    return h

def value_heads(inputs, is_training):
    """Value head: 1x1 convolution (2 filters), a 256-unit hidden layer, and a
    single tanh output estimating the game outcome in [-1, 1]."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.flatten(h)
    h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    return h

# Placeholders: board feature planes, game outcome z, and MCTS policy target pi.
x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
is_training = tf.placeholder(tf.bool, shape=[])
z = tf.placeholder(tf.float32, shape=[None, 1])
pi = tf.placeholder(tf.float32, shape=[None, 362])

# Tower of 19 residual blocks; the first block also maps the 17 input planes to 256 filters.
h = residual_block(x, is_training)
for i in range(18):
    h = residual_block(h, is_training)
v = value_heads(h, is_training)
p = policy_heads(h, is_training)

# Loss: mean-squared value error plus the policy cross-entropy, summed over the 362 moves
# per position and averaged over the batch (log_softmax is used for numerical stability).
loss = tf.reduce_mean(tf.square(z - v)) - tf.reduce_mean(tf.reduce_sum(pi * tf.nn.log_softmax(p), axis=1))
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss + reg
train_op = tf.train.RMSPropOptimizer(1e-2).minimize(total_loss)
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)

epochs = 100
batch_size = 32
batch_num = boards.shape[0] // batch_size
result_path = "./results/"

with multi_gpu.create_session() as sess:
    sess.run(tf.global_variables_initializer())
    # Resume from the most recent checkpoint, if any.
    ckpt_file = tf.train.latest_checkpoint(result_path)
    if ckpt_file is not None:
        print('Restoring model from {}...'.format(ckpt_file))
        saver.restore(sess, ckpt_file)
    for epoch in range(epochs):
        # Reshuffle the training positions at the start of every epoch.
        index = np.arange(boards.shape[0])
        np.random.shuffle(index)
        start = time.time()
        losses = []
        regs = []
        for step in range(batch_num):
            batch = index[step * batch_size:(step + 1) * batch_size]
            _, l, r, value, prob = sess.run(
                [train_op, loss, reg, v, p],
                feed_dict={x: boards[batch],
                           z: wins[batch],
                           pi: ps[batch],
                           is_training: True})
            losses.append(l)
            regs.append(r)
            # Log the step time and the mean losses, then reset the accumulators.
            print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(
                epoch, step, time.time() - start, np.mean(losses), np.mean(regs)))
            start = time.time()
            losses = []
            regs = []
            # Checkpoint every 20 steps.
            if step % 20 == 0:
                save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, step)
                saver.save(sess, result_path + save_path)
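
For reference, a minimal sketch (not part of this commit) of how the trained network might be queried for a policy and a value, assuming the graph above has been built in the same process and a checkpoint already exists under ./results/; the evaluate helper and the zero-filled input below are illustrative only:

def evaluate(sess, board_features):
    # Run the policy and value heads on a batch of [?, 19, 19, 17] feature planes.
    prob_logits, value = sess.run([p, v], feed_dict={x: board_features,
                                                     is_training: False})
    # Convert the policy logits into probabilities over the 362 moves.
    probs = np.exp(prob_logits - prob_logits.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    return probs, value

with multi_gpu.create_session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint("./results/"))
    dummy = np.zeros([1, 19, 19, 17], dtype=np.float32)  # placeholder input, not a real position
    probs, value = evaluate(sess, dummy)
    print(probs.shape, value.shape)  # (1, 362) (1, 1)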

AlphaGo/data.py (new file)

@@ -0,0 +1,65 @@
import os
import numpy as np

# Directory of raw self-play records; each .npz file contains boards, win and p arrays.
path = "/raid/tongzheng/AG/self_play_204/"
names = os.listdir(path)

# Accumulators for the merged dataset.
boards = np.zeros([0, 19, 19, 17])
wins = np.zeros([0, 1])
ps = np.zeros([0, 362])

for n in names:
data = np.load(path + n)
board = data["boards"]
win = data["win"]
p = data["p"]
# board = np.zeros([0, 19, 19, 17])
# win = np.zeros([0, 1])
# p = np.zeros([0, 362])
# for i in range(data["boards"].shape[3]):
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win], axis=0)
ps = np.concatenate([ps, p], axis=0)
print("Finish " + n)

# Data augmentation: append rotated and reflected copies of every position. The 361
# board-point probabilities in p are transformed exactly like the board planes, while
# the pass probability (last column of p) and the outcome are left unchanged.
board_ori = boards
win_ori = wins
p_ori = ps

# 90-, 180- and 270-degree rotations.
for i in range(1, 4):
board = np.rot90(board_ori, i, (1, 2))
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
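# Up-down and left-right reflections of the board planes and of the 19x19 policy grid.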
board = board_ori[:, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
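# Diagonal reflections: a flip followed by a 90-degree rotation, applied to both the
# board planes and the policy grid.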
board = board_ori[:, ::-1]
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1]
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
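# Save the merged, augmented dataset consumed by Network.py.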
np.savez("data", boards=boards, wins=wins, ps=ps)
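
As a sanity check of the convention used above, here is a minimal, self-contained sketch on synthetic arrays (not the real self-play data): the 361 point probabilities are reshaped to a 19x19 grid, transformed with the same np.rot90 call as the board planes, flattened back, and the pass entry is reattached.

import numpy as np

# Synthetic stand-ins with distinguishable values.
board = np.arange(19 * 19 * 17).reshape(1, 19, 19, 17).astype(np.float32)
p = np.arange(362, dtype=np.float32).reshape(1, 362)

# Rotate the board planes and the 361 point probabilities by 90 degrees.
board_rot = np.rot90(board, 1, (1, 2))
p_rot = np.concatenate(
    [np.rot90(p[:, :-1].reshape(-1, 19, 19), 1, (1, 2)).reshape(-1, 361),
     p[:, -1].reshape(-1, 1)], axis=1)

# The probability attached to a point must follow that point under the rotation.
i, j = 3, 7
k, l = 19 - 1 - j, i  # where (i, j) lands under np.rot90 on axes (1, 2)
assert board_rot[0, k, l, 0] == board[0, i, j, 0]
assert p_rot[0, k * 19 + l] == p[0, i * 19 + j]
assert p_rot[0, -1] == p[0, -1]  # pass probability is unchanged
print("symmetry check passed")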

AlphaGo/multi_gpu.py (new file)

@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import tensorflow as tf
from six.moves import zip
tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
tf.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
FLAGS = tf.flags.FLAGS
def create_session():
config = tf.ConfigProto(allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement)
return tf.Session(config=config)
def average_gradients(tower_grads):
"""
Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
    :param tower_grads: List of lists of (gradient, variable) tuples. The outer
        list is over towers; the inner list is over the (gradient, variable)
        pairs computed on each tower.
:return: List of pairs of (gradient, variable) where the gradient has
been averaged across all towers.
"""
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
if grad_and_vars[0][0] is None:
continue
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def average_losses(tower_losses):
"""
Calculate the average loss or other quantity for all towers.
:param tower_losses: A list of lists of quantities. The outer list is over
towers. The inner list is over losses or other quantities for each
tower.
:return: A list of quantities that have been averaged over all towers.
"""
ret = []
for quantities in zip(*tower_losses):
ret.append(tf.add_n(quantities) / len(quantities))
return ret
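
For context, a sketch of how these helpers are typically wired into a multi-tower training loop; the build_tower callback and this wiring are illustrative assumptions, not part of this commit:

import tensorflow as tf
from multi_gpu import FLAGS, average_gradients, average_losses

def train_multi_gpu(build_tower, optimizer):
    # build_tower is a hypothetical callback that constructs one tower's sub-graph
    # and returns its scalar loss; it is not defined in this commit.
    tower_grads = []
    tower_losses = []
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            # Reuse the shared variables on every tower after the first one.
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                loss = build_tower()
                tower_grads.append(optimizer.compute_gradients(loss))
                tower_losses.append([loss])
    # Synchronization point: average gradients and losses across towers.
    train_op = optimizer.apply_gradients(average_gradients(tower_grads))
    avg_loss = average_losses(tower_losses)[0]
    return train_op, avg_loss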