Implement the network of AlphaGo

Tongzheng Ren 2017-11-04 22:16:43 +08:00
parent 889e5c2fb4
commit 5f923f565e
3 changed files with 235 additions and 0 deletions

AlphaGo/Network.py (new file)

@@ -0,0 +1,95 @@
import tensorflow as tf
import numpy as np
import time
import multi_gpu
import tensorflow.contrib.layers as layers

# Load the self-play dataset produced by AlphaGo/data.py.
data = np.load("data.npz")
boards = data["boards"]  # [N, 19, 19, 17] input feature planes
wins = data["wins"]      # [N, 1] game outcome z for each position
ps = data["ps"]          # [N, 362] target move probabilities (361 board points plus pass)
print(boards.shape)
print(wins.shape)
print(ps.shape)

def residual_block(inputs, is_training):
    """Residual block: two 3x3 convolutions with 256 filters and batch norm; the output
    of the first convolution is added back to the output of the second before a final ReLU."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                             weights_regularizer=layers.l2_regularizer(1e-4))
    return tf.nn.relu(h + residual)

def policy_heads(inputs, is_training):
    """Policy head: 1x1 convolution (2 filters), then a fully connected layer
    producing logits over the 362 moves (361 board points plus pass)."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.flatten(h)
    # The softmax over these logits is applied in the loss and at evaluation time.
    h = layers.fully_connected(h, 362, activation_fn=tf.identity,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    return h

def value_heads(inputs, is_training):
    """Value head: 1x1 convolution (2 filters), a 256-unit hidden layer, and a
    single tanh output estimating the game outcome in [-1, 1]."""
    normalizer_params = {'is_training': is_training,
                         'updates_collections': None}
    h = layers.conv2d(inputs, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                      weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.flatten(h)
    h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh,
                               weights_regularizer=layers.l2_regularizer(1e-4))
    return h

# Placeholders: board feature planes, game outcome z, and MCTS policy target pi.
x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
is_training = tf.placeholder(tf.bool, shape=[])
z = tf.placeholder(tf.float32, shape=[None, 1])
pi = tf.placeholder(tf.float32, shape=[None, 362])

# Tower of 19 residual blocks; the first block also maps the 17 input planes to 256 filters.
h = residual_block(x, is_training)
for i in range(18):
    h = residual_block(h, is_training)
v = value_heads(h, is_training)
p = policy_heads(h, is_training)

# Loss: mean-squared value error plus the policy cross-entropy, summed over the 362 moves
# per position and averaged over the batch (log_softmax is used for numerical stability).
loss = tf.reduce_mean(tf.square(z - v)) - tf.reduce_mean(tf.reduce_sum(pi * tf.nn.log_softmax(p), axis=1))
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss + reg
train_op = tf.train.RMSPropOptimizer(1e-2).minimize(total_loss)
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)

epochs = 100
batch_size = 32
batch_num = boards.shape[0] // batch_size
result_path = "./results/"

with multi_gpu.create_session() as sess:
    sess.run(tf.global_variables_initializer())
    # Resume from the most recent checkpoint, if any.
    ckpt_file = tf.train.latest_checkpoint(result_path)
    if ckpt_file is not None:
        print('Restoring model from {}...'.format(ckpt_file))
        saver.restore(sess, ckpt_file)
    for epoch in range(epochs):
        # Reshuffle the training positions at the start of every epoch.
        index = np.arange(boards.shape[0])
        np.random.shuffle(index)
        start = time.time()
        losses = []
        regs = []
        for step in range(batch_num):
            batch = index[step * batch_size:(step + 1) * batch_size]
            _, l, r, value, prob = sess.run(
                [train_op, loss, reg, v, p],
                feed_dict={x: boards[batch],
                           z: wins[batch],
                           pi: ps[batch],
                           is_training: True})
            losses.append(l)
            regs.append(r)
            # Log the step time and the mean losses, then reset the accumulators.
            print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(
                epoch, step, time.time() - start, np.mean(losses), np.mean(regs)))
            start = time.time()
            losses = []
            regs = []
            # Checkpoint every 20 steps.
            if step % 20 == 0:
                save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, step)
                saver.save(sess, result_path + save_path)
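
For reference, a minimal sketch (not part of this commit) of how the trained network might be queried for a policy and a value, assuming the graph above has been built in the same process and a checkpoint already exists under ./results/; the evaluate helper and the zero-filled input below are illustrative only:

def evaluate(sess, board_features):
    # Run the policy and value heads on a batch of [?, 19, 19, 17] feature planes.
    prob_logits, value = sess.run([p, v], feed_dict={x: board_features,
                                                     is_training: False})
    # Convert the policy logits into probabilities over the 362 moves.
    probs = np.exp(prob_logits - prob_logits.max(axis=1, keepdims=True))
    probs /= probs.sum(axis=1, keepdims=True)
    return probs, value

with multi_gpu.create_session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint("./results/"))
    dummy = np.zeros([1, 19, 19, 17], dtype=np.float32)  # placeholder input, not a real position
    probs, value = evaluate(sess, dummy)
    print(probs.shape, value.shape)  # (1, 362) (1, 1)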

AlphaGo/data.py (new file)

@@ -0,0 +1,65 @@
import os
import numpy as np

# Directory of raw self-play records; each .npz file contains boards, win and p arrays.
path = "/raid/tongzheng/AG/self_play_204/"
names = os.listdir(path)

# Accumulators for the merged dataset.
boards = np.zeros([0, 19, 19, 17])
wins = np.zeros([0, 1])
ps = np.zeros([0, 362])

for n in names:
data = np.load(path + n)
board = data["boards"]
win = data["win"]
p = data["p"]
# board = np.zeros([0, 19, 19, 17])
# win = np.zeros([0, 1])
# p = np.zeros([0, 362])
# for i in range(data["boards"].shape[3]):
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win], axis=0)
ps = np.concatenate([ps, p], axis=0)
print("Finish " + n)

# Data augmentation: append rotated and reflected copies of every position. The 361
# board-point probabilities in p are transformed exactly like the board planes, while
# the pass probability (last column of p) and the outcome are left unchanged.
board_ori = boards
win_ori = wins
p_ori = ps

# 90-, 180- and 270-degree rotations.
for i in range(1, 4):
board = np.rot90(board_ori, i, (1, 2))
p = np.concatenate(
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
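# Up-down and left-right reflections of the board planes and of the 19x19 policy grid.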
board = board_ori[:, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1]
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, board], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
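# Diagonal reflections: a flip followed by a 90-degree rotation, applied to both the
# board planes and the policy grid.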
board = board_ori[:, ::-1]
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
board = board_ori[:, :, ::-1]
p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
axis=1)
boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
wins = np.concatenate([wins, win_ori], axis=0)
ps = np.concatenate([ps, p], axis=0)
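# Save the merged, augmented dataset consumed by Network.py.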
np.savez("data", boards=boards, wins=wins, ps=ps)
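
As a sanity check of the convention used above, here is a minimal, self-contained sketch on synthetic arrays (not the real self-play data): the 361 point probabilities are reshaped to a 19x19 grid, transformed with the same np.rot90 call as the board planes, flattened back, and the pass entry is reattached.

import numpy as np

# Synthetic stand-ins with distinguishable values.
board = np.arange(19 * 19 * 17).reshape(1, 19, 19, 17).astype(np.float32)
p = np.arange(362, dtype=np.float32).reshape(1, 362)

# Rotate the board planes and the 361 point probabilities by 90 degrees.
board_rot = np.rot90(board, 1, (1, 2))
p_rot = np.concatenate(
    [np.rot90(p[:, :-1].reshape(-1, 19, 19), 1, (1, 2)).reshape(-1, 361),
     p[:, -1].reshape(-1, 1)], axis=1)

# The probability attached to a point must follow that point under the rotation.
i, j = 3, 7
k, l = 19 - 1 - j, i  # where (i, j) lands under np.rot90 on axes (1, 2)
assert board_rot[0, k, l, 0] == board[0, i, j, 0]
assert p_rot[0, k * 19 + l] == p[0, i * 19 + j]
assert p_rot[0, -1] == p[0, -1]  # pass probability is unchanged
print("symmetry check passed")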

AlphaGo/multi_gpu.py (new file)

@@ -0,0 +1,75 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import tensorflow as tf
from six.moves import zip
tf.flags.DEFINE_integer('num_gpus', 1, """How many GPUs to use""")
tf.flags.DEFINE_boolean('log_device_placement', False,
"""Whether to log device placement.""")
FLAGS = tf.flags.FLAGS
def create_session():
config = tf.ConfigProto(allow_soft_placement=True,
log_device_placement=FLAGS.log_device_placement)
return tf.Session(config=config)
def average_gradients(tower_grads):
"""
Calculate the average gradient for each shared variable across all towers.
Note that this function provides a synchronization point across all towers.
    :param tower_grads: List of lists of (gradient, variable) tuples. The outer
        list is over towers; the inner list is over the (gradient, variable)
        pairs computed on each tower.
:return: List of pairs of (gradient, variable) where the gradient has
been averaged across all towers.
"""
average_grads = []
for grad_and_vars in zip(*tower_grads):
# Note that each grad_and_vars looks like the following:
# ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
if grad_and_vars[0][0] is None:
continue
grads = []
for g, _ in grad_and_vars:
# Add 0 dimension to the gradients to represent the tower.
expanded_g = tf.expand_dims(g, 0)
# Append on a 'tower' dimension which we will average over below.
grads.append(expanded_g)
# Average over the 'tower' dimension.
grad = tf.concat(grads, 0)
grad = tf.reduce_mean(grad, 0)
# Keep in mind that the Variables are redundant because they are shared
# across towers. So .. we will just return the first tower's pointer to
# the Variable.
v = grad_and_vars[0][1]
grad_and_var = (grad, v)
average_grads.append(grad_and_var)
return average_grads
def average_losses(tower_losses):
"""
Calculate the average loss or other quantity for all towers.
:param tower_losses: A list of lists of quantities. The outer list is over
towers. The inner list is over losses or other quantities for each
tower.
:return: A list of quantities that have been averaged over all towers.
"""
ret = []
for quantities in zip(*tower_losses):
ret.append(tf.add_n(quantities) / len(quantities))
return ret
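
For context, a sketch of how these helpers are typically wired into a multi-tower training loop; the build_tower callback and this wiring are illustrative assumptions, not part of this commit:

import tensorflow as tf
from multi_gpu import FLAGS, average_gradients, average_losses

def train_multi_gpu(build_tower, optimizer):
    # build_tower is a hypothetical callback that constructs one tower's sub-graph
    # and returns its scalar loss; it is not defined in this commit.
    tower_grads = []
    tower_losses = []
    for i in range(FLAGS.num_gpus):
        with tf.device('/gpu:%d' % i):
            # Reuse the shared variables on every tower after the first one.
            with tf.variable_scope(tf.get_variable_scope(), reuse=(i > 0)):
                loss = build_tower()
                tower_grads.append(optimizer.compute_gradients(loss))
                tower_losses.append([loss])
    # Synchronization point: average gradients and losses across towers.
    train_op = optimizer.apply_gradients(average_gradients(tower_grads))
    avg_loss = average_losses(tower_losses)[0]
    return train_op, avg_loss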