Tianshou/AlphaGo/Network.py

import tensorflow as tf
import numpy as np
import time
import multi_gpu
import tensorflow.contrib.layers as layers

def residual_block(input, is_training):
	normalizer_params = {'is_training': is_training,
						 'updates_collections': None}
	h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
					  normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
					  weights_regularizer=layers.l2_regularizer(1e-4))
	residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
							 normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
							 weights_regularizer=layers.l2_regularizer(1e-4))
	h = h + residual
	return tf.nn.relu(h)

def policy_heads(input, is_training):
	normalizer_params = {'is_training': is_training,
						 'updates_collections': None}
	h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
					  normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
					  weights_regularizer=layers.l2_regularizer(1e-4))
	h = layers.flatten(h)
	h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
	return h

def value_heads(input, is_training):
	normalizer_params = {'is_training': is_training,
						 'updates_collections': None}
	h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,
					  normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
					  weights_regularizer=layers.l2_regularizer(1e-4))
	h = layers.flatten(h)
	h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))
	h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
	return h

x = tf.placeholder(tf.float32,shape=[None,19,19,17])
is_training = tf.placeholder(tf.bool, shape=[])
z = tf.placeholder(tf.float32, shape=[None, 1])
pi = tf.placeholder(tf.float32, shape=[None, 362])

h = residual_block(x, is_training)
for i in range(18):
	h = residual_block(h, is_training)
v = value_heads(h, is_training)
p = policy_heads(h, is_training)
loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss + reg
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)

var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
def train():
	data = np.load("data.npz")
	boards = data["boards"]
	wins = data["wins"]
	ps = data["ps"]
	print (boards.shape)
	print (wins.shape)
	print (ps.shape)
	epochs = 100
	batch_size = 32
	batch_num = boards.shape[0] // batch_size
	result_path = "./results/"
	with multi_gpu.create_session() as sess:
		sess.run(tf.global_variables_initializer())
		ckpt_file = tf.train.latest_checkpoint(result_path)
		if ckpt_file is not None:
			print('Restoring model from {}...'.format(ckpt_file))
			saver.restore(sess, ckpt_file)
		for epoch in range(epochs):
			time_train = -time.time()
			index = np.arange(boards.shape[0])
			np.random.shuffle(index)
			losses = []
			regs = []
			for iter in range(batch_num):
				_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
																					z:wins[index[iter*batch_size:(iter+1)*batch_size]],
																					pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
																					is_training:True})
				losses.append(l)
				regs.append(r)
				if iter % 1 == 0:
					print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
					time_train=-time.time()
					losses = []
					regs = []
				if iter % 20 == 0:
					save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
					saver.save(sess, result_path + save_path)

def forward(board):
	result_path = "./results/"
	with multi_gpu.create_session() as sess:
		sess.run(tf.global_variables_initializer())
		ckpt_file = tf.train.latest_checkpoint(result_path)
		if ckpt_file is not None:
			print('Restoring model from {}...'.format(ckpt_file))
			saver.restore(sess, ckpt_file)
		else:
			raise ValueError("No model loaded")
		return sess.run([p,v], feed_dict={x:board})

if __name__='main':
	train()
Implement the network of AlphaGo 2017-11-04 22:16:43 +08:00			`import tensorflow as tf`
			`import numpy as np`
			`import time`
			`import multi_gpu`
			`import tensorflow.contrib.layers as layers`

			`def residual_block(input, is_training):`
			`normalizer_params = {'is_training': is_training,`
			`'updates_collections': None}`
			`h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,`
			`normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,`
			`weights_regularizer=layers.l2_regularizer(1e-4))`
			`residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,`
			`normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,`
			`weights_regularizer=layers.l2_regularizer(1e-4))`
			`h = h + residual`
			`return tf.nn.relu(h)`

			`def policy_heads(input, is_training):`
			`normalizer_params = {'is_training': is_training,`
			`'updates_collections': None}`
			`h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,`
			`normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,`
			`weights_regularizer=layers.l2_regularizer(1e-4))`
			`h = layers.flatten(h)`
			`h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))`
			`return h`

			`def value_heads(input, is_training):`
			`normalizer_params = {'is_training': is_training,`
			`'updates_collections': None}`
			`h = layers.conv2d(input, 2, kernel_size=1, stride=1, activation_fn=tf.nn.relu,`
			`normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,`
			`weights_regularizer=layers.l2_regularizer(1e-4))`
			`h = layers.flatten(h)`
			`h = layers.fully_connected(h, 256, activation_fn=tf.nn.relu, weights_regularizer=layers.l2_regularizer(1e-4))`
			`h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))`
			`return h`

			`x = tf.placeholder(tf.float32,shape=[None,19,19,17])`
			`is_training = tf.placeholder(tf.bool, shape=[])`
			`z = tf.placeholder(tf.float32, shape=[None, 1])`
			`pi = tf.placeholder(tf.float32, shape=[None, 362])`

			`h = residual_block(x, is_training)`
			`for i in range(18):`
			`h = residual_block(h, is_training)`
			`v = value_heads(h, is_training)`
			`p = policy_heads(h, is_training)`
			`loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))`
			`reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))`
			`total_loss = loss + reg`
modify the network 2017-11-05 16:47:01 +08:00			`train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)`
Implement the network of AlphaGo 2017-11-04 22:16:43 +08:00
			`var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)`
			`saver = tf.train.Saver(max_to_keep=10, var_list=var_list)`
modified the network 2017-11-05 15:30:35 +08:00			`def train():`
			`data = np.load("data.npz")`
			`boards = data["boards"]`
			`wins = data["wins"]`
			`ps = data["ps"]`
			`print (boards.shape)`
			`print (wins.shape)`
			`print (ps.shape)`
			`epochs = 100`
			`batch_size = 32`
			`batch_num = boards.shape[0] // batch_size`
			`result_path = "./results/"`
			`with multi_gpu.create_session() as sess:`
			`sess.run(tf.global_variables_initializer())`
			`ckpt_file = tf.train.latest_checkpoint(result_path)`
			`if ckpt_file is not None:`
			`print('Restoring model from {}...'.format(ckpt_file))`
			`saver.restore(sess, ckpt_file)`
			`for epoch in range(epochs):`
			`time_train = -time.time()`
			`index = np.arange(boards.shape[0])`
			`np.random.shuffle(index)`
			`losses = []`
			`regs = []`
			`for iter in range(batch_num):`
			`_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iterbatch_size:(iter+1)batch_size]],`
			`z:wins[index[iterbatch_size:(iter+1)batch_size]],`
			`pi:ps[index[iterbatch_size:(iter+1)batch_size]],`
			`is_training:True})`
			`losses.append(l)`
			`regs.append(r)`
			`if iter % 1 == 0:`
			`print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))`
			`time_train=-time.time()`
			`losses = []`
			`regs = []`
			`if iter % 20 == 0:`
			`save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)`
			`saver.save(sess, result_path + save_path)`

			`def forward(board):`
			`result_path = "./results/"`
			`with multi_gpu.create_session() as sess:`
			`sess.run(tf.global_variables_initializer())`
			`ckpt_file = tf.train.latest_checkpoint(result_path)`
			`if ckpt_file is not None:`
			`print('Restoring model from {}...'.format(ckpt_file))`
			`saver.restore(sess, ckpt_file)`
			`else:`
			`raise ValueError("No model loaded")`
modify the network 2017-11-05 16:47:01 +08:00			`return sess.run([p,v], feed_dict={x:board})`

			`if __name__='main':`
			`train()`