diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py index 5c23a6f..807c33b 100644 --- a/AlphaGo/Network.py +++ b/AlphaGo/Network.py @@ -1,10 +1,14 @@ -import tensorflow as tf -import numpy as np -import time import os -import multi_gpu +import time + +import numpy as np +import tensorflow as tf import tensorflow.contrib.layers as layers -import sys + +import multi_gpu + +os.environ["CUDA_VISIBLE_DEVICES"] = "1" + def residual_block(input, is_training): normalizer_params = {'is_training': is_training, @@ -12,12 +16,13 @@ def residual_block(input, is_training): h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, weights_regularizer=layers.l2_regularizer(1e-4)) - residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity, - normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, - weights_regularizer=layers.l2_regularizer(1e-4)) - h = h + residual + h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity, + normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params, + weights_regularizer=layers.l2_regularizer(1e-4)) + h = h + input return tf.nn.relu(h) + def policy_heads(input, is_training): normalizer_params = {'is_training': is_training, 'updates_collections': None} @@ -28,6 +33,7 @@ def policy_heads(input, is_training): h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4)) return h + def value_heads(input, is_training): normalizer_params = {'is_training': is_training, 'updates_collections': None} @@ -39,30 +45,40 @@ def value_heads(input, is_training): h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4)) return h -x = tf.placeholder(tf.float32,shape=[None,19,19,17]) + +x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17]) is_training = tf.placeholder(tf.bool, shape=[]) z = tf.placeholder(tf.float32, shape=[None, 1]) pi = tf.placeholder(tf.float32, shape=[None, 362]) -h = residual_block(x, is_training) -for i in range(18): +h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm, + normalizer_params={'is_training': is_training, 'updates_collections': None}, + weights_regularizer=layers.l2_regularizer(1e-4)) +for i in range(19): h = residual_block(h, is_training) v = value_heads(h, is_training) p = policy_heads(h, is_training) -loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1)))) -reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) -total_loss = loss + reg -train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) +# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p))))) +value_loss = tf.reduce_mean(tf.square(z - v)) +policy_loss = - tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p)) +reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)) +total_loss = value_loss + policy_loss + reg +# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss) +update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) +with tf.control_dependencies(update_ops): + train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) saver = tf.train.Saver(max_to_keep=10, var_list=var_list) + + def train(): data_path = "/home/tongzheng/data/" data_name = os.listdir("/home/tongzheng/data/") epochs = 100 - batch_size = 32 + batch_size = 128 - result_path = "./results/" + result_path = "./checkpoints/" with multi_gpu.create_session() as sess: sess.run(tf.global_variables_initializer()) ckpt_file = tf.train.latest_checkpoint(result_path) @@ -81,52 +97,64 @@ def train(): batch_num = boards.shape[0] // batch_size index = np.arange(boards.shape[0]) np.random.shuffle(index) - losses = [] + value_losses = [] + policy_losses = [] regs = [] time_train = -time.time() for iter in range(batch_num): - _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]], - z:wins[index[iter*batch_size:(iter+1)*batch_size]], - pi:ps[index[iter*batch_size:(iter+1)*batch_size]], - is_training:True}) - losses.append(l) + lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op], + feed_dict={x: boards[ + index[iter * batch_size:(iter + 1) * batch_size]], + z: wins[index[ + iter * batch_size:(iter + 1) * batch_size]], + pi: ps[index[ + iter * batch_size:(iter + 1) * batch_size]], + is_training: True}) + value_losses.append(lv) + policy_losses.append(lp) regs.append(r) if iter % 1 == 0: - print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs)))) - time_train=-time.time() - losses = [] + print( + "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format( + epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)), + np.mean(np.array(policy_losses)), np.mean(np.array(regs)))) + time_train = -time.time() + value_losses = [] + policy_losses = [] regs = [] if iter % 20 == 0: save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) saver.save(sess, result_path + save_path) del data, boards, wins, ps + def forward(board): result_path = "./results/" - itflag = False - res = None - if board is None: - board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz") - board = board["boards"][100].reshape(-1, 19, 19, 17) - result_path = "../parameters/checkpoints" - itflag = True - with multi_gpu.create_session() as sess: - sess.run(tf.global_variables_initializer()) - ckpt_file = tf.train.latest_checkpoint(result_path) - if ckpt_file is not None: - print('Restoring model from {}...'.format(ckpt_file)) - saver.restore(sess, ckpt_file) - else: - raise ValueError("No model loaded") - res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag}) - #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) - #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) - print(res) - #print(res[0].tolist()[0]) - #print(np.argmax(res[0])) - return res + itflag = False + res = None + if board is None: + board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz") + board = board["boards"][100].reshape(-1, 19, 19, 17) + result_path = "../parameters/checkpoints" + itflag = True + with multi_gpu.create_session() as sess: + sess.run(tf.global_variables_initializer()) + ckpt_file = tf.train.latest_checkpoint(result_path) + if ckpt_file is not None: + print('Restoring model from {}...'.format(ckpt_file)) + saver.restore(sess, ckpt_file) + else: + raise ValueError("No model loaded") + res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag}) + # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False}) + # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True}) + print(res) + # print(res[0].tolist()[0]) + # print(np.argmax(res[0])) + return res -if __name__=='__main__': - #train() - if sys.argv[1] == "test": - forward(None) + +if __name__ == '__main__': + train() +# if sys.argv[1] == "test": +# forward(None) diff --git a/AlphaGo/random_data.py b/AlphaGo/random_data.py new file mode 100644 index 0000000..9949ad5 --- /dev/null +++ b/AlphaGo/random_data.py @@ -0,0 +1,122 @@ +import os +import numpy as np +import time + +path = "/home/tongzheng/meta-data/" +save_path = "/home/tongzheng/data/" +name = os.listdir(path) +print(len(name)) +batch_size = 128 +batch_num = 512 + +block_size = batch_size * batch_num +slots_num = 32 + + +class block(object): + def __init__(self, block_size, block_id): + self.boards = [] + self.wins = [] + self.ps = [] + self.block_size = block_size + self.block_id = block_id + + def concat(self, board, p, win): + board = board.reshape(-1, 19, 19, 17) + win = win.reshape(-1, 1) + p = p.reshape(-1, 362) + self.boards.append(board) + self.wins.append(win) + self.ps.append(p) + + def isfull(self): + assert len(self.boards) == len(self.wins) + assert len(self.boards) == len(self.ps) + return len(self.boards) == self.block_size + + def save_and_reset(self, block_id): + self.boards = np.concatenate(self.boards, axis=0) + self.wins = np.concatenate(self.wins, axis=0) + self.ps = np.concatenate(self.ps, axis=0) + print ("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(self.block_id, self.boards.shape[0], + self.wins.shape[0], self.ps.shape[0])) + np.savez(save_path + "block" + str(self.block_id), boards=self.boards, wins=self.wins, ps=self.ps) + self.boards = [] + self.wins = [] + self.ps = [] + self.block_id = block_id + + def store_num(self): + assert len(self.boards) == len(self.wins) + assert len(self.boards) == len(self.ps) + return len(self.boards) + + +def concat(block_list, board, win, p): + global index + seed = np.random.randint(slots_num) + block_list[seed].concat(board, win, p) + if block_list[seed].isfull(): + block_list[seed].save_and_reset(index) + index = index + 1 + + +block_list = [] +for index in range(slots_num): + block_list.append(block(block_size, index)) +index = index + 1 +for n in name: + data = np.load(path + n) + board = data["boards"] + win = data["win"] + p = data["p"] + print("Start {}".format(n)) + print("Shape {}".format(board.shape[0])) + start = -time.time() + for i in range(board.shape[0]): + board_ori = board[i].reshape(-1, 19, 19, 17) + win_ori = win[i].reshape(-1, 1) + p_ori = p[i].reshape(-1, 362) + concat(block_list, board_ori, p_ori, win_ori) + + for t in range(1, 4): + board_aug = np.rot90(board_ori, t, (1, 2)) + p_aug = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + concat(block_list, board_aug, p_aug, win_ori) + + board_aug = board_ori[:, ::-1] + p_aug = np.concatenate( + [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + concat(block_list, board_aug, p_aug, win_ori) + + board_aug = board_ori[:, :, ::-1] + p_aug = np.concatenate( + [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + concat(block_list, board_aug, p_aug, win_ori) + + board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2)) + p_aug = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), + p_ori[:, -1].reshape(-1, 1)], + axis=1) + concat(block_list, board_aug, p_aug, win_ori) + + board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2)) + p_aug = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361), + p_ori[:, -1].reshape(-1, 1)], + axis=1) + concat(block_list, board_aug, p_aug, win_ori) + print ("Finished {} with time {}".format(n, time.time()+start)) + data_num = 0 + for i in range(slots_num): + print("Block {} ".format(block_list[i].block_id) + "Size {}".format(block_list[i].store_num())) + data_num = data_num + block_list[i].store_num() + print ("Total data {}".format(data_num)) + +for i in range(slots_num): + block_list[i].save_and_reset(block_list[i].block_id) diff --git a/utils/text2data.py b/utils/text2data.py index ed460b6..41e0cdf 100644 --- a/utils/text2data.py +++ b/utils/text2data.py @@ -5,7 +5,7 @@ def hex2board(hex): scale = 16 num_of_bits = 360 binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2] - board = np.zeros([361]) + board = np.zeros([361], dtype='int8') for i in range(361): board[i] = int(binary[i]) board = board.reshape(1,19,19,1) @@ -17,9 +17,12 @@ def str2prob(str): for i in range(362): prob[i] = float(p[i]) prob = prob.reshape(1,362) - return prob + if np.sum(np.isnan(prob))==0: + return prob, True + else: + return 0, False -dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/" +dir = "/home/yama/leela-zero/data/sgf-txt-files/" name = os.listdir(dir) text = [] for n in name: @@ -28,26 +31,35 @@ for n in name: print(text) for t in text: num = 0 - boards = np.zeros([0, 19, 19, 17]) - board = np.zeros([1, 19, 19, 0]) - win = np.zeros([0, 1]) + boards = np.zeros([0, 19, 19, 17], dtype='int8') + board = np.zeros([1, 19, 19, 0], dtype='int8') + win = np.zeros([0, 1], dtype='int8') p = np.zeros([0, 362]) + flag = False for line in open(dir + t): + if num % 19 == 0: + flag = False if num % 19 < 16: new_board = hex2board(line) board = np.concatenate([board, new_board], axis=3) if num % 19 == 16: if line == '0': - new_board = np.ones([1, 19 ,19 ,1]) + new_board = np.ones([1, 19 ,19 ,1], dtype='int8') if line == '1': - new_board = np.zeros([1, 19, 19, 1]) + new_board = np.zeros([1, 19, 19, 1], dtype='int8') board = np.concatenate([board, new_board], axis=3) boards = np.concatenate([boards, board], axis=0) - board = np.zeros([1, 19, 19, 0]) + board = np.zeros([1, 19, 19, 0], dtype='int8') if num % 19 == 17: - p = np.concatenate([p,str2prob(line)], axis=0) + if str2prob(line)[1] == True: + p = np.concatenate([p,str2prob(line)[0]], axis=0) + else: + flag = True + boards = boards[:-1] if num % 19 == 18: - win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0) + if flag == False: + win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0) num=num+1 + print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0])) print "Finished " + t - np.savez("data/"+t[:-4], boards=boards, win=win, p=p) + np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)