From b382bd8d31ad4e5c6401c1dd76e59198e1a39542 Mon Sep 17 00:00:00 2001 From: Tongzheng Ren Date: Wed, 8 Nov 2017 08:32:07 +0800 Subject: [PATCH] modify AlphaGo --- AlphaGo/Network.py | 64 +++++++++++++---------- AlphaGo/data.py | 128 ++++++++++++++++++++++++++------------------- 2 files changed, 108 insertions(+), 84 deletions(-) diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py index ef77e21..8608e77 100644 --- a/AlphaGo/Network.py +++ b/AlphaGo/Network.py @@ -1,6 +1,7 @@ import tensorflow as tf import numpy as np import time +import os import multi_gpu import tensorflow.contrib.layers as layers @@ -55,16 +56,11 @@ train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss) var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES) saver = tf.train.Saver(max_to_keep=10, var_list=var_list) def train(): - data = np.load("data.npz") - boards = data["boards"] - wins = data["wins"] - ps = data["ps"] - print (boards.shape) - print (wins.shape) - print (ps.shape) + data_path = "/home/tongzheng/data/" + data_name = os.listdir("/home/tongzheng/data/") epochs = 100 batch_size = 32 - batch_num = boards.shape[0] // batch_size + result_path = "./results/" with multi_gpu.create_session() as sess: sess.run(tf.global_variables_initializer()) @@ -73,26 +69,36 @@ def train(): print('Restoring model from {}...'.format(ckpt_file)) saver.restore(sess, ckpt_file) for epoch in range(epochs): - time_train = -time.time() - index = np.arange(boards.shape[0]) - np.random.shuffle(index) - losses = [] - regs = [] - for iter in range(batch_num): - _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]], - z:wins[index[iter*batch_size:(iter+1)*batch_size]], - pi:ps[index[iter*batch_size:(iter+1)*batch_size]], - is_training:True}) - losses.append(l) - regs.append(r) - if iter % 1 == 0: - print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs)))) - time_train=-time.time() - losses = [] - regs = [] - if iter % 20 == 0: - save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter) - saver.save(sess, result_path + save_path) + for name in data_name: + data = np.load(data_path + name) + boards = data["boards"] + wins = data["wins"] + ps = data["ps"] + print (boards.shape) + print (wins.shape) + print (ps.shape) + batch_num = boards.shape[0] // batch_size + index = np.arange(boards.shape[0]) + np.random.shuffle(index) + losses = [] + regs = [] + time_train = -time.time() + for iter in range(batch_num): + _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]], + z:wins[index[iter*batch_size:(iter+1)*batch_size]], + pi:ps[index[iter*batch_size:(iter+1)*batch_size]], + is_training:True}) + losses.append(l) + regs.append(r) + if iter % 1 == 0: + print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs)))) + time_train=-time.time() + losses = [] + regs = [] + if iter % 20 == 0: + save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter) + saver.save(sess, result_path + save_path) + del data, boards, wins, ps def forward(board): result_path = "./results/" @@ -106,5 +112,5 @@ def forward(board): raise ValueError("No model loaded") return sess.run([p,v], feed_dict={x:board}) -if __name__='main': +if __name__=="__main__": train() diff --git a/AlphaGo/data.py b/AlphaGo/data.py index 3785577..4a75f54 100644 --- a/AlphaGo/data.py +++ b/AlphaGo/data.py @@ -1,65 +1,83 @@ import os - +import threading import numpy as np -path = "/raid/tongzheng/AG/self_play_204/" +path = "/home/yama/leela-zero/data/npz-files/" name = os.listdir(path) -boards = np.zeros([0, 19, 19, 17]) -wins = np.zeros([0, 1]) -ps = np.zeros([0, 362]) +print(len(name)) +thread_num = 17 +batch_num = len(name) // thread_num -for n in name: - data = np.load(path + n) - board = data["boards"] - win = data["win"] - p = data["p"] - # board = np.zeros([0, 19, 19, 17]) - # win = np.zeros([0, 1]) - # p = np.zeros([0, 362]) - # for i in range(data["boards"].shape[3]): - # board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0) - # win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0) - # p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0) - boards = np.concatenate([boards, board], axis=0) - wins = np.concatenate([wins, win], axis=0) - ps = np.concatenate([ps, p], axis=0) - print("Finish " + n) +def integrate(name, index): + boards = np.zeros([0, 19, 19, 17]) + wins = np.zeros([0, 1]) + ps = np.zeros([0, 362]) + for n in name: + data = np.load(path + n) + board = data["boards"] + win = data["win"] + p = data["p"] + # board = np.zeros([0, 19, 19, 17]) + # win = np.zeros([0, 1]) + # p = np.zeros([0, 362]) + # for i in range(data["boards"].shape[3]): + # board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0) + # win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0) + # p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0) + boards = np.concatenate([boards, board], axis=0) + wins = np.concatenate([wins, win], axis=0) + ps = np.concatenate([ps, p], axis=0) + # print("Finish " + n) + print ("Integration {} Finished!".format(index)) + board_ori = boards + win_ori = wins + p_ori = ps + for i in range(1, 3): + board = np.rot90(board_ori, i, (1, 2)) + p = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + boards = np.concatenate([boards, board], axis=0) + wins = np.concatenate([wins, win_ori], axis=0) + ps = np.concatenate([ps, p], axis=0) -board_ori = boards -win_ori = wins -p_ori = ps -for i in range(1, 3): - board = np.rot90(board_ori, i, (1, 2)) - p = np.concatenate( - [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1) - boards = np.concatenate([boards, board], axis=0) - wins = np.concatenate([wins, win_ori], axis=0) - ps = np.concatenate([ps, p], axis=0) + board = board_ori[:, ::-1] + p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + boards = np.concatenate([boards, board], axis=0) + wins = np.concatenate([wins, win_ori], axis=0) + ps = np.concatenate([ps, p], axis=0) -board = board_ori[:, ::-1] -p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1) -boards = np.concatenate([boards, board], axis=0) -wins = np.concatenate([wins, win_ori], axis=0) -ps = np.concatenate([ps, p], axis=0) + board = board_ori[:, :, ::-1] + p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + boards = np.concatenate([boards, board], axis=0) + wins = np.concatenate([wins, win_ori], axis=0) + ps = np.concatenate([ps, p], axis=0) -board = board_ori[:, :, ::-1] -p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], - axis=1) -boards = np.concatenate([boards, board], axis=0) -wins = np.concatenate([wins, win_ori], axis=0) -ps = np.concatenate([ps, p], axis=0) + board = board_ori[:, ::-1] + p = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], + axis=1) + boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0) + wins = np.concatenate([wins, win_ori], axis=0) + ps = np.concatenate([ps, p], axis=0) -board = board_ori[:, ::-1] -p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1) -boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0) -wins = np.concatenate([wins, win_ori], axis=0) -ps = np.concatenate([ps, p], axis=0) + board = board_ori[:, :, ::-1] + p = np.concatenate( + [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361), + p_ori[:, -1].reshape(-1, 1)], + axis=1) + boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0) + wins = np.concatenate([wins, win_ori], axis=0) + ps = np.concatenate([ps, p], axis=0) -board = board_ori[:, :, ::-1] -p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], - axis=1) -boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0) -wins = np.concatenate([wins, win_ori], axis=0) -ps = np.concatenate([ps, p], axis=0) - -np.savez("data", boards=boards, wins=wins, ps=ps) \ No newline at end of file + np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps) + print ("Thread {} has finished.".format(index)) +thread_list = list() +for i in range(thread_num): + thread_list.append(threading.Thread(target=integrate, args=(name[batch_num * i:batch_num * (i + 1)], i,))) +for thread in thread_list: + thread.start() +for thread in thread_list: + thread.join()