From b382bd8d31ad4e5c6401c1dd76e59198e1a39542 Mon Sep 17 00:00:00 2001
From: Tongzheng Ren <rtz19970824@gmail.com>
Date: Wed, 8 Nov 2017 08:32:07 +0800
Subject: [PATCH] AlphaGo: train on sharded data; build shards with threads

---
 AlphaGo/Network.py |  64 +++++++++++++----------
 AlphaGo/data.py    | 128 ++++++++++++++++++++++++++-------------------
 2 files changed, 108 insertions(+), 84 deletions(-)

diff --git a/AlphaGo/Network.py b/AlphaGo/Network.py
index ef77e21..8608e77 100644
--- a/AlphaGo/Network.py
+++ b/AlphaGo/Network.py
@@ -1,6 +1,7 @@
 import tensorflow as tf
 import numpy as np
 import time
+import os
 import multi_gpu
 import tensorflow.contrib.layers as layers
 
@@ -55,16 +56,11 @@ train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
 var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
 saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
 def train():
-	data = np.load("data.npz")
-	boards = data["boards"]
-	wins = data["wins"]
-	ps = data["ps"]
-	print (boards.shape)
-	print (wins.shape)
-	print (ps.shape)
+	data_path = "/home/tongzheng/data/"
+	data_name = os.listdir("/home/tongzheng/data/")
 	epochs = 100
 	batch_size = 32
-	batch_num = boards.shape[0] // batch_size
+
 	result_path = "./results/"
 	with multi_gpu.create_session() as sess:
 		sess.run(tf.global_variables_initializer())
@@ -73,26 +69,36 @@ def train():
 			print('Restoring model from {}...'.format(ckpt_file))
 			saver.restore(sess, ckpt_file)
 		for epoch in range(epochs):
-			time_train = -time.time()
-			index = np.arange(boards.shape[0])
-			np.random.shuffle(index)
-			losses = []
-			regs = []
-			for iter in range(batch_num):
-				_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
-																					z:wins[index[iter*batch_size:(iter+1)*batch_size]],
-																					pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
-																					is_training:True})
-				losses.append(l)
-				regs.append(r)
-				if iter % 1 == 0:
-					print("Epoch: {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-					time_train=-time.time()
-					losses = []
-					regs = []
-				if iter % 20 == 0:
-					save_path = "Epoch{}.Iteration{}.ckpt".format(epoch, iter)
-					saver.save(sess, result_path + save_path)
+			for name in data_name:
+				data = np.load(data_path + name)
+				boards = data["boards"]
+				wins = data["wins"]
+				ps = data["ps"]
+				print (boards.shape)
+				print (wins.shape)
+				print (ps.shape)
+				batch_num = boards.shape[0] // batch_size
+				index = np.arange(boards.shape[0])
+				np.random.shuffle(index)
+				losses = []
+				regs = []
+				time_train = -time.time()
+				for iter in range(batch_num):
+					_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
+																						z:wins[index[iter*batch_size:(iter+1)*batch_size]],
+																						pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
+																						is_training:True})
+					losses.append(l)
+					regs.append(r)
+					if iter % 1 == 0:
+						print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
+						time_train=-time.time()
+						losses = []
+						regs = []
+					if iter % 20 == 0:
+						save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
+						saver.save(sess, result_path + save_path)
+				del data, boards, wins, ps
 
 def forward(board):
 	result_path = "./results/"
@@ -106,5 +112,5 @@ def forward(board):
 			raise ValueError("No model loaded")
 		return sess.run([p,v], feed_dict={x:board})
 
-if __name__='main':
+if __name__=="__main__":
 	train()
diff --git a/AlphaGo/data.py b/AlphaGo/data.py
index 3785577..4a75f54 100644
--- a/AlphaGo/data.py
+++ b/AlphaGo/data.py
@@ -1,65 +1,83 @@
 import os
-
+import threading
 import numpy as np
 
-path = "/raid/tongzheng/AG/self_play_204/"
+path = "/home/yama/leela-zero/data/npz-files/"
 name = os.listdir(path)
-boards = np.zeros([0, 19, 19, 17])
-wins = np.zeros([0, 1])
-ps = np.zeros([0, 362])
+print(len(name))  # number of directory entries found under `path`
+thread_num = 17  # number of worker threads started at the bottom of this script
+batch_num = len(name) // thread_num  # floor(entries / threads)
 
-for n in name:
-	data = np.load(path + n)
-	board = data["boards"]
-	win = data["win"]
-	p = data["p"]
-	# board = np.zeros([0, 19, 19, 17])
-	# win = np.zeros([0, 1])
-	# p = np.zeros([0, 362])
-	# for i in range(data["boards"].shape[3]):
-	# 	board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
-	# 	win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
-	# 	p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
-	boards = np.concatenate([boards, board], axis=0)
-	wins = np.concatenate([wins, win], axis=0)
-	ps = np.concatenate([ps, p], axis=0)
-	print("Finish " + n)
+def integrate(name, index):  # merge the files listed in `name`, add symmetric copies, save as shard `index`
+    boards = np.zeros([0, 19, 19, 17])  # empty seeds: they fix the per-sample shape every file must match
+    wins = np.zeros([0, 1])
+    ps = np.zeros([0, 362])
+    for n in name:
+        data = np.load(path + n)
+        board = data["boards"]
+        win = data["win"]
+        p = data["p"]
+        # Append this file's samples to the running arrays. np.concatenate
+        # along axis 0 requires the trailing dimensions to match the empty
+        # seeds above, i.e. board (k, 19, 19, 17), win (k, 1), p (k, 362).
+        # NOTE(review): every concatenate copies all data gathered so far,
+        # so this loop does quadratic copying; appending the per-file arrays
+        # to lists and concatenating once after the loop would avoid that.
+        # NOTE(review): p[:, 361] is presumably the pass move -- confirm.
+        boards = np.concatenate([boards, board], axis=0)
+        wins = np.concatenate([wins, win], axis=0)
+        ps = np.concatenate([ps, p], axis=0)
+        # print("Finish " + n)
+    print ("Integration {} Finished!".format(index))
+    board_ori = boards  # un-augmented samples; every variant below is derived from these
+    win_ori = wins  # win labels are reused unchanged for every variant
+    p_ori = ps
+    for i in range(1, 3):  # i quarter-turns in the board plane (axes 1, 2): 90 and 180 degrees
+        board = np.rot90(board_ori, i, (1, 2))  # NOTE(review): i == 3 (270 degrees) is never generated -- confirm intended
+        p = np.concatenate(  # rotate the first 361 policy entries as a 19x19 grid; last column kept as is
+            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            axis=1)
+        boards = np.concatenate([boards, board], axis=0)
+        wins = np.concatenate([wins, win_ori], axis=0)
+        ps = np.concatenate([ps, p], axis=0)

-board_ori = boards
-win_ori = wins
-p_ori = ps
-for i in range(1, 3):
-	board = np.rot90(board_ori, i, (1, 2))
-	p = np.concatenate(
-		[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
-	boards = np.concatenate([boards, board], axis=0)
-	wins = np.concatenate([wins, win_ori], axis=0)
-	ps = np.concatenate([ps, p], axis=0)
+    board = board_ori[:, ::-1]  # variant: reversed along axis 1
+    p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+                       axis=1)
+    boards = np.concatenate([boards, board], axis=0)
+    wins = np.concatenate([wins, win_ori], axis=0)
+    ps = np.concatenate([ps, p], axis=0)

-board = board_ori[:, ::-1]
-p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
-boards = np.concatenate([boards, board], axis=0)
-wins = np.concatenate([wins, win_ori], axis=0)
-ps = np.concatenate([ps, p], axis=0)
+    board = board_ori[:, :, ::-1]  # variant: reversed along axis 2
+    p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+                       axis=1)
+    boards = np.concatenate([boards, board], axis=0)
+    wins = np.concatenate([wins, win_ori], axis=0)
+    ps = np.concatenate([ps, p], axis=0)

-board = board_ori[:, :, ::-1]
-p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
-				   axis=1)
-boards = np.concatenate([boards, board], axis=0)
-wins = np.concatenate([wins, win_ori], axis=0)
-ps = np.concatenate([ps, p], axis=0)
+    board = board_ori[:, ::-1]  # variant: reversed along axis 1, then one quarter-turn (applied below)
+    p = np.concatenate(
+        [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+        axis=1)
+    boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
+    wins = np.concatenate([wins, win_ori], axis=0)
+    ps = np.concatenate([ps, p], axis=0)

-board = board_ori[:, ::-1]
-p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)], axis=1)
-boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
-wins = np.concatenate([wins, win_ori], axis=0)
-ps = np.concatenate([ps, p], axis=0)
+    board = board_ori[:, :, ::-1]  # variant: reversed along axis 2, then one quarter-turn (applied below)
+    p = np.concatenate(
+        [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
+         p_ori[:, -1].reshape(-1, 1)],
+        axis=1)
+    boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
+    wins = np.concatenate([wins, win_ori], axis=0)
+    ps = np.concatenate([ps, p], axis=0)

-board = board_ori[:, :, ::-1]
-p = np.concatenate([np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1,2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
-				   axis=1)
-boards = np.concatenate([boards, np.rot90(board, 1, (1,2))], axis=0)
-wins = np.concatenate([wins, win_ori], axis=0)
-ps = np.concatenate([ps, p], axis=0)
-
-np.savez("data", boards=boards, wins=wins, ps=ps)
\ No newline at end of file
+    np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)  # originals + 6 variants = 7x the loaded samples
+    print ("Thread {} has finished.".format(index))
+thread_list = list()  # one worker per output shard; worker i runs integrate(<its files>, i)
+for i in range(thread_num):  # strided slices cover every file, including the len(name) % thread_num remainder
+    thread_list.append(threading.Thread(target=integrate, args=(name[i::thread_num], i,)))
+for thread in thread_list:  # start all workers before waiting on any of them
+    thread.start()
+for thread in thread_list:  # block until every worker has finished
+    thread.join()