data processing, network modified

2017-11-09 19:23:40 +08:00 · 2017-11-09 19:23:40 +08:00 · d9674a3c8d
commit d9674a3c8d
parent 93dc10a728
3 changed files with 227 additions and 65 deletions
--- a/AlphaGo/Network.py
+++ b/AlphaGo/Network.py
@ -1,10 +1,14 @@
-import tensorflow as tf
-import numpy as np
-import time
 import os
-import multi_gpu
+import time
+
+import numpy as np
+import tensorflow as tf
 import tensorflow.contrib.layers as layers
-import sys
+
+import multi_gpu
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+

 def residual_block(input, is_training):
 	normalizer_params = {'is_training': is_training,
@ -12,12 +16,13 @@ def residual_block(input, is_training):
 	h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
 					  normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
 					  weights_regularizer=layers.l2_regularizer(1e-4))
-	residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
-							 normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-							 weights_regularizer=layers.l2_regularizer(1e-4))
-	h = h + residual
+	h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
+					  normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+					  weights_regularizer=layers.l2_regularizer(1e-4))
+	h = h + input
 	return tf.nn.relu(h)

+
 def policy_heads(input, is_training):
 	normalizer_params = {'is_training': is_training,
 						 'updates_collections': None}
@ -28,6 +33,7 @@ def policy_heads(input, is_training):
 	h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
 	return h

+
 def value_heads(input, is_training):
 	normalizer_params = {'is_training': is_training,
 						 'updates_collections': None}
@ -39,30 +45,40 @@ def value_heads(input, is_training):
 	h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
 	return h

-x = tf.placeholder(tf.float32,shape=[None,19,19,17])
+
+x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
 is_training = tf.placeholder(tf.bool, shape=[])
 z = tf.placeholder(tf.float32, shape=[None, 1])
 pi = tf.placeholder(tf.float32, shape=[None, 362])

-h = residual_block(x, is_training)
-for i in range(18):
+h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
+				  normalizer_params={'is_training': is_training, 'updates_collections': None},
+				  weights_regularizer=layers.l2_regularizer(1e-4))
+for i in range(19):
 	h = residual_block(h, is_training)
 v = value_heads(h, is_training)
 p = policy_heads(h, is_training)
-loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
+# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
+# Value head: mean squared error between the target z and the predicted value v.
+value_loss = tf.reduce_mean(tf.square(z - v))
+# Policy head: softmax_cross_entropy_with_logits already returns the non-negative
+# cross-entropy -sum(pi * log(softmax(p))) per example, so it must not be negated
+# again; with a leading minus, minimizing total_loss would maximize the cross-entropy.
+policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))

+reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+total_loss = value_loss + policy_loss + reg
+# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
+update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+with tf.control_dependencies(update_ops):
+	train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
 var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
 saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
+
+
 def train():
 	data_path = "/home/tongzheng/data/"
 	data_name = os.listdir("/home/tongzheng/data/")
 	epochs = 100
-	batch_size = 32
+	batch_size = 128

-	result_path = "./results/"
+	result_path = "./checkpoints/"
 	with multi_gpu.create_session() as sess:
 		sess.run(tf.global_variables_initializer())
 		ckpt_file = tf.train.latest_checkpoint(result_path)
@ -81,52 +97,64 @@ def train():
 				batch_num = boards.shape[0] // batch_size
 				index = np.arange(boards.shape[0])
 				np.random.shuffle(index)
-				losses = []
+				value_losses = []
+				policy_losses = []
 				regs = []
 				time_train = -time.time()
 				for iter in range(batch_num):
-					_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
-																						z:wins[index[iter*batch_size:(iter+1)*batch_size]],
-																						pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
-																						is_training:True})
-					losses.append(l)
+					lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op],
+														 feed_dict={x: boards[
+															 index[iter * batch_size:(iter + 1) * batch_size]],
+																	z: wins[index[
+																			iter * batch_size:(iter + 1) * batch_size]],
+																	pi: ps[index[
+																		   iter * batch_size:(iter + 1) * batch_size]],
+																	is_training: True})
+					value_losses.append(lv)
+					policy_losses.append(lp)
 					regs.append(r)
 					if iter % 1 == 0:
-						print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-						time_train=-time.time()
-						losses = []
+						print(
+						"Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
+							epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
+							np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
+						time_train = -time.time()
+						value_losses = []
+						policy_losses = []
 						regs = []
 					if iter % 20 == 0:
 						save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
 						saver.save(sess, result_path + save_path)
 				del data, boards, wins, ps

+
 def forward(board):
 	result_path = "./results/"
-		itflag = False
-		res = None
-		if board is None:
-			board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz")
-			board = board["boards"][100].reshape(-1, 19, 19, 17)
-			result_path = "../parameters/checkpoints"
-			itflag = True
-		with multi_gpu.create_session() as sess:
-			sess.run(tf.global_variables_initializer())
-			ckpt_file = tf.train.latest_checkpoint(result_path)
-			if ckpt_file is not None:
-				print('Restoring model from {}...'.format(ckpt_file))
-				saver.restore(sess, ckpt_file)
-			else:
-				raise ValueError("No model loaded")
-			res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag})
-			#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
-			#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
-			print(res)
-			#print(res[0].tolist()[0])
-			#print(np.argmax(res[0]))
-		return res
+	itflag = False
+	res = None
+	if board is None:
+		board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz")
+		board = board["boards"][100].reshape(-1, 19, 19, 17)
+		result_path = "../parameters/checkpoints"
+		itflag = True
+	with multi_gpu.create_session() as sess:
+		sess.run(tf.global_variables_initializer())
+		ckpt_file = tf.train.latest_checkpoint(result_path)
+		if ckpt_file is not None:
+			print('Restoring model from {}...'.format(ckpt_file))
+			saver.restore(sess, ckpt_file)
+		else:
+			raise ValueError("No model loaded")
+		res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
+		# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
+		# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
+		print(res)
+	# print(res[0].tolist()[0])
+	# print(np.argmax(res[0]))
+	return res

-if __name__=='__main__':
-	#train()
-    if sys.argv[1] == "test":
-		forward(None)
+
+if __name__ == '__main__':
+	train()
+# if sys.argv[1] == "test":
+#	forward(None)
--- a/AlphaGo/random_data.py
+++ b/AlphaGo/random_data.py
@ -0,0 +1,122 @@
+import os
+import numpy as np
+import time
+
+path = "/home/tongzheng/meta-data/"
+save_path = "/home/tongzheng/data/"
+name = os.listdir(path)
+print(len(name))
+batch_size = 128
+batch_num = 512
+
+block_size = batch_size * batch_num
+slots_num = 32
+
+
+class block(object):
+    """Buffer of (board, win, p) entries that is flushed to disk as one file.
+
+    Entries are kept in three parallel lists. Once ``block_size`` entries have
+    been stored, ``isfull`` reports True and the caller is expected to call
+    ``save_and_reset``, which writes the block with ``np.savez`` and starts a
+    fresh, empty block under a new id.
+    """
+
+    def __init__(self, block_size, block_id):
+        """Create an empty block.
+
+        block_size: number of stored entries at which ``isfull`` returns True.
+        block_id: identifier used in the file name when the block is saved.
+        """
+        self.boards = []
+        self.wins = []
+        self.ps = []
+        self.block_size = block_size
+        self.block_id = block_id
+
+    def concat(self, board, p, win):
+        """Append one entry, reshaping each part so it carries a leading batch axis."""
+        self.boards.append(board.reshape(-1, 19, 19, 17))
+        self.wins.append(win.reshape(-1, 1))
+        self.ps.append(p.reshape(-1, 362))
+
+    def isfull(self):
+        """Return True when exactly ``block_size`` entries are stored."""
+        return self.store_num() == self.block_size
+
+    def save_and_reset(self, block_id):
+        """Write the stored entries to disk, then empty the block and give it a new id.
+
+        The file is written to ``save_path + "block" + str(self.block_id)`` using the
+        id the block had before this call. A block with no entries is not written:
+        ``np.concatenate`` raises ValueError on an empty list, so the save step is
+        skipped and only the id is updated.
+
+        block_id: identifier the block takes after being reset.
+        """
+        if self.boards:
+            boards = np.concatenate(self.boards, axis=0)
+            wins = np.concatenate(self.wins, axis=0)
+            ps = np.concatenate(self.ps, axis=0)
+            print("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(
+                self.block_id, boards.shape[0], wins.shape[0], ps.shape[0]))
+            np.savez(save_path + "block" + str(self.block_id), boards=boards, wins=wins, ps=ps)
+        self.boards = []
+        self.wins = []
+        self.ps = []
+        self.block_id = block_id
+
+    def store_num(self):
+        """Return the number of stored entries, checking the three lists are in sync."""
+        assert len(self.boards) == len(self.wins)
+        assert len(self.boards) == len(self.ps)
+        return len(self.boards)
+
+
+def concat(block_list, board, win, p):
+    # Append one entry to a randomly chosen slot of block_list; when that slot
+    # becomes full, save it and restart it under the next unused block id, which
+    # is tracked in the module-level `index`.
+    #
+    # NOTE(review): the parameter names `win` and `p` are swapped relative to how
+    # this function is used. The callers below pass (board, p, win) positionally
+    # and block.concat is declared as (board, p, win), so forwarding the
+    # arguments in the same positional order puts the data in the right lists.
+    # Calling this function with keyword arguments would silently swap them.
+    global index
+    # Uniformly random slot in [0, slots_num).
+    seed = np.random.randint(slots_num)
+    block_list[seed].concat(board, win, p)
+    if block_list[seed].isfull():
+        # The slot is written under its current id and then takes `index` as its new id.
+        block_list[seed].save_and_reset(index)
+        index = index + 1
+
+
+# Create slots_num empty blocks with ids 0 .. slots_num - 1.
+block_list = []
+for index in range(slots_num):
+    block_list.append(block(block_size, index))
+# After the loop `index` is slots_num - 1; advance it so it holds the next unused block id.
+index = index + 1
+# Process every file found in `path`, scattering its entries (plus augmented copies)
+# over the blocks.
+for n in name:
+    data = np.load(path + n)
+    board = data["boards"]
+    win = data["win"]
+    p = data["p"]
+    print("Start {}".format(n))
+    print("Shape {}".format(board.shape[0]))
+    # Stored negated so that time.time() + start is the elapsed time.
+    start = -time.time()
+    for i in range(board.shape[0]):
+        # Original entry, reshaped to carry a leading axis.
+        board_ori = board[i].reshape(-1, 19, 19, 17)
+        win_ori = win[i].reshape(-1, 1)
+        p_ori = p[i].reshape(-1, 362)
+        concat(block_list, board_ori, p_ori, win_ori)
+
+        # Rotations by 1, 2 and 3 quarter turns in the plane of axes 1 and 2.
+        # The first 361 entries of p are rotated as a 19x19 grid; the last entry
+        # is appended unchanged (presumably the pass move -- TODO confirm).
+        for t in range(1, 4):
+            board_aug = np.rot90(board_ori, t, (1, 2))
+            p_aug = np.concatenate(
+                [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+                axis=1)
+            concat(block_list, board_aug, p_aug, win_ori)
+
+        # Reversal along axis 1.
+        board_aug = board_ori[:, ::-1]
+        p_aug = np.concatenate(
+            [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            axis=1)
+        concat(block_list, board_aug, p_aug, win_ori)
+
+        # Reversal along axis 2.
+        board_aug = board_ori[:, :, ::-1]
+        p_aug = np.concatenate(
+            [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            axis=1)
+        concat(block_list, board_aug, p_aug, win_ori)
+
+        # Reversal along axis 1 followed by one quarter turn.
+        board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
+        p_aug = np.concatenate(
+            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
+             p_ori[:, -1].reshape(-1, 1)],
+            axis=1)
+        concat(block_list, board_aug, p_aug, win_ori)
+
+        # Reversal along axis 2 followed by one quarter turn.
+        board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
+        p_aug = np.concatenate(
+            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
+             p_ori[:, -1].reshape(-1, 1)],
+            axis=1)
+        concat(block_list, board_aug, p_aug, win_ori)
+    print ("Finished {} with time {}".format(n, time.time()+start))
+    # Report how many entries each slot currently buffers, and the total.
+    data_num = 0
+    for i in range(slots_num):
+        print("Block {} ".format(block_list[i].block_id) + "Size {}".format(block_list[i].store_num()))
+        data_num = data_num + block_list[i].store_num()
+    print ("Total data {}".format(data_num))
+
+# Flush whatever is left in every slot; each slot keeps its current id.
+# NOTE(review): confirm save_and_reset copes with a slot that holds no entries.
+for i in range(slots_num):
+    block_list[i].save_and_reset(block_list[i].block_id)
--- a/utils/text2data.py
+++ b/utils/text2data.py
@ -5,7 +5,7 @@ def hex2board(hex):
    scale = 16
    num_of_bits = 360
    binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
-    board = np.zeros([361])
+    board = np.zeros([361], dtype='int8')
    for i in range(361):
        board[i] = int(binary[i])
    board = board.reshape(1,19,19,1)
@ -17,9 +17,12 @@ def str2prob(str):
    for i in range(362):
        prob[i] = float(p[i])
    prob = prob.reshape(1,362)
-    return prob
+    if np.sum(np.isnan(prob))==0:
+        return prob, True
+    else:
+	return 0, False

-dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/"
+dir = "/home/yama/leela-zero/data/sgf-txt-files/"
 name = os.listdir(dir)
 text = []
 for n in name:
@ -28,26 +31,35 @@ for n in name:
 print(text)
 for t in text:
    num = 0
-    boards = np.zeros([0, 19, 19, 17])
-    board = np.zeros([1, 19, 19, 0])
-    win = np.zeros([0, 1])
+    boards = np.zeros([0, 19, 19, 17], dtype='int8')
+    board = np.zeros([1, 19, 19, 0], dtype='int8')
+    win = np.zeros([0, 1], dtype='int8')
    p = np.zeros([0, 362])
+    flag = False
    for line in open(dir + t):
+	if num % 19 == 0:
+	    flag = False
        if num % 19 < 16:
            new_board = hex2board(line)
            board = np.concatenate([board, new_board], axis=3)
        if num % 19 == 16:
            if line == '0':
-                new_board = np.ones([1, 19 ,19 ,1])
+                new_board = np.ones([1, 19 ,19 ,1], dtype='int8')
            if line == '1':
-                new_board = np.zeros([1, 19, 19, 1])
+                new_board = np.zeros([1, 19, 19, 1], dtype='int8')
            board = np.concatenate([board, new_board], axis=3)
            boards = np.concatenate([boards, board], axis=0)
-            board = np.zeros([1, 19, 19, 0])
+            board = np.zeros([1, 19, 19, 0], dtype='int8')
        if num % 19 == 17:
-            p = np.concatenate([p,str2prob(line)], axis=0)
+	    if str2prob(line)[1] == True:
+            	p = np.concatenate([p,str2prob(line)[0]], axis=0)
+	    else:
+		flag = True
+		boards = boards[:-1]
        if num % 19 == 18:
-            win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0)
+	    if flag == False:
+            	win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0)
        num=num+1
+    print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0]))
    print "Finished " + t
-    np.savez("data/"+t[:-4], boards=boards, win=win, p=p)
+    np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)