data processing, network modified

Tongzheng Ren 2017-11-09 19:23:40 +08:00
parent 93dc10a728
commit d9674a3c8d
3 changed files with 227 additions and 65 deletions


@@ -1,10 +1,14 @@
-import tensorflow as tf
-import numpy as np
-import time
 import os
-import multi_gpu
+import time
+import numpy as np
+import tensorflow as tf
 import tensorflow.contrib.layers as layers
+import sys
+import multi_gpu
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 
 def residual_block(input, is_training):
     normalizer_params = {'is_training': is_training,
@@ -12,12 +16,13 @@ def residual_block(input, is_training):
     h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                       normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                       weights_regularizer=layers.l2_regularizer(1e-4))
-    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
-                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                             weights_regularizer=layers.l2_regularizer(1e-4))
-    h = h + residual
+    h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
+                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+                      weights_regularizer=layers.l2_regularizer(1e-4))
+    h = h + input
     return tf.nn.relu(h)
 
+
 def policy_heads(input, is_training):
     normalizer_params = {'is_training': is_training,
                          'updates_collections': None}
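The change in this hunk is the classic identity shortcut: the old code summed the outputs of the two convolutions (h + residual), so nothing actually skipped past them, while the new code carries the block input around both convolutions (h = h + input), giving a ResNet-style residual block. A minimal sketch of the block the new code implements (TF 1.x contrib.layers as in the file; residual_block_sketch is an illustrative name, not part of the repo):

# Sketch only: conv-BN-ReLU, conv-BN, identity shortcut from the block input, final ReLU.
import tensorflow as tf
import tensorflow.contrib.layers as layers

def residual_block_sketch(input, is_training):
    bn_params = {'is_training': is_training, 'updates_collections': None}
    reg = layers.l2_regularizer(1e-4)
    h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
                      weights_regularizer=reg)
    h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
                      normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
                      weights_regularizer=reg)
    return tf.nn.relu(h + input)  # the identity path lets gradients bypass both convs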
@@ -28,6 +33,7 @@ def policy_heads(input, is_training):
     h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
     return h
 
+
 def value_heads(input, is_training):
     normalizer_params = {'is_training': is_training,
                          'updates_collections': None}
@@ -39,30 +45,40 @@ def value_heads(input, is_training):
     h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
     return h
 
-x = tf.placeholder(tf.float32,shape=[None,19,19,17])
+x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
 is_training = tf.placeholder(tf.bool, shape=[])
 z = tf.placeholder(tf.float32, shape=[None, 1])
 pi = tf.placeholder(tf.float32, shape=[None, 362])
-h = residual_block(x, is_training)
-for i in range(18):
+h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
+                  normalizer_params={'is_training': is_training, 'updates_collections': None},
+                  weights_regularizer=layers.l2_regularizer(1e-4))
+for i in range(19):
     h = residual_block(h, is_training)
 v = value_heads(h, is_training)
 p = policy_heads(h, is_training)
-loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
+# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
+value_loss = tf.reduce_mean(tf.square(z - v))
+policy_loss = - tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))
+reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+total_loss = value_loss + policy_loss + reg
+# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
+update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+with tf.control_dependencies(update_ops):
+    train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
 var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
 saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
 
 def train():
     data_path = "/home/tongzheng/data/"
     data_name = os.listdir("/home/tongzheng/data/")
     epochs = 100
-    batch_size = 32
-    result_path = "./results/"
+    batch_size = 128
+    result_path = "./checkpoints/"
     with multi_gpu.create_session() as sess:
         sess.run(tf.global_variables_initializer())
         ckpt_file = tf.train.latest_checkpoint(result_path)
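Two things change here besides formatting: the loss is split into a value term and a policy term computed with tf.nn.softmax_cross_entropy_with_logits, and the optimizer step is placed under a control dependency on the batch-norm UPDATE_OPS collection, so the moving mean and variance used at inference actually get updated during training. One caveat worth flagging: softmax_cross_entropy_with_logits already returns the negative log-likelihood -sum_i pi_i * log softmax(p)_i, so negating its mean appears to flip the sign of the AlphaGo Zero policy term in (z - v)^2 - pi^T log p + c * ||theta||^2; adding the mean cross-entropy would give that objective. A small numpy sketch of the identity:

# Sketch: what softmax_cross_entropy_with_logits computes for one example.
import numpy as np

def softmax(logits):
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))  # numerically stable
    return e / e.sum(axis=-1, keepdims=True)

pi = np.array([[0.7, 0.2, 0.1]])   # target distribution (e.g. MCTS visit frequencies)
p = np.array([[2.0, 1.0, 0.1]])    # network logits
ce = -(pi * np.log(softmax(p))).sum(axis=-1)   # equals the TF op's per-example output
print(ce)  # positive; minimizing +ce pulls softmax(p) toward pi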
@@ -81,26 +97,37 @@ def train():
                 batch_num = boards.shape[0] // batch_size
                 index = np.arange(boards.shape[0])
                 np.random.shuffle(index)
-                losses = []
+                value_losses = []
+                policy_losses = []
                 regs = []
                 time_train = -time.time()
                 for iter in range(batch_num):
-                    _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    z:wins[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    is_training:True})
-                    losses.append(l)
+                    lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op],
+                                                         feed_dict={x: boards[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    z: wins[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    pi: ps[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    is_training: True})
+                    value_losses.append(lv)
+                    policy_losses.append(lp)
                     regs.append(r)
                     if iter % 1 == 0:
-                        print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-                        time_train=-time.time()
-                        losses = []
+                        print(
+                            "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
+                                epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
+                                np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
+                        time_train = -time.time()
+                        value_losses = []
+                        policy_losses = []
                         regs = []
                     if iter % 20 == 0:
                         save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
                         saver.save(sess, result_path + save_path)
                 del data, boards, wins, ps
 
 def forward(board):
     result_path = "./results/"
     itflag = False
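The loop above shuffles an index array once per loaded data part and then feeds disjoint slices of it, which visits every example exactly once per pass in random order. A self-contained sketch of that pattern (sizes illustrative):

import numpy as np

n, batch_size = 100000, 128
index = np.arange(n)
np.random.shuffle(index)
for it in range(n // batch_size):
    batch = index[it * batch_size:(it + 1) * batch_size]  # disjoint shuffled slice
    # boards[batch], wins[batch], ps[batch] would feed one sess.run training step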
@@ -118,15 +145,16 @@ def forward(board):
         saver.restore(sess, ckpt_file)
     else:
         raise ValueError("No model loaded")
-    res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag})
-    #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
-    #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
+    res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
+    # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
+    # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
     print(res)
-    #print(res[0].tolist()[0])
-    #print(np.argmax(res[0]))
+    # print(res[0].tolist()[0])
+    # print(np.argmax(res[0]))
     return res
 
-if __name__=='__main__':
-    #train()
-    if sys.argv[1] == "test":
-        forward(None)
+
+if __name__ == '__main__':
+    train()
+    # if sys.argv[1] == "test":
+    #     forward(None)
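With __main__ now calling train(), the command-line test path is commented out, but forward() remains callable from other code: it restores the newest checkpoint from result_path and returns the softmaxed policy and the value for a feature batch. A hypothetical call (assumes a checkpoint exists; the zero board is only a shape placeholder):

import numpy as np

probs, value = forward(np.zeros((1, 19, 19, 17), dtype=np.float32))
print(probs.shape, value.shape)  # (1, 362) and (1, 1)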

AlphaGo/random_data.py (new file, 122 lines)

@@ -0,0 +1,122 @@
import os
import numpy as np
import time

path = "/home/tongzheng/meta-data/"
save_path = "/home/tongzheng/data/"
name = os.listdir(path)
print(len(name))

batch_size = 128
batch_num = 512
block_size = batch_size * batch_num
slots_num = 32

class block(object):
    def __init__(self, block_size, block_id):
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_size = block_size
        self.block_id = block_id

    def concat(self, board, p, win):
        board = board.reshape(-1, 19, 19, 17)
        win = win.reshape(-1, 1)
        p = p.reshape(-1, 362)
        self.boards.append(board)
        self.wins.append(win)
        self.ps.append(p)

    def isfull(self):
        assert len(self.boards) == len(self.wins)
        assert len(self.boards) == len(self.ps)
        return len(self.boards) == self.block_size

    def save_and_reset(self, block_id):
        self.boards = np.concatenate(self.boards, axis=0)
        self.wins = np.concatenate(self.wins, axis=0)
        self.ps = np.concatenate(self.ps, axis=0)
        print ("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(self.block_id, self.boards.shape[0],
                                                                              self.wins.shape[0], self.ps.shape[0]))
        np.savez(save_path + "block" + str(self.block_id), boards=self.boards, wins=self.wins, ps=self.ps)
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_id = block_id

    def store_num(self):
        assert len(self.boards) == len(self.wins)
        assert len(self.boards) == len(self.ps)
        return len(self.boards)

def concat(block_list, board, win, p):
    global index
    seed = np.random.randint(slots_num)
    block_list[seed].concat(board, win, p)
    if block_list[seed].isfull():
        block_list[seed].save_and_reset(index)
        index = index + 1

block_list = []
for index in range(slots_num):
    block_list.append(block(block_size, index))
index = index + 1

for n in name:
    data = np.load(path + n)
    board = data["boards"]
    win = data["win"]
    p = data["p"]
    print("Start {}".format(n))
    print("Shape {}".format(board.shape[0]))
    start = -time.time()
    for i in range(board.shape[0]):
        board_ori = board[i].reshape(-1, 19, 19, 17)
        win_ori = win[i].reshape(-1, 1)
        p_ori = p[i].reshape(-1, 362)
        concat(block_list, board_ori, p_ori, win_ori)
        for t in range(1, 4):
            board_aug = np.rot90(board_ori, t, (1, 2))
            p_aug = np.concatenate(
                [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
                axis=1)
            concat(block_list, board_aug, p_aug, win_ori)
        board_aug = board_ori[:, ::-1]
        p_aug = np.concatenate(
            [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = board_ori[:, :, ::-1]
        p_aug = np.concatenate(
            [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
    print ("Finished {} with time {}".format(n, time.time() + start))

data_num = 0
for i in range(slots_num):
    print("Block {} ".format(block_list[i].block_id) + "Size {}".format(block_list[i].store_num()))
    data_num = data_num + block_list[i].store_num()
print ("Total data {}".format(data_num))

for i in range(slots_num):
    block_list[i].save_and_reset(block_list[i].block_id)
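random_data.py does two jobs: it shuffles examples across 32 block files by sending each one to a random slot (a cheap on-disk shuffle of a dataset too large for memory), and it expands every position into the 8 symmetries of the Go board: the identity, three rotations, two reflections, and two rotated reflections. The first 361 policy entries are reshaped to 19x19 and transformed together with the board planes; the pass entry (index 361) is left untouched. A compact sketch generating the same 8 symmetries (dihedral8 is an illustrative helper, not in the repo):

import numpy as np

def dihedral8(board, p):
    # board: (1, 19, 19, 17); p: (1, 362) = 361 board moves + 1 pass
    grid = p[:, :-1].reshape(-1, 19, 19)
    pass_col = p[:, -1:].copy()
    for base_b, base_g in ((board, grid), (board[:, ::-1], grid[:, ::-1])):
        for k in range(4):  # 2 reflections x 4 rotations = the 8 elements of D4
            b = np.rot90(base_b, k, (1, 2))
            g = np.rot90(base_g, k, (1, 2)).reshape(-1, 361)
            yield b, np.concatenate([g, pass_col], axis=1)

The third changed file, the parser that produces the meta-data arrays this script consumes, follows.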


@@ -5,7 +5,7 @@ def hex2board(hex):
     scale = 16
     num_of_bits = 360
     binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
-    board = np.zeros([361])
+    board = np.zeros([361], dtype='int8')
     for i in range(361):
         board[i] = int(binary[i])
     board = board.reshape(1,19,19,1)
@@ -17,9 +17,12 @@ def str2prob(str):
     for i in range(362):
         prob[i] = float(p[i])
     prob = prob.reshape(1,362)
-    return prob
+    if np.sum(np.isnan(prob))==0:
+        return prob, True
+    else:
+        return 0, False
 
-dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/"
+dir = "/home/yama/leela-zero/data/sgf-txt-files/"
 name = os.listdir(dir)
 text = []
 for n in name:
@@ -28,26 +31,35 @@ for n in name:
 print(text)
 for t in text:
     num = 0
-    boards = np.zeros([0, 19, 19, 17])
-    board = np.zeros([1, 19, 19, 0])
-    win = np.zeros([0, 1])
+    boards = np.zeros([0, 19, 19, 17], dtype='int8')
+    board = np.zeros([1, 19, 19, 0], dtype='int8')
+    win = np.zeros([0, 1], dtype='int8')
     p = np.zeros([0, 362])
+    flag = False
     for line in open(dir + t):
+        if num % 19 == 0:
+            flag = False
         if num % 19 < 16:
             new_board = hex2board(line)
             board = np.concatenate([board, new_board], axis=3)
         if num % 19 == 16:
             if line == '0':
-                new_board = np.ones([1, 19 ,19 ,1])
+                new_board = np.ones([1, 19 ,19 ,1], dtype='int8')
             if line == '1':
-                new_board = np.zeros([1, 19, 19, 1])
+                new_board = np.zeros([1, 19, 19, 1], dtype='int8')
             board = np.concatenate([board, new_board], axis=3)
             boards = np.concatenate([boards, board], axis=0)
-            board = np.zeros([1, 19, 19, 0])
+            board = np.zeros([1, 19, 19, 0], dtype='int8')
         if num % 19 == 17:
-            p = np.concatenate([p,str2prob(line)], axis=0)
+            if str2prob(line)[1] == True:
+                p = np.concatenate([p,str2prob(line)[0]], axis=0)
+            else:
+                flag = True
+                boards = boards[:-1]
         if num % 19 == 18:
-            win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0)
+            if flag == False:
+                win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0)
         num=num+1
+    print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0]))
     print "Finished " + t
-    np.savez("data/"+t[:-4], boards=boards, win=win, p=p)
+    np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)