data processing, network modified

Tongzheng Ren 2017-11-09 19:23:40 +08:00
parent 93dc10a728
commit d9674a3c8d
3 changed files with 227 additions and 65 deletions


@@ -1,10 +1,14 @@
-import tensorflow as tf
-import numpy as np
-import time
 import os
-import multi_gpu
+import time
+import numpy as np
+import tensorflow as tf
 import tensorflow.contrib.layers as layers
+import sys
+import multi_gpu
+
+os.environ["CUDA_VISIBLE_DEVICES"] = "1"
 
 def residual_block(input, is_training):
     normalizer_params = {'is_training': is_training,
@@ -12,12 +16,13 @@ def residual_block(input, is_training):
     h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                       normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
                       weights_regularizer=layers.l2_regularizer(1e-4))
-    residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
-                             normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
-                             weights_regularizer=layers.l2_regularizer(1e-4))
-    h = h + residual
+    h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
+                      normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
+                      weights_regularizer=layers.l2_regularizer(1e-4))
+    h = h + input
     return tf.nn.relu(h)
 
+
 def policy_heads(input, is_training):
     normalizer_params = {'is_training': is_training,
                          'updates_collections': None}
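The change in this hunk is the classic identity shortcut: the old code summed the outputs of the two convolutions (h + residual), so nothing actually skipped past them, while the new code carries the block input around both convolutions (h = h + input), giving a ResNet-style residual block. A minimal sketch of the block the new code implements (TF 1.x contrib.layers as in the file; residual_block_sketch is an illustrative name, not part of the repo):

# Sketch only: conv-BN-ReLU, conv-BN, identity shortcut from the block input, final ReLU.
import tensorflow as tf
import tensorflow.contrib.layers as layers

def residual_block_sketch(input, is_training):
    bn_params = {'is_training': is_training, 'updates_collections': None}
    reg = layers.l2_regularizer(1e-4)
    h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
                      normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
                      weights_regularizer=reg)
    h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
                      normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
                      weights_regularizer=reg)
    return tf.nn.relu(h + input)  # the identity path lets gradients bypass both convs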
@@ -28,6 +33,7 @@ def policy_heads(input, is_training):
     h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
     return h
 
+
 def value_heads(input, is_training):
     normalizer_params = {'is_training': is_training,
                          'updates_collections': None}
@@ -39,30 +45,40 @@ def value_heads(input, is_training):
     h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
     return h
 
-x = tf.placeholder(tf.float32,shape=[None,19,19,17])
+x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
 is_training = tf.placeholder(tf.bool, shape=[])
 z = tf.placeholder(tf.float32, shape=[None, 1])
 pi = tf.placeholder(tf.float32, shape=[None, 362])
-h = residual_block(x, is_training)
-for i in range(18):
+h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
+                  normalizer_params={'is_training': is_training, 'updates_collections': None},
+                  weights_regularizer=layers.l2_regularizer(1e-4))
+for i in range(19):
     h = residual_block(h, is_training)
 v = value_heads(h, is_training)
 p = policy_heads(h, is_training)
-loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
-reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
-total_loss = loss + reg
-train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
+# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
+value_loss = tf.reduce_mean(tf.square(z - v))
+policy_loss = - tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))
+reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
+total_loss = value_loss + policy_loss + reg
+# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
+update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
+with tf.control_dependencies(update_ops):
+    train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
 var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
 saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
 
 def train():
     data_path = "/home/tongzheng/data/"
     data_name = os.listdir("/home/tongzheng/data/")
     epochs = 100
-    batch_size = 32
-    result_path = "./results/"
+    batch_size = 128
+    result_path = "./checkpoints/"
     with multi_gpu.create_session() as sess:
         sess.run(tf.global_variables_initializer())
         ckpt_file = tf.train.latest_checkpoint(result_path)
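Two things change here besides formatting: the loss is split into a value term and a policy term computed with tf.nn.softmax_cross_entropy_with_logits, and the optimizer step is placed under a control dependency on the batch-norm UPDATE_OPS collection, so the moving mean and variance used at inference actually get updated during training. One caveat worth flagging: softmax_cross_entropy_with_logits already returns the negative log-likelihood -sum_i pi_i * log softmax(p)_i, so negating its mean appears to flip the sign of the AlphaGo Zero policy term in (z - v)^2 - pi^T log p + c * ||theta||^2; adding the mean cross-entropy would give that objective. A small numpy sketch of the identity:

# Sketch: what softmax_cross_entropy_with_logits computes for one example.
import numpy as np

def softmax(logits):
    e = np.exp(logits - logits.max(axis=-1, keepdims=True))  # numerically stable
    return e / e.sum(axis=-1, keepdims=True)

pi = np.array([[0.7, 0.2, 0.1]])   # target distribution (e.g. MCTS visit frequencies)
p = np.array([[2.0, 1.0, 0.1]])    # network logits
ce = -(pi * np.log(softmax(p))).sum(axis=-1)   # equals the TF op's per-example output
print(ce)  # positive; minimizing +ce pulls softmax(p) toward pi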
@@ -81,26 +97,37 @@ def train():
                 batch_num = boards.shape[0] // batch_size
                 index = np.arange(boards.shape[0])
                 np.random.shuffle(index)
-                losses = []
+                value_losses = []
+                policy_losses = []
                 regs = []
                 time_train = -time.time()
                 for iter in range(batch_num):
-                    _, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    z:wins[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
-                                                    is_training:True})
-                    losses.append(l)
+                    lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op],
+                                                         feed_dict={x: boards[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    z: wins[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    pi: ps[index[iter * batch_size:(iter + 1) * batch_size]],
+                                                                    is_training: True})
+                    value_losses.append(lv)
+                    policy_losses.append(lp)
                     regs.append(r)
                     if iter % 1 == 0:
-                        print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
-                        time_train=-time.time()
-                        losses = []
+                        print(
+                            "Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
+                                epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
+                                np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
+                        time_train = -time.time()
+                        value_losses = []
+                        policy_losses = []
                         regs = []
                     if iter % 20 == 0:
                         save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
                         saver.save(sess, result_path + save_path)
                 del data, boards, wins, ps
 
 def forward(board):
     result_path = "./results/"
     itflag = False
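The loop above shuffles an index array once per loaded data part and then feeds disjoint slices of it, which visits every example exactly once per pass in random order. A self-contained sketch of that pattern (sizes illustrative):

import numpy as np

n, batch_size = 100000, 128
index = np.arange(n)
np.random.shuffle(index)
for it in range(n // batch_size):
    batch = index[it * batch_size:(it + 1) * batch_size]  # disjoint shuffled slice
    # boards[batch], wins[batch], ps[batch] would feed one sess.run training step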
@@ -118,15 +145,16 @@ def forward(board):
         saver.restore(sess, ckpt_file)
     else:
         raise ValueError("No model loaded")
-    res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag})
-    #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
-    #res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
+    res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
+    # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
+    # res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
     print(res)
-    #print(res[0].tolist()[0])
-    #print(np.argmax(res[0]))
+    # print(res[0].tolist()[0])
+    # print(np.argmax(res[0]))
     return res
 
-if __name__=='__main__':
-    #train()
-    if sys.argv[1] == "test":
-        forward(None)
+
+if __name__ == '__main__':
+    train()
+    # if sys.argv[1] == "test":
+    #     forward(None)
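With __main__ now calling train(), the command-line test path is commented out, but forward() remains callable from other code: it restores the newest checkpoint from result_path and returns the softmaxed policy and the value for a feature batch. A hypothetical call (assumes a checkpoint exists; the zero board is only a shape placeholder):

import numpy as np

probs, value = forward(np.zeros((1, 19, 19, 17), dtype=np.float32))
print(probs.shape, value.shape)  # (1, 362) and (1, 1)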

AlphaGo/random_data.py (new file, 122 lines)

@@ -0,0 +1,122 @@
import os
import numpy as np
import time

path = "/home/tongzheng/meta-data/"
save_path = "/home/tongzheng/data/"
name = os.listdir(path)
print(len(name))

batch_size = 128
batch_num = 512
block_size = batch_size * batch_num
slots_num = 32

class block(object):
    def __init__(self, block_size, block_id):
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_size = block_size
        self.block_id = block_id

    def concat(self, board, p, win):
        board = board.reshape(-1, 19, 19, 17)
        win = win.reshape(-1, 1)
        p = p.reshape(-1, 362)
        self.boards.append(board)
        self.wins.append(win)
        self.ps.append(p)

    def isfull(self):
        assert len(self.boards) == len(self.wins)
        assert len(self.boards) == len(self.ps)
        return len(self.boards) == self.block_size

    def save_and_reset(self, block_id):
        self.boards = np.concatenate(self.boards, axis=0)
        self.wins = np.concatenate(self.wins, axis=0)
        self.ps = np.concatenate(self.ps, axis=0)
        print ("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(self.block_id, self.boards.shape[0],
                                                                              self.wins.shape[0], self.ps.shape[0]))
        np.savez(save_path + "block" + str(self.block_id), boards=self.boards, wins=self.wins, ps=self.ps)
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_id = block_id

    def store_num(self):
        assert len(self.boards) == len(self.wins)
        assert len(self.boards) == len(self.ps)
        return len(self.boards)

def concat(block_list, board, win, p):
    global index
    seed = np.random.randint(slots_num)
    block_list[seed].concat(board, win, p)
    if block_list[seed].isfull():
        block_list[seed].save_and_reset(index)
        index = index + 1

block_list = []
for index in range(slots_num):
    block_list.append(block(block_size, index))
index = index + 1

for n in name:
    data = np.load(path + n)
    board = data["boards"]
    win = data["win"]
    p = data["p"]
    print("Start {}".format(n))
    print("Shape {}".format(board.shape[0]))
    start = -time.time()
    for i in range(board.shape[0]):
        board_ori = board[i].reshape(-1, 19, 19, 17)
        win_ori = win[i].reshape(-1, 1)
        p_ori = p[i].reshape(-1, 362)
        concat(block_list, board_ori, p_ori, win_ori)
        for t in range(1, 4):
            board_aug = np.rot90(board_ori, t, (1, 2))
            p_aug = np.concatenate(
                [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
                axis=1)
            concat(block_list, board_aug, p_aug, win_ori)
        board_aug = board_ori[:, ::-1]
        p_aug = np.concatenate(
            [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = board_ori[:, :, ::-1]
        p_aug = np.concatenate(
            [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
        board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
    print ("Finished {} with time {}".format(n, time.time() + start))

data_num = 0
for i in range(slots_num):
    print("Block {} ".format(block_list[i].block_id) + "Size {}".format(block_list[i].store_num()))
    data_num = data_num + block_list[i].store_num()
print ("Total data {}".format(data_num))

for i in range(slots_num):
    block_list[i].save_and_reset(block_list[i].block_id)
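random_data.py does two jobs: it shuffles examples across 32 block files by sending each one to a random slot (a cheap on-disk shuffle of a dataset too large for memory), and it expands every position into the 8 symmetries of the Go board: the identity, three rotations, two reflections, and two rotated reflections. The first 361 policy entries are reshaped to 19x19 and transformed together with the board planes; the pass entry (index 361) is left untouched. A compact sketch generating the same 8 symmetries (dihedral8 is an illustrative helper, not in the repo):

import numpy as np

def dihedral8(board, p):
    # board: (1, 19, 19, 17); p: (1, 362) = 361 board moves + 1 pass
    grid = p[:, :-1].reshape(-1, 19, 19)
    pass_col = p[:, -1:].copy()
    for base_b, base_g in ((board, grid), (board[:, ::-1], grid[:, ::-1])):
        for k in range(4):  # 2 reflections x 4 rotations = the 8 elements of D4
            b = np.rot90(base_b, k, (1, 2))
            g = np.rot90(base_g, k, (1, 2)).reshape(-1, 361)
            yield b, np.concatenate([g, pass_col], axis=1)

The third changed file, the parser that produces the meta-data arrays this script consumes, follows.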


@@ -5,7 +5,7 @@ def hex2board(hex):
     scale = 16
     num_of_bits = 360
     binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
-    board = np.zeros([361])
+    board = np.zeros([361], dtype='int8')
     for i in range(361):
         board[i] = int(binary[i])
     board = board.reshape(1,19,19,1)
@@ -17,9 +17,12 @@ def str2prob(str):
     for i in range(362):
         prob[i] = float(p[i])
     prob = prob.reshape(1,362)
-    return prob
+    if np.sum(np.isnan(prob))==0:
+        return prob, True
+    else:
+        return 0, False
 
-dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/"
+dir = "/home/yama/leela-zero/data/sgf-txt-files/"
 name = os.listdir(dir)
 text = []
 for n in name:
@@ -28,26 +31,35 @@ for n in name:
 print(text)
 for t in text:
     num = 0
-    boards = np.zeros([0, 19, 19, 17])
-    board = np.zeros([1, 19, 19, 0])
-    win = np.zeros([0, 1])
+    boards = np.zeros([0, 19, 19, 17], dtype='int8')
+    board = np.zeros([1, 19, 19, 0], dtype='int8')
+    win = np.zeros([0, 1], dtype='int8')
     p = np.zeros([0, 362])
+    flag = False
     for line in open(dir + t):
+        if num % 19 == 0:
+            flag = False
         if num % 19 < 16:
             new_board = hex2board(line)
             board = np.concatenate([board, new_board], axis=3)
         if num % 19 == 16:
             if line == '0':
-                new_board = np.ones([1, 19 ,19 ,1])
+                new_board = np.ones([1, 19 ,19 ,1], dtype='int8')
             if line == '1':
-                new_board = np.zeros([1, 19, 19, 1])
+                new_board = np.zeros([1, 19, 19, 1], dtype='int8')
             board = np.concatenate([board, new_board], axis=3)
             boards = np.concatenate([boards, board], axis=0)
-            board = np.zeros([1, 19, 19, 0])
+            board = np.zeros([1, 19, 19, 0], dtype='int8')
         if num % 19 == 17:
-            p = np.concatenate([p,str2prob(line)], axis=0)
+            if str2prob(line)[1] == True:
+                p = np.concatenate([p,str2prob(line)[0]], axis=0)
+            else:
+                flag = True
+                boards = boards[:-1]
         if num % 19 == 18:
-            win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0)
+            if flag == False:
+                win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0)
         num=num+1
+    print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0]))
     print "Finished " + t
-    np.savez("data/"+t[:-4], boards=boards, win=win, p=p)
+    np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)