data processing, network modified

This commit is contained in:
Tongzheng Ren 2017-11-09 19:23:40 +08:00
parent 93dc10a728
commit d9674a3c8d
3 changed files with 227 additions and 65 deletions

View File

@ -1,10 +1,14 @@
import tensorflow as tf
import numpy as np
import time
import os
import multi_gpu
import time
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as layers
import sys
import multi_gpu
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
def residual_block(input, is_training):
normalizer_params = {'is_training': is_training,
@ -12,12 +16,13 @@ def residual_block(input, is_training):
h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=layers.l2_regularizer(1e-4))
residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=layers.l2_regularizer(1e-4))
h = h + residual
h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
weights_regularizer=layers.l2_regularizer(1e-4))
h = h + input
return tf.nn.relu(h)
def policy_heads(input, is_training):
normalizer_params = {'is_training': is_training,
'updates_collections': None}
@ -28,6 +33,7 @@ def policy_heads(input, is_training):
h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
return h
def value_heads(input, is_training):
normalizer_params = {'is_training': is_training,
'updates_collections': None}
@ -39,30 +45,40 @@ def value_heads(input, is_training):
h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
return h
x = tf.placeholder(tf.float32,shape=[None,19,19,17])
x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
is_training = tf.placeholder(tf.bool, shape=[])
z = tf.placeholder(tf.float32, shape=[None, 1])
pi = tf.placeholder(tf.float32, shape=[None, 362])
h = residual_block(x, is_training)
for i in range(18):
h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
normalizer_params={'is_training': is_training, 'updates_collections': None},
weights_regularizer=layers.l2_regularizer(1e-4))
for i in range(19):
h = residual_block(h, is_training)
v = value_heads(h, is_training)
p = policy_heads(h, is_training)
loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = loss + reg
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
# Value head: mean squared error between the target z and the prediction v.
value_loss = tf.reduce_mean(tf.square(z - v))
# Policy head: softmax_cross_entropy_with_logits already returns
# -sum(labels * log_softmax(logits)), i.e. a non-negative cross entropy.
# It must therefore be added as-is; negating it would make the optimizer
# *maximize* the cross entropy between pi and the predicted policy.
policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))
# Sum of the L2 weight penalties registered by the layers' weights_regularizer.
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
total_loss = value_loss + policy_loss + reg
# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
with tf.control_dependencies(update_ops):
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
def train():
data_path = "/home/tongzheng/data/"
data_name = os.listdir("/home/tongzheng/data/")
epochs = 100
batch_size = 32
batch_size = 128
result_path = "./results/"
result_path = "./checkpoints/"
with multi_gpu.create_session() as sess:
sess.run(tf.global_variables_initializer())
ckpt_file = tf.train.latest_checkpoint(result_path)
@ -81,52 +97,64 @@ def train():
batch_num = boards.shape[0] // batch_size
index = np.arange(boards.shape[0])
np.random.shuffle(index)
losses = []
value_losses = []
policy_losses = []
regs = []
time_train = -time.time()
for iter in range(batch_num):
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
z:wins[index[iter*batch_size:(iter+1)*batch_size]],
pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
is_training:True})
losses.append(l)
lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op],
feed_dict={x: boards[
index[iter * batch_size:(iter + 1) * batch_size]],
z: wins[index[
iter * batch_size:(iter + 1) * batch_size]],
pi: ps[index[
iter * batch_size:(iter + 1) * batch_size]],
is_training: True})
value_losses.append(lv)
policy_losses.append(lp)
regs.append(r)
if iter % 1 == 0:
print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
time_train=-time.time()
losses = []
print(
"Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
time_train = -time.time()
value_losses = []
policy_losses = []
regs = []
if iter % 20 == 0:
save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
saver.save(sess, result_path + save_path)
del data, boards, wins, ps
def forward(board):
result_path = "./results/"
itflag = False
res = None
if board is None:
board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz")
board = board["boards"][100].reshape(-1, 19, 19, 17)
result_path = "../parameters/checkpoints"
itflag = True
with multi_gpu.create_session() as sess:
sess.run(tf.global_variables_initializer())
ckpt_file = tf.train.latest_checkpoint(result_path)
if ckpt_file is not None:
print('Restoring model from {}...'.format(ckpt_file))
saver.restore(sess, ckpt_file)
else:
raise ValueError("No model loaded")
res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag})
#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
print(res)
#print(res[0].tolist()[0])
#print(np.argmax(res[0]))
return res
itflag = False
res = None
if board is None:
board = np.load("/home/yama/tongzheng/AG/self_play_204/d7d7d552b7be4b51883de99d74a8e51b.npz")
board = board["boards"][100].reshape(-1, 19, 19, 17)
result_path = "../parameters/checkpoints"
itflag = True
with multi_gpu.create_session() as sess:
sess.run(tf.global_variables_initializer())
ckpt_file = tf.train.latest_checkpoint(result_path)
if ckpt_file is not None:
print('Restoring model from {}...'.format(ckpt_file))
saver.restore(sess, ckpt_file)
else:
raise ValueError("No model loaded")
res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
print(res)
# print(res[0].tolist()[0])
# print(np.argmax(res[0]))
return res
if __name__=='__main__':
#train()
if sys.argv[1] == "test":
forward(None)
if __name__ == '__main__':
train()
# if sys.argv[1] == "test":
# forward(None)

122
AlphaGo/random_data.py Normal file
View File

@ -0,0 +1,122 @@
import os
import numpy as np
import time
# Directory whose entries are loaded one by one with np.load further down.
path = "/home/tongzheng/meta-data/"
# Prefix under which block.save_and_reset writes its "block<id>" .npz files.
save_path = "/home/tongzheng/data/"
# Names of all entries found in the input directory.
name = os.listdir(path)
print(len(name))
# batch_size and batch_num are only used here, to derive block_size.
batch_size = 128
batch_num = 512
# Number of appended samples a block holds before it is written out.
block_size = batch_size * batch_num
# Number of block buffers kept in memory; each sample goes to a random one.
slots_num = 32
class block(object):
    """In-memory buffer of training samples that is flushed to an .npz file.

    Samples are kept as three parallel lists (boards, wins, ps). Fullness is
    measured in appended entries, not in array rows.
    """

    def __init__(self, block_size, block_id):
        """Create an empty buffer.

        block_size: number of appended entries at which isfull() becomes True.
        block_id: identifier used in the output file name of the next flush.
        """
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_size = block_size
        self.block_id = block_id

    def concat(self, board, p, win):
        """Append one entry, reshaping each part to its stored layout.

        board is reshaped to (-1, 19, 19, 17), p to (-1, 362) and
        win to (-1, 1) before being stored.
        """
        self.boards.append(board.reshape(-1, 19, 19, 17))
        self.wins.append(win.reshape(-1, 1))
        self.ps.append(p.reshape(-1, 362))

    def isfull(self):
        """Return True when exactly block_size entries have been appended."""
        return self.store_num() == self.block_size

    def save_and_reset(self, block_id):
        """Write the buffered entries to disk, then empty the buffer.

        The file is named save_path + "block" + <current block_id>; afterwards
        the buffer takes block_id as its new identifier. An empty buffer is
        not written (np.concatenate rejects an empty list), but it is still
        reset and re-labelled.
        """
        if self.store_num() > 0:
            boards = np.concatenate(self.boards, axis=0)
            wins = np.concatenate(self.wins, axis=0)
            ps = np.concatenate(self.ps, axis=0)
            print("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(
                self.block_id, boards.shape[0], wins.shape[0], ps.shape[0]))
            np.savez(save_path + "block" + str(self.block_id), boards=boards, wins=wins, ps=ps)
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_id = block_id

    def store_num(self):
        """Return the number of buffered entries, checking the lists agree."""
        assert len(self.boards) == len(self.wins)
        assert len(self.boards) == len(self.ps)
        return len(self.boards)
def concat(block_list, board, win, p):
    """Drop one sample into a randomly chosen block and flush it when full.

    The three sample arguments are forwarded positionally, unchanged, to
    block.concat(board, p, win). The call sites in this script pass
    (board, policy, win), so despite this function's parameter names the
    second sample argument carries the policy and the third the win value.

    When the chosen block becomes full it is saved under its current id and
    re-labelled with the module-level counter `index`, which is then advanced.
    """
    global index
    slot = block_list[np.random.randint(slots_num)]
    slot.concat(board, win, p)
    if not slot.isfull():
        return
    slot.save_and_reset(index)
    index += 1
# One buffer per slot, labelled 0 .. slots_num - 1.
block_list = [block(block_size, slot_id) for slot_id in range(slots_num)]
# Next unused block id; concat() hands it to a block each time one is flushed.
index = slots_num
def _augment_policy(p_ori, transform):
    """Apply a board transform to the first 361 policy entries of each row.

    The first 361 columns are viewed as a 19x19 grid, transformed, and
    flattened again; the last column is carried over untouched.
    """
    grid = p_ori[:, :-1].reshape(-1, 19, 19)
    last_col = p_ori[:, -1].reshape(-1, 1)
    return np.concatenate([transform(grid).reshape(-1, 361), last_col], axis=1)


def _symmetry_transforms():
    """Return the seven non-identity transforms, acting on axes 1 and 2.

    Order: three rotations (1, 2, 3 quarter turns), flip along axis 1,
    flip along axis 2, then each flip followed by one quarter turn.
    """
    # t=t binds the loop value now; a plain closure would see only the last t.
    transforms = [lambda a, t=t: np.rot90(a, t, (1, 2)) for t in range(1, 4)]
    transforms.append(lambda a: a[:, ::-1])
    transforms.append(lambda a: a[:, :, ::-1])
    transforms.append(lambda a: np.rot90(a[:, ::-1], 1, (1, 2)))
    transforms.append(lambda a: np.rot90(a[:, :, ::-1], 1, (1, 2)))
    return transforms


_TRANSFORMS = _symmetry_transforms()

for n in name:
    # Read the arrays inside the with-block so the archive's file handle is
    # released as soon as they are in memory.
    with np.load(path + n) as data:
        board = data["boards"]
        win = data["win"]
        p = data["p"]
    print("Start {}".format(n))
    print("Shape {}".format(board.shape[0]))
    start = -time.time()
    for i in range(board.shape[0]):
        board_ori = board[i].reshape(-1, 19, 19, 17)
        win_ori = win[i].reshape(-1, 1)
        p_ori = p[i].reshape(-1, 362)
        # The untransformed sample first, then its seven transformed copies.
        # The same transform is applied to the board and to the policy grid;
        # the win value is unchanged by the transform.
        concat(block_list, board_ori, p_ori, win_ori)
        for transform in _TRANSFORMS:
            concat(block_list, transform(board_ori), _augment_policy(p_ori, transform), win_ori)
    print("Finished {} with time {}".format(n, time.time() + start))
# Report how many samples are still buffered in each slot.
data_num = 0
for i in range(slots_num):
    print("Block {} ".format(block_list[i].block_id) + "Size {}".format(block_list[i].store_num()))
    data_num = data_num + block_list[i].store_num()
print("Total data {}".format(data_num))
# Flush the partially filled slots. A slot that holds nothing is skipped:
# saving it would call np.concatenate on an empty list, which raises.
for i in range(slots_num):
    if block_list[i].store_num() > 0:
        block_list[i].save_and_reset(block_list[i].block_id)

View File

@ -5,7 +5,7 @@ def hex2board(hex):
scale = 16
num_of_bits = 360
binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
board = np.zeros([361])
board = np.zeros([361], dtype='int8')
for i in range(361):
board[i] = int(binary[i])
board = board.reshape(1,19,19,1)
@ -17,9 +17,12 @@ def str2prob(str):
for i in range(362):
prob[i] = float(p[i])
prob = prob.reshape(1,362)
return prob
if np.sum(np.isnan(prob))==0:
return prob, True
else:
return 0, False
dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/"
dir = "/home/yama/leela-zero/data/sgf-txt-files/"
name = os.listdir(dir)
text = []
for n in name:
@ -28,26 +31,35 @@ for n in name:
print(text)
for t in text:
num = 0
boards = np.zeros([0, 19, 19, 17])
board = np.zeros([1, 19, 19, 0])
win = np.zeros([0, 1])
boards = np.zeros([0, 19, 19, 17], dtype='int8')
board = np.zeros([1, 19, 19, 0], dtype='int8')
win = np.zeros([0, 1], dtype='int8')
p = np.zeros([0, 362])
flag = False
for line in open(dir + t):
if num % 19 == 0:
flag = False
if num % 19 < 16:
new_board = hex2board(line)
board = np.concatenate([board, new_board], axis=3)
if num % 19 == 16:
if line == '0':
new_board = np.ones([1, 19 ,19 ,1])
new_board = np.ones([1, 19 ,19 ,1], dtype='int8')
if line == '1':
new_board = np.zeros([1, 19, 19, 1])
new_board = np.zeros([1, 19, 19, 1], dtype='int8')
board = np.concatenate([board, new_board], axis=3)
boards = np.concatenate([boards, board], axis=0)
board = np.zeros([1, 19, 19, 0])
board = np.zeros([1, 19, 19, 0], dtype='int8')
if num % 19 == 17:
p = np.concatenate([p,str2prob(line)], axis=0)
if str2prob(line)[1] == True:
p = np.concatenate([p,str2prob(line)[0]], axis=0)
else:
flag = True
boards = boards[:-1]
if num % 19 == 18:
win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0)
if flag == False:
win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0)
num=num+1
print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0]))
print "Finished " + t
np.savez("data/"+t[:-4], boards=boards, win=win, p=p)
np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)