data processing, network modified
This commit is contained in:
parent
93dc10a728
commit
d9674a3c8d
@ -1,10 +1,14 @@
|
|||||||
import tensorflow as tf
|
|
||||||
import numpy as np
|
|
||||||
import time
|
|
||||||
import os
|
import os
|
||||||
import multi_gpu
|
import time
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
import tensorflow as tf
|
||||||
import tensorflow.contrib.layers as layers
|
import tensorflow.contrib.layers as layers
|
||||||
import sys
|
|
||||||
|
import multi_gpu
|
||||||
|
|
||||||
|
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
|
||||||
|
|
||||||
|
|
||||||
def residual_block(input, is_training):
|
def residual_block(input, is_training):
|
||||||
normalizer_params = {'is_training': is_training,
|
normalizer_params = {'is_training': is_training,
|
||||||
@ -12,12 +16,13 @@ def residual_block(input, is_training):
|
|||||||
h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
|
h = layers.conv2d(input, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu,
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
weights_regularizer=layers.l2_regularizer(1e-4))
|
||||||
residual = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
|
h = layers.conv2d(h, 256, kernel_size=3, stride=1, activation_fn=tf.identity,
|
||||||
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
normalizer_fn=layers.batch_norm, normalizer_params=normalizer_params,
|
||||||
weights_regularizer=layers.l2_regularizer(1e-4))
|
weights_regularizer=layers.l2_regularizer(1e-4))
|
||||||
h = h + residual
|
h = h + input
|
||||||
return tf.nn.relu(h)
|
return tf.nn.relu(h)
|
||||||
|
|
||||||
|
|
||||||
def policy_heads(input, is_training):
|
def policy_heads(input, is_training):
|
||||||
normalizer_params = {'is_training': is_training,
|
normalizer_params = {'is_training': is_training,
|
||||||
'updates_collections': None}
|
'updates_collections': None}
|
||||||
@ -28,6 +33,7 @@ def policy_heads(input, is_training):
|
|||||||
h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
|
h = layers.fully_connected(h, 362, activation_fn=tf.identity, weights_regularizer=layers.l2_regularizer(1e-4))
|
||||||
return h
|
return h
|
||||||
|
|
||||||
|
|
||||||
def value_heads(input, is_training):
|
def value_heads(input, is_training):
|
||||||
normalizer_params = {'is_training': is_training,
|
normalizer_params = {'is_training': is_training,
|
||||||
'updates_collections': None}
|
'updates_collections': None}
|
||||||
@ -39,30 +45,40 @@ def value_heads(input, is_training):
|
|||||||
h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
|
h = layers.fully_connected(h, 1, activation_fn=tf.nn.tanh, weights_regularizer=layers.l2_regularizer(1e-4))
|
||||||
return h
|
return h
|
||||||
|
|
||||||
x = tf.placeholder(tf.float32,shape=[None,19,19,17])
|
|
||||||
|
x = tf.placeholder(tf.float32, shape=[None, 19, 19, 17])
|
||||||
is_training = tf.placeholder(tf.bool, shape=[])
|
is_training = tf.placeholder(tf.bool, shape=[])
|
||||||
z = tf.placeholder(tf.float32, shape=[None, 1])
|
z = tf.placeholder(tf.float32, shape=[None, 1])
|
||||||
pi = tf.placeholder(tf.float32, shape=[None, 362])
|
pi = tf.placeholder(tf.float32, shape=[None, 362])
|
||||||
|
|
||||||
h = residual_block(x, is_training)
|
h = layers.conv2d(x, 256, kernel_size=3, stride=1, activation_fn=tf.nn.relu, normalizer_fn=layers.batch_norm,
|
||||||
for i in range(18):
|
normalizer_params={'is_training': is_training, 'updates_collections': None},
|
||||||
|
weights_regularizer=layers.l2_regularizer(1e-4))
|
||||||
|
for i in range(19):
|
||||||
h = residual_block(h, is_training)
|
h = residual_block(h, is_training)
|
||||||
v = value_heads(h, is_training)
|
v = value_heads(h, is_training)
|
||||||
p = policy_heads(h, is_training)
|
p = policy_heads(h, is_training)
|
||||||
loss = tf.reduce_mean(tf.square(z-v)) - tf.reduce_mean(tf.multiply(pi, tf.log(tf.nn.softmax(p, 1))))
|
# loss = tf.reduce_mean(tf.square(z-v)) - tf.multiply(pi, tf.log(tf.clip_by_value(tf.nn.softmax(p), 1e-8, tf.reduce_max(tf.nn.softmax(p)))))
|
||||||
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
|
value_loss = tf.reduce_mean(tf.square(z - v))
|
||||||
total_loss = loss + reg
|
# softmax_cross_entropy_with_logits already evaluates to -sum(pi * log(softmax(p)))
# per sample, i.e. a non-negative quantity that should be minimized. The previous
# leading minus sign made minimize(total_loss) push the cross-entropy *up*.
policy_loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=pi, logits=p))
|
||||||
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
|
|
||||||
|
|
||||||
|
reg = tf.add_n(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
|
||||||
|
total_loss = value_loss + policy_loss + reg
|
||||||
|
# train_op = tf.train.MomentumOptimizer(1e-4, momentum=0.9, use_nesterov=True).minimize(total_loss)
|
||||||
|
update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
|
||||||
|
with tf.control_dependencies(update_ops):
|
||||||
|
train_op = tf.train.RMSPropOptimizer(1e-4).minimize(total_loss)
|
||||||
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
|
var_list = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
|
||||||
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
|
saver = tf.train.Saver(max_to_keep=10, var_list=var_list)
|
||||||
|
|
||||||
|
|
||||||
def train():
|
def train():
|
||||||
data_path = "/home/tongzheng/data/"
|
data_path = "/home/tongzheng/data/"
|
||||||
data_name = os.listdir("/home/tongzheng/data/")
|
data_name = os.listdir("/home/tongzheng/data/")
|
||||||
epochs = 100
|
epochs = 100
|
||||||
batch_size = 32
|
batch_size = 128
|
||||||
|
|
||||||
result_path = "./results/"
|
result_path = "./checkpoints/"
|
||||||
with multi_gpu.create_session() as sess:
|
with multi_gpu.create_session() as sess:
|
||||||
sess.run(tf.global_variables_initializer())
|
sess.run(tf.global_variables_initializer())
|
||||||
ckpt_file = tf.train.latest_checkpoint(result_path)
|
ckpt_file = tf.train.latest_checkpoint(result_path)
|
||||||
@ -81,26 +97,37 @@ def train():
|
|||||||
batch_num = boards.shape[0] // batch_size
|
batch_num = boards.shape[0] // batch_size
|
||||||
index = np.arange(boards.shape[0])
|
index = np.arange(boards.shape[0])
|
||||||
np.random.shuffle(index)
|
np.random.shuffle(index)
|
||||||
losses = []
|
value_losses = []
|
||||||
|
policy_losses = []
|
||||||
regs = []
|
regs = []
|
||||||
time_train = -time.time()
|
time_train = -time.time()
|
||||||
for iter in range(batch_num):
|
for iter in range(batch_num):
|
||||||
_, l, r, value, prob = sess.run([train_op, loss, reg, v, p], feed_dict={x:boards[index[iter*batch_size:(iter+1)*batch_size]],
|
lv, lp, r, value, prob, _ = sess.run([value_loss, policy_loss, reg, v, tf.nn.softmax(p), train_op],
|
||||||
z:wins[index[iter*batch_size:(iter+1)*batch_size]],
|
feed_dict={x: boards[
|
||||||
pi:ps[index[iter*batch_size:(iter+1)*batch_size]],
|
index[iter * batch_size:(iter + 1) * batch_size]],
|
||||||
is_training:True})
|
z: wins[index[
|
||||||
losses.append(l)
|
iter * batch_size:(iter + 1) * batch_size]],
|
||||||
|
pi: ps[index[
|
||||||
|
iter * batch_size:(iter + 1) * batch_size]],
|
||||||
|
is_training: True})
|
||||||
|
value_losses.append(lv)
|
||||||
|
policy_losses.append(lp)
|
||||||
regs.append(r)
|
regs.append(r)
|
||||||
if iter % 1 == 0:
|
if iter % 1 == 0:
|
||||||
print("Epoch: {}, Part {}, Iteration: {}, Time: {}, Loss: {}, Reg: {}".format(epoch, name, iter, time.time()+time_train, np.mean(np.array(losses)), np.mean(np.array(regs))))
|
print(
|
||||||
time_train=-time.time()
|
"Epoch: {}, Part {}, Iteration: {}, Time: {}, Value Loss: {}, Policy Loss: {}, Reg: {}".format(
|
||||||
losses = []
|
epoch, name, iter, time.time() + time_train, np.mean(np.array(value_losses)),
|
||||||
|
np.mean(np.array(policy_losses)), np.mean(np.array(regs))))
|
||||||
|
time_train = -time.time()
|
||||||
|
value_losses = []
|
||||||
|
policy_losses = []
|
||||||
regs = []
|
regs = []
|
||||||
if iter % 20 == 0:
|
if iter % 20 == 0:
|
||||||
save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
|
save_path = "Epoch{}.Part{}.Iteration{}.ckpt".format(epoch, name, iter)
|
||||||
saver.save(sess, result_path + save_path)
|
saver.save(sess, result_path + save_path)
|
||||||
del data, boards, wins, ps
|
del data, boards, wins, ps
|
||||||
|
|
||||||
|
|
||||||
def forward(board):
|
def forward(board):
|
||||||
result_path = "./results/"
|
result_path = "./results/"
|
||||||
itflag = False
|
itflag = False
|
||||||
@ -118,15 +145,16 @@ def forward(board):
|
|||||||
saver.restore(sess, ckpt_file)
|
saver.restore(sess, ckpt_file)
|
||||||
else:
|
else:
|
||||||
raise ValueError("No model loaded")
|
raise ValueError("No model loaded")
|
||||||
res = sess.run([tf.nn.softmax(p),v], feed_dict={x:board, is_training:itflag})
|
res = sess.run([tf.nn.softmax(p), v], feed_dict={x: board, is_training: itflag})
|
||||||
#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
|
# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][300].reshape(-1, 19, 19, 17), is_training:False})
|
||||||
#res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
|
# res = sess.run([tf.nn.softmax(p),v], feed_dict={x:fix_board["boards"][50].reshape(-1, 19, 19, 17), is_training:True})
|
||||||
print(res)
|
print(res)
|
||||||
#print(res[0].tolist()[0])
|
# print(res[0].tolist()[0])
|
||||||
#print(np.argmax(res[0]))
|
# print(np.argmax(res[0]))
|
||||||
return res
|
return res
|
||||||
|
|
||||||
if __name__=='__main__':
|
|
||||||
#train()
|
if __name__ == '__main__':
|
||||||
if sys.argv[1] == "test":
|
train()
|
||||||
forward(None)
|
# if sys.argv[1] == "test":
|
||||||
|
# forward(None)
|
||||||
|
122
AlphaGo/random_data.py
Normal file
122
AlphaGo/random_data.py
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
import os
|
||||||
|
import numpy as np
|
||||||
|
import time
|
||||||
|
|
||||||
|
path = "/home/tongzheng/meta-data/"
|
||||||
|
save_path = "/home/tongzheng/data/"
|
||||||
|
name = os.listdir(path)
|
||||||
|
print(len(name))
|
||||||
|
batch_size = 128
|
||||||
|
batch_num = 512
|
||||||
|
|
||||||
|
block_size = batch_size * batch_num
|
||||||
|
slots_num = 32
|
||||||
|
|
||||||
|
|
||||||
|
class block(object):
    """Buffer of (board, policy, winner) samples that is flushed to one ``.npz`` file.

    Samples are kept in three parallel Python lists. Every ``concat`` call
    appends exactly one entry to each list, and "size" throughout this class
    means the number of list entries (not the number of array rows).
    """

    def __init__(self, block_size, block_id):
        """Create an empty buffer.

        block_size: number of entries after which ``isfull`` reports True.
        block_id:   id used in the output file name of the next flush.
        """
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_size = block_size
        self.block_id = block_id

    def _checked_len(self):
        """Return the common length of the three lists.

        Raises AssertionError if the lists have diverged. The error is raised
        explicitly (instead of via ``assert``) so the check survives ``python -O``.
        """
        if not (len(self.boards) == len(self.wins) == len(self.ps)):
            raise AssertionError(
                "block {} is inconsistent: boards={}, wins={}, ps={}".format(
                    self.block_id, len(self.boards), len(self.wins), len(self.ps)))
        return len(self.boards)

    def concat(self, board, p, win):
        """Append one sample.

        The arguments are reshaped to (-1, 19, 19, 17), (-1, 362) and (-1, 1)
        respectively before being stored. Note the parameter order: the
        policy ``p`` comes before ``win``.
        """
        self.boards.append(board.reshape(-1, 19, 19, 17))
        self.wins.append(win.reshape(-1, 1))
        self.ps.append(p.reshape(-1, 362))

    def isfull(self):
        """Return True once at least ``block_size`` entries have been stored."""
        # ">=" instead of "==": a buffer that overshoots must still be reported
        # as full, otherwise it would keep growing forever.
        return self._checked_len() >= self.block_size

    def save_and_reset(self, block_id):
        """Write the buffered samples to disk, clear the buffer and adopt ``block_id``.

        The file is written with ``np.savez`` to
        ``save_path + "block" + str(self.block_id)`` (``save_path`` is a
        module-level name) under the keys ``boards``, ``wins`` and ``ps``.
        An empty buffer is not written at all: ``np.concatenate`` cannot
        handle an empty list and would raise ValueError.
        """
        if self._checked_len() > 0:
            boards = np.concatenate(self.boards, axis=0)
            wins = np.concatenate(self.wins, axis=0)
            ps = np.concatenate(self.ps, axis=0)
            print("Block {}, Boards shape {}, Wins Shape {}, Ps Shape {}".format(
                self.block_id, boards.shape[0], wins.shape[0], ps.shape[0]))
            np.savez(save_path + "block" + str(self.block_id), boards=boards, wins=wins, ps=ps)
        self.boards = []
        self.wins = []
        self.ps = []
        self.block_id = block_id

    def store_num(self):
        """Return the number of entries currently buffered."""
        return self._checked_len()
|
||||||
|
|
||||||
|
|
||||||
|
def concat(block_list, board, win, p):
    """Store one sample in a randomly chosen slot and flush that slot if it is full.

    A slot is picked with ``np.random.randint(slots_num)``. When the slot
    reports ``isfull()`` after the insert, it is saved under its current id
    and handed the next unused id from the module-level counter ``index``,
    which is then advanced.

    NOTE on argument order: the three sample arguments are forwarded
    positionally, unchanged, to ``block.concat(board, p, win)``. The callers
    in this file invoke ``concat(block_list, board, p, win)``, so the value
    bound to ``win`` here is the policy and the value bound to ``p`` is the
    winner. The two swaps cancel out; always pass these arguments positionally.
    """
    global index
    slot = block_list[np.random.randint(slots_num)]
    slot.concat(board, win, p)
    if not slot.isfull():
        return
    slot.save_and_reset(index)
    index += 1
|
||||||
|
|
||||||
|
|
||||||
|
def _join_policy(grid, last):
    """Flatten a transformed (N, 19, 19) policy grid and re-append the 362nd entry."""
    return np.concatenate([grid.reshape(-1, 361), last], axis=1)


def _augmented_samples(board, p):
    """Yield the seven transformed (board, policy) copies of one sample.

    The order is the order in which the script stores them: rotations by
    1..3 quarter turns, the flip along axis 1, the flip along axis 2, then
    each of the two flips followed by one quarter turn. The first 361
    policy entries are transformed as a 19x19 grid together with the board;
    the last entry (presumably the pass move -- TODO confirm) is carried
    over unchanged.
    """
    grid = p[:, :-1].reshape(-1, 19, 19)
    last = p[:, -1].reshape(-1, 1)
    for t in range(1, 4):
        yield np.rot90(board, t, (1, 2)), _join_policy(np.rot90(grid, t, (1, 2)), last)
    yield board[:, ::-1], _join_policy(grid[:, ::-1], last)
    yield board[:, :, ::-1], _join_policy(grid[:, :, ::-1], last)
    yield (np.rot90(board[:, ::-1], 1, (1, 2)),
           _join_policy(np.rot90(grid[:, ::-1], 1, (1, 2)), last))
    yield (np.rot90(board[:, :, ::-1], 1, (1, 2)),
           _join_policy(np.rot90(grid[:, :, ::-1], 1, (1, 2)), last))


# One buffer per slot, with ids 0 .. slots_num - 1.
block_list = [block(block_size, slot_id) for slot_id in range(slots_num)]
# Next unused block id; concat() advances this module-level counter on every flush.
index = slots_num

for n in name:
    # np.load on an .npz archive keeps a file handle open until it is closed;
    # the arrays are fully read when indexed, so the archive can be closed
    # right after extracting them.
    with np.load(path + n) as data:
        board = data["boards"]
        win = data["win"]
        p = data["p"]
    print("Start {}".format(n))
    print("Shape {}".format(board.shape[0]))
    start = -time.time()
    for i in range(board.shape[0]):
        board_ori = board[i].reshape(-1, 19, 19, 17)
        win_ori = win[i].reshape(-1, 1)
        p_ori = p[i].reshape(-1, 362)
        # concat() is called as (block_list, board, policy, winner); see its
        # definition for how the arguments are forwarded.
        concat(block_list, board_ori, p_ori, win_ori)
        for board_aug, p_aug in _augmented_samples(board_ori, p_ori):
            concat(block_list, board_aug, p_aug, win_ori)
    print ("Finished {} with time {}".format(n, time.time()+start))

data_num = 0
for slot in block_list:
    print("Block {} ".format(slot.block_id) + "Size {}".format(slot.store_num()))
    data_num = data_num + slot.store_num()
print ("Total data {}".format(data_num))

# Flush the partially filled slots. A slot with no samples is skipped because
# np.concatenate raises ValueError on an empty list.
for slot in block_list:
    if slot.store_num() > 0:
        slot.save_and_reset(slot.block_id)
|
@ -5,7 +5,7 @@ def hex2board(hex):
|
|||||||
scale = 16
|
scale = 16
|
||||||
num_of_bits = 360
|
num_of_bits = 360
|
||||||
binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
|
binary = bin(int(hex[:-2], scale))[2:].zfill(num_of_bits) + hex[-2]
|
||||||
board = np.zeros([361])
|
board = np.zeros([361], dtype='int8')
|
||||||
for i in range(361):
|
for i in range(361):
|
||||||
board[i] = int(binary[i])
|
board[i] = int(binary[i])
|
||||||
board = board.reshape(1,19,19,1)
|
board = board.reshape(1,19,19,1)
|
||||||
@ -17,9 +17,12 @@ def str2prob(str):
|
|||||||
for i in range(362):
|
for i in range(362):
|
||||||
prob[i] = float(p[i])
|
prob[i] = float(p[i])
|
||||||
prob = prob.reshape(1,362)
|
prob = prob.reshape(1,362)
|
||||||
return prob
|
if np.sum(np.isnan(prob))==0:
|
||||||
|
return prob, True
|
||||||
|
else:
|
||||||
|
return 0, False
|
||||||
|
|
||||||
dir = "/home/yama/tongzheng/leela-zero/autogtp/new_spr/"
|
dir = "/home/yama/leela-zero/data/sgf-txt-files/"
|
||||||
name = os.listdir(dir)
|
name = os.listdir(dir)
|
||||||
text = []
|
text = []
|
||||||
for n in name:
|
for n in name:
|
||||||
@ -28,26 +31,35 @@ for n in name:
|
|||||||
print(text)
|
print(text)
|
||||||
for t in text:
|
for t in text:
|
||||||
num = 0
|
num = 0
|
||||||
boards = np.zeros([0, 19, 19, 17])
|
boards = np.zeros([0, 19, 19, 17], dtype='int8')
|
||||||
board = np.zeros([1, 19, 19, 0])
|
board = np.zeros([1, 19, 19, 0], dtype='int8')
|
||||||
win = np.zeros([0, 1])
|
win = np.zeros([0, 1], dtype='int8')
|
||||||
p = np.zeros([0, 362])
|
p = np.zeros([0, 362])
|
||||||
|
flag = False
|
||||||
for line in open(dir + t):
|
for line in open(dir + t):
|
||||||
|
if num % 19 == 0:
|
||||||
|
flag = False
|
||||||
if num % 19 < 16:
|
if num % 19 < 16:
|
||||||
new_board = hex2board(line)
|
new_board = hex2board(line)
|
||||||
board = np.concatenate([board, new_board], axis=3)
|
board = np.concatenate([board, new_board], axis=3)
|
||||||
if num % 19 == 16:
|
if num % 19 == 16:
|
||||||
if line == '0':
|
if line == '0':
|
||||||
new_board = np.ones([1, 19 ,19 ,1])
|
new_board = np.ones([1, 19 ,19 ,1], dtype='int8')
|
||||||
if line == '1':
|
if line == '1':
|
||||||
new_board = np.zeros([1, 19, 19, 1])
|
new_board = np.zeros([1, 19, 19, 1], dtype='int8')
|
||||||
board = np.concatenate([board, new_board], axis=3)
|
board = np.concatenate([board, new_board], axis=3)
|
||||||
boards = np.concatenate([boards, board], axis=0)
|
boards = np.concatenate([boards, board], axis=0)
|
||||||
board = np.zeros([1, 19, 19, 0])
|
board = np.zeros([1, 19, 19, 0], dtype='int8')
|
||||||
if num % 19 == 17:
|
if num % 19 == 17:
|
||||||
p = np.concatenate([p,str2prob(line)], axis=0)
|
if str2prob(line)[1] == True:
|
||||||
|
p = np.concatenate([p,str2prob(line)[0]], axis=0)
|
||||||
|
else:
|
||||||
|
flag = True
|
||||||
|
boards = boards[:-1]
|
||||||
if num % 19 == 18:
|
if num % 19 == 18:
|
||||||
win = np.concatenate([win, np.array(float(line)).reshape(1,1)], axis=0)
|
if flag == False:
|
||||||
|
win = np.concatenate([win, np.array(float(line), dtype='int8').reshape(1,1)], axis=0)
|
||||||
num=num+1
|
num=num+1
|
||||||
|
print("Boards Shape: {}, Wins Shape: {}, Probs Shape : {}".format(boards.shape[0], win.shape[0], p.shape[0]))
|
||||||
print "Finished " + t
|
print "Finished " + t
|
||||||
np.savez("data/"+t[:-4], boards=boards, win=win, p=p)
|
np.savez("/home/tongzheng/meta-data/"+t[:-4], boards=boards, win=win, p=p)
|
||||||
|
Loading…
x
Reference in New Issue
Block a user