minor fixed
This commit is contained in:
parent
453e457452
commit
bc49d466d1
@ -2,6 +2,7 @@ import os
|
||||
import threading
|
||||
import numpy as np
|
||||
|
||||
size = 9
|
||||
path = "/home/yama/leela-zero/data/npz-files/"
|
||||
name = os.listdir(path)
|
||||
print(len(name))
|
||||
@ -9,21 +10,21 @@ thread_num = 17
|
||||
batch_num = len(name) // thread_num
|
||||
|
||||
def integrate(name, index):
|
||||
boards = np.zeros([0, 19, 19, 17])
|
||||
boards = np.zeros([0, size, size, 17])
|
||||
wins = np.zeros([0, 1])
|
||||
ps = np.zeros([0, 362])
|
||||
ps = np.zeros([0, size**2 + 1])
|
||||
for n in name:
|
||||
data = np.load(path + n)
|
||||
board = data["boards"]
|
||||
win = data["win"]
|
||||
p = data["p"]
|
||||
# board = np.zeros([0, 19, 19, 17])
|
||||
board = data["state"]
|
||||
win = data["winner"]
|
||||
p = data["prob"]
|
||||
# board = np.zeros([0, size, size, 17])
|
||||
# win = np.zeros([0, 1])
|
||||
# p = np.zeros([0, 362])
|
||||
# p = np.zeros([0, size**2 + 1])
|
||||
# for i in range(data["boards"].shape[3]):
|
||||
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
|
||||
# board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, size, size, 17)], axis=0)
|
||||
# win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
|
||||
# p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
|
||||
# p = np.concatenate([p, data["p"][:,i].reshape(-1, size**2 + 1)], axis=0)
|
||||
boards = np.concatenate([boards, board], axis=0)
|
||||
wins = np.concatenate([wins, win], axis=0)
|
||||
ps = np.concatenate([ps, p], axis=0)
|
||||
@ -35,21 +36,21 @@ def integrate(name, index):
|
||||
for i in range(1, 3):
|
||||
board = np.rot90(board_ori, i, (1, 2))
|
||||
p = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size), i, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
boards = np.concatenate([boards, board], axis=0)
|
||||
wins = np.concatenate([wins, win_ori], axis=0)
|
||||
ps = np.concatenate([ps, p], axis=0)
|
||||
|
||||
board = board_ori[:, ::-1]
|
||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
boards = np.concatenate([boards, board], axis=0)
|
||||
wins = np.concatenate([wins, win_ori], axis=0)
|
||||
ps = np.concatenate([ps, p], axis=0)
|
||||
|
||||
board = board_ori[:, :, ::-1]
|
||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
boards = np.concatenate([boards, board], axis=0)
|
||||
wins = np.concatenate([wins, win_ori], axis=0)
|
||||
@ -57,7 +58,7 @@ def integrate(name, index):
|
||||
|
||||
board = board_ori[:, ::-1]
|
||||
p = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
|
||||
wins = np.concatenate([wins, win_ori], axis=0)
|
||||
@ -65,14 +66,14 @@ def integrate(name, index):
|
||||
|
||||
board = board_ori[:, :, ::-1]
|
||||
p = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size**2),
|
||||
p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
|
||||
wins = np.concatenate([wins, win_ori], axis=0)
|
||||
ps = np.concatenate([ps, p], axis=0)
|
||||
|
||||
np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)
|
||||
np.savez("/home/tongzheng/data/data-" + str(index), state=boards, winner=wins, prob=ps)
|
||||
print ("Thread {} has finished.".format(index))
|
||||
thread_list = list()
|
||||
for i in range(thread_num):
|
||||
|
@ -203,10 +203,11 @@ class Network(object):
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
state = np.random.randint(0, 1, [1, 9, 9, 17])
|
||||
state = np.random.randint(0, 1, [256, 9, 9, 17])
|
||||
net = Network()
|
||||
sess = net.forward()
|
||||
start = time.time()
|
||||
start_time = time.time()
|
||||
for i in range(100):
|
||||
sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False})
|
||||
print("Step {}, Cumulative time {}".format(i, time.time() - start))
|
||||
print("Step {}, use time {}".format(i, time.time() - start_time))
|
||||
start_time = time.time()
|
||||
|
@ -2,15 +2,16 @@ import os
|
||||
import numpy as np
|
||||
import time
|
||||
|
||||
path = "/home/tongzheng/meta-data/"
|
||||
save_path = "/home/tongzheng/data/"
|
||||
size = 9
|
||||
path = "/raid/tongzheng/tianshou/AlphaGo/data/part1/"
|
||||
save_path = "/raid/tongzheng/tianshou/AlphaGo/data/"
|
||||
name = os.listdir(path)
|
||||
print(len(name))
|
||||
batch_size = 128
|
||||
batch_num = 512
|
||||
|
||||
block_size = batch_size * batch_num
|
||||
slots_num = 32
|
||||
slots_num = 16
|
||||
|
||||
|
||||
class block(object):
|
||||
@ -22,9 +23,9 @@ class block(object):
|
||||
self.block_id = block_id
|
||||
|
||||
def concat(self, board, p, win):
|
||||
board = board.reshape(-1, 19, 19, 17)
|
||||
board = board.reshape(-1, size, size, 17)
|
||||
win = win.reshape(-1, 1)
|
||||
p = p.reshape(-1, 362)
|
||||
p = p.reshape(-1, size ** 2 + 1)
|
||||
self.boards.append(board)
|
||||
self.wins.append(win)
|
||||
self.ps.append(p)
|
||||
@ -74,40 +75,40 @@ for n in name:
|
||||
print("Shape {}".format(board.shape[0]))
|
||||
start = -time.time()
|
||||
for i in range(board.shape[0]):
|
||||
board_ori = board[i].reshape(-1, 19, 19, 17)
|
||||
board_ori = board[i].reshape(-1, size, size, 17)
|
||||
win_ori = win[i].reshape(-1, 1)
|
||||
p_ori = p[i].reshape(-1, 362)
|
||||
p_ori = p[i].reshape(-1, size ** 2 + 1)
|
||||
concat(block_list, board_ori, p_ori, win_ori)
|
||||
|
||||
for t in range(1, 4):
|
||||
board_aug = np.rot90(board_ori, t, (1, 2))
|
||||
p_aug = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size), t, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
concat(block_list, board_aug, p_aug, win_ori)
|
||||
|
||||
board_aug = board_ori[:, ::-1]
|
||||
p_aug = np.concatenate(
|
||||
[p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
[p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
concat(block_list, board_aug, p_aug, win_ori)
|
||||
|
||||
board_aug = board_ori[:, :, ::-1]
|
||||
p_aug = np.concatenate(
|
||||
[p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
|
||||
[p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
concat(block_list, board_aug, p_aug, win_ori)
|
||||
|
||||
board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
|
||||
p_aug = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
|
||||
p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
concat(block_list, board_aug, p_aug, win_ori)
|
||||
|
||||
board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
|
||||
p_aug = np.concatenate(
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
|
||||
[np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
|
||||
p_ori[:, -1].reshape(-1, 1)],
|
||||
axis=1)
|
||||
concat(block_list, board_aug, p_aug, win_ori)
|
||||
|
@ -45,6 +45,7 @@ class UCTNode(MCTSNode):
|
||||
self.W = np.zeros([action_num])
|
||||
self.N = np.zeros([action_num])
|
||||
self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1)
|
||||
self.mask = None
|
||||
|
||||
def selection(self, simulator):
|
||||
self.valid_mask(simulator)
|
||||
@ -70,9 +71,15 @@ class UCTNode(MCTSNode):
|
||||
self.parent.backpropagation(self.children[action].reward)
|
||||
|
||||
def valid_mask(self, simulator):
    """Rule out invalid actions by forcing their UCB score to -inf.

    On the first call (``self.mask is None``) every action index in
    ``range(self.action_num - 1)`` is checked with
    ``simulator.is_valid(self.state, act)`` and the rejected indices are
    cached in ``self.mask``. Later calls do not query the simulator again;
    they only re-apply the cached indices to ``self.ucb``.

    The last action index is never checked here and therefore never masked
    (presumably a "pass" move -- TODO confirm against the simulator).

    :param simulator: object exposing ``is_valid(state, action)``; a falsy
        return value marks the action as invalid.
    """
    if self.mask is None:
        # Build the cache exactly once. Previously the scan sat outside this
        # guard, so it re-ran on every call and kept appending duplicate
        # indices to the cached list.
        self.mask = [
            act
            for act in range(self.action_num - 1)
            if not simulator.is_valid(self.state, act)
        ]
    # Re-apply on every call: assumes self.ucb accepts a list of indices
    # (e.g. a numpy array) and may have been recomputed since the last call.
    self.ucb[self.mask] = -float("Inf")
|
||||
|
||||
|
||||
class TSNode(MCTSNode):
|
||||
@ -160,6 +167,10 @@ class MCTS(object):
|
||||
self.max_time = max_time
|
||||
if max_step is None and max_time is None:
|
||||
raise ValueError("Need a stop criteria!")
|
||||
|
||||
self.select_time = []
|
||||
self.evaluate_time = []
|
||||
self.bp_time = []
|
||||
while (max_step is not None and self.step < self.max_step or max_step is None) \
|
||||
and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None):
|
||||
self.expand()
|
||||
@ -171,6 +182,5 @@ class MCTS(object):
|
||||
value = node.children[new_action].expansion(self.evaluator, self.action_num)
|
||||
node.children[new_action].backpropagation(value + 0.)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
pass
|
||||
|
Loading…
x
Reference in New Issue
Block a user