minor fixed

2017-12-08 23:41:31 +08:00 · 2017-12-08 23:41:31 +08:00 · bc49d466d1
commit bc49d466d1
parent 453e457452
4 changed files with 47 additions and 34 deletions
--- a/AlphaGo/data.py
+++ b/AlphaGo/data.py
@@ -2,6 +2,7 @@ import os
 import threading
 import numpy as np

+size = 9
 path = "/home/yama/leela-zero/data/npz-files/"
 name = os.listdir(path)
 print(len(name))
@@ -9,21 +10,21 @@ thread_num = 17
 batch_num = len(name) // thread_num

 def integrate(name, index):
-    boards = np.zeros([0, 19, 19, 17])
+    boards = np.zeros([0, size, size, 17])
    wins = np.zeros([0, 1])
-    ps = np.zeros([0, 362])
+    ps = np.zeros([0, size**2 + 1])
    for n in name:
        data = np.load(path + n)
-        board = data["boards"]
-        win = data["win"]
-        p = data["p"]
-        # board = np.zeros([0, 19, 19, 17])
+        board = data["state"]
+        win = data["winner"]
+        p = data["prob"]
+        # board = np.zeros([0, size, size, 17])
        # win = np.zeros([0, 1])
-        # p = np.zeros([0, 362])
+        # p = np.zeros([0, size**2 + 1])
        # for i in range(data["boards"].shape[3]):
-        #       board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, 19, 19, 17)], axis=0)
+        #       board = np.concatenate([board, data["boards"][:,:,:,i].reshape(-1, size, size, 17)], axis=0)
        #       win = np.concatenate([win, data["win"][:,i].reshape(-1, 1)], axis=0)
-        # p = np.concatenate([p, data["p"][:,i].reshape(-1, 362)], axis=0)
+        # p = np.concatenate([p, data["p"][:,i].reshape(-1, size**2 + 1)], axis=0)
        boards = np.concatenate([boards, board], axis=0)
        wins = np.concatenate([wins, win], axis=0)
        ps = np.concatenate([ps, p], axis=0)
@@ -35,21 +36,21 @@ def integrate(name, index):
    for i in range(1, 3):
        board = np.rot90(board_ori, i, (1, 2))
        p = np.concatenate(
-            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), i, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            [np.rot90(p_ori[:, :-1].reshape(-1, size, size), i, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        boards = np.concatenate([boards, board], axis=0)
        wins = np.concatenate([wins, win_ori], axis=0)
        ps = np.concatenate([ps, p], axis=0)

    board = board_ori[:, ::-1]
-    p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+    p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
                       axis=1)
    boards = np.concatenate([boards, board], axis=0)
    wins = np.concatenate([wins, win_ori], axis=0)
    ps = np.concatenate([ps, p], axis=0)

    board = board_ori[:, :, ::-1]
-    p = np.concatenate([p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+    p = np.concatenate([p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
                       axis=1)
    boards = np.concatenate([boards, board], axis=0)
    wins = np.concatenate([wins, win_ori], axis=0)
@@ -57,7 +58,7 @@ def integrate(name, index):

    board = board_ori[:, ::-1]
    p = np.concatenate(
-        [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+        [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size**2), p_ori[:, -1].reshape(-1, 1)],
        axis=1)
    boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
    wins = np.concatenate([wins, win_ori], axis=0)
@@ -65,14 +66,14 @@ def integrate(name, index):

    board = board_ori[:, :, ::-1]
    p = np.concatenate(
-        [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
+        [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size**2),
         p_ori[:, -1].reshape(-1, 1)],
        axis=1)
    boards = np.concatenate([boards, np.rot90(board, 1, (1, 2))], axis=0)
    wins = np.concatenate([wins, win_ori], axis=0)
    ps = np.concatenate([ps, p], axis=0)

-    np.savez("/home/tongzheng/data/data-" + str(index), boards=boards, wins=wins, ps=ps)
+    np.savez("/home/tongzheng/data/data-" + str(index), state=boards, winner=wins, prob=ps)
    print ("Thread {} has finished.".format(index))
 thread_list = list()
 for i in range(thread_num):
--- a/AlphaGo/network_small.py
+++ b/AlphaGo/network_small.py
@@ -203,10 +203,11 @@ class Network(object):


 if __name__ == '__main__':
-    state = np.random.randint(0, 1, [1, 9, 9, 17])
+    state = np.random.randint(0, 1, [256, 9, 9, 17])
    net = Network()
    sess = net.forward()
-    start = time.time()
+    start_time = time.time()
    for i in range(100):
        sess.run([tf.nn.softmax(net.p), net.v], feed_dict={net.x: state, net.is_training: False})
-        print("Step {}, Cumulative time {}".format(i, time.time() - start))
+        print("Step {}, use time {}".format(i, time.time() - start_time))
+        start_time = time.time()
--- a/AlphaGo/random_data.py
+++ b/AlphaGo/random_data.py
@@ -2,15 +2,16 @@ import os
 import numpy as np
 import time

-path = "/home/tongzheng/meta-data/"
-save_path = "/home/tongzheng/data/"
+size = 9
+path = "/raid/tongzheng/tianshou/AlphaGo/data/part1/"
+save_path = "/raid/tongzheng/tianshou/AlphaGo/data/"
 name = os.listdir(path)
 print(len(name))
 batch_size = 128
 batch_num = 512

 block_size = batch_size * batch_num
-slots_num = 32
+slots_num = 16


 class block(object):
@@ -22,9 +23,9 @@ class block(object):
        self.block_id = block_id

    def concat(self, board, p, win):
-        board = board.reshape(-1, 19, 19, 17)
+        board = board.reshape(-1, size, size, 17)
        win = win.reshape(-1, 1)
-        p = p.reshape(-1, 362)
+        p = p.reshape(-1, size ** 2 + 1)
        self.boards.append(board)
        self.wins.append(win)
        self.ps.append(p)
@@ -74,40 +75,40 @@ for n in name:
    print("Shape {}".format(board.shape[0]))
    start = -time.time()
    for i in range(board.shape[0]):
-        board_ori = board[i].reshape(-1, 19, 19, 17)
+        board_ori = board[i].reshape(-1, size, size, 17)
        win_ori = win[i].reshape(-1, 1)
-        p_ori = p[i].reshape(-1, 362)
+        p_ori = p[i].reshape(-1, size ** 2 + 1)
        concat(block_list, board_ori, p_ori, win_ori)

        for t in range(1, 4):
            board_aug = np.rot90(board_ori, t, (1, 2))
            p_aug = np.concatenate(
-                [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19), t, (1, 2)).reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+                [np.rot90(p_ori[:, :-1].reshape(-1, size, size), t, (1, 2)).reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
                axis=1)
            concat(block_list, board_aug, p_aug, win_ori)

        board_aug = board_ori[:, ::-1]
        p_aug = np.concatenate(
-            [p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            [p_ori[:, :-1].reshape(-1, size, size)[:, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)

        board_aug = board_ori[:, :, ::-1]
        p_aug = np.concatenate(
-            [p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1].reshape(-1, 361), p_ori[:, -1].reshape(-1, 1)],
+            [p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1].reshape(-1, size ** 2), p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)

        board_aug = np.rot90(board_ori[:, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
-            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, ::-1], 1, (1, 2)).reshape(-1, 361),
+            [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)

        board_aug = np.rot90(board_ori[:, :, ::-1], 1, (1, 2))
        p_aug = np.concatenate(
-            [np.rot90(p_ori[:, :-1].reshape(-1, 19, 19)[:, :, ::-1], 1, (1, 2)).reshape(-1, 361),
+            [np.rot90(p_ori[:, :-1].reshape(-1, size, size)[:, :, ::-1], 1, (1, 2)).reshape(-1, size ** 2),
             p_ori[:, -1].reshape(-1, 1)],
            axis=1)
        concat(block_list, board_aug, p_aug, win_ori)
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -45,6 +45,7 @@ class UCTNode(MCTSNode):
        self.W = np.zeros([action_num])
        self.N = np.zeros([action_num])
        self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1)
+        self.mask = None

    def selection(self, simulator):
        self.valid_mask(simulator)
@@ -70,9 +71,15 @@ class UCTNode(MCTSNode):
                self.parent.backpropagation(self.children[action].reward)

    def valid_mask(self, simulator):
+        if self.mask is None:
+            start_time = time.time()
+            self.mask = []
            for act in range(self.action_num - 1):
                if not simulator.is_valid(self.state, act):
+                    self.mask.append(act)
                    self.ucb[act] = -float("Inf")
+        else:
+            self.ucb[self.mask] = -float("Inf")


 class TSNode(MCTSNode):
@@ -160,6 +167,10 @@ class MCTS(object):
            self.max_time = max_time
        if max_step is None and max_time is None:
            raise ValueError("Need a stop criteria!")
+
+        self.select_time = []
+        self.evaluate_time = []
+        self.bp_time = []
        while (max_step is not None and self.step < self.max_step or max_step is None) \
                and (max_time is not None and time.time() - self.start_time < self.max_time or max_time is None):
            self.expand()
@@ -171,6 +182,5 @@ class MCTS(object):
        value = node.children[new_action].expansion(self.evaluator, self.action_num)
        node.children[new_action].backpropagation(value + 0.)

-
 if __name__ == "__main__":
    pass