diff --git a/AlphaGo/game.py b/AlphaGo/game.py
index 3ef0918..4cb5e00 100644
--- a/AlphaGo/game.py
+++ b/AlphaGo/game.py
@@ -8,6 +8,7 @@ from __future__ import print_function
 import utils
 import copy
 import tensorflow as tf
+import numpy as np
 from collections import deque
 import Network
 
@@ -185,7 +186,7 @@ class Game:
         self.board = [utils.EMPTY] * (self.size * self.size)
         self.strategy = strategy()
         # self.strategy = None
-        self.executor = Executor(game = self)
+        self.executor = Executor(game=self)
         self.history = []
         self.past = deque(maxlen=8)
         for i in range(8):
@@ -211,8 +212,8 @@ class Game:
     def set_komi(self, k):
         self.komi = k
 
-    def check_valid(self, vertex):
-        return True
+    def check_valid(self, color, vertex):
+        return self.executor.is_valid(color, vertex)
 
     def do_move(self, color, vertex):
         if vertex == utils.PASS:
@@ -224,7 +225,6 @@ class Game:
         # move = self.strategy.gen_move(color)
         # return move
         move = self.strategy.gen_move(self.past, color)
-        print(move)
         self.do_move(color, move)
         return move
 
diff --git a/AlphaGo/self-play.py b/AlphaGo/self-play.py
new file mode 100644
index 0000000..bdbb1c1
--- /dev/null
+++ b/AlphaGo/self-play.py
@@ -0,0 +1,26 @@
+from game import Game
+from engine import GTPEngine
+
+g = Game()
+
+
+g.show_board()
+e = GTPEngine(game_obj=g)
+
+num = 0
+black_pass = False
+white_pass = False
+while not (black_pass and white_pass):
+    if num % 2 == 0:
+        res = e.run_cmd("genmove BLACK")[0]
+        num += 1
+        print(res)
+        if res == (0, 0):
+            black_pass = True
+    else:
+        res = e.run_cmd("genmove WHITE")[0]
+        num += 1
+        print(res)
+        if res == (0, 0):
+            white_pass = True
+    g.show_board()
diff --git a/AlphaGo/strategy.py b/AlphaGo/strategy.py
index d450a00..327dd53 100644
--- a/AlphaGo/strategy.py
+++ b/AlphaGo/strategy.py
@@ -1,25 +1,29 @@
-import os,sys
+import os, sys
+
 sys.path.append(os.path.join(os.path.dirname(__file__), os.path.pardir))
 import numpy as np
 import utils
 import time
+import copy
 import network_small
 import tensorflow as tf
 from collections import deque
 from tianshou.core.mcts.mcts import MCTS
 
+DELTA = [[1, 0], [-1, 0], [0, -1], [0, 1]]
+
 
 class GoEnv:
     def __init__(self, size=9, komi=6.5):
         self.size = size
-        self.komi = 6.5
+        self.komi = komi
         self.board = [utils.EMPTY] * (self.size * self.size)
         self.history = deque(maxlen=8)
 
     def _flatten(self, vertex):
         x, y = vertex
         return (x - 1) * self.size + (y - 1)
-    
+
     def _bfs(self, vertex, color, block, status, alive_break):
         block.append(vertex)
         status[self._flatten(vertex)] = True
@@ -102,13 +106,28 @@ class GoEnv:
         for b in block:
             self.board[self._flatten(b)] = utils.EMPTY
 
-    def is_valid(self, color, vertex):
+    # def is_valid(self, color, vertex):
+    def is_valid(self, state, action):
+        if action == 81:
+            vertex = (0, 0)
+        else:
+            vertex = (action % 9 + 1, action / 9 + 1)  # align with the step_forward/gen_move convention (Python 2 division)
+        if state[0, 0, 0, -1] == 1:
+            color = 1
+        else:
+            color = -1
+        self.history.clear()
+        for i in range(8):
+            self.history.append((state[:, :, :, i] - state[:, :, :, i + 8]).reshape(-1).tolist())
+        self.board = copy.copy(self.history[-1])
         ### in board
         if not self._in_board(vertex):
             return False
 
         ### already have stone
         if not self.board[self._flatten(vertex)] == utils.EMPTY:
+            # print(np.array(self.board).reshape(9, 9))
+            # print(vertex)
             return False
 
         ### check if it is qi
@@ -127,13 +146,12 @@ class GoEnv:
         id_ = self._flatten(vertex)
         if self.board[id_] == utils.EMPTY:
             self.board[id_] = color
-            self.history.append(self.board)
+            self.history.append(copy.copy(self.board))
             return True
         else:
             return False
 
     def step_forward(self, state, action):
-        # print(state)
         if state[0, 0, 0, -1] == 1:
             color = 1
         else:
@@ -141,7 +159,10 @@ class GoEnv:
             color = -1
         if action == 81:
             vertex = (0, 0)
         else:
-            vertex = (action / 9 + 1, action % 9)
+            vertex = (action % 9 + 1, action / 9 + 1)
+        # print(vertex)
+        # print(self.board)
+        self.board = (state[:, :, :, 7] - state[:, :, :, 15]).reshape(-1).tolist()
         self.do_move(color, vertex)
         new_state = np.concatenate(
             [state[:, :, :, 1:8], (np.array(self.board) == 1).reshape(1, 9, 9, 1),
@@ -162,8 +183,8 @@ class strategy(object):
     def data_process(self, history, color):
         state = np.zeros([1, 9, 9, 17])
         for i in range(8):
-            state[0, :, :, i] = history[i] == 1
-            state[0, :, :, i + 8] = history[i] == -1
+            state[0, :, :, i] = np.array(np.array(history[i]) == np.ones(81)).reshape(9, 9)
+            state[0, :, :, i + 8] = np.array(np.array(history[i]) == -np.ones(81)).reshape(9, 9)
         if color == 1:
             state[0, :, :, 16] = np.ones([9, 9])
         if color == -1:
@@ -171,16 +192,15 @@ class strategy(object):
         return state
 
     def gen_move(self, history, color):
-        self.simulator.history = history
-        self.simulator.board = history[-1]
-        state = self.data_process(history, color)
-        prior = self.evaluator(state)[0]
-        mcts = MCTS(self.simulator, self.evaluator, state, 82, prior, inverse=True, max_step=100)
+        self.simulator.history = copy.copy(history)
+        self.simulator.board = copy.copy(history[-1])
+        state = self.data_process(self.simulator.history, color)
+        mcts = MCTS(self.simulator, self.evaluator, state, 82, inverse=True, max_step=10)
         temp = 1
         p = mcts.root.N ** temp / np.sum(mcts.root.N ** temp)
         choice = np.random.choice(82, 1, p=p).tolist()[0]
         if choice == 81:
             move = (0, 0)
         else:
-            move = (choice / 9 + 1, choice % 9 + 1)
+            move = (choice % 9 + 1, choice / 9 + 1)
         return move
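A note on the action encoding used throughout strategy.py: actions 0-80 index the flattened 9x9 board and action 81 is a pass, encoded as the vertex (0, 0); vertices are 1-based (x, y) pairs. The arithmetic above relies on Python 2 integer division and needs // under Python 3. A quick worked check of the gen_move mapping, using an illustrative value:

    # action 17 under the gen_move convention:
    choice = 17
    move = (choice % 9 + 1, choice // 9 + 1)  # == (9, 2); // keeps this correct on Python 3
    # pass: choice == 81 maps to move == (0, 0)
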
diff --git a/AlphaGo/test.py b/AlphaGo/test.py
index ce74bbd..b916bf8 100644
--- a/AlphaGo/test.py
+++ b/AlphaGo/test.py
@@ -1,34 +1,134 @@
-# -*- coding: utf-8 -*-
-# vim:fenc=utf-8
-# $File: test.py
-# $Date: Fri Dec 01 01:3722 2017 +0800
-# $Author: renyong15 ©
-#
-
 from game import Game
 from engine import GTPEngine
 import utils
 
-g = Game()
-e = GTPEngine(game_obj=g)
-e.run_cmd("genmove BLACK")
+
+g = Game()
+e = GTPEngine(game_obj=g)
+res = e.run_cmd('1 protocol_version')
+print(e.known_commands)
+print(res)
+
+res = e.run_cmd('2 name')
+print(res)
+
+res = e.run_cmd('3 known_command quit')
+print(res)
+
+res = e.run_cmd('4 unknown_command quitagain')
+print(res)
+
+res = e.run_cmd('5 list_commands')
+print(res)
+
+res = e.run_cmd('6 komi 6')
+print(res)
+
+res = e.run_cmd('7 play BLACK C3')
+print(res)
+
+res = e.run_cmd('play BLACK C4')
+res = e.run_cmd('play BLACK C5')
+res = e.run_cmd('play BLACK C6')
+res = e.run_cmd('play BLACK D3')
+print(res)
+
+res = e.run_cmd('8 genmove BLACK')
+print(res)
+
+#g.show_board()
+print(g.check_valid(utils.BLACK, (10, 9)))  # check_valid now takes (color, vertex); BLACK chosen arbitrarily
+print(g.executor._neighbor((1, 1)))
+print(g.do_move(utils.WHITE, (4, 6)))
+#g.show_board()
+
+
+res = e.run_cmd('play BLACK L10')
+res = e.run_cmd('play BLACK L11')
+res = e.run_cmd('play BLACK L12')
+res = e.run_cmd('play BLACK L13')
+res = e.run_cmd('play BLACK L14')
+res = e.run_cmd('play BLACK m15')
+res = e.run_cmd('play BLACK m9')
+res = e.run_cmd('play BLACK C9')
+res = e.run_cmd('play BLACK D9')
+res = e.run_cmd('play BLACK E9')
+res = e.run_cmd('play BLACK F9')
+res = e.run_cmd('play BLACK G9')
+res = e.run_cmd('play BLACK H9')
+res = e.run_cmd('play BLACK I9')
+
+res = e.run_cmd('play BLACK N9')
+res = e.run_cmd('play BLACK N15')
+res = e.run_cmd('play BLACK O10')
+res = e.run_cmd('play BLACK O11')
+res = e.run_cmd('play BLACK O12')
+res = e.run_cmd('play BLACK O13')
+res = e.run_cmd('play BLACK O14')
+res = e.run_cmd('play BLACK M12')
+
+res = e.run_cmd('play WHITE M10')
+res = e.run_cmd('play WHITE M11')
+res = e.run_cmd('play WHITE N10')
+res = e.run_cmd('play WHITE N11')
+
+res = e.run_cmd('play WHITE M13')
+res = e.run_cmd('play WHITE M14')
+res = e.run_cmd('play WHITE N13')
+res = e.run_cmd('play WHITE N14')
+print(res)
+
+res = e.run_cmd('play BLACK N12')
+print(res)
+#g.show_board()
+
+res = e.run_cmd('play BLACK P16')
+res = e.run_cmd('play BLACK P17')
+res = e.run_cmd('play BLACK P18')
+res = e.run_cmd('play BLACK P19')
+res = e.run_cmd('play BLACK Q16')
+res = e.run_cmd('play BLACK R16')
+res = e.run_cmd('play BLACK S16')
+
+res = e.run_cmd('play WHITE S18')
+res = e.run_cmd('play WHITE S17')
+res = e.run_cmd('play WHITE Q19')
+res = e.run_cmd('play WHITE Q18')
+res = e.run_cmd('play WHITE Q17')
+res = e.run_cmd('play WHITE R18')
+res = e.run_cmd('play WHITE R17')
+res = e.run_cmd('play BLACK S19')
+print(res)
+#g.show_board()
+
+res = e.run_cmd('play WHITE R19')
 g.show_board()
-e.run_cmd("genmove WHITE")
+
+res = e.run_cmd('play BLACK S19')
+print(res)
 g.show_board()
-e.run_cmd("genmove BLACK")
+
+res = e.run_cmd('play BLACK S19')
+print(res)
+
+
+res = e.run_cmd('play BLACK E17')
+res = e.run_cmd('play BLACK F16')
+res = e.run_cmd('play BLACK F18')
+res = e.run_cmd('play BLACK G17')
+res = e.run_cmd('play WHITE G16')
+res = e.run_cmd('play WHITE G18')
+res = e.run_cmd('play WHITE H17')
 g.show_board()
-e.run_cmd("genmove WHITE")
+
+res = e.run_cmd('play WHITE F17')
 g.show_board()
-e.run_cmd("genmove BLACK")
-g.show_board()
-e.run_cmd("genmove WHITE")
-g.show_board()
-e.run_cmd("genmove BLACK")
-g.show_board()
-e.run_cmd("genmove WHITE")
-g.show_board()
-e.run_cmd("genmove BLACK")
-g.show_board()
-e.run_cmd("genmove WHITE")
+
+res = e.run_cmd('play BLACK G17')
+print(res)
 g.show_board()
+
+res = e.run_cmd('play BLACK G19')
+res = e.run_cmd('play BLACK G17')
+g.show_board()
\ No newline at end of file
diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
index 5a1cbac..db9a7cf 100644
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -26,7 +26,7 @@ class MCTSNode(object):
         self.children = {}
         self.state = state
         self.action_num = action_num
-        self.prior = prior
+        self.prior = np.array(prior).reshape(-1)
         self.inverse = inverse
 
     def selection(self, simulator):
@@ -35,6 +35,8 @@
     def backpropagation(self, action):
         raise NotImplementedError("Need to implement function backpropagation")
 
+    def valid_mask(self, simulator):
+        pass
 
 class UCTNode(MCTSNode):
     def __init__(self, parent, action, state, action_num, prior, inverse=False):
@@ -45,6 +47,7 @@ class UCTNode(MCTSNode):
         self.ucb = self.Q + c_puct * self.prior * math.sqrt(np.sum(self.N)) / (self.N + 1)
 
     def selection(self, simulator):
+        self.valid_mask(simulator)
         action = np.argmax(self.ucb)
         if action in self.children.keys():
             return self.children[action].selection(simulator)
@@ -66,6 +69,11 @@
         else:
             self.parent.backpropagation(self.children[action].reward)
 
+    def valid_mask(self, simulator):
+        for act in range(self.action_num - 1):
+            if not simulator.is_valid(self.state, act):
+                self.ucb[act] = -float("Inf")
+
 
 class TSNode(MCTSNode):
     def __init__(self, parent, action, state, action_num, prior, method="Gaussian", inverse=False):
@@ -78,7 +86,7 @@
         self.sigma = np.zeros([action_num])
 
 
-class ActionNode:
+class ActionNode(object):
     def __init__(self, parent, action):
         self.parent = parent
         self.action = action
@@ -120,18 +128,19 @@ class ActionNode:
                                                            self.parent.inverse)
             return value
         else:
-            return 0
+            return 0.
 
     def backpropagation(self, value):
         self.reward += value
         self.parent.backpropagation(self.action)
 
 
-class MCTS:
-    def __init__(self, simulator, evaluator, root, action_num, prior, method="UCT", inverse=False, max_step=None,
+class MCTS(object):
+    def __init__(self, simulator, evaluator, root, action_num, method="UCT", inverse=False, max_step=None,
                  max_time=None):
         self.simulator = simulator
         self.evaluator = evaluator
+        prior, _ = self.evaluator(root)
         self.action_num = action_num
         if method == "":
             self.root = root
diff --git a/tianshou/core/mcts/mcts_test.py b/tianshou/core/mcts/mcts_test.py
index 46ba381..da404ca 100644
--- a/tianshou/core/mcts/mcts_test.py
+++ b/tianshou/core/mcts/mcts_test.py
@@ -28,7 +28,7 @@ class TestEnv:
         if step == self.max_step:
             reward = int(np.random.uniform() < self.reward[num])
         else:
-            reward = 0
+            reward = 0.
         return new_state, reward
 
 
@@ -36,4 +36,4 @@ if __name__ == "__main__":
     env = TestEnv(2)
     rollout = rollout_policy(env, 2)
     evaluator = lambda state: rollout(state)
-    mcts = MCTS(env, evaluator, [0, 0], 2, np.array([0.5, 0.5]), max_step=1e4)
+    mcts = MCTS(env, evaluator, [0, 0], 2, max_step=1e4)
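With this change the MCTS constructor no longer takes a prior argument: it derives the prior itself via prior, _ = self.evaluator(root), so every evaluator handed to MCTS (including the rollout lambda in mcts_test.py) must return a (prior, value) pair. A minimal sketch of that contract, assuming a uniform prior; uniform_evaluator, simulator, and root_state are illustrative names, not part of the repo:

    import numpy as np
    from tianshou.core.mcts.mcts import MCTS

    ACTION_NUM = 82  # 81 intersections on a 9x9 board, plus pass

    def uniform_evaluator(state):
        # MCTS unpacks the return value as (prior, value)
        return np.ones(ACTION_NUM) / ACTION_NUM, 0.

    # usage mirroring the updated mcts_test.py call:
    # mcts = MCTS(simulator, uniform_evaluator, root_state, ACTION_NUM, max_step=100)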