From 8b7b4b6c6b016a48a529b5eb32e1db92d6b043b9 Mon Sep 17 00:00:00 2001
From: JialianLee <Jialian@DESKTOP-N4N6F2G.localdomain>
Date: Fri, 5 Jan 2018 17:02:19 +0800
Subject: [PATCH 1/2] Add Dirichlet noise to root prior and add uniform noise
 to initial Q value

---
 tianshou/core/mcts/mcts.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tianshou/core/mcts/mcts.py b/tianshou/core/mcts/mcts.py
index 1251d05..46b854e 100644
--- a/tianshou/core/mcts/mcts.py
+++ b/tianshou/core/mcts/mcts.py
@@ -26,7 +26,7 @@ class MCTSNode(object):
 class UCTNode(MCTSNode):
     def __init__(self, parent, action, state, action_num, prior, mcts, inverse=False):
         super(UCTNode, self).__init__(parent, action, state, action_num, prior, inverse)
-        self.Q = np.zeros([action_num])
+        self.Q = np.random.uniform(-1, 1, action_num) * (1e-6)
         self.W = np.zeros([action_num])
         self.N = np.zeros([action_num])
         self.c_puct = c_puct
@@ -121,12 +121,14 @@ class ActionNode(object):
 
 class MCTS(object):
     def __init__(self, simulator, evaluator, start_state, action_num, method="UCT",
-                 role="unknown", debug=False, inverse=False):
+                 role="unknown", debug=False, inverse=False, epsilon=0.25):
         self.simulator = simulator
         self.evaluator = evaluator
         self.role = role
         self.debug = debug
+        self.epsilon = epsilon
         prior, _ = self.evaluator(start_state)
+        prior = (1 - self.epsilon) * prior + self.epsilon * np.random.dirichlet(1.0/action_num * np.ones([action_num]))
         self.action_num = action_num
         if method == "":
             self.root = start_state

From 32b7b33ed5fb4a19920491f719d0f93e027251c6 Mon Sep 17 00:00:00 2001
From: rtz19970824 <rtz19970824@gmail.com>
Date: Mon, 8 Jan 2018 16:19:59 +0800
Subject: [PATCH 2/2] debug: we should estimate our own win rate

---
 AlphaGo/model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/AlphaGo/model.py b/AlphaGo/model.py
index 6fde6e5..8d4c508 100644
--- a/AlphaGo/model.py
+++ b/AlphaGo/model.py
@@ -284,7 +284,7 @@ class ResNet(object):
             history.append(board)
             states.append(self._history2state(history, color))
             probs.append(np.array(prob).reshape(1, self.board_size ** 2 + 1))
-            winner.append(np.array(data.winner).reshape(1, 1))
+            winner.append(np.array(data.winner * color).reshape(1, 1))
             color *= -1
         states = np.concatenate(states, axis=0)
         probs = np.concatenate(probs, axis=0)