update the policy

This commit is contained in:
rtz19970824 2017-12-11 13:38:24 +08:00
commit 715f7be6a8
7 changed files with 101 additions and 101 deletions

1
AlphaGo/.gitignore vendored
View File

@ -1,2 +1,3 @@
data data
checkpoints checkpoints
checkpoints_origin

View File

@ -186,8 +186,7 @@ class GTPEngine():
return self._game.executor.get_score(), None return self._game.executor.get_score(), None
def cmd_show_board(self, args, **kwargs): def cmd_show_board(self, args, **kwargs):
self._game.show_board() return self._game.board, True
return None, None
if __name__ == "main": if __name__ == "main":

View File

@ -1,89 +1,70 @@
import subprocess import subprocess
import sys import sys
import re import re
import Pyro4
import time import time
#start a name server to find the remote object
kill_old_server = subprocess.Popen(['killall', 'pyro4-ns'])
print "kill old server, the return code is : " + str(kill_old_server.wait())
time.sleep(1)
start_new_server = subprocess.Popen(['pyro4-ns', '&'])
print "Start Name Sever : " + str(start_new_server.pid)# + str(start_new_server.wait())
time.sleep(1)
agent_v0 = subprocess.Popen(['python', '-u', 'player.py', '--role=black'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
time.sleep(3)
print "Start Player 0 at : " + str(agent_v0.pid)
agent_v1 = subprocess.Popen(['python', '-u', 'player.py', '--role=white', '--checkpoint_path=./checkpoints_origin/'],
stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
time.sleep(3)
print "Start Player 1 at : " + str(agent_v1.pid)
player = [None] * 2
player[0] = Pyro4.Proxy("PYRONAME:black")
player[1] = Pyro4.Proxy("PYRONAME:white")
role = ["BLACK", "WHITE"]
color = ['b', 'w']
pattern = "[A-Z]{1}[0-9]{1}" pattern = "[A-Z]{1}[0-9]{1}"
size = 9 size = 9
agent_v1 = subprocess.Popen(['python', '-u', 'test.py'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) show = ['.', 'X', 'O']
agent_v0 = subprocess.Popen(['python', '-u', 'test.py', '--checkpoint_path=./checkpoints_origin/'], stdin=subprocess.PIPE,
stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
num = 0
game_num = 0 game_num = 0
black_pass = False while game_num < 1:
white_pass = False num = 0
pass_flag = [False, False]
while game_num < 10:
print("Start game {}".format(game_num)) print("Start game {}".format(game_num))
while not (black_pass and white_pass) and num < size ** 2 * 2: # end the game if both palyer chose to pass, or play too much turns
print(num) while not (pass_flag[0] and pass_flag[1]) and num < size ** 2 * 2:
if num % 2 == 0: turn = num % 2
print('BLACK TURN') move = player[turn].run_cmd(str(num) + ' genmove ' + color[turn] + '\n')
agent_v1.stdin.write(str(num) + ' genmove b\n') print role[turn] + " : " + str(move),
agent_v1.stdin.flush() num += 1
result = agent_v1.stdout.readline() match = re.search(pattern, move)
sys.stdout.write(result) if match is not None:
sys.stdout.flush() #print "match : " + str(match.group())
num += 1 play_or_pass = match.group()
match = re.search(pattern, result) pass_flag[turn] = False
print("COPY BLACK")
if match is not None:
agent_v0.stdin.write(str(num) + ' play b ' + match.group() + '\n')
agent_v0.stdin.flush()
result = agent_v0.stdout.readline()
sys.stdout.flush()
else:
agent_v0.stdin.write(str(num) + ' play b PASS\n')
agent_v0.stdin.flush()
result = agent_v0.stdout.readline()
sys.stdout.flush()
if re.search("pass", result) is not None:
black_pass = True
else:
black_pass = False
else: else:
print('WHITE TURN') #print "no match"
agent_v0.stdin.write(str(num) + ' genmove w\n') play_or_pass = ' PASS'
agent_v0.stdin.flush() pass_flag[turn] = True
result = agent_v0.stdout.readline() result = player[1 - turn].run_cmd(str(num) + ' play ' + color[turn] + ' ' + play_or_pass + '\n')
sys.stdout.write(result) board = player[turn].run_cmd(str(num) + ' show_board')
sys.stdout.flush() board = eval(board[board.index('['):board.index(']') + 1])
num += 1 for i in range(size):
match = re.search(pattern, result) for j in range(size):
print("COPY WHITE") print show[board[i * size + j]] + " ",
if match is not None: print "\n",
agent_v1.stdin.write(str(num) + ' play w ' + match.group() + '\n')
agent_v1.stdin.flush()
result = agent_v1.stdout.readline()
sys.stdout.flush()
else:
agent_v1.stdin.write(str(num) + ' play w PASS\n')
agent_v1.stdin.flush()
result = agent_v1.stdout.readline()
sys.stdout.flush()
if re.search("pass", result) is not None:
black_pass = True
else:
black_pass = False
print("Finished") score = player[turn].run_cmd(str(num) + ' get_score')
print("\n") print "Finished : ", score.split(" ")[1]
player[0].run_cmd(str(num) + ' clear_board')
agent_v1.stdin.write('clear_board\n') player[1].run_cmd(str(num) + ' clear_board')
agent_v1.stdin.flush()
result = agent_v1.stdout.readline()
sys.stdout.flush()
agent_v0.stdin.write('clear_board\n')
agent_v0.stdin.flush()
result = agent_v0.stdout.readline()
sys.stdout.flush()
agent_v1.stdin.write('get_score\n')
agent_v1.stdin.flush()
result = agent_v1.stdout.readline()
sys.stdout.write(result)
sys.stdout.flush()
game_num += 1 game_num += 1
subprocess.call(["kill", "-9", str(agent_v0.pid)])
subprocess.call(["kill", "-9", str(agent_v1.pid)])
print "Kill all player, finish all game."

37
AlphaGo/player.py Normal file
View File

@ -0,0 +1,37 @@
import argparse
import time
import sys
import Pyro4
from game import Game
from engine import GTPEngine
@Pyro4.expose
class Player(object):
    """Pyro4-exposed wrapper that relays command strings to an engine.

    The instance only stores what it is given and delegates every command
    to ``engine.run_cmd``.
    """

    def __init__(self, **kwargs):
        """Remember the role and the engine of this player.

        Keyword Args:
            role: stored as ``self.role``.
            engine: object providing ``run_cmd(command)``; stored as
                ``self.engine``.

        Raises:
            KeyError: if ``role`` or ``engine`` is not supplied.
        """
        role, engine = kwargs['role'], kwargs['engine']
        self.role = role
        self.engine = engine

    def run_cmd(self, command):
        """Pass ``command`` to the engine and hand back its reply unchanged."""
        reply = self.engine.run_cmd(command)
        return reply
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint_path", type=str, default="./checkpoints/")
parser.add_argument("--role", type=str, default="unknown")
args = parser.parse_args()
game = Game(checkpoint_path=args.checkpoint_path)
engine = GTPEngine(game_obj=game, name='tianshou', version=0)
daemon = Pyro4.Daemon() # make a Pyro daemon
ns = Pyro4.locateNS() # find the name server
player = Player(role = args.role, engine = engine)
print "Init " + args.role + " player finished"
uri = daemon.register(player) # register the greeting maker as a Pyro object
print "Start on name " + args.role
ns.register(args.role, uri) # register the object with a name in the name server
print "Start Request Loop " + str(uri)
daemon.requestLoop() # start the event loop of the server to wait for calls

View File

@ -1,19 +0,0 @@
import sys
from game import Game
from engine import GTPEngine
# import utils
import argparse
import time
# Read the checkpoint directory from the command line.
parser = argparse.ArgumentParser()
parser.add_argument("--checkpoint_path", type=str, default="./checkpoints/")
args = parser.parse_args()

# Build the game and the GTP engine that answers commands for it.
game = Game(checkpoint_path=args.checkpoint_path)
engine = GTPEngine(game_obj=game, name='tianshou', version=0)

# Answer one command per input line until the engine reports a disconnect
# or the input stream is exhausted.
while not engine.disconnect:
    command = sys.stdin.readline()
    if not command:
        # readline() returns '' only at EOF (a blank line is '\n'); without
        # this check a closed stdin would make the loop spin forever on
        # empty commands.
        break
    result = engine.run_cmd(command)
    sys.stdout.write(result)
    # Flush so a parent process reading our stdout sees the reply at once.
    sys.stdout.flush()

View File

@ -46,9 +46,9 @@ Tianshou(天授) is a reinforcement learning platform. The following image illus
Please follow [google python coding style](https://google.github.io/styleguide/pyguide.html) Please follow [google python coding style](https://google.github.io/styleguide/pyguide.html)
Files should all be named with lower case letters and underscores. All files/folders should be named with lower case letters and underscores (except specified names such as `AlphaGo`).
Try to use full names. Don't use too many abbreviations for class/function/variable names except common abbreviations (such as `num` for number, `dim` for dimension, `env` for environment, `op` for operation). For now we use `pi` to refer to the policy in examples/ppo_example.py. Try to use full names. Don't use abbreviations for class/function/variable names except common abbreviations (such as `num` for number, `dim` for dimension, `env` for environment, `op` for operation). For now we use `pi` to refer to the policy in examples/ppo_example.py.
The """xxx""" comment should be written right after class/function. Also comment any part of the code that is not intuitive. We must comment, but for now we don't need to polish them. The """xxx""" comment should be written right after class/function. Also comment any part of the code that is not intuitive. We must comment, but for now we don't need to polish them.

View File

@ -1,6 +1,7 @@
#!/usr/bin/env python #!/usr/bin/env python
import tensorflow as tf, numpy as np import tensorflow as tf
import numpy as np
import time import time
import gym import gym