-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathArena.py
116 lines (102 loc) · 3.98 KB
/
Arena.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import logging
import time

import numpy as np
from pytorch_classification.utils import Bar, AverageMeter
class Arena:
    """Skeleton of a two-player arena.

    This class's methods and comments are taken from Shantanu Thakoor
    (see ShantanuThakoor/alpha-zero-general) @ github.

    Only the constructor does any work here: ``play_game`` and
    ``play_games`` are unimplemented placeholders that return ``None``.
    Subclasses are expected to override them.
    """

    def __init__(self, player1, player2, game, display=None):
        """Remember the participants of the match.

        Input:
            player 1,2: two functions that take board as input, return action
            game: Game object
            display: a function that takes board as input and prints it
        """
        # Keep the arguments instead of discarding them, so that an
        # implementation of play_game() has something to work with.
        self.player1 = player1
        self.player2 = player2
        self.game = game
        self.display = display

    def play_game(self, verbose=False):
        """Execute one episode of a game (not implemented in this class).

        Intended contract, to be honoured by overriding implementations:

        Returns:
            either
                winner: player who won the game (1 if player1, -1 if player2)
            or
                draw result returned from the game that is neither 1, -1, nor 0.

        This placeholder does nothing and returns None.
        """
        return None

    def play_games(self, num, verbose=False):
        """Play ``num`` games (not implemented in this class).

        Intended contract, to be honoured by overriding implementations:
        player1 starts num/2 games and player2 starts num/2 games.

        Returns:
            oneWon: games won by player1
            twoWon: games won by player2
            draws: games won by nobody

        This placeholder does nothing and returns None.
        """
        return None
class MultiArena(Arena):
    """
    An Arena class that manages player actions taking place in a given game.
    Originally designed for card games where players have to be given cards
    at the start of the game, and the last player remaining loses the game.
    (Applicable to durak, big 2, bs, etc.). We let the game handle who plays
    when.
    """

    def __init__(self, game, *players, display=None):
        """
        Input:
            game: Game object
            players: any number of player functions, each of which computes
                     an action given the field.
            display: a function that takes the field as input and prints it
                     out. Required only when playing with verbose=True.
        """
        self.player_functions = list(players)
        self.n_players = len(self.player_functions)
        self.game = game
        self.display = display

    def play_game(self, verbose=False):
        """
        Executes one episode of a game.

        The game object is asked for the starting player and the initial
        field; then, until ``game.get_game_ended()`` becomes non-zero, the
        current player's function picks an action, the action is validated
        against ``game.get_valid_moves`` and applied with
        ``game.get_next_state``.

        Input:
            verbose: if True, print every turn and show the field through
                     ``self.display``.

        Returns:
            the first non-zero value reported by ``game.get_game_ended()``.
            NOTE(review): play_games() and the verbose message treat this
            value as the index of the player who lost -- confirm against
            the Game implementation.

        Raises:
            AssertionError: if verbose is set without a display function, or
                            if a player picks an action the game marks as
                            invalid.
        """
        # Raised explicitly (rather than via ``assert``) so the check survives
        # ``python -O``; the exception type is unchanged for existing callers.
        if verbose and not self.display:
            raise AssertionError("verbose=True requires a display function")

        # NOTE(review): the starting player is read before the field is
        # initialised, as in the original code -- confirm this order is what
        # the Game object expects.
        current_player = self.game.get_current_player()
        field = self.game.get_init_field()

        turn = 0
        result = self.game.get_game_ended()
        while result == 0:
            turn += 1
            if verbose:
                print("turn ", str(turn), "Player ", current_player)
                self.display(field)

            action = self.player_functions[current_player](field)
            valids = self.game.get_valid_moves(field, current_player)
            if not valids[action] > 0:
                log = logging.getLogger(__name__)
                log.error('Action %s is not valid!', action)
                log.debug('valids = %s', valids)
                raise AssertionError(f'Action {action} is not valid!')

            field, current_player = self.game.get_next_state(
                field, current_player, action
            )
            result = self.game.get_game_ended()

        if verbose:
            print("Game over: Turn ", str(turn), "Result ", str(result), " lost.")
        return result

    def play_games(self, num, verbose=False):
        """
        Plays num games and tallies their results.

        Input:
            num: number of games to play.
            verbose: forwarded to play_game().

        Returns:
            a numpy array of length n_players; entry i counts the games
            whose result was i (a loss-vector counting # of games lost,
            indexed by player, assuming there are no draws).

        NOTE(review): every result is used directly as an index into the
        loss vector, so it must be an integer valid for n_players entries --
        confirm the value range of game.get_game_ended().
        """
        # TODO: rotate the starting player so that each player starts an
        # equal number of games (num / n_players); at the moment the game
        # object alone decides who starts.

        # tqdm only provides the progress bar; fall back to a plain loop
        # when it is not installed.
        try:
            from tqdm import tqdm
        except ImportError:
            tqdm = None

        episodes = range(num)
        if tqdm is not None:
            episodes = tqdm(episodes, desc='Arena.playGames')

        n_lost = np.zeros(self.n_players)
        for _ in episodes:
            game_result = self.play_game(verbose=verbose)
            n_lost[game_result] += 1
        return n_lost