tic-tac-toe-rl/generate_board_hash_list.py
2021-05-17 00:50:03 +02:00

74 lines
2.1 KiB
Python

from game import TicTacToe
from tqdm import tqdm
import numpy as np
from random import randint
from hashlib import sha1
import pickle
# Enumerate the distinct non-ending tic-tac-toe boards, treating boards that
# are rotations or mirror images of each other as the same board. Random games
# are played and, for every position met before the game is done, one SHA-1
# digest is recorded the first time that position (in any orientation) appears.
MAX_GAMES = 10000  # upper bound on the number of random games played
TARGET_BOARD_COUNT = 627  # number of distinct non-ending boards to collect

board_list = []  # digests in discovery order
seen_hashes = set()  # same digests as board_list, for O(1) membership tests
for k in tqdm(range(MAX_GAMES)):
    ttt = TicTacToe()
    while not ttt.done:
        # Build the 8 symmetries of the current board: 4 rotations,
        # 2 axis mirrors and 2 diagonal mirrors.
        b1 = ttt.board
        b2 = np.rot90(b1, 1)
        symmetries = [
            b1,
            b2,
            np.rot90(b1, 2),
            np.rot90(b1, 3),
            np.fliplr(b1),
            np.flipud(b1),
            b1.T,  # mirror diagonally
            np.fliplr(b2),  # mirror anti-diagonally
        ]
        # sha1() is fed a C-contiguous copy of each variant, since the
        # rotated/flipped arrays may not be contiguous in memory.
        symmetry_hashes = [
            sha1(np.ascontiguousarray(b)).hexdigest() for b in symmetries
        ]
        # The position is new only if none of its orientations is known yet.
        if seen_hashes.isdisjoint(symmetry_hashes):
            # Store the digest of the last symmetry (the anti-diagonal
            # mirror) so the generated list keeps the same representatives.
            representative = symmetry_hashes[-1]
            board_list.append(representative)
            seen_hashes.add(representative)

        # Choose the next move by drawing cell indices 0..8 until one is
        # accepted. Sampling directly from the empty cells was tried and was
        # not faster, and this version is easier to understand.
        # NOTE(review): only an exact `True` from input_is_valid() is accepted,
        # as before -- confirm it returns a plain bool.
        while True:
            move = randint(0, 8)
            if ttt.input_is_valid(move) is True:
                break
        ttt.update_board(move)
        ttt.winner = ttt.check_win()

    # Stop early, between games, as soon as every expected board was found.
    if len(board_list) >= TARGET_BOARD_COUNT:
        tqdm.write(f"breaking at {k}")
        break
# number of non-ending boards (keyed by number of turns played)
# {0: 1, 1: 3, 2: 12, 3: 38, 4: 108, 5: 153, 6: 183, 7: 95, 8: 34, 9: 0}
# number of all boards, ending ones included (keyed by number of turns played)
# {0: 1, 1: 3, 2: 12, 3: 38, 4: 108, 5: 174, 6: 204, 7: 153, 8: 57, 9: 15}
# (the total of non-ending boards, printed below, should always be 627)
# Report how many distinct board hashes were collected.
print(f"Number of different (non-ending) boards: {len(board_list)}")
# Dump the list of hashes as a pickle object. The context manager closes the
# file even if pickling raises.
with open("board_hash_list.pkl", "wb") as fd:
    pickle.dump(board_list, fd)