From 9823a781777022bcded709e014edaabb98bec198 Mon Sep 17 00:00:00 2001 From: Otthorn Date: Mon, 17 May 2021 00:50:03 +0200 Subject: [PATCH] :tada: first commit --- README.md | 76 +++++++++++++ board_hash_list.pkl | Bin 0 -> 26977 bytes game.py | 119 +++++++++++++++++++++ generate_board_hash_list.py | 74 +++++++++++++ q_learning.py | 207 ++++++++++++++++++++++++++++++++++++ 5 files changed, 476 insertions(+) create mode 100644 README.md create mode 100644 board_hash_list.pkl create mode 100644 game.py create mode 100644 generate_board_hash_list.py create mode 100644 q_learning.py diff --git a/README.md b/README.md new file mode 100644 index 0000000..227aa3a --- /dev/null +++ b/README.md @@ -0,0 +1,76 @@ +# TicTacToe + +This repository is a simple implementation of the game of TicTacToe and some +experiments around Reinforcement Learning with it. + +## Structure + +* `game.py` contains the implementation of the game itself. +* `generate_board_hash_list.py` creates a pickle object containing a list of hashes +for every possible unique non-ending variation of the board. It is useful to +create the Q-table later and needs to be precomputed. +* `q_learning.py` contains some experimentation with Q-learning using the + TicTacToe game as an example. + +## Implementation details + +The TicTacToe game is a Python Class. The board is a 3x3 ndarray (numpy) of the +dtype `int`. Input is taken from 1 to 9 following this scheme: + +``` ++---+---+---+ +| 1 | 2 | 3 | ++---+---+---+ +| 4 | 5 | 6 | ++---+---+---+ +| 7 | 8 | 9 | ++---+---+---+ +``` +It is automatically raveled/unraveled when necessary. + +We only need to check if there is a win at 5 moves or more because it is impossible to +have a winner below this limit. At 9 moves the board is full and the game is +considered a draw if no one won. + +## Combinatorics + +Without taking anything into account, we can estimate the upper bound of the +number of possible boards. There are $ 3^9 = 19683 $ possibilities. 
+ +There are 8 different possible symmetries (dihedral group of order 8, aka the +symmetry group of the square). This drastically reduces the number of possible +boards. + +Taking into account the symmetries and the impossible boards (more O than X for +example), we get $765$ boards. + +Since we do not need to store the ending boards in the DAG, this number drops to +$627$ non-ending boards. + +This makes our state space size $627$ and our action space size $9$. + +## Reward + +* `+1` for the winning side +* `-1` for the losing side +* `±0` in case of a draw + +The rewards are given only at the end of an episode, when the winner is +determined. We backtrack over all the states and moves to update the Q-table, +giving the appropriate reward to each player. +Since the learning is episodic it can only be done at the end. + +The learning rate $\alpha$ is set to $1$ because the game is fully +deterministic. + +We use an $\varepsilon$-greedy (exponentially decreasing) strategy for +exploration/exploitation. + +The Bellman equation is simplified to the bare minimum for the special case of +an episodic, deterministic, 2-player game. + +Maybe some reward shaping could be done to get better results, and we could also +try a more complete version of the Bellman equation by considering Q[s+1,a], +which we do not do right now. This would require handling the special case of +the winning boards, which are not stored in order to reduce the state space +size. 
diff --git a/board_hash_list.pkl b/board_hash_list.pkl new file mode 100644 index 0000000000000000000000000000000000000000..1c577a3a287433aa79d72877d54a8f4b9044b066 GIT binary patch literal 26977 zcmXZlOBU@ou>?>W45nHv`6Jt!2t68-KWi}y22jmnzUZT{$$DZ^NsU9mh$cUGuFTU{G%?{c`nC2 zwtBzgT=u8dewSrk*892ldY$EZwmp}g{b}u3`#GL^@Avz@*SegwwCgL+w{FLF-dFz~ zmu}y?m*zk9cWu{Em)f`QIo7>h*I2)?z3zIw!=?Sc&)&DYoX>t=+qx|8`s~|wzQ^5< z}Z3`mB9>Hu< zXJxZ2v+@%46WZE@*%%R0{e9OwL_eY{KC*3xVH`g2`N?_>Dk%cZ65-?Bd2xtIH` z_gAj9?%rrA$FjF}_x2u7Jx-UttsL#M-rw_W>vy02`1-1sTE;pqk2=bAe7j5U>uygg zz1^eL@(gc$p4E;lS9>k_yREym$zp9~d!G6&$K%IFEA_kA=RC@I-tW5lxGZF~CAsTy zjBEJC-uHFs>rxMoYyHn^BloZNXLyanrKOZ++uveapX(U!+K=VjuIzVTujRY$XSwDI z*y*dc;cfQAdaa+JDn~EYtL^Li*qrvPOJ1uqZ?g5b?aT9(*2`^W?`L23_kAq7CvNxg ztRRUS~frJ(oxo4%5k37cR9UwU!LyqmVSKYZO?P= z&5P#Jqm{Gu%N~_+Zp(eIb@Z#O$5zME#(Ev=y`R^eyH+W&+3V4sZJg)*c>8g`XFvDx zT)P-}zI$uA^tdeZ`yQ(o>*u=Hy*}r)ZP^l$+NyM@Ghx>ITp!Q%G|zDyHg|iDwtTJE z^F5bu9FG;-5`^#eK3h3IVY+YYHjcvdmLmyM$9MI`k4eXGU-$d-Yv*~$*iTmX-NW6mz@^9TnzLWe)8*^C?#I>3 z{@N_z`Rrr#Q>d5j(o-t$wY_C~uWbk|uu+dy(A>*=*ZjwJzb)~)2=(tMZF%c{ShAz6 zi@#nP%O7qf>=`K^GTDOY_Z?fC4$ z-rxGk+x>mk?X@!^VS_k9LTPbafM>5`eD~G%eINVs9nbciuQ0gVS(k5}>ndTc=UtZ% zZ1&R5euI$lfZ}m4^?RiHajbz1D?H92^y|9SQis@Gw|-jDZ+{@{1yOBL!sY0%c$MJo zK8F|a2!istTbER4>9>Ge`j#v>ztVdZSGGr>*r0j_r9CGo+LeWKxz;yZu(jiTz9*^V zZP%xaw_e-b%Qn_tujgy)Sl^aQhs$o6bzjQ{_}_aS+o{O3^G>!ucK9ydA$L8lMIji{ zTP{`q=lzaz7pa>cEON6qh@02CfQnylwr}-dUgZ*hTepALTF!F#z41$!YG)Lzdt7q# zkl(#sI~2W!Yy(_=CP0~5aBPZ1+r(l2tmN}79|WrFy?wS5C@ORfHo5e!`?3}}3(}6S zwFgokZx41zP+%;-Ri0qh8+a_;GCcRSTX}`aY98wY?QJ;pDwaHpwXb&24oF+5-l6&1 zK+F57GdB=Q@$8C{OI2rAd=jz0mm+MP-{-m3aVXpJdxg5TEq8@fD$Aw}CTdgCrR;4-v(h)!$eai`uSM0g?AT-fwfg`&`6G$$ zeU+XzsV83F-A-3!>~+I3>b8_&m9|3ytgV%ApgR4@#$Fb?eoZA%{>MO!whm<*c>T(L zcyMq0O&su_rKwE%J_{h9EUr3^&h_iG6Vj?s{VIqh4IH1IO#HqV9K$OK= z5eszyq1ur)wc6UPBB(yd&Mh`~EJRs!>F4kJZ+!3Y2BVy(@JAW@fnz=1b}WeGk~FMx zWjU`)YN*sXl~-_MDsq2z1Z#)&VIf_W@q!R zQcufYceP@>R0Yel-?d!Df4fvdz3>(V7E+g`+Af^$Qrp*Ex`|lmts>=FJ6hR2mKgc` 
zv-6aPNrt{!L?!-I%*U6zDrO&wcc@Y22D%+^S_m8h{4HKq{pj!HF3sNw-0b_kx4yq! znzrM{sH+^l$q!`no#>+IVE~Aen0!-G@~<0f$ApLjsIP3jb6p?Y2CI0%2F|zEAS(uU zD3~{rQz*;?A3U*yV&d-gE$|&08i03gU6#a_FXF^3{Q0V|i*(aLrvn z;)*S}R4~G#`eJ6-sB#l9T^8|b2^V!>qwLlxOk^s(^?bcUl8=J=3f_p8D5aS1;_Y$x zHEHOQAeI>RmjUcmA=ig7oEN4GHSzD)*1Hs)I!{H2d=!mHpe#FjHl|W82wyY}A>>kP zhK3-Y8_s&XFK)a`#eq${>vjdV+A2I<8j?besL(Q6Zi(GAS0s1XLv%HFePn0t>md<}Y=C971}ggCJI(svmRI4+3J9PJ7GV+* zNE<}P;o6Qeef?`{f+hE`iLB)TGeW8=0DFh>pjbpycmoU+hwoM*pIMGiz$oqRNxwHi@y_&7?>NmcB%EyBBBksVw7j*V4Kh)j;Dg7o>gzm!sfVYR7^Ww^y+l>K-}N1rTC^ZsNP@erB31s@M5S$`xyfu8fG>Dql0^ab{JFFe zSDXENA0kE#SwcG7Dk9&rS^kbFRX3)|C6hZWhn5+KT67#o&#)Ux;>vC*t8KM-&JE`6^oOpZnce;wJw=K7sY~4lH89VXgGF zEHPN@0o4NKyjI>~fP#0!mBPguDi1|0h+hIr1zo5Lbpv-6-K;c3 zhUw?h;DIW%5Vy&)RJy(k69#qw1Ll9-v#wP43 z@&}Rmu{j{XVtscH3+ZwC@!1CzgvS;jb15e$E@MNKx1E#1tU&KcX^BMyzdJk}awZRO z2;Mt-r=k6zqJ3o6`z=wBkesieF3-CPDbHQwv2C9pNTv{Ss$HrlaH>yb1ZG_uYYSYm zXIuu8FJ61;%`|)RXKJF zcL&eGmZ;^)UnQg}X0KUMYJlZA!zHX7^=noAye&&)eNe{>TXwPw!F{?aQDxdXT)7PH zf?6`|&$ucDUj!~401y|rUbn0ajf+tQy;O&v_2ubz9*%6nb3UlYNnM7hVM4^$TiGiL zZdPD{mgEIdwln(37a|pV;u?`>MaC`wY|RQVi9HE06k($v#Rt`e2I9PnMDM(XN2Q+$ z16kNchahbaRE&(^8nOl<`AgjOga3ObUio6|4hZs~&I@w9D|6?1LdTm|$EFa2K-{Gw zm;b@_4cDP}6;-YhE!kB3dOWJr6C=Q-EVvs|UoQ&JcG`&X_=QsH;dZhUL=0bQZh_7A zbIKo_LV<+A2MG#qUG6}18VP#M5(iEigbK$|DJ#cMh+)W~#gYq;6(aMlOiO$S8i|8& z=2fi_jXNNIaMTYX0?|7_^in`7W8-PHZ?#?ov1Q`qZdjLx#SV#%ycU1@iHSqG`EA6_ z6_;1NkY*W7KTIf8$VLDMY9Cvl3|NL$n%CWty5RO3R~mkiKEU7Nl(XJ_sm*9j?-4GUOoX|iR#}BBr46E7j{KNJgmf@m|CM(e6@WFUZa$ z>rzlR>h3HA#o1jc`0H80n8M>ydn7$sw9FqA?o(Uw6>9PC z9~Be98+p}hV%8Ab^aWg4!I3_J62Ng@(Xu40?H^M3moN}`Lv;X_BLApz53eWJb2+eO zZ8GM3Z~$Y$FGQu7t3)lZRiHJ@NYe+072(ixBwWxYjS751&I`V2>1(gx+R6pniR8d; zr{-=4uaVBQORO)v3_ziVq@g|7v@A6QN}WC-hl3wOTA?x>q#gnEgC*?0j+fSnYpPCp zfETW*lrjMnqe^lSIpllo6a|}y98Y^9;qu|%R}nJoaa{g-7k!)W&?^$U`blb+YCHP{ z**~@!1zEKX?gd0bcg-qYQOV3)2#wo9$uth?QxNN$;f*rfEPx^{`3t;aDNFI7YFXEK zKtW1{g&0B$6hUnE1F_&*1PCi-ec}MkqK{|KaW8fdC5ALGO%QD=R>8nu@7WH;x%l)H 
z1NT+|R6ccfaair3wBUG_1I*NLmm*U^s16exS!^SRfCE;pEzU2k=iiH*DI-OX3c*=V zxbGA`(bLE-LC!q=-s~T&ch~fsiBHl#{8CP+b65jVBT)R7c%3aJmtt>ngmA|h&$xs* z&|s*8x+Ogf7Qc<1=(1?0AOgspAdTP1Y-LfTU8LqWfvYKl-n*-u_gbt<7FSp1#2yru z->*OnQ5GHA$^lJV>JfDIb{ix3sOKYY5hIHwd4CEuFx@6<t{rAoMYkg3ND17@79UmJ-R^2C2{m`lnVH%7r=zmvxo_SlZyi0AR6pavYl}7pxP6n z#=>%^QwM}04vcCmD4dVn^#-*V69bT^N|#k$mBuH0p#gN^)6j==xfIt=cY};!0P|2W zV7(!FG=eUA4$w+Hmm+>L4Nl+j@NQ!hKt!lenRv05>&tew;<+o8wz8QNQex#>Vr1}8 z^Gk&tMpYW@xTl!{9ijl#UL@?0te%Zq7bk(`Lh~6s*m&`|tIb65FaoQlq*By{4NsHU zkrQCXDpev~n(p9W)pg#Rz3%oVJ!%mk1+x^~MXkOB;X2owx-kNv2ZGpY^AU`Qn~gNU zqBE{7ki~*~KoZqs>Kg#TEi}Q&3I$%>0+PrK5tH4r7?iqJ1HAxGh#jzP5wb#qH}VKh zLS$~iq1U70gAfxBN2z=ryQFu-OOsprRaz0Vy@UagBgQaky^lKM5$d0PAPikcyzR$zsz_B4#Rg?{QX>d za{bZ))cxT~ihKI_rMCR%a0xIcie?{%0@ljFt`wkKWX%bKQ$QkJ1QnKz!(zbhx;G3A z_NXbS2@RwAX<@$%8>{S%ZF)v*TzY!Gf;#C>dJp$S`qC5UsE8{MrH}kBB|)h5!hE{8 zbaAs$9}Q}SA;rMZ1;Gea6@=icjhCRjF!;J-V;p}!e?>1dmD0NMw;;buSs?*HoB2FC zPeQZ^1>HBb3U+MFB!M-Oh6Is%$QVm6mtnZz^J;v=;{`x8@^tkOAeT~{k3Y6FOor@! zaGDWd^vND;HqztF)Z7*9#ru&3Mt%rGY2=4TylsSsGE^lIzeJXHkpTr}7`Dz8<`(jv zZvrV|SnpSdp{({(hctMXm0iJEQGL|@_n0?wN5P&h|kI+hT z%EN~NC0?u6L6l8o0~Q$d7WXy10Tu=)EPfu=<5Gke`~hIrYkc<&_!Av~3*zqTk^*{S z$1Y8;%c5leUXq&=PvV1=u7>;HaNnY`yIh+7rw2k7lq$H5XQcy8flzml0NFG@G=fzVl%)dG<6T7HgR6#eY)J9wpSGD|dGs|@l&Jn=r+TSn7fI*oHD zY4t%f9cB?kj41__Mn_B=G~t5xU_%-#`fp^(XIf%M4^?tR@Ny|8_Zbm$sgmcd^EULf z*op8W(`7uud+D((-bHx3lqp3v#8ahCJSB90`6WyR9RY?fLqJP*wiaXv@Chl_gL(L% zIFt?tiW&Rt#z62ryYH?h3nlj~Kklm^5u!pRb5oQFcc-rrgj|Y=+I&n7;`u?@OZD#` z-|xXck%tG&xjuL0NiIl{E}5Q-U-fUbpdwLS&icwM1VQc6&}vnk8G|ruu&Aek7S)G| zf(^~MaGxT{rWSD?#0RynhtX2%L5!Hnd4 z99AF$du3K`f32IoA@HbRwr>DYY0n%Q%jmPY{_=F5q`JST8tEA+K4h`6DVbCF+nAR1 zxI4=&IAE9o{eGdFMYU5JqjVR7h}65M1a1Iynkh=#4NjR+svy3Pwt!I$KQPLT&G-$? z-?9as@g*F1YQl{Wwn3T2Q#W+@Nh)(qc6@4X0)%{z509*4-9SL6m9a%ZMa4#b8KTT? 
z>BUJ5t}bl^E6rCwf)D<@VQ1Iu{Bh&(6f z5HT1E#sHz-BTou(W>gXO!7^!n%z+yWJjf&Cxr!E?^M#<~^|^zoBU}NdaL@&3cSlAC z6~R6Pvw#En!mBY`;iid(Y$dr7rb~jTozWH!Gul;p{qZZ$YHoxz{?pb< ziK3a%V)#+^d=z|6%gS7)ONl}@lvcqf>`b5P*dJPOl&LWkX@~xjx3jf?t(nhLPN+BZ zeo>ZKg=#DfaGj~AW0hBXT=Rp3AUUq*8vSUEJfLYg#OKXBx+$tt56xvNqH|KFG+eqZO>Yy>H$ zIvJB>;8&_r-^``y2w@i(fF6lq7H$!4hJ$%@>7d(U?>JbwRR0%c%8*0a5nMRKb}~rm z<5pmQtH)>(lV7{n;CT@d+v&Dndk^D2j-$tkB|YuU|!gCVulmMnVpN3 z?u5<(lBPW;h*dj|blOyulAp{(pLNcQ=8%<8B4Kbdb#1Qp?fNlW;ki-Ph}ZP9DijQuS-!6c^xS&@Ff-^ zWRlay15X`t<(so%(Ov4r6WT_SkSW5*to_dM<4-6oVw>rG3!glzbTyM>*ZDo#)LlI> zQCF8)k7OC8&(5BG7#hU{A!k%H@mnrj$&i}1nsW#Ky15g~m(iBjg8gcXcjTCwCP^0! z(ac~q3Xhj#D&RG>z=fy$^h(eyzKan+JzVA`8Ok)`i1DFEHWqm`tfp?T@eNWFCUNYw z)HU!{ERbb{?@S?WsZjO#8?S_9bm>b6WY?7 zRhP&jX!+r46OTm|&_YX_3g1*l6&uSzvtuGIWp4s?>8i@GOtlwP_L$q~vARRg%4R|0 z(rMmcQmh;7iZiC@$0&u52+lBz<)px`Cm*Q?z0AoZEh2&#c<3}no+-7=3EJ^54?tyW z&)}q3qiTb;pXivDhM5dP#57tQ)E?9}GXhBr@>9Y%Y&s#-2)1D=L|dK2lo%AgOZmcK zVqQ#80z?$le@Olj_c5|w^QwkXT&n4>@G_zpSow7lg{`SK&4A#EF$^zo@Ma5wq>4yP zf)MzqT8qQ5%UoJY%ba`_T+>hHzbJmH<|!HsNKwc zILsrc<1|hln{YwI>>*EOM2VBSV+K0Edi0fopJKABDrxkv6%ji41L!O<_Y5arO8vV3EV8Iq50% zU_5Xz3(YkqAhXc}H9Uu$_{{1BjcNd5#Rp{Wd;mi$X6ZOA@fqx}0|PAa%+?y^P>%)D z%@LZlfJ=$R3t};q`8eUP?~mwed!l0|F_EN6$pDtvLK6U#pDpH)~!x;2ea*$cR zEn5cil-^p>U~`(Pnv&8rMwwzF`#A~*n}o`>d2WUW6JGsv*UJ=;Mg!&(-H;Y&3>m$D z1q8aXf+evLw_IxFkuJuAHm}i&Oyj476XyK6fKAMoFY#(32bM!gV`02!y>Ru_Ka&8` z*C>YPSI@Gpc#`Ts=$ajh8^a#4SHy(204AbU$y3%Ha;ci&Fk(jh7;ENu9K1qajN}po z1Q6v2wJ6D4D#bWbgo^%LdaBdZWtg+A*`cToqZ!$7Da7aGkxLrIW>qYBD1cvdbT5rq zh^G@Ub!_^$h?Q{-&AD@vw_?>?V4fj)g@EE4MV7Mgm9@|6$5=yiyr7ypzG`aH33N?vZ znso;(c1JAWpqxSvkn@xHvz}STuq$^_cb5msuQo#@Ip}pG#&Zk!Vv&hXmANg2AW04R zP7ht_Co3ptu3%Px10$~-D~o|=Hzf*h%^Dki4X?KiX1hXlO`0paI!HJ`{8BPaNO)*$ zXqs$XE5#>JoYQnzg!ydvJ!deKRpi(o>5CX2w=8L?yA{VKS49?9exRiEGk%YL;79lL!QADoZ2ThOxaTJ2! 
zU9tic5+etn@Ob!+fouxUc5!PZbthKwH3MF`)Dv+$b8yC%@o~VK_%$?PV|2R(O%gJw zJUJ&L=&f{ z$pfMa9FJv>FjI;O+Bk2iMQL7AijM7a6FdNYkRwWpEHNBSK5JkEL=Hl5WC-mX!F8#w zxv(>6q-~hFSa64}U}Y?DeM5eh8531Emzpx?${~xE3LmF%$nMzEd|Bq5R-PXC)CJDa z(CJn_2aGc}=ii{+d;;nRo*J`41gDPWj#;713nh0Yt!+ZkvwZqa*dVTtL#4tP6?sCD zNN6C#t&pWU)AUiMlgx<)+{A946)FM?2N5l0-pOd40Dw%4He4nLKPk@yq3Iv6ph%_% zD+@BZVkRl@LC@>gpu*bJ+1QnaHlJ_g851!*r}8l9fRV8{GY4q4J!43?6o+tTn|a|Z z76Tdn)eMe9CeABLuA$0qpATSa0Qjha=?Dv_m*K7rj(W1SGj|h7&GmCv15Fr-5k_GN zJ9B<2roOfY(@E@DISyzKCscOS>N#4%Dqu|TUO}u^jn8iPm|GNdiV>}rV^erGs^gfY zK*Wd)4r7p&(}4F6M4R)aV>@N@p=a-~q5J zI);ohn&AH=x%FjqUUbGRj4C~6YA`4{%Kyd6Vy!SY5?RU;;c7mLzYraTh>=4EO0xgf z0p*gxvu$TQfZw2LDuzU#h7JZn47XQPi@&Mc#bp(855Zv3UYs@2dFtbo5Yvc;}U$~^# zXN*}C%0CTNuG`CE&WQjng^?poX&!#!Rn@5Wlul%$Z9~5XO=6 z_#6&SH?tX{XHdp?7c(GdJI%>!on*IQp#DcDw+x7}Ah~;FEOyJWz)~DSGITG^UCIz; z<4BJT#PO<(YQj+)Z?kN^HQ_*-N6Z6ALb}30me?Y&yocVn6FnMMPA~d91Ta8yslJfH zAZh3*LF>e)oXF5^x)PdOG7mkPTY!72C-BWYp%%Cw!;j%q4qu9@V66X{{1vF+%%V<( zrtlON9YbHFunT0ePr;~uk@|Tp@`0{rAj$m za3`O`$n3PPg0piOs2ArmvPcVoqCbZx2^d@m)#2uN-8n%@MdeJYyE4FIMru^~Y0>Rq z7*&1{K_t!Y2c{P-8r*^RSIQe3Rnb3m+^q5FvC5NtLl*c>> z1B52MKn152_T*|3sc>og(7x_^G(+371Vc@>4!Ui>yJWp6?=F$m{jAYWqFqP;< z$f&fOG{UltXIa`Agvf=1WoTz+hD=r1T|i;{Mz7t}y1uvL(AF^P$_zkZ4ZzYiyjrV{ zd9&H%Jh$E-=Y+`Y^1?GI zTRS}I(362pKK8UdV1?u0x~Edjp${_OPlI-BB2Jorn`z_;85_Qi{_#b$u!%%oMtm{C zC7!lq<0tqeL1IAqu%z-0f%uWrPM*Vw+z0*;2I|m_;+YD=aqzU#PA@74HVmAx?U>Sz z8_BgCm$5%^D)Kf;X+ip+GJSZ zC59-tnbS#dQfGtFswp4-Nl~YXBTJ0~u)7_(!|Q(I(zJwu4Gm-?062~Jf>aDOs*Cyp zaWIZ6lf^=2nSUCJ&Kprl@pQFFl_ts-OLVj?Ul|Av;I_%8%oL_c!pp`HX$dYum}}bq zLT*l|;md(aD5acu<1{1mGL?74jlD@BXJBzMqPSU7Imbugh}Hud&2-f{Had+A%G#Wj z$reBbJ|Pyy!bEtY22ldDv-X>rr+S#t`#mdE!=3Crj5a-K(Ny7lRRif1a(r3)b^R=G zr}Nb(EMNaJ9W#_%sYV{D4^w0uc_@)*m6V9NJ_v3Gh~>68n6u(}iu6mcf0Je|4gYeg z)0cz9ECCH(pen)cGEL%;3^Z+?u*kV*Mte><8qUcXeMzE1l5Z3^6V6yFS2y9}dm>Ci zLp0`0jJ2Tc>_+pt5}a67b-ACAXzEyF2U=q*#}E9`Kpu6DL1mC_ex58d&a=zwtga*k zJyg4M=T6N53VTF0bN1+UL@6U%e<~FfDu- 
zt+;32Q|-+RHJxbXM`*c5y7n*_Ol#h@BR7%)Oe{cC2*4>0*F2EpEn}BkF+G=t>5H`JRr)ko{bE4Fp@~nE9#4$0bO37Gf zm*X9XEW1U+o5{lX=f=*SCKQbAz-hE*&K2lw8;bHnh9Kf%JFb-n2$v1dupmIkIyvr~ zQ_H$OK;>m>#h#oZ%83P;qNCbb63jN$-h5PM7IU6}_2Y{vgo?XzuOh~o`@tEVIW-Ti zpt|-cZ5J}YTtHqcelLH<<-}^fo`jafFHo_VjByL|xw7e$(u%4T;vrL^V74?qNEE3p zGoK9!WAeMJdWuYBpqBWiO5B=sWjQ*hW)22sLrUz+rF;|xd4AT$IifUbScEJp27HQx zFh>}ilgzUMXyy73Shaje^g{rQZ_GY3BO$lX?hOjU%ZS^J`3=lwlPq z?O9Voc_#W)R!dtv&ab@BGiYQ$Y;^)Y=C$xhCEBb@2A#Ni#`|DztYA%g4LA0&o%yl1 zKx)huv1J+Z&xjhgB=@x`{ib09cAvX4VstkQUDJ|I8=~%rBO}{8Kmzl+Y$rZhG8Xml zbtF8E`++FryB=KgjB`3gK{8^?j(*J3_t>~0Y4X;Wu~HSE zxO-?FRPs7TWvDg-(S}g+T1IpA37K^kHQuD6#k3?zq^^B5KdX|W5iZS94-8D+dB(^W9N@)do-h4bX*&;j;92wy zBMBo3afmA)R-AES+#CFJ>6{HjP~#aH^&luP5#>dQ&yKY2k^VHM#%JUr$W~g81j=W! zEFBX%#|)=Ur^&)Cmx_XqF-jxew{8cPWJ^g@sc(wr%?fZD?aBj~bf$9NhS+lGTwL=7 zbZ){XaGL4C3zW3HiAjc>E>Y^M!z3y&07hTt0OSPt4E96#0HLKZ_hv{&$xu&q=X84Q zH(O-ltC0t_)ue;i=TAz!Idm;Deal2DN&~Mqzw02-1eq>uhWcEKzve6sBP8c{j76ej zj(R0t(FP2DPB>`X@Fz`4Fzvk@wBSE^T^`mvX=V7;tf3;$XGpU#ABJPRu{3B1;H_k( zD?f#_9*omaU^5j_?}{4;u?YwghK@8g0y1XD#O#5dubzp%SH6Hm{eWTCnxiya9La!P zIY8-I;gVRzJ#i;LAh;o}zaO$rs4yD*n40HO9zSTwmx@qKt*gRIPSN3B&(s1&D)L+OM%{>Tk> zDe{VI>bNPE94h6Hw`(CJgwN{9Y@D19O>(D;Xa2`a`i=s|;ZTMk6`>3cC#&QmABf50 zQog;+K#78oPJffya5xhhIUUJ=!W}cvXB4J=Y%arpN%AYxFZNGWI z!GqG`fPlRyi`oItaDT)yKIBknu3Qg%OPs=ihAOamG7CnF%u`yuCvWxopTJa@p zo@A3BYKZ*L$Dg!H2oGIteIuY~LMGi61jGb4ywrVwJ0y9TuMK!|s>71rli1$tumTq_^IFC^5v?UMPO;4w4mFI7RaiFH`)sWA(Yw$LW9Xm{zGff?sCqew zLu7PZ$R&*PtZFnAB!y&Fdeg}&cp?5v7Y@m*aeK zZCdhkCJZ36nOIi7BVYzkX_H`V5>IcdIq|WK=d-ftli4IL8!}(&uwKRsKovQ{Lrecp z)%5^rIXQ}ACIt?gP);+!^aKF+@mi(Jj7Dndyeor3su~5!xj#}1BImG!Q%Q7!zJZTh z@TGEB%@OThUn$QgRZ_yuAm~9vqoC->5cgIvWwBT(d=Q(YS*OXM%;>f&)rK}fQa-0- zHXEyQDS`nC{3wx_50KzBpk7lP(Mvb3A*ZJk(W_4zR21U>At>EEcocJPwBfj>uG+MKIjy)MZ(gkEO=+4cJFlu&xj^Gn>o)2}&cd7KW98UrKPy|a8r8Bhs zVAMDjs#xW#g!u-mnP!zifs;;|0o{|af>-+^J*Ir;iXWfPBjzh5m2$)^8(sJzt1kpGR-CW85YM#wf@?AAU8u}4mG0U)bg(y8imq5 zjXAANs!<3c2_JJLEV&$~&V)a&BP$?+&BQrE0IrC3yT$=k#9)f_OWn%HxMnAc)utDd 
z+@AzebJ*l4dsAKr0QNML{+)}HsXFcK4L+%p`uL~$<{ZmVF@5FH6DVW z(!mit?aO@h)QK?Isx)zdZ3YVAuKC`OGHuKA!6L0a`78kBZmjBjqgj#1pFX<8XM z1(=CZX^ZTexb%W*nOg1KyTZ|CyoP;v1U;yDI>|_uM+PZ&QLBkbPZ&$hU4e}(_JJjK zzA-HFFsH%vizFBUDkUS3r%5HsaZB=!g->yri}7hIc~56K4DO(nNF$qq+L!^hP7vu= z5X1SflJbOIYxf(?u#Wiv!Tff;=LuI(A0tmOVH83w1{F9e%y7Otv)zF=2nMUX2uXrb z1k<9ImYm%A`ZRWhb?( zX{WU0|Jw%JJSHPXk{PFvCm#yXu}6z_)@fSRIHlHM4fw$P_M<8nq2^MB*c@W>xt6s%e^3xvTyocWi#(EMEvC zJwu*MOoJ7L-mxQnjCp_zp3?6*#LP@XGi|dXXy+XgBo_^LVAqBW-4))bB?3R+RaGjX zCi3owjn87V)R2uqXf9QwyRAq_oyCf zN1;brv0t&x3eh4$=UuAsWkA+wv8JwRE`17~Qu^_31^b0uz66joOo+Hkq9%P-M{c<~ zrq9y!PU{SHvVJqNoC}OMn2!J{drV2g7ECh?3PH&ev#Ko6Y}puRv7B+uh*O8wjgDlH z&mLu}p5kz+$q)~aj9?Y;UNe7@-a-2D!Xabu&>>Cn$Q+l(z?VW|*02xLk~K*_W?o6= zSQ6eJF*+?8BU_oCVT~{v)dPSLNJ)#^yekf_D+zXzv1$u?tG{oU&{zjHZ8T(R1zW2a z!L)qDG+UtIZ^I|iP@&TmO4US(42N?yGkcdacDWRuibH_Ia^gvmaxAEPKqa$pIZU9W zu;piH>-}LF0-iUh3J@qLhbORe$usgov73GtYsq@g(t@{Y)_YoOp>2zSg%qo>OvLHG zf>ol|JDLE@AP>JlC6=gyQNtY%=Hk!e)J`+&P`C+K&H$M zfP&C`JOv(ly*MQZ11Ab@1tz)ic>r?65WO(L$Q*{sXg?yQz-ybi6eZ&){4QnJd0XU4 zw epsilon + # debug: never exploit + do_exploit = False + + if do_exploit: + # exploit + move = exploitation(ttt, Q) + if not ttt.input_is_valid(move): + move = random_agent(ttt) + + else: + # Random Agent (exploration) + move = random_agent(ttt) + + # remember the couples (s,a) to give reward at the end of the episode + list_state_move.append((state_index, move)) + + ttt.update_board(move) + ttt.winner = ttt.check_win() + + + if k % 1000 == 0: + tqdm.write(f"epsiode: {k}, epsilon: {epsilon:0.3f}, winner: {ttt.winner}, \ +exploited: {tradeoff > epsilon}") + + # reward shaping + if ttt.winner == 1: + r = 1 + elif ttt.winner == 2: + r = -1 + else: + r = 0 # draw + + #print(r) + reward_history.append(r) + + # Update Q-table (not yet uising the Bellman equation, case is too simple) + for s, a in list_state_move: + Q[s,a] += alpha * r + r *= -1 # inverse the reward for the next move due to player inversion + + # Update the epsilon-greedy (decreasing) 
strategy + epsilon = min_epsilon + (max_epsilon - min_epsilon)*np.exp(-decay_rate*k) + + # remember stats for history + winner_history.append(ttt.winner) + epsilon_history.append(epsilon) + exploit_history.append(tradeoff > epsilon) + # input() + #import code + #code.interact(local=locals()) + +# Helper functions + +def player_move(ttt): + ttt.display_board() + state_index_from_board(ttt.board) + ttt.display_board() + print("Human's turn") + + flag = False + while flag is not True: + move = int(input("What is your move? [1-9] ")) + move -= 1 # range is 0-8 in array + flag = ttt.input_is_valid(move) + + ttt.update_board(move) + ttt.winner = ttt.check_win() + ttt.display_board() + +def ai_move(ttt, Q): + ttt.display_board() + print("AI's turn") + + move = exploitation(ttt, Q) + + ttt.input_is_valid(move) + ttt.update_board(move) + ttt.winner = ttt.check_win() + ttt.display_board() + + + +# plot graph +cumulative_win = np.cumsum(reward_history) +plt.plot(cumulative_win) +plt.title("Cumulative reward") +plt.xlabel("epochs") +plt.ylabel("cumsum of rewards") +plt.show() + +plt.plot(epsilon_history) +plt.title("Epsilon history") +plt.show()