Skip to content

Instantly share code, notes, and snippets.

@ltbringer
Created September 9, 2018 20:15
Show Gist options
  • Select an option

  • Save ltbringer/3814b1b4e1a1c3bbc03ba9d1582d36d8 to your computer and use it in GitHub Desktop.

Select an option

Save ltbringer/3814b1b4e1a1c3bbc03ba9d1582d36d8 to your computer and use it in GitHub Desktop.
Training code for a tic-tac-toe solver
# Board symbols assigned to the two bots. They are passed to
# Board.player_move when a bot moves and compared against game.winner
# in optimize_bot to decide which bot is rewarded.
bot1_sym = 'O'
bot2_sym = 'X'
def optimize_bot(game, bot1, bot2):
    """
    Feed the outcome of a finished game back to both bots.

    The bot whose symbol matches ``game.winner`` gets ``on_reward(1)`` and
    its opponent gets ``on_reward(-1)``. When ``game.winner`` matches
    neither symbol, no feedback is given to either bot.
    """
    # Pick the (bot1, bot2) reward pair first, then apply it in one place.
    if game.winner == bot1_sym:
        bot1_reward, bot2_reward = 1, -1
    elif game.winner == bot2_sym:
        bot1_reward, bot2_reward = -1, 1
    else:
        # Neither bot won: nothing to reinforce.
        return

    bot1.on_reward(bot1_reward)
    bot2.on_reward(bot2_reward)
def _play_game(game, bot1, bot2):
    """
    Play one game on ``game`` with the bots alternating moves, bot1 first.

    Returns ``'bot1'`` or ``'bot2'`` when that bot's move produced a winner,
    or ``None`` when the game ended in a draw or the board went stale.
    """
    turn_order = (('bot1', bot1_sym, bot1), ('bot2', bot2_sym, bot2))
    # game.stale is only consulted before bot1's move, as in the original loop.
    while not game.stale:
        for label, symbol, bot in turn_order:
            outcome = game.player_move(symbol, *bot.select_move(game.board))
            # Per the original inline notes, player_move yields None while
            # the game is undecided and 'draw' when nobody can win. 'draw'
            # is a truthy string, so it must be tested before the generic
            # truthiness check or a draw would be counted as a win.
            if outcome == 'draw':
                return None
            if outcome:
                return label
    return None


def train(epochs, bot1, bot2):
    """
    Let two bots play ``epochs`` games against each other and learn from them.

    For every game a fresh ``Board`` is created and the bots alternate moves
    until one of them wins, the game is drawn, or the board is stale. After
    a win, ``optimize_bot`` rewards the winner and punishes the loser.

    Parameters:
        epochs: number of games to play.
        bot1: agent playing ``bot1_sym``; must provide ``select_move(board)``
            and ``on_reward(value)``.
        bot2: agent playing ``bot2_sym``; same interface as ``bot1``.

    Returns:
        A tuple ``(win_trace, bot1_wins, bot2_wins)`` where ``win_trace`` is a
        DataFrame with one row per epoch and columns ``'bot1'``/``'bot2'``
        holding 1 for the bot that won that epoch (0 otherwise), and the two
        integers are the total win counts.
    """
    wins = {'bot1': 0, 'bot2': 0}
    win_trace = pd.DataFrame(data=np.zeros((epochs, 2)), columns=['bot1', 'bot2'])

    for i in range(epochs):
        print('-' * 100)
        print('epoch: {}'.format(i + 1))

        game = Board()
        victor = _play_game(game, bot1, bot2)
        if victor is None:
            # Draw or stale board: nobody is rewarded and nothing is recorded.
            continue

        optimize_bot(game, bot1, bot2)
        wins[victor] += 1
        # DataFrame.set_value was removed in pandas 1.0; .at is the
        # supported scalar setter.
        win_trace.at[i, victor] = 1

    return win_trace, wins['bot1'], wins['bot2']
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment