play-with-o1 / src /o1 /selfplay.py
FlameF0X's picture
Upload 11 files
8806ce1 verified
"""
Self-play orchestration for o1 agent.
Runs self-play games using MCTS for move selection.
"""
import chess
from o1.mcts import MCTS
def run_selfplay(agent, num_games=1, simulations=50):
    """Run self-play games using MCTS and return the collected experience.

    Args:
        agent: Object handed to ``MCTS`` and used to encode positions via
            ``agent.board_to_tensor(board)``.
        num_games: Number of complete games to play.
        simulations: Forwarded to ``MCTS`` as its ``simulations`` argument.

    Returns:
        A flat list of ``(state_tensor, policy, z)`` tuples, one per position
        played, concatenated over all games. ``policy`` is a one-hot list of
        length 4672 and ``z`` is the final outcome of the game the position
        came from: 1 for "1-0", -1 for "0-1", and 0 for anything else.
        ``z`` is the same for every position of a game; it is not flipped
        according to the side to move.
    """
    # Length of the policy vector. NOTE(review): 4672 is presumably the
    # AlphaZero-style 8x8x73 action-space size, not the maximum number of
    # legal moves in a position -- confirm against the agent's policy head.
    policy_size = 4672
    # Map the game result string to the value target. Decisive results are
    # symmetric (+1 / -1) so the value target stays within [-1, 1].
    outcome_values = {'1-0': 1, '0-1': -1}

    all_experience = []
    for _ in range(num_games):
        board = chess.Board()
        mcts = MCTS(agent, simulations=simulations)
        positions = []
        while not board.is_game_over():
            move = mcts.search(board)
            state_tensor = agent.board_to_tensor(board)
            # Policy: one-hot for the chosen move (for now). The hot index is
            # the move's position within the current legal-move list, so it
            # must be computed before the move is pushed.
            # NOTE(review): this is not a fixed move encoding -- confirm it
            # matches what the training code expects.
            policy = [0] * policy_size
            policy[list(board.legal_moves).index(move)] = 1
            positions.append((state_tensor, policy))
            board.push(move)
        # The outcome is only known once the game ends; attach it to every
        # recorded position of this game.
        z = outcome_values.get(board.result(), 0)
        all_experience.extend((s, p, z) for s, p in positions)
    return all_experience
# NOTE: the self-play loop is implemented in run_selfplay() above.