# File size: 1,347 Bytes

# 8806ce1

"""

Self-play orchestration for o1 agent.

Runs self-play games using MCTS for move selection.

"""
import chess
from o1.mcts import MCTS

def run_selfplay(agent, num_games=1, simulations=50):
    """Play MCTS-driven self-play games and collect training examples.

    Every game starts from the standard initial position. A fresh ``MCTS``
    searcher is created per game and queried for a move until
    ``board.is_game_over()`` reports the game has ended.

    Args:
        agent: Object handed to ``MCTS`` and used to encode positions; it
            must provide ``board_to_tensor(board)``.
        num_games: Number of games to play. Zero or a negative number plays
            nothing and yields an empty list.
        simulations: Forwarded to ``MCTS`` as its ``simulations`` argument.

    Returns:
        A flat list, in play order across all games, of
        ``(state_tensor, policy, z)`` tuples. ``policy`` is a 4672-element
        one-hot list marking the chosen move and ``z`` is the final outcome
        of the game the position came from: ``1`` if White won, ``-1`` if
        Black won, ``0`` otherwise.

    Raises:
        ValueError: If the move returned by the search is not among the
            position's legal moves.
    """
    # 4672 = 8 * 8 * 73, the size of the AlphaZero chess move encoding (it is
    # not the maximum number of legal moves in a position).
    policy_size = 4672
    # Symmetric outcome values; any other result string (a draw) maps to 0.
    outcome_values = {'1-0': 1, '0-1': -1}

    all_experience = []
    for _ in range(num_games):
        board = chess.Board()
        mcts = MCTS(agent, simulations=simulations)
        positions = []
        while not board.is_game_over():
            move = mcts.search(board)
            state_tensor = agent.board_to_tensor(board)
            # Policy: one-hot for the chosen move (for now).
            # NOTE(review): the hot index is the move's position in this
            # board's legal-move list, not a fixed per-move encoding, so the
            # same index means different moves in different positions --
            # confirm this is what the trainer expects.
            policy = [0] * policy_size
            policy[list(board.legal_moves).index(move)] = 1
            positions.append((state_tensor, policy))
            board.push(move)
        # The outcome is only known once the game ends, so it is attached to
        # every recorded position afterwards.
        # NOTE(review): z is White's result for every position, including
        # those with Black to move -- confirm the value target is not meant
        # to be from the side-to-move's perspective.
        z = outcome_values.get(board.result(), 0)
        all_experience.extend((state, policy, z) for state, policy in positions)
    return all_experience

# Self-play loop implementation will go here