"""Self-play orchestration for o1 agent.

Runs self-play games using MCTS for move selection.
"""

import chess

from o1.mcts import MCTS

# Size of the fixed policy vector. NOTE(review): 4672 is the AlphaZero-style
# move-encoding space (8 x 8 x 73 move planes), NOT the maximum number of
# legal moves in a chess position — the original comment was incorrect.
POLICY_SIZE = 4672


def run_selfplay(agent, num_games=1, simulations=50):
    """Run self-play games using MCTS and return collected experience.

    Args:
        agent: Player agent; must provide ``board_to_tensor(board)`` and be
            consumable by ``MCTS``.
        num_games: Number of complete games to play.
        simulations: MCTS simulations per move selection.

    Returns:
        List of ``(state_tensor, policy, z)`` tuples, where ``z`` is the
        final game result (+1 White win, -1 Black win, 0 draw) assigned to
        every position of the corresponding game.
    """
    all_experience = []
    for _ in range(num_games):
        board = chess.Board()
        mcts = MCTS(agent, simulations=simulations)
        game_data = []
        while not board.is_game_over():
            move = mcts.search(board)
            state_tensor = agent.board_to_tensor(board)
            # One-hot policy over the chosen move (placeholder until the
            # full MCTS visit-count distribution is used as the target).
            # TODO(review): indexing by the move's position within
            # board.legal_moves makes each slot's meaning change from one
            # position to the next; a fixed move encoding (e.g.
            # from-square x to-square x promotion) is required before this
            # policy target is meaningful for training.
            policy = [0] * POLICY_SIZE
            move_idx = list(board.legal_moves).index(move)
            policy[move_idx] = 1
            game_data.append((state_tensor, policy))
            board.push(move)
        # Map the terminal result onto every position of the game.
        # BUG FIX: a White win previously assigned z = 5 while a Black win
        # assigned -1; the value target must be symmetric in {-1, 0, +1}.
        result = board.result()
        if result == '1-0':
            z = 1
        elif result == '0-1':
            z = -1
        else:
            z = 0
        # NOTE(review): z is always from White's perspective; AlphaZero-style
        # training normally flips the sign to the side to move at each
        # position — confirm what the downstream trainer expects.
        all_experience.extend((s, p, z) for (s, p) in game_data)
    return all_experience