Spaces:
Build error
Build error
"""
Self-play orchestration for o1 agent.
Runs self-play games using MCTS for move selection.
"""
import chess | |
from o1.mcts import MCTS | |
def run_selfplay(agent, num_games=1, simulations=50):
    """Run self-play games using MCTS and return the collected experience.

    Each game starts from a fresh ``chess.Board()`` with a fresh ``MCTS``
    instance and is played until ``board.is_game_over()`` is true.

    Args:
        agent: Object providing ``board_to_tensor(board)``; it is also
            passed to ``MCTS``.
        num_games: Number of games to play. ``0`` yields an empty list.
        simulations: Forwarded to ``MCTS`` as its ``simulations`` argument.

    Returns:
        A flat list of ``(state_tensor, policy, z)`` tuples, one per
        position played, across all games. ``policy`` is a one-hot list of
        length 4672 marking the chosen move. ``z`` is the final outcome of
        the game the position came from: ``1`` for ``'1-0'``, ``-1`` for
        ``'0-1'`` and ``0`` for anything else. The same ``z`` is attached
        to every position of a game regardless of which side was to move.

    Raises:
        ValueError: If the move returned by ``mcts.search`` is not among
            the board's legal moves (raised by ``list.index``).
    """
    # 4672 == 8 * 8 * 73, presumably the AlphaZero-style policy size; it is
    # NOT the maximum number of legal moves in a chess position.
    policy_size = 4672
    # Symmetric value targets; any result other than a decisive one maps to 0.
    outcome_values = {'1-0': 1, '0-1': -1}

    all_experience = []
    for _ in range(num_games):
        board = chess.Board()
        mcts = MCTS(agent, simulations=simulations)
        positions = []
        while not board.is_game_over():
            move = mcts.search(board)
            state_tensor = agent.board_to_tensor(board)
            # Policy: one-hot for the chosen move (for now).
            # NOTE(review): the hot index is the move's position within this
            # board's legal-move list, not a fixed move encoding -- confirm
            # this is what the training code expects.
            policy = [0] * policy_size
            policy[list(board.legal_moves).index(move)] = 1
            positions.append((state_tensor, policy))
            board.push(move)
        # The outcome is only known once the game ends, so it is attached
        # to every recorded position afterwards.
        z = outcome_values.get(board.result(), 0)
        all_experience.extend((state, pi, z) for state, pi in positions)
    return all_experience
# Self-play loop implementation will go here