File size: 1,347 Bytes
8806ce1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
"""

Self-play orchestration for o1 agent.

Runs self-play games using MCTS for move selection.

"""
import chess
from o1.mcts import MCTS

def run_selfplay(agent, num_games=1, simulations=50):
    """Run self-play games using MCTS and return the collected experience.

    Args:
        agent: Object handed to ``MCTS`` and used to encode each position via
            ``agent.board_to_tensor(board)``.
        num_games: Number of games to play. Zero or a negative number plays
            no games and yields an empty list.
        simulations: Forwarded to ``MCTS`` as its ``simulations`` argument.

    Returns:
        A flat list of ``(state_tensor, policy, value)`` tuples, one per
        position in which a move was made, in playing order across all games.
        ``policy`` is a list of 4672 ints that is one-hot at the chosen move,
        and ``value`` is the final outcome of the game the position belongs
        to: 1 for '1-0', -1 for '0-1', and 0 otherwise.

    Raises:
        ValueError: If the move returned by ``mcts.search`` is not among the
            board's current legal moves.
    """
    # NOTE(review): 4672 looks like the AlphaZero-style 8x8x73 action-space
    # size rather than a bound on the number of legal moves -- confirm it
    # matches the width of the network's policy head.
    policy_size = 4672
    # Outcomes are symmetric and bounded in [-1, 1]; any result string not
    # listed here (e.g. a draw) maps to 0.
    result_to_value = {'1-0': 1, '0-1': -1}

    all_experience = []
    for _ in range(num_games):
        board = chess.Board()
        mcts = MCTS(agent, simulations=simulations)
        positions = []
        while not board.is_game_over():
            move = mcts.search(board)
            state_tensor = agent.board_to_tensor(board)
            # Policy: one-hot for the chosen move (for now).
            # NOTE(review): the hot index is the move's position within the
            # current legal-move list, not a fixed move encoding -- verify
            # that this is what the training code expects.
            policy = [0] * policy_size
            policy[list(board.legal_moves).index(move)] = 1
            positions.append((state_tensor, policy))
            board.push(move)
        # The value is only known once the game has ended, so it is attached
        # to every recorded position afterwards. It is the same number for
        # all positions of a game, i.e. it is not flipped for the side to move.
        z = result_to_value.get(board.result(), 0)
        all_experience.extend((s, p, z) for s, p in positions)
    return all_experience

# NOTE: run_selfplay() above is the current self-play loop; further orchestration will go here.