File size: 3,825 Bytes

8806ce1

"""

Monte Carlo Tree Search (MCTS) for o1 agent.

Basic implementation: runs simulations, selects moves by visit count.

Integrate with neural net for policy/value guidance for full strength.

"""
import chess
import random
from collections import defaultdict
import torch

class MCTSNode:
    def __init__(self, board, parent=None, move=None):
        self.board = board.copy()
        self.parent = parent
        self.move = move
        self.children = []
        self.visits = 0
        self.value = 0.0
        self.untried_moves = list(board.legal_moves)

    def is_fully_expanded(self):
        return len(self.untried_moves) == 0

    def best_child(self, c_param=1.4):
        choices = [
            (child.value / (child.visits + 1e-6) + c_param * ( (2 * (self.visits + 1e-6)) ** 0.5 / (child.visits + 1e-6) ), child)
            for child in self.children
        ]
        return max(choices, key=lambda x: x[0])[1]

class MCTS:
    def __init__(self, agent=None, simulations=50):
        self.agent = agent
        self.simulations = simulations

    def search(self, board, restrict_top_n=None):
        root = MCTSNode(board)
        for _ in range(self.simulations):
            node = root
            sim_board = board.copy()
            # Selection
            while node.is_fully_expanded() and node.children:
                node = node.best_child()
                sim_board.push(node.move)
            # Expansion
            if node.untried_moves:
                move = random.choice(node.untried_moves)
                sim_board.push(move)
                child = MCTSNode(sim_board, parent=node, move=move)
                node.children.append(child)
                node.untried_moves.remove(move)
                node = child
            # Simulation
            result = self.simulate(sim_board)
            # Backpropagation
            # If it's black's turn at the node, invert the value for correct perspective
            invert = False
            temp_node = node
            while temp_node.parent is not None:
                temp_node = temp_node.parent
                invert = not invert
            value = -result if invert else result
            while node:
                node.visits += 1
                node.value += value
                node = node.parent
        # Choose move with most visits, but restrict to top-N if specified
        if not root.children:
            return random.choice(list(board.legal_moves))
        children_sorted = sorted(root.children, key=lambda c: c.visits, reverse=True)
        if restrict_top_n is not None and restrict_top_n < len(children_sorted):
            # Only consider top-N moves
            children_sorted = children_sorted[:restrict_top_n]
        best = max(children_sorted, key=lambda c: c.visits)
        return best.move

    def simulate(self, board, use_diffusion=True, diffusion_steps=10, noise_scale=1.0):
        # Use neural network to evaluate the board instead of random playout
        if self.agent is not None:
            with torch.no_grad():
                if use_diffusion and hasattr(self.agent, 'predict_with_diffusion'):
                    _, value = self.agent.predict_with_diffusion(board, steps=diffusion_steps, noise_scale=noise_scale)
                else:
                    _, value = self.agent.predict(board)
                return value.item()
        # Fallback: play random moves until game ends
        while not board.is_game_over():
            move = random.choice(list(board.legal_moves))
            board.push(move)
        result = board.result()
        if result == '1-0':
            return 1
        elif result == '0-1':
            return -1
        else:
            return 0