Spaces:

FlameF0X
/

Play-with-o2

Sleeping

App Files Files Community

FlameF0X committed on May 28

Commit

cc24eeb

verified ·

1 Parent(s): 4867fd5

Upload 3 files

Browse files

Files changed (3) hide show

src/mcts.py +88 -0
src/o2_agent.py +78 -0
src/o2_model.py +77 -0

src/mcts.py ADDED Viewed

	@@ -0,0 +1,88 @@

+import chess
+import torch
+import numpy as np
+from o2_model import board_to_tensor
+class MCTSNode:
+    """A single position in the search tree plus the statistics kept for it."""
+    def __init__(self, board, parent=None, move=None):
+        """Store a copy of `board`, the link to `parent`, and the `move` that led here."""
+        # Keep a private copy of the board rather than a reference to the caller's object.
+        self.board = board.copy()
+        self.parent, self.move = parent, move
+        # Child nodes keyed by the move that reaches them; empty until expanded.
+        self.children = {}
+        # N: visit count, W: total value, Q: mean value (W / N), P: prior probability.
+        self.N = self.W = self.Q = self.P = 0
+class MCTS:
+    """Monte Carlo tree search guided by a policy/value network (PUCT selection).
+
+    `model` is called with a batch holding one encoded position and must return
+    `(policy_logits, value)`. The search assumes `value` is expressed from the
+    point of view of the side to move -- TODO confirm against the training code.
+    """
+    def __init__(self, model, simulations=100, c_puct=1.5):
+        """Remember the network, the number of descents per search and the PUCT weight."""
+        self.model = model
+        self.simulations = simulations
+        self.c_puct = c_puct
+    def run(self, board):
+        """Search from `board` and return the root move with the most visits.
+
+        Raises:
+            ValueError: if the game is already over, so no move can be chosen.
+        """
+        root = MCTSNode(board)
+        self._expand(root)
+        if not root.children:
+            raise ValueError("Cannot search a position where the game is already over")
+        for _ in range(self.simulations):
+            node = root
+            search_path = [node]
+            # Selection: follow the best PUCT child until reaching an unexpanded node.
+            while node.children:
+                node = self._select_child(node)
+                search_path.append(node)
+            # Expansion: `value` is seen from the side to move at the leaf.
+            value = self._expand(node)
+            # Backpropagation: a node's Q is read by its parent during selection, so it
+            # must be stored from the viewpoint of the player who moved INTO the node.
+            # That is the opponent of the node's side to move, hence the flip happens
+            # before the update rather than after it.
+            for n in reversed(search_path):
+                value = -value
+                n.N += 1
+                n.W += value
+                n.Q = n.W / n.N
+        # The most-visited root child is the move the search trusts most.
+        return max(root.children.items(), key=lambda item: item[1].N)[0]
+    def _select_child(self, node):
+        """Return the child of `node` with the highest PUCT score (first one on ties)."""
+        # The root is expanded before any visit is recorded, so its N can be 0 here;
+        # clamping to 1 keeps the prior term from vanishing on that first descent.
+        sqrt_parent_visits = np.sqrt(max(node.N, 1))
+        def puct(child):
+            return child.Q + self.c_puct * child.P * sqrt_parent_visits / (1 + child.N)
+        return max(node.children.values(), key=puct)
+    def _expand(self, node):
+        """Create children for `node` and return its value for the side to move.
+
+        Terminal positions get no children and are scored from the game result;
+        other positions are scored by the network, whose policy output is turned
+        into priors over the legal moves.
+        """
+        if node.board.is_game_over():
+            result = node.board.result()
+            if result == '1-0':
+                white_score = 1
+            elif result == '0-1':
+                white_score = -1
+            else:
+                return 0
+            # Convert the White-relative result to the side to move so that it
+            # matches the sign alternation applied during backpropagation.
+            return white_score if node.board.turn == chess.WHITE else -white_score
+        tensor = torch.tensor(board_to_tensor(node.board)).unsqueeze(0)
+        with torch.no_grad():
+            policy, value = self.model(tensor)
+        policy = torch.softmax(policy, dim=1).numpy()[0]
+        legal_moves = list(node.board.legal_moves)
+        # Gather the probabilities of the legal moves once, then renormalise them;
+        # the epsilon guards against division by zero if they all underflow.
+        indices = [self.move_to_index(move) for move in legal_moves]
+        priors = policy[indices]
+        priors = priors / (priors.sum() + 1e-8)
+        for move, prior in zip(legal_moves, priors):
+            # MCTSNode copies the board it is given, so push onto the child's copy.
+            child = MCTSNode(node.board, parent=node, move=move)
+            child.board.push(move)
+            child.P = float(prior)
+            node.children[move] = child
+        return value.item()
+    def move_to_index(self, move):
+        """Map `move` to a policy index in [0, 4672), as O2Agent.move_to_index does.
+
+        Plain moves use from_square * 64 + to_square. Promotions add
+        4096 + (promotion - 1) * 256 and wrap modulo 4672 so the index always fits
+        the 4672-wide policy output.
+        NOTE(review): the wrap can alias a promotion with a plain move's index.
+        """
+        index = move.from_square * 64 + move.to_square
+        if move.promotion:
+            index += 4096 + (move.promotion - 1) * 256
+        return index % 4672

src/o2_agent.py ADDED Viewed

	@@ -0,0 +1,78 @@

+import chess
+import torch
+from o2_model import O2Net, board_to_tensor
+from mcts import MCTS
+import random
+# Optional: Endgame tablebase and opening book integration placeholders
+# You can use python-chess's tablebase and opening book modules if desired
+# Example for endgame tablebase:
+# from chess import tablebase
+# tb = tablebase.Tablebase()
+# tb.add_tablebase('/path/to/syzygy')
+# if tb.probe_wdl(board) is not None:
+#     # Use tablebase move
+# Example for opening book:
+# from chess.polyglot import open_reader
+# with open_reader('book.bin') as reader:
+#     entry = reader.find(board)
+#     move = entry.move
+class O2Agent:
+    """Chess move chooser backed by an `O2Net`, with or without MCTS look-ahead."""
+    def __init__(self, model_path=None):
+        """Build the network and, when `model_path` is given, load its weights."""
+        self.model = O2Net()
+        if model_path:
+            # Inference here always feeds CPU tensors to a model that is never moved,
+            # so map checkpoints saved on another device onto the CPU while loading.
+            self.model.load_state_dict(torch.load(model_path, map_location="cpu"))
+        self.model.eval()
+    def select_move(self, board, use_mcts=True, simulations=100):
+        """Return a move for the side to move on `board`.
+
+        With `use_mcts` the choice is delegated to `MCTS.run` using `simulations`
+        descents; otherwise the legal move with the highest policy logit is returned.
+
+        Raises:
+            ValueError: on the policy-only path when `board` has no legal moves.
+        """
+        if use_mcts:
+            mcts = MCTS(self.model, simulations=simulations)
+            return mcts.run(board)
+        legal_moves = list(board.legal_moves)
+        if not legal_moves:
+            raise ValueError("No legal moves available in this position")
+        tensor = torch.tensor(board_to_tensor(board)).unsqueeze(0)
+        with torch.no_grad():
+            policy, _ = self.model(tensor)
+        logits = policy[0]
+        # Softmax is monotonic, so ranking by raw logits picks the same move.
+        return max(legal_moves, key=lambda move: logits[self.move_to_index(move)].item())
+    def move_to_index(self, move):
+        """Map `move` to a policy index in [0, 4672).
+
+        Plain moves use from_square * 64 + to_square. Promotions add
+        4096 + (promotion - 1) * 256, and results of 4672 or more wrap modulo 4672.
+        """
+        idx = move.from_square * 64 + move.to_square
+        if move.promotion:
+            # python-chess numbers piece types PAWN=1, KNIGHT=2, BISHOP=3, ROOK=4,
+            # QUEEN=5, so `promotion` is in 2..5 here.
+            # NOTE(review): after the wrap below a promotion can share an index with
+            # a plain move; the encoding is left unchanged so it keeps matching
+            # whatever encoding existing checkpoints were trained with.
+            idx += 4096 + (move.promotion - 1) * 256
+        # Keep the index inside the 4672-wide policy output.
+        return idx % 4672
+    def index_to_move(self, board, index):
+        """Return the legal move on `board` that `move_to_index` maps to `index`.
+
+        If several legal moves share the index, a non-promotion move is preferred.
+        If no legal move matches, a random legal move is returned instead.
+        """
+        matches = [move for move in board.legal_moves if self.move_to_index(move) == index]
+        if matches:
+            # min() keeps the first candidate with the smallest key: False (plain) < True.
+            return min(matches, key=lambda move: bool(move.promotion))
+        # Fallback: pick a random legal move
+        return random.choice(list(board.legal_moves))
+if __name__ == "__main__":
+    # Smoke test: ask a freshly initialised agent for a move from the starting position.
+    starting_position = chess.Board()
+    chosen = O2Agent().select_move(starting_position)
+    print("O2 selects:", chosen)

src/o2_model.py ADDED Viewed

	@@ -0,0 +1,77 @@

+import chess
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+class O2Net(nn.Module):
+    """Fully connected residual network with a policy head and a value head.
+
+    Input is a batch of flattened 1152-element board encodings (18 planes of 8x8).
+    `forward` returns unnormalised policy logits over 4672 move indices and a
+    tanh-squashed value in [-1, 1].
+    """
+    def __init__(self):
+        super().__init__()
+        width = 1024
+        # Project the flattened encoding up to the trunk width.
+        self.input_fc = nn.Linear(1152, width)
+        # Trunk: 10 blocks of Linear -> BatchNorm -> ReLU -> Linear -> BatchNorm.
+        blocks = []
+        for _ in range(10):
+            blocks.append(nn.Sequential(
+                nn.Linear(width, width),
+                nn.BatchNorm1d(width),
+                nn.ReLU(),
+                nn.Linear(width, width),
+                nn.BatchNorm1d(width),
+            ))
+        self.res_blocks = nn.ModuleList(blocks)
+        # Applied after each block's output has been added to its input.
+        self.res_relu = nn.ReLU()
+        # Policy head: 1024 -> 512 -> 256 -> 4672 logits.
+        self.policy_fc1 = nn.Linear(width, 512)
+        self.policy_fc2 = nn.Linear(512, 256)
+        self.policy_fc3 = nn.Linear(256, 4672)
+        # Value head: 1024 -> 512 -> 128 -> 1 scalar.
+        self.value_fc1 = nn.Linear(width, 512)
+        self.value_fc2 = nn.Linear(512, 128)
+        self.value_fc3 = nn.Linear(128, 1)
+    def forward(self, x):
+        """Return `(policy_logits, value)` for a batch of encoded positions `x`."""
+        hidden = F.relu(self.input_fc(x))
+        for block in self.res_blocks:
+            # Residual connection: add the block's input back before the activation.
+            hidden = self.res_relu(block(hidden) + hidden)
+        # Policy head: no softmax here, callers receive raw logits.
+        policy_hidden = F.relu(self.policy_fc2(F.relu(self.policy_fc1(hidden))))
+        policy = self.policy_fc3(policy_hidden)
+        # Value head: tanh bounds the scalar to [-1, 1].
+        value_hidden = F.relu(self.value_fc2(F.relu(self.value_fc1(hidden))))
+        value = torch.tanh(self.value_fc3(value_hidden))
+        return policy, value
+def board_to_tensor(board):
+    """Encode `board` as a flat float32 vector of 18 * 8 * 8 = 1152 values.
+
+    Planes 0-5 hold White's pieces and planes 6-11 Black's, indexed by
+    piece_type - 1. Plane 12 is filled with the side-to-move flag, planes 13-16
+    with the four castling-right flags, and plane 17 marks the en passant square.
+    """
+    planes = np.zeros((18, 8, 8), dtype=np.float32)
+    for square, piece in board.piece_map().items():
+        colour_offset = 0 if piece.color == chess.WHITE else 6
+        # Squares are numbered 0..63; square // 8 is the row and square % 8 the column.
+        planes[piece.piece_type - 1 + colour_offset, square // 8, square % 8] = 1
+    # Whole-plane flags: each plane is constant, set to 0 or 1.
+    planes[12] = int(board.turn)
+    castling_flags = (
+        board.has_kingside_castling_rights(chess.WHITE),
+        board.has_queenside_castling_rights(chess.WHITE),
+        board.has_kingside_castling_rights(chess.BLACK),
+        board.has_queenside_castling_rights(chess.BLACK),
+    )
+    for offset, flag in enumerate(castling_flags):
+        planes[13 + offset] = int(flag)
+    # En passant: a single marked square, left all-zero when there is none.
+    ep_square = board.ep_square
+    if ep_square is not None:
+        planes[17, ep_square // 8, ep_square % 8] = 1
+    return planes.flatten()
+if __name__ == "__main__":
+    # Smoke test: run the starting position through an untrained network.
+    board = chess.Board()
+    net = O2Net()
+    # BatchNorm1d cannot compute batch statistics from a single sample in training
+    # mode, so switch to eval mode before the one-position forward pass.
+    net.eval()
+    x = torch.tensor(board_to_tensor(board)).unsqueeze(0)
+    # No gradients are needed for a forward-only check.
+    with torch.no_grad():
+        policy, value = net(x)
+    print("Policy shape:", policy.shape)
+    print("Value:", value.item())