Spaces:

FlameF0X
/

Play-with-o2

Sleeping

App Files Files Community

FlameF0X committed on May 31

Commit

bcc23fe

verified ·

1 Parent(s): 6112849

Upload 4 files

Browse files

Files changed (3) hide show

src/app.py +277 -0
src/mcts.py +40 -17
src/o2_agent.py +86 -88

src/app.py ADDED Viewed

	@@ -0,0 +1,277 @@

+import streamlit as st
+st.set_page_config(page_title="Play Chess vs o2", layout="centered")
+import chess
+import chess.svg
+import torch
+from o2_model import O2Net, board_to_tensor
+from o2_agent import O2Agent
+from PIL import Image
+import io
+import base64
+import os
+import chess.pgn
+import requests
+import random
+import re
+import tempfile
+# Use temp directory for cache/model paths
+MODEL_CACHE_DIR = tempfile.gettempdir()
+MODEL_REPO = "FlameF0X/o2"
+MODEL_FILENAME = "o2_agent.pth"
+def ensure_model():
+    # Fetch MODEL_FILENAME from the MODEL_REPO repository via hf_hub_download
+    # and return the path it reports. MODEL_CACHE_DIR (the system temp
+    # directory) is passed as the download cache.
+    # The import is local, so huggingface_hub is only needed once this runs.
+    from huggingface_hub import hf_hub_download
+    # Always download to cache and return the path
+    return hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME, cache_dir=MODEL_CACHE_DIR)
+# Load model with better error handling
+# Builds an O2Agent, loads the downloaded state dict mapped onto the CPU, and
+# puts the network in eval mode. Returns the agent, or None when any step
+# raises (the error is also reported with st.error).
+# Decorated with st.cache_resource so the result is reused across reruns.
+@st.cache_resource
+def load_agent():
+    try:
+        model_path = ensure_model()
+        # Defensive guard: bail out if no path came back.
+        if model_path is None:
+            return None
+        # O2Agent() is created without a path so the weights can be loaded
+        # here with an explicit CPU map_location.
+        agent = O2Agent()
+        agent.model.load_state_dict(torch.load(model_path, map_location=torch.device('cpu')))
+        agent.model.eval()
+        return agent
+    except Exception as e:
+        st.error(f"Error loading agent: {e}")
+        return None
+def render_svg(svg):
+    # Base64-encode the SVG text (as UTF-8) and return a 2-tuple:
+    # an <img> tag that embeds it as a data URI, and the base64 string itself.
+    b64 = base64.b64encode(svg.encode('utf-8')).decode('utf-8')
+    return f"<img src='data:image/svg+xml;base64,{b64}'/>", b64
+# --- Move parsing utility ---
+def parse_move_input(move_input, board):
+    # Turn free-form user text into a move that is legal on `board`.
+    #
+    # Interpretations are tried in this order and the first legal one wins:
+    #   1. UCI (e.g. "e2e4"), lower-cased first;
+    #   2. SAN exactly as typed;
+    #   3. SAN in upper-case, lower-case and capitalized form
+    #      (so "E4" and "nf3" are accepted);
+    #   4. common castling spellings such as "0-0" or "oo".
+    # Returns the chess.Move, or None when the input is empty or nothing
+    # matches.
+    if not move_input:
+        return None
+    move_input = move_input.strip()
+    # UCI needs at least "<from><to>"; the isalnum() test is a cheap pre-filter
+    # that keeps text like "O-O-O" away from the UCI parser.
+    if len(move_input) >= 4 and move_input[2:4].isalnum():
+        try:
+            move = chess.Move.from_uci(move_input.lower())
+        except ValueError:
+            move = None
+        if move is not None and move in board.legal_moves:
+            return move
+    castling_variations = {
+        '0-0': 'O-O', '0-0-0': 'O-O-O', 'oo': 'O-O', 'ooo': 'O-O-O', 'o-o': 'O-O', 'o-o-o': 'O-O-O',
+    }
+    lower_input = move_input.lower()
+    san_candidates = [move_input, move_input.upper(), lower_input, move_input.capitalize()]
+    if lower_input in castling_variations:
+        san_candidates.append(castling_variations[lower_input])
+    for candidate in san_candidates:
+        # python-chess signals unparsable, illegal and ambiguous SAN with
+        # ValueError (or a subclass). Only that is treated as "no match";
+        # any other exception is a genuine bug and is allowed to surface.
+        try:
+            move = board.parse_san(candidate)
+        except ValueError:
+            continue
+        if move in board.legal_moves:
+            return move
+    return None
+# --- Main UI ---
+# The board and the UCI move history live in st.session_state; they are
+# created here only when the keys are missing.
+if "board" not in st.session_state:
+    st.session_state.board = chess.Board()
+if "history" not in st.session_state:
+    st.session_state.history = []
+# Load agent with error handling
+# agent_loaded stays False when load_agent() returns None or raises; the rest
+# of the script then falls back to random moves for Black.
+agent = None
+agent_loaded = False
+with st.spinner("Loading o2 model..."):
+    try:
+        agent = load_agent()
+        if agent is not None:
+            agent_loaded = True
+            st.success("o2 model loaded successfully!")
+        else:
+            st.warning("Failed to load o2 model. Using random moves for AI.")
+    except Exception as e:
+        st.error(f"Failed to load o2: {e}")
+        st.warning("Using random moves for AI.")
+# Short aliases for the session objects; pushing a move or appending to the
+# history through them mutates the objects stored in st.session_state.
+board = st.session_state.board
+history = st.session_state.history
+st.title("♟️ Play Chess vs o2")
+if not agent_loaded:
+    st.info("🎲 AI is using random moves (o2 model not available)")
+# Reset replaces both session objects and reruns the script from the top.
+if st.button("Reset Game"):
+    st.session_state.board = chess.Board()
+    st.session_state.history = []
+    st.rerun()
+# Create two columns for layout
+col_board, col_pgn = st.columns([2, 1])
+with col_board:
+    # The board is drawn into a placeholder so render_board() can redraw it
+    # in place when called again later in the same run.
+    board_placeholder = st.empty()
+    def render_board():
+        # Render the current position as a 400px SVG, highlighting the most
+        # recent move when there is one, and show it centered in the placeholder.
+        try:
+            last_move = board.peek() if board.move_stack else None
+            svg_board = chess.svg.board(board=board, lastmove=last_move, size=400)
+            board_placeholder.markdown(f'<div style="display: flex; justify-content: center;">{svg_board}</div>', unsafe_allow_html=True)
+        except Exception as e:
+            st.error(f"Error rendering board: {e}")
+    render_board()
+    # Status line under the board: side to move, plus a check indicator.
+    col1, col2 = st.columns(2)
+    with col1:
+        st.write(f"**Turn:** {'White' if board.turn == chess.WHITE else 'Black'}")
+    with col2:
+        if board.is_check():
+            st.write("**Check!**")
+with col_pgn:
+    st.write("### Game History")
+    pgn_placeholder = st.empty()
+    def render_pgn():
+        # Show the game so far in the placeholder: as PGN text rebuilt from the
+        # UCI history, or "No moves yet" when the history is empty.
+        if history:
+            try:
+                game = chess.pgn.Game()
+                game.headers["Event"] = "Human vs o2"
+                game.headers["White"] = "Human"
+                game.headers["Black"] = "o2" if agent_loaded else "Random AI"
+                node = game
+                # Replay the history on a fresh board; stop at the first move
+                # that is not legal there so the PGN only holds a valid prefix.
+                temp_board = chess.Board()
+                for uci in history:
+                    move = chess.Move.from_uci(uci)
+                    if move in temp_board.legal_moves:
+                        node = node.add_main_variation(move)
+                        temp_board.push(move)
+                    else:
+                        break
+                pgn_placeholder.code(str(game), language="pgn")
+            except Exception as e:
+                # Fallback when PGN generation fails: list the raw UCI moves
+                # as numbered white/black pairs.
+                move_pairs = []
+                for i in range(0, len(history), 2):
+                    white_move = history[i]
+                    black_move = history[i+1] if i+1 < len(history) else ""
+                    move_pairs.append(f"{i//2 + 1}. {white_move} {black_move}")
+                pgn_placeholder.code("\n".join(move_pairs))
+        else:
+            pgn_placeholder.text("No moves yet")
+    render_pgn()
+# Human (White) to move: list the legal moves, read a move from the text box
+# (or pick a random one), apply it to the session board and rerun the script
+# so the Black branch can answer.
+if not board.is_game_over() and board.turn == chess.WHITE:
+    st.write("### Your Turn (White)")
+    legal_moves = list(board.legal_moves)
+    legal_moves_uci = [move.uci() for move in legal_moves]
+    legal_moves_san = []
+    for move in legal_moves:
+        # Best effort: fall back to UCI text if SAN generation fails.
+        try:
+            legal_moves_san.append(board.san(move))
+        except Exception:
+            legal_moves_san.append(move.uci())
+    with st.expander("Show legal moves"):
+        col1, col2 = st.columns(2)
+        with col1:
+            st.write("**Algebraic notation:**")
+            st.write(", ".join(sorted(legal_moves_san)))
+        with col2:
+            st.write("**UCI notation:**")
+            st.write(", ".join(sorted(legal_moves_uci)))
+    user_move = st.text_input("Enter your move (e.g., E4, Nf3, e2e4, O-O):", key="move_input", help="You can use algebraic notation (E4, Nf3) or UCI notation (e2e4). Case doesn't matter!")
+    col1, col2 = st.columns(2)
+    with col1:
+        if st.button("Submit Move"):
+            if user_move:
+                parsed_move = parse_move_input(user_move, board)
+                if parsed_move:
+                    try:
+                        # The SAN has to be taken BEFORE the push: board.san()
+                        # describes a move relative to the position it is
+                        # played from and fails once the move is on the stack.
+                        move_san = board.san(parsed_move)
+                        board.push(parsed_move)
+                        history.append(parsed_move.uci())
+                        render_board()  # Update board immediately
+                        render_pgn()    # Update PGN immediately
+                        st.success(f"You played: {move_san} ({parsed_move.uci()})")
+                        st.rerun()
+                    except Exception as e:
+                        st.error(f"Error making move: {e}")
+                else:
+                    st.warning(f"Invalid move: '{user_move}'. Please check the legal moves above.")
+            else:
+                st.warning("Please enter a move.")
+    with col2:
+        if st.button("Random Move"):
+            if legal_moves:
+                random_move = random.choice(legal_moves)
+                board.push(random_move)
+                history.append(random_move.uci())
+                render_board()  # Update board immediately
+                render_pgn()    # Update PGN immediately
+                st.rerun()
+# Black to move: ask the o2 agent for a move (or pick a random legal move when
+# the agent is unavailable), apply it, record it and rerun the script.
+if not board.is_game_over() and board.turn == chess.BLACK:
+    st.write("### o2's Turn (Black)")
+    with st.spinner("o2 is thinking..."):
+        try:
+            if agent_loaded and agent:
+                # Use temperature sampling for first 10 moves, then greedy
+                if len(history) < 20:  # 10 moves per side
+                    move = agent.select_move(board, use_mcts=True, simulations=30, temperature=1.2)
+                else:
+                    move = agent.select_move(board, use_mcts=True, simulations=30, temperature=0.0)
+            else:
+                legal_moves = list(board.legal_moves)
+                move = random.choice(legal_moves) if legal_moves else None
+            # Only play the move if it is legal in the current position.
+            if move and move in board.legal_moves:
+                # SAN is taken before the push, while the move is still legal
+                # in the current position.
+                move_san = board.san(move)
+                board.push(move)
+                history.append(move.uci())
+                st.success(f"o2 played: {move_san} ({move.uci()})")
+                st.rerun()
+            else:
+                st.error("o2 couldn't find a valid move")
+        except Exception as e:
+            st.error(f"Error during o2 move: {e}")
+# Game finished: announce the winner (or a draw), show the result string and
+# the termination reason, and offer a button that starts a fresh game.
+if board.is_game_over():
+    st.write("### Game Over!")
+    result = board.result()
+    outcome = board.outcome()
+    if result == "1-0":
+        st.success("White wins!")
+    elif result == "0-1":
+        st.error("Black wins!")
+    else:
+        st.info("Draw!")
+    st.write(f"**Result:** {result}")
+    st.write(f"**Termination:** {outcome.termination.name}")
+    # Same reset as the "Reset Game" button: new board, empty history, rerun.
+    if st.button("Start New Game"):
+        st.session_state.board = chess.Board()
+        st.session_state.history = []
+        st.rerun()

src/mcts.py CHANGED Viewed

@@ -43,11 +43,10 @@ class MCTS:
             for n in reversed(search_path):
                 n.N += 1
                 n.W += value
-                n.Q = n.W / n.N
                 value = -value  # Switch perspective
         # Temperature-based sampling for opening diversity
         if temperature and temperature > 0:
-            import numpy as np
             moves = list(root.children.keys())
             visits = np.array([root.children[m].N for m in moves], dtype=np.float32)
             probs = visits ** (1.0 / temperature)
@@ -71,27 +70,51 @@ class MCTS:
         with torch.no_grad():
             policy, value = self.model(tensor)
         policy = torch.softmax(policy, dim=1).numpy()[0]
         legal_moves = list(node.board.legal_moves)
-        total_p = 1e-8
         for move in legal_moves:
-            idx = self.move_to_index(move)
-            p = policy[idx]
-            total_p += p
         for move in legal_moves:
-            idx = self.move_to_index(move)
-            p = policy[idx] / total_p
-            child_board = node.board.copy()
-            child_board.push(move)
-            child = MCTSNode(child_board, parent=node, move=move)
-            child.P = p
-            node.children[move] = child
         return value.item()
     def move_to_index(self, move):
         from_square = move.from_square
         to_square = move.to_square
         promotion = move.promotion if move.promotion else 0
-        promotion_offset = 0
         if promotion:
-            promotion_offset = 4096 + (promotion - 1)
-        return from_square * 64 + to_square + promotion_offset

             for n in reversed(search_path):
                 n.N += 1
                 n.W += value
+                n.Q = n.W / n.N if n.N > 0 else 0.0
                 value = -value  # Switch perspective
         # Temperature-based sampling for opening diversity
         if temperature and temperature > 0:
             moves = list(root.children.keys())
             visits = np.array([root.children[m].N for m in moves], dtype=np.float32)
             probs = visits ** (1.0 / temperature)
         with torch.no_grad():
             policy, value = self.model(tensor)
         policy = torch.softmax(policy, dim=1).numpy()[0]
+        assert len(policy) == 4672, f"Policy size mismatch: expected 4672, got {len(policy)}"
         legal_moves = list(node.board.legal_moves)
+        total_p = 1e-8  # Small epsilon to prevent division by zero
         for move in legal_moves:
+            try:
+                idx = self.move_to_index(move)
+                if 0 <= idx < 4672:  # Ensure index is within bounds
+                    p = policy[idx]
+                    total_p += p
+            except Exception:
+                continue  # Skip moves that can't be indexed properly
+        if total_p < 1e-8:  # If all probabilities are extremely small
+            total_p = 1.0  # Fall back to uniform distribution
+            # Use uniform distribution only for legal moves
+            for move in legal_moves:
+                idx = self.move_to_index(move)
+                if 0 <= idx < 4672:
+                    policy[idx] = 1.0 / len(legal_moves)
+        # Create child nodes only for valid moves
         for move in legal_moves:
+            try:
+                idx = self.move_to_index(move)
+                if 0 <= idx < 4672:
+                    p = policy[idx] / total_p
+                    child_board = node.board.copy()
+                    child_board.push(move)
+                    child = MCTSNode(child_board, parent=node, move=move)
+                    child.P = p
+                    node.children[move] = child
+            except Exception:
+                continue  # Skip problematic moves
         return value.item()
     def move_to_index(self, move):
+        # Map a move to a flat index into the policy vector. The result never
+        # exceeds 4671 because of the min() on the last line.
         from_square = move.from_square
         to_square = move.to_square
         promotion = move.promotion if move.promotion else 0
+        # Base index for normal moves
+        idx = from_square * 64 + to_square
+        # Handle promotions.
+        # NOTE(review): (promotion - 1) * 64 * 64 // 4 equals
+        # (promotion - 1) * 1024. If `move` is a python-chess Move, promotion
+        # piece types are KNIGHT=2, BISHOP=3, ROOK=4, QUEEN=5, so the index is
+        # at least 4096 + 1024 = 5120 and every promotion ends up clamped to
+        # 4671 below. Confirm against the encoding used to train the model
+        # before changing this.
         if promotion:
+            # Map to indices after normal moves (4096 onwards)
+            idx = 4096 + ((promotion - 1) * 64 * 64 // 4) + (from_square * 8 + to_square // 8)
+        # Clamp to the last valid policy index (the policy has 4672 entries;
+        # note that 64*64 + 64*8 is 4608, not 4672).
+        return min(idx, 4671)

src/o2_agent.py CHANGED Viewed

@@ -1,88 +1,86 @@
-import numpy as np
-import chess
-import torch
-from o2_model import O2Net, board_to_tensor
-from mcts import MCTS
-import random
-# Optional: Endgame tablebase and opening book integration placeholders
-# You can use python-chess's tablebase and opening book modules if desired
-# Example for endgame tablebase:
-# from chess import tablebase
-# tb = tablebase.Tablebase()
-# tb.add_tablebase('/path/to/syzygy')
-# if tb.probe_wdl(board) is not None:
-#     # Use tablebase move
-# Example for opening book:
-# from chess.polyglot import open_reader
-# with open_reader('book.bin') as reader:
-#     entry = reader.find(board)
-#     move = entry.move
-class O2Agent:
-    def __init__(self, model_path=None):
-        self.model = O2Net()
-        if model_path:
-            self.model.load_state_dict(torch.load(model_path))
-        self.model.eval()
-    def select_move(self, board, use_mcts=True, simulations=100, temperature=0.0):
-        if use_mcts:
-            mcts = MCTS(self.model, simulations=simulations)
-            return mcts.run(board, temperature=temperature)
-        # SAFEGUARD IMPORT (add this line)
-        import numpy as np
-        tensor = torch.tensor(board_to_tensor(board)).unsqueeze(0)
-        with torch.no_grad():
-            policy, _ = self.model(tensor)
-        legal_moves = list(board.legal_moves)
-        move_scores = []
-        for move in legal_moves:
-            move_idx = self.move_to_index(move)
-            move_scores.append(policy[0, move_idx].item())
-        if temperature and temperature > 0:
-            # Softmax sampling
-            scores = np.array(move_scores)
-            exp_scores = np.exp(scores / temperature)
-            probs = exp_scores / np.sum(exp_scores)
-            move = np.random.choice(legal_moves, p=probs)
-            return move
-        best_move = legal_moves[int(torch.tensor(move_scores).argmax())]
-        return best_move
-    def move_to_index(self, move):
-        # Encode move as from_square * 64 + to_square + promotion_offset
-        from_square = move.from_square
-        to_square = move.to_square
-        promotion = move.promotion if move.promotion else 0
-        promotion_offset = 0
-        if promotion:
-            # Promotion: 1=Knight, 2=Bishop, 3=Rook, 4=Queen (python-chess)
-            # Offset: 4096 + (promotion-1)*64*64//4
-            promotion_offset = 4096 + (promotion - 1) * 256
-        idx = from_square * 64 + to_square + promotion_offset
-        # Ensure index is within bounds
-        return idx if idx < 4672 else idx % 4672
-    def index_to_move(self, board, index):
-        # Decode index to move (reverse of move_to_index)
-        if index >= 4096:
-            promotion = (index - 4096) % 4 + 1
-            idx = index - 4096
-            from_square = idx // 64
-            to_square = idx % 64
-            move = chess.Move(from_square, to_square, promotion=promotion)
-        else:
-            from_square = index // 64
-            to_square = index % 64
-            move = chess.Move(from_square, to_square)
-        if move in board.legal_moves:
-            return move
-        # Fallback: pick a random legal move
-        return random.choice(list(board.legal_moves))
-if __name__ == "__main__":
-    board = chess.Board()
-    agent = O2Agent()
-    move = agent.select_move(board)
-    print("O2 selects:", move)

+import numpy as np
+import chess
+import torch
+from o2_model import O2Net, board_to_tensor
+from mcts import MCTS
+import random
+# Optional: Endgame tablebase and opening book integration placeholders
+# You can use python-chess's tablebase and opening book modules if desired
+# Example for endgame tablebase:
+# from chess import tablebase
+# tb = tablebase.Tablebase()
+# tb.add_tablebase('/path/to/syzygy')
+# if tb.probe_wdl(board) is not None:
+#     # Use tablebase move
+# Example for opening book:
+# from chess.polyglot import open_reader
+# with open_reader('book.bin') as reader:
+#     entry = reader.find(board)
+#     move = entry.move
+class O2Agent:
+    # Chess agent wrapping an O2Net network. Moves are chosen either through
+    # MCTS or directly from the network's policy output.
+    def __init__(self, model_path=None):
+        # Build the network; when model_path is given, load its state dict.
+        # The network is always left in eval mode.
+        self.model = O2Net()
+        if model_path:
+            self.model.load_state_dict(torch.load(model_path))
+        self.model.eval()
+    def select_move(self, board, use_mcts=True, simulations=100, temperature=0.0):
+        # Choose a move for the side to move on `board`.
+        #   use_mcts    - when true, delegate to MCTS(self.model, simulations)
+        #                 and return the result of its run(); otherwise score
+        #                 the legal moves with a single forward pass.
+        #   temperature - forwarded to MCTS.run. In the policy-only path a
+        #                 value > 0 samples from a softmax over the move
+        #                 scores; 0 or None picks the highest-scoring move.
+        if use_mcts:
+            mcts = MCTS(self.model, simulations=simulations)
+            return mcts.run(board, temperature=temperature)
+        tensor = torch.tensor(board_to_tensor(board)).unsqueeze(0)
+        with torch.no_grad():
+            policy, _ = self.model(tensor)
+        legal_moves = list(board.legal_moves)
+        move_scores = [policy[0, self.move_to_index(move)].item() for move in legal_moves]
+        if temperature and temperature > 0:
+            # Softmax sampling. Subtracting the maximum before exponentiating
+            # leaves the probabilities unchanged but stops np.exp from
+            # overflowing to inf (and the probabilities from turning into NaN)
+            # when the temperature is small.
+            scores = np.array(move_scores) / temperature
+            exp_scores = np.exp(scores - scores.max())
+            probs = exp_scores / np.sum(exp_scores)
+            # Sample an index instead of the Move objects themselves, so numpy
+            # never has to coerce the move list into an array.
+            return legal_moves[int(np.random.choice(len(legal_moves), p=probs))]
+        best_move = legal_moves[int(torch.tensor(move_scores).argmax())]
+        return best_move
+    def move_to_index(self, move):
+        # Encode move as from_square * 64 + to_square + promotion_offset
+        from_square = move.from_square
+        to_square = move.to_square
+        promotion = move.promotion if move.promotion else 0
+        promotion_offset = 0
+        if promotion:
+            # python-chess piece types are KNIGHT=2, BISHOP=3, ROOK=4, QUEEN=5,
+            # so the offset below is 4096 + 256 ... 4096 + 1024.
+            # NOTE(review): with that offset the index can exceed 4671 and is
+            # wrapped by the modulo below, which aliases it onto another
+            # move's index. MCTS.move_to_index in mcts.py handles promotions
+            # differently (it clamps instead of wrapping). Confirm the
+            # encoding the model was trained with before changing either one.
+            promotion_offset = 4096 + (promotion - 1) * 256
+        idx = from_square * 64 + to_square + promotion_offset
+        # Ensure index is within bounds
+        return idx if idx < 4672 else idx % 4672
+    def index_to_move(self, board, index):
+        # Decode a policy index into a move on `board`. Returns the decoded
+        # move when it is legal, otherwise a random legal move.
+        # NOTE(review): for index >= 4096 this is not the inverse of
+        # move_to_index (the promotion piece is taken from (index - 4096) % 4
+        # and the squares from index - 4096); such indices will usually end up
+        # in the random fallback.
+        if index >= 4096:
+            promotion = (index - 4096) % 4 + 1
+            idx = index - 4096
+            from_square = idx // 64
+            to_square = idx % 64
+            move = chess.Move(from_square, to_square, promotion=promotion)
+        else:
+            from_square = index // 64
+            to_square = index % 64
+            move = chess.Move(from_square, to_square)
+        if move in board.legal_moves:
+            return move
+        # Fallback: pick a random legal move
+        return random.choice(list(board.legal_moves))
+# Smoke test when run as a script: build an agent without loading a
+# checkpoint and print the move it selects from the starting position.
+if __name__ == "__main__":
+    board = chess.Board()
+    agent = O2Agent()
+    move = agent.select_move(board)
+    print("O2 selects:", move)