Spaces:

FlameF0X
/

Play-with-o2

Sleeping

FlameF0X committed on May 29

Commit

5342d4d

verified ·

1 Parent(s): ce00937

Upload 2 files

Files changed (2) hide show

src/mcts.py CHANGED Viewed

@@ -20,7 +20,7 @@ class MCTS:
         self.simulations = simulations
         self.c_puct = c_puct
-    def run(self, board):
         root = MCTSNode(board)
         self._expand(root)
         for _ in range(self.simulations):
@@ -45,6 +45,15 @@ class MCTS:
                 n.W += value
                 n.Q = n.W / n.N
                 value = -value  # Switch perspective
         # Choose move with highest visit count
         best_move = max(root.children.items(), key=lambda item: item[1].N)[0]
         return best_move

         self.simulations = simulations
         self.c_puct = c_puct
+    def run(self, board, temperature=0.0):
         root = MCTSNode(board)
         self._expand(root)
         for _ in range(self.simulations):
                 n.W += value
                 n.Q = n.W / n.N
                 value = -value  # Switch perspective
+        # Temperature-based sampling for opening diversity
+        if temperature and temperature > 0:
+            import numpy as np
+            moves = list(root.children.keys())
+            visits = np.array([root.children[m].N for m in moves], dtype=np.float32)
+            probs = visits ** (1.0 / temperature)
+            probs = probs / np.sum(probs)
+            move = np.random.choice(moves, p=probs)
+            return move
         # Choose move with highest visit count
         best_move = max(root.children.items(), key=lambda item: item[1].N)[0]
         return best_move

src/o2_agent.py CHANGED Viewed

@@ -25,10 +25,10 @@ class O2Agent:
             self.model.load_state_dict(torch.load(model_path))
         self.model.eval()
-    def select_move(self, board, use_mcts=True, simulations=100):
         if use_mcts:
             mcts = MCTS(self.model, simulations=simulations)
-            return mcts.run(board)
         tensor = torch.tensor(board_to_tensor(board)).unsqueeze(0)
         with torch.no_grad():
             policy, _ = self.model(tensor)
@@ -37,6 +37,14 @@ class O2Agent:
         for move in legal_moves:
             move_idx = self.move_to_index(move)
             move_scores.append(policy[0, move_idx].item())
         best_move = legal_moves[int(torch.tensor(move_scores).argmax())]
         return best_move

             self.model.load_state_dict(torch.load(model_path))
         self.model.eval()
+    def select_move(self, board, use_mcts=True, simulations=100, temperature=0.0):
         if use_mcts:
             mcts = MCTS(self.model, simulations=simulations)
+            return mcts.run(board, temperature=temperature)
         tensor = torch.tensor(board_to_tensor(board)).unsqueeze(0)
         with torch.no_grad():
             policy, _ = self.model(tensor)
         for move in legal_moves:
             move_idx = self.move_to_index(move)
             move_scores.append(policy[0, move_idx].item())
+        if temperature and temperature > 0:
+            # Softmax sampling
+            import numpy as np
+            scores = np.array(move_scores)
+            exp_scores = np.exp(scores / temperature)
+            probs = exp_scores / np.sum(exp_scores)
+            move = np.random.choice(legal_moves, p=probs)
+            return move
         best_move = legal_moves[int(torch.tensor(move_scores).argmax())]
         return best_move