Spaces:

FlameF0X
/

play-with-o1

Build error

play-with-o1 / src /o1 /selfplay.py

Upload 11 files

8806ce1 verified 4 months ago

1.35 kB

	"""
	Self-play orchestration for o1 agent.
	Runs self-play games using MCTS for move selection.
	"""
	import chess
	from o1.mcts import MCTS

	def run_selfplay(agent, num_games=1, simulations=50):
	    """Run self-play games using MCTS and return experience.

	    Args:
	        agent: Object handed to ``MCTS`` and used for
	            ``agent.board_to_tensor(board)`` to encode each position.
	        num_games: Number of complete games to play.
	        simulations: Simulation budget forwarded to ``MCTS``.

	    Returns:
	        A flat list of ``(state_tensor, policy, value)`` tuples, one per
	        position played across all games. ``policy`` is a one-hot list of
	        length 4672 marking the move chosen by MCTS. ``value`` is the final
	        outcome of the game the position came from: 1 for a White win
	        ('1-0'), -1 for a Black win ('0-1'), and 0 otherwise.

	    Raises:
	        ValueError: If the move returned by ``mcts.search`` is not among
	            ``board.legal_moves``.
	    """
	    # NOTE(review): 4672 looks like the AlphaZero-style 8x8x73 move-encoding
	    # size, not a bound on legal moves. The index set below is the move's
	    # position in the current legal-move list, not that encoding -- confirm
	    # this matches what the agent's policy head expects.
	    policy_size = 4672
	    # Symmetric outcome targets; any other result string maps to 0.
	    outcome_values = {'1-0': 1, '0-1': -1}

	    all_experience = []
	    for _ in range(num_games):
	        board = chess.Board()
	        mcts = MCTS(agent, simulations=simulations)
	        positions = []
	        while not board.is_game_over():
	            move = mcts.search(board)
	            state_tensor = agent.board_to_tensor(board)
	            # Policy: one-hot for the chosen move (for now).
	            policy = [0] * policy_size
	            policy[list(board.legal_moves).index(move)] = 1
	            positions.append((state_tensor, policy))
	            board.push(move)
	        # The value is only known once the game ends, so it is attached
	        # to every recorded position afterwards.
	        # NOTE(review): the same value is used for every position
	        # regardless of side to move -- confirm the training code expects
	        # that.
	        z = outcome_values.get(board.result(), 0)
	        all_experience.extend((s, p, z) for s, p in positions)
	    return all_experience

	# NOTE: the self-play loop is implemented in run_selfplay above; further self-play extensions can go here.