Spaces:

Multichem-PD
/

DFS_Portfolio_Manager

Running

DFS_Portfolio_Manager / global_func /find_csv_mismatches.py

James McCool

Refactor import statements across multiple files to replace 'fuzzywuzzy' with 'rapidfuzz' for improved performance and consistency in string matching functionality. Additionally, clean up unused imports in app.py and related global functions to enhance code clarity and maintainability.

d9db89f 3 months ago

raw

history blame contribute delete

4.28 kB

	import streamlit as st
	import numpy as np
	import pandas as pd
	from rapidfuzz import process

	def find_csv_mismatches(csv_df, projections_df):
	    """Reconcile CSV player names against the projections' player names.

	    Names in ``csv_df['Name']`` that do not appear in
	    ``projections_df['player_names']`` are fuzzy-matched against the
	    projection names. Perfect (score 100) matches are applied automatically;
	    the rest are presented one at a time through Streamlit widgets so the
	    user can pick the correct projection name or reject all suggestions.

	    Args:
	        csv_df: DataFrame expected to contain a ``'Name'`` column.
	        projections_df: DataFrame expected to contain a ``'player_names'``
	            column.

	    Returns:
	        A copy of ``csv_df`` with the automatic replacements applied. If a
	        required column is missing, an error is shown and the unmodified
	        copy is returned. When the user confirms a selection, the updated
	        copy is stored in ``st.session_state['csv_file']`` (only if a match
	        was chosen) and ``st.rerun()`` is called, so that run does not reach
	        the return statement.

	    Side effects:
	        Reads and writes ``st.session_state.csv_current_player_index`` and
	        ``st.session_state.csv_players_to_process``; renders Streamlit
	        messages and widgets.
	    """
	    # Local import so this block does not depend on a file-level change;
	    # rapidfuzz is already a dependency of this module.
	    from rapidfuzz import utils

	    # Work on copies so the caller's DataFrames are never mutated.
	    csv_df = csv_df.copy()
	    projections_df = projections_df.copy()

	    if 'Name' not in csv_df.columns:
	        st.error("No 'Name' column found in CSV file")
	        return csv_df

	    if 'player_names' not in projections_df.columns:
	        st.error("No 'player_names' column found in projections file")
	        return csv_df

	    # Unique player names on each side.
	    csv_players = set(csv_df['Name'].dropna().unique())
	    projection_players = set(projections_df['player_names'].unique())

	    # Fuzzy-match candidates are the *projection* names (the names we want
	    # the CSV to end up using). Non-string values (e.g. NaN) are dropped so
	    # they never reach the matcher; sorting keeps tie-breaking stable.
	    projection_players_list = sorted(
	        name for name in projection_players if isinstance(name, str)
	    )

	    # Players present in the CSV but absent from the projections.
	    players_missing_from_projections = sorted(
	        csv_players - projection_players, key=str
	    )

	    # Automatically handle 100% matches before starting the interactive flow.
	    players_to_process = []
	    for player in players_missing_from_projections:
	        if not isinstance(player, str):
	            st.warning(f"Skipping non-string value: {player}")
	            continue
	        # default_process (lowercase, strip non-alphanumerics, trim) is passed
	        # explicitly: without preprocessing only identical strings score 100,
	        # and those were already removed by the set difference above.
	        closest_matches = process.extract(
	            player,
	            projection_players_list,
	            processor=utils.default_process,
	            limit=1,
	        )
	        # The result is empty when there are no candidates at all.
	        if closest_matches and closest_matches[0][1] == 100:
	            match_name = closest_matches[0][0]
	            # Update CSV DataFrame to use the projection name.
	            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
	            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
	        else:
	            players_to_process.append(player)

	    # Initialise the interactive work list once per session.
	    # NOTE(review): the list is only populated while the index key is absent,
	    # so after the flow completes (and is reset below) it is not rebuilt for
	    # a later upload -- confirm the caller clears these keys when a new file
	    # is loaded.
	    if 'csv_current_player_index' not in st.session_state:
	        st.session_state.csv_current_player_index = 0
	        st.session_state.csv_players_to_process = players_to_process

	    if not players_missing_from_projections:
	        st.success("All CSV players found in projections!")
	        return csv_df

	    st.warning("Players in CSV but missing from projections")

	    pending = st.session_state.csv_players_to_process
	    current_index = st.session_state.csv_current_player_index

	    # Display remaining players.
	    remaining_players = pending[current_index:]
	    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
	            "\n".join(f"- {player}" for player in remaining_players))

	    if current_index >= len(pending):
	        st.success("All players have been processed!")
	        # Reset the index for future runs.
	        st.session_state.csv_current_player_index = 0
	        st.session_state.csv_players_to_process = []
	        return csv_df

	    current_player = pending[current_index]

	    # Find the top 3 closest projection names.
	    closest_matches = process.extract(
	        current_player,
	        projection_players_list,
	        processor=utils.default_process,
	        limit=3,
	    )

	    st.write(f"Missing Player {current_index + 1} of {len(pending)}: {current_player}")

	    # Map each displayed label back to its candidate name so the selection
	    # never has to be parsed out of the label text (names may contain " (").
	    # Scores are floats, so round them for display.
	    option_to_name = {
	        f"{match[0]} ({round(match[1])}%)": match[0] for match in closest_matches
	    }
	    options = list(option_to_name)
	    options.append("None of these")

	    selected_option = st.radio(
	        "Select correct match:",
	        options,
	        key=f"csv_radio_{current_player}"
	    )

	    if st.button("Confirm Selection", key="csv_confirm"):
	        if selected_option != "None of these":
	            selected_name = option_to_name[selected_option]
	            # Update CSV DataFrame and persist it for the next run.
	            csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
	            st.success(f"Replaced '{current_player}' with '{selected_name}'")
	            st.session_state['csv_file'] = csv_df

	        # Move to next player.
	        st.session_state.csv_current_player_index += 1
	        st.rerun()

	    return csv_df