Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import numpy as np | |
| import pandas as pd | |
| from fuzzywuzzy import process | |
def find_csv_mismatches(csv_df: pd.DataFrame, projections_df: pd.DataFrame) -> pd.DataFrame:
    """Reconcile player names in a CSV against the projections file.

    Names found in ``csv_df['Name']`` but absent from
    ``projections_df['player_names']`` are fuzzy-matched against the
    projection names. A match scoring 100 is applied automatically; every
    other name is presented to the user one at a time through Streamlit
    widgets (radio + confirm button), with progress kept in
    ``st.session_state`` under ``csv_current_player_index`` and
    ``csv_players_to_process``.

    Args:
        csv_df: DataFrame expected to contain a ``Name`` column.
        projections_df: DataFrame expected to contain a ``player_names``
            column.

    Returns:
        A copy of ``csv_df`` with automatically matched names replaced by
        their projection spelling. If a required column is missing, an
        error is shown and the unmodified copy is returned. When the user
        confirms a manual selection, the updated copy is stored in
        ``st.session_state['csv_file']`` and the script is rerun.
    """
    # Work on copies so the caller's dataframes are never mutated.
    csv_df = csv_df.copy()
    projections_df = projections_df.copy()

    if 'Name' not in csv_df.columns:
        st.error("No 'Name' column found in CSV file")
        return csv_df
    if 'player_names' not in projections_df.columns:
        st.error("No 'player_names' column found in projections file")
        return csv_df

    # Unique, non-null names on each side.
    csv_players = set(csv_df['Name'].dropna().unique())
    projection_players = set(projections_df['player_names'].dropna().unique())

    # Fuzzy-match candidates are the *projection* names (only strings can be
    # scored); sorted so results do not depend on set iteration order.
    projection_players_list = sorted(
        name for name in projection_players if isinstance(name, str)
    )

    # Players present in the CSV but missing from projections. key=str keeps
    # the sort well-defined even if the column holds mixed types.
    players_missing_from_projections = sorted(
        csv_players - projection_players, key=str
    )

    # Automatically handle 100% matches before starting the interactive flow.
    players_to_process = []
    for player in players_missing_from_projections:
        if not isinstance(player, str):
            st.warning(f"Skipping non-string value: {player}")
            continue
        closest_matches = process.extract(player, projection_players_list, limit=1)
        # extract() returns an empty list when there are no candidates.
        if closest_matches and closest_matches[0][1] == 100:
            match_name = closest_matches[0][0]
            # Rewrite the CSV name to the projection spelling.
            csv_df.loc[csv_df['Name'] == player, 'Name'] = match_name
            st.success(f"Automatically matched '{player}' with '{match_name}' (100% match)")
        else:
            players_to_process.append(player)

    # Initialise the interactive queue once; later reruns keep the stored
    # queue/index so the user's progress is preserved.
    if ('csv_current_player_index' not in st.session_state
            or 'csv_players_to_process' not in st.session_state):
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = players_to_process

    if not players_missing_from_projections:
        st.success("All CSV players found in projections!")
        return csv_df

    st.warning("Players in CSV but missing from projections")

    queue = st.session_state.csv_players_to_process
    position = st.session_state.csv_current_player_index

    # Display remaining players.
    remaining_players = queue[position:]
    st.info(f"Remaining players to process ({len(remaining_players)}):\n" +
            "\n".join(f"- {player}" for player in remaining_players))

    if position < len(queue):
        current_player = queue[position]
        # Offer the top 3 closest projection names.
        closest_matches = process.extract(current_player, projection_players_list, limit=3)
        st.write(f"**Missing Player {position + 1} of {len(queue)}:** {current_player}")

        # Map each radio label back to its raw name so names that themselves
        # contain " (" are not truncated by string splitting.
        option_to_name = {f"{match[0]} ({match[1]}%)": match[0] for match in closest_matches}
        options = list(option_to_name)
        options.append("None of these")
        selected_option = st.radio(
            "Select correct match:",
            options,
            key=f"csv_radio_{current_player}"
        )
        if st.button("Confirm Selection", key="csv_confirm"):
            if selected_option != "None of these":
                selected_name = option_to_name[selected_option]
                # Update CSV DataFrame and persist it for the next rerun.
                csv_df.loc[csv_df['Name'] == current_player, 'Name'] = selected_name
                st.success(f"Replaced '{current_player}' with '{selected_name}'")
                st.session_state['csv_file'] = csv_df
            # Move to next player.
            st.session_state.csv_current_player_index += 1
            st.rerun()
    else:
        st.success("All players have been processed!")
        # Reset the index for future runs.
        st.session_state.csv_current_player_index = 0
        st.session_state.csv_players_to_process = []

    return csv_df