Spaces:

Multichem-PD
/

DFS_Contest_Analyzer

Running

File size: 11,534 Bytes

9c7e08b
 
91e473e
3894e93
9c7e08b
54d06a1
9c7e08b
 
5c9b782
 
 
 
 
 
 
 
 
ab18789
dd89b11
 
54d06a1
 
bbf6bb9
 
9c7e08b
dd89b11
 
 
 
 
9c7e08b
bbf6bb9
 
9c7e08b
 
 
 
 
3894e93
 
9c7e08b
795a6d7
e1f40de
 
 
 
c6e3488
 
bbf6bb9
 
795a6d7
dd89b11
 
8d72ffa
dd89b11
bbf6bb9
91e473e
 
8dfe988
 
91e473e
 
7740ecc
889410b
91e473e
 
889410b
91e473e
 
 
 
 
 
 
7740ecc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
91e473e
1d9fb5a
f8c497a
1d9fb5a
bbf6bb9
795a6d7
dd89b11
9681ab8
 
 
 
 
dd89b11
9681ab8
 
 
 
 
bbf6bb9
 
42712b2
795a6d7
faa4887
440bba8
2ab6242
 
 
440bba8
 
 
 
 
5b2d759
9c09ce3
7ca4aa0
 
e076a1d
4151add
440bba8
 
2ab6242
 
 
440bba8
 
 
 
 
5b2d759
9c09ce3
7ca4aa0
 
e076a1d
47c6390
440bba8
 
 
2ab6242
 
 
440bba8
 
 
 
 
5b2d759
9c09ce3
7ca4aa0
 
e076a1d
 
440bba8
c2b7029
472d4c4
9ba550c
 
8fd8af6
 
7ca4aa0
 
c2b7029
 
440bba8
 
3dc6731
440bba8
 
 
1e08ae6
91e473e
bbf6bb9
54d06a1
 
 
 
 
b67c4a9
54d06a1
9c7e08b
178f6d2
4727314
2841628
 
178f6d2
 
996f8cb
795a6d7
9c7e08b

import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
from numpy import where as np_where

# Placeholder roster labels for slates whose lineup slots are interchangeable.
_GENERIC_SLOTS = ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy']

# Per-sport Classic parsing rules: (position tokens replaced by ',' in the
# Lineup string, labels assigned positionally to the split pieces, roster
# columns in output order). Space-prefixed tokens come before their bare
# variants so a mid-string token never leaves a trailing space on the
# preceding player name.
_CLASSIC_FORMATS = {
    'NFL': (
        [' QB ', 'QB ', ' RB ', 'RB ', ' WR ', 'WR ', ' TE ', 'TE ', ' DST ', 'DST ', ' FLEX ', 'FLEX '],
        ['DST', 'FLEX', 'QB', 'RB1', 'RB2', 'TE', 'WR1', 'WR2', 'WR3'],
        ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'TE', 'FLEX', 'DST'],
    ),
    'MLB': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF '],
        ['1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
        ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'],
    ),
    'MMA': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' F ', 'F '],
        _GENERIC_SLOTS,
        _GENERIC_SLOTS,
    ),
    'GOLF': (
        [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' G ', 'G '],
        _GENERIC_SLOTS,
        _GENERIC_SLOTS,
    ),
    'WNBA': (
        [' G ', ' F ', 'F ', ' UTIL '],
        ['G1', 'G2', 'F1', 'F2', 'F3', 'UTIL'],
        ['G1', 'G2', 'F1', 'F2', 'F3', 'UTIL'],
    ),
    'NAS': (
        [' D ', 'D '],
        _GENERIC_SLOTS,
        _GENERIC_SLOTS,
    ),
    'CFB': (
        [' QB ', 'QB ', ' RB ', 'RB ', ' WR ', 'WR ', ' S-FLEX ', 'S-FLEX ', ' FLEX ', 'FLEX '],
        ['FLEX', 'QB', 'RB1', 'RB2', 'S-FLEX', 'WR1', 'WR2', 'WR3'],
        ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'FLEX', 'S-FLEX'],
    ),
}

# Showdown roster columns are the same for every sport; only the position
# tokens found in the Lineup string differ.
_SHOWDOWN_SLOTS = ['CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']
_SHOWDOWN_DEFAULT_TOKENS = [' UTIL ', 'CPT ']
_SHOWDOWN_TOKENS = {
    # FLEX-labelled slots; ' UTIL ' is accepted as well.
    'NHL': [' FLEX ', 'CPT ', ' UTIL '],
    'NFL': [' FLEX ', 'CPT ', ' UTIL '],
    'GOLF': [' G ', 'G '],
    'NAS': [' D ', 'D '],
}


def _read_contest_table(upload):
    """Return the uploaded contest data as a table.

    An object exposing a string ``name`` is treated as a file and parsed by
    extension; anything else is assumed to be an already-loaded table and is
    returned unchanged. Returns None when the file extension is unsupported.
    """
    file_name = getattr(upload, 'name', None)
    if not isinstance(file_name, str):
        return upload
    lowered = file_name.lower()
    if lowered.endswith('.csv'):
        return pd.read_csv(upload)
    if lowered.endswith(('.xls', '.xlsx')):
        return pd.read_excel(upload)
    return None


def _reconcile_helper_players(helper_df, contest_names):
    """Return the helper's Player/Salary/Team rows with fuzzy-reconciled names.

    Each contest name is mapped to its best helper match and each helper name
    that is not itself a contest name is mapped to its best contest match
    (rapidfuzz score cutoff 85); names without a match map to themselves.
    The mapping is applied to the helper's Player column and duplicate
    players are dropped, keeping the first occurrence.
    """
    helper_table = helper_df[['Player', 'Salary', 'Team']].copy()
    helper_names = helper_df.Player.unique()

    def closest(name, candidates):
        found = process.extractOne(name, candidates, score_cutoff = 85)
        return found[0] if found else name

    name_map = {name: closest(name, helper_names) for name in contest_names}
    for name in helper_names:
        if name not in name_map:
            name_map[name] = closest(name, contest_names)

    helper_table['Player'] = helper_table['Player'].map(name_map)
    return helper_table.drop_duplicates(subset='Player', keep='first')


def load_contest_file(upload, type, helper = None, sport = None, portfolio = None):
    """Parse a contest export into lineup, ownership and scoring tables.

    Args:
        upload: Either a file-like object whose ``name`` ends in ``.csv``,
            ``.xls`` or ``.xlsx``, or an already-loaded table. It must provide
            the columns EntryId, EntryName, TimeRemaining, Points, Lineup,
            Player, Roster Position, %Drafted and FPTS, plus Salary and Team
            when no ``helper`` is given.
        type: Contest format, ``'Classic'`` or ``'Showdown'``.
        helper: Optional table with Player, Salary and Team columns.
        sport: Sport key selecting the lineup parsing rules. Classic supports
            NFL, MLB, MMA, GOLF, WNBA, NAS and CFB; Showdown has dedicated
            tokens for NHL, NFL, GOLF and NAS and uses UTIL/CPT otherwise.
        portfolio: Optional table of lineups (one column per roster slot, in
            output order) appended under the entry name
            ``'Backtesting_upload'``. The caller's object is not modified.

    Returns:
        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)`` on
        success: the per-entry roster table, first-seen ownership and fantasy
        points per player, the sorted entry names, and the comma-delimited
        lineup strings before they were split into columns. Returns None when
        ``upload`` is None or when loading fails; failures are reported
        through ``st.error``.
    """
    if upload is None:
        return None

    try:
        raw_df = _read_contest_table(upload)
        if raw_df is None:
            st.error('Please upload either a CSV or Excel file')
            return None

        print('Made it through initial upload')

        # Salary and Team come from the contest file only when no helper
        # table supplies them.
        wanted = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position', '%Drafted', 'FPTS']
        if helper is None:
            wanted = wanted + ['Salary', 'Team']
        df = raw_df[wanted].rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})

        print('Made it through rename')

        # Strip the "(n/m)" multi-entry suffix so entries group by user.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        if type == 'Showdown':
            # Captain rows carry a 1.5x score; scale back to the base value.
            df['FPTS'] = np_where(df['Pos'] == 'CPT', df['FPTS'] / 1.5, df['FPTS'])

        # Ownership arrives either as "12.3%" strings or as plain numbers;
        # .str raises AttributeError on a numeric column.
        try:
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except AttributeError:
            df['Own'] = df['Own'].astype(float)

        df['Player'] = df['Player'].str.strip()

        print('Made it through ownership conversion')

        if helper is not None:
            # NOTE(review): the reconciled helper table is not part of the
            # returned tuple. The step is kept so a malformed helper still
            # fails here; confirm whether salary/team should be returned.
            _reconcile_helper_players(helper, df.Player.unique())

        print('Made it through helper')

        ownership_df = df[['Player', 'Own']].drop_duplicates(subset='Player', keep='first')
        fpts_df = df[['Player', 'FPTS']].drop_duplicates(subset='Player', keep='first')

        print('Made it through dictionaries')

        if type == 'Classic':
            if sport not in _CLASSIC_FORMATS:
                raise ValueError(f'Unsupported sport for Classic contests: {sport}')
            tokens, parsed_slots, output_slots = _CLASSIC_FORMATS[sport]
            progress_marker = sport
        elif type == 'Showdown':
            tokens = _SHOWDOWN_TOKENS.get(sport, _SHOWDOWN_DEFAULT_TOKENS)
            parsed_slots = output_slots = _SHOWDOWN_SLOTS
            progress_marker = type
        else:
            raise ValueError(f'Unsupported contest type: {type}')

        # Work on a copy so the column assignments below never write into a
        # view of df.
        lineups = df[['BaseName', 'Lineup']].copy()
        lineups['Lineup'] = lineups['Lineup'].replace(tokens, value=',', regex=True)
        print(progress_marker)
        check_lineups = lineups.copy()

        # The string now starts with ',', so the first split piece is empty
        # and is discarded via the 'Remove' column.
        lineups[['Remove'] + parsed_slots] = lineups['Lineup'].str.split(',', expand=True)
        lineups = lineups.drop(columns=['Lineup', 'Remove'])
        lineups['EntryCount'] = lineups['BaseName'].map(lineups['BaseName'].value_counts())
        cleaned_df = lineups[['BaseName', 'EntryCount'] + output_slots]

        print('Made it through check_lineups')

        if portfolio is not None:
            # Put the identifier columns first so the positional relabel below
            # lines the portfolio's players up with the roster slots.
            portfolio_df = portfolio.drop(columns=['BaseName', 'EntryCount'], errors='ignore')
            portfolio_df.insert(0, 'BaseName', 'Backtesting_upload')
            portfolio_df.insert(1, 'EntryCount', len(portfolio_df))
            portfolio_df.columns = cleaned_df.columns.tolist()
            cleaned_df = pd.concat([cleaned_df, portfolio_df], ignore_index=True)

        entry_list = list(set(df['BaseName'].dropna()))
        if portfolio is not None:
            entry_list.append('Backtesting_upload')
        entry_list.sort()

        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups

    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None