File size: 11,534 Bytes
9c7e08b 91e473e 3894e93 9c7e08b 54d06a1 9c7e08b 5c9b782 ab18789 dd89b11 54d06a1 bbf6bb9 9c7e08b dd89b11 9c7e08b bbf6bb9 9c7e08b 3894e93 9c7e08b 795a6d7 e1f40de c6e3488 bbf6bb9 795a6d7 dd89b11 8d72ffa dd89b11 bbf6bb9 91e473e 8dfe988 91e473e 7740ecc 889410b 91e473e 889410b 91e473e 7740ecc 91e473e 1d9fb5a f8c497a 1d9fb5a bbf6bb9 795a6d7 dd89b11 9681ab8 dd89b11 9681ab8 bbf6bb9 42712b2 795a6d7 faa4887 440bba8 2ab6242 440bba8 5b2d759 9c09ce3 7ca4aa0 e076a1d 4151add 440bba8 2ab6242 440bba8 5b2d759 9c09ce3 7ca4aa0 e076a1d 47c6390 440bba8 2ab6242 440bba8 5b2d759 9c09ce3 7ca4aa0 e076a1d 440bba8 c2b7029 472d4c4 9ba550c 8fd8af6 7ca4aa0 c2b7029 440bba8 3dc6731 440bba8 1e08ae6 91e473e bbf6bb9 54d06a1 b67c4a9 54d06a1 9c7e08b 178f6d2 4727314 2841628 178f6d2 996f8cb 795a6d7 9c7e08b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 |
import streamlit as st
import pandas as pd
from rapidfuzz import process, fuzz
from numpy import where as np_where
# Position tags, written as regex patterns, that are replaced by ',' in a
# Classic 'Lineup' string so it can be split into one player per column.
# Patterns are applied in list order, so the space-prefixed form of a tag must
# come before its bare form; otherwise the bare form consumes the tag and
# leaves a trailing space on the preceding player name.
_CLASSIC_POSITION_TAGS = {
    'NFL': [' QB ', 'QB ', ' RB ', 'RB ', ' WR ', 'WR ', ' TE ', 'TE ', ' DST ', 'DST ', ' FLEX ', 'FLEX '],
    'MLB': [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF '],
    'MMA': [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' F ', 'F '],
    'GOLF': [' P ', ' C ', '1B ', ' 2B ', ' 3B ', ' SS ', ' OF ', ' G ', 'G '],
    'WNBA': [' G ', ' F ', 'F ', ' UTIL '],
    'NAS': [' D ', 'D '],
    'CFB': [' QB ', 'QB ', ' RB ', 'RB ', ' WR ', 'WR ', ' S-FLEX ', 'S-FLEX ', ' FLEX ', 'FLEX '],
}

# Column names given to the pieces of a split Classic 'Lineup' string, in the
# order they appear in the string. 'Remove' receives the empty piece in front
# of the first separator and is dropped afterwards.
_CLASSIC_SPLIT_COLUMNS = {
    'NFL': ['Remove', 'DST', 'FLEX', 'QB', 'RB1', 'RB2', 'TE', 'WR1', 'WR2', 'WR3'],
    'MLB': ['Remove', '1B', '2B', '3B', 'C', 'OF1', 'OF2', 'OF3', 'P1', 'P2', 'SS'],
    'MMA': ['Remove', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'GOLF': ['Remove', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'WNBA': ['Remove', 'G1', 'G2', 'F1', 'F2', 'F3', 'UTIL'],
    'NAS': ['Remove', 'Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'CFB': ['Remove', 'FLEX', 'QB', 'RB1', 'RB2', 'S-FLEX', 'WR1', 'WR2', 'WR3'],
}

# Order of the roster-slot columns in the returned Classic lineup table
# (they follow 'BaseName' and 'EntryCount').
_CLASSIC_OUTPUT_SLOTS = {
    'NFL': ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'TE', 'FLEX', 'DST'],
    'MLB': ['P1', 'P2', 'C', '1B', '2B', '3B', 'SS', 'OF1', 'OF2', 'OF3'],
    'MMA': ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'GOLF': ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'WNBA': ['G1', 'G2', 'F1', 'F2', 'F3', 'UTIL'],
    'NAS': ['Guy', 'Dude', 'Pooba', 'Bub', 'Chief', 'Buddy'],
    'CFB': ['QB', 'RB1', 'RB2', 'WR1', 'WR2', 'WR3', 'FLEX', 'S-FLEX'],
}

# Roster-slot columns of a Showdown lineup, in both split and output order.
_SHOWDOWN_SLOTS = ['CPT', 'UTIL1', 'UTIL2', 'UTIL3', 'UTIL4', 'UTIL5']


def load_contest_file(upload, type, helper = None, sport = None, portfolio = None):
    """Parse a contest export into lineup and per-player tables.

    Args:
        upload: Either an uploaded file object whose ``name`` ends in
            ``.csv``, ``.xls`` or ``.xlsx``, or an already loaded DataFrame.
            It must provide the columns 'EntryId', 'EntryName',
            'TimeRemaining', 'Points', 'Lineup', 'Player', 'Roster Position',
            '%Drafted' and 'FPTS', plus 'Salary' and 'Team' when ``helper``
            is None.
        type: Contest format, 'Classic' or 'Showdown'. (The name shadows the
            builtin; it is kept because callers may pass it by keyword.)
        helper: Optional DataFrame with 'Player', 'Salary' and 'Team'
            columns. Its player names are fuzzy-matched against the contest
            player names.
        sport: Sport code selecting the position tags and roster columns.
            Classic supports 'NFL', 'MLB', 'MMA', 'GOLF', 'WNBA', 'NAS' and
            'CFB'. Showdown has dedicated tags for 'NHL', 'NFL', 'GOLF' and
            'NAS' and treats every other value as CPT/UTIL.
        portfolio: Optional DataFrame of lineups appended to the contest
            lineups under the entry name 'Backtesting_upload'. It is copied,
            never modified.

    Returns:
        ``(cleaned_df, ownership_df, fpts_df, entry_list, check_lineups)``:
        one row per entry with one column per roster slot; first-seen
        ownership per player; first-seen FPTS per player; the sorted unique
        entry names; and the ('BaseName', 'Lineup') table as it was just
        before the lineup string was split.
        Returns None when ``upload`` is None, when the file extension is not
        supported, or when any step fails; failures are reported through
        ``st.error``.
    """
    if upload is None:
        return None

    try:
        # --- Load the raw table -------------------------------------------
        if isinstance(upload, pd.DataFrame):
            raw_df = upload
        else:
            filename = getattr(upload, 'name', None)
            if not isinstance(filename, str):
                # Not a named file: treat it as an already loaded table.
                raw_df = upload
            elif filename.lower().endswith('.csv'):
                raw_df = pd.read_csv(upload)
            elif filename.lower().endswith(('.xls', '.xlsx')):
                raw_df = pd.read_excel(upload)
            else:
                st.error('Please upload either a CSV or Excel file')
                return None
        print('Made it through initial upload')

        # --- Select and rename the essential columns ----------------------
        wanted = ['EntryId', 'EntryName', 'TimeRemaining', 'Points', 'Lineup',
                  'Player', 'Roster Position', '%Drafted', 'FPTS']
        if helper is None:
            # Without a helper table, salary and team come from the upload.
            wanted = wanted + ['Salary', 'Team']
        # Copy so the column assignments below never write into raw_df.
        df = raw_df[wanted].copy()
        df = df.rename(columns={'Roster Position': 'Pos', '%Drafted': 'Own'})
        print('Made it through rename')

        # Split EntryName ("name (2/5)") into base name and entry count.
        df['BaseName'] = df['EntryName'].str.replace(r'\s*\(\d+/\d+\)$', '', regex=True)
        df['EntryCount'] = df['EntryName'].str.extract(r'\((\d+/\d+)\)', expand=False)
        df['EntryCount'] = df['EntryCount'].fillna('1/1')  # Default to 1/1 if no entry count

        if type == 'Showdown':
            # Presumably undoes the 1.5x captain multiplier -- confirm.
            df['FPTS'] = np_where(df['Pos'] == 'CPT', df['FPTS'] / 1.5, df['FPTS'])

        # Convert ownership to float; it may arrive as "12.3%" text or as
        # numbers (numeric columns have no .str accessor).
        try:
            df['Own'] = df['Own'].str.replace('%', '', regex=False).astype(float)
        except AttributeError:
            df['Own'] = df['Own'].astype(float)
        df['Player'] = df['Player'].str.strip()
        print('Made it through ownership conversion')

        # --- Align helper player names with contest player names ----------
        if helper is not None:
            df_helper = helper[['Player', 'Salary', 'Team']].copy()
        print('Made it through helper')

        contest_names = df.Player.unique()
        if helper is not None:
            helper_names = helper.Player.unique()

            # Best helper name for every contest name (itself if none >= 85).
            contest_match_dict = {}
            for name in contest_names:
                match = process.extractOne(name, helper_names, score_cutoff = 85)
                contest_match_dict[name] = match[0] if match else name

            # Best contest name for every helper name (itself if none >= 85).
            helper_match_dict = {}
            for name in helper_names:
                match = process.extractOne(name, contest_names, score_cutoff = 85)
                helper_match_dict[name] = match[0] if match else name

            # Contest-side matches win; helper-side matches fill the gaps.
            for key, value in helper_match_dict.items():
                contest_match_dict.setdefault(key, value)

            df_helper['Player'] = df_helper['Player'].map(contest_match_dict)
            df_helper = df_helper.drop_duplicates(subset='Player', keep='first')

        # --- Per-player attribute tables (first occurrence wins) ----------
        ownership_df = df[['Player', 'Own']].drop_duplicates(subset='Player', keep='first')
        fpts_df = df[['Player', 'FPTS']].drop_duplicates(subset='Player', keep='first')
        # NOTE(review): salary_df, team_df and pos_df are built but not part
        # of the return value; kept so the return shape stays unchanged.
        attribute_source = df_helper if helper is not None else df
        salary_df = attribute_source[['Player', 'Salary']].drop_duplicates(subset='Player', keep='first')
        team_df = attribute_source[['Player', 'Team']].drop_duplicates(subset='Player', keep='first')
        pos_df = df[['Player', 'Pos']].drop_duplicates(subset='Player', keep='first')
        print('Made it through dictionaries')

        # --- Turn each 'Lineup' string into one column per roster slot ----
        cleaned_df = df[['BaseName', 'Lineup']].copy()
        if type == 'Classic':
            if sport not in _CLASSIC_POSITION_TAGS:
                raise ValueError(f'Unsupported sport for Classic contests: {sport!r}')
            cleaned_df['Lineup'] = cleaned_df['Lineup'].replace(_CLASSIC_POSITION_TAGS[sport], value=',', regex=True)
            print(sport)
            check_lineups = cleaned_df.copy()
            split_columns = _CLASSIC_SPLIT_COLUMNS[sport]
            slot_order = _CLASSIC_OUTPUT_SLOTS[sport]
        elif type == 'Showdown':
            if sport in ('NHL', 'NFL'):
                cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' FLEX ', 'CPT '], value=',', regex=True)
            # NHL and NFL deliberately fall through to the generic CPT/UTIL
            # pass below as well, exactly as the original chain did.
            if sport == 'GOLF':
                cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' G ', 'G '], value=',', regex=True)
            elif sport == 'NAS':
                cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' D ', 'D '], value=',', regex=True)
            else:
                cleaned_df['Lineup'] = cleaned_df['Lineup'].replace([' UTIL ', 'CPT '], value=',', regex=True)
            print(type)
            check_lineups = cleaned_df.copy()
            split_columns = ['Remove'] + _SHOWDOWN_SLOTS
            slot_order = _SHOWDOWN_SLOTS
        else:
            raise ValueError(f'Unsupported contest type: {type!r}')

        cleaned_df[split_columns] = cleaned_df['Lineup'].str.split(',', expand=True)
        cleaned_df = cleaned_df.drop(columns=['Lineup', 'Remove'])
        # EntryCount is the number of lineups sharing the same base name.
        entry_counts = cleaned_df['BaseName'].value_counts()
        cleaned_df['EntryCount'] = cleaned_df['BaseName'].map(entry_counts)
        cleaned_df = cleaned_df[['BaseName', 'EntryCount'] + slot_order]
        print('Made it through check_lineups')

        # --- Append the optional backtesting portfolio ---------------------
        if portfolio is not None:
            # Work on a copy so the caller's DataFrame is left untouched.
            portfolio_df = portfolio.copy()
            labels = (('BaseName', 'Backtesting_upload'), ('EntryCount', len(portfolio_df)))
            for position, (column, value) in enumerate(labels):
                if column in portfolio_df.columns:
                    portfolio_df[column] = value
                else:
                    # Put the label columns in front so the positional
                    # rename below lines the player columns up with the
                    # roster slots instead of shifting them by two.
                    portfolio_df.insert(position, column, value)
            # Positional rename: assumes the portfolio's player columns are
            # in the same slot order as cleaned_df -- TODO confirm.
            portfolio_df.columns = cleaned_df.columns.tolist()
            cleaned_df = pd.concat([cleaned_df, portfolio_df], ignore_index=True)

        # --- Unique entry names --------------------------------------------
        entry_list = list(set(df['BaseName'].dropna()))
        if portfolio is not None:
            entry_list.append('Backtesting_upload')
        entry_list.sort()

        return cleaned_df, ownership_df, fpts_df, entry_list, check_lineups
    except Exception as e:
        st.error(f'Error loading file: {str(e)}')
        return None