James McCool
committed on
Commit
·
62a6685
1
Parent(s):
2ceda65
Add unique and under-5/under-10 duplicate counts to duplication frame in app.py
Browse files

- Introduced calculations for 'uniques', 'under_5', and 'under_10' metrics in the working DataFrame, enhancing the analysis of duplicate lineups.
- Updated the duplication frame to include these new metrics, improving data clarity and analysis capabilities.
app.py
CHANGED
|
@@ -187,6 +187,19 @@ with tab2:
|
|
| 187 |
axis=1
|
| 188 |
)
|
| 189 |
working_df['dupes'] = working_df.groupby('sorted').transform('size')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 190 |
working_df = working_df.reset_index()
|
| 191 |
working_df['percentile_finish'] = working_df['index'].rank(pct=True)
|
| 192 |
working_df['finish'] = working_df['index']
|
|
@@ -433,8 +446,8 @@ with tab2:
|
|
| 433 |
dupe_frame['uniques%'] = dupe_frame['uniques'] / dupe_frame['EntryCount']
|
| 434 |
dupe_frame['under_5%'] = dupe_frame['under_5'] / dupe_frame['EntryCount']
|
| 435 |
dupe_frame['under_10%'] = dupe_frame['under_10'] / dupe_frame['EntryCount']
|
| 436 |
-
st.session_state['duplication_frame'] = dupe_frame[['BaseName', 'EntryCount', 'average_dupes', 'uniques', 'uniques%', 'under_5', 'under_5%', 'under_10', 'under_10%']].drop_duplicates(subset='BaseName', keep='first')
|
| 437 |
st.dataframe(st.session_state['duplication_frame'].style.
|
| 438 |
background_gradient(cmap='RdYlGn', subset=['uniques%', 'under_5%', 'under_10%'], axis=0).
|
| 439 |
-
background_gradient(cmap='RdYlGn_r', subset=['
|
| 440 |
format(precision=2), hide_index=True)
|
|
|
|
| 187 |
axis=1
|
| 188 |
)
|
| 189 |
# How many entries in the field share this lineup's sorted player set.
# Selecting the grouping column first keeps transform('size') a Series
# across pandas versions.
working_df['dupes'] = working_df.groupby('sorted')['sorted'].transform('size')

# Per-user duplicate-quality counts, broadcast back onto every row for that
# user. A boolean mask + groupby(...).transform('sum') does the whole job in
# one vectorized pass; the original groupby('BaseName').apply(...) +
# reindex(...).values round-trip produced the same numbers but iterated the
# groups in Python and (pandas >= 2.1) emits a FutureWarning about applying
# on grouping columns.
by_user = working_df['BaseName']
# uniques: entries whose lineup appears exactly once in the field.
working_df['uniques'] = working_df['dupes'].eq(1).groupby(by_user).transform('sum')
# under_5 / under_10: entries duplicated at most 5 / at most 10 times.
working_df['under_5'] = working_df['dupes'].le(5).groupby(by_user).transform('sum')
working_df['under_10'] = working_df['dupes'].le(10).groupby(by_user).transform('sum')

# Preserve the original (finish-order) index as explicit columns before it
# gets clobbered downstream: 'finish' is the raw placement,
# 'percentile_finish' that placement as a fraction of the field.
working_df = working_df.reset_index()
working_df['percentile_finish'] = working_df['index'].rank(pct=True)
working_df['finish'] = working_df['index']
|
|
|
|
| 446 |
# Express each per-user duplicate-quality count as a share of that user's
# total entry count.
for count_col in ('uniques', 'under_5', 'under_10'):
    dupe_frame[count_col + '%'] = dupe_frame[count_col] / dupe_frame['EntryCount']

# Collapse to one summary row per user (first row seen for each BaseName)
# and stash it in session state for reuse across reruns.
summary_cols = [
    'BaseName', 'EntryCount', 'average_dupes', 'dupes',
    'uniques', 'uniques%', 'under_5', 'under_5%', 'under_10', 'under_10%',
]
st.session_state['duplication_frame'] = (
    dupe_frame[summary_cols].drop_duplicates(subset='BaseName', keep='first')
)

# Green-is-good shading on the percentage columns, reversed shading on the
# raw counts, two-decimal formatting, no index column in the widget.
styled_frame = (
    st.session_state['duplication_frame']
    .style
    .background_gradient(cmap='RdYlGn', subset=['uniques%', 'under_5%', 'under_10%'], axis=0)
    .background_gradient(cmap='RdYlGn_r', subset=['uniques', 'under_5', 'under_10'], axis=0)
    .format(precision=2)
)
st.dataframe(styled_frame, hide_index=True)
|