Spaces:
Running
Running
| import pandas as pd | |
| from typing import List, Dict, Optional | |
| import gradio as gr | |
| from datasets import load_dataset | |
| import numpy as np | |
class AuthorLeaderboard:
    """
    Manage and process author leaderboard data for display in a Gradio Dataframe component.

    The raw table is loaded once from the Hugging Face Hub (``get_df``), ranked and
    decorated for display (``prettify``), and then served, optionally filtered by an
    author search string, through ``filter``.
    """

    # Display columns, in the order they are shown.
    COLUMNS_ORDER: List[str] = [
        'Rank',
        'Author',
        'Total Artifacts',
        'Avg Artifacts per Paper',
        'Total Papers',
        'Total Models',
        'Total Datasets',
        'Total Spaces',
        'Upvotes',
        'Comments',
    ]

    # Datatype string passed to the Dataframe component for each display column.
    # NOTE(review): Gradio documents 'str', 'number', 'bool', 'date', 'markdown'
    # and 'html' as Dataframe datatypes -- confirm that 'int' / 'float' are
    # accepted by the installed Gradio version.
    DATATYPES: Dict[str, str] = {
        'Rank': 'str',
        'Author': 'markdown',
        'Total Artifacts': 'int',
        'Avg Artifacts per Paper': 'float',
        'Total Papers': 'int',
        'Total Models': 'int',
        'Total Datasets': 'int',
        'Total Spaces': 'int',
        'Upvotes': 'int',
        'Comments': 'int',
    }

    # Medal shown in front of the rank number for the top three authors.
    EMOTICONS: Dict[int, str] = {
        1: '🥇',
        2: '🥈',
        3: '🥉',
    }

    # Prefix of every author profile link. It is shared by link creation and by
    # the search filter, which must ignore it when matching.
    PROFILE_URL_PREFIX: str = 'https://huggingface.co/'

    def __init__(self):
        """
        Initialize the AuthorLeaderboard class by loading and processing the dataset.
        """
        self.df_raw: pd.DataFrame = self.get_df()
        self.df_prettified: pd.DataFrame = self.prettify(self.df_raw)

    @staticmethod
    def get_df() -> pd.DataFrame:
        """
        Load the leaderboard dataset from the Hugging Face Hub and process it.

        This is a static method: it reads no instance state, and the original
        zero-argument definition raised ``TypeError`` when called as
        ``self.get_df()``.

        Returns:
            pd.DataFrame: The processed DataFrame (see ``_process_raw_df``).
        """
        dataset = load_dataset('IAMJB/paper-central-leaderboard', split='train')
        return AuthorLeaderboard._process_raw_df(dataset.to_pandas())

    @staticmethod
    def _process_raw_df(df: pd.DataFrame) -> pd.DataFrame:
        """
        Add the derived artifact columns and rename raw columns to display names.

        Args:
            df (pd.DataFrame): Raw table with ``name``, ``num_papers``,
                ``num_models``, ``num_datasets``, ``num_spaces``, ``upvotes``
                and ``num_comments`` columns. It is not modified.

        Returns:
            pd.DataFrame: A new DataFrame with 'Total Artifacts' and
            'Avg Artifacts per Paper' added and the raw columns renamed.
        """
        df = df.copy()

        df['Total Artifacts'] = df['num_models'] + df['num_datasets'] + df['num_spaces']

        # Mask zero paper counts before dividing so that authors without papers
        # get an average of 0.0 instead of inf / NaN.
        papers = df['num_papers'].where(df['num_papers'] != 0)
        df['Avg Artifacts per Paper'] = (df['Total Artifacts'] / papers).fillna(0.0).round(2)

        return df.rename(columns={
            'name': 'Author',
            'num_papers': 'Total Papers',
            'num_models': 'Total Models',
            'num_datasets': 'Total Datasets',
            'num_spaces': 'Total Spaces',
            'upvotes': 'Upvotes',
            'num_comments': 'Comments',
        })

    def prettify(self, df: pd.DataFrame) -> pd.DataFrame:
        """
        Prettify the DataFrame by adding rankings, emoticons, and markdown links.

        Args:
            df (pd.DataFrame): The processed DataFrame to prettify. It is not modified.

        Returns:
            pd.DataFrame: A new DataFrame restricted to ``COLUMNS_ORDER``, sorted by
            'Total Artifacts' in descending order and indexed by rank (starting at 1).
        """
        # A stable sort keeps tied authors in their dataset order, so the ranking
        # is reproducible between runs.
        ranked = df.sort_values(
            by='Total Artifacts', ascending=False, kind='mergesort'
        ).reset_index(drop=True)
        ranked.index += 1  # Ranks start at 1

        # Rank label: medal + number for the top three, plain number otherwise.
        ranked['Rank'] = [
            f"{self.EMOTICONS[rank]} {rank}" if rank in self.EMOTICONS else f"{rank}"
            for rank in ranked.index
        ]

        # Turn 'Author' into a markdown profile link when a username is available.
        ranked['Author'] = ranked.apply(self._create_author_link, axis=1)

        return ranked[self.COLUMNS_ORDER]

    def _create_author_link(self, row: pd.Series) -> str:
        """
        Create a markdown link for the author's profile.

        Args:
            row (pd.Series): A row from the DataFrame, holding 'Author' and
                optionally 'username'.

        Returns:
            str: ``[Author](profile url)`` when the row has a non-empty username,
            otherwise the plain author name.
        """
        username = row.get('username')
        if pd.notna(username) and username:
            return f"[{row['Author']}]({self.PROFILE_URL_PREFIX}{username})"
        return row['Author']

    def _filter_df(self, author_search_input: Optional[str] = None) -> pd.DataFrame:
        """
        Return the rows of the prettified table whose author matches the search text.

        Matching is a case-insensitive literal substring test against the author
        cell with the profile URL prefix removed, so both the display name and the
        username can be searched.

        Args:
            author_search_input (Optional[str]): Text to search for. ``None``, empty
                or whitespace-only input selects every row.

        Returns:
            pd.DataFrame: A copy of the matching rows.
        """
        filtered_df = self.df_prettified.copy()

        needle = (author_search_input or '').strip().lower()
        if not needle:
            return filtered_df

        # Drop the shared URL prefix first; otherwise a query such as "hugging"
        # or "co" would match every author that has a profile link.
        haystack = (
            filtered_df['Author']
            .fillna('')
            .astype(str)
            .str.replace(self.PROFILE_URL_PREFIX, '', regex=False)
            .str.lower()
        )
        # regex=False: the query is user input, and characters such as '(' or '*'
        # must be matched literally instead of raising re.error.
        return filtered_df[haystack.str.contains(needle, regex=False, na=False)]

    # The return annotation is quoted because ``gr.update`` is a function rather
    # than a type, and must not be evaluated when the class body is executed.
    def filter(self, author_search_input: Optional[str] = None) -> "gr.update":
        """
        Filter the DataFrame based on the author search input.

        Args:
            author_search_input (Optional[str]): The author name to search for.

        Returns:
            gr.update: An update for the Gradio Dataframe component carrying the
            filtered table and the datatype of each of its columns.
        """
        filtered_df = self._filter_df(author_search_input)
        datatypes: List[str] = [self.DATATYPES.get(col, 'str') for col in filtered_df.columns]
        return gr.update(value=filtered_df, datatype=datatypes)