Spaces:
Runtime error
Runtime error
| import json | |
| import os | |
| from collections import defaultdict | |
| from functools import lru_cache | |
| import argilla as rg | |
| import pandas as pd | |
| from dotenv import load_dotenv | |
# Load settings from a local .env file (if one is present) before the
# os.getenv() lookups further down read ARGILLA_API_URL / ARGILLA_API_KEY.
load_dotenv()
# Output file names for the generated leaderboards.
LEADERBOARD_PERSONAL_CSV = "leaderboard_personal.csv"
LEADERBOARD_EQUIPOS_CSV = "leaderboard_equipos.csv"

# Input data lives under DATA_DIR.
DATA_DIR = "data"
PARTICIPANTS_CSV = os.path.join(DATA_DIR, "participants.csv")
EQUIPOS_CSV = os.path.join(DATA_DIR, "equipos.csv")

# Logical field name -> column header as it appears in the participants CSV.
COLUMN_MAP = dict(
    gmail="Dirección de correo electrónico",
    discord="¿Cuál es tu nombre en Discord?",
    hf_username="¿Cuál es tu nombre en el Hub de Hugging Face?",
    contact_email="Email de contacto",
)
# Shared Argilla client, configured from the environment.  If construction
# fails for any reason the error is printed and `client` stays None, which
# get_blend_es_data() treats as "no Argilla data available".
client = None
try:
    client = rg.Argilla(
        api_url=os.getenv("ARGILLA_API_URL", ""),
        api_key=os.getenv("ARGILLA_API_KEY", ""),
    )
except Exception as exc:
    print(f"Error initializing Argilla client: {exc}")
# Countries data: (display name, ISO alpha-3 code, flag emoji), in the order
# the datasets are processed.
_COUNTRY_ROWS = (
    ("Argentina", "ARG", "🇦🇷"),
    ("Bolivia", "BOL", "🇧🇴"),
    ("Chile", "CHL", "🇨🇱"),
    ("Colombia", "COL", "🇨🇴"),
    ("Costa Rica", "CRI", "🇨🇷"),
    ("Cuba", "CUB", "🇨🇺"),
    ("Ecuador", "ECU", "🇪🇨"),
    ("El Salvador", "SLV", "🇸🇻"),
    ("España", "ESP", "🇪🇸"),
    ("Guatemala", "GTM", "🇬🇹"),
    ("Honduras", "HND", "🇭🇳"),
    ("México", "MEX", "🇲🇽"),
    ("Nicaragua", "NIC", "🇳🇮"),
    ("Panamá", "PAN", "🇵🇦"),
    ("Paraguay", "PRY", "🇵🇾"),
    ("Perú", "PER", "🇵🇪"),
    ("Puerto Rico", "PRI", "🇵🇷"),
    ("República Dominicana", "DOM", "🇩🇴"),
    ("Uruguay", "URY", "🇺🇾"),
    ("Venezuela", "VEN", "🇻🇪"),
)

# Country name -> {"iso": ..., "emoji": ...}; used to build Argilla dataset names.
countries = {
    name: {"iso": iso, "emoji": flag} for name, iso, flag in _COUNTRY_ROWS
}
def get_user_mapping():
    """Build lookup tables from e-mail / Hugging Face username to Discord name.

    Reads PARTICIPANTS_CSV using the column headers in COLUMN_MAP.  Rows whose
    Discord cell is missing (or the literal "NA") are skipped.  All keys and
    values are lower-cased.

    Note: nothing is cached here; the CSV is re-read on every call.

    Returns:
        tuple[dict, dict]: ``(email_to_discord, hf_to_discord)``.  Both the
        "gmail" and "contact_email" columns feed ``email_to_discord`` (the
        contact e-mail wins if both produce the same key).  Two empty dicts
        are returned when the file is absent or cannot be processed.
    """
    if not os.path.exists(PARTICIPANTS_CSV):
        return {}, {}

    try:
        df = pd.read_csv(PARTICIPANTS_CSV)
        email_to_discord = {}
        hf_to_discord = {}
        # (logical field, destination table) in the order they must be applied.
        targets = (
            ("gmail", email_to_discord),
            ("contact_email", email_to_discord),
            ("hf_username", hf_to_discord),
        )

        for _, row in df.iterrows():
            discord = row.get(COLUMN_MAP["discord"], "")
            if pd.isna(discord) or discord == "NA":
                continue
            # str() guards against cells pandas parsed as numbers; without it
            # a single such cell would raise and discard the whole mapping.
            discord_lower = str(discord).lower()

            for field, table in targets:
                value = row.get(COLUMN_MAP[field], "")
                if pd.notna(value) and str(value).strip():
                    table[str(value).lower()] = discord_lower

        return email_to_discord, hf_to_discord
    except Exception as e:
        print(f"Error loading {PARTICIPANTS_CSV}: {e}")
        return {}, {}
def get_discord_username(identifier):
    """Translate an e-mail address or Hugging Face username to a Discord name.

    An identifier containing "@" is looked up (lower-cased) in the e-mail
    table; if unknown, the part before the first "@" is returned.  Any other
    identifier is looked up (lower-cased) in the Hugging Face table; if
    unknown, the identifier is returned unchanged.
    """
    by_email, by_hf = get_user_mapping()

    if "@" not in identifier:
        return by_hf.get(identifier.lower(), identifier)

    lookup_key = identifier.lower()
    local_part = identifier.split("@")[0]
    return by_email.get(lookup_key, local_part)
def get_participant_info():
    """Load per-participant contact details from PARTICIPANTS_CSV.

    Rows whose Discord cell is missing (or the literal "NA") are skipped.

    Returns:
        dict: lower-cased Discord username -> dict with the keys "gmail",
        "discord_username", "hf_username" and "email", holding the cell
        values exactly as pandas returned them.  An empty dict is returned
        when the file is absent or cannot be processed.
    """
    if not os.path.exists(PARTICIPANTS_CSV):
        return {}

    try:
        df = pd.read_csv(PARTICIPANTS_CSV)
        participant_info = {}

        for _, row in df.iterrows():
            discord_username = row.get(COLUMN_MAP["discord"], "")
            if pd.isna(discord_username) or discord_username == "NA":
                continue
            # str() guards against cells pandas parsed as numbers; without it
            # a single such cell would raise and discard every participant.
            participant_info[str(discord_username).lower()] = {
                "gmail": row.get(COLUMN_MAP["gmail"], ""),
                "discord_username": discord_username,
                "hf_username": row.get(COLUMN_MAP["hf_username"], ""),
                "email": row.get(COLUMN_MAP["contact_email"], ""),
            }

        return participant_info
    except Exception as e:
        print(f"Error loading participant info: {e}")
        return {}
def get_blend_es_data():
    """Count blend-es responses per user across the per-country Argilla datasets.

    For every entry in ``countries`` the dataset named
    "<emoji> <country> - <iso> - Responder" is fetched and each entry found
    under the "answer_1" key of a record's responses is counted against its
    ``user_id``.  User ids are resolved to Argilla usernames and then passed
    through get_discord_username().

    A failure while processing one dataset is printed and that dataset is
    skipped; the remaining datasets are still processed.

    Returns:
        list[dict]: items of the form
        ``{"source": "blend-es", "username": ..., "count": ...}``; empty when
        no Argilla client is available.
    """
    if not client:
        return []

    data = []
    for country, info in countries.items():
        dataset_name = f"{info['emoji']} {country} - {info['iso']} - Responder"
        try:
            dataset = client.datasets(dataset_name)
            user_counts = defaultdict(int)
            user_mapping = {}

            for record in dataset.records(with_responses=True):
                if "answer_1" not in record.responses:
                    continue
                for answer in record.responses["answer_1"]:
                    user_id = answer.user_id
                    if not user_id:
                        continue
                    user_counts[user_id] += 1
                    if user_id in user_mapping:
                        continue
                    try:
                        user_mapping[user_id] = client.users(id=user_id).username
                    except Exception:
                        # Best effort: fall back to a placeholder name.
                        # str() keeps the slice valid when the id is not a
                        # plain string (e.g. a UUID object).
                        user_mapping[user_id] = f"User-{str(user_id)[:8]}"

            # Every counted user_id was given a mapping entry above.
            for user_id, count in user_counts.items():
                username = get_discord_username(user_mapping[user_id])
                data.append(
                    {"source": "blend-es", "username": username, "count": count}
                )
        except Exception as e:
            print(f"Error processing {dataset_name}: {e}")

    return data
def get_include_data():
    """Sum INCLUDE question counts per user from ``include.csv`` in DATA_DIR.

    Returns:
        list[dict]: items of the form
        ``{"source": "include", "username": ..., "count": ...}`` keyed by
        lower-cased username.  Empty when the file or either expected column
        is missing, or when reading/processing fails.
    """
    csv_path = os.path.join(DATA_DIR, "include.csv")
    if not os.path.exists(csv_path):
        return []

    try:
        df = pd.read_csv(csv_path)

        username_col = "Nombre en Discord / username"
        questions_col = "Total preguntas hackathon"
        if not all(col in df.columns for col in (username_col, questions_col)):
            return []

        totals = defaultdict(int)
        for _, row in df.iterrows():
            raw_name = row[username_col]
            # The first character of the cell is always dropped.
            # NOTE(review): presumably a leading "@" — confirm against the data.
            name = "" if pd.isna(raw_name) else raw_name[1:]

            raw_total = row[questions_col]
            total = 0 if pd.isna(raw_total) else raw_total

            if not (name and total):
                continue
            totals[name.lower()] += int(total)

        results = []
        for name, total in totals.items():
            results.append({"source": "include", "username": name, "count": total})
        return results
    except Exception as e:
        print(f"Error loading include data: {e}")
        return []
def get_estereotipos_data():
    """Sum estereotipos counts per user from ``stereotypes.csv`` in DATA_DIR.

    The "count" column is accumulated per lower-cased "token_id"; each
    token_id is then resolved through get_discord_username().

    Returns:
        list[dict]: items of the form
        ``{"source": "estereotipos", "username": ..., "count": ...}``.  Empty
        when the file or either expected column is missing, or when
        reading/processing fails.
    """
    csv_path = os.path.join(DATA_DIR, "stereotypes.csv")
    if not os.path.exists(csv_path):
        return []

    try:
        df = pd.read_csv(csv_path)
        if not all(col in df.columns for col in ("token_id", "count")):
            return []

        totals = defaultdict(int)
        for _, row in df.iterrows():
            token, amount = row["token_id"], row["count"]
            if pd.isna(token) or pd.isna(amount):
                continue
            totals[token.lower()] += int(amount)

        results = []
        for token, amount in totals.items():
            results.append(
                {
                    "source": "estereotipos",
                    "username": get_discord_username(token),
                    "count": amount,
                }
            )
        return results
    except Exception as e:
        print(f"Error loading estereotipos data: {e}")
        return []
def get_arena_data():
    """Count arena conversations per user from ``arena.json`` in DATA_DIR.

    The JSON is read as a mapping whose values are lists of conversation
    dicts.  Each conversation with a truthy "username" adds one to that
    (lower-cased) user's tally; names are then resolved through
    get_discord_username().

    Returns:
        list[dict]: items of the form
        ``{"source": "arena", "username": ..., "count": ...}``.  Empty when
        the file is missing or reading/processing fails.
    """
    json_path = os.path.join(DATA_DIR, "arena.json")
    if not os.path.exists(json_path):
        return []

    try:
        with open(json_path, "r", encoding="utf-8") as handle:
            arena_data = json.load(handle)

        tally = defaultdict(int)
        for group in arena_data.values():
            for conversation in group:
                name = conversation.get("username")
                if not name:
                    continue
                tally[name.lower()] += 1

        results = []
        for name, total in tally.items():
            results.append(
                {
                    "source": "arena",
                    "username": get_discord_username(name),
                    "count": total,
                }
            )
        return results
    except Exception as e:
        print(f"Error loading arena data: {e}")
        return []
def calculate_personal_scores():
    """Merge all contribution sources into the personal leaderboard.

    Collects the items produced by get_blend_es_data(), get_include_data(),
    get_estereotipos_data() and get_arena_data(), groups them by lower-cased
    username, attaches contact details from get_participant_info() when the
    username is known, and sums the counts per source.

    Side effects (only when at least one row exists):
        * the full table, including the Gmail / Email columns, is written to
          ``DATA_DIR/LEADERBOARD_PERSONAL_CSV`` (DATA_DIR is created if
          missing);
        * the public table, without contact columns, is written to
          ``LEADERBOARD_PERSONAL_CSV`` relative to the working directory.

    NOTE(review): the source this was recovered from had lost its
    indentation; the CSV writes are assumed to sit inside the "not empty"
    guards — confirm.

    Returns:
        pandas.DataFrame: the public table (Username, Arena, Blend-ES,
        Estereotipos, INCLUDE) sorted by "Arena" descending; an empty frame
        when there is no data.
    """
    # Collect all data
    all_data = (
        get_blend_es_data()
        + get_include_data()
        + get_estereotipos_data()
        + get_arena_data()
    )

    # Get participant info
    participant_info = get_participant_info()

    # Item "source" value -> counter key in the per-user record.
    source_to_field = {
        "blend-es": "blend_es",
        "include": "include",
        "estereotipos": "estereotipos",
        "arena": "arena",
    }

    # Aggregate user contributions
    user_contributions = defaultdict(
        lambda: {
            "username": "",
            "gmail": "",
            "discord_username": "",
            "hf_username": "",
            "email": "",
            "blend_es": 0,
            "include": 0,
            "estereotipos": 0,
            "arena": 0,
        }
    )

    for item in all_data:
        source = item["source"]
        username = item["username"]
        count = item["count"]

        user_key = username.lower()
        entry = user_contributions[user_key]

        # Keep the first spelling seen for display purposes.
        if not entry["username"]:
            entry["username"] = username

        info = participant_info.get(user_key)
        if info is not None:
            entry.update(
                {
                    "gmail": info["gmail"],
                    "discord_username": info["discord_username"],
                    "hf_username": info["hf_username"],
                    "email": info["email"],
                }
            )

        # Items with an unrecognised source still register the user but add
        # nothing to any counter.
        field = source_to_field.get(source)
        if field is not None:
            entry[field] += count

    # Full rows (private: contain contact details) for the CSV in DATA_DIR.
    full_rows = [
        {
            "Username": data["username"],
            "Gmail": data["gmail"],
            "Discord_Username": data["discord_username"],
            "HF_Username": data["hf_username"],
            "Email": data["email"],
            "Arena": data["arena"],
            "Blend-ES": data["blend_es"],
            "Estereotipos": data["estereotipos"],
            "INCLUDE": data["include"],
        }
        for data in user_contributions.values()
    ]

    # Display rows (public) for the UI.
    public_columns = ("Username", "Arena", "Blend-ES", "Estereotipos", "INCLUDE")
    display_rows = [{col: row[col] for col in public_columns} for row in full_rows]

    # Save full data to CSV
    full_df = pd.DataFrame(full_rows)
    if not full_df.empty:
        full_df.sort_values("Arena", ascending=False, inplace=True)
        # Rows can exist even when DATA_DIR does not (e.g. only Argilla
        # supplied data), so make sure the target directory is there.
        os.makedirs(DATA_DIR, exist_ok=True)
        full_df.to_csv(
            os.path.join(DATA_DIR, LEADERBOARD_PERSONAL_CSV),
            index=False,
            encoding="utf-8",
        )

    # Return display dataframe for UI
    display_df = pd.DataFrame(display_rows)
    if not display_df.empty:
        display_df.sort_values("Arena", ascending=False, inplace=True)
        display_df.to_csv(LEADERBOARD_PERSONAL_CSV, index=False, encoding="utf-8")

    return display_df
# When executed as a script, rebuild the personal leaderboard once (the
# returned DataFrame is discarded; the CSV files are the useful output).
if __name__ == "__main__":
    calculate_personal_scores()