Commit 7fc4a6c · Parent(s): ba129e3
it worked! mostly

Files changed:
- app.py +161 -114
- utils_display.py +1 -1
app.py CHANGED

@@ -1,140 +1,187 @@
+
 import gradio as gr
 import pandas as pd
 import json
-from …
-from init import is_model_on_hub, upload_file, load_all_info_from_dataset_hub
-from utils_display import AutoEvalColumn, fields, make_clickable_model, styled_error, styled_message
+from pathlib import Path
 from datetime import datetime, timezone

-LAST_UPDATED = "…
+LAST_UPDATED = "Dec 4th 2024"
+QUEUE_DIR = Path("/Users/arunasrivastava/Koel/IPA-Leaderboard/IPA-Transcription-EN-queue/queue")
+APP_DIR = Path("./")

+# Modified column names for phonemic transcription metrics
 column_names = {
     "MODEL": "Model",
-    "…
-    "…
+    "SUBMISSION_NAME": "Submission Name",
+    "AVG_PER": "Average PER ⬇️",
+    "AVG_PFER": "Average PFER ⬇️",
+    "SUBSET": "Dataset Subset",
+    "GITHUB_URL": "GitHub",
+    "DATE": "Submission Date"
 }

-…
-#…
-…
-    x = x…
-…
-    for col in original_df.columns:
-        if col == "model":
-            original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
-        else:
-            original_df[col] = original_df[col].apply(formatter) # For numerical values
-
-    original_df.rename(columns=column_names, inplace=True)
-    original_df.sort_values(by='Average WER ⬇️', inplace=True)
-
-COLS = [c.name for c in fields(AutoEvalColumn)]
-TYPES = [c.type for c in fields(AutoEvalColumn)]
+def load_leaderboard_data():
+    leaderboard_path = QUEUE_DIR / "leaderboard.json"
+    if not leaderboard_path.exists():
+        print(f"Warning: Leaderboard file not found at {leaderboard_path}")
+        return pd.DataFrame()
+
+    try:
+        with open(leaderboard_path, 'r') as f:
+            data = json.load(f)
+        df = pd.DataFrame(data)
+        return df
+    except Exception as e:
+        print(f"Error loading leaderboard data: {e}")
+        return pd.DataFrame()
+
+def format_leaderboard_df(df):
+    if df.empty:
+        return df
+
+    # Rename columns to display names
+    display_df = df.rename(columns={
+        "model": "MODEL",
+        "submission_name": "SUBMISSION_NAME",
+        "average_per": "AVG_PER",
+        "average_pfer": "AVG_PFER",
+        "subset": "SUBSET",
+        "github_url": "GITHUB_URL",
+        "submission_date": "DATE"
+    })
+
+    # Format numeric columns
+    display_df["AVG_PER"] = display_df["AVG_PER"].apply(lambda x: f"{x:.4f}")
+    display_df["AVG_PFER"] = display_df["AVG_PFER"].apply(lambda x: f"{x:.4f}")
+
+    # Make GitHub URLs clickable
+    display_df["GITHUB_URL"] = display_df["GITHUB_URL"].apply(
+        lambda x: f'<a href="{x}" target="_blank">Repository</a>' if x else "N/A"
+    )
+
+    # Sort by PER (ascending)
+    display_df.sort_values(by="AVG_PER", inplace=True)
+
+    return display_df

-def request_model(model_text, chbcoco2017):
+def request_evaluation(model_name, submission_name, github_url, subset="test", max_samples=5):
+    if not model_name or not submission_name:
+        return gr.Markdown("⚠️ Please provide both model name and submission name.")

-…
-    dataset_selection = []
-    if chbcoco2017:
-        dataset_selection.append("ESB Datasets tests only")
-
-…
-    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
-    required_datasets = ', '.join(dataset_selection)
-    eval_entry = {
-        "date": current_time,
-        "model": model_text,
-        "datasets_selected": required_datasets
+    request_data = {
+        "transcription_model": model_name,
+        "subset": subset,
+        "max_samples": max_samples,
+        "submission_name": submission_name,
+        "github_url": github_url or ""
     }

-    # Prepare file path
-    DIR_OUTPUT_REQUESTS.mkdir(parents=True, exist_ok=True)
-
-    fn_datasets = '@ '.join(dataset_selection)
-    filename = model_text.replace("/","@") + "@@" + fn_datasets
-    if filename in requested_models:
-        return styled_error(f"A request for this model '{model_text}' and dataset(s) was already made.")
     try:
-…
-        # Write the results to a text file
-        with open(out_filepath, "w") as f:
-            f.write(json.dumps(eval_entry))
-
-        upload_file(filename, out_filepath)
-
-        #…
-
-        # Remove the local file
-        out_filepath.unlink()
-
-        return styled_message("🤗 Your request has been submitted and will be evaluated soon!</p>")
+        # Ensure queue directory exists
+        QUEUE_DIR.mkdir(parents=True, exist_ok=True)
+
+        # Generate unique timestamp for request file
+        timestamp = datetime.now(timezone.utc).isoformat().replace(":", "-")
+        request_file = QUEUE_DIR / f"request_{timestamp}.json"
+
+        with open(request_file, 'w') as f:
+            json.dump(request_data, f, indent=2)
+
+        return gr.Markdown("✅ Evaluation request submitted successfully! Your results will appear on the leaderboard once processing is complete.")
     except Exception as e:
-        return …
+        return gr.Markdown(f"❌ Error submitting request: {str(e)}")

-with gr.Blocks(css=LEADERBOARD_CSS) as demo:
-    gr.HTML(BANNER, elem_id="banner")
-    gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
-…
+def load_results_for_model(model_name):
+    results_path = QUEUE_DIR / "results.json"
+    try:
+        with open(results_path, 'r') as f:
+            results = json.load(f)
+
+        # Filter results for the specific model
+        model_results = [r for r in results if r["model"] == model_name]
+        if not model_results:
+            return None
+
+        # Get the most recent result
+        latest_result = max(model_results, key=lambda x: x["timestamp"])
+        return latest_result
+    except Exception as e:
+        print(f"Error loading results: {e}")
+        return None
+
+# Create Gradio interface
+with gr.Blocks() as demo:
+    gr.Markdown("# 🎯 Phonemic Transcription Model Evaluation Leaderboard")
+    gr.Markdown("""
+    Compare the performance of different phonemic transcription models on speech-to-IPA transcription tasks.
+
+    **Metrics:**
+    - **PER (Phoneme Error Rate)**: Measures the edit distance between predicted and ground truth phonemes (lower is better)
+    - **PFER (Phoneme Frame Error Rate)**: Measures frame-level phoneme prediction accuracy (lower is better)
+    """)
+
+    with gr.Tabs() as tabs:
+        with gr.TabItem("🏆 Leaderboard"):
+            leaderboard_df = load_leaderboard_data()
+            formatted_df = format_leaderboard_df(leaderboard_df)
+
+            leaderboard_table = gr.DataFrame(
+                value=formatted_df,
                 interactive=False,
-…
+                headers=list(column_names.values())
+            )
+
+            refresh_btn = gr.Button("🔄 Refresh Leaderboard")
+            refresh_btn.click(
+                lambda: gr.DataFrame(value=format_leaderboard_df(load_leaderboard_data()))
+            )
+
+        with gr.TabItem("📝 Submit Model"):
             with gr.Column():
-                gr.…
-…
+                model_input = gr.Textbox(
+                    label="Model Name",
+                    placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft",
+                    info="Enter the Hugging Face model ID"
+                )
+                submission_name = gr.Textbox(
+                    label="Submission Name",
+                    placeholder="My Awesome Model v1.0",
+                    info="Give your submission a descriptive name"
+                )
+                github_url = gr.Textbox(
+                    label="GitHub Repository URL (optional)",
+                    placeholder="https://github.com/username/repo",
+                    info="Link to your model's code repository"
+                )
+
+                submit_btn = gr.Button("🚀 Submit for Evaluation")
+                result_text = gr.Markdown()
+
+                submit_btn.click(
+                    request_evaluation,
+                    inputs=[model_input, submission_name, github_url],
+                    outputs=result_text
+                )
+
+        with gr.TabItem("ℹ️ Detailed Results"):
+            model_selector = gr.Textbox(
+                label="Enter Model Name to View Details",
+                placeholder="facebook/wav2vec2-lv-60-espeak-cv-ft"
             )
+            view_btn = gr.Button("View Results")
+            results_json = gr.JSON(label="Detailed Results")
+
+            def show_model_results(model_name):
+                results = load_results_for_model(model_name)
+                return results or {"error": "No results found for this model"}
+
+            view_btn.click(
+                show_model_results,
+                inputs=[model_selector],
+                outputs=[results_json]
+            )
+
+    gr.Markdown(f"Last updated: {LAST_UPDATED}")

-demo.launch(…
+demo.launch()
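A few notes on the new code follow.

load_leaderboard_data and load_results_for_model assume particular record shapes for leaderboard.json and results.json, neither of which is part of this commit. From the rename map in format_leaderboard_df and the key lookups in load_results_for_model, the expected records would look roughly like this (field names are inferred from the code; values are illustrative, not real data):

    # Hypothetical leaderboard.json record, inferred from format_leaderboard_df's rename map
    leaderboard_entry = {
        "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
        "submission_name": "My Awesome Model v1.0",
        "average_per": 0.1234,   # numeric; rendered as a 4-decimal string for display
        "average_pfer": 0.2345,
        "subset": "test",
        "github_url": "https://github.com/username/repo",
        "submission_date": "2024-12-04T12:00:00+00:00",
    }

    # Hypothetical results.json record: load_results_for_model reads r["model"] and r["timestamp"]
    result_entry = {
        "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
        "timestamp": "2024-12-04T12:34:56+00:00",
    }

One caveat: format_leaderboard_df converts AVG_PER to a string before sort_values, so the sort is lexicographic rather than numeric; it only orders correctly while all values have the same number of integer digits.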
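request_evaluation implements a simple file-based queue: each submission is written as request_<timestamp>.json under QUEUE_DIR, presumably for an external worker (not part of this commit) to pick up. The payload is exactly request_data, so a queued file would contain, for example:

    {
      "transcription_model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
      "subset": "test",
      "max_samples": 5,
      "submission_name": "My Awesome Model v1.0",
      "github_url": "https://github.com/username/repo"
    }

Note that QUEUE_DIR is an absolute path on the author's machine (/Users/arunasrivastava/...); it will not exist on a deployed Space, where a path relative to APP_DIR would be needed instead.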
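One apparent gap, which may explain the "mostly" in the commit message: refresh_btn.click is wired with a lambda but no outputs, so the DataFrame the lambda returns is discarded and the on-screen table never updates. A minimal fix sketch, assuming the intent is to repopulate leaderboard_table:

    refresh_btn.click(
        lambda: format_leaderboard_df(load_leaderboard_data()),
        outputs=leaderboard_table,  # route the refreshed DataFrame back into the table
    )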
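The intro text describes PER as an edit-distance metric. The scoring code is not part of this commit, but the usual definition is Levenshtein distance between predicted and reference phoneme sequences, normalized by reference length; a self-contained sketch for illustration only:

    def phoneme_error_rate(reference, hypothesis):
        # Levenshtein edit distance between phoneme sequences,
        # normalized by the reference length
        m, n = len(reference), len(hypothesis)
        prev = list(range(n + 1))  # distances from reference[:0] to each hypothesis[:j]
        for i in range(1, m + 1):
            curr = [i] + [0] * n
            for j in range(1, n + 1):
                cost = 0 if reference[i - 1] == hypothesis[j - 1] else 1
                curr[j] = min(prev[j] + 1,         # deletion
                              curr[j - 1] + 1,     # insertion
                              prev[j - 1] + cost)  # substitution
            prev = curr
        return prev[n] / max(m, 1)

    # One substitution out of four reference phonemes -> PER 0.25
    print(phoneme_error_rate(["h", "ə", "l", "oʊ"], ["h", "ɛ", "l", "oʊ"]))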
utils_display.py CHANGED

@@ -13,7 +13,7 @@ def fields(raw_class):
 @dataclass(frozen=True)
 class AutoEvalColumn: # Auto evals column
     model = ColumnContent("Model", "markdown")
-    …
+    avg_per = ColumnContent("Average PER ⬇️", "number")
     avg_wped = ColumnContent("Average PWED ⬇️", "number")
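For context on the utils_display.py hunk: ColumnContent and fields are defined above it (only def fields(raw_class): is visible in the hunk header), and the old app.py iterated fields(AutoEvalColumn) reading c.name and c.type. They plausibly look like the following; treat this as a reconstruction from usage, not the actual source:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class ColumnContent:
        name: str
        type: str

    def fields(raw_class):
        # Collect the ColumnContent attributes declared on the class (e.g. AutoEvalColumn)
        return [v for v in raw_class.__dict__.values() if isinstance(v, ColumnContent)]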