import io
import os
import tempfile
from pathlib import Path

import gradio as gr
import pandas as pd
from huggingface_hub import HfApi, Repository

# Columns the annotation table works with; uploads must contain both.
COLUMNS = ["text", "label"]

# Module-level working copy of the data, read and replaced by the callbacks
# below. NOTE(review): being a module global, it is shared by every callback
# invocation in this process.
df = pd.DataFrame()


def upload_csv(file):
    """Load an uploaded CSV into the working table.

    Args:
        file: Value delivered by the ``gr.File`` input. May be ``None`` when
            nothing was selected; otherwise expected to expose the file path
            as ``.name`` (a plain path string is also accepted).

    Returns:
        A ``(table_update, status_message)`` pair for the Dataframe and
        status Textbox outputs. The table is hidden on any failure.
    """
    global df

    if file is None:
        return gr.update(visible=False), "❌ Please select a CSV file first."

    # Fall back to the value itself when it has no `.name` attribute.
    csv_path = getattr(file, "name", file)
    try:
        loaded = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # pandas parser/empty-data errors and decode errors are ValueErrors.
        return gr.update(visible=False), f"❌ Could not read CSV: {e}"

    if "text" not in loaded.columns or "label" not in loaded.columns:
        return gr.update(visible=False), "❌ CSV must contain ‘text’ and ‘label’ columns."

    # Only replace the working copy once the upload is known to be valid, so
    # a rejected file does not clobber previously loaded data.
    loaded["label"] = loaded["label"].fillna("")
    df = loaded
    return (
        gr.update(value=df[COLUMNS], visible=True),
        "✅ File uploaded — you can now edit labels.",
    )


def save_changes(edited_table):
    """Replace the working table with the edited contents of the UI table.

    Args:
        edited_table: Current value of the editable Dataframe component.

    Returns:
        A status message for the status Textbox.
    """
    global df
    df = pd.DataFrame(edited_table, columns=COLUMNS)
    return "💾 Changes saved."


def download_csv():
    """Write the working table to ``annotated_data.csv`` and return its path."""
    out_path = "annotated_data.csv"
    df.to_csv(out_path, index=False)
    return out_path


def push_to_hub(repo_name: str, hf_token: str) -> str:
    """Publish the working table as ``data.csv`` in a Hub dataset repo.

    The repo is created if it does not exist, cloned into a throwaway
    temporary directory, updated, and pushed.

    Args:
        repo_name: Target repo id, e.g. ``username/dataset-name``.
        hf_token: Hugging Face access token used for creation and push.

    Returns:
        A status message; failures are reported as text rather than raised.
    """
    repo_name = (repo_name or "").strip()
    if not repo_name:
        return "❌ Push failed: repository name is required."
    if df.empty:
        return "❌ Push failed: no data loaded — upload a CSV first."

    try:
        api = HfApi()
        api.create_repo(
            repo_id=repo_name,
            token=hf_token,
            repo_type="dataset",
            exist_ok=True,
        )

        # Clone into a fresh temporary directory on every push. Reusing a
        # fixed local directory required deleting the previous clone, and a
        # file-by-file unlink cannot remove the clone's `.git` subdirectory.
        with tempfile.TemporaryDirectory() as workdir:
            local_dir = Path(workdir) / repo_name.replace("/", "_")
            repo = Repository(
                local_dir=str(local_dir),
                clone_from=repo_name,
                repo_type="dataset",
                use_auth_token=hf_token,
            )
            df.to_csv(local_dir / "data.csv", index=False)
            repo.push_to_hub(commit_message="📑 Update annotated data")

        return f"🚀 Pushed to https://huggingface.co/datasets/{repo_name}"
    except Exception as e:
        # UI boundary: surface any failure in the status box.
        return f"❌ Push failed: {e}"


with gr.Blocks(theme=gr.themes.Default()) as app:
    gr.Markdown("## 🏷️ Label It! Text Annotation Tool")
    gr.Markdown(
        "Upload a `.csv` with `text` + `label` columns, annotate in-place, then export or publish."
    )

    with gr.Row():
        file_input = gr.File(label="📁 Upload CSV", file_types=[".csv"])
        upload_btn = gr.Button("Upload")

    df_table = gr.Dataframe(
        headers=COLUMNS,
        label="📝 Editable Table",
        interactive=True,
        visible=False,
    )
    status = gr.Textbox(label="Status", interactive=False)

    with gr.Row():
        save_btn = gr.Button("💾 Save")
        download_btn = gr.Button("⬇️ Download CSV")
        download_out = gr.File(label="📥 Downloaded File")

    with gr.Accordion("📦 Push to Hugging Face Hub", open=False):
        repo_input = gr.Textbox(label="Repo (username/dataset-name)")
        token_input = gr.Textbox(label="HF Token", type="password")
        push_btn = gr.Button("🚀 Push")
        push_status = gr.Textbox(label="Push Status", interactive=False)

    # Wire buttons to their callbacks.
    upload_btn.click(upload_csv, inputs=file_input, outputs=[df_table, status])
    save_btn.click(save_changes, inputs=df_table, outputs=status)
    download_btn.click(download_csv, outputs=download_out)
    push_btn.click(push_to_hub, inputs=[repo_input, token_input], outputs=push_status)

app.launch()