import gradio as gr
import os
import json
import uuid
import torch
import datetime
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from huggingface_hub import HfApi, create_repo

# Configuration
MODEL_NAME = "distilbert-base-uncased-finetuned-sst-2-english"
HF_DATASET_REPO = "M2ai/mgtd-logs"
HF_TOKEN = os.getenv("Mgtd")
DATASET_CREATED = False

# Load model and tokenizer
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)

# Make directories
os.makedirs("logs", exist_ok=True)


def setup_hf_dataset():
    """Create the Hugging Face dataset repo used for logging, if a token is available."""
    global DATASET_CREATED
    if not DATASET_CREATED and HF_TOKEN:
        try:
            create_repo(HF_DATASET_REPO, repo_type="dataset", token=HF_TOKEN, exist_ok=True)
            DATASET_CREATED = True
            print(f"Dataset {HF_DATASET_REPO} is ready")
        except Exception as e:
            print(f"Error setting up dataset: {e}")
    elif not HF_TOKEN:
        print("Warning: HF_TOKEN not set. Logs will be saved locally only.")


def infer_and_log(text_input):
    """Classify the input text, write a local JSON log, and upload it to the dataset repo."""
    inputs = tokenizer(text_input, return_tensors="pt", truncation=True)
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits.tolist()
    predicted = torch.argmax(outputs.logits, dim=-1).item()
    label = model.config.id2label[predicted]

    # Write a JSON log entry for this submission
    timestamp = datetime.datetime.now().isoformat()
    submission_id = str(uuid.uuid4())
    log_data = {
        "id": submission_id,
        "timestamp": timestamp,
        "input": text_input,
        "logits": logits,
    }
    log_file = f"logs/{timestamp.replace(':', '_')}.json"
    with open(log_file, "w") as f:
        json.dump(log_data, f, indent=2)

    # Push the log file to the Hugging Face dataset repo if configured
    if HF_TOKEN and DATASET_CREATED:
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=log_file,
                path_in_repo=f"logs/{os.path.basename(log_file)}",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN,
            )
            print(f"Uploaded log {submission_id} to {HF_DATASET_REPO}")
        except Exception as e:
            print(f"Error uploading to HF dataset: {e}")

    return label


def clear_fields():
    return "", ""


# Set up the dataset repo on startup
setup_hf_dataset()

with gr.Blocks() as app:
    gr.Markdown("## AI Text Detector")
    with gr.Row():
        input_box = gr.Textbox(label="Input Text", lines=10, interactive=True)
        output_box = gr.Textbox(label="Output", lines=2, interactive=False)
    with gr.Row():
        submit_btn = gr.Button("Submit")
        clear_btn = gr.Button("Clear")

    submit_btn.click(fn=infer_and_log, inputs=input_box, outputs=output_box)
    clear_btn.click(fn=clear_fields, outputs=[input_box, output_box])

if __name__ == "__main__":
    app.launch()