import gradio as gr
import json
from transformers import pipeline
import torch
import random
import numpy as np

# Seed every random number generator this module imports with the same fixed
# value so repeated runs start from identical RNG state.
_SEED = 42
for _seed_rng in (torch.manual_seed, random.seed, np.random.seed):
    _seed_rng(_SEED)

# Ask PyTorch to use deterministic algorithms only.
# NOTE(review): per the PyTorch docs, operations without a deterministic
# implementation raise RuntimeError in this mode, and some CUDA operations
# also need CUBLAS_WORKSPACE_CONFIG set — confirm if this ever runs on GPU.
torch.use_deterministic_algorithms(True)

# Load Hugging Face model (text classification)
_MODEL_ID = "CIRCL/cwe-parent-vulnerability-classification-roberta-base"

# top_k=None asks the pipeline for the score of every label rather than only
# the single best one.
classifier = pipeline("text-classification", model=_MODEL_ID, top_k=None)

# Put the underlying torch module in evaluation mode.
classifier.model.eval()

# Load child-to-parent mapping
# The path is relative, so the file is looked up in the current working
# directory. The encoding is pinned to UTF-8 so decoding does not depend on
# the platform's locale default.
with open("child_to_parent_mapping.json", "r", encoding="utf-8") as f:
    child_to_parent = json.load(f)

def predict_cwe(commit_message: str) -> dict:
    """
    Predict CWE(s) from a commit message and map them to parent CWEs.

    Args:
        commit_message: Free-text commit message to classify.

    Returns:
        A dict mapping "CWE-<id>" labels to scores rounded to 4 decimals.
        It holds at most five distinct labels, inserted in descending score
        order. When several predicted CWEs map to the same parent, the
        highest of their scores is kept. The dict is empty when the message
        is empty or contains only whitespace.
    """
    # Nothing to classify: return an empty result rather than scoring an
    # empty string.
    if not commit_message or not commit_message.strip():
        return {}

    max_labels = 5  # same count as num_top_classes on the Label output

    # Element 0 of the pipeline output is used as the list of
    # {"label": ..., "score": ...} dicts for the single input string.
    results = classifier(commit_message)[0]
    sorted_results = sorted(results, key=lambda x: x["score"], reverse=True)

    # Map predictions to parent CWE (if available)
    mapped_results = {}
    for item in sorted_results:
        child_cwe = item["label"].replace("CWE-", "")
        parent_cwe = child_to_parent.get(child_cwe, child_cwe)  # default to child if no parent
        label = f"CWE-{parent_cwe}"

        # Results are visited best-first, so the first score recorded for a
        # parent is its highest; never let a weaker sibling overwrite it.
        if label in mapped_results:
            continue
        mapped_results[label] = round(float(item["score"]), 4)

        # Stop once enough distinct parents are collected, so collapsing
        # siblings does not shrink the list below max_labels.
        if len(mapped_results) == max_labels:
            break

    return mapped_results

# Gradio UI
_DESCRIPTION = (
    "This tool uses a fine-tuned model to predict CWE categories from Git commit messages. "
    "Predicted child CWEs are mapped to their parent CWEs if applicable."
)

# Sample commit messages offered as examples in the interface.
_EXAMPLE_MESSAGES = (
    "Fixed buffer overflow in input parsing",
    "SQL injection possible in login flow",
    "Improved input validation to prevent XSS",
)

# Components are created in the same order as before: input box, then label.
_commit_box = gr.Textbox(lines=3, placeholder="Enter your commit message here...")
_score_label = gr.Label(num_top_classes=5)

# Single-input, single-output interface around predict_cwe. Each example is
# wrapped in a one-element list because there is exactly one input component.
demo = gr.Interface(
    fn=predict_cwe,
    inputs=_commit_box,
    outputs=_score_label,
    title="CWE Prediction from Commit Message",
    description=_DESCRIPTION,
    examples=[[message] for message in _EXAMPLE_MESSAGES],
)

# Start the Gradio app only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    demo.launch()