"""Gradio demo: predict parent CWE categories from a Git commit message."""

import json
import random
from typing import Dict, List, Mapping

import gradio as gr
import numpy as np
import torch
from transformers import pipeline

# Seed every RNG the script touches and request deterministic kernels
# before the model is loaded.
torch.manual_seed(42)
random.seed(42)
np.random.seed(42)
torch.use_deterministic_algorithms(True)

# Number of top-scoring predictions considered (and shown by the Label widget).
MAX_LABELS = 5

# Load Hugging Face model (text classification)
classifier = pipeline(
    task="text-classification",
    model="CIRCL/cwe-parent-vulnerability-classification-roberta-base",
    top_k=None,
)
classifier.model.eval()

# Load child-to-parent mapping.  Explicit UTF-8 so decoding does not depend
# on the platform's locale encoding.
with open("child_to_parent_mapping.json", "r", encoding="utf-8") as f:
    child_to_parent = json.load(f)


def _collapse_to_parents(
    ranked: List[dict],
    mapping: Mapping[str, object],
    limit: int,
) -> Dict[str, float]:
    """Map the first *limit* ranked predictions onto their parent CWE labels.

    *ranked* must already be ordered by descending score.  When several
    predictions resolve to the same parent, the first (highest) score is
    kept instead of being overwritten by a later, lower one.
    """
    parents: Dict[str, float] = {}
    for item in ranked[:limit]:
        child_cwe = item["label"].replace("CWE-", "")
        # Default to the child itself when no parent is recorded.
        parent_cwe = mapping.get(child_cwe, child_cwe)
        parents.setdefault(
            f"CWE-{parent_cwe}", round(float(item["score"]), 4)
        )
    return parents


def predict_cwe(commit_message: str) -> Dict[str, float]:
    """Predict CWE(s) from a commit message and map them to parent CWEs.

    Args:
        commit_message: Free-text commit message to classify.

    Returns:
        A dict mapping ``"CWE-<id>"`` labels to scores rounded to four
        decimals, built from the top ``MAX_LABELS`` predictions.  Empty or
        whitespace-only input yields an empty dict without calling the model.
    """
    if not commit_message or not commit_message.strip():
        return {}

    # truncation=True keeps over-long messages within the model's maximum
    # sequence length instead of failing inside the forward pass.
    results = classifier(commit_message, truncation=True)[0]
    ranked = sorted(results, key=lambda x: x["score"], reverse=True)

    # NOTE(review): the previous revision computed a 0.2 score threshold but
    # never applied it; the dead code was dropped and the visible behaviour
    # (top MAX_LABELS predictions, unfiltered) is kept.
    return _collapse_to_parents(ranked, child_to_parent, MAX_LABELS)


# Gradio UI
demo = gr.Interface(
    fn=predict_cwe,
    inputs=gr.Textbox(lines=3, placeholder="Enter your commit message here..."),
    outputs=gr.Label(num_top_classes=5),
    title="CWE Prediction from Commit Message",
    description="This tool uses a fine-tuned model to predict CWE categories from Git commit messages. "
                "Predicted child CWEs are mapped to their parent CWEs if applicable.",
    examples=[
        ["Fixed buffer overflow in input parsing"],
        ["SQL injection possible in login flow"],
        ["Improved input validation to prevent XSS"],
    ],
)

if __name__ == "__main__":
    demo.launch()