import spaces
import gradio as gr
from transformers import pipeline, AutoImageProcessor, SwinForImageClassification, Swinv2ForImageClassification, AutoFeatureExtractor, AutoModelForImageClassification
from torchvision import transforms
import torch
from PIL import Image
import numpy as np
import io
import logging
from utils.utils import softmax, augment_image, convert_pil_to_bytes, ELA
from utils.gradient import gradient_processing
from utils.minmax import preprocess as minmax_preprocess

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Use the GPU if one is available
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Model paths and class names
MODEL_PATHS = {
    "model_1": "haywoodsloan/ai-image-detector-deploy",
    "model_2": "Heem2/AI-vs-Real-Image-Detection",
    "model_3": "Organika/sdxl-detector",
    "model_4": "cmckinle/sdxl-flux-detector",
    "model_5": "prithivMLmods/Deep-Fake-Detector-v2-Model",
    "model_5b": "prithivMLmods/Deepfake-Detection-Exp-02-22",
    "model_6": "ideepankarsharma2003/AI_ImageClassification_MidjourneyV6_SDXL",
    "model_7": "date3k2/vit-real-fake-classification-v4"
}
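
# Class-name order matters: predict_with_model treats index 0 as the AI/fake label
# and index 1 as the real label when building its result row and verdict.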
CLASS_NAMES = {
    "model_1": ['artificial', 'real'],
    "model_2": ['AI Image', 'Real Image'],
    "model_3": ['AI', 'Real'],
    "model_4": ['AI', 'Real'],
    "model_5": ['Deepfake', 'Realism'],  # fake label listed first to match the convention above
    "model_5b": ['Deepfake', 'Real'],    # fake label listed first to match the convention above
    "model_6": ['ai_gen', 'human'],
    "model_7": ['Fake', 'Real'],
}

# Load models and processors
def load_models():
    image_processor_1 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_1"], use_fast=True)
    model_1 = Swinv2ForImageClassification.from_pretrained(MODEL_PATHS["model_1"])
    model_1 = model_1.to(device)
    clf_1 = pipeline(model=model_1, task="image-classification", image_processor=image_processor_1, device=device)

    clf_2 = pipeline("image-classification", model=MODEL_PATHS["model_2"], device=device)

    # from_pretrained does not take a device argument; the bare models are moved with .to(device)
    feature_extractor_3 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_3"])
    model_3 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_3"]).to(device)

    feature_extractor_4 = AutoFeatureExtractor.from_pretrained(MODEL_PATHS["model_4"])
    model_4 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_4"]).to(device)

    clf_5 = pipeline("image-classification", model=MODEL_PATHS["model_5"], device=device)
    clf_5b = pipeline("image-classification", model=MODEL_PATHS["model_5b"], device=device)

    image_processor_6 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_6"], use_fast=True)
    model_6 = SwinForImageClassification.from_pretrained(MODEL_PATHS["model_6"]).to(device)
    clf_6 = pipeline(model=model_6, task="image-classification", image_processor=image_processor_6, device=device)

    image_processor_7 = AutoImageProcessor.from_pretrained(MODEL_PATHS["model_7"], use_fast=True)
    model_7 = AutoModelForImageClassification.from_pretrained(MODEL_PATHS["model_7"]).to(device)
    clf_7 = pipeline(model=model_7, task="image-classification", image_processor=image_processor_7, device=device)

    return clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, model_7, clf_7

# Load everything once at import time so every prediction reuses the same models
clf_1, clf_2, feature_extractor_3, model_3, feature_extractor_4, model_4, clf_5, clf_5b, clf_6, model_7, clf_7 = load_models()
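
# Each model prediction returns a display label plus a result row of the form
# [model_id, model_name, real_confidence, ai_confidence, verdict], where verdict is
# 'AI', 'REAL', 'UNCERTAIN', or 'ERROR'; generate_results_html consumes these rows.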
def predict_with_model(img_pil, clf, class_names, confidence_threshold, model_name, model_id, feature_extractor=None):
    try:
        if feature_extractor:
            # Bare model + feature extractor: run the forward pass manually
            inputs = feature_extractor(img_pil, return_tensors="pt").to(device)
            with torch.no_grad():
                outputs = clf(**inputs)
            logits = outputs.logits
            probabilities = softmax(logits.cpu().numpy()[0])
            result = {class_names[i]: probabilities[i] for i in range(len(class_names))}
        else:
            # Pipeline object: preprocessing and softmax are handled internally
            prediction = clf(img_pil)
            result = {pred['label']: pred['score'] for pred in prediction}
        result_output = [model_id, model_name, result.get(class_names[1], 0.0), result.get(class_names[0], 0.0)]
        logger.info(result_output)
        for class_name in class_names:
            if class_name not in result:
                result[class_name] = 0.0
        if result[class_names[0]] >= confidence_threshold:
            label = f"AI, Confidence: {result[class_names[0]]:.4f}"
            result_output.append('AI')
        elif result[class_names[1]] >= confidence_threshold:
            label = f"Real, Confidence: {result[class_names[1]]:.4f}"
            result_output.append('REAL')
        else:
            label = "Uncertain Classification"
            result_output.append('UNCERTAIN')
    except Exception as e:
        label = f"Error: {str(e)}"
        result_output = [model_id, model_name, 0.0, 0.0, 'ERROR']  # Ensure result_output is assigned in case of error
    return label, result_output

def predict_image(img, confidence_threshold):
    if not isinstance(img, Image.Image):
        raise ValueError(f"Expected a PIL Image, but got {type(img)}")
    if img.mode != 'RGB':
        img_pil = img.convert('RGB')
    else:
        img_pil = img
    img_pil = transforms.Resize((256, 256))(img_pil)
    img_pilvits = transforms.Resize((224, 224))(img_pil)

    label_1, result_1output = predict_with_model(img_pil, clf_1, CLASS_NAMES["model_1"], confidence_threshold, "SwinV2-base", 1)
    label_2, result_2output = predict_with_model(img_pilvits, clf_2, CLASS_NAMES["model_2"], confidence_threshold, "ViT-base Classifier", 2)
    label_3, result_3output = predict_with_model(img_pil, model_3, CLASS_NAMES["model_3"], confidence_threshold, "SDXL-Trained", 3, feature_extractor_3)
    label_4, result_4output = predict_with_model(img_pil, model_4, CLASS_NAMES["model_4"], confidence_threshold, "SDXL + FLUX", 4, feature_extractor_4)
    label_5, result_5output = predict_with_model(img_pilvits, clf_5, CLASS_NAMES["model_5"], confidence_threshold, "ViT-base Newcomer", 5)
    label_5b, result_5boutput = predict_with_model(img_pilvits, clf_5b, CLASS_NAMES["model_5b"], confidence_threshold, "ViT-base Newcomer", 6)
    label_6, result_6output = predict_with_model(img_pilvits, clf_6, CLASS_NAMES["model_6"], confidence_threshold, "Swin Midjourney/SDXL", 7)
    label_7, result_7output = predict_with_model(img_pilvits, clf_7, CLASS_NAMES["model_7"], confidence_threshold, "ViT", 8)

    combined_results = {
        "SwinV2/detect": label_1,
        "ViT/AI-vs-Real": label_2,
        "Swin/SDXL": label_3,
        "Swin/SDXL-FLUX": label_4,
        "prithivMLmods": label_5,
        "prithivMLmods-2-22": label_5b,
        "SwinMidSDXL": label_6,
        "ViT": label_7
    }
    print(combined_results)

    combined_outputs = [result_1output, result_2output, result_3output, result_4output, result_5output, result_5boutput, result_6output, result_7output]
    return img_pil, combined_outputs
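
# combined_outputs keeps the rows in model_1..model_7 order; the HTML tiles below
# index into that list positionally, so the two orderings must stay in sync.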
# Define a function to generate the HTML content
def generate_results_html(results):
    def get_header_color(label):
        if label == 'AI':
            return 'bg-red-500 text-red-700', 'bg-red-400', 'bg-red-100', 'bg-red-700 text-red-700', 'bg-red-200'
        elif label == 'REAL':
            return 'bg-green-500 text-green-700', 'bg-green-400', 'bg-green-100', 'bg-green-700 text-green-700', 'bg-green-200'
        elif label == 'UNCERTAIN':
            return 'bg-yellow-500 text-yellow-700 bg-yellow-100', 'bg-yellow-400', 'bg-yellow-100', 'bg-yellow-700 text-yellow-700', 'bg-yellow-200'
        elif label == 'MAINTENANCE':
            return 'bg-blue-500 text-blue-700', 'bg-blue-400', 'bg-blue-100', 'bg-blue-700 text-blue-700', 'bg-blue-200'
        else:
            return 'bg-gray-300 text-gray-700', 'bg-gray-400', 'bg-gray-100', 'bg-gray-700 text-gray-700', 'bg-gray-200'

    def generate_tile_html(index, result, model_name, contributor, model_path):
        label = result[-1]
        header_colors = get_header_color(label)
        real_conf = result[2]
        ai_conf = result[3]
        return f"""
        <div
            class="flex flex-col bg-gray-800 rounded-sm p-4 m-1 border border-gray-800 shadow-xs transition hover:shadow-lg dark:shadow-gray-700/25">
            <div
                class="-m-4 h-24 {header_colors[0]} rounded-sm rounded-b-none transition border group-hover:border-gray-100 group-hover:shadow-lg group-hover:{header_colors[4]}">
                <span class="text-gray-300 font-mono tracking-widest p-4 pb-3 block text-xs text-center">MODEL {index + 1}:</span>
                <span
                    class="flex w-30 mx-auto tracking-wide items-center justify-center rounded-full {header_colors[2]} px-1 py-0.5 {header_colors[3]}"
                >
                    <svg xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24" stroke-width="3" stroke="currentColor" class="w-4 h-4 mr-2 -ml-3 group-hover:animate group-hover:animate-pulse">
                        {'<path stroke-linecap="round" stroke-linejoin="round" d="M9 12.75 11.25 15 15 9.75M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />' if label == 'REAL' else '<path stroke-linecap="round" stroke-linejoin="round" d="m9.75 9.75 4.5 4.5m0-4.5-4.5 4.5M21 12a9 9 0 1 1-18 0 9 9 0 0 1 18 0Z" />'}
                    </svg>
                    <p class="whitespace-nowrap text-lg leading-normal font-bold text-center self-center align-middle py-px">{label}</p>
                </span>
            </div>
            <div>
                <div class="mt-4 relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-green-400 h-full rounded-none" style="width: {real_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{real_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
                <div class="relative -mx-4 bg-gray-800">
                    <div class="w-full bg-gray-400 rounded-none h-8">
                        <div class="inline-flex whitespace-nowrap bg-red-400 h-full rounded-none" style="width: {ai_conf * 100:.2f}%;">
                            <p class="p-2 px-4 text-xs self-center align-middle">Conf:
                                <span class="ml-1 font-medium font-mono">{ai_conf:.4f}</span>
                            </p>
                        </div>
                    </div>
                </div>
            </div>
            <div class="flex flex-col items-start">
                <h4 class="mt-4 text-sm font-semibold tracking-wide">{model_name}</h4>
                <div class="text-xs font-mono">Real: {real_conf:.4f}, AI: {ai_conf:.4f}</div>
                <div class="card-footer">
                    <a href="https://huggingface.co/{model_path}" target="_blank" class="mt-2 text-xs tracking-wide nowrap" style="font-size:0.66rem !important;">by @{contributor}</a>
                </div>
            </div>
        </div>
        """
    html_content = f"""
    <link href="https://unpkg.com/tailwindcss@^2/dist/tailwind.min.css" rel="stylesheet">
    <div class="container mx-auto">
        <div class="grid xl:grid-cols-4 md:grid-cols-4 grid-cols-1 gap-1">
            {generate_tile_html(0, results[0], "SwinV2 Based", "haywoodsloan", MODEL_PATHS["model_1"])}
            {generate_tile_html(1, results[1], "ViT Based", "Heem2", MODEL_PATHS["model_2"])}
            {generate_tile_html(2, results[2], "SDXL Dataset", "Organika", MODEL_PATHS["model_3"])}
            {generate_tile_html(3, results[3], "SDXL + FLUX", "cmckinle", MODEL_PATHS["model_4"])}
            {generate_tile_html(4, results[4], "ViT Based", "prithivMLmods", MODEL_PATHS["model_5"])}
            {generate_tile_html(5, results[5], "ViT Based, Newer Dataset", "prithivMLmods", MODEL_PATHS["model_5b"])}
            {generate_tile_html(6, results[6], "Swin, Midj + SDXL", "ideepankarsharma2003", MODEL_PATHS["model_6"])}
            {generate_tile_html(7, results[7], "ViT", "date3k2", MODEL_PATHS["model_7"])}
        </div>
    </div>
    """
    return html_content

def predict_image_with_html(img, confidence_threshold, augment_methods, rotate_degrees, noise_level, sharpen_strength):
    if augment_methods:
        img_pil, _ = augment_image(img, augment_methods, rotate_degrees, noise_level, sharpen_strength)
    else:
        img_pil = img
    img_pil, results = predict_image(img_pil, confidence_threshold)
    img_np = np.array(img_pil)  # Convert PIL Image to NumPy array

    gradient_image = gradient_processing(img_np)  # Gradient processing
    minmax_image = minmax_preprocess(img_np)  # MinMax processing

    # Generate ELA images with different presets
    ela_img_1 = ELA(img_pil, scale=100, alpha=0.66)
    ela_img_2 = ELA(img_pil, scale=50, alpha=0.5)

    forensics_images = [img_pil, ela_img_1, ela_img_2, gradient_image, minmax_image]
    html_content = generate_results_html(results)
    return img_pil, forensics_images, html_content
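
# predict_image_with_html returns the processed image, five forensics images
# (original, two ELA variants, gradient, minmax) that fill the single-row gallery
# defined in the UI below, and the HTML string for the results pane.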

with gr.Blocks(css="#post-gallery { overflow: hidden !important; }") as iface:
    with gr.Accordion("Project OpenSight - Model Evaluations & Playground", open=False, elem_id="project_accordion"):
        gr.Markdown("## OpenSight is a SOTA gen. image detection model, in pre-release prep.\n\nThis HF Space is a temporary home for us and the public to evaluate the shortcomings of current open source models.\n\n<-- Feel free to play around by starting with an image as we prepare our formal announcement.")

    with gr.Tab("AI Image Detection"):
        gr.Markdown("# AI Generated Image / Deepfake Detection Models Evaluation")
        with gr.Row():
            with gr.Column(scale=1):
                image_input = gr.Image(label="Upload Image to Analyze", sources=['upload'], type='pil')
                with gr.Accordion("Settings (Optional)", open=False, elem_id="settings_accordion"):
                    augment_checkboxgroup = gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], label="Augmentation Methods")
                    rotate_slider = gr.Slider(0, 45, value=2, step=1, label="Rotate Degrees", visible=False)
                    noise_slider = gr.Slider(0, 50, value=4, step=1, label="Noise Level", visible=False)
                    sharpen_slider = gr.Slider(0, 50, value=11, step=1, label="Sharpen Strength", visible=False)
                confidence_slider = gr.Slider(0.0, 1.0, value=0.75, step=0.05, label="Confidence Threshold")
                inputs = [image_input, confidence_slider, augment_checkboxgroup, rotate_slider, noise_slider, sharpen_slider]
                predict_button = gr.Button("Predict")
                augment_button = gr.Button("Augment & Predict")
                image_output = gr.Image(label="Processed Image", visible=False)

            with gr.Column(scale=2):
                # Custom HTML component that renders the model tiles in a responsive grid
                results_html = gr.HTML(label="Model Predictions")
                forensics_gallery = gr.Gallery(label="Post Processed Images", visible=True, columns=[5], rows=[1], container=False, height="auto", object_fit="contain", elem_id="post-gallery")

        outputs = [image_output, forensics_gallery, results_html]

        # Show/hide each augmentation slider based on the selected methods
        augment_checkboxgroup.change(lambda methods: gr.update(visible="rotate" in methods), inputs=[augment_checkboxgroup], outputs=[rotate_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="add_noise" in methods), inputs=[augment_checkboxgroup], outputs=[noise_slider])
        augment_checkboxgroup.change(lambda methods: gr.update(visible="sharpen" in methods), inputs=[augment_checkboxgroup], outputs=[sharpen_slider])
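
        # Both buttons run the same prediction pipeline; the Augment button simply
        # passes a hidden CheckboxGroup with every augmentation method pre-selected.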
        predict_button.click(
            fn=predict_image_with_html,
            inputs=inputs,
            outputs=outputs
        )
        augment_button.click(  # Connect Augment button to the function
            fn=predict_image_with_html,
            inputs=[
                image_input,
                confidence_slider,
                gr.CheckboxGroup(["rotate", "add_noise", "sharpen"], value=["rotate", "add_noise", "sharpen"], visible=False),  # Default values
                rotate_slider,
                noise_slider,
                sharpen_slider
            ],
            outputs=outputs
        )
        predict_button.click(
            fn=None,
            js="() => {document.getElementById('project_accordion').open = false;}",  # Close the project accordion
            inputs=[],
            outputs=[]
        )

    with gr.Tab("Another Interface"):
        # Add components for the second interface here
        gr.Markdown("# Another Interface")

    with gr.Sidebar():
        gr.Markdown("# Another Interface")
        # Example: a simple text input and output
        text_input = gr.Textbox(label="Enter Text")
        text_output = gr.Textbox(label="Processed Text")
        text_button = gr.Button("Process Text")
        text_button.click(
            fn=lambda x: x.upper(),  # Example function to convert text to uppercase
            inputs=text_input,
            outputs=text_output
        )

# Launch the interface
iface.launch()