import streamlit as st
import torch
from PIL import Image
from unsloth import FastVisionModel
from peft import PeftModel
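
# Overview: this app pairs a 4-bit quantized Llama 3.2 Vision base model
# (loaded via Unsloth's FastVisionModel) with a LoRA adapter applied through
# PEFT. The adapter carries the deepfake-detection fine-tuning while the base
# weights stay frozen, so the memory footprint stays close to that of the
# quantized base model alone.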

# App title and description
st.set_page_config(
    page_title="Deepfake Analyzer",
    layout="wide",
    page_icon="🔍"
)

# Main title and description
st.title("Deepfake Image Analyzer")
st.markdown("Upload an image to analyze it for possible deepfake manipulation")

# Check for GPU availability
def check_gpu():
    if torch.cuda.is_available():
        gpu_info = torch.cuda.get_device_properties(0)
        st.sidebar.success(f"✅ GPU available: {gpu_info.name} ({gpu_info.total_memory / (1024**3):.2f} GB)")
        return True
    else:
        st.sidebar.warning("⚠️ No GPU detected. Analysis will be slower.")
        return False
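
# Note: load_model below loads the 11B model in 4-bit precision, which cuts
# weight memory to roughly a quarter of fp16 (on the order of 6 GB rather
# than ~22 GB), so a single consumer GPU is typically enough.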

# Sidebar components
st.sidebar.title("Options")

# Temperature slider
temperature = st.sidebar.slider(
    "Temperature",
    min_value=0.1,
    max_value=1.0,
    value=0.7,
    step=0.1,
    help="Higher values make output more random, lower values more deterministic"
)

# Max response length slider
max_tokens = st.sidebar.slider(
    "Maximum Response Length",
    min_value=100,
    max_value=1000,
    value=500,
    step=50,
    help="The maximum number of tokens in the response"
)
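
# Note: temperature only affects generation when sampling is enabled
# (do_sample=True in model.generate below); with greedy decoding it is
# ignored. max_tokens caps newly generated tokens only, so the prompt and
# image tokens do not count against it.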

# Custom instruction text area in sidebar
custom_instruction = st.sidebar.text_area(
    "Custom Instructions (Advanced)",
    value="Analyze for facial inconsistencies, lighting irregularities, mismatched shadows, and other signs of manipulation.",
    help="Add specific instructions for the model"
)

# About section in sidebar
st.sidebar.markdown("---")
st.sidebar.subheader("About")
st.sidebar.markdown("""
This analyzer looks for:
- Facial inconsistencies
- Unnatural movements
- Lighting issues
- Texture anomalies
- Edge artifacts
- Blending problems

**Model**: Fine-tuned Llama 3.2 Vision
**Creator**: [Saakshi Gupta](https://huggingface.co/saakshigupta)
""")

# Function to fix cross-attention masks
def fix_cross_attention_mask(inputs):
    if 'cross_attention_mask' in inputs and 0 in inputs['cross_attention_mask'].shape:
        batch_size, seq_len, _, num_tiles = inputs['cross_attention_mask'].shape
        visual_features = 6404  # Critical dimension
        new_mask = torch.ones((batch_size, seq_len, visual_features, num_tiles),
                              device=inputs['cross_attention_mask'].device)
        inputs['cross_attention_mask'] = new_mask
        st.success("Fixed cross-attention mask dimensions")
    return inputs
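
# Why this workaround exists: the processor occasionally emits a
# cross_attention_mask with a zero-sized dimension, which crashes generation.
# Rebuilding it as an all-ones mask lets every text token attend to every
# visual feature. The 6404 value is the visual feature count this checkpoint
# expects; it is model-specific, so verify it if you swap in a different
# vision backbone.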

# Load model function
def load_model():
    with st.spinner("Loading model... This may take a few minutes. Please be patient..."):
        try:
            # Check for GPU
            has_gpu = check_gpu()

            # Load base model and tokenizer using Unsloth
            base_model_id = "unsloth/llama-3.2-11b-vision-instruct"
            model, tokenizer = FastVisionModel.from_pretrained(
                base_model_id,
                load_in_4bit=True,
            )

            # Load the adapter
            adapter_id = "saakshigupta/deepfake-explainer-1"
            model = PeftModel.from_pretrained(model, adapter_id)

            # Set to inference mode
            FastVisionModel.for_inference(model)

            return model, tokenizer
        except Exception as e:
            st.error(f"Error loading model: {str(e)}")
            return None, None
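
# A possible refinement (not enabled here): decorating load_model with
# Streamlit's st.cache_resource would keep the model in memory across script
# reruns without relying on session_state alone. A minimal sketch:
#
#     @st.cache_resource
#     def load_model():
#         ...
#
# This app instead stores the loaded model in st.session_state, which
# achieves the same effect per user session.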

# Analyze image function
def analyze_image(image, question, model, tokenizer, temperature=0.7, max_tokens=500, custom_instruction=""):
    # Combine question with custom instruction if provided
    if custom_instruction.strip():
        full_prompt = f"{question}\n\nAdditional instructions: {custom_instruction}"
    else:
        full_prompt = question

    # Format the message
    messages = [
        {"role": "user", "content": [
            {"type": "image"},
            {"type": "text", "text": full_prompt}
        ]}
    ]
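
    # The content list interleaves an image placeholder with the text prompt;
    # apply_chat_template expands it into the special-token format Llama 3.2
    # Vision expects, and the processor call below pairs it with the actual
    # pixel data.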
    # Apply chat template
    input_text = tokenizer.apply_chat_template(messages, add_generation_prompt=True)

    # Process with image
    inputs = tokenizer(
        image,
        input_text,
        add_special_tokens=False,
        return_tensors="pt",
    ).to(model.device)

    # Fix cross-attention mask if needed
    inputs = fix_cross_attention_mask(inputs)

    # Generate response
    with st.spinner("Analyzing image... (this may take 15-30 seconds)"):
        with torch.no_grad():
            output_ids = model.generate(
                **inputs,
                max_new_tokens=max_tokens,
                use_cache=True,
                do_sample=True,  # required for temperature/top_p to take effect
                temperature=temperature,
                top_p=0.9
            )

    # Decode the output
    response = tokenizer.decode(output_ids[0], skip_special_tokens=True)

    # Try to extract just the model's response (after the prompt)
    if full_prompt in response:
        result = response.split(full_prompt)[-1].strip()
    else:
        result = response

    return result
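
# Note on the string-splitting above: decoding the full sequence echoes the
# prompt, so the code strips it textually. A more robust alternative is to
# slice off the prompt tokens before decoding, e.g.:
#
#     new_tokens = output_ids[0][inputs["input_ids"].shape[-1]:]
#     result = tokenizer.decode(new_tokens, skip_special_tokens=True)
#
# which avoids false matches when the model restates the prompt verbatim.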

# Main app
def main():
    # Create a button to load the model
    if 'model_loaded' not in st.session_state:
        st.session_state.model_loaded = False
        st.session_state.model = None
        st.session_state.tokenizer = None

    # Load model button
    if not st.session_state.model_loaded:
        if st.button("📥 Load Deepfake Analysis Model", type="primary"):
            model, tokenizer = load_model()
            if model is not None and tokenizer is not None:
                st.session_state.model = model
                st.session_state.tokenizer = tokenizer
                st.session_state.model_loaded = True
                st.success("✅ Model loaded successfully! You can now analyze images.")
            else:
                st.error("❌ Failed to load model. Please check the logs for errors.")
    else:
        st.success("✅ Model loaded successfully! You can now analyze images.")

    # Image upload section
    st.subheader("Upload an Image")
    uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])

    # Default question with option to customize
    default_question = "Analyze this image and tell me if it's a deepfake. Provide both technical and non-technical explanations."
    question = st.text_area("Question/Prompt:", value=default_question, height=100)

    if uploaded_file is not None:
        # Display the uploaded image
        image = Image.open(uploaded_file).convert("RGB")
        st.image(image, caption="Uploaded Image", use_column_width=True)

        # Analyze button - only enabled if model is loaded
        if st.session_state.model_loaded:
            if st.button("🔍 Analyze Image", type="primary"):
                result = analyze_image(
                    image,
                    question,
                    st.session_state.model,
                    st.session_state.tokenizer,
                    temperature=temperature,
                    max_tokens=max_tokens,
                    custom_instruction=custom_instruction
                )

                # Display results
                st.success("✅ Analysis complete!")

                # Check if the result contains both technical and non-technical explanations
                if "Technical" in result and "Non-Technical" in result:
                    # Split the result into technical and non-technical sections
                    parts = result.split("Non-Technical")
                    technical = parts[0]
                    non_technical = "Non-Technical" + parts[1]

                    # Display in two columns
                    col1, col2 = st.columns(2)
                    with col1:
                        st.subheader("Technical Analysis")
                        st.markdown(technical)
                    with col2:
                        st.subheader("Simple Explanation")
                        st.markdown(non_technical)
                else:
                    # Just display the whole result
                    st.subheader("Analysis Result")
                    st.markdown(result)
        else:
            st.warning("⚠️ Please load the model first before analyzing images.")

    # Footer
    st.markdown("---")
    st.caption("Deepfake Image Analyzer")

if __name__ == "__main__":
    main()
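
# To run locally (assuming streamlit, torch, unsloth, and peft are
# installed): streamlit run app.py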