Spaces:
Runtime error
Runtime error
import gradio as gr | |
import time | |
from video_processing import process_video | |
from PIL import Image | |
import matplotlib.pyplot as plt | |
import numpy as np | |
import pandas as pd | |
# Use 300 DPI both for figures drawn on screen and for figures saved to disk.
plt.rcParams.update({
    'figure.dpi': 300,
    'savefig.dpi': 300,
})
def process_and_show_completion(video_input_path, anomaly_threshold_input, fps, progress=gr.Progress()):
    """Run ``process_video`` and reshape its result into a flat list for the UI.

    Args:
        video_input_path: Path of the video, forwarded to ``process_video``.
        anomaly_threshold_input: Anomaly threshold, forwarded to ``process_video``.
        fps: Frames-per-second setting, forwarded to ``process_video``.
        progress: Progress tracker forwarded to ``process_video``.

    Returns:
        A list of exactly 17 items, in this order: execution time, results
        summary, three MSE plots (embeddings, posture, voice), three MSE
        histograms, three MSE heatmaps, anomaly face images (PIL), anomaly
        posture frame images (PIL), most-frequent-face sample images (PIL),
        heatmap video path, combined MSE plot, correlation heatmap.

        On failure the list has the same length: the error message is in
        slot 0 and every other slot is ``None``. Exceptions are caught,
        printed with a traceback, and reported this way instead of raised.
    """
    # Every return path yields this many items so callers can index the
    # result the same way whether processing succeeded or failed.
    output_count = 17

    try:
        print("Starting video processing...")
        results = process_video(video_input_path, anomaly_threshold_input, fps, progress=progress)
        print("Video processing completed.")

        # process_video signals failure with a string starting with "Error"
        # in the first slot instead of raising.
        if isinstance(results[0], str) and results[0].startswith("Error"):
            print(f"Error occurred: {results[0]}")
            return [results[0]] + [None] * (output_count - 1)

        # Unpacking the full tuple also validates its length; a mismatch
        # raises ValueError, which is reported by the handler below.
        # Names prefixed with "_" are returned by process_video but are
        # not displayed by the UI.
        (
            exec_time, results_summary, _df, _mse_embeddings, _mse_posture, _mse_voice,
            mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
            mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
            mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
            face_samples_frequent,
            anomaly_faces_embeddings, anomaly_frames_posture_images,
            _aligned_faces_folder, _frames_folder,
            heatmap_video_path, combined_mse_plot, correlation_heatmap,
        ) = results

        # Convert image arrays / file paths to PIL images; a missing
        # collection (None) becomes an empty list.
        anomaly_faces_embeddings_pil = (
            [Image.fromarray(face) for face in anomaly_faces_embeddings]
            if anomaly_faces_embeddings is not None else []
        )
        anomaly_frames_posture_pil = (
            [Image.fromarray(frame) for frame in anomaly_frames_posture_images]
            if anomaly_frames_posture_images is not None else []
        )
        face_samples_frequent_pil = (
            [Image.open(path) for path in face_samples_frequent]
            if face_samples_frequent is not None else []
        )

        return [
            exec_time, results_summary,
            mse_plot_embeddings, mse_plot_posture, mse_plot_voice,
            mse_histogram_embeddings, mse_histogram_posture, mse_histogram_voice,
            mse_heatmap_embeddings, mse_heatmap_posture, mse_heatmap_voice,
            anomaly_faces_embeddings_pil, anomaly_frames_posture_pil,
            face_samples_frequent_pil,
            heatmap_video_path, combined_mse_plot, correlation_heatmap,
        ]
    except Exception as e:
        # Top-level boundary for the UI: report the failure as data rather
        # than letting the exception propagate to the caller.
        error_message = f"An error occurred: {str(e)}"
        print(error_message)
        import traceback
        traceback.print_exc()
        return [error_message] + [None] * (output_count - 1)
def on_button_click(video, threshold, fps):
    """Handle a click on the "Detect Anomalies" button.

    This is a generator: it yields two dictionaries that map output
    components to their new values.

    1. Immediately: show the execution-time field (value 0), hide the
       description, and reveal the results tabs.
    2. After ``process_and_show_completion`` finishes: the measured
       wall-clock time plus every plot, gallery and video value.

    Args:
        video: Input video value; also echoed into the per-tab video players.
        threshold: Anomaly threshold forwarded to the processing function.
        fps: Frames-per-second setting forwarded to the processing function.

    Yields:
        dict: Component -> value (or ``gr.update(...)``) mappings.
    """
    start_time = time.time()

    # Show execution time immediately and hide description
    yield {
        execution_time: gr.update(visible=True, value=0),
        description: gr.update(visible=False),
        results: gr.update(visible=True),
    }

    # The local must NOT be named `results`: assigning to that name anywhere
    # in this function would make it local for the whole function, so the
    # dict key `results` above would raise UnboundLocalError instead of
    # referring to the module-level tabs component.
    outputs = process_and_show_completion(video, threshold, fps)
    exec_time = time.time() - start_time

    # On failure process_and_show_completion returns the error message in
    # slot 0 and None everywhere else; show that message in the text area
    # so the user is not left with silently empty tabs.
    summary_text = outputs[1]
    if isinstance(outputs[0], str) and all(item is None for item in outputs[1:]):
        summary_text = outputs[0]

    # This has to be a `yield`: a `return <value>` inside a generator only
    # sets StopIteration.value, so the final update would be discarded.
    yield {
        execution_time: gr.update(visible=True, value=exec_time),
        results_text: summary_text,
        mse_features_plot: outputs[2],
        mse_posture_plot: outputs[3],
        mse_voice_plot: outputs[4],
        mse_features_hist: outputs[5],
        mse_posture_hist: outputs[6],
        mse_voice_hist: outputs[7],
        mse_features_heatmap: outputs[8],
        mse_posture_heatmap: outputs[9],
        mse_voice_heatmap: outputs[10],
        anomaly_frames_features: outputs[11],
        anomaly_frames_posture: outputs[12],
        face_samples_most_frequent: outputs[13],
        heatmap_video: outputs[14],
        combined_mse_plot: outputs[15],
        correlation_heatmap_plot: outputs[16],
        video_display_facial: video,
        video_display_body: video,
        video_display_voice: video,
    }
# Build the Gradio UI. Components are created in the order they should appear,
# so the statement order below is significant.
with gr.Blocks() as iface:
    # Short header that stays visible for the whole session.
    gr.Markdown("""
    # Multimodal Behavioral Anomalies Detection
    This tool detects anomalies in facial expressions, body language, and voice over the timeline of a video.
    It extracts faces, postures, and voice from video frames, and analyzes them to identify anomalies using time series analysis and a variational autoencoder (VAE) approach.
    """)

    # --- Inputs -----------------------------------------------------------
    video_input = gr.Video(label="Input Video", visible=True)
    anomaly_threshold = gr.Slider(minimum=1, maximum=5, step=0.1, value=3, label="Anomaly Detection Threshold (Standard deviation)")
    fps_slider = gr.Slider(minimum=5, maximum=20, step=1, value=10, label="Frames Per Second (FPS)")
    process_btn = gr.Button("Detect Anomalies")

    # Created hidden; on_button_click makes it visible in its first update.
    execution_time = gr.Number(label="Execution Time (seconds)", visible=False)

    # Long-form description; on_button_click hides it when processing starts.
    description = gr.Markdown(visible=True, value="""
    # Multimodal Behavioral Anomalies Detection
    The purpose of this tool is to detect anomalies in facial expressions, body language, and voice over the timeline of a video.
    It extracts faces, postures, and voice features from video frames, detects unique facial features, body postures, and speaker embeddings, and analyzes them to identify anomalies using time series analysis, specifically utilizing a variational autoencoder (VAE) approach.
    ## Applications
    - Identify suspicious behavior in surveillance footage.
    - Analyze micro-expressions.
    - Monitor and assess emotional states in communications.
    - Evaluate changes in vocal tone and speech patterns.
    ## Features
    - **Face Extraction**: Extracts faces from video frames using the MTCNN model.
    - **Feature Embeddings**: Extracts facial feature embeddings using the InceptionResnetV1 model.
    - **Body Posture Analysis**: Evaluates body postures using MediaPipe Pose.
    - **Voice Analysis**: Extracts and segment speaker embeddings from audio using PyAnnote.
    - **Anomaly Detection**: Uses Variational Autoencoder (VAE) to detect anomalies in facial expressions, body postures, and voice features over time.
    - **Visualization**: Represents changes in facial expressions, body postures, and vocal tone over time, marking anomaly key points.
    ## Limitations
    - **Evaluation Challenges**: Since this is an unsupervised method, there is no labeled data to compare against.
    - **Subjectivity**: The concept of what constitutes an "anomaly" can be subjective and context-dependent.
    - **Lighting and Resolution**: Variability in lighting conditions and camera resolution can affect the quality of detected features.
    - **Audio Quality**: Background noise and poor audio quality can affect the accuracy of voice analysis.
    - **Generalization**: The model may not generalize well to all types of videos and contexts.
    - **Computationally Intensive**: Processing high-resolution video frames can be computationally demanding.
    ## Conclusion
    This tool offers solutions for detecting behavioral anomalies in video content. However, users should be aware of its limitations and interpret results with caution.
    """)

    # --- Results ----------------------------------------------------------
    # The whole tab group starts hidden; on_button_click reveals it.
    # NOTE: this module-level name is `results`; functions that refer to it
    # must not use `results` as a local variable name.
    results = gr.Tabs(visible=False)
    with results:
        with gr.TabItem("Facial Features"):
            video_display_facial = gr.Video(label="Input Video")
            results_text = gr.TextArea(label="Faces Breakdown", lines=5)
            mse_features_plot = gr.Plot(label="MSE: Facial Features")
            mse_features_hist = gr.Plot(label="MSE Distribution: Facial Features")
            mse_features_heatmap = gr.Plot(label="MSE Heatmap: Facial Features")
            anomaly_frames_features = gr.Gallery(label="Anomaly Frames (Facial Features)", columns=6, rows=2, height="auto")
            face_samples_most_frequent = gr.Gallery(label="Most Frequent Person Samples", columns=10, rows=2, height="auto")
        with gr.TabItem("Body Posture"):
            video_display_body = gr.Video(label="Input Video")
            mse_posture_plot = gr.Plot(label="MSE: Body Posture")
            mse_posture_hist = gr.Plot(label="MSE Distribution: Body Posture")
            mse_posture_heatmap = gr.Plot(label="MSE Heatmap: Body Posture")
            anomaly_frames_posture = gr.Gallery(label="Anomaly Frames (Body Posture)", columns=6, rows=2, height="auto")
        with gr.TabItem("Voice"):
            video_display_voice = gr.Video(label="Input Video")
            mse_voice_plot = gr.Plot(label="MSE: Voice")
            mse_voice_hist = gr.Plot(label="MSE Distribution: Voice")
            mse_voice_heatmap = gr.Plot(label="MSE Heatmap: Voice")
        with gr.TabItem("Combined"):
            heatmap_video = gr.Video(label="Video with Anomaly Heatmap")
            combined_mse_plot = gr.Plot(label="Combined MSE Plot")
            correlation_heatmap_plot = gr.Plot(label="Correlation Heatmap")

    # --- Event wiring -----------------------------------------------------
    # on_button_click produces dictionaries keyed by component, so every
    # component it may update has to be listed in `outputs` here.
    process_btn.click(
        fn=on_button_click,
        inputs=[video_input, anomaly_threshold, fps_slider],
        outputs=[
            execution_time, description, results,
            results_text, mse_features_plot, mse_posture_plot, mse_voice_plot,
            mse_features_hist, mse_posture_hist, mse_voice_hist,
            mse_features_heatmap, mse_posture_heatmap, mse_voice_heatmap,
            anomaly_frames_features, anomaly_frames_posture,
            face_samples_most_frequent, heatmap_video, combined_mse_plot,
            correlation_heatmap_plot, video_display_facial, video_display_body, video_display_voice
        ]
    )

# Start the app only when this file is run as a script (not when imported).
if __name__ == "__main__":
    iface.launch(share=True)