Spaces:

phitran
/

viralplay

Running on Zero

App Files Community

phitran committed on Feb 10

Commit

0d44a50

1 Parent(s): 041f44a

fix build error

Browse files

Files changed (4) hide show

.DS_Store +0 -0
handlers/__pycache__/frame_handler_resnet.cpython-311.pyc +0 -0
handlers/frame_handler_resnet.py +0 -253
requirements.txt +20 -6

.DS_Store ADDED Viewed

Binary file (6.15 kB). View file

handlers/__pycache__/frame_handler_resnet.cpython-311.pyc DELETED Viewed

Binary file (8.39 kB)

handlers/frame_handler_resnet.py DELETED Viewed

@@ -1,253 +0,0 @@
-import os
-import cv2
-import torch
-#from transformers import DetrImageProcessor, DetrForObjectDetection
-from transformers import AutoImageProcessor, AutoModelForObjectDetection
-from PIL import Image
-import numpy as np
-def crop_preserve_key_objects(input_folder, output_folder, model_name='facebook/detr-resnet-50', target_resolution=(360, 640)):
-    """
-    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
-    if a football is not detected, and extending the area until it reaches the target resolution.
-    Args:
-        input_folder (str): Path to the folder containing key frames.
-        output_folder (str): Path to save the processed frames.
-        model_name (str): Hugging Face model name for DETR.
-        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
-    """
-    print("Preprocessing frames using DETR to fit the target aspect ratio...")
-    # Load the DETR model and processor
-    #processor = DetrImageProcessor.from_pretrained(model_name)
-    #model = DetrForObjectDetection.from_pretrained(model_name)
-    processor = AutoImageProcessor.from_pretrained(model_name)
-    model = AutoModelForObjectDetection.from_pretrained(model_name)
-    target_aspect_ratio = target_resolution[0] / target_resolution[1]
-    for frame_name in os.listdir(input_folder):
-        frame_path = os.path.join(input_folder, frame_name)
-        if not frame_name.lower().endswith(('.jpg', '.png')):
-            continue  # Skip non-image files
-        # Read the frame
-        frame = cv2.imread(frame_path)
-        if frame is None:
-            print(f"Error reading frame: {frame_path}")
-            continue
-        original_height, original_width = frame.shape[:2]
-        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
-        # Run inference
-        inputs = processor(images=frame_pil, return_tensors="pt")
-        outputs = model(**inputs)
-        # Extract bounding boxes and labels
-        logits = outputs.logits.softmax(-1)[0]
-        boxes = outputs.pred_boxes[0].cpu().detach().numpy()
-        labels = logits.argmax(-1).cpu().detach().numpy()
-        scores = logits.max(-1).values.cpu().detach().numpy()
-        # Filter boxes with a confidence threshold
-        confidence_threshold = 0.1
-        filtered_boxes = []
-        for i, score in enumerate(scores):
-            if score >= confidence_threshold:
-                filtered_boxes.append((labels[i], score, boxes[i]))
-        # Separate detections into categories
-        ball_detected = False
-        people_boxes = []
-        for label, score, box in filtered_boxes:
-            # Convert box from normalized coordinates to pixel values
-            x_min, y_min, x_max, y_max = (
-                int(box[0] * original_width),
-                int(box[1] * original_height),
-                int(box[2] * original_width),
-                int(box[3] * original_height),
-            )
-            if label == 32:  # "sports ball" class in COCO
-                print("Ball is detected in the frame.")
-                x_center = (x_min + x_max) // 2
-                y_center = (y_min + y_max) // 2
-                ball_detected = True
-                break
-            elif label == 1:  # "person" class in COCO
-                print("Person is detected in the frame.")
-                people_boxes.append((x_min, y_min, x_max, y_max))
-        # If no ball is detected, focus on the densest group of people
-        if not ball_detected and people_boxes:
-            # Cluster the people into groups based on proximity
-            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
-            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
-            # Define a distance threshold to group nearby people
-            threshold = max(original_width, original_height) * 0.1  # Adjust clustering sensitivity
-            clusters = []
-            visited = set()
-            for i, center in enumerate(centers):
-                if i in visited:
-                    continue
-                cluster = [i]
-                visited.add(i)
-                for j in range(len(centers)):
-                    if j not in visited and distances[i, j] < threshold:
-                        cluster.append(j)
-                        visited.add(j)
-                clusters.append(cluster)
-            # Find the largest cluster and calculate its bounding box
-            largest_cluster = max(clusters, key=len)
-            x_min = min(people_boxes[i][0] for i in largest_cluster)
-            y_min = min(people_boxes[i][1] for i in largest_cluster)
-            x_max = max(people_boxes[i][2] for i in largest_cluster)
-            y_max = max(people_boxes[i][3] for i in largest_cluster)
-            # Center the crop on the largest cluster
-            x_center = (x_min + x_max) // 2
-            y_center = (y_min + y_max) // 2
-        # Calculate the cropping region to fit the target resolution
-        new_width = int(original_height * target_aspect_ratio)
-        new_height = int(original_width / target_aspect_ratio)
-        x_start = max(0, x_center - new_width // 2)
-        y_start = max(0, y_center - new_height // 2)
-        x_end = min(original_width, x_start + new_width)
-        y_end = min(original_height, y_start + new_height)
-        # Adjust the crop if the size is smaller than the target resolution
-        if (x_end - x_start) < new_width:
-            x_start = max(0, x_end - new_width)
-        if (y_end - y_start) < new_height:
-            y_start = max(0, y_end - new_height)
-        # Crop and resize the frame
-        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
-        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
-        # Save the processed frame
-        output_path = os.path.join(output_folder, frame_name)
-        cv2.imwrite(output_path, frame_resized)
-        print(f"Processed frame saved: {output_path}")
-    print("Preprocessing completed.")
-#back up
-def backup_yolo_crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
-    """
-    Preprocess frames to fit a target aspect ratio, focusing on the densest group of people
-    if a football is not detected, and extending the area until it reaches the target resolution.
-    Args:
-        input_folder (str): Path to the folder containing key frames.
-        output_folder (str): Path to save the processed frames.
-        model_path (str): Path to the YOLOv8 model file.
-        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
-    """
-    print("Preprocessing frames to fit the target aspect ratio...")
-    model = YOLO(model_path)
-    target_aspect_ratio = target_resolution[0] / target_resolution[1]
-    for frame_name in os.listdir(input_folder):
-        frame_path = os.path.join(input_folder, frame_name)
-        if not frame_name.lower().endswith(('.jpg', '.png')):
-            continue  # Skip non-image files
-        # Read the frame
-        frame = cv2.imread(frame_path)
-        if frame is None:
-            print(f"Error reading frame: {frame_path}")
-            continue
-        original_height, original_width = frame.shape[:2]
-        # Run YOLOv8 inference
-        # TTP adjusted conf to 0.3 from 0.5 originally
-        results = model.predict(frame, conf=0.3)
-        # Initialize cropping region
-        x_center, y_center = original_width // 2, original_height // 2
-        ball_detected = False
-        people_boxes = []
-        # Process detections to find "sports ball" or "person"
-        for result in results[0].boxes:
-            label = result.cls
-            if model.names[int(label)] == "sports ball":
-                # Get the center of the detected football
-                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
-                x_center = int((x_min + x_max) / 2)
-                y_center = int((y_min + y_max) / 2)
-                ball_detected = True
-                break
-            elif model.names[int(label)] == "person":
-                # Collect bounding boxes for people
-                x_min, y_min, x_max, y_max = result.xyxy[0].numpy()
-                people_boxes.append((x_min, y_min, x_max, y_max))
-        # If no ball is detected, focus on the densest group of people
-        if not ball_detected and people_boxes:
-            # Cluster the people into groups based on proximity
-            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
-            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)
-            # Define a distance threshold to group nearby people - Adjust clustering sensitivity
-            threshold = max(original_width, original_height) * 0.2  # TTP adjusted to 0.2
-            clusters = []
-            visited = set()
-            for i, center in enumerate(centers):
-                if i in visited:
-                    continue
-                cluster = [i]
-                visited.add(i)
-                for j in range(len(centers)):
-                    if j not in visited and distances[i, j] < threshold:
-                        cluster.append(j)
-                        visited.add(j)
-                clusters.append(cluster)
-            # Find the largest cluster and calculate its bounding box
-            largest_cluster = max(clusters, key=len)
-            x_min = min(people_boxes[i][0] for i in largest_cluster)
-            y_min = min(people_boxes[i][1] for i in largest_cluster)
-            x_max = max(people_boxes[i][2] for i in largest_cluster)
-            y_max = max(people_boxes[i][3] for i in largest_cluster)
-            # Center the crop on the largest cluster
-            x_center = int((x_min + x_max) / 2)
-            y_center = int((y_min + y_max) / 2)
-        # Calculate the cropping region to fit the target resolution
-        new_width = int(original_height * target_aspect_ratio)
-        new_height = int(original_width / target_aspect_ratio)
-        x_start = max(0, x_center - new_width // 2)
-        y_start = max(0, y_center - new_height // 2)
-        x_end = min(original_width, x_start + new_width)
-        y_end = min(original_height, y_start + new_height)
-        # Adjust the crop if the size is smaller than the target resolution
-        if (x_end - x_start) < new_width:
-            x_start = max(0, x_end - new_width)
-        if (y_end - y_start) < new_height:
-            y_start = max(0, y_end - new_height)
-        # Crop and resize the frame
-        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
-        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)
-        # Save the processed frame
-        output_path = os.path.join(output_folder, frame_name)
-        cv2.imwrite(output_path, frame_resized)
-        print(f"Processed frame saved: {output_path}")
-    print("Preprocessing completed.")

requirements.txt CHANGED Viewed

@@ -35,7 +35,7 @@ dill==0.3.7
 dnspython==2.6.1
 email_validator==2.1.1
 executing==2.1.0
-fastapi==0.111.0
 fastapi-cli==0.0.3
 fastjsonschema==2.20.0
 ffmpy==0.3.2
@@ -44,14 +44,15 @@ fonttools==4.51.0
 fqdn==1.5.1
 frozenlist==1.3.3
 fsspec==2023.6.0
-gradio_client==0.16.2
 grpcio==1.66.2
 h11==0.14.0
 holidays==0.57
 httpcore==1.0.5
 httptools==0.6.1
 httpx==0.27.0
-huggingface-hub==0.23.0
 idna==3.4
 importlib_resources==6.4.0
 ipykernel==6.29.5
@@ -89,6 +90,7 @@ mistune==3.0.2
 mpmath==1.3.0
 multidict==6.0.4
 multiprocess==0.70.15
 nbclient==0.10.0
 nbconvert==7.16.4
 nbformat==5.10.4
@@ -98,6 +100,7 @@ neuralprophet==0.9.0
 notebook_shim==0.2.4
 numpy==1.26.1
 openai==0.27.7
 openml==0.14.2
 orjson==3.10.3
 osqp==0.6.7.post3
@@ -112,10 +115,12 @@ platformdirs==4.3.2
 plotly==5.24.1
 prometheus_client==0.20.0
 prompt_toolkit==3.0.47
 protobuf==5.28.2
 psutil==6.0.0
 ptyprocess==0.7.0
 pure_eval==0.2.3
 pyarrow==13.0.0
 pycparser==2.22
 pycryptodome==3.20.0
@@ -127,7 +132,7 @@ pyparsing==3.1.2
 python-dateutil==2.8.2
 python-dotenv==1.0.1
 python-json-logger==2.0.7
-python-multipart==0.0.9
 pytorch-lightning==2.4.0
 pytz==2023.3.post1
 PyYAML==6.0.1
@@ -139,27 +144,32 @@ requests==2.31.0
 rfc3339-validator==0.1.4
 rfc3986-validator==0.1.1
 rich==13.7.1
 rpds-py==0.18.1
-ruff==0.4.3
 s3transfer==0.7.0
 safetensors==0.4.0
 scikit-learn==1.5.2
 scipy==1.14.1
 scs==3.2.7
 semantic-version==2.10.0
 Send2Trash==1.8.3
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
 soupsieve==2.6
 stack-data==0.6.3
-starlette==0.37.2
 sympy==1.12
 tenacity==9.0.0
 tensorboard==2.18.0
 tensorboard-data-server==0.7.2
 terminado==0.18.1
 threadpoolctl==3.5.0
 tinycss2==1.3.0
 tokenizers==0.14.1
 tomlkit==0.12.0
@@ -167,14 +177,18 @@ toolz==0.12.1
 torch==2.1.0
 torchdata==0.7.0
 torchmetrics==1.4.2
 tornado==6.4.1
 tqdm==4.65.0
 traitlets==5.14.3
 typer==0.12.3
 types-python-dateutil==2.9.0.20240906
 typing_extensions==4.8.0
 tzdata==2023.3
 ujson==5.9.0
 uri-template==1.3.0
 urllib3==2.0.2
 uvicorn==0.29.0

 dnspython==2.6.1
 email_validator==2.1.1
 executing==2.1.0
+fastapi==0.115.8
 fastapi-cli==0.0.3
 fastjsonschema==2.20.0
 ffmpy==0.3.2
 fqdn==1.5.1
 frozenlist==1.3.3
 fsspec==2023.6.0
+gradio==5.15.0
+gradio_client==1.7.0
 grpcio==1.66.2
 h11==0.14.0
 holidays==0.57
 httpcore==1.0.5
 httptools==0.6.1
 httpx==0.27.0
+huggingface-hub==0.28.1
 idna==3.4
 importlib_resources==6.4.0
 ipykernel==6.29.5
 mpmath==1.3.0
 multidict==6.0.4
 multiprocess==0.70.15
+narwhals==1.25.2
 nbclient==0.10.0
 nbconvert==7.16.4
 nbformat==5.10.4
 notebook_shim==0.2.4
 numpy==1.26.1
 openai==0.27.7
+opencv-python==4.11.0.86
 openml==0.14.2
 orjson==3.10.3
 osqp==0.6.7.post3
 plotly==5.24.1
 prometheus_client==0.20.0
 prompt_toolkit==3.0.47
+propcache==0.2.1
 protobuf==5.28.2
 psutil==6.0.0
 ptyprocess==0.7.0
 pure_eval==0.2.3
+py-cpuinfo==9.0.0
 pyarrow==13.0.0
 pycparser==2.22
 pycryptodome==3.20.0
 python-dateutil==2.8.2
 python-dotenv==1.0.1
 python-json-logger==2.0.7
+python-multipart==0.0.20
 pytorch-lightning==2.4.0
 pytz==2023.3.post1
 PyYAML==6.0.1
 rfc3339-validator==0.1.4
 rfc3986-validator==0.1.1
 rich==13.7.1
+rich-toolkit==0.13.2
 rpds-py==0.18.1
+ruff==0.9.5
 s3transfer==0.7.0
+safehttpx==0.1.6
 safetensors==0.4.0
 scikit-learn==1.5.2
 scipy==1.14.1
 scs==3.2.7
+seaborn==0.13.2
 semantic-version==2.10.0
 Send2Trash==1.8.3
 shellingham==1.5.4
 six==1.16.0
 sniffio==1.3.1
 soupsieve==2.6
+spaces==0.32.0
 stack-data==0.6.3
+starlette==0.45.3
 sympy==1.12
 tenacity==9.0.0
 tensorboard==2.18.0
 tensorboard-data-server==0.7.2
 terminado==0.18.1
 threadpoolctl==3.5.0
+timm==1.0.14
 tinycss2==1.3.0
 tokenizers==0.14.1
 tomlkit==0.12.0
 torch==2.1.0
 torchdata==0.7.0
 torchmetrics==1.4.2
+torchvision==0.20.1
 tornado==6.4.1
 tqdm==4.65.0
 traitlets==5.14.3
+transformers==4.48.1
 typer==0.12.3
 types-python-dateutil==2.9.0.20240906
 typing_extensions==4.8.0
 tzdata==2023.3
 ujson==5.9.0
+ultralytics==8.3.64
+ultralytics-thop==2.0.14
 uri-template==1.3.0
 urllib3==2.0.2
 uvicorn==0.29.0