phitran committed on
Commit
0d44a50
·
1 Parent(s): 041f44a

fix build error

Browse files
.DS_Store ADDED
Binary file (6.15 kB). View file
 
handlers/__pycache__/frame_handler_resnet.cpython-311.pyc DELETED
Binary file (8.39 kB)
 
handlers/frame_handler_resnet.py DELETED
@@ -1,253 +0,0 @@
1
- import os
2
- import cv2
3
- import torch
4
- #from transformers import DetrImageProcessor, DetrForObjectDetection
5
- from transformers import AutoImageProcessor, AutoModelForObjectDetection
6
- from PIL import Image
7
- import numpy as np
8
-
9
def _boxes_to_pixel_corners(boxes, width, height):
    """Convert normalized (cx, cy, w, h) boxes to integer pixel (x_min, y_min, x_max, y_max) boxes clipped to the frame."""
    boxes = np.asarray(boxes, dtype=float).reshape(-1, 4)
    cx, cy, bw, bh = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    corners = np.stack(
        [
            (cx - bw / 2) * width,
            (cy - bh / 2) * height,
            (cx + bw / 2) * width,
            (cy + bh / 2) * height,
        ],
        axis=1,
    )
    corners[:, [0, 2]] = np.clip(corners[:, [0, 2]], 0, width)
    corners[:, [1, 3]] = np.clip(corners[:, [1, 3]], 0, height)
    return corners.astype(int)


def _densest_group_center(people_boxes, threshold):
    """Return the (x, y) center of the bounding box around the largest group of nearby people."""
    centers = np.array([((x1 + x2) // 2, (y1 + y2) // 2) for x1, y1, x2, y2 in people_boxes])
    distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)

    # Greedy grouping: every still-unassigned box becomes a seed and absorbs
    # all unassigned boxes whose center lies within `threshold` of the seed.
    clusters = []
    visited = set()
    for seed in range(len(centers)):
        if seed in visited:
            continue
        cluster = [seed]
        visited.add(seed)
        for other in range(len(centers)):
            if other not in visited and distances[seed, other] < threshold:
                cluster.append(other)
                visited.add(other)
        clusters.append(cluster)

    largest_cluster = max(clusters, key=len)
    x_min = min(people_boxes[i][0] for i in largest_cluster)
    y_min = min(people_boxes[i][1] for i in largest_cluster)
    x_max = max(people_boxes[i][2] for i in largest_cluster)
    y_max = max(people_boxes[i][3] for i in largest_cluster)
    return (x_min + x_max) // 2, (y_min + y_max) // 2


def _crop_window(x_center, y_center, width, height, aspect_ratio):
    """Return (x_start, y_start, x_end, y_end) of the largest window with `aspect_ratio` that fits the frame, centered as closely as possible on the given point."""
    if width / height > aspect_ratio:
        # Frame is wider than the target: keep the full height, trim the width.
        crop_height = height
        crop_width = min(width, max(1, int(height * aspect_ratio)))
    else:
        # Frame is taller than (or equal to) the target: keep the full width.
        crop_width = width
        crop_height = min(height, max(1, int(width / aspect_ratio)))

    # Shift the window back inside the frame when the center is near a border.
    x_start = min(max(0, x_center - crop_width // 2), width - crop_width)
    y_start = min(max(0, y_center - crop_height // 2), height - crop_height)
    return x_start, y_start, x_start + crop_width, y_start + crop_height


def crop_preserve_key_objects(input_folder, output_folder, model_name='facebook/detr-resnet-50', target_resolution=(360, 640)):
    """
    Crop and resize every key frame to the target resolution, keeping the action in view.

    For each `.jpg`/`.png` file in `input_folder` the frame is run through an
    object-detection model. The crop is centered on the most confident
    "sports ball" detection; if there is none, on the densest group of
    detected people; if nothing relevant is detected, on the frame center.
    The crop has the target aspect ratio and is then resized to
    `target_resolution` and written to `output_folder` under the same name.

    Args:
        input_folder (str): Path to the folder containing key frames.
        output_folder (str): Path to save the processed frames (created if missing).
        model_name (str): Hugging Face model name for DETR.
        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
    """
    print("Preprocessing frames using DETR to fit the target aspect ratio...")

    # Load the detection model and its image processor.
    processor = AutoImageProcessor.from_pretrained(model_name)
    model = AutoModelForObjectDetection.from_pretrained(model_name)
    model.eval()

    # Resolve class ids from the checkpoint's own label map instead of relying
    # on hard-coded numbers; fall back to the previously hard-coded ids when
    # the checkpoint does not expose these label names.
    id2label = getattr(model.config, "id2label", None) or {}
    name_to_id = {str(name).lower(): int(idx) for idx, name in id2label.items()}
    ball_label = name_to_id.get("sports ball", 32)
    person_label = name_to_id.get("person", 1)

    target_aspect_ratio = target_resolution[0] / target_resolution[1]
    confidence_threshold = 0.1

    # cv2.imwrite does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    for frame_name in os.listdir(input_folder):
        if not frame_name.lower().endswith(('.jpg', '.png')):
            continue  # Skip non-image files

        # Read the frame
        frame_path = os.path.join(input_folder, frame_name)
        frame = cv2.imread(frame_path)
        if frame is None:
            print(f"Error reading frame: {frame_path}")
            continue

        original_height, original_width = frame.shape[:2]
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

        # Run inference; gradients are not needed, so skip building the graph.
        inputs = processor(images=frame_pil, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)

        # Per-query best class and its probability (the "no object" class is
        # part of the softmax, so such queries simply match neither label).
        probabilities = outputs.logits.softmax(-1)[0]
        best_scores, best_labels = probabilities.max(-1)
        scores = best_scores.cpu().numpy()
        labels = best_labels.cpu().numpy()

        # DETR-style `pred_boxes` are normalized (center_x, center_y, width,
        # height); convert them to pixel corner coordinates.
        boxes = _boxes_to_pixel_corners(
            outputs.pred_boxes[0].cpu().numpy(), original_width, original_height
        )

        confident = scores >= confidence_threshold
        ball_indices = np.flatnonzero(confident & (labels == ball_label))
        person_indices = np.flatnonzero(confident & (labels == person_label))

        # Default to the frame center so the crop is always well defined,
        # even when neither a ball nor a person is detected.
        x_center, y_center = original_width // 2, original_height // 2

        if ball_indices.size:
            print("Ball is detected in the frame.")
            best_ball = ball_indices[np.argmax(scores[ball_indices])]
            x_min, y_min, x_max, y_max = (int(v) for v in boxes[best_ball])
            x_center = (x_min + x_max) // 2
            y_center = (y_min + y_max) // 2
        elif person_indices.size:
            # No ball: focus on the densest group of people instead.
            people_boxes = []
            for index in person_indices:
                print("Person is detected in the frame.")
                people_boxes.append(tuple(int(v) for v in boxes[index]))
            threshold = max(original_width, original_height) * 0.1  # Adjust clustering sensitivity
            x_center, y_center = _densest_group_center(people_boxes, threshold)

        # Calculate the cropping region to fit the target aspect ratio
        x_start, y_start, x_end, y_end = _crop_window(
            x_center, y_center, original_width, original_height, target_aspect_ratio
        )

        # Crop and resize the frame
        frame_cropped = frame[y_start:y_end, x_start:x_end]
        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)

        # Save the processed frame
        output_path = os.path.join(output_folder, frame_name)
        if cv2.imwrite(output_path, frame_resized):
            print(f"Processed frame saved: {output_path}")
        else:
            print(f"Error saving frame: {output_path}")

    print("Preprocessing completed.")
140
-
141
-
142
- #back up
143
def backup_yolo_crop_preserve_key_objects(input_folder, output_folder, model_path='yolov8n.pt', target_resolution=(360, 640)):
    """
    Crop and resize every key frame to the target resolution using a YOLO detector.

    For each `.jpg`/`.png` file in `input_folder` the crop is centered on the
    first detected "sports ball"; if there is none, on the densest group of
    detected people; otherwise on the frame center. The crop is then resized
    to `target_resolution` and written to `output_folder` under the same name.

    Args:
        input_folder (str): Path to the folder containing key frames.
        output_folder (str): Path to save the processed frames (created if missing).
        model_path (str): Path to the YOLOv8 model file.
        target_resolution (tuple): Desired resolution (width, height), e.g., (1920, 1080).
    """
    # Imported here because the module never imports YOLO at the top level;
    # without this the call below raises NameError.
    from ultralytics import YOLO

    print("Preprocessing frames to fit the target aspect ratio...")

    model = YOLO(model_path)
    target_aspect_ratio = target_resolution[0] / target_resolution[1]

    # cv2.imwrite does not create missing directories.
    os.makedirs(output_folder, exist_ok=True)

    for frame_name in os.listdir(input_folder):
        frame_path = os.path.join(input_folder, frame_name)
        if not frame_name.lower().endswith(('.jpg', '.png')):
            continue  # Skip non-image files

        # Read the frame
        frame = cv2.imread(frame_path)
        if frame is None:
            print(f"Error reading frame: {frame_path}")
            continue

        original_height, original_width = frame.shape[:2]

        # Run YOLOv8 inference
        # TTP adjusted conf to 0.3 from 0.5 originally
        results = model.predict(frame, conf=0.3)

        # Initialize cropping region at the frame center (used when nothing is detected)
        x_center, y_center = original_width // 2, original_height // 2
        ball_detected = False
        people_boxes = []

        # Process detections to find "sports ball" or "person"
        for result in results[0].boxes:
            class_name = model.names[int(result.cls)]
            if class_name not in ("sports ball", "person"):
                continue
            # Move to CPU before converting: .numpy() fails on GPU tensors.
            x_min, y_min, x_max, y_max = result.xyxy[0].cpu().numpy()
            if class_name == "sports ball":
                # Center the crop on the detected football
                x_center = int((x_min + x_max) / 2)
                y_center = int((y_min + y_max) / 2)
                ball_detected = True
                break
            # Collect bounding boxes for people
            people_boxes.append((x_min, y_min, x_max, y_max))

        # If no ball is detected, focus on the densest group of people
        if not ball_detected and people_boxes:
            # Cluster the people into groups based on proximity
            centers = np.array([(int((x1 + x2) / 2), int((y1 + y2) / 2)) for x1, y1, x2, y2 in people_boxes])
            distances = np.linalg.norm(centers[:, None, :] - centers[None, :, :], axis=2)

            # Define a distance threshold to group nearby people - Adjust clustering sensitivity
            threshold = max(original_width, original_height) * 0.2  # TTP adjusted to 0.2
            clusters = []
            visited = set()

            # Greedy grouping: each unassigned box seeds a cluster and absorbs
            # all unassigned boxes within `threshold` of the seed's center.
            for i in range(len(centers)):
                if i in visited:
                    continue
                cluster = [i]
                visited.add(i)
                for j in range(len(centers)):
                    if j not in visited and distances[i, j] < threshold:
                        cluster.append(j)
                        visited.add(j)
                clusters.append(cluster)

            # Find the largest cluster and calculate its bounding box
            largest_cluster = max(clusters, key=len)
            x_min = min(people_boxes[i][0] for i in largest_cluster)
            y_min = min(people_boxes[i][1] for i in largest_cluster)
            x_max = max(people_boxes[i][2] for i in largest_cluster)
            y_max = max(people_boxes[i][3] for i in largest_cluster)

            # Center the crop on the largest cluster
            x_center = int((x_min + x_max) / 2)
            y_center = int((y_min + y_max) / 2)

        # Calculate the cropping region to fit the target resolution.
        # One of the two candidate sizes exceeds the frame and is clamped
        # below, which leaves a window with the target aspect ratio.
        new_width = int(original_height * target_aspect_ratio)
        new_height = int(original_width / target_aspect_ratio)

        x_start = max(0, x_center - new_width // 2)
        y_start = max(0, y_center - new_height // 2)
        x_end = min(original_width, x_start + new_width)
        y_end = min(original_height, y_start + new_height)

        # Shift the window back inside the frame if it was cut off at the far edge
        if (x_end - x_start) < new_width:
            x_start = max(0, x_end - new_width)
        if (y_end - y_start) < new_height:
            y_start = max(0, y_end - new_height)

        # Crop and resize the frame
        frame_cropped = frame[int(y_start):int(y_end), int(x_start):int(x_end)]
        frame_resized = cv2.resize(frame_cropped, target_resolution, interpolation=cv2.INTER_LINEAR)

        # Save the processed frame
        output_path = os.path.join(output_folder, frame_name)
        if cv2.imwrite(output_path, frame_resized):
            print(f"Processed frame saved: {output_path}")
        else:
            print(f"Error saving frame: {output_path}")

    print("Preprocessing completed.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -35,7 +35,7 @@ dill==0.3.7
35
  dnspython==2.6.1
36
  email_validator==2.1.1
37
  executing==2.1.0
38
- fastapi==0.111.0
39
  fastapi-cli==0.0.3
40
  fastjsonschema==2.20.0
41
  ffmpy==0.3.2
@@ -44,14 +44,15 @@ fonttools==4.51.0
44
  fqdn==1.5.1
45
  frozenlist==1.3.3
46
  fsspec==2023.6.0
47
- gradio_client==0.16.2
 
48
  grpcio==1.66.2
49
  h11==0.14.0
50
  holidays==0.57
51
  httpcore==1.0.5
52
  httptools==0.6.1
53
  httpx==0.27.0
54
- huggingface-hub==0.23.0
55
  idna==3.4
56
  importlib_resources==6.4.0
57
  ipykernel==6.29.5
@@ -89,6 +90,7 @@ mistune==3.0.2
89
  mpmath==1.3.0
90
  multidict==6.0.4
91
  multiprocess==0.70.15
 
92
  nbclient==0.10.0
93
  nbconvert==7.16.4
94
  nbformat==5.10.4
@@ -98,6 +100,7 @@ neuralprophet==0.9.0
98
  notebook_shim==0.2.4
99
  numpy==1.26.1
100
  openai==0.27.7
 
101
  openml==0.14.2
102
  orjson==3.10.3
103
  osqp==0.6.7.post3
@@ -112,10 +115,12 @@ platformdirs==4.3.2
112
  plotly==5.24.1
113
  prometheus_client==0.20.0
114
  prompt_toolkit==3.0.47
 
115
  protobuf==5.28.2
116
  psutil==6.0.0
117
  ptyprocess==0.7.0
118
  pure_eval==0.2.3
 
119
  pyarrow==13.0.0
120
  pycparser==2.22
121
  pycryptodome==3.20.0
@@ -127,7 +132,7 @@ pyparsing==3.1.2
127
  python-dateutil==2.8.2
128
  python-dotenv==1.0.1
129
  python-json-logger==2.0.7
130
- python-multipart==0.0.9
131
  pytorch-lightning==2.4.0
132
  pytz==2023.3.post1
133
  PyYAML==6.0.1
@@ -139,27 +144,32 @@ requests==2.31.0
139
  rfc3339-validator==0.1.4
140
  rfc3986-validator==0.1.1
141
  rich==13.7.1
 
142
  rpds-py==0.18.1
143
- ruff==0.4.3
144
  s3transfer==0.7.0
 
145
  safetensors==0.4.0
146
  scikit-learn==1.5.2
147
  scipy==1.14.1
148
  scs==3.2.7
 
149
  semantic-version==2.10.0
150
  Send2Trash==1.8.3
151
  shellingham==1.5.4
152
  six==1.16.0
153
  sniffio==1.3.1
154
  soupsieve==2.6
 
155
  stack-data==0.6.3
156
- starlette==0.37.2
157
  sympy==1.12
158
  tenacity==9.0.0
159
  tensorboard==2.18.0
160
  tensorboard-data-server==0.7.2
161
  terminado==0.18.1
162
  threadpoolctl==3.5.0
 
163
  tinycss2==1.3.0
164
  tokenizers==0.14.1
165
  tomlkit==0.12.0
@@ -167,14 +177,18 @@ toolz==0.12.1
167
  torch==2.1.0
168
  torchdata==0.7.0
169
  torchmetrics==1.4.2
 
170
  tornado==6.4.1
171
  tqdm==4.65.0
172
  traitlets==5.14.3
 
173
  typer==0.12.3
174
  types-python-dateutil==2.9.0.20240906
175
  typing_extensions==4.8.0
176
  tzdata==2023.3
177
  ujson==5.9.0
 
 
178
  uri-template==1.3.0
179
  urllib3==2.0.2
180
  uvicorn==0.29.0
 
35
  dnspython==2.6.1
36
  email_validator==2.1.1
37
  executing==2.1.0
38
+ fastapi==0.115.8
39
  fastapi-cli==0.0.3
40
  fastjsonschema==2.20.0
41
  ffmpy==0.3.2
 
44
  fqdn==1.5.1
45
  frozenlist==1.3.3
46
  fsspec==2023.6.0
47
+ gradio==5.15.0
48
+ gradio_client==1.7.0
49
  grpcio==1.66.2
50
  h11==0.14.0
51
  holidays==0.57
52
  httpcore==1.0.5
53
  httptools==0.6.1
54
  httpx==0.27.0
55
+ huggingface-hub==0.28.1
56
  idna==3.4
57
  importlib_resources==6.4.0
58
  ipykernel==6.29.5
 
90
  mpmath==1.3.0
91
  multidict==6.0.4
92
  multiprocess==0.70.15
93
+ narwhals==1.25.2
94
  nbclient==0.10.0
95
  nbconvert==7.16.4
96
  nbformat==5.10.4
 
100
  notebook_shim==0.2.4
101
  numpy==1.26.1
102
  openai==0.27.7
103
+ opencv-python==4.11.0.86
104
  openml==0.14.2
105
  orjson==3.10.3
106
  osqp==0.6.7.post3
 
115
  plotly==5.24.1
116
  prometheus_client==0.20.0
117
  prompt_toolkit==3.0.47
118
+ propcache==0.2.1
119
  protobuf==5.28.2
120
  psutil==6.0.0
121
  ptyprocess==0.7.0
122
  pure_eval==0.2.3
123
+ py-cpuinfo==9.0.0
124
  pyarrow==13.0.0
125
  pycparser==2.22
126
  pycryptodome==3.20.0
 
132
  python-dateutil==2.8.2
133
  python-dotenv==1.0.1
134
  python-json-logger==2.0.7
135
+ python-multipart==0.0.20
136
  pytorch-lightning==2.4.0
137
  pytz==2023.3.post1
138
  PyYAML==6.0.1
 
144
  rfc3339-validator==0.1.4
145
  rfc3986-validator==0.1.1
146
  rich==13.7.1
147
+ rich-toolkit==0.13.2
148
  rpds-py==0.18.1
149
+ ruff==0.9.5
150
  s3transfer==0.7.0
151
+ safehttpx==0.1.6
152
  safetensors==0.4.0
153
  scikit-learn==1.5.2
154
  scipy==1.14.1
155
  scs==3.2.7
156
+ seaborn==0.13.2
157
  semantic-version==2.10.0
158
  Send2Trash==1.8.3
159
  shellingham==1.5.4
160
  six==1.16.0
161
  sniffio==1.3.1
162
  soupsieve==2.6
163
+ spaces==0.32.0
164
  stack-data==0.6.3
165
+ starlette==0.45.3
166
  sympy==1.12
167
  tenacity==9.0.0
168
  tensorboard==2.18.0
169
  tensorboard-data-server==0.7.2
170
  terminado==0.18.1
171
  threadpoolctl==3.5.0
172
+ timm==1.0.14
173
  tinycss2==1.3.0
174
  tokenizers==0.14.1
175
  tomlkit==0.12.0
 
177
  torch==2.1.0
178
  torchdata==0.7.0
179
  torchmetrics==1.4.2
180
+ torchvision==0.20.1
181
  tornado==6.4.1
182
  tqdm==4.65.0
183
  traitlets==5.14.3
184
+ transformers==4.48.1
185
  typer==0.12.3
186
  types-python-dateutil==2.9.0.20240906
187
  typing_extensions==4.8.0
188
  tzdata==2023.3
189
  ujson==5.9.0
190
+ ultralytics==8.3.64
191
+ ultralytics-thop==2.0.14
192
  uri-template==1.3.0
193
  urllib3==2.0.2
194
  uvicorn==0.29.0