Gabriel committed on
Commit
3d11dc6
·
verified ·
1 Parent(s): c79571d

Update visualizer.py

Browse files
Files changed (1) hide show
  1. visualizer.py +42 -34
visualizer.py CHANGED
@@ -83,19 +83,19 @@ def _get_dynamic_font_size(
83
 
84
  total_area = 0
85
  valid_count = 0
86
-
87
  for points in polygons:
88
  area = _calculate_polygon_area(points)
89
  if area > 0:
90
  total_area += area
91
  valid_count += 1
92
-
93
  if valid_count == 0:
94
  return 16
95
-
96
  avg_area = total_area / valid_count
97
  font_size = int(math.sqrt(avg_area) * 0.2)
98
-
99
  return max(12, min(72, font_size))
100
 
101
 
@@ -107,11 +107,11 @@ def _get_font(size: int) -> Optional[ImageFont.FreeTypeFont]:
107
  "/System/Library/Fonts/Helvetica.ttc",
108
  "C:\\Windows\\Fonts\\arial.ttf",
109
  ]
110
-
111
  for font_path in font_paths:
112
  if os.path.exists(font_path):
113
  return ImageFont.truetype(font_path, size)
114
-
115
  return ImageFont.load_default()
116
  except:
117
  return ImageFont.load_default()
@@ -130,30 +130,30 @@ def _visualize_page_xml(
130
  for elem in root.iter():
131
  if elem.tag.endswith("TextLine"):
132
  text_lines.append(elem)
133
-
134
  line_data = []
135
  all_polygons = []
136
-
137
  for text_line in text_lines:
138
  coords_elem = None
139
  for child in text_line:
140
  if child.tag.endswith("Coords"):
141
  coords_elem = child
142
  break
143
-
144
  if coords_elem is not None:
145
  points_str = coords_elem.get("points", "")
146
  points = _parse_points(points_str)
147
-
148
  if len(points) >= 3:
149
  text_content = ""
150
  confidence = None
151
-
152
  for te in text_line.iter():
153
  if te.tag.endswith("Unicode") and te.text:
154
  text_content = te.text.strip()
155
  break
156
-
157
  for te in text_line.iter():
158
  if te.tag.endswith("TextEquiv"):
159
  conf_str = te.get("conf")
@@ -163,30 +163,34 @@ def _visualize_page_xml(
163
  except:
164
  pass
165
  break
166
-
167
  display_text = text_content
168
  if confidence is not None:
169
  display_text = f"{text_content} ({confidence:.3f})"
170
-
171
  line_data.append((points, display_text))
172
  all_polygons.append(points)
173
-
174
  font_size = _get_dynamic_font_size(all_polygons, image_size)
175
  font = _get_font(font_size)
176
-
177
  for i, (points, text) in enumerate(line_data):
178
  color = "red" if i % 2 == 0 else "blue"
179
  draw.polygon(points, outline=color, width=2)
180
-
181
  if text:
182
  centroid_x = sum(p[0] for p in points) // len(points)
183
  centroid_y = sum(p[1] for p in points) // len(points)
184
-
185
  if font != ImageFont.load_default():
186
- bbox = draw.textbbox((centroid_x, centroid_y), text, font=font, anchor="mm")
 
 
187
  bbox = (bbox[0] - 2, bbox[1] - 2, bbox[2] + 2, bbox[3] + 2)
188
  draw.rectangle(bbox, fill=(255, 255, 255, 200), outline="black")
189
- draw.text((centroid_x, centroid_y), text, fill="black", font=font, anchor="mm")
 
 
190
  else:
191
  draw.text((centroid_x, centroid_y), text, fill="black")
192
 
@@ -195,15 +199,15 @@ def _visualize_alto_xml(
195
  draw: ImageDraw.Draw, root: ET.Element, image_size: Tuple[int, int]
196
  ):
197
  namespace = _get_namespace(root)
198
-
199
  text_lines = []
200
  for elem in root.iter():
201
  if elem.tag.endswith("TextLine"):
202
  text_lines.append(elem)
203
-
204
  line_data = []
205
  all_polygons = []
206
-
207
  for text_line in text_lines:
208
  points = []
209
  for shape in text_line.iter():
@@ -214,11 +218,11 @@ def _visualize_alto_xml(
214
  points = _parse_points(points_str)
215
  break
216
  break
217
-
218
  if len(points) >= 3:
219
  text_content = ""
220
  confidence = None
221
-
222
  for string_elem in text_line.iter():
223
  if string_elem.tag.endswith("String"):
224
  text_content = string_elem.get("CONTENT", "")
@@ -229,29 +233,33 @@ def _visualize_alto_xml(
229
  except:
230
  pass
231
  break
232
-
233
  display_text = text_content
234
  if confidence is not None:
235
  display_text = f"{text_content} ({confidence:.3f})"
236
-
237
  line_data.append((points, display_text))
238
  all_polygons.append(points)
239
-
240
  font_size = _get_dynamic_font_size(all_polygons, image_size)
241
  font = _get_font(font_size)
242
-
243
  for i, (points, text) in enumerate(line_data):
244
  color = "red" if i % 2 == 0 else "blue"
245
  draw.polygon(points, outline=color, width=2)
246
-
247
  if text:
248
  centroid_x = sum(p[0] for p in points) // len(points)
249
  centroid_y = sum(p[1] for p in points) // len(points)
250
-
251
  if font != ImageFont.load_default():
252
- bbox = draw.textbbox((centroid_x, centroid_y), text, font=font, anchor="mm")
 
 
253
  bbox = (bbox[0] - 2, bbox[1] - 2, bbox[2] + 2, bbox[3] + 2)
254
  draw.rectangle(bbox, fill=(255, 255, 255, 200), outline="black")
255
- draw.text((centroid_x, centroid_y), text, fill="black", font=font, anchor="mm")
 
 
256
  else:
257
- draw.text((centroid_x, centroid_y), text, fill="black")
 
83
 
84
  total_area = 0
85
  valid_count = 0
86
+
87
  for points in polygons:
88
  area = _calculate_polygon_area(points)
89
  if area > 0:
90
  total_area += area
91
  valid_count += 1
92
+
93
  if valid_count == 0:
94
  return 16
95
+
96
  avg_area = total_area / valid_count
97
  font_size = int(math.sqrt(avg_area) * 0.2)
98
+
99
  return max(12, min(72, font_size))
100
 
101
 
 
107
  "/System/Library/Fonts/Helvetica.ttc",
108
  "C:\\Windows\\Fonts\\arial.ttf",
109
  ]
110
+
111
  for font_path in font_paths:
112
  if os.path.exists(font_path):
113
  return ImageFont.truetype(font_path, size)
114
+
115
  return ImageFont.load_default()
116
  except:
117
  return ImageFont.load_default()
 
130
  for elem in root.iter():
131
  if elem.tag.endswith("TextLine"):
132
  text_lines.append(elem)
133
+
134
  line_data = []
135
  all_polygons = []
136
+
137
  for text_line in text_lines:
138
  coords_elem = None
139
  for child in text_line:
140
  if child.tag.endswith("Coords"):
141
  coords_elem = child
142
  break
143
+
144
  if coords_elem is not None:
145
  points_str = coords_elem.get("points", "")
146
  points = _parse_points(points_str)
147
+
148
  if len(points) >= 3:
149
  text_content = ""
150
  confidence = None
151
+
152
  for te in text_line.iter():
153
  if te.tag.endswith("Unicode") and te.text:
154
  text_content = te.text.strip()
155
  break
156
+
157
  for te in text_line.iter():
158
  if te.tag.endswith("TextEquiv"):
159
  conf_str = te.get("conf")
 
163
  except:
164
  pass
165
  break
166
+
167
  display_text = text_content
168
  if confidence is not None:
169
  display_text = f"{text_content} ({confidence:.3f})"
170
+
171
  line_data.append((points, display_text))
172
  all_polygons.append(points)
173
+
174
  font_size = _get_dynamic_font_size(all_polygons, image_size)
175
  font = _get_font(font_size)
176
+
177
  for i, (points, text) in enumerate(line_data):
178
  color = "red" if i % 2 == 0 else "blue"
179
  draw.polygon(points, outline=color, width=2)
180
+
181
  if text:
182
  centroid_x = sum(p[0] for p in points) // len(points)
183
  centroid_y = sum(p[1] for p in points) // len(points)
184
+
185
  if font != ImageFont.load_default():
186
+ bbox = draw.textbbox(
187
+ (centroid_x, centroid_y), text, font=font, anchor="mm"
188
+ )
189
  bbox = (bbox[0] - 2, bbox[1] - 2, bbox[2] + 2, bbox[3] + 2)
190
  draw.rectangle(bbox, fill=(255, 255, 255, 200), outline="black")
191
+ draw.text(
192
+ (centroid_x, centroid_y), text, fill="black", font=font, anchor="mm"
193
+ )
194
  else:
195
  draw.text((centroid_x, centroid_y), text, fill="black")
196
 
 
199
  draw: ImageDraw.Draw, root: ET.Element, image_size: Tuple[int, int]
200
  ):
201
  namespace = _get_namespace(root)
202
+
203
  text_lines = []
204
  for elem in root.iter():
205
  if elem.tag.endswith("TextLine"):
206
  text_lines.append(elem)
207
+
208
  line_data = []
209
  all_polygons = []
210
+
211
  for text_line in text_lines:
212
  points = []
213
  for shape in text_line.iter():
 
218
  points = _parse_points(points_str)
219
  break
220
  break
221
+
222
  if len(points) >= 3:
223
  text_content = ""
224
  confidence = None
225
+
226
  for string_elem in text_line.iter():
227
  if string_elem.tag.endswith("String"):
228
  text_content = string_elem.get("CONTENT", "")
 
233
  except:
234
  pass
235
  break
236
+
237
  display_text = text_content
238
  if confidence is not None:
239
  display_text = f"{text_content} ({confidence:.3f})"
240
+
241
  line_data.append((points, display_text))
242
  all_polygons.append(points)
243
+
244
  font_size = _get_dynamic_font_size(all_polygons, image_size)
245
  font = _get_font(font_size)
246
+
247
  for i, (points, text) in enumerate(line_data):
248
  color = "red" if i % 2 == 0 else "blue"
249
  draw.polygon(points, outline=color, width=2)
250
+
251
  if text:
252
  centroid_x = sum(p[0] for p in points) // len(points)
253
  centroid_y = sum(p[1] for p in points) // len(points)
254
+
255
  if font != ImageFont.load_default():
256
+ bbox = draw.textbbox(
257
+ (centroid_x, centroid_y), text, font=font, anchor="mm"
258
+ )
259
  bbox = (bbox[0] - 2, bbox[1] - 2, bbox[2] + 2, bbox[3] + 2)
260
  draw.rectangle(bbox, fill=(255, 255, 255, 200), outline="black")
261
+ draw.text(
262
+ (centroid_x, centroid_y), text, fill="black", font=font, anchor="mm"
263
+ )
264
  else:
265
+ draw.text((centroid_x, centroid_y), text, fill="black")