Gabriel committed on
Commit
c662fe8
·
verified ·
1 Parent(s): a987d91

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -225
app.py CHANGED
@@ -1,18 +1,17 @@
1
  import gradio as gr
2
  import json
3
- import base64
4
  import tempfile
5
  import os
6
- from typing import Dict, List, Optional, Literal
7
- from datetime import datetime
8
- from PIL import Image, ImageDraw, ImageFont
9
- import io
10
  import spaces
11
- import shutil
12
  from pathlib import Path
13
  from htrflow.volume.volume import Collection
14
  from htrflow.pipeline.pipeline import Pipeline
15
 
 
 
 
16
  PIPELINE_CONFIGS = {
17
  "letter_english": {
18
  "steps": [
@@ -117,10 +116,10 @@ PIPELINE_CONFIGS = {
117
  }
118
 
119
  @spaces.GPU
120
- def process_htr(image: Image.Image, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_english", confidence_threshold: float = 0.8, custom_settings: Optional[str] = None) -> Dict:
121
- """Process handwritten text recognition on uploaded images using HTRflow pipelines."""
122
  if image is None:
123
- return {"success": False, "error": "No image provided", "results": None}
124
 
125
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
126
  image.save(temp_file.name, "PNG")
@@ -131,7 +130,7 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
131
  try:
132
  config = json.loads(custom_settings)
133
  except json.JSONDecodeError:
134
- return {"success": False, "error": "Invalid JSON in custom_settings parameter", "results": None}
135
  else:
136
  config = PIPELINE_CONFIGS[document_type]
137
 
@@ -141,236 +140,53 @@ def process_htr(image: Image.Image, document_type: Literal["letter_english", "le
141
  try:
142
  processed_collection = pipeline.run(collection)
143
  except Exception as pipeline_error:
144
- return {"success": False, "error": f"Pipeline execution failed: {str(pipeline_error)}", "results": None}
145
 
146
- results = extract_text_results(processed_collection, confidence_threshold)
147
- collection_data = serialize_collection_data(processed_collection)
 
 
 
 
 
 
 
148
 
149
- processing_state = {
150
- "collection_data": collection_data,
151
- "document_type": document_type,
152
- "confidence_threshold": confidence_threshold,
153
- "timestamp": datetime.now().isoformat(),
154
- }
155
 
156
- return {
157
- "success": True,
158
- "results": results,
159
- "processing_state": json.dumps(processing_state),
160
- "metadata": {
161
- "total_lines": len(results.get("text_lines", [])),
162
- "average_confidence": results.get("average_confidence", 0),
163
- "document_type": document_type,
164
- "image_dimensions": image.size,
165
- },
166
- }
167
  except Exception as e:
168
- return {"success": False, "error": f"HTR processing failed: {str(e)}", "results": None}
169
  finally:
170
  if os.path.exists(temp_image_path):
171
  os.unlink(temp_image_path)
172
 
173
- def visualize_results(processing_state: str, image: Image.Image, visualization_type: Literal["overlay", "confidence_heatmap", "text_regions"] = "overlay", show_confidence: bool = True, highlight_low_confidence: bool = True) -> Dict:
174
- """Generate interactive visualizations of HTR processing results."""
175
- try:
176
- if image is None:
177
- return {"success": False, "error": "Image is required for visualization", "visualization": None}
178
-
179
- state = json.loads(processing_state)
180
- collection_data = state["collection_data"]
181
-
182
- viz_image = create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence)
183
-
184
- img_buffer = io.BytesIO()
185
- viz_image.save(img_buffer, format="PNG")
186
- img_base64 = base64.b64encode(img_buffer.getvalue()).decode("utf-8")
187
-
188
- return {
189
- "success": True,
190
- "visualization": {
191
- "image_base64": img_base64,
192
- "image_format": "PNG",
193
- "visualization_type": visualization_type,
194
- "dimensions": viz_image.size,
195
- },
196
- "metadata": {"total_elements": len(collection_data.get("text_elements", []))},
197
- }
198
-
199
- except Exception as e:
200
- return {"success": False, "error": f"Visualization generation failed: {str(e)}", "visualization": None}
201
-
202
- def export_results(processing_state: str, image: Image.Image, output_formats: List[Literal["txt", "json", "alto", "page"]] = ["txt"], confidence_filter: float = 0.0) -> Dict:
203
- """Export HTR results to multiple formats using HTRflow's native export functionality."""
204
- try:
205
- if image is None:
206
- return {"success": False, "error": "Image is required for export", "exports": None}
207
-
208
- state = json.loads(processing_state)
209
-
210
- with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
211
- image.save(temp_file.name, "PNG")
212
- temp_image_path = temp_file.name
213
-
214
- try:
215
- collection = Collection([temp_image_path])
216
- pipeline = Pipeline.from_config(PIPELINE_CONFIGS[state["document_type"]])
217
- processed_collection = pipeline.run(collection)
218
-
219
- temp_dir = Path(tempfile.mkdtemp())
220
- exports = {}
221
-
222
- for fmt in output_formats:
223
- export_dir = temp_dir / fmt
224
- processed_collection.save(directory=str(export_dir), serializer=fmt)
225
-
226
- export_files = []
227
- for root, _, files in os.walk(export_dir):
228
- for file in files:
229
- file_path = os.path.join(root, file)
230
- try:
231
- with open(file_path, 'r', encoding='utf-8') as f:
232
- content = f.read()
233
- export_files.append({"filename": file, "content": content})
234
- except UnicodeDecodeError:
235
- with open(file_path, 'rb') as f:
236
- content = base64.b64encode(f.read()).decode('utf-8')
237
- export_files.append({"filename": file, "content": content, "encoding": "base64"})
238
-
239
- exports[fmt] = export_files
240
-
241
- shutil.rmtree(temp_dir)
242
-
243
- return {
244
- "success": True,
245
- "exports": exports,
246
- "export_metadata": {
247
- "formats_generated": output_formats,
248
- "confidence_filter": confidence_filter,
249
- "timestamp": datetime.now().isoformat(),
250
- },
251
- }
252
- finally:
253
- if os.path.exists(temp_image_path):
254
- os.unlink(temp_image_path)
255
-
256
- except Exception as e:
257
- return {"success": False, "error": f"Export generation failed: {str(e)}", "exports": None}
258
-
259
- def extract_text_results(collection: Collection, confidence_threshold: float) -> Dict:
260
- results = {"extracted_text": "", "text_lines": [], "confidence_scores": []}
261
  for page in collection.pages:
262
  for node in page.traverse():
263
  if hasattr(node, "text") and node.text:
264
- confidence = getattr(node, "confidence", 1.0)
265
- if confidence >= confidence_threshold:
266
- results["text_lines"].append({
267
- "text": node.text,
268
- "confidence": confidence,
269
- "bbox": getattr(node, "bbox", None),
270
- })
271
- results["extracted_text"] += node.text + "\n"
272
- results["confidence_scores"].append(confidence)
273
-
274
- results["average_confidence"] = sum(results["confidence_scores"]) / len(results["confidence_scores"]) if results["confidence_scores"] else 0
275
- return results
276
-
277
- def serialize_collection_data(collection: Collection) -> Dict:
278
- text_elements = []
279
- for page in collection.pages:
280
- for node in page.traverse():
281
- if hasattr(node, "text") and node.text:
282
- text_elements.append({
283
- "text": node.text,
284
- "confidence": getattr(node, "confidence", 1.0),
285
- "bbox": getattr(node, "bbox", None),
286
- })
287
- return {"text_elements": text_elements}
288
-
289
- def create_visualization(image, collection_data, visualization_type, show_confidence, highlight_low_confidence):
290
- viz_image = image.copy()
291
- draw = ImageDraw.Draw(viz_image)
292
-
293
- try:
294
- font = ImageFont.truetype("arial.ttf", 12)
295
- except:
296
- font = ImageFont.load_default()
297
-
298
- for element in collection_data.get("text_elements", []):
299
- if element.get("bbox"):
300
- bbox = element["bbox"]
301
- confidence = element.get("confidence", 1.0)
302
-
303
- if visualization_type == "overlay":
304
- color = (255, 165, 0) if highlight_low_confidence and confidence < 0.7 else (0, 255, 0)
305
- draw.rectangle(bbox, outline=color, width=2)
306
- if show_confidence:
307
- draw.text((bbox[0], bbox[1] - 15), f"{confidence:.2f}", fill=color, font=font)
308
-
309
- elif visualization_type == "confidence_heatmap":
310
- if confidence < 0.5:
311
- color = (255, 0, 0, 100)
312
- elif confidence < 0.8:
313
- color = (255, 255, 0, 100)
314
- else:
315
- color = (0, 255, 0, 100)
316
- overlay = Image.new("RGBA", viz_image.size, (0, 0, 0, 0))
317
- overlay_draw = ImageDraw.Draw(overlay)
318
- overlay_draw.rectangle(bbox, fill=color)
319
- viz_image = Image.alpha_composite(viz_image.convert("RGBA"), overlay)
320
-
321
- elif visualization_type == "text_regions":
322
- colors = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
323
- color = colors[hash(str(bbox)) % len(colors)]
324
- draw.rectangle(bbox, outline=color, width=3)
325
-
326
- return viz_image.convert("RGB") if visualization_type == "confidence_heatmap" else viz_image
327
 
328
  def create_htrflow_mcp_server():
329
- demo = gr.TabbedInterface(
330
- [
331
- gr.Interface(
332
- fn=process_htr,
333
- inputs=[
334
- gr.Image(type="pil", label="Upload Image"),
335
- gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_english", label="Document Type"),
336
- gr.Slider(0.0, 1.0, value=0.8, label="Confidence Threshold"),
337
- gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings"),
338
- ],
339
- outputs=gr.JSON(label="Processing Results"),
340
- title="HTR Processing Tool",
341
- description="Process handwritten text using configurable HTRflow pipelines",
342
- api_name="process_htr",
343
- ),
344
- gr.Interface(
345
- fn=visualize_results,
346
- inputs=[
347
- gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
348
- gr.Image(type="pil", label="Image"),
349
- gr.Dropdown(choices=["overlay", "confidence_heatmap", "text_regions"], value="overlay", label="Visualization Type"),
350
- gr.Checkbox(value=True, label="Show Confidence Scores"),
351
- gr.Checkbox(value=True, label="Highlight Low Confidence"),
352
- ],
353
- outputs=gr.JSON(label="Visualization Results"),
354
- title="Results Visualization Tool",
355
- description="Generate interactive visualizations of HTR results",
356
- api_name="visualize_results",
357
- ),
358
- gr.Interface(
359
- fn=export_results,
360
- inputs=[
361
- gr.Textbox(label="Processing State (JSON)", placeholder="Paste processing results from HTR tool"),
362
- gr.Image(type="pil", label="Image"),
363
- gr.CheckboxGroup(choices=["txt", "json", "alto", "page"], value=["txt"], label="Output Formats"),
364
- gr.Slider(0.0, 1.0, value=0.0, label="Confidence Filter"),
365
- ],
366
- outputs=gr.JSON(label="Export Results"),
367
- title="Export Tool",
368
- description="Export HTR results to multiple formats",
369
- api_name="export_results",
370
- ),
371
  ],
372
- ["HTR Processing", "Results Visualization", "Export Results"],
373
  title="HTRflow MCP Server",
 
 
374
  )
375
  return demo
376
 
 
1
  import gradio as gr
2
  import json
 
3
  import tempfile
4
  import os
5
+ from typing import List, Optional, Literal
6
+ from PIL import Image
 
 
7
  import spaces
 
8
  from pathlib import Path
9
  from htrflow.volume.volume import Collection
10
  from htrflow.pipeline.pipeline import Pipeline
11
 
12
+ DEFAULT_OUTPUT = "alto"
13
+ CHOICES = ["txt", "alto", "page", "json"]
14
+
15
  PIPELINE_CONFIGS = {
16
  "letter_english": {
17
  "steps": [
 
116
  }
117
 
118
  @spaces.GPU
119
+ def process_htr(image: Image.Image, document_type: Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"] = "letter_english", output_format: Literal["txt", "alto", "page", "json"] = DEFAULT_OUTPUT, custom_settings: Optional[str] = None):
120
+ """Process handwritten text recognition and return extracted text with specified format file."""
121
  if image is None:
122
+ return "Error: No image provided", None
123
 
124
  with tempfile.NamedTemporaryFile(suffix=".png", delete=False) as temp_file:
125
  image.save(temp_file.name, "PNG")
 
130
  try:
131
  config = json.loads(custom_settings)
132
  except json.JSONDecodeError:
133
+ return "Error: Invalid JSON in custom_settings parameter", None
134
  else:
135
  config = PIPELINE_CONFIGS[document_type]
136
 
 
140
  try:
141
  processed_collection = pipeline.run(collection)
142
  except Exception as pipeline_error:
143
+ return f"Error: Pipeline execution failed: {str(pipeline_error)}", None
144
 
145
+ temp_dir = Path(tempfile.mkdtemp())
146
+ export_dir = temp_dir / output_format
147
+ processed_collection.save(directory=str(export_dir), serializer=output_format)
148
+
149
+ output_file_path = None
150
+ for root, _, files in os.walk(export_dir):
151
+ for file in files:
152
+ output_file_path = os.path.join(root, file)
153
+ break
154
 
155
+ extracted_text = extract_text_from_collection(processed_collection)
 
 
 
 
 
156
 
157
+ return extracted_text, output_file_path
158
+
 
 
 
 
 
 
 
 
 
159
  except Exception as e:
160
+ return f"Error: HTR processing failed: {str(e)}", None
161
  finally:
162
  if os.path.exists(temp_image_path):
163
  os.unlink(temp_image_path)
164
 
165
+ def extract_text_from_collection(collection: Collection) -> str:
166
+ """Extract plain text from processed collection."""
167
+ text_lines = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  for page in collection.pages:
169
  for node in page.traverse():
170
  if hasattr(node, "text") and node.text:
171
+ text_lines.append(node.text)
172
+ return "\n".join(text_lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
  def create_htrflow_mcp_server():
175
+ demo = gr.Interface(
176
+ fn=process_htr,
177
+ inputs=[
178
+ gr.Image(type="pil", label="Upload Image"),
179
+ gr.Dropdown(choices=["letter_english", "letter_swedish", "spread_english", "spread_swedish"], value="letter_english", label="Document Type"),
180
+ gr.Dropdown(choices=CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
181
+ gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings"),
182
+ ],
183
+ outputs=[
184
+ gr.Textbox(label="Extracted Text", lines=10),
185
+ gr.File(label="Download Output File")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
186
  ],
 
187
  title="HTRflow MCP Server",
188
+ description="Process handwritten text and get extracted text with output file in specified format",
189
+ api_name="process_htr",
190
  )
191
  return demo
192