Gabriel committed on
Commit
c79571d
·
verified ·
1 Parent(s): 37ebc76

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -40
app.py CHANGED
@@ -4,16 +4,26 @@ import tempfile
4
  import os
5
  from typing import List, Optional, Literal, Tuple
6
  from PIL import Image
 
7
  import spaces
8
  from pathlib import Path
 
9
  from htrflow.volume.volume import Collection
10
  from htrflow.pipeline.pipeline import Pipeline
11
 
 
12
  DEFAULT_OUTPUT = "alto"
13
- FORMAT_CHOICES = ["letter_english", "letter_swedish", "spread_english", "spread_swedish"]
 
 
 
 
 
14
  FILE_CHOICES = ["txt", "alto", "page", "json"]
15
 
16
- FormatChoices = Literal["letter_english", "letter_swedish", "spread_english", "spread_swedish"]
 
 
17
  FileChoices = Literal["txt", "alto", "page", "json"]
18
 
19
  PIPELINE_CONFIGS = {
@@ -23,7 +33,9 @@ PIPELINE_CONFIGS = {
23
  "step": "Segmentation",
24
  "settings": {
25
  "model": "yolo",
26
- "model_settings": {"model": "Riksarkivet/yolov9-lines-within-regions-1"},
 
 
27
  "generation_settings": {"batch_size": 8},
28
  },
29
  },
@@ -44,7 +56,9 @@ PIPELINE_CONFIGS = {
44
  "step": "Segmentation",
45
  "settings": {
46
  "model": "yolo",
47
- "model_settings": {"model": "Riksarkivet/yolov9-lines-within-regions-1"},
 
 
48
  "generation_settings": {"batch_size": 8},
49
  },
50
  },
@@ -52,7 +66,9 @@ PIPELINE_CONFIGS = {
52
  "step": "TextRecognition",
53
  "settings": {
54
  "model": "TrOCR",
55
- "model_settings": {"model": "Riksarkivet/trocr-base-handwritten-hist-swe-2"},
 
 
56
  "generation_settings": {"batch_size": 16},
57
  },
58
  },
@@ -73,7 +89,9 @@ PIPELINE_CONFIGS = {
73
  "step": "Segmentation",
74
  "settings": {
75
  "model": "yolo",
76
- "model_settings": {"model": "Riksarkivet/yolov9-lines-within-regions-1"},
 
 
77
  "generation_settings": {"batch_size": 8},
78
  },
79
  },
@@ -102,7 +120,9 @@ PIPELINE_CONFIGS = {
102
  "step": "Segmentation",
103
  "settings": {
104
  "model": "yolo",
105
- "model_settings": {"model": "Riksarkivet/yolov9-lines-within-regions-1"},
 
 
106
  "generation_settings": {"batch_size": 8},
107
  },
108
  },
@@ -110,7 +130,9 @@ PIPELINE_CONFIGS = {
110
  "step": "TextRecognition",
111
  "settings": {
112
  "model": "TrOCR",
113
- "model_settings": {"model": "Riksarkivet/trocr-base-handwritten-hist-swe-2"},
 
 
114
  "generation_settings": {"batch_size": 16},
115
  },
116
  },
@@ -119,9 +141,13 @@ PIPELINE_CONFIGS = {
119
  },
120
  }
121
 
 
122
  @spaces.GPU
123
- def _process_htr_pipeline(image_path: str, document_type: FormatChoices, custom_settings: Optional[str] = None) -> Collection:
 
 
124
  """Process HTR pipeline and return the processed collection."""
 
125
  if not image_path:
126
  raise ValueError("No image provided")
127
 
@@ -135,60 +161,78 @@ def _process_htr_pipeline(image_path: str, document_type: FormatChoices, custom_
135
 
136
  collection = Collection([image_path])
137
  pipeline = Pipeline.from_config(config)
138
-
139
  try:
140
  processed_collection = pipeline.run(collection)
141
  return processed_collection
142
  except Exception as pipeline_error:
143
  raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}")
144
 
145
- def htr_text(image_path: str, document_type: FormatChoices = "letter_swedish", custom_settings: Optional[str] = None) -> str:
 
 
 
 
 
146
  """Extract text from handwritten documents using HTR."""
147
  try:
148
- processed_collection = _process_htr_pipeline(image_path, document_type, custom_settings)
 
 
149
  extracted_text = extract_text_from_collection(processed_collection)
150
  return extracted_text
151
-
152
  except Exception as e:
153
  return f"HTR text extraction failed: {str(e)}"
154
 
155
- def htrflow_file(image_path: str, document_type: FormatChoices = "letter_swedish", output_format: FileChoices = DEFAULT_OUTPUT, custom_settings: Optional[str] = None, server_name: str = "https://gabriel-htrflow-mcp.hf.space") -> str:
 
 
 
 
 
 
 
156
  """
157
  Process HTR and return a formatted file for download.
158
-
159
  Returns:
160
  str: File path for direct download via gr.File (server_name/gradio_api/file=/tmp/gradio/{temp_folder}/{file_name})
161
  """
162
  try:
163
  original_filename = Path(image_path).stem or "output"
164
-
165
- processed_collection = _process_htr_pipeline(image_path, document_type, custom_settings)
 
 
166
 
167
  temp_dir = Path(tempfile.mkdtemp())
168
  export_dir = temp_dir / output_format
169
  processed_collection.save(directory=str(export_dir), serializer=output_format)
170
-
171
  output_file_path = None
172
  for root, _, files in os.walk(export_dir):
173
  for file in files:
174
  old_path = os.path.join(root, file)
175
  file_ext = Path(file).suffix
176
- new_filename = f"{original_filename}.{output_format}" if not file_ext else f"{original_filename}{file_ext}"
 
 
 
 
177
  new_path = os.path.join(root, new_filename)
178
  os.rename(old_path, new_path)
179
  output_file_path = new_path
180
  break
181
-
182
  if output_file_path and os.path.exists(output_file_path):
183
  return output_file_path
184
  else:
185
  return None
186
-
187
  except Exception as e:
188
  return None
189
 
190
- def htrflow_visualizer(image: str, htr_document: str) -> str:
191
- pass
192
 
193
  def extract_text_from_collection(collection: Collection) -> str:
194
  text_lines = []
@@ -198,17 +242,22 @@ def extract_text_from_collection(collection: Collection) -> str:
198
  text_lines.append(node.text)
199
  return "\n".join(text_lines)
200
 
 
201
  def create_htrflow_mcp_server():
202
  htr_text_interface = gr.Interface(
203
  fn=htr_text,
204
  inputs=[
205
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
206
- gr.Dropdown(choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"),
207
- gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
208
- ],
209
- outputs=[
210
- gr.Textbox(label="Extracted Text", lines=10)
 
 
 
211
  ],
 
212
  description="Extract plain text from handwritten documents using HTR",
213
  api_name="htr_text",
214
  )
@@ -217,14 +266,24 @@ def create_htrflow_mcp_server():
217
  fn=htrflow_file,
218
  inputs=[
219
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
220
- gr.Dropdown(choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"),
221
- gr.Dropdown(choices=FILE_CHOICES, value=DEFAULT_OUTPUT, label="Output Format"),
222
- gr.Textbox(label="Custom Settings (JSON)", placeholder="Optional custom pipeline settings", value=""),
223
- gr.Textbox(label="Server Name", value="https://gabriel-htrflow-mcp.hf.space", placeholder="Server URL for download links"),
224
- ],
225
- outputs=[
226
- gr.File(label="Download HTR Output File")
 
 
 
 
 
 
 
 
 
227
  ],
 
228
  description="Process handwritten text and get formatted file (ALTO XML, PAGE XML, JSON, or TXT)",
229
  api_name="htrflow_file",
230
  )
@@ -232,12 +291,12 @@ def create_htrflow_mcp_server():
232
  htrflow_viz = gr.Interface(
233
  fn=htrflow_visualizer,
234
  inputs=[
235
- gr.Image(type="filepath", label="Upload Image or Enter URL"),
236
- gr.Textbox(label="HTR Document content", placeholder="Path to the HTR document file", value=""),
237
  ],
238
- outputs=gr.File(label="Download Output File"),
239
- description="Visualize document",
240
- api_name="htrflow_visualizer"
241
  )
242
 
243
  demo = gr.TabbedInterface(
@@ -248,6 +307,7 @@ def create_htrflow_mcp_server():
248
 
249
  return demo
250
 
 
251
  if __name__ == "__main__":
252
  demo = create_htrflow_mcp_server()
253
  demo.launch(mcp_server=True, share=False, debug=False)
 
4
  import os
5
  from typing import List, Optional, Literal, Tuple
6
  from PIL import Image
7
+
8
  import spaces
9
  from pathlib import Path
10
+ from visualizer import htrflow_visualizer
11
  from htrflow.volume.volume import Collection
12
  from htrflow.pipeline.pipeline import Pipeline
13
 
14
+
15
  DEFAULT_OUTPUT = "alto"
16
+ FORMAT_CHOICES = [
17
+ "letter_english",
18
+ "letter_swedish",
19
+ "spread_english",
20
+ "spread_swedish",
21
+ ]
22
  FILE_CHOICES = ["txt", "alto", "page", "json"]
23
 
24
+ FormatChoices = Literal[
25
+ "letter_english", "letter_swedish", "spread_english", "spread_swedish"
26
+ ]
27
  FileChoices = Literal["txt", "alto", "page", "json"]
28
 
29
  PIPELINE_CONFIGS = {
 
33
  "step": "Segmentation",
34
  "settings": {
35
  "model": "yolo",
36
+ "model_settings": {
37
+ "model": "Riksarkivet/yolov9-lines-within-regions-1"
38
+ },
39
  "generation_settings": {"batch_size": 8},
40
  },
41
  },
 
56
  "step": "Segmentation",
57
  "settings": {
58
  "model": "yolo",
59
+ "model_settings": {
60
+ "model": "Riksarkivet/yolov9-lines-within-regions-1"
61
+ },
62
  "generation_settings": {"batch_size": 8},
63
  },
64
  },
 
66
  "step": "TextRecognition",
67
  "settings": {
68
  "model": "TrOCR",
69
+ "model_settings": {
70
+ "model": "Riksarkivet/trocr-base-handwritten-hist-swe-2"
71
+ },
72
  "generation_settings": {"batch_size": 16},
73
  },
74
  },
 
89
  "step": "Segmentation",
90
  "settings": {
91
  "model": "yolo",
92
+ "model_settings": {
93
+ "model": "Riksarkivet/yolov9-lines-within-regions-1"
94
+ },
95
  "generation_settings": {"batch_size": 8},
96
  },
97
  },
 
120
  "step": "Segmentation",
121
  "settings": {
122
  "model": "yolo",
123
+ "model_settings": {
124
+ "model": "Riksarkivet/yolov9-lines-within-regions-1"
125
+ },
126
  "generation_settings": {"batch_size": 8},
127
  },
128
  },
 
130
  "step": "TextRecognition",
131
  "settings": {
132
  "model": "TrOCR",
133
+ "model_settings": {
134
+ "model": "Riksarkivet/trocr-base-handwritten-hist-swe-2"
135
+ },
136
  "generation_settings": {"batch_size": 16},
137
  },
138
  },
 
141
  },
142
  }
143
 
144
+
145
  @spaces.GPU
146
+ def _process_htr_pipeline(
147
+ image_path: str, document_type: FormatChoices, custom_settings: Optional[str] = None
148
+ ) -> Collection:
149
  """Process HTR pipeline and return the processed collection."""
150
+
151
  if not image_path:
152
  raise ValueError("No image provided")
153
 
 
161
 
162
  collection = Collection([image_path])
163
  pipeline = Pipeline.from_config(config)
164
+
165
  try:
166
  processed_collection = pipeline.run(collection)
167
  return processed_collection
168
  except Exception as pipeline_error:
169
  raise RuntimeError(f"Pipeline execution failed: {str(pipeline_error)}")
170
 
171
+
172
def htr_text(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    custom_settings: Optional[str] = None,
) -> str:
    """Run the HTR pipeline on one image and return the recognised text.

    Args:
        image_path: Path of the image to process; forwarded unchanged to
            ``_process_htr_pipeline``.
        document_type: One of the ``FormatChoices`` literals; forwarded
            unchanged to ``_process_htr_pipeline``.
        custom_settings: Optional settings string forwarded unchanged to
            ``_process_htr_pipeline``.

    Returns:
        The text produced by ``extract_text_from_collection`` on success.
        On any exception the function does not raise; it returns a message
        prefixed with ``"HTR text extraction failed: "`` instead.
    """
    try:
        # Both the pipeline run and the text extraction sit inside the same
        # guard so that either failure is reported as a plain string.
        return extract_text_from_collection(
            _process_htr_pipeline(image_path, document_type, custom_settings)
        )
    except Exception as failure:
        return f"HTR text extraction failed: {str(failure)}"
187
 
188
+
189
def htrflow_file(
    image_path: str,
    document_type: FormatChoices = "letter_swedish",
    output_format: FileChoices = DEFAULT_OUTPUT,
    custom_settings: Optional[str] = None,
    server_name: str = "https://gabriel-htrflow-mcp.hf.space",
) -> Optional[str]:
    """Process HTR and return a formatted file for download.

    The image is run through ``_process_htr_pipeline``, the result is saved
    with the requested serializer into a fresh temporary directory, and the
    first exported file found is renamed after the input image.

    Args:
        image_path: Path of the image to process; its stem becomes the name
            of the exported file (``"output"`` if the stem is empty).
        document_type: One of the ``FormatChoices`` literals, forwarded to
            ``_process_htr_pipeline``.
        output_format: Serializer name passed to ``Collection.save``; also
            used as the file extension when the exported file has none.
        custom_settings: Optional settings string forwarded to
            ``_process_htr_pipeline``.
        server_name: Not used by this function; kept so existing callers and
            the Gradio interface that pass it keep working.

    Returns:
        Optional[str]: File path for direct download via gr.File
        (server_name/gradio_api/file=/tmp/gradio/{temp_folder}/{file_name}),
        or ``None`` if nothing was exported or any step failed.
    """
    import logging

    try:
        original_filename = Path(image_path).stem or "output"

        processed_collection = _process_htr_pipeline(
            image_path, document_type, custom_settings
        )

        # The temporary directory is intentionally not removed here: the
        # returned path must still exist when the caller serves the file.
        export_dir = Path(tempfile.mkdtemp()) / output_format
        processed_collection.save(directory=str(export_dir), serializer=output_format)

        for root, _, files in os.walk(export_dir):
            if not files:
                continue
            # Only the first exported file is handed back. Returning right
            # away stops the walk, so files in later directories are not
            # renamed as a side effect.
            first_file = files[0]
            file_ext = Path(first_file).suffix or f".{output_format}"
            old_path = os.path.join(root, first_file)
            new_path = os.path.join(root, f"{original_filename}{file_ext}")
            os.rename(old_path, new_path)
            return new_path if os.path.exists(new_path) else None

        return None

    except Exception:
        # Best effort: callers still get None, but the failure is logged
        # instead of being silently discarded.
        logging.getLogger(__name__).exception("HTR file export failed")
        return None
235
 
 
 
236
 
237
  def extract_text_from_collection(collection: Collection) -> str:
238
  text_lines = []
 
242
  text_lines.append(node.text)
243
  return "\n".join(text_lines)
244
 
245
+
246
  def create_htrflow_mcp_server():
247
  htr_text_interface = gr.Interface(
248
  fn=htr_text,
249
  inputs=[
250
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
251
+ gr.Dropdown(
252
+ choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"
253
+ ),
254
+ gr.Textbox(
255
+ label="Custom Settings (JSON)",
256
+ placeholder="Optional custom pipeline settings",
257
+ value="",
258
+ ),
259
  ],
260
+ outputs=[gr.Textbox(label="Extracted Text", lines=10)],
261
  description="Extract plain text from handwritten documents using HTR",
262
  api_name="htr_text",
263
  )
 
266
  fn=htrflow_file,
267
  inputs=[
268
  gr.Image(type="filepath", label="Upload Image or Enter URL"),
269
+ gr.Dropdown(
270
+ choices=FORMAT_CHOICES, value="letter_swedish", label="Document Type"
271
+ ),
272
+ gr.Dropdown(
273
+ choices=FILE_CHOICES, value=DEFAULT_OUTPUT, label="Output Format"
274
+ ),
275
+ gr.Textbox(
276
+ label="Custom Settings (JSON)",
277
+ placeholder="Optional custom pipeline settings",
278
+ value="",
279
+ ),
280
+ gr.Textbox(
281
+ label="Server Name",
282
+ value="https://gabriel-htrflow-mcp.hf.space",
283
+ placeholder="Server URL for download links",
284
+ ),
285
  ],
286
+ outputs=[gr.File(label="Download HTR Output File")],
287
  description="Process handwritten text and get formatted file (ALTO XML, PAGE XML, JSON, or TXT)",
288
  api_name="htrflow_file",
289
  )
 
291
  htrflow_viz = gr.Interface(
292
  fn=htrflow_visualizer,
293
  inputs=[
294
+ gr.Image(type="filepath", label="Upload Original Image"),
295
+ gr.File(label="Upload ALTO/PAGE XML File"),
296
  ],
297
+ outputs=gr.File(label="Download Visualization Image"),
298
+ description="Visualize HTR results by overlaying text regions and polygons on the original image",
299
+ api_name="htrflow_visualizer",
300
  )
301
 
302
  demo = gr.TabbedInterface(
 
307
 
308
  return demo
309
 
310
+
311
  if __name__ == "__main__":
312
  demo = create_htrflow_mcp_server()
313
  demo.launch(mcp_server=True, share=False, debug=False)