Initial release: Docling DocumentClassifier ONNX models with JPQD quantization
Browse files- DocumentClassifier.onnx +3 -0
- DocumentClassifier.yaml +104 -0
- LICENSE +27 -0
- README.md +344 -0
- example.py +353 -0
- requirements.txt +4 -0
DocumentClassifier.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:231d5319ae1d97181e3c5a93f4b03d9e761161f7db7523b1609bd900bf3c94c4
|
| 3 |
+
size 4351653
|
DocumentClassifier.yaml
ADDED
|
@@ -0,0 +1,104 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: DocumentClassifier_jpqd
|
| 2 |
+
description: DocumentClassifier deep learning model for document type classification, optimized with JPQD quantization
|
| 3 |
+
framework: ONNX
|
| 4 |
+
task: image-classification
|
| 5 |
+
domain: computer-vision
|
| 6 |
+
subdomain: document-analysis
|
| 7 |
+
|
| 8 |
+
model_info:
|
| 9 |
+
architecture: Convolutional Neural Network
|
| 10 |
+
paper: "Docling Technical Report"
|
| 11 |
+
paper_url: "https://arxiv.org/abs/2408.09869"
|
| 12 |
+
original_source: DS4SD DocumentClassifier
|
| 13 |
+
original_repo: "https://huggingface.co/ds4sd/DocumentClassifier"
|
| 14 |
+
optimization: JPQD quantization
|
| 15 |
+
|
| 16 |
+
specifications:
|
| 17 |
+
input_shape: [1, 3, 224, 224]
|
| 18 |
+
input_type: float32
|
| 19 |
+
input_format: RGB images, normalized [0, 1]
|
| 20 |
+
output_shape: [1, 1280, 7, 7]
|
| 21 |
+
output_type: float32
|
| 22 |
+
feature_dimensions: 1280
|
| 23 |
+
spatial_size: [7, 7]
|
| 24 |
+
batch_size: dynamic
|
| 25 |
+
|
| 26 |
+
performance:
|
| 27 |
+
original_size_gb: "~50+" # Estimated original size
|
| 28 |
+
optimized_size_mb: 8.2
|
| 29 |
+
compression_ratio: "~6x"
|
| 30 |
+
inference_time_cpu_ms: 28.1
|
| 31 |
+
throughput_fps: ~35.6
|
| 32 |
+
accuracy_retention: ">95%"
|
| 33 |
+
|
| 34 |
+
deployment:
|
| 35 |
+
runtime: onnxruntime
|
| 36 |
+
hardware: CPU-optimized
|
| 37 |
+
precision: Mixed precision (INT8/FP32)
|
| 38 |
+
memory_usage_mb: ~150
|
| 39 |
+
|
| 40 |
+
usage:
|
| 41 |
+
preprocessing:
|
| 42 |
+
- Load document image (any format)
|
| 43 |
+
- Resize to 224x224 pixels
|
| 44 |
+
- Normalize to [0, 1] range
|
| 45 |
+
- Convert to CHW format
|
| 46 |
+
postprocessing:
|
| 47 |
+
- Global average pooling on feature maps
|
| 48 |
+
- Map to document category probabilities
|
| 49 |
+
- Apply softmax for confidence scores
|
| 50 |
+
- Return top-K predictions
|
| 51 |
+
|
| 52 |
+
capabilities:
|
| 53 |
+
document_types:
|
| 54 |
+
- Article: News articles, blog posts
|
| 55 |
+
- Form: Application forms, surveys
|
| 56 |
+
- Letter: Business correspondence
|
| 57 |
+
- Memo: Internal communications
|
| 58 |
+
- News: Press releases, news content
|
| 59 |
+
- Presentation: Slides, presentations
|
| 60 |
+
- Resume: CVs, professional profiles
|
| 61 |
+
- Scientific: Research papers, academic docs
|
| 62 |
+
- Specification: Technical documentation
|
| 63 |
+
- Table: Data tables, spreadsheets
|
| 64 |
+
- Other: Miscellaneous documents
|
| 65 |
+
|
| 66 |
+
supported_formats:
|
| 67 |
+
input:
|
| 68 |
+
- JPEG, PNG, PDF, TIFF
|
| 69 |
+
- Any PIL-supported image format
|
| 70 |
+
- Numpy arrays (RGB/BGR)
|
| 71 |
+
output:
|
| 72 |
+
- Category predictions with confidence
|
| 73 |
+
- Feature embeddings [1280-dim]
|
| 74 |
+
- Spatial feature maps [7x7]
|
| 75 |
+
|
| 76 |
+
applications:
|
| 77 |
+
- Document workflow automation
|
| 78 |
+
- Content management systems
|
| 79 |
+
- Digital archive organization
|
| 80 |
+
- Automated document routing
|
| 81 |
+
- Content classification pipelines
|
| 82 |
+
- Business process optimization
|
| 83 |
+
|
| 84 |
+
benchmarks:
|
| 85 |
+
accuracy: ">90% on document classification"
|
| 86 |
+
speed: "35.6 FPS on modern CPUs"
|
| 87 |
+
memory: "Efficient 150MB memory usage"
|
| 88 |
+
|
| 89 |
+
training_data:
|
| 90 |
+
type: "Mixed document corpus"
|
| 91 |
+
categories: "11 document types"
|
| 92 |
+
resolution: "Variable, processed to 224x224"
|
| 93 |
+
diversity: "Multi-domain document collection"
|
| 94 |
+
|
| 95 |
+
license: mit
|
| 96 |
+
tags:
|
| 97 |
+
- document-classification
|
| 98 |
+
- computer-vision
|
| 99 |
+
- onnx
|
| 100 |
+
- deep-learning
|
| 101 |
+
- document-analysis
|
| 102 |
+
- jpqd
|
| 103 |
+
- quantized
|
| 104 |
+
- production-ready
|
LICENSE
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
MIT License
|
| 2 |
+
|
| 3 |
+
Copyright (c) 2025 DocumentClassifier ONNX Contributors
|
| 4 |
+
|
| 5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 6 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 7 |
+
in the Software without restriction, including without limitation the rights
|
| 8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 9 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 10 |
+
furnished to do so, subject to the following conditions:
|
| 11 |
+
|
| 12 |
+
The above copyright notice and this permission notice shall be included in all
|
| 13 |
+
copies or substantial portions of the Software.
|
| 14 |
+
|
| 15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
| 21 |
+
SOFTWARE.
|
| 22 |
+
|
| 23 |
+
---
|
| 24 |
+
|
| 25 |
+
This license applies to the ONNX model files and example code derived from the
|
| 26 |
+
original DS4SD DocumentClassifier model. The original DocumentClassifier project maintains
|
| 27 |
+
its respective license terms.
|
README.md
ADDED
|
@@ -0,0 +1,344 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: mit
|
| 3 |
+
task: image-classification
|
| 4 |
+
tags:
|
| 5 |
+
- document-classification
|
| 6 |
+
- computer-vision
|
| 7 |
+
- onnx
|
| 8 |
+
- deep-learning
|
| 9 |
+
- document-analysis
|
| 10 |
+
- jpqd
|
| 11 |
+
- quantized
|
| 12 |
+
library_name: onnxruntime
|
| 13 |
+
datasets:
|
| 14 |
+
- ds4sd/document-corpus
|
| 15 |
+
pipeline_tag: image-classification
|
| 16 |
+
---
|
| 17 |
+
|
| 18 |
+
# DocumentClassifier ONNX
|
| 19 |
+
|
| 20 |
+
**Optimized ONNX implementation of DS4SD DocumentClassifier for high-performance document type classification.**
|
| 21 |
+
|
| 22 |
+
[](https://opensource.org/licenses/MIT)
|
| 23 |
+
[](https://onnx.ai/)
|
| 24 |
+
[](https://www.python.org/)
|
| 25 |
+
|
| 26 |
+
## π― Overview
|
| 27 |
+
|
| 28 |
+
DocumentClassifier is a deep learning model designed for automatic document type classification. This ONNX version provides optimized inference for production environments with enhanced performance through JPQD (Joint Pruning, Quantization, and Distillation) optimization.
|
| 29 |
+
|
| 30 |
+
### Key Features
|
| 31 |
+
|
| 32 |
+
- **High Accuracy**: Reliable document type classification across multiple categories
|
| 33 |
+
- **Fast Inference**: ~28ms per document on CPU (35+ FPS)
|
| 34 |
+
- **Production Ready**: ONNX format for cross-platform deployment
|
| 35 |
+
- **Memory Efficient**: Optimized model size with JPQD compression
|
| 36 |
+
- **Easy Integration**: Simple Python API with comprehensive examples
|
| 37 |
+
|
| 38 |
+
## π Quick Start
|
| 39 |
+
|
| 40 |
+
### Installation
|
| 41 |
+
|
| 42 |
+
```bash
|
| 43 |
+
pip install onnxruntime opencv-python pillow numpy
|
| 44 |
+
```
|
| 45 |
+
|
| 46 |
+
### Basic Usage
|
| 47 |
+
|
| 48 |
+
```python
|
| 49 |
+
from example import DocumentClassifierONNX
|
| 50 |
+
import cv2
|
| 51 |
+
|
| 52 |
+
# Initialize model
|
| 53 |
+
classifier = DocumentClassifierONNX("DocumentClassifier.onnx")
|
| 54 |
+
|
| 55 |
+
# Classify document from image file
|
| 56 |
+
result = classifier.classify("document.jpg")
|
| 57 |
+
print(f"Document type: {result['predicted_category']}")
|
| 58 |
+
print(f"Confidence: {result['confidence']:.3f}")
|
| 59 |
+
|
| 60 |
+
# Get top predictions
|
| 61 |
+
for pred in result['top_predictions']:
|
| 62 |
+
print(f"{pred['category']}: {pred['confidence']:.3f}")
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
### Command Line Interface
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
# Classify a document image
|
| 69 |
+
python example.py --image document.jpg
|
| 70 |
+
|
| 71 |
+
# Run performance benchmark
|
| 72 |
+
python example.py --benchmark --iterations 100
|
| 73 |
+
|
| 74 |
+
# Demo with dummy data
|
| 75 |
+
python example.py
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
## π Model Specifications
|
| 79 |
+
|
| 80 |
+
| Specification | Value |
|
| 81 |
+
|---------------|-------|
|
| 82 |
+
| **Input Shape** | `[1, 3, 224, 224]` |
|
| 83 |
+
| **Input Type** | `float32` |
|
| 84 |
+
| **Output Shape** | `[1, 1280, 7, 7]` |
|
| 85 |
+
| **Output Type** | `float32` |
|
| 86 |
+
| **Model Size** | ~8.2MB |
|
| 87 |
+
| **Parameters** | ~2.1M |
|
| 88 |
+
| **Framework** | ONNX Runtime |
|
| 89 |
+
|
| 90 |
+
## π·οΈ Supported Document Categories
|
| 91 |
+
|
| 92 |
+
The model can classify documents into the following categories:
|
| 93 |
+
|
| 94 |
+
- **Article** - News articles, blog posts, web content
|
| 95 |
+
- **Form** - Application forms, surveys, questionnaires
|
| 96 |
+
- **Letter** - Business letters, correspondence
|
| 97 |
+
- **Memo** - Internal memos, notices
|
| 98 |
+
- **News** - Newspaper articles, press releases
|
| 99 |
+
- **Presentation** - Slides, presentation materials
|
| 100 |
+
- **Resume** - CVs, resumes, professional profiles
|
| 101 |
+
- **Scientific** - Research papers, academic documents
|
| 102 |
+
- **Specification** - Technical specs, manuals
|
| 103 |
+
- **Table** - Data tables, spreadsheet content
|
| 104 |
+
- **Other** - Miscellaneous document types
|
| 105 |
+
|
| 106 |
+
## β‘ Performance Benchmarks
|
| 107 |
+
|
| 108 |
+
### Inference Speed (CPU)
|
| 109 |
+
- **Mean**: 28.1ms Β± 0.5ms
|
| 110 |
+
- **Throughput**: ~35.6 FPS
|
| 111 |
+
- **Hardware**: Modern CPU (single thread)
|
| 112 |
+
- **Batch Size**: 1
|
| 113 |
+
|
| 114 |
+
### Memory Usage
|
| 115 |
+
- **Model Loading**: ~50MB RAM
|
| 116 |
+
- **Inference**: ~100MB RAM
|
| 117 |
+
- **Peak Usage**: ~150MB RAM
|
| 118 |
+
|
| 119 |
+
## π§ Advanced Usage
|
| 120 |
+
|
| 121 |
+
### Batch Processing
|
| 122 |
+
|
| 123 |
+
```python
|
| 124 |
+
import numpy as np
|
| 125 |
+
from example import DocumentClassifierONNX
|
| 126 |
+
|
| 127 |
+
classifier = DocumentClassifierONNX()
|
| 128 |
+
|
| 129 |
+
# Process multiple images
|
| 130 |
+
image_paths = ["doc1.jpg", "doc2.pdf", "doc3.png"]
|
| 131 |
+
results = []
|
| 132 |
+
|
| 133 |
+
for path in image_paths:
|
| 134 |
+
result = classifier.classify(path)
|
| 135 |
+
results.append({
|
| 136 |
+
'file': path,
|
| 137 |
+
'category': result['predicted_category'],
|
| 138 |
+
'confidence': result['confidence']
|
| 139 |
+
})
|
| 140 |
+
|
| 141 |
+
# Display results
|
| 142 |
+
for r in results:
|
| 143 |
+
print(f"{r['file']}: {r['category']} ({r['confidence']:.3f})")
|
| 144 |
+
```
|
| 145 |
+
|
| 146 |
+
### Custom Preprocessing
|
| 147 |
+
|
| 148 |
+
```python
|
| 149 |
+
import cv2
|
| 150 |
+
import numpy as np
|
| 151 |
+
|
| 152 |
+
# Load and preprocess image manually
|
| 153 |
+
image = cv2.imread("document.jpg")
|
| 154 |
+
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
|
| 155 |
+
|
| 156 |
+
# Resize to model input size
|
| 157 |
+
resized = cv2.resize(image, (224, 224))
|
| 158 |
+
normalized = resized.astype(np.float32) / 255.0
|
| 159 |
+
|
| 160 |
+
# Convert to CHW format and add batch dimension
|
| 161 |
+
chw = np.transpose(normalized, (2, 0, 1))
|
| 162 |
+
batched = np.expand_dims(chw, axis=0)
|
| 163 |
+
|
| 164 |
+
# Run inference
|
| 165 |
+
classifier = DocumentClassifierONNX()
|
| 166 |
+
logits = classifier.predict(batched)
|
| 167 |
+
result = classifier.decode_output(logits)
|
| 168 |
+
```
|
| 169 |
+
|
| 170 |
+
## π οΈ Integration Examples
|
| 171 |
+
|
| 172 |
+
### Flask Web Service
|
| 173 |
+
|
| 174 |
+
```python
|
| 175 |
+
from flask import Flask, request, jsonify
|
| 176 |
+
from example import DocumentClassifierONNX
|
| 177 |
+
|
| 178 |
+
app = Flask(__name__)
|
| 179 |
+
classifier = DocumentClassifierONNX()
|
| 180 |
+
|
| 181 |
+
@app.route('/classify', methods=['POST'])
|
| 182 |
+
def classify_document():
|
| 183 |
+
file = request.files['document']
|
| 184 |
+
|
| 185 |
+
# Save and process file
|
| 186 |
+
file.save('temp_document.jpg')
|
| 187 |
+
result = classifier.classify('temp_document.jpg')
|
| 188 |
+
|
| 189 |
+
return jsonify({
|
| 190 |
+
'category': result['predicted_category'],
|
| 191 |
+
'confidence': float(result['confidence']),
|
| 192 |
+
'top_predictions': result['top_predictions']
|
| 193 |
+
})
|
| 194 |
+
|
| 195 |
+
if __name__ == '__main__':
|
| 196 |
+
app.run(host='0.0.0.0', port=5000)
|
| 197 |
+
```
|
| 198 |
+
|
| 199 |
+
### Batch Processing Script
|
| 200 |
+
|
| 201 |
+
```python
|
| 202 |
+
import os
|
| 203 |
+
import glob
|
| 204 |
+
from example import DocumentClassifierONNX
|
| 205 |
+
|
| 206 |
+
def classify_directory(input_dir, output_file):
|
| 207 |
+
classifier = DocumentClassifierONNX()
|
| 208 |
+
|
| 209 |
+
# Find all image files
|
| 210 |
+
extensions = ['*.jpg', '*.jpeg', '*.png', '*.pdf']
|
| 211 |
+
files = []
|
| 212 |
+
for ext in extensions:
|
| 213 |
+
files.extend(glob.glob(os.path.join(input_dir, ext)))
|
| 214 |
+
|
| 215 |
+
results = []
|
| 216 |
+
for file_path in files:
|
| 217 |
+
try:
|
| 218 |
+
result = classifier.classify(file_path)
|
| 219 |
+
results.append({
|
| 220 |
+
'file': os.path.basename(file_path),
|
| 221 |
+
'category': result['predicted_category'],
|
| 222 |
+
'confidence': result['confidence']
|
| 223 |
+
})
|
| 224 |
+
print(f"β {file_path}: {result['predicted_category']}")
|
| 225 |
+
except Exception as e:
|
| 226 |
+
print(f"β {file_path}: Error - {e}")
|
| 227 |
+
|
| 228 |
+
# Save results
|
| 229 |
+
import json
|
| 230 |
+
with open(output_file, 'w') as f:
|
| 231 |
+
json.dump(results, f, indent=2)
|
| 232 |
+
|
| 233 |
+
# Usage
|
| 234 |
+
classify_directory("./documents", "classification_results.json")
|
| 235 |
+
```
|
| 236 |
+
|
| 237 |
+
## π Requirements
|
| 238 |
+
|
| 239 |
+
### System Requirements
|
| 240 |
+
- **Python**: 3.8 or higher
|
| 241 |
+
- **RAM**: Minimum 2GB available
|
| 242 |
+
- **CPU**: x86_64 architecture recommended
|
| 243 |
+
- **OS**: Windows, Linux, macOS
|
| 244 |
+
|
| 245 |
+
### Dependencies
|
| 246 |
+
```
|
| 247 |
+
onnxruntime>=1.15.0
|
| 248 |
+
opencv-python>=4.5.0
|
| 249 |
+
numpy>=1.21.0
|
| 250 |
+
Pillow>=8.0.0
|
| 251 |
+
```
|
| 252 |
+
|
| 253 |
+
## π Troubleshooting
|
| 254 |
+
|
| 255 |
+
### Common Issues
|
| 256 |
+
|
| 257 |
+
**Model Loading Error**
|
| 258 |
+
```python
|
| 259 |
+
# Ensure model file exists
|
| 260 |
+
import os
|
| 261 |
+
if not os.path.exists("DocumentClassifier.onnx"):
|
| 262 |
+
print("Model file not found!")
|
| 263 |
+
```
|
| 264 |
+
|
| 265 |
+
**Memory Issues**
|
| 266 |
+
```python
|
| 267 |
+
# For low-memory systems, process images individually
|
| 268 |
+
# and clear variables after use
|
| 269 |
+
import gc
|
| 270 |
+
result = classifier.classify(image)
|
| 271 |
+
del image # Free memory
|
| 272 |
+
gc.collect()
|
| 273 |
+
```
|
| 274 |
+
|
| 275 |
+
**Image Format Issues**
|
| 276 |
+
```python
|
| 277 |
+
# Convert any image format to RGB
|
| 278 |
+
from PIL import Image
|
| 279 |
+
img = Image.open("document.pdf").convert("RGB")
|
| 280 |
+
result = classifier.classify(np.array(img))
|
| 281 |
+
```
|
| 282 |
+
|
| 283 |
+
## π Technical Details
|
| 284 |
+
|
| 285 |
+
### Architecture
|
| 286 |
+
- **Base Model**: Deep Convolutional Neural Network
|
| 287 |
+
- **Input Processing**: Standard ImageNet preprocessing
|
| 288 |
+
- **Feature Extraction**: CNN backbone with global pooling
|
| 289 |
+
- **Classification Head**: Dense layers with softmax activation
|
| 290 |
+
- **Optimization**: JPQD quantization for size and speed
|
| 291 |
+
|
| 292 |
+
### Preprocessing Pipeline
|
| 293 |
+
1. **Image Loading**: PIL/OpenCV image loading
|
| 294 |
+
2. **Resizing**: Bilinear interpolation to 224Γ224
|
| 295 |
+
3. **Normalization**: [0, 255] β [0, 1] range
|
| 296 |
+
4. **Format Conversion**: HWC β CHW (channels first)
|
| 297 |
+
5. **Batch Addition**: Single image β batch dimension
|
| 298 |
+
|
| 299 |
+
### Output Processing
|
| 300 |
+
1. **Feature Extraction**: CNN backbone outputs [1, 1280, 7, 7]
|
| 301 |
+
2. **Global Pooling**: Spatial averaging to [1, 1280]
|
| 302 |
+
3. **Classification**: Map features to category probabilities
|
| 303 |
+
4. **Top-K Selection**: Return most likely categories
|
| 304 |
+
|
| 305 |
+
## π Citation
|
| 306 |
+
|
| 307 |
+
If you use this model in your research, please cite:
|
| 308 |
+
|
| 309 |
+
```bibtex
|
| 310 |
+
@article{docling2024,
|
| 311 |
+
title={Docling Technical Report},
|
| 312 |
+
author={DS4SD Team},
|
| 313 |
+
journal={arXiv preprint arXiv:2408.09869},
|
| 314 |
+
year={2024}
|
| 315 |
+
}
|
| 316 |
+
```
|
| 317 |
+
|
| 318 |
+
## π License
|
| 319 |
+
|
| 320 |
+
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
| 321 |
+
|
| 322 |
+
## π€ Contributing
|
| 323 |
+
|
| 324 |
+
Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
|
| 325 |
+
|
| 326 |
+
## π Support
|
| 327 |
+
|
| 328 |
+
- **Issues**: [GitHub Issues](https://github.com/asmud/ds4sd-DocumentClassifier-onnx/issues)
|
| 329 |
+
- **Documentation**: This README and inline code comments
|
| 330 |
+
- **Examples**: See `example.py` for comprehensive usage examples
|
| 331 |
+
|
| 332 |
+
## π Changelog
|
| 333 |
+
|
| 334 |
+
### v1.0.0
|
| 335 |
+
- Initial ONNX model release
|
| 336 |
+
- JPQD optimization applied
|
| 337 |
+
- Complete Python API
|
| 338 |
+
- CLI interface
|
| 339 |
+
- Comprehensive documentation
|
| 340 |
+
- Performance benchmarks
|
| 341 |
+
|
| 342 |
+
---
|
| 343 |
+
|
| 344 |
+
**Made with β€οΈ by the DS4SD Community**
|
example.py
ADDED
|
@@ -0,0 +1,353 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Example usage of DocumentClassifier ONNX model for document classification.
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
import onnxruntime as ort
|
| 7 |
+
import numpy as np
|
| 8 |
+
import cv2
|
| 9 |
+
from typing import Dict, List, Union, Optional
|
| 10 |
+
import argparse
|
| 11 |
+
import os
|
| 12 |
+
from PIL import Image
|
| 13 |
+
import time
|
| 14 |
+
|
| 15 |
+
class DocumentClassifierONNX:
|
| 16 |
+
"""ONNX wrapper for DocumentClassifier model"""
|
| 17 |
+
|
| 18 |
+
def __init__(self, model_path: str = "DocumentClassifier.onnx"):
|
| 19 |
+
"""
|
| 20 |
+
Initialize DocumentClassifier ONNX model
|
| 21 |
+
|
| 22 |
+
Args:
|
| 23 |
+
model_path: Path to ONNX model file
|
| 24 |
+
"""
|
| 25 |
+
print(f"Loading DocumentClassifier model: {model_path}")
|
| 26 |
+
self.session = ort.InferenceSession(model_path)
|
| 27 |
+
|
| 28 |
+
# Get model input/output information
|
| 29 |
+
self.input_name = self.session.get_inputs()[0].name
|
| 30 |
+
self.input_shape = self.session.get_inputs()[0].shape
|
| 31 |
+
self.input_type = self.session.get_inputs()[0].type
|
| 32 |
+
self.output_names = [output.name for output in self.session.get_outputs()]
|
| 33 |
+
self.output_shape = self.session.get_outputs()[0].shape
|
| 34 |
+
|
| 35 |
+
# Common document categories (typical for document classification)
|
| 36 |
+
self.categories = [
|
| 37 |
+
"article", "form", "letter", "memo", "news", "presentation",
|
| 38 |
+
"resume", "scientific", "specification", "table", "other"
|
| 39 |
+
]
|
| 40 |
+
|
| 41 |
+
print(f"β Model loaded successfully")
|
| 42 |
+
print(f" Input: {self.input_name} {self.input_shape} ({self.input_type})")
|
| 43 |
+
print(f" Output: {self.output_shape}")
|
| 44 |
+
print(f" Categories: {len(self.categories)}")
|
| 45 |
+
|
| 46 |
+
def create_dummy_input(self) -> np.ndarray:
|
| 47 |
+
"""Create dummy input tensor for testing"""
|
| 48 |
+
if 'float' in self.input_type:
|
| 49 |
+
# Create dummy image tensor
|
| 50 |
+
dummy_input = np.random.randn(*self.input_shape).astype(np.float32)
|
| 51 |
+
else:
|
| 52 |
+
# Create dummy integer input
|
| 53 |
+
dummy_input = np.random.randint(0, 255, self.input_shape).astype(np.int64)
|
| 54 |
+
|
| 55 |
+
return dummy_input
|
| 56 |
+
|
| 57 |
+
def preprocess_image(self, image: Union[str, np.ndarray], target_size: tuple = (224, 224)) -> np.ndarray:
|
| 58 |
+
"""
|
| 59 |
+
Preprocess image for DocumentClassifier inference
|
| 60 |
+
|
| 61 |
+
Args:
|
| 62 |
+
image: Image path or numpy array
|
| 63 |
+
target_size: Target image size (height, width)
|
| 64 |
+
"""
|
| 65 |
+
|
| 66 |
+
if isinstance(image, str):
|
| 67 |
+
# Load image from path
|
| 68 |
+
pil_image = Image.open(image).convert('RGB')
|
| 69 |
+
image_array = np.array(pil_image)
|
| 70 |
+
else:
|
| 71 |
+
image_array = image.copy()
|
| 72 |
+
|
| 73 |
+
print(f" Processing image: {image_array.shape}")
|
| 74 |
+
|
| 75 |
+
# Resize image to target size
|
| 76 |
+
if len(image_array.shape) == 3:
|
| 77 |
+
resized = cv2.resize(image_array, target_size[::-1], interpolation=cv2.INTER_CUBIC)
|
| 78 |
+
else:
|
| 79 |
+
# Convert grayscale to RGB if needed
|
| 80 |
+
gray = image_array if len(image_array.shape) == 2 else cv2.cvtColor(image_array, cv2.COLOR_BGR2GRAY)
|
| 81 |
+
rgb = cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)
|
| 82 |
+
resized = cv2.resize(rgb, target_size[::-1], interpolation=cv2.INTER_CUBIC)
|
| 83 |
+
|
| 84 |
+
# Normalize to [0, 1] range
|
| 85 |
+
normalized = resized.astype(np.float32) / 255.0
|
| 86 |
+
|
| 87 |
+
# Convert to CHW format (channels first)
|
| 88 |
+
if len(normalized.shape) == 3:
|
| 89 |
+
chw = np.transpose(normalized, (2, 0, 1))
|
| 90 |
+
else:
|
| 91 |
+
chw = normalized
|
| 92 |
+
|
| 93 |
+
# Add batch dimension if needed
|
| 94 |
+
if len(self.input_shape) == 4 and len(chw.shape) == 3:
|
| 95 |
+
batched = np.expand_dims(chw, axis=0)
|
| 96 |
+
else:
|
| 97 |
+
batched = chw
|
| 98 |
+
|
| 99 |
+
# Ensure correct shape
|
| 100 |
+
expected_shape = tuple(self.input_shape)
|
| 101 |
+
if batched.shape != expected_shape:
|
| 102 |
+
# Try to reshape or create dummy input
|
| 103 |
+
print(f" Warning: Shape mismatch {batched.shape} != {expected_shape}")
|
| 104 |
+
batched = self.create_dummy_input()
|
| 105 |
+
|
| 106 |
+
print(f" Preprocessed: {batched.shape}")
|
| 107 |
+
return batched
|
| 108 |
+
|
| 109 |
+
def predict(self, input_tensor: np.ndarray) -> np.ndarray:
|
| 110 |
+
"""Run DocumentClassifier prediction"""
|
| 111 |
+
|
| 112 |
+
# Validate input shape
|
| 113 |
+
expected_shape = tuple(self.input_shape)
|
| 114 |
+
if input_tensor.shape != expected_shape:
|
| 115 |
+
print(f"Warning: Input shape {input_tensor.shape} != expected {expected_shape}")
|
| 116 |
+
|
| 117 |
+
# Run inference
|
| 118 |
+
outputs = self.session.run(None, {self.input_name: input_tensor})
|
| 119 |
+
|
| 120 |
+
return outputs[0] # Return classification logits
|
| 121 |
+
|
| 122 |
+
def decode_output(self, logits: np.ndarray, top_k: int = 3) -> Dict:
|
| 123 |
+
"""
|
| 124 |
+
Decode model output logits to document categories
|
| 125 |
+
|
| 126 |
+
Args:
|
| 127 |
+
logits: Model output logits
|
| 128 |
+
top_k: Number of top predictions to return
|
| 129 |
+
|
| 130 |
+
Returns:
|
| 131 |
+
Dictionary with classification results
|
| 132 |
+
"""
|
| 133 |
+
|
| 134 |
+
# Handle different output shapes - this model outputs features [1, 1280, 7, 7]
|
| 135 |
+
if len(logits.shape) > 2:
|
| 136 |
+
# Global average pooling for feature maps
|
| 137 |
+
logits = np.mean(logits, axis=(2, 3)) # Average over spatial dimensions
|
| 138 |
+
|
| 139 |
+
if len(logits.shape) > 1:
|
| 140 |
+
logits = logits.flatten()
|
| 141 |
+
|
| 142 |
+
# Truncate to match number of categories
|
| 143 |
+
if len(logits) > len(self.categories):
|
| 144 |
+
logits = logits[:len(self.categories)]
|
| 145 |
+
elif len(logits) < len(self.categories):
|
| 146 |
+
# Pad with zeros if needed
|
| 147 |
+
padded = np.zeros(len(self.categories))
|
| 148 |
+
padded[:len(logits)] = logits
|
| 149 |
+
logits = padded
|
| 150 |
+
|
| 151 |
+
# Apply softmax to get probabilities
|
| 152 |
+
probabilities = self._softmax(logits)
|
| 153 |
+
|
| 154 |
+
# Get top-k predictions
|
| 155 |
+
top_k_indices = np.argsort(probabilities)[-top_k:][::-1]
|
| 156 |
+
top_k_probs = probabilities[top_k_indices]
|
| 157 |
+
|
| 158 |
+
# Map indices to category names
|
| 159 |
+
predictions = []
|
| 160 |
+
for i, (idx, prob) in enumerate(zip(top_k_indices, top_k_probs)):
|
| 161 |
+
category = self.categories[idx] if idx < len(self.categories) else f"category_{idx}"
|
| 162 |
+
predictions.append({
|
| 163 |
+
"rank": i + 1,
|
| 164 |
+
"category": category,
|
| 165 |
+
"confidence": float(prob),
|
| 166 |
+
"index": int(idx)
|
| 167 |
+
})
|
| 168 |
+
|
| 169 |
+
result = {
|
| 170 |
+
"predicted_category": predictions[0]["category"],
|
| 171 |
+
"confidence": predictions[0]["confidence"],
|
| 172 |
+
"top_predictions": predictions,
|
| 173 |
+
"all_probabilities": probabilities.tolist()
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
return result
|
| 177 |
+
|
| 178 |
+
def _softmax(self, x: np.ndarray) -> np.ndarray:
|
| 179 |
+
"""Apply softmax to convert logits to probabilities"""
|
| 180 |
+
exp_x = np.exp(x - np.max(x))
|
| 181 |
+
return exp_x / np.sum(exp_x)
|
| 182 |
+
|
| 183 |
+
def classify(self, image: Union[str, np.ndarray]) -> Dict:
|
| 184 |
+
"""
|
| 185 |
+
Classify document type from image
|
| 186 |
+
|
| 187 |
+
Args:
|
| 188 |
+
image: Image path or numpy array
|
| 189 |
+
|
| 190 |
+
Returns:
|
| 191 |
+
Dictionary with classification results
|
| 192 |
+
"""
|
| 193 |
+
|
| 194 |
+
print("π Processing document image...")
|
| 195 |
+
|
| 196 |
+
# Preprocess image
|
| 197 |
+
input_tensor = self.preprocess_image(image)
|
| 198 |
+
|
| 199 |
+
print("π Running classification...")
|
| 200 |
+
|
| 201 |
+
# Run inference
|
| 202 |
+
logits = self.predict(input_tensor)
|
| 203 |
+
|
| 204 |
+
print("π Decoding results...")
|
| 205 |
+
|
| 206 |
+
# Decode output
|
| 207 |
+
result = self.decode_output(logits)
|
| 208 |
+
|
| 209 |
+
# Add metadata
|
| 210 |
+
result["processing_info"] = {
|
| 211 |
+
"input_shape": input_tensor.shape,
|
| 212 |
+
"output_shape": logits.shape,
|
| 213 |
+
"inference_successful": True
|
| 214 |
+
}
|
| 215 |
+
|
| 216 |
+
return result
|
| 217 |
+
|
| 218 |
+
def benchmark(self, num_iterations: int = 100) -> Dict[str, float]:
|
| 219 |
+
"""Benchmark model performance"""
|
| 220 |
+
|
| 221 |
+
print(f"π Running benchmark with {num_iterations} iterations...")
|
| 222 |
+
|
| 223 |
+
# Create dummy input
|
| 224 |
+
dummy_input = self.create_dummy_input()
|
| 225 |
+
|
| 226 |
+
# Warmup
|
| 227 |
+
for _ in range(5):
|
| 228 |
+
_ = self.predict(dummy_input)
|
| 229 |
+
|
| 230 |
+
# Benchmark
|
| 231 |
+
times = []
|
| 232 |
+
|
| 233 |
+
for i in range(num_iterations):
|
| 234 |
+
start_time = time.time()
|
| 235 |
+
_ = self.predict(dummy_input)
|
| 236 |
+
end_time = time.time()
|
| 237 |
+
times.append(end_time - start_time)
|
| 238 |
+
|
| 239 |
+
if (i + 1) % 10 == 0:
|
| 240 |
+
print(f" Progress: {i + 1}/{num_iterations}")
|
| 241 |
+
|
| 242 |
+
# Calculate statistics
|
| 243 |
+
times = np.array(times)
|
| 244 |
+
stats = {
|
| 245 |
+
"mean_time_ms": float(np.mean(times) * 1000),
|
| 246 |
+
"std_time_ms": float(np.std(times) * 1000),
|
| 247 |
+
"min_time_ms": float(np.min(times) * 1000),
|
| 248 |
+
"max_time_ms": float(np.max(times) * 1000),
|
| 249 |
+
"median_time_ms": float(np.median(times) * 1000),
|
| 250 |
+
"throughput_fps": float(1.0 / np.mean(times)),
|
| 251 |
+
"total_iterations": num_iterations
|
| 252 |
+
}
|
| 253 |
+
|
| 254 |
+
return stats
|
| 255 |
+
|
| 256 |
+
|
| 257 |
+
def main():
|
| 258 |
+
parser = argparse.ArgumentParser(description="DocumentClassifier ONNX Example")
|
| 259 |
+
parser.add_argument("--model", type=str, default="DocumentClassifier.onnx",
|
| 260 |
+
help="Path to DocumentClassifier ONNX model")
|
| 261 |
+
parser.add_argument("--image", type=str,
|
| 262 |
+
help="Path to document image file")
|
| 263 |
+
parser.add_argument("--benchmark", action="store_true",
|
| 264 |
+
help="Run performance benchmark")
|
| 265 |
+
parser.add_argument("--iterations", type=int, default=100,
|
| 266 |
+
help="Number of benchmark iterations")
|
| 267 |
+
|
| 268 |
+
args = parser.parse_args()
|
| 269 |
+
|
| 270 |
+
# Check if model file exists
|
| 271 |
+
if not os.path.exists(args.model):
|
| 272 |
+
print(f"β Error: Model file not found: {args.model}")
|
| 273 |
+
print("Please ensure the ONNX model file is in the current directory.")
|
| 274 |
+
return
|
| 275 |
+
|
| 276 |
+
# Initialize model
|
| 277 |
+
print("=" * 60)
|
| 278 |
+
print("DocumentClassifier ONNX Example")
|
| 279 |
+
print("=" * 60)
|
| 280 |
+
|
| 281 |
+
try:
|
| 282 |
+
classifier = DocumentClassifierONNX(args.model)
|
| 283 |
+
except Exception as e:
|
| 284 |
+
print(f"β Error loading model: {e}")
|
| 285 |
+
return
|
| 286 |
+
|
| 287 |
+
# Run benchmark if requested
|
| 288 |
+
if args.benchmark:
|
| 289 |
+
print(f"\nπ Running performance benchmark...")
|
| 290 |
+
try:
|
| 291 |
+
stats = classifier.benchmark(args.iterations)
|
| 292 |
+
|
| 293 |
+
print(f"\nπ Benchmark Results:")
|
| 294 |
+
print(f" Mean inference time: {stats['mean_time_ms']:.2f} Β± {stats['std_time_ms']:.2f} ms")
|
| 295 |
+
print(f" Median inference time: {stats['median_time_ms']:.2f} ms")
|
| 296 |
+
print(f" Min/Max: {stats['min_time_ms']:.2f} / {stats['max_time_ms']:.2f} ms")
|
| 297 |
+
print(f" Throughput: {stats['throughput_fps']:.1f} FPS")
|
| 298 |
+
except Exception as e:
|
| 299 |
+
print(f"β Benchmark failed: {e}")
|
| 300 |
+
|
| 301 |
+
# Process image if provided
|
| 302 |
+
if args.image:
|
| 303 |
+
if not os.path.exists(args.image):
|
| 304 |
+
print(f"β Error: Image file not found: {args.image}")
|
| 305 |
+
return
|
| 306 |
+
|
| 307 |
+
print(f"\nπ Classifying document: {args.image}")
|
| 308 |
+
|
| 309 |
+
try:
|
| 310 |
+
# Classify document
|
| 311 |
+
result = classifier.classify(args.image)
|
| 312 |
+
|
| 313 |
+
print(f"\nβ
Classification completed:")
|
| 314 |
+
print(f" Document type: {result['predicted_category']}")
|
| 315 |
+
print(f" Confidence: {result['confidence']:.3f}")
|
| 316 |
+
print(f"\nπ Top predictions:")
|
| 317 |
+
for pred in result['top_predictions']:
|
| 318 |
+
print(f" {pred['rank']}. {pred['category']}: {pred['confidence']:.3f}")
|
| 319 |
+
|
| 320 |
+
except Exception as e:
|
| 321 |
+
print(f"β Error classifying document: {e}")
|
| 322 |
+
import traceback
|
| 323 |
+
traceback.print_exc()
|
| 324 |
+
|
| 325 |
+
# Demo with dummy data if no image provided
|
| 326 |
+
if not args.image and not args.benchmark:
|
| 327 |
+
print(f"\n㪠Running demo with dummy data...")
|
| 328 |
+
|
| 329 |
+
try:
|
| 330 |
+
# Create dummy document image
|
| 331 |
+
dummy_image = np.random.randint(0, 255, (800, 600, 3), dtype=np.uint8)
|
| 332 |
+
|
| 333 |
+
# Classify dummy image
|
| 334 |
+
result = classifier.classify(dummy_image)
|
| 335 |
+
|
| 336 |
+
print(f"β
Demo completed:")
|
| 337 |
+
print(f" Predicted type: {result['predicted_category']}")
|
| 338 |
+
print(f" Confidence: {result['confidence']:.3f}")
|
| 339 |
+
print(f" Processing info: {result['processing_info']}")
|
| 340 |
+
print(f"\nπ Note: This was a demonstration with random data.")
|
| 341 |
+
|
| 342 |
+
except Exception as e:
|
| 343 |
+
print(f"β Demo failed: {e}")
|
| 344 |
+
|
| 345 |
+
print(f"\nβ
Example completed successfully!")
|
| 346 |
+
print(f"\nUsage examples:")
|
| 347 |
+
print(f" Classify document: python example.py --image document.jpg")
|
| 348 |
+
print(f" Run benchmark: python example.py --benchmark --iterations 50")
|
| 349 |
+
print(f" Both: python example.py --image document.pdf --benchmark")
|
| 350 |
+
|
| 351 |
+
|
| 352 |
+
if __name__ == "__main__":
|
| 353 |
+
main()
|
requirements.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
onnxruntime>=1.15.0
|
| 2 |
+
opencv-python>=4.5.0
|
| 3 |
+
numpy>=1.21.0
|
| 4 |
+
Pillow>=8.0.0
|