Upload folder using huggingface_hub
Browse files- .gitattributes +41 -9
- LICENSE +27 -0
- README.md +201 -3
- README_CN.md +215 -0
- configuration.json +1 -0
- onnx/PH-OCRv1/rec/ch_rec_decoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/ch_rec_encoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/jp_rec_decoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/jp_rec_encoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/ko_rec_decoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/ko_rec_encoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/ru_rec_decoder_v1.onnx +3 -0
- onnx/PH-OCRv1/rec/ru_rec_encoder_v1.onnx +3 -0
- onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx +3 -0
- onnx/PP-OCRv4/det/Multilingual_PP-OCRv3_det_infer.onnx +3 -0
- onnx/PP-OCRv4/det/ch_PP-OCRv4_det_infer.onnx +3 -0
- onnx/PP-OCRv4/det/ch_PP-OCRv4_det_server_infer.onnx +3 -0
- onnx/PP-OCRv4/det/en_PP-OCRv3_det_infer.onnx +3 -0
- onnx/PP-OCRv5/det/ch_PP-OCRv5_mobile_det.onnx +3 -0
- onnx/PP-OCRv5/det/ch_PP-OCRv5_server_det.onnx +3 -0
- resources/fonts/FZYTK.TTF +3 -0
- resources/fonts/cyrillic.ttf +0 -0
- resources/fonts/japan.ttc +3 -0
- resources/fonts/korean.ttf +3 -0
- resources/fonts/方正宋黑.TTF +3 -0
.gitattributes
CHANGED
@@ -1,35 +1,67 @@
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
|
|
4 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
*.7z filter=lfs diff=lfs merge=lfs -text
|
2 |
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
+
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
5 |
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
|
|
6 |
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
|
|
11 |
*.model filter=lfs diff=lfs merge=lfs -text
|
12 |
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
13 |
*.onnx filter=lfs diff=lfs merge=lfs -text
|
14 |
*.ot filter=lfs diff=lfs merge=lfs -text
|
15 |
*.parquet filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
17 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
18 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
19 |
*.rar filter=lfs diff=lfs merge=lfs -text
|
|
|
20 |
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
21 |
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
|
|
22 |
*.tflite filter=lfs diff=lfs merge=lfs -text
|
23 |
*.tgz filter=lfs diff=lfs merge=lfs -text
|
|
|
24 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
+
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
+
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
*.db* filter=lfs diff=lfs merge=lfs -text
|
29 |
+
*.ark* filter=lfs diff=lfs merge=lfs -text
|
30 |
+
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
31 |
+
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
32 |
+
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
33 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
34 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
36 |
+
*.ggml filter=lfs diff=lfs merge=lfs -text
|
37 |
+
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
38 |
+
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
39 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
40 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
41 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
42 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
43 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
44 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
45 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
46 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
47 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
48 |
+
|
49 |
+
resources/fonts/FZYTK.TTF filter=lfs diff=lfs merge=lfs -text
|
50 |
+
resources/fonts/japan.ttc filter=lfs diff=lfs merge=lfs -text
|
51 |
+
resources/fonts/korean.ttf filter=lfs diff=lfs merge=lfs -text
|
52 |
+
resources/fonts/方正宋黑.TTF filter=lfs diff=lfs merge=lfs -text
|
53 |
+
|
54 |
+
resources/fonts/FZYTK.TTF filter=lfs diff=lfs merge=lfs -text
|
55 |
+
resources/fonts/japan.ttc filter=lfs diff=lfs merge=lfs -text
|
56 |
+
resources/fonts/korean.ttf filter=lfs diff=lfs merge=lfs -text
|
57 |
+
resources/fonts/方正宋黑.TTF filter=lfs diff=lfs merge=lfs -text
|
58 |
+
|
59 |
+
resources/fonts/FZYTK.TTF filter=lfs diff=lfs merge=lfs -text
|
60 |
+
resources/fonts/japan.ttc filter=lfs diff=lfs merge=lfs -text
|
61 |
+
resources/fonts/korean.ttf filter=lfs diff=lfs merge=lfs -text
|
62 |
+
resources/fonts/方正宋黑.TTF filter=lfs diff=lfs merge=lfs -text
|
63 |
+
|
64 |
+
resources/fonts/japan.ttc filter=lfs diff=lfs merge=lfs -text
|
65 |
+
resources/fonts/FZYTK.TTF filter=lfs diff=lfs merge=lfs -text
|
66 |
+
resources/fonts/korean.ttf filter=lfs diff=lfs merge=lfs -text
|
67 |
+
resources/fonts/方正宋黑.TTF filter=lfs diff=lfs merge=lfs -text
|
LICENSE
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
MIT License
|
2 |
+
|
3 |
+
Copyright (c) 2025 PuHui Lab
|
4 |
+
|
5 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6 |
+
of this software and associated documentation files (the "Software"), to deal
|
7 |
+
in the Software without restriction, including without limitation the rights
|
8 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9 |
+
copies of the Software, and to permit persons to whom the Software is
|
10 |
+
furnished to do so, subject to the following conditions:
|
11 |
+
|
12 |
+
The above copyright notice and this permission notice shall be included in all
|
13 |
+
copies or substantial portions of the Software.
|
14 |
+
|
15 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21 |
+
SOFTWARE.
|
22 |
+
|
23 |
+
Additional Terms
|
24 |
+
|
25 |
+
If you use the PHOCR models in commercial products or services,
|
26 |
+
you **must include clear attribution** to PHOCR in your product documentation,
|
27 |
+
user interface, or other appropriate locations, e.g. Powered By PHOCR from PuHui Lab.
|
README.md
CHANGED
@@ -1,3 +1,201 @@
|
|
1 |
-
---
|
2 |
-
|
3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- ocr
|
4 |
+
- image-to-text
|
5 |
+
license: mit
|
6 |
+
library_name: transformers
|
7 |
+
---
|
8 |
+
|
9 |
+
# Model Card: PHOCR
|
10 |
+
|
11 |
+
An open high-performance Optical Character Recognition (OCR) toolkit: [PHOCR](https://github.com/puhuilab/phocr).
|
12 |
+
|
13 |
+
# PHOCR: High-Performance OCR Toolkit
|
14 |
+
|
15 |
+
[English](README.md) | [简体中文](README_CN.md)
|
16 |
+
|
17 |
+
PHOCR is an open high-performance Optical Character Recognition (OCR) toolkit designed for efficient text recognition across multiple languages including Chinese, Japanese, Korean, Russian, Vietnamese, and Thai. **PHOCR features a completely custom-developed recognition model (PH-OCRv1) that significantly outperforms existing solutions.**
|
18 |
+
|
19 |
+
## Motivation
|
20 |
+
|
21 |
+
Current token-prediction-based model architectures are highly sensitive to the accuracy of contextual tokens. Repetitive patterns, even as few as a thousand instances, can lead to persistent memorization by the model. While most open-source text recognition models currently achieve character error rates (CER) in the percent range, our goal is to push this further into the per-mille range. At that level, for a system processing 100 million characters, the total number of recognition errors would be reduced to under 1 million — an order of magnitude improvement.
|
22 |
+
|
23 |
+
## Features
|
24 |
+
|
25 |
+
- **Custom Recognition Model**: **PH-OCRv1** achieves sub-0.x% character error rate in document-style settings by leveraging open-source models. Even achieves 0.0x% character error rate in English.
|
26 |
+
- **Multi-language Support**: Chinese, English, Japanese, Korean, Russian, and more
|
27 |
+
- **Rich Vocabulary**: Comprehensive vocabulary for each language. Chinese: 15,316, Korean: 17,388, Japanese: 11,186, Russian: 292.
|
28 |
+
- **High Performance**: Optimized inference engine with ONNX Runtime support
|
29 |
+
- **Easy Integration**: Simple Python API for quick deployment
|
30 |
+
- **Cross-platform**: Support for CPU and CUDA
|
31 |
+
|
32 |
+
## Visualization
|
33 |
+
|
34 |
+

|
35 |
+
|
36 |
+
## Installation
|
37 |
+
|
38 |
+
```bash
|
39 |
+
pip install phocr
|
40 |
+
```
|
41 |
+
|
42 |
+
## Quick Start
|
43 |
+
|
44 |
+
```python
|
45 |
+
from phocr import PHOCR
|
46 |
+
|
47 |
+
# Initialize OCR engine
|
48 |
+
engine = PHOCR()
|
49 |
+
|
50 |
+
# Perform OCR on image
|
51 |
+
result = engine("path/to/image.jpg")
|
52 |
+
print(result)
|
53 |
+
|
54 |
+
# Visualize results
|
55 |
+
result.vis("output.jpg")
|
56 |
+
print(result.to_markdown())
|
57 |
+
```
|
58 |
+
|
59 |
+
## Benchmarks
|
60 |
+
|
61 |
+
We conducted comprehensive benchmarks comparing PHOCR with leading OCR solutions across multiple languages and scenarios. **Our custom-developed PH-OCRv1 model demonstrates significant improvements over existing solutions.**
|
62 |
+
|
63 |
+
### Overall Performance Comparison
|
64 |
+
|
65 |
+
<table style="width: 90%; margin: auto; border-collapse: collapse; font-size: small;">
|
66 |
+
<thead>
|
67 |
+
<tr>
|
68 |
+
<th rowspan="2">Model</th>
|
69 |
+
<th colspan="4">ZH & EN<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
70 |
+
<th colspan="2">JP<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
71 |
+
<th colspan="2">KO<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
72 |
+
<th colspan="1">RU<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
73 |
+
</tr>
|
74 |
+
<tr>
|
75 |
+
<th><i>English</i></th>
|
76 |
+
<th><i>Simplified Chinese</i></th>
|
77 |
+
<th><i>EN CH Mixed</i></th>
|
78 |
+
<th><i>Traditional Chinese</i></th>
|
79 |
+
<th><i>Document</i></th>
|
80 |
+
<th><i>Scene</i></th>
|
81 |
+
<th><i>Document</i></th>
|
82 |
+
<th><i>Scene</i></th>
|
83 |
+
<th><i>Document</i></th>
|
84 |
+
</tr>
|
85 |
+
</thead>
|
86 |
+
<tbody>
|
87 |
+
<tr>
|
88 |
+
<td>PHOCR</td>
|
89 |
+
<td><strong>0.0008</strong></td>
|
90 |
+
<td><strong>0.0057</strong></td>
|
91 |
+
<td><strong>0.0171</strong></td>
|
92 |
+
<td><strong>0.0145</strong></td>
|
93 |
+
<td><strong>0.0039</strong></td>
|
94 |
+
<td><strong>0.0197</strong></td>
|
95 |
+
<td><strong>0.0050</strong></td>
|
96 |
+
<td><strong>0.0255</strong></td>
|
97 |
+
<td><strong>0.0046</strong></td>
|
98 |
+
</tr>
|
99 |
+
<tr>
|
100 |
+
<td>Baidu</td>
|
101 |
+
<td>0.0014</td>
|
102 |
+
<td>0.0069</td>
|
103 |
+
<td>0.0354</td>
|
104 |
+
<td>0.0431</td>
|
105 |
+
<td>0.0222</td>
|
106 |
+
<td>0.0607</td>
|
107 |
+
<td>0.0238</td>
|
108 |
+
<td>0.212</td>
|
109 |
+
<td>0.0786</td>
|
110 |
+
</tr>
|
111 |
+
<tr>
|
112 |
+
<td>Ali</td>
|
113 |
+
<td>-</td>
|
114 |
+
<td>-</td>
|
115 |
+
<td>-</td>
|
116 |
+
<td>-</td>
|
117 |
+
<td>0.0272</td>
|
118 |
+
<td>0.0564</td>
|
119 |
+
<td>0.0159</td>
|
120 |
+
<td>0.102</td>
|
121 |
+
<td>0.0616</td>
|
122 |
+
</tr>
|
123 |
+
</tbody>
|
124 |
+
</table>
|
125 |
+
|
126 |
+
|
127 |
+
Notice:
|
128 |
+
|
129 |
+
- Baidu: [Baidu Accurate API](https://ai.baidu.com/tech/ocr/general)
|
130 |
+
- Ali: [Aliyun API](https://help.aliyun.com/zh/ocr/product-overview/recognition-of-characters-in-languages-except-for-chinese-and-english-1)
|
131 |
+
- CER: the total edit distance divided by the total number of characters in the ground truth.
|
132 |
+
|
133 |
+
|
134 |
+
## Advanced Usage
|
135 |
+
|
136 |
+
With global KV cache enabled, we implement a simple version using PyTorch (CUDA). When running with torch (CUDA), you can enable caching by setting `use_cache=True` in `ORTSeq2Seq(...)`, which also allows for larger batch sizes.
|
137 |
+
|
138 |
+
### Language-specific Configuration
|
139 |
+
|
140 |
+
See [demo.py](./demo.py) for more examples.
|
141 |
+
|
142 |
+
## Evaluation & Benchmarking
|
143 |
+
|
144 |
+
PHOCR provides comprehensive benchmarking tools to evaluate model performance across different languages and scenarios.
|
145 |
+
|
146 |
+
### Quick Benchmark
|
147 |
+
|
148 |
+
Run the complete benchmark pipeline:
|
149 |
+
```bash
|
150 |
+
sh benchmark/run_recognition.sh
|
151 |
+
```
|
152 |
+
|
153 |
+
Calculate Character Error Rate (CER) for model predictions:
|
154 |
+
```bash
|
155 |
+
sh benchmark/run_score.sh
|
156 |
+
```
|
157 |
+
|
158 |
+
### Benchmark Datasets
|
159 |
+
|
160 |
+
PHOCR uses standardized benchmark datasets for fair comparison:
|
161 |
+
|
162 |
+
- **zh_en_rec_bench** [Chinese & English mixed text recognition](https://huggingface.co/datasets/puhuilab/zh_en_rec_bench)
|
163 |
+
- **jp_rec_bench** [Japanese text recognition](https://huggingface.co/datasets/puhuilab/jp_rec_bench)
|
164 |
+
- **ko_rec_bench** [Korean text recognition](https://huggingface.co/datasets/puhuilab/ko_rec_bench)
|
165 |
+
- **ru_rec_bench** [Russian text recognition](https://huggingface.co/datasets/puhuilab/ru_rec_bench)
|
166 |
+
|
167 |
+
## Further Improvements
|
168 |
+
|
169 |
+
- Character error rate (CER), including punctuation, can be further reduced through additional normalization of the training corpus.
|
170 |
+
- Text detection accuracy can be further enhanced by employing a more advanced detection framework.
|
171 |
+
|
172 |
+
## Contributing
|
173 |
+
|
174 |
+
We welcome contributions! Please feel free to submit issues, feature requests, or pull requests.
|
175 |
+
|
176 |
+
## Support
|
177 |
+
|
178 |
+
For questions and support, please open an issue on GitHub or contact the maintainers.
|
179 |
+
|
180 |
+
## Acknowledgements
|
181 |
+
|
182 |
+
Many thanks to [RapidOCR](https://github.com/RapidAI/RapidOCR) for detection and main framework.
|
183 |
+
|
184 |
+
## License
|
185 |
+
|
186 |
+
- This project is released under the Apache 2.0 license
|
187 |
+
- The copyright of the OCR detection and classification model is held by Baidu
|
188 |
+
- The PHOCR recognition models are under the modified MIT License - see the [LICENSE](./LICENSE) file for details
|
189 |
+
|
190 |
+
## Citation
|
191 |
+
|
192 |
+
If you use PHOCR in your research, please cite:
|
193 |
+
|
194 |
+
```bibtex
|
195 |
+
@misc{phocr2025,
|
196 |
+
title={PHOCR: High-Performance OCR Toolkit},
|
197 |
+
author={PuHui Lab},
|
198 |
+
year={2025},
|
199 |
+
url={https://github.com/puhuilab/phocr}
|
200 |
+
}
|
201 |
+
```
|
README_CN.md
ADDED
@@ -0,0 +1,215 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- ocr
|
4 |
+
- image-to-text
|
5 |
+
license: mit
|
6 |
+
library_name: transformers
|
7 |
+
---
|
8 |
+
|
9 |
+
# Model Card: PHOCR
|
10 |
+
|
11 |
+
高性能文字识别工具包 [PHOCR](https://github.com/puhuilab/phocr).
|
12 |
+
|
13 |
+
|
14 |
+
# PHOCR:高性能 OCR 工具包
|
15 |
+
|
16 |
+
[English](README.md) | [简体中文](README_CN.md)
|
17 |
+
|
18 |
+
PHOCR 是一个高性能的开源光学字符识别(OCR)工具包,专为多语种文本识别任务设计,支持包括中文、日文、韩文、俄文、越南文和泰文在内的多种语言。**PHOCR 搭载了我们完全自研的识别模型 PH-OCRv1,在准确率上显著优于现有解决方案。**
|
19 |
+
|
20 |
+
## 背景动机
|
21 |
+
|
22 |
+
当前基于下一个 token 预测的模型结构对上下文 token 的准确性非常敏感。即使仅有千次重复的模式,也可能导致模型产生永久性记忆。虽然大多数开源文字识别模型目前的字符错误率(CER)仍处于百分位水平,我们的目标是将其进一步提升至千分位(0.x%)。在这一精度下,对于处理 1 亿字符的系统,总错误字符数将下降至100万内,达到数量级的准确率提升。
|
23 |
+
|
24 |
+
## 主要特性
|
25 |
+
|
26 |
+
- **自研识别模型**:**PH-OCRv1**,可在文档场景中实现千分位CER,英文场景下甚至可达0.0x%。
|
27 |
+
- **多语种支持**:支持中文、英文、日文、韩文、俄文等多种语言。
|
28 |
+
- **丰富词表覆盖**:中文 15316,韩文 17388,日文 11186,俄文 292。
|
29 |
+
- **高性能推理**:深度优化的模型结构,集成 ONNX Runtime 支持。
|
30 |
+
- **轻松集成**:提供简洁的 Python API。
|
31 |
+
- **跨平台**:支持 CPU 与 CUDA 环境。
|
32 |
+
|
33 |
+
## 可视化效果
|
34 |
+
|
35 |
+

|
36 |
+
|
37 |
+
## 安装方式
|
38 |
+
|
39 |
+
```bash
|
40 |
+
pip install phocr
|
41 |
+
```
|
42 |
+
|
43 |
+
## 快速开始
|
44 |
+
|
45 |
+
```python
|
46 |
+
from phocr import PHOCR
|
47 |
+
|
48 |
+
# 初始化 OCR 引擎
|
49 |
+
engine = PHOCR()
|
50 |
+
|
51 |
+
# 对图像进行 OCR 识别
|
52 |
+
result = engine("path/to/image.jpg")
|
53 |
+
print(result)
|
54 |
+
|
55 |
+
# 可视化结果
|
56 |
+
result.vis("output.jpg")
|
57 |
+
print(result.to_markdown())
|
58 |
+
```
|
59 |
+
|
60 |
+
## 性能基准测试
|
61 |
+
|
62 |
+
我们进行了全面的基准测试,将 PHOCR 与领先的 OCR 解决方案在多种语言和场景下进行比较。**我们自研的 PH-OCRv1 模型在现有解决方案基础上实现了显著改进。**
|
63 |
+
|
64 |
+
### 整体性能对比
|
65 |
+
|
66 |
+
<table style="width: 90%; margin: auto; border-collapse: collapse; font-size: small;">
|
67 |
+
<thead>
|
68 |
+
<tr>
|
69 |
+
<th rowspan="2">模型</th>
|
70 |
+
<th colspan="4">中文 & 英文<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
71 |
+
<th colspan="2">日文<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
72 |
+
<th colspan="2">韩文<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
73 |
+
<th colspan="1">俄文<br><span style="font-weight: normal; font-size: x-small;">CER ↓</span></th>
|
74 |
+
</tr>
|
75 |
+
<tr>
|
76 |
+
<th><i>英文</i></th>
|
77 |
+
<th><i>简体中文</i></th>
|
78 |
+
<th><i>中英混合</i></th>
|
79 |
+
<th><i>繁体中文</i></th>
|
80 |
+
<th><i>文档</i></th>
|
81 |
+
<th><i>场景</i></th>
|
82 |
+
<th><i>文档</i></th>
|
83 |
+
<th><i>场景</i></th>
|
84 |
+
<th><i>文档</i></th>
|
85 |
+
</tr>
|
86 |
+
</thead>
|
87 |
+
<tbody>
|
88 |
+
<tr>
|
89 |
+
<td>PHOCR</td>
|
90 |
+
<td><strong>0.0008</strong></td>
|
91 |
+
<td><strong>0.0057</strong></td>
|
92 |
+
<td><strong>0.0171</strong></td>
|
93 |
+
<td><strong>0.0145</strong></td>
|
94 |
+
<td><strong>0.0039</strong></td>
|
95 |
+
<td><strong>0.0197</strong></td>
|
96 |
+
<td><strong>0.0050</strong></td>
|
97 |
+
<td><strong>0.0255</strong></td>
|
98 |
+
<td><strong>0.0046</strong></td>
|
99 |
+
</tr>
|
100 |
+
<tr>
|
101 |
+
<td>百度</td>
|
102 |
+
<td>0.0014</td>
|
103 |
+
<td>0.0069</td>
|
104 |
+
<td>0.0354</td>
|
105 |
+
<td>0.0431</td>
|
106 |
+
<td>0.0222</td>
|
107 |
+
<td>0.0607</td>
|
108 |
+
<td>0.0238</td>
|
109 |
+
<td>0.212</td>
|
110 |
+
<td>0.0786</td>
|
111 |
+
</tr>
|
112 |
+
<tr>
|
113 |
+
<td>阿里</td>
|
114 |
+
<td>-</td>
|
115 |
+
<td>-</td>
|
116 |
+
<td>-</td>
|
117 |
+
<td>-</td>
|
118 |
+
<td>0.0272</td>
|
119 |
+
<td>0.0564</td>
|
120 |
+
<td>0.0159</td>
|
121 |
+
<td>0.102</td>
|
122 |
+
<td>0.0616</td>
|
123 |
+
</tr>
|
124 |
+
<tr>
|
125 |
+
<td>PPOCR V5</td>
|
126 |
+
<td>0.011</td>
|
127 |
+
<td>0.060</td>
|
128 |
+
<td>0.032</td>
|
129 |
+
<td>0.061</td>
|
130 |
+
<td>-</td>
|
131 |
+
<td>-</td>
|
132 |
+
<td>-</td>
|
133 |
+
<td>-</td>
|
134 |
+
<td>-</td>
|
135 |
+
</tr>
|
136 |
+
</tbody>
|
137 |
+
</table>
|
138 |
+
|
139 |
+
说明:
|
140 |
+
|
141 |
+
- Baidu: [Baidu Accurate API](https://ai.baidu.com/tech/ocr/general)
|
142 |
+
- Ali: [Aliyun API](https://help.aliyun.com/zh/ocr/product-overview/recognition-of-characters-in-languages-except-for-chinese-and-english-1)
|
143 |
+
- 字符错误率(CER):总的编辑距离除以真实标签(ground truth)中字符的总数量。
|
144 |
+
|
145 |
+
## 高级用法
|
146 |
+
|
147 |
+
启用全局 KV 缓存后,我们使用 PyTorch (CUDA) 实现了一个简单版本。在使用 torch (CUDA) 运行时,您可以通过在 `ORTSeq2Seq(...)` 中设置 `use_cache=True` 来启用缓存,这也允许更大的批处理大小。
|
148 |
+
|
149 |
+
### 语言特定配置
|
150 |
+
|
151 |
+
更多示例请参见 [demo.py](./demo.py)。
|
152 |
+
|
153 |
+
## 评估与基准测试
|
154 |
+
|
155 |
+
PHOCR 提供全面的基准测试工具,用于评估模型在不同语言和场景下的性能。
|
156 |
+
|
157 |
+
### 快速基准测试
|
158 |
+
|
159 |
+
运行完整的基准测试流程:
|
160 |
+
```bash
|
161 |
+
sh benchmark/run_recognition.sh
|
162 |
+
```
|
163 |
+
|
164 |
+
计算模型预测的字符错误率 (CER):
|
165 |
+
```bash
|
166 |
+
sh benchmark/run_score.sh
|
167 |
+
```
|
168 |
+
|
169 |
+
### 基准测试数据集
|
170 |
+
|
171 |
+
PHOCR 使用标准化的基准测试数据集进行公平比较:
|
172 |
+
|
173 |
+
- **zh_en_rec_bench** [中英文混合文本识别](https://huggingface.co/datasets/puhuilab/zh_en_rec_bench)
|
174 |
+
- **jp_rec_bench** [日文文本识别](https://huggingface.co/datasets/puhuilab/jp_rec_bench)
|
175 |
+
- **ko_rec_bench** [韩文文本识别](https://huggingface.co/datasets/puhuilab/ko_rec_bench)
|
176 |
+
- **ru_rec_bench** [俄文文本识别](https://huggingface.co/datasets/puhuilab/ru_rec_bench)
|
177 |
+
|
178 |
+
中英文混合文本识别主要来自于[OmniDocBench](https://github.com/opendatalab/OmniDocBench)的随机采样。
|
179 |
+
其它数据由我们的团队手工采集完成。
|
180 |
+
|
181 |
+
## 后续优化方向
|
182 |
+
|
183 |
+
- 通过进一步归一化训练语料,字符错误率(CER,包括标点符号)可以得到进一步降低。
|
184 |
+
- 通过采用更先进的检测框架,文字检测的准确率可以进一步提升。
|
185 |
+
|
186 |
+
## 贡献指南
|
187 |
+
|
188 |
+
我们欢迎任何贡献!请随时提交 issue、功能请求或 pull request。
|
189 |
+
|
190 |
+
## 支持
|
191 |
+
|
192 |
+
如有问题或需要支持,请在 GitHub 上提交 issue 或联系维护者。
|
193 |
+
|
194 |
+
## 鸣谢
|
195 |
+
|
196 |
+
特别感谢 [RapidOCR](https://github.com/RapidAI/RapidOCR) 提供的检测模型及主框架支持。
|
197 |
+
|
198 |
+
## 许可证
|
199 |
+
|
200 |
+
- 本项目采用 Apache 2.0 开源许可证
|
201 |
+
- OCR 检测和分类模型的版权归百度所有
|
202 |
+
- PHOCR 识别模型采用修改版 MIT 许可证,详情请见 [LICENSE](./LICENSE) 文件
|
203 |
+
|
204 |
+
## 引用方式
|
205 |
+
|
206 |
+
如果您在研究中使用了 PHOCR,请引用:
|
207 |
+
|
208 |
+
```bibtex
|
209 |
+
@misc{phocr2025,
|
210 |
+
title={PHOCR: High-Performance OCR Toolkit},
|
211 |
+
author={PuHui Lab},
|
212 |
+
year={2025},
|
213 |
+
url={https://github.com/puhuilab/phocr}
|
214 |
+
}
|
215 |
+
```
|
configuration.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"framework":"other","task":"other"}
|
onnx/PH-OCRv1/rec/ch_rec_decoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b1a88ee6975b11b729c814b0fd4caac7b563ed3d4e68cc90b98a609222eb04ba
|
3 |
+
size 126086098
|
onnx/PH-OCRv1/rec/ch_rec_encoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb4140a6369b3be487d7c05e84a89e9c58e954bc2de9f5f0ee988d46f492499f
|
3 |
+
size 99945814
|
onnx/PH-OCRv1/rec/jp_rec_decoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b9c330f578606c625f449c49e8a993338e3e100ab0a67ef0d171360d410d3be9
|
3 |
+
size 113369603
|
onnx/PH-OCRv1/rec/jp_rec_encoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a8ceade124c37c6a6ce7157e30322859a257a2aedc4f1f886473ea10d8478578
|
3 |
+
size 99945814
|
onnx/PH-OCRv1/rec/ko_rec_decoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6798eb63098fa0342c340ee3d6f6c42b2cbff74e0572e4abe1b630462b7d039c
|
3 |
+
size 132465395
|
onnx/PH-OCRv1/rec/ko_rec_encoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:215c9a04af7a37f00cf1deef6a12ac3a5eb6eebbf45267ba35cdc6cb7f1e46e9
|
3 |
+
size 99945814
|
onnx/PH-OCRv1/rec/ru_rec_decoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b8866fd683115abe2a2767192155e85b9b531da92927338e4bbeefa1fb78b69a
|
3 |
+
size 79826892
|
onnx/PH-OCRv1/rec/ru_rec_encoder_v1.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f6237816d4374281884d75ab364e35b31dcbd7cb06a7808bde4531578fec960d
|
3 |
+
size 99945814
|
onnx/PP-OCRv4/cls/ch_ppocr_mobile_v2.0_cls_infer.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e47acedf663230f8863ff1ab0e64dd2d82b838fceb5957146dab185a89d6215c
|
3 |
+
size 585532
|
onnx/PP-OCRv4/det/Multilingual_PP-OCRv3_det_infer.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5475c6c7f4d84a6c4f32241b487435d59f126a40c023387af99732258844cdc3
|
3 |
+
size 2421639
|
onnx/PP-OCRv4/det/ch_PP-OCRv4_det_infer.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2a7720d45a54257208b1e13e36a8479894cb74155a5efe29462512d42f49da9
|
3 |
+
size 4745517
|
onnx/PP-OCRv4/det/ch_PP-OCRv4_det_server_infer.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cfa39a3f298f6d3fc71789834d15da36d11a6c59b489fc16ea4733728012f786
|
3 |
+
size 113352104
|
onnx/PP-OCRv4/det/en_PP-OCRv3_det_infer.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ea07c15d38ac40cd69da3c493444ec75b44ff23840553ff8ba102c1219ed39c2
|
3 |
+
size 2421707
|
onnx/PP-OCRv5/det/ch_PP-OCRv5_mobile_det.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d97c44a20d30a81aad087d6a396b08f786c4635742afc391f6621f5c6ae78ae
|
3 |
+
size 4819576
|
onnx/PP-OCRv5/det/ch_PP-OCRv5_server_det.onnx
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f8846b1d4bba223a2a2f9d9b44022fbc22cc019051a602b41a7fda9667e4cad
|
3 |
+
size 88118768
|
resources/fonts/FZYTK.TTF
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4065a23df6823c8e2b69a0e76d02f02a6470b8774a5e91086609701ad95cc33f
|
3 |
+
size 3241748
|
resources/fonts/cyrillic.ttf
ADDED
Binary file (56.2 kB). View file
|
|
resources/fonts/japan.ttc
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11122490a5e3a862015c8894183750de59abf95c3936d63d5978293d92f23dba
|
3 |
+
size 3478068
|
resources/fonts/korean.ttf
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0897316bdb2e308cea2841c54940f2ef5707856000aa07910c8bff39a47e40bd
|
3 |
+
size 1222780
|
resources/fonts/方正宋黑.TTF
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a2041695a15d341d8f344bac12e90e2228414a18531af6678c0e380d910e2c2
|
3 |
+
size 17546288
|