kawaiipeace committed on
Commit
f28d3f0
·
1 Parent(s): 1064ef9

update code

Browse files
Files changed (2) hide show
  1. app.py +61 -59
  2. app_bk.py +59 -61
app.py CHANGED
@@ -1,39 +1,43 @@
1
  import os
2
- import re
3
- from fastapi import FastAPI, HTTPException, UploadFile, File
4
  from fastapi.middleware.cors import CORSMiddleware
 
 
5
  from pdf2image import convert_from_bytes
6
  from PIL import Image
7
- from transformers import pipeline
8
- import torch
9
- import gradio as gr
10
 
11
- # -------------------------
12
- # Load Hugging Face Model
13
- # -------------------------
14
- ocr_pipeline = pipeline(
15
- task="document-question-answering",
16
- model="scb10x/typhoon-ocr-7b",
17
- device=0 if torch.cuda.is_available() else -1
18
- )
19
 
20
- # -------------------------
21
- # OCR Wrapper
22
- # -------------------------
23
- def ocr_document(image):
24
- result = ocr_pipeline(image)
25
- return result[0]["generated_text"] if result and isinstance(result, list) else str(result)
 
 
 
 
26
 
27
- # -------------------------
28
- # Regex Field Extractor
29
- # -------------------------
30
  def extract_fields_regex(text: str) -> dict:
31
- text = re.sub(r"<.*?>", "", text) # remove tags
32
- text = re.sub(r"\s{2,}", " ", text) # collapse spaces
33
- text = re.sub(r"\n{2,}", "\n", text) # collapse newlines
 
 
34
 
35
  patterns = {
36
  "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
 
 
37
  "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
38
  "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
39
  "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
@@ -47,73 +51,71 @@ def extract_fields_regex(text: str) -> dict:
47
  for field, pattern in patterns.items():
48
  match = re.search(pattern, text, re.IGNORECASE)
49
  results[field] = match.group(1).strip() if match else None
 
 
 
 
 
 
50
 
51
  return results
52
 
53
- # -------------------------
54
- # PDF Handling
55
- # -------------------------
56
  def pdf_to_image(file_bytes: bytes) -> Image.Image:
57
  images = convert_from_bytes(file_bytes)
58
- return images[0] # first page only
59
-
60
- # -------------------------
61
- # FastAPI App
62
- # -------------------------
63
- app = FastAPI()
64
-
65
- # Optional: Allow all CORS (customize for security)
66
- app.add_middleware(
67
- CORSMiddleware,
68
- allow_origins=["*"],
69
- allow_methods=["*"],
70
- allow_headers=["*"],
71
- )
72
 
 
73
  @app.post("/api/ocr_receipt")
74
- async def ocr_receipt(file: UploadFile = File(...)):
 
 
 
 
 
 
75
  content = await file.read()
76
 
77
  try:
78
- # Convert PDF to image or use image directly
79
  if file.filename.lower().endswith(".pdf"):
80
  image = pdf_to_image(content)
 
81
  else:
82
- image = Image.open(file.file)
83
 
84
- text = ocr_document(image)
85
- fields = extract_fields_regex(text)
86
 
87
  return {
88
  "raw_ocr": text,
89
- "extracted_fields": fields,
90
  }
91
 
92
  except Exception as e:
93
  raise HTTPException(status_code=500, detail=str(e))
94
 
95
- # -------------------------
96
- # Gradio UI
97
- # -------------------------
98
- def gradio_interface(image_path):
99
  if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
100
  with open(image_path, "rb") as f:
101
  image = pdf_to_image(f.read())
102
  else:
103
  image = image_path
104
 
105
- text = ocr_document(image)
 
106
  extracted = extract_fields_regex(text)
107
  return text, extracted
108
 
109
  with gr.Blocks() as demo:
110
- gr.Markdown("# 🧾 OCR ใบเสร็จ (Thai Receipt Scanner)")
111
  with gr.Row():
112
  img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
113
- out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=12)
114
- out_fields = gr.JSON(label="ฟิลด์ที่ดึงออกมา")
115
- btn = gr.Button("ประมวลผล")
116
  btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
117
 
118
- # For Hugging Face Spaces no uvicorn needed
119
- demo.launch()
 
 
1
  import os
2
+ from fastapi import FastAPI, HTTPException, Header, UploadFile, File
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
+ import gradio as gr
5
+ from typhoon_ocr import ocr_document
6
  from pdf2image import convert_from_bytes
7
  from PIL import Image
8
+ import re
9
+ from dotenv import load_dotenv
 
10
 
11
+ # --- Load environment variables from .env ---
12
+ load_dotenv()
13
+
14
+ # --- Config ---
15
+ API_KEY = os.getenv("API_KEY")
16
+ TYPHOON_API_KEY = os.getenv("TYPHOON_OCR_API_KEY")
17
+ TYPHOON_BASE_URL = os.getenv("TYPHOON_BASE_URL", "https://api.opentyphoon.ai/v1")
 
18
 
19
+ # --- FastAPI App ---
20
+ app = FastAPI()
21
+
22
+ # CORS (optional for public usage)
23
+ app.add_middleware(
24
+ CORSMiddleware,
25
+ allow_origins=["*"],
26
+ allow_methods=["*"],
27
+ allow_headers=["*"],
28
+ )
29
 
 
 
 
30
  def extract_fields_regex(text: str) -> dict:
31
+ # Preprocess text
32
+ text = re.sub(r"<.*?>", "", text) # Strip tags
33
+ text = re.sub(r"\n+", "\n", text) # Collapse newlines
34
+ text = re.sub(r"\s{2,}", " ", text) # Collapse multiple spaces
35
+ text = re.sub(r"\t+", " ", text)
36
 
37
  patterns = {
38
  "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
39
+
40
+ # Updated pattern for correct tax invoice number
41
  "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
42
  "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
43
  "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
 
51
  for field, pattern in patterns.items():
52
  match = re.search(pattern, text, re.IGNORECASE)
53
  results[field] = match.group(1).strip() if match else None
54
+
55
+ # Optional fallback if regex fails
56
+ # if not results["เลขที่ใบกำกับภาษี"]:
57
+ # match = re.search(r"TAX\s*INV\.?\s*</td>\s*<td>\s*([\d\-]+)", text, re.IGNORECASE)
58
+ # if match:
59
+ # results["เลขที่ใบกำกับภาษี"] = match.group(1).strip()
60
 
61
  return results
62
 
 
 
 
63
  def pdf_to_image(file_bytes: bytes) -> Image.Image:
64
  images = convert_from_bytes(file_bytes)
65
+ return images[0] # First page only
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
+ # --- API Endpoint ---
68
  @app.post("/api/ocr_receipt")
69
+ async def ocr_receipt(
70
+ file: UploadFile = File(...),
71
+ x_api_key: str | None = Header(None),
72
+ ):
73
+ if API_KEY and x_api_key != API_KEY:
74
+ raise HTTPException(status_code=401, detail="Invalid API key")
75
+
76
  content = await file.read()
77
 
78
  try:
79
+ # Handle PDF and image
80
  if file.filename.lower().endswith(".pdf"):
81
  image = pdf_to_image(content)
82
+ raw_output = ocr_document(image, task_type="structure")
83
  else:
84
+ raw_output = ocr_document(content, task_type="structure")
85
 
86
+ text = raw_output if isinstance(raw_output, str) else raw_output.get("text", "")
87
+ extracted = extract_fields_regex(text)
88
 
89
  return {
90
  "raw_ocr": text,
91
+ "extracted_fields": extracted,
92
  }
93
 
94
  except Exception as e:
95
  raise HTTPException(status_code=500, detail=str(e))
96
 
97
+ # --- Gradio UI ---
98
+ def gradio_interface(image_path: str | Image.Image):
 
 
99
  if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
100
  with open(image_path, "rb") as f:
101
  image = pdf_to_image(f.read())
102
  else:
103
  image = image_path
104
 
105
+ raw = ocr_document(image, task_type="structure")
106
+ text = raw if isinstance(raw, str) else raw.get("text", "")
107
  extracted = extract_fields_regex(text)
108
  return text, extracted
109
 
110
  with gr.Blocks() as demo:
111
+ gr.Markdown("# 🧾 แปลงและตรวจสอบใบเสร็จ")
112
  with gr.Row():
113
  img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
114
+ out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=10)
115
+ out_fields = gr.JSON(label="ข้อความที่ดึงออกมา")
116
+ btn = gr.Button("ประมวลผลใบเสร็จ")
117
  btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
118
 
119
+ # --- Mount Gradio on FastAPI ---
120
+ # app = gr.mount_gradio_app(app, demo, path="/ui")
121
+ demo.launch(share=False)
app_bk.py CHANGED
@@ -1,43 +1,39 @@
1
  import os
2
- from fastapi import FastAPI, HTTPException, Header, UploadFile, File
 
3
  from fastapi.middleware.cors import CORSMiddleware
4
- import gradio as gr
5
- from typhoon_ocr import ocr_document
6
  from pdf2image import convert_from_bytes
7
  from PIL import Image
8
- import re
9
- from dotenv import load_dotenv
10
-
11
- # --- Load environment variables from .env ---
12
- load_dotenv()
13
-
14
- # --- Config ---
15
- API_KEY = os.getenv("API_KEY")
16
- TYPHOON_API_KEY = os.getenv("TYPHOON_OCR_API_KEY")
17
- TYPHOON_BASE_URL = os.getenv("TYPHOON_BASE_URL", "https://api.opentyphoon.ai/v1")
18
-
19
- # --- FastAPI App ---
20
- app = FastAPI()
21
 
22
- # CORS (optional for public usage)
23
- app.add_middleware(
24
- CORSMiddleware,
25
- allow_origins=["*"],
26
- allow_methods=["*"],
27
- allow_headers=["*"],
 
28
  )
29
 
 
 
 
 
 
 
 
 
 
 
30
  def extract_fields_regex(text: str) -> dict:
31
- # Preprocess text
32
- text = re.sub(r"<.*?>", "", text) # Strip tags
33
- text = re.sub(r"\n+", "\n", text) # Collapse newlines
34
- text = re.sub(r"\s{2,}", " ", text) # Collapse multiple spaces
35
- text = re.sub(r"\t+", " ", text)
36
 
37
  patterns = {
38
  "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
39
-
40
- # Updated pattern for correct tax invoice number
41
  "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
42
  "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
43
  "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
@@ -51,71 +47,73 @@ def extract_fields_regex(text: str) -> dict:
51
  for field, pattern in patterns.items():
52
  match = re.search(pattern, text, re.IGNORECASE)
53
  results[field] = match.group(1).strip() if match else None
54
-
55
- # Optional fallback if regex fails
56
- # if not results["เลขที่ใบกำกับภาษี"]:
57
- # match = re.search(r"TAX\s*INV\.?\s*</td>\s*<td>\s*([\d\-]+)", text, re.IGNORECASE)
58
- # if match:
59
- # results["เลขที่ใบกำกับภาษี"] = match.group(1).strip()
60
 
61
  return results
62
 
 
 
 
63
  def pdf_to_image(file_bytes: bytes) -> Image.Image:
64
  images = convert_from_bytes(file_bytes)
65
- return images[0] # First page only
66
 
67
- # --- API Endpoint ---
68
- @app.post("/api/ocr_receipt")
69
- async def ocr_receipt(
70
- file: UploadFile = File(...),
71
- x_api_key: str | None = Header(None),
72
- ):
73
- if API_KEY and x_api_key != API_KEY:
74
- raise HTTPException(status_code=401, detail="Invalid API key")
 
 
 
 
75
 
 
 
76
  content = await file.read()
77
 
78
  try:
79
- # Handle PDF and image
80
  if file.filename.lower().endswith(".pdf"):
81
  image = pdf_to_image(content)
82
- raw_output = ocr_document(image, task_type="structure")
83
  else:
84
- raw_output = ocr_document(content, task_type="structure")
85
 
86
- text = raw_output if isinstance(raw_output, str) else raw_output.get("text", "")
87
- extracted = extract_fields_regex(text)
88
 
89
  return {
90
  "raw_ocr": text,
91
- "extracted_fields": extracted,
92
  }
93
 
94
  except Exception as e:
95
  raise HTTPException(status_code=500, detail=str(e))
96
 
97
- # --- Gradio UI ---
98
- def gradio_interface(image_path: str | Image.Image):
 
 
99
  if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
100
  with open(image_path, "rb") as f:
101
  image = pdf_to_image(f.read())
102
  else:
103
  image = image_path
104
 
105
- raw = ocr_document(image, task_type="structure")
106
- text = raw if isinstance(raw, str) else raw.get("text", "")
107
  extracted = extract_fields_regex(text)
108
  return text, extracted
109
 
110
  with gr.Blocks() as demo:
111
- gr.Markdown("# 🧾 แปลงและตรวจสอบใบเสร็จ")
112
  with gr.Row():
113
  img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
114
- out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=10)
115
- out_fields = gr.JSON(label="ข้อความที่ดึงออกมา")
116
- btn = gr.Button("ประมวลผลใบเสร็จ")
117
  btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
118
 
119
- # --- Mount Gradio on FastAPI ---
120
- # app = gr.mount_gradio_app(app, demo, path="/ui")
121
- demo.launch(share=False)
 
1
  import os
2
+ import re
3
+ from fastapi import FastAPI, HTTPException, UploadFile, File
4
  from fastapi.middleware.cors import CORSMiddleware
 
 
5
  from pdf2image import convert_from_bytes
6
  from PIL import Image
7
+ from transformers import pipeline
8
+ import torch
9
+ import gradio as gr
 
 
 
 
 
 
 
 
 
 
10
 
11
+ # -------------------------
12
+ # Load Hugging Face Model
13
+ # -------------------------
14
+ ocr_pipeline = pipeline(
15
+ task="document-question-answering",
16
+ model="scb10x/typhoon-ocr-7b",
17
+ device=0 if torch.cuda.is_available() else -1
18
  )
19
 
20
+ # -------------------------
21
+ # OCR Wrapper
22
+ # -------------------------
23
+ def ocr_document(image):
24
+ result = ocr_pipeline(image)
25
+ return result[0]["generated_text"] if result and isinstance(result, list) else str(result)
26
+
27
+ # -------------------------
28
+ # Regex Field Extractor
29
+ # -------------------------
30
  def extract_fields_regex(text: str) -> dict:
31
+ text = re.sub(r"<.*?>", "", text) # remove tags
32
+ text = re.sub(r"\s{2,}", " ", text) # collapse spaces
33
+ text = re.sub(r"\n{2,}", "\n", text) # collapse newlines
 
 
34
 
35
  patterns = {
36
  "เลขที่ผู้เสียภาษี": r"(?:TAX\s*ID|เลขที่ผู้เสียภาษี)[\s:\-\.]*([\d]{10,13})",
 
 
37
  "เลขที่ใบกำกับภาษี": r"(?:TAX\s*INV\.?|เลขที่ใบกำกับภาษี|ใบกำกับ)[\s:\-\.]*([\d]{8,20})",
38
  "จำนวนเงิน": r"(?:AMOUNT\s*THB|จำนวนเงิน|รวมเงิน)[\s:\-\.]*([\d,]+\.\d{2})",
39
  "ราคาต่อลิตร": r"(?:Baht\/Litr\.?|Bath\/Ltr\.?|ราคาต่อลิตร|ราคา\/ลิตร|ราคาน้ำมัน)[\s:\-\.]*([\d,]+\.\d{2})",
 
47
  for field, pattern in patterns.items():
48
  match = re.search(pattern, text, re.IGNORECASE)
49
  results[field] = match.group(1).strip() if match else None
 
 
 
 
 
 
50
 
51
  return results
52
 
53
+ # -------------------------
54
+ # PDF Handling
55
+ # -------------------------
56
  def pdf_to_image(file_bytes: bytes) -> Image.Image:
57
  images = convert_from_bytes(file_bytes)
58
+ return images[0] # first page only
59
 
60
+ # -------------------------
61
+ # FastAPI App
62
+ # -------------------------
63
+ app = FastAPI()
64
+
65
+ # Optional: Allow all CORS (customize for security)
66
+ app.add_middleware(
67
+ CORSMiddleware,
68
+ allow_origins=["*"],
69
+ allow_methods=["*"],
70
+ allow_headers=["*"],
71
+ )
72
 
73
+ @app.post("/api/ocr_receipt")
74
+ async def ocr_receipt(file: UploadFile = File(...)):
75
  content = await file.read()
76
 
77
  try:
78
+ # Convert PDF to image or use image directly
79
  if file.filename.lower().endswith(".pdf"):
80
  image = pdf_to_image(content)
 
81
  else:
82
+ image = Image.open(file.file)
83
 
84
+ text = ocr_document(image)
85
+ fields = extract_fields_regex(text)
86
 
87
  return {
88
  "raw_ocr": text,
89
+ "extracted_fields": fields,
90
  }
91
 
92
  except Exception as e:
93
  raise HTTPException(status_code=500, detail=str(e))
94
 
95
+ # -------------------------
96
+ # Gradio UI
97
+ # -------------------------
98
+ def gradio_interface(image_path):
99
  if isinstance(image_path, str) and image_path.lower().endswith(".pdf"):
100
  with open(image_path, "rb") as f:
101
  image = pdf_to_image(f.read())
102
  else:
103
  image = image_path
104
 
105
+ text = ocr_document(image)
 
106
  extracted = extract_fields_regex(text)
107
  return text, extracted
108
 
109
  with gr.Blocks() as demo:
110
+ gr.Markdown("# 🧾 OCR ใบเสร็จ (Thai Receipt Scanner)")
111
  with gr.Row():
112
  img = gr.Image(type="filepath", label="อัปโหลดไฟล์ PDF หรือรูปภาพ")
113
+ out_text = gr.Textbox(label="ข้อความทั้งหมด", lines=12)
114
+ out_fields = gr.JSON(label="ฟิลด์ที่ดึงออกมา")
115
+ btn = gr.Button("ประมวลผล")
116
  btn.click(fn=gradio_interface, inputs=img, outputs=[out_text, out_fields])
117
 
118
+ # For Hugging Face Spaces no uvicorn needed
119
+ demo.launch()