kawaiipeace committed on
Commit
0364029
·
1 Parent(s): 46ae679
Files changed (1) hide show
  1. app.py +3 -7
app.py CHANGED
@@ -13,8 +13,7 @@ load_dotenv()
13
  API_KEY = os.getenv("API_KEY")
14
  MODEL_ID = "scb10x/typhoon-ocr-7b"
15
 
16
- # Load pipeline (require torch)
17
- ocr_pipeline = pipeline("document-question-answering", model=MODEL_ID)
18
 
19
  # FastAPI app init
20
  app = FastAPI()
@@ -31,11 +30,8 @@ def pdf_to_image(file_bytes: bytes) -> Image.Image:
31
  return images[0] # Only first page for now
32
 
33
  def run_ocr(image: Image.Image) -> str:
34
- result = ocr_pipeline({
35
- "image": image,
36
- "question": "อ่านข้อความทั้งหมดในภาพนี้"
37
- })
38
- return result[0]["answer"] if isinstance(result, list) else str(result)
39
 
40
  def preprocess_text(text: str) -> str:
41
  text = re.sub(r"</?(figure|table|tr|td|th|b|i|u|p|div|span)[^>]*>", "\n", text)
 
13
  API_KEY = os.getenv("API_KEY")
14
  MODEL_ID = "scb10x/typhoon-ocr-7b"
15
 
16
+ ocr_pipeline = pipeline("image-to-text", model="scb10x/typhoon-ocr-7b")
 
17
 
18
  # FastAPI app init
19
  app = FastAPI()
 
30
  return images[0] # Only first page for now
31
 
32
  def run_ocr(image: Image.Image) -> str:
33
+ result = ocr_pipeline(image)
34
+ return result[0]["generated_text"]
 
 
 
35
 
36
  def preprocess_text(text: str) -> str:
37
  text = re.sub(r"</?(figure|table|tr|td|th|b|i|u|p|div|span)[^>]*>", "\n", text)