Legal_OCR / requirements.txt
tdnathmlenthusiast's picture
resolve spacy library issue
f5293cd verified
raw
history blame contribute delete
995 Bytes
# PDF handling
PyMuPDF # provides the `fitz` module; install this instead of the unrelated PyPI 'fitz' package to avoid the 'frontend' import conflict
pdf2image # converts PDF pages to PIL images
# OCR & layout
paddleocr # PaddleOCR toolkit
paddlepaddle # Paddle backend for PaddleOCR
# Table extraction
camelot-py[base] # Camelot core + cv dependencies
# Data processing
numpy
pandas
# NLP & ML
spacy
transformers
torch
tqdm
# Vision (if using any OpenCV ops)
opencv-python
# HTML parsing (if needed)
beautifulsoup4
# System dependency (note: poppler is a system package needed by pdf2image, not a pip package)
# poppler-utils ← install via apt (poppler-utils) or conda (poppler), not pip
# Install spaCy small English model
en_core_web_sm @ https://huggingface.co/spacy/en_core_web_sm/resolve/main/en_core_web_sm-any-py3-none-any.whl