Update app.py
Browse files
app.py
CHANGED
|
@@ -30,7 +30,16 @@ logger = setup_logging('app')
|
|
| 30 |
nltk.download('punkt')
|
| 31 |
nltk.download('punkt_tab')
|
| 32 |
nltk.download('stopwords')
|
| 33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 34 |
# Load environment variables
|
| 35 |
load_dotenv()
|
| 36 |
|
|
@@ -500,12 +509,12 @@ def is_query_relevant(question, source_documents, threshold=0.1):
|
|
| 500 |
except Exception as e:
|
| 501 |
logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
|
| 502 |
return False
|
| 503 |
-
|
| 504 |
def get_pdf_details(filename, page_number):
|
| 505 |
"""Get details of a specific PDF page."""
|
| 506 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
| 507 |
try:
|
| 508 |
-
|
|
|
|
| 509 |
file_path = os.path.join(data_path, filename)
|
| 510 |
|
| 511 |
# Open the PDF
|
|
@@ -565,7 +574,8 @@ def get_romanized_text(filename):
|
|
| 565 |
"""Get romanized text from a PDF."""
|
| 566 |
logger.info(f"Processing romanized text for file: {filename}")
|
| 567 |
try:
|
| 568 |
-
|
|
|
|
| 569 |
file_path = os.path.join(data_path, filename)
|
| 570 |
|
| 571 |
# Open the PDF
|
|
@@ -607,6 +617,9 @@ def get_romanized_text(filename):
|
|
| 607 |
|
| 608 |
def main():
|
| 609 |
logger.info("Starting Smart PDF Search application")
|
|
|
|
|
|
|
|
|
|
| 610 |
|
| 611 |
# Detect page from query parameters
|
| 612 |
query_params = st.query_params
|
|
|
|
| 30 |
nltk.download('punkt')
|
| 31 |
nltk.download('punkt_tab')
|
| 32 |
nltk.download('stopwords')
|
| 33 |
+
|
| 34 |
+
# Create directories if they don't exist
|
| 35 |
+
def create_dirs_if_needed(dirs=('/tmp/data', '/tmp/db')):
    """Create the application's working directories if they are missing.

    Args:
        dirs: A single path or an iterable of paths to create. Defaults to
            ``/tmp/data`` and ``/tmp/db``, the two directories this function
            has always created, so existing zero-argument calls behave
            exactly as before.

    Returns:
        None.

    Raises:
        OSError: If a directory cannot be created (for example because a
            regular file already exists at that path).
    """
    # A bare string/PathLike would otherwise be iterated character by
    # character, so wrap it into a one-element tuple first.
    if isinstance(dirs, (str, bytes, os.PathLike)):
        dirs = (dirs,)

    for directory in dirs:
        # exist_ok=True makes repeated calls a no-op for directories that are
        # already present; missing parent directories are created as well.
        os.makedirs(directory, exist_ok=True)
|
| 39 |
+
|
| 40 |
+
# Create the directories at import time, before any code reads from or writes to them
|
| 41 |
+
create_dirs_if_needed()
|
| 42 |
+
|
| 43 |
# Load environment variables
|
| 44 |
load_dotenv()
|
| 45 |
|
|
|
|
| 509 |
except Exception as e:
|
| 510 |
logger.error(f"Error checking query relevance: {str(e)}", exc_info=True)
|
| 511 |
return False
|
|
|
|
| 512 |
def get_pdf_details(filename, page_number):
|
| 513 |
"""Get details of a specific PDF page."""
|
| 514 |
logger.info(f"Processing PDF details for file: {filename}, page: {page_number}")
|
| 515 |
try:
|
| 516 |
+
# Read PDFs from /tmp/data, the data directory used on the Hugging Face Space
|
| 517 |
+
data_path = '/tmp/data'
|
| 518 |
file_path = os.path.join(data_path, filename)
|
| 519 |
|
| 520 |
# Open the PDF
|
|
|
|
| 574 |
"""Get romanized text from a PDF."""
|
| 575 |
logger.info(f"Processing romanized text for file: {filename}")
|
| 576 |
try:
|
| 577 |
+
# Read PDFs from /tmp/data, the data directory used on the Hugging Face Space
|
| 578 |
+
data_path = '/tmp/data'
|
| 579 |
file_path = os.path.join(data_path, filename)
|
| 580 |
|
| 581 |
# Open the PDF
|
|
|
|
| 617 |
|
| 618 |
def main():
|
| 619 |
logger.info("Starting Smart PDF Search application")
|
| 620 |
+
|
| 621 |
+
# Ensure directories are created before file processing starts
|
| 622 |
+
create_dirs_if_needed()
|
| 623 |
|
| 624 |
# Detect page from query parameters
|
| 625 |
query_params = st.query_params
|