# Provenance: committed by samyak152002 — "Update app.py" (commit de1c169, verified)
# app.py
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any # Any for file_data_binary for now, though bytes is expected
# Import config first to ensure JAVA_HOME is set early
import config
from main_analyzer import analyze_pdf
# Import language_tool_python only for the test in __main__ if needed
# import language_tool_python
def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the outcome as a JSON string.

    The uploaded bytes are written to a temporary ``.pdf`` file because
    ``analyze_pdf`` is called with a filesystem path. The temporary file is
    removed before this function returns, whether or not analysis succeeded.

    Args:
        file_data_binary: Raw bytes of the uploaded file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(json_text, None)`` tuple. ``json_text`` is the serialized
        analysis result on success; otherwise it is a JSON object with an
        ``"error"`` key (and a ``"traceback"`` key when the failure came
        from an exception). The second element is always ``None``.
    """
    if not isinstance(file_data_binary, bytes):
        if file_data_binary is None:
            error_msg = "No file uploaded or file data is None."
        else:
            error_msg = f"Unexpected file data type: {type(file_data_binary)}. Expected bytes."
        return json.dumps({"error": error_msg}, indent=2, ensure_ascii=False), None

    if not file_data_binary:
        # A zero-byte upload cannot be a PDF; report it directly instead of
        # writing an empty file and surfacing an opaque analyzer failure.
        return json.dumps({"error": "Uploaded file is empty."}, indent=2, ensure_ascii=False), None

    temp_pdf_path = None
    try:
        # delete=False: the file must outlive this handle because analyze_pdf
        # reopens it by path; removal is handled in the ``finally`` clause.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_pdf_path = temp_file.name
            temp_file.write(file_data_binary)

        print(f"App: Processing PDF via temporary file: {temp_pdf_path}")
        results_dict, _ = analyze_pdf(temp_pdf_path)
        return json.dumps(results_dict, indent=2, ensure_ascii=False), None
    except Exception as e:
        # UI boundary: turn any failure into a JSON payload rather than
        # letting the exception escape. Format the traceback once and reuse it.
        tb_text = traceback.format_exc()
        print(f"Error in process_upload: {e}\n{tb_text}")
        return json.dumps({"error": str(e), "traceback": tb_text}, indent=2, ensure_ascii=False), None
    finally:
        if temp_pdf_path:
            try:
                os.remove(temp_pdf_path)
            except FileNotFoundError:
                # Already gone; nothing to clean up.
                pass
            except Exception as e_clean:
                print(f"App: Error cleaning up temporary PDF file {temp_pdf_path}: {e_clean}")
            else:
                print(f"App: Cleaned up temporary PDF file: {temp_pdf_path}")
def create_interface():
    """Build and return the Gradio ``Blocks`` UI for the PDF analyzer.

    The layout has three rows: a PDF upload widget, an "Analyze PDF" button,
    and a results row holding a JSON viewer plus a non-interactive file slot.
    Clicking the button calls ``process_upload`` with the uploaded file and
    routes its two return values to the JSON viewer and the file slot.
    """
    with gr.Blocks(title="PDF Analyzer") as demo:
        with gr.Row():
            # type="binary" asks Gradio to pass the file contents (bytes)
            # to the callback instead of a path.
            pdf_upload = gr.File(label="Upload PDF", file_types=[".pdf"], type="binary")

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(label="Analysis Results", show_label=True)
            annotated_slot = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        run_button.click(
            fn=process_upload,
            inputs=[pdf_upload],
            outputs=[json_view, annotated_slot],
        )

    return demo
if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")

    # NOTE: JAVA_HOME is expected to be set already, since config.set_java_home()
    # is said to run when config.py is imported — confirm in config.py.

    # Optional smoke test: try to start and stop LanguageTool once. A failure
    # only produces a warning; the interface is launched regardless.
    try:
        import language_tool_python

        probe = language_tool_python.LanguageTool('en-US')
        probe.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as probe_error:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {probe_error}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")

    demo = create_interface()
    # Add server_port=7860 to the call below to pin the port.
    demo.launch(share=False)