# app.py
"""Gradio front end for the PDF analyzer.

An uploaded PDF is written to a temporary file, handed to
``main_analyzer.analyze_pdf`` by path, and the first element of the returned
pair is displayed in the browser as JSON.
"""
import json
import traceback
import tempfile
import os
import gradio as gr
from typing import Tuple, Optional, Any  # Any kept for file_data_binary for now, though bytes is expected

# config must be imported before main_analyzer so JAVA_HOME is set early.
import config
from main_analyzer import analyze_pdf
# language_tool_python is only needed for the start-up probe under __main__.
# import language_tool_python


def _discard_temp_pdf(path: Optional[str]) -> None:
    """Delete the temporary PDF at *path* if one was created and still exists.

    Removal is best-effort: a failure is printed and otherwise ignored so it
    can never mask the result of the analysis itself.
    """
    if not path or not os.path.exists(path):
        return
    try:
        os.remove(path)
        print(f"App: Cleaned up temporary PDF file: {path}")
    except Exception as cleanup_error:
        print(f"App: Error cleaning up temporary PDF file {path}: {cleanup_error}")


def process_upload(file_data_binary: Optional[bytes]) -> Tuple[str, Optional[str]]:
    """Analyze an uploaded PDF and return the outcome as a JSON string.

    Args:
        file_data_binary: Raw bytes of the uploaded file, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(json_text, None)`` pair. ``json_text`` is either the serialized
        first element of ``analyze_pdf``'s result or an object with an
        ``"error"`` key (plus ``"traceback"`` when an exception was caught).
        The second slot feeds the annotated-PDF output and is always ``None``.
    """
    # Guard clauses: reject anything that is not a bytes payload.
    if file_data_binary is None:
        return json.dumps({"error": "No file uploaded or file data is None."}, indent=2), None
    if not isinstance(file_data_binary, bytes):
        problem = f"Unexpected file data type: {type(file_data_binary)}. Expected bytes."
        return json.dumps({"error": problem}, indent=2), None

    pdf_path = None
    try:
        # delete=False because analyze_pdf reopens the file by path after the
        # handle is closed; removal is our job and happens in `finally`.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as handle:
            pdf_path = handle.name
            handle.write(file_data_binary)

        print(f"App: Processing PDF via temporary file: {pdf_path}")
        analysis, _ = analyze_pdf(pdf_path)
        return json.dumps(analysis, indent=2, ensure_ascii=False), None
    except Exception as failure:
        trace_text = traceback.format_exc()
        print(f"Error in process_upload: {failure}\n{trace_text}")
        payload = {"error": str(failure), "traceback": trace_text}
        return json.dumps(payload, indent=2), None
    finally:
        _discard_temp_pdf(pdf_path)


def create_interface():
    """Build the Gradio Blocks UI and wire the button to ``process_upload``.

    Returns:
        The constructed ``gr.Blocks`` object; the caller launches it.
    """
    with gr.Blocks(title="PDF Analyzer") as ui:
        with gr.Row():
            # type="binary" makes Gradio pass the upload to the callback as bytes.
            upload_box = gr.File(
                label="Upload PDF",
                file_types=[".pdf"],
                type="binary",
            )

        with gr.Row():
            run_button = gr.Button("Analyze PDF")

        with gr.Row():
            json_view = gr.JSON(
                label="Analysis Results",
                show_label=True,
            )
            annotated_view = gr.File(
                label="Annotated PDF (Placeholder - View Coordinates in JSON)",
                show_label=True,
                interactive=False,
            )

        run_button.click(
            fn=process_upload,
            inputs=[upload_box],
            outputs=[json_view, annotated_view],
        )

    return ui


def _probe_language_tool() -> None:
    """Try to start and stop LanguageTool once, printing the outcome.

    This is an optional start-up check; any failure is reported as a warning
    and never prevents the interface from launching.
    """
    try:
        import language_tool_python

        probe = language_tool_python.LanguageTool('en-US')
        probe.close()
        print("App: LanguageTool initialized successfully for test.")
    except Exception as probe_error:
        print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {probe_error}")
        print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")


if __name__ == "__main__":
    print("\n--- Launching Gradio Interface ---")
    # config.set_java_home() has already run as a side effect of importing config.
    _probe_language_tool()

    create_interface().launch(
        share=False,
        # server_port=7860
    )