Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | @@ -1,58 +1,59 @@ | |
| 1 | 
             
            # app.py
         | 
| 2 | 
             
            import json
         | 
| 3 | 
             
            import traceback
         | 
| 4 | 
            -
            import tempfile
         | 
| 5 | 
             
            import os
         | 
| 6 | 
             
            import gradio as gr
         | 
| 7 | 
            -
            from typing import Tuple, Optional, Any  | 
| 8 |  | 
| 9 | 
             
            # Import config first to ensure JAVA_HOME is set early
         | 
| 10 | 
             
            import config 
         | 
| 11 |  | 
| 12 | 
             
            from main_analyzer import analyze_pdf 
         | 
| 13 | 
            -
            #  | 
| 14 | 
            -
             | 
| 15 |  | 
| 16 | 
            -
            def process_upload(file_data_binary: Optional[Any]) -> Tuple[str, Optional[str]]: # Use Optional[Any] for Gradio File type="binary"
         | 
| 17 | 
            -
                if file_data_binary is None or not hasattr(file_data_binary, 'read'): # Check if it's a file-like object
         | 
| 18 | 
            -
                    # Gradio's binary type for gr.File returns a tempfile._TemporaryFileWrapper object
         | 
| 19 | 
            -
                    # If it's None, no file was uploaded.
         | 
| 20 | 
            -
                    # If it's not None but doesn't have 'read', it's an unexpected type.
         | 
| 21 | 
            -
                    # However, gradio usually passes the bytes directly if type="binary" was used in older versions
         | 
| 22 | 
            -
                    # or a TemporaryFileWrapper which is file-like.
         | 
| 23 | 
            -
                    # For robustness, let's check if it's bytes.
         | 
| 24 | 
            -
                    if isinstance(file_data_binary, bytes):
         | 
| 25 | 
            -
                         pass # Good, it's bytes
         | 
| 26 | 
            -
                    elif file_data_binary is None:
         | 
| 27 | 
            -
                         return json.dumps({"error": "No file uploaded or file data is None"}, indent=2), None
         | 
| 28 | 
            -
                    elif not hasattr(file_data_binary, 'read'): # It's not None, not bytes, not file-like
         | 
| 29 | 
            -
                         return json.dumps({"error": f"Unexpected file data type: {type(file_data_binary)}"}), None
         | 
| 30 | 
            -
                    # If it has 'read', it's a file-like object, proceed.
         | 
| 31 |  | 
| 32 | 
            -
             | 
| 33 | 
            -
                 | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 34 | 
             
                try:
         | 
| 35 | 
            -
                     | 
| 36 | 
            -
                    #  | 
| 37 | 
            -
                    #  | 
| 38 | 
            -
                    
         | 
| 39 | 
            -
                    # Gradio with type="binary" gives a tempfile._TemporaryFileWrapper.
         | 
| 40 | 
            -
                    # This object is already file-like and can be passed directly.
         | 
| 41 | 
            -
                    # No need to create another temp file here in app.py if main_analyzer handles it.
         | 
| 42 |  | 
| 43 | 
            -
                     | 
| 44 | 
            -
                    results_dict, _ = analyze_pdf(file_data_binary) 
         | 
| 45 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 46 | 
             
                    results_json = json.dumps(results_dict, indent=2, ensure_ascii=False)
         | 
| 47 | 
             
                    return results_json, None 
         | 
| 48 |  | 
| 49 | 
             
                except Exception as e:
         | 
| 50 | 
            -
                     | 
| 51 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 52 | 
             
                    return error_message, None
         | 
| 53 | 
            -
                # No  | 
| 54 | 
            -
                #  | 
| 55 | 
            -
             | 
| 56 |  | 
| 57 | 
             
            def create_interface():
         | 
| 58 | 
             
                with gr.Blocks(title="PDF Analyzer") as interface:
         | 
| @@ -67,8 +68,9 @@ def create_interface(): | |
| 67 | 
             
                    with gr.Row():
         | 
| 68 | 
             
                        file_input = gr.File(
         | 
| 69 | 
             
                            label="Upload PDF",
         | 
| 70 | 
            -
                            file_types=[".pdf"] | 
| 71 | 
            -
                            # type | 
|  | |
| 72 | 
             
                        )
         | 
| 73 |  | 
| 74 | 
             
                    with gr.Row():
         | 
| @@ -76,7 +78,7 @@ def create_interface(): | |
| 76 |  | 
| 77 | 
             
                    with gr.Row():
         | 
| 78 | 
             
                        results_output = gr.JSON(
         | 
| 79 | 
            -
                            label="Analysis Results", | 
| 80 | 
             
                            show_label=True
         | 
| 81 | 
             
                        )
         | 
| 82 |  | 
| @@ -84,7 +86,7 @@ def create_interface(): | |
| 84 | 
             
                        pdf_output = gr.File( 
         | 
| 85 | 
             
                            label="Annotated PDF (Placeholder - View Coordinates in JSON)",
         | 
| 86 | 
             
                            show_label=True,
         | 
| 87 | 
            -
                            interactive=False  | 
| 88 | 
             
                        )
         | 
| 89 |  | 
| 90 | 
             
                    analyze_btn.click(
         | 
| @@ -98,18 +100,15 @@ if __name__ == "__main__": | |
| 98 | 
             
                print("\n--- Launching Gradio Interface ---")
         | 
| 99 | 
             
                # config.set_java_home() is called when config.py is imported.
         | 
| 100 |  | 
| 101 | 
            -
                # Optional: Test LanguageTool initialization
         | 
| 102 | 
             
                try:
         | 
| 103 | 
            -
                    import language_tool_python # Import here for the test
         | 
| 104 | 
             
                    lt_test = language_tool_python.LanguageTool('en-US')
         | 
| 105 | 
             
                    lt_test.close()
         | 
| 106 | 
             
                    print("App: LanguageTool initialized successfully for test.")
         | 
| 107 | 
             
                except Exception as lt_e:
         | 
| 108 | 
            -
                    print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}")
         | 
| 109 | 
             
                    print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")
         | 
| 110 |  | 
| 111 | 
             
                app_interface = create_interface()
         | 
| 112 | 
             
                app_interface.launch(
         | 
| 113 | 
            -
                    share=False, | 
| 114 | 
            -
                    # server_port=7860 # Optionally specify a port
         | 
| 115 | 
             
                )
         | 
|  | |
| 1 | 
             
            # app.py
         | 
| 2 | 
             
            import json
         | 
| 3 | 
             
            import traceback
         | 
| 4 | 
            +
            import tempfile # Not strictly needed by process_upload anymore, but good to keep if other parts use it.
         | 
| 5 | 
             
            import os
         | 
| 6 | 
             
            import gradio as gr
         | 
| 7 | 
            +
            from typing import Tuple, Optional, Any 
         | 
| 8 |  | 
| 9 | 
             
            # Import config first to ensure JAVA_HOME is set early
         | 
| 10 | 
             
            import config 
         | 
| 11 |  | 
| 12 | 
             
            from main_analyzer import analyze_pdf 
         | 
| 13 | 
            +
            # language_tool_python needed for the test in __main__
         | 
| 14 | 
            +
            import language_tool_python
         | 
| 15 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 16 |  | 
| 17 | 
            +
            def process_upload(uploaded_file_input: Optional[Any]) -> Tuple[str, Optional[str]]:
                """Run the PDF analyzer on a Gradio upload and return the result as JSON text.

                Args:
                    uploaded_file_input: The value delivered by the ``gr.File`` input, or
                        ``None`` when nothing was uploaded. It is forwarded to
                        ``analyze_pdf`` unchanged; this function never inspects its type
                        beyond reporting it in log/error messages.

                Returns:
                    A ``(json_text, annotated_pdf)`` pair. ``json_text`` is the analyzer
                    result serialised as indented JSON, or an ``{"error": ...}`` object
                    when no file was given or the analysis raised. The second element is
                    always ``None`` in this function.

                Notes:
                    Exceptions derived from ``Exception`` are not propagated; they are
                    logged to stdout and reported in the returned JSON together with the
                    traceback text.
                    No temp-file cleanup is done here. NOTE(review): the original author
                    states that Gradio manages its own temporary files and that
                    ``analyze_pdf`` manages any it creates -- confirm against those modules.
                """
                if uploaded_file_input is None:
                    print("App: No file uploaded.")
                    return json.dumps({"error": "No file uploaded."}, indent=2), None

                try:
                    # analyze_pdf is expected to decide whether it was handed a path or a
                    # stream; this layer only forwards the object.
                    print(f"App: Received file input of type: {type(uploaded_file_input)}. Passing to analyzer.")
                    results_dict, _ = analyze_pdf(uploaded_file_input)

                    # An analyzer-reported error is logged, then serialised exactly like a
                    # normal result (the payload already carries the "error" key).
                    if isinstance(results_dict, dict) and "error" in results_dict:
                        print(f"App: Analysis returned an error: {results_dict['error']}")

                    return json.dumps(results_dict, indent=2, ensure_ascii=False), None

                except Exception as e:
                    # Format the traceback once and reuse it for both the log line and
                    # the JSON payload.
                    trace_text = traceback.format_exc()
                    print(f"App: Error in process_upload: {e}\n{trace_text}")

                    error_detail = str(e)
                    # Enrich the analyzer's "invalid input type" failure with the type
                    # that the uploader actually delivered.
                    if "Invalid PDF input type" in error_detail:
                        error_detail = f"Invalid PDF input type received from uploader: {type(uploaded_file_input)}. Details: {str(e)}"

                    # ensure_ascii=False keeps non-ASCII error text readable, matching
                    # the serialisation used for analyzer results above.
                    error_message = json.dumps(
                        {"error": error_detail, "traceback": trace_text},
                        indent=2,
                        ensure_ascii=False,
                    )
                    return error_message, None
         | 
| 57 |  | 
| 58 | 
             
            def create_interface():
         | 
| 59 | 
             
                with gr.Blocks(title="PDF Analyzer") as interface:
         | 
|  | |
| 68 | 
             
                    with gr.Row():
         | 
| 69 | 
             
                        file_input = gr.File(
         | 
| 70 | 
             
                            label="Upload PDF",
         | 
| 71 | 
            +
                            file_types=[".pdf"]
         | 
| 72 | 
            +
                            # Default type: Gradio provides a path-like object (e.g., NamedString)
         | 
| 73 | 
            +
                            # or a TemporaryFileWrapper. Both should be acceptable by analyze_pdf.
         | 
| 74 | 
             
                        )
         | 
| 75 |  | 
| 76 | 
             
                    with gr.Row():
         | 
|  | |
| 78 |  | 
| 79 | 
             
                    with gr.Row():
         | 
| 80 | 
             
                        results_output = gr.JSON(
         | 
| 81 | 
            +
                            label="Analysis Results",
         | 
| 82 | 
             
                            show_label=True
         | 
| 83 | 
             
                        )
         | 
| 84 |  | 
|  | |
| 86 | 
             
                        pdf_output = gr.File( 
         | 
| 87 | 
             
                            label="Annotated PDF (Placeholder - View Coordinates in JSON)",
         | 
| 88 | 
             
                            show_label=True,
         | 
| 89 | 
            +
                            interactive=False 
         | 
| 90 | 
             
                        )
         | 
| 91 |  | 
| 92 | 
             
                    analyze_btn.click(
         | 
|  | |
| 100 | 
             
                print("\n--- Launching Gradio Interface ---")
         | 
| 101 | 
             
                # config.set_java_home() is called when config.py is imported.
         | 
| 102 |  | 
|  | |
| 103 | 
             
                try:
         | 
|  | |
| 104 | 
             
                    lt_test = language_tool_python.LanguageTool('en-US')
         | 
| 105 | 
             
                    lt_test.close()
         | 
| 106 | 
             
                    print("App: LanguageTool initialized successfully for test.")
         | 
| 107 | 
             
                except Exception as lt_e:
         | 
| 108 | 
            +
                    print(f"App: Warning: Could not initialize LanguageTool for test. Language checks might fail: {lt_e}\n{traceback.format_exc(limit=1)}")
         | 
| 109 | 
             
                    print("Please ensure Java is installed and JAVA_HOME is correctly set (see config.py).")
         | 
| 110 |  | 
| 111 | 
             
                app_interface = create_interface()
         | 
| 112 | 
             
                app_interface.launch(
         | 
| 113 | 
            +
                    share=False,
         | 
|  | |
| 114 | 
             
                )
         |