Spaces:

spark-ds549
/

Chinese-Label-Transcription

Sleeping

App Files Community

mkaramb committed on Apr 25, 2024

Commit

9ac2440

verified ·

1 Parent(s): 70266cf

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -33

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os
 # Upload credential json file from default compute service account
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
@@ -10,8 +11,6 @@ import zipfile
 import os
 import io
 import gradio as gr
-import pandas as pd
-import tempfile
 # Global DataFrame declaration
 results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
@@ -19,7 +18,7 @@ results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Tex
 # Set your Google Cloud Document AI processor details here
 project_id = "herbaria-ai"
 location = "us"
-processor_id = "de954414712822b3"
 def translate_text(text, target_language="en"):
     translate_client = translate.Client()
@@ -58,18 +57,15 @@ def unzip_and_find_jpgs(file_path):
 def process_images(uploaded_file):
     global results_df
-    if uploaded_file is None:
-        results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])  # Clear DataFrame
-        return "", ""  # Return empty outputs if no file is uploaded
-    else:
-        # Reinitialize the DataFrame every time a new file is uploaded
-        results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
-    file_path = uploaded_file.name
     try:
         image_files = unzip_and_find_jpgs(file_path)
         if not image_files:
-            return "No JPG files found in the zip.", ""
         for file_path in image_files:
             extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
@@ -80,26 +76,17 @@ def process_images(uploaded_file):
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
     except Exception as e:
-        return f"An error occurred: {str(e)}", ""
-    html_output = results_df.to_html()
-    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
-    results_df.to_csv(temp_file.name, index=False)
-    temp_file.close()  # File is closed but not deleted
-    return html_output, temp_file.name
-with gr.Blocks() as interface:
-    with gr.Row():
-        gr.Markdown("# Document AI Translation")
-        gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
-    with gr.Row():
-        file_input = gr.File(label="Upload ZIP File")
-    with gr.Row():
-        html_output = gr.HTML()
-    with gr.Row():
-        file_output = gr.File()
-    file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
 if __name__ == "__main__":
-    interface.launch(debug=True)

 # Upload credential json file from default compute service account
 os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
+import pandas as pd
 from google.api_core.client_options import ClientOptions
 from google.cloud import documentai_v1 as documentai
 from google.cloud.documentai_v1.types import RawDocument
 import os
 import io
 import gradio as gr
 # Global DataFrame declaration
 results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
 # Set your Google Cloud Document AI processor details here
 project_id = "herbaria-ai"
 location = "us"
+processor_id = "4307b078717a399a"
 def translate_text(text, target_language="en"):
     translate_client = translate.Client()
 def process_images(uploaded_file):
     global results_df
+    results_df = results_df.iloc[0:0]  # Clear the DataFrame if re-running this cell
+    file_path = uploaded_file.name  # Gradio provides the file path through the .name attribute
     try:
         image_files = unzip_and_find_jpgs(file_path)
         if not image_files:
+            return "No JPG files found in the zip."
         for file_path in image_files:
             extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
             }])
             results_df = pd.concat([results_df, new_row], ignore_index=True)
     except Exception as e:
+        return f"An error occurred: {str(e)}"
+    return results_df.to_html()
+interface = gr.Interface(
+    fn=process_images,
+    inputs="file",
+    outputs="html",
+    title="Document AI Translation",
+    description="Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image."
+)
 if __name__ == "__main__":
+    interface.launch(debug=True)