Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import os
|
|
| 2 |
# Upload credential json file from default compute service account
|
| 3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
| 4 |
|
|
|
|
| 5 |
from google.api_core.client_options import ClientOptions
|
| 6 |
from google.cloud import documentai_v1 as documentai
|
| 7 |
from google.cloud.documentai_v1.types import RawDocument
|
|
@@ -10,8 +11,6 @@ import zipfile
|
|
| 10 |
import os
|
| 11 |
import io
|
| 12 |
import gradio as gr
|
| 13 |
-
import pandas as pd
|
| 14 |
-
import tempfile
|
| 15 |
|
| 16 |
# Global DataFrame declaration
|
| 17 |
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
|
@@ -19,7 +18,7 @@ results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Tex
|
|
| 19 |
# Set your Google Cloud Document AI processor details here
|
| 20 |
project_id = "herbaria-ai"
|
| 21 |
location = "us"
|
| 22 |
-
processor_id = "
|
| 23 |
|
| 24 |
def translate_text(text, target_language="en"):
|
| 25 |
translate_client = translate.Client()
|
|
@@ -58,18 +57,15 @@ def unzip_and_find_jpgs(file_path):
|
|
| 58 |
|
| 59 |
def process_images(uploaded_file):
|
| 60 |
global results_df
|
| 61 |
-
if
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
# Reinitialize the DataFrame every time a new file is uploaded
|
| 66 |
-
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
| 67 |
-
|
| 68 |
-
file_path = uploaded_file.name
|
| 69 |
try:
|
| 70 |
image_files = unzip_and_find_jpgs(file_path)
|
|
|
|
| 71 |
if not image_files:
|
| 72 |
-
return "No JPG files found in the zip."
|
| 73 |
|
| 74 |
for file_path in image_files:
|
| 75 |
extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
|
|
@@ -80,26 +76,17 @@ def process_images(uploaded_file):
|
|
| 80 |
}])
|
| 81 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
| 82 |
except Exception as e:
|
| 83 |
-
return f"An error occurred: {str(e)}"
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
gr.Markdown("Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image.")
|
| 95 |
-
with gr.Row():
|
| 96 |
-
file_input = gr.File(label="Upload ZIP File")
|
| 97 |
-
with gr.Row():
|
| 98 |
-
html_output = gr.HTML()
|
| 99 |
-
with gr.Row():
|
| 100 |
-
file_output = gr.File()
|
| 101 |
-
|
| 102 |
-
file_input.change(process_images, inputs=file_input, outputs=[html_output, file_output])
|
| 103 |
|
| 104 |
if __name__ == "__main__":
|
| 105 |
-
interface.launch(debug=True)
|
|
|
|
| 2 |
# Upload credential json file from default compute service account
|
| 3 |
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "herbaria-ai-3c860bcb0f44.json"
|
| 4 |
|
| 5 |
+
import pandas as pd
|
| 6 |
from google.api_core.client_options import ClientOptions
|
| 7 |
from google.cloud import documentai_v1 as documentai
|
| 8 |
from google.cloud.documentai_v1.types import RawDocument
|
|
|
|
| 11 |
import os
|
| 12 |
import io
|
| 13 |
import gradio as gr
|
|
|
|
|
|
|
| 14 |
|
| 15 |
# Global DataFrame declaration
|
| 16 |
results_df = pd.DataFrame(columns=["Filename", "Extracted Text", "Translated Text"])
|
|
|
|
| 18 |
# Set your Google Cloud Document AI processor details here
|
| 19 |
project_id = "herbaria-ai"
|
| 20 |
location = "us"
|
| 21 |
+
processor_id = "4307b078717a399a"
|
| 22 |
|
| 23 |
def translate_text(text, target_language="en"):
|
| 24 |
translate_client = translate.Client()
|
|
|
|
| 57 |
|
| 58 |
def process_images(uploaded_file):
|
| 59 |
global results_df
|
| 60 |
+
results_df = results_df.iloc[0:0] # Clear the DataFrame if re-running this cell
|
| 61 |
+
|
| 62 |
+
file_path = uploaded_file.name # Gradio provides the file path through the .name attribute
|
| 63 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
try:
|
| 65 |
image_files = unzip_and_find_jpgs(file_path)
|
| 66 |
+
|
| 67 |
if not image_files:
|
| 68 |
+
return "No JPG files found in the zip."
|
| 69 |
|
| 70 |
for file_path in image_files:
|
| 71 |
extracted_text, translated_text = batch_process_documents(file_path, "image/jpeg")
|
|
|
|
| 76 |
}])
|
| 77 |
results_df = pd.concat([results_df, new_row], ignore_index=True)
|
| 78 |
except Exception as e:
|
| 79 |
+
return f"An error occurred: {str(e)}"
|
| 80 |
+
|
| 81 |
+
return results_df.to_html()
|
| 82 |
+
|
| 83 |
+
interface = gr.Interface(
|
| 84 |
+
fn=process_images,
|
| 85 |
+
inputs="file",
|
| 86 |
+
outputs="html",
|
| 87 |
+
title="Document AI Translation",
|
| 88 |
+
description="Upload a ZIP file containing JPEG/JPG images, and the system will extract and translate text from each image."
|
| 89 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 90 |
|
| 91 |
if __name__ == "__main__":
|
| 92 |
+
interface.launch(debug=True)
|