asdfaman committed on
Commit 200bbb5 · verified · 1 Parent(s): 55e1f80

Update app.py

Files changed (1)
  1. app.py +54 -42
app.py CHANGED
@@ -7,35 +7,18 @@ import time
  from paddleocr import PaddleOCR
  import os
  from dotenv import load_dotenv
- from huggingface_hub import login
+ import torch
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering

- load_dotenv() # Load .env file
+ # Load environment variables
+ load_dotenv()
  huggingface_token = os.getenv("HF_TOKEN")
- login(huggingface_token)
-
- ##########################LLAMA3BI################################
- from huggingface_hub import InferenceClient
- client = InferenceClient(api_key=huggingface_token)
- messages = [
- {"role": "system", "content": """Your task is to get the product details out of the text given.
- The text given will be raw text from OCR of social media images of products,
- and the goal is to get product details and description so that it can be used for e-commerce product listings.
- TRY TO KEEP THE LISTING IN FOLLOWING FORMAT.
- 📦 [Product Name]
- 💰 Price: $XX.XX
- ✨ Key Features:
- • [Main Feature 1]
- • [Main Feature 2]
- • [Main Feature 3]
- 📸 [Product Image]
- 🏷 Available Now
- ✈️ Prime Shipping Available
- 🛍 Shop Now: [Link]
- 🔍 Search: [Main Keywords]
- [#RelevantHashtags] """},
- ]

- # Initialize PaddleOCR model
+ # Load TinyBERT model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("Intel/dynamic_tinybert")
+ model = AutoModelForQuestionAnswering.from_pretrained("Intel/dynamic_tinybert")
+
+ # Initialize PaddleOCR
  ocr = PaddleOCR(use_angle_cls=True, lang='en')

  # Team details
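Note: Intel/dynamic_tinybert is an extractive question-answering checkpoint, so this commit replaces the generative LLaMA prompt with span extraction over the OCR text. For reference, a minimal sketch of the same setup through the higher-level transformers pipeline API; only the checkpoint name comes from the diff, the question and context are invented:

```python
from transformers import pipeline

# Same checkpoint app.py loads above; weights are downloaded on first use.
qa = pipeline("question-answering", model="Intel/dynamic_tinybert")

# Hypothetical OCR output and question, for illustration only.
context = "SuperSound Pro earbuds. Price: $49.99. 30-hour battery life. Ships free."
result = qa(question="What is the price?", context=context)
print(result["answer"], result["score"])  # an extracted span such as "$49.99", plus a confidence score
```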
@@ -56,30 +39,50 @@ def preprocess_image(image):
  np.ndarray: Preprocessed image array ready for prediction.
  """
  try:
- # Resize image to match model input size
  img = image.resize((128, 128), Image.LANCZOS)
-
- # Convert image to NumPy array
  img_array = np.array(img)

- # Check if the image is grayscale and convert to RGB if needed
  if img_array.ndim == 2: # Grayscale image
  img_array = np.stack([img_array] * 3, axis=-1)
  elif img_array.shape[2] == 1: # Single-channel image
  img_array = np.concatenate([img_array, img_array, img_array], axis=-1)

- # Normalize pixel values to [0, 1] range
  img_array = img_array / 255.0
-
- # Add batch dimension
  img_array = np.expand_dims(img_array, axis=0)

  return img_array
-
  except Exception as e:
  print(f"Error processing image: {e}")
  return None

+ # Function to perform Q&A with TinyBERT
+ def answer_question(context, question):
+     """
+     Extract the answer to a question from the given context using TinyBERT.
+     Args:
+         context (str): The text to search for answers.
+         question (str): The question to answer.
+     Returns:
+         str: The extracted answer or an error message.
+     """
+     try:
+         tokens = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True)
+         input_ids = tokens["input_ids"]
+         attention_mask = tokens["attention_mask"]
+
+         # Perform question answering
+         outputs = model(input_ids, attention_mask=attention_mask)
+         start_scores = outputs.start_logits
+         end_scores = outputs.end_logits
+
+         answer_start = torch.argmax(start_scores)
+         answer_end = torch.argmax(end_scores) + 1
+         answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][answer_start:answer_end]))
+
+         return answer
+     except Exception as e:
+         return f"Error: {e}"
+
  # Function to display team members in circular format
  def display_team_members(members, max_members_per_row=4):
  num_members = len(members)
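The added answer_question() follows the usual extractive-QA decoding: argmax over the start and end logits, then detokenize the span between them. A hypothetical call, assuming the tokenizer, model, and function defined in the diff above (the strings are made up):

```python
# Illustrative usage of answer_question() from app.py; context and question are invented.
ocr_text = "GlowUp Vitamin C Serum 30ml. Price: $19.99. Brightens skin. Ships in 2 days."
print(answer_question(ocr_text, "What is the price?"))        # expected: a span such as "$19.99"
print(answer_question(ocr_text, "What is the product name?"))
```

One caveat: if the end argmax lands before the start argmax, the slice is empty and the function returns an empty string, so callers may want to guard against that.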
@@ -114,20 +117,18 @@ elif app_mode == "Project Details":
  st.write("""
  ## Project Overview:
  - Automates product listings from social media content.
- - Extracts product details from posts using OCR and LLMs.
+ - Extracts product details from posts using OCR and Q&A.
  - Outputs structured, engaging, and optimized e-commerce listings.
  """)
  elif app_mode == "Team Details":
  st.write("## Meet Our Team:")
  display_team_members(team_members)
  elif app_mode == "Extract Product Details":
- st.write("## Extract Product Details Using OCR and LLM")
+ st.write("## Extract Product Details Using OCR and Q&A")
+
  post_url = st.text_input("Enter Post URL:")
  uploaded_files = st.file_uploader("Upload Product Images", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
-
- if post_url:
- st.write("### Processed Details:")
- # Add Instagram post processing logic here.
+ user_question = st.text_input("Ask a question about the extracted details:")

  if uploaded_files:
  st.write("### Uploaded Images:")
@@ -136,5 +137,16 @@ elif app_mode == "Extract Product Details":
  image = Image.open(uploaded_image)
  st.image(image, use_column_width=True)
  simulate_progress()
- st.write("Details extracted:")
- # Add OCR and LLM processing logic here.
+
+ # Perform OCR
+ st.write("Extracting text from image...")
+ result = ocr.ocr(np.array(image), cls=True)
+ extracted_text = " ".join([line[1][0] for line in result[0]])
+ st.write("Extracted Text:")
+ st.text(extracted_text)
+
+ # Use Q&A model
+ if user_question:
+     st.write("### Answer to your question:")
+     answer = answer_question(extracted_text, user_question)
+     st.write(answer)
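The join over result[0] assumes PaddleOCR's per-image result layout of [bounding_box, (text, confidence)] entries. A standalone sketch of that OCR step outside Streamlit; the image path is hypothetical:

```python
from PIL import Image
import numpy as np
from paddleocr import PaddleOCR

ocr = PaddleOCR(use_angle_cls=True, lang='en')

image = Image.open("product_post.jpg")       # hypothetical local file
result = ocr.ocr(np.array(image), cls=True)  # one result list per input image

# Each detected line is [bounding_box, (text, confidence)], so line[1][0] is the text.
extracted_text = " ".join(line[1][0] for line in result[0])
print(extracted_text)
```

If nothing is detected, some PaddleOCR versions return [None] for the image, so a guard before the join may be worth adding in app.py as well.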