asdfaman committed on
Commit 200bbb5 · verified · 1 Parent(s): 55e1f80

Update app.py

Files changed (1)
  1. app.py +54 -42
app.py CHANGED
@@ -7,35 +7,18 @@ import time
  from paddleocr import PaddleOCR
  import os
  from dotenv import load_dotenv
- from huggingface_hub import login
+ import torch
+ from transformers import AutoTokenizer, AutoModelForQuestionAnswering

- load_dotenv() # Load .env file
+ # Load environment variables
+ load_dotenv()
  huggingface_token = os.getenv("HF_TOKEN")
- login(huggingface_token)
-
- ##########################LLAMA3BI################################
- from huggingface_hub import InferenceClient
- client = InferenceClient(api_key=huggingface_token)
- messages = [
- {"role": "system", "content": """Your task is to get the product details out of the text given.
- The text given will be raw text from OCR of social media images of products,
- and the goal is to get product details and description so that it can be used for e-commerce product listings.
- TRY TO KEEP THE LISTING IN FOLLOWING FORMAT.
- 📦 [Product Name]
- 💰 Price: $XX.XX
- ✨ Key Features:
- • [Main Feature 1]
- • [Main Feature 2]
- • [Main Feature 3]
- 📸 [Product Image]
- 🏷 Available Now
- ✈️ Prime Shipping Available
- 🛍 Shop Now: [Link]
- 🔍 Search: [Main Keywords]
- [#RelevantHashtags] """},
- ]

- # Initialize PaddleOCR model
+ # Load TinyBERT model and tokenizer
+ tokenizer = AutoTokenizer.from_pretrained("Intel/dynamic_tinybert")
+ model = AutoModelForQuestionAnswering.from_pretrained("Intel/dynamic_tinybert")
+
+ # Initialize PaddleOCR
  ocr = PaddleOCR(use_angle_cls=True, lang='en')

  # Team details
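Note: Intel/dynamic_tinybert is an extractive question-answering checkpoint, so this commit replaces the generative LLaMA prompt with span extraction over the OCR text. For reference, a minimal sketch of the same setup through the higher-level transformers pipeline API; only the checkpoint name comes from the diff, the question and context are invented:

```python
from transformers import pipeline

# Same checkpoint app.py loads above; weights are downloaded on first use.
qa = pipeline("question-answering", model="Intel/dynamic_tinybert")

# Hypothetical OCR output and question, for illustration only.
context = "SuperSound Pro earbuds. Price: $49.99. 30-hour battery life. Ships free."
result = qa(question="What is the price?", context=context)
print(result["answer"], result["score"])  # an extracted span such as "$49.99", plus a confidence score
```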
@@ -56,30 +39,50 @@ def preprocess_image(image):
  np.ndarray: Preprocessed image array ready for prediction.
  """
  try:
- # Resize image to match model input size
  img = image.resize((128, 128), Image.LANCZOS)
-
- # Convert image to NumPy array
  img_array = np.array(img)

- # Check if the image is grayscale and convert to RGB if needed
  if img_array.ndim == 2: # Grayscale image
  img_array = np.stack([img_array] * 3, axis=-1)
  elif img_array.shape[2] == 1: # Single-channel image
  img_array = np.concatenate([img_array, img_array, img_array], axis=-1)

- # Normalize pixel values to [0, 1] range
  img_array = img_array / 255.0
-
- # Add batch dimension
  img_array = np.expand_dims(img_array, axis=0)

  return img_array
-
  except Exception as e:
  print(f"Error processing image: {e}")
  return None

+ # Function to perform Q&A with TinyBERT
+ def answer_question(context, question):
+     """
+     Extract the answer to a question from the given context using TinyBERT.
+     Args:
+         context (str): The text to search for answers.
+         question (str): The question to answer.
+     Returns:
+         str: The extracted answer or an error message.
+     """
+     try:
+         tokens = tokenizer.encode_plus(question, context, return_tensors="pt", truncation=True)
+         input_ids = tokens["input_ids"]
+         attention_mask = tokens["attention_mask"]
+
+         # Perform question answering
+         outputs = model(input_ids, attention_mask=attention_mask)
+         start_scores = outputs.start_logits
+         end_scores = outputs.end_logits
+
+         answer_start = torch.argmax(start_scores)
+         answer_end = torch.argmax(end_scores) + 1
+         answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][answer_start:answer_end]))
+
+         return answer
+     except Exception as e:
+         return f"Error: {e}"
+
  # Function to display team members in circular format
  def display_team_members(members, max_members_per_row=4):
  num_members = len(members)
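The added answer_question() follows the usual extractive-QA decoding: argmax over the start and end logits, then detokenize the span between them. A hypothetical call, assuming the tokenizer, model, and function defined in the diff above (the strings are made up):

```python
# Illustrative usage of answer_question() from app.py; context and question are invented.
ocr_text = "GlowUp Vitamin C Serum 30ml. Price: $19.99. Brightens skin. Ships in 2 days."
print(answer_question(ocr_text, "What is the price?"))        # expected: a span such as "$19.99"
print(answer_question(ocr_text, "What is the product name?"))
```

One caveat: if the end argmax lands before the start argmax, the slice is empty and the function returns an empty string, so callers may want to guard against that.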
@@ -114,20 +117,18 @@ elif app_mode == "Project Details":
  st.write("""
  ## Project Overview:
  - Automates product listings from social media content.
- - Extracts product details from posts using OCR and LLMs.
+ - Extracts product details from posts using OCR and Q&A.
  - Outputs structured, engaging, and optimized e-commerce listings.
  """)
  elif app_mode == "Team Details":
  st.write("## Meet Our Team:")
  display_team_members(team_members)
  elif app_mode == "Extract Product Details":
- st.write("## Extract Product Details Using OCR and LLM")
+ st.write("## Extract Product Details Using OCR and Q&A")
+
  post_url = st.text_input("Enter Post URL:")
  uploaded_files = st.file_uploader("Upload Product Images", type=["jpeg", "png", "jpg"], accept_multiple_files=True)
-
- if post_url:
- st.write("### Processed Details:")
- # Add Instagram post processing logic here.
+ user_question = st.text_input("Ask a question about the extracted details:")

  if uploaded_files:
  st.write("### Uploaded Images:")
@@ -136,5 +137,16 @@ elif app_mode == "Extract Product Details":
  image = Image.open(uploaded_image)
  st.image(image, use_column_width=True)
  simulate_progress()
- st.write("Details extracted:")
- # Add OCR and LLM processing logic here.
+
+ # Perform OCR
+ st.write("Extracting text from image...")
+ result = ocr.ocr(np.array(image), cls=True)
+ extracted_text = " ".join([line[1][0] for line in result[0]])
+ st.write("Extracted Text:")
+ st.text(extracted_text)
+
+ # Use Q&A model
+ if user_question:
+     st.write("### Answer to your question:")
+     answer = answer_question(extracted_text, user_question)
+     st.write(answer)
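The join over result[0] assumes PaddleOCR's per-image result layout of [bounding_box, (text, confidence)] entries. A standalone sketch of that OCR step outside Streamlit; the image path is hypothetical:

```python
from PIL import Image
import numpy as np
from paddleocr import PaddleOCR

ocr = PaddleOCR(use_angle_cls=True, lang='en')

image = Image.open("product_post.jpg")       # hypothetical local file
result = ocr.ocr(np.array(image), cls=True)  # one result list per input image

# Each detected line is [bounding_box, (text, confidence)], so line[1][0] is the text.
extracted_text = " ".join(line[1][0] for line in result[0])
print(extracted_text)
```

If nothing is detected, some PaddleOCR versions return [None] for the image, so a guard before the join may be worth adding in app.py as well.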