Deepak Sahu committed on
Commit
2a28d9d
·
1 Parent(s): f1fa604

parsing images

Browse files
Files changed (2) hide show
  1. app.py +3 -3
  2. z_generate.py +61 -2
app.py CHANGED
@@ -30,8 +30,8 @@ llm = ServerlessInference(vector_store_text=vector_text, vector_store_images=vec
30
 
31
  # Processing Functions
32
  def update_response(query:str = "something"):
33
- response_text = llm.perform_rag(query)
34
- return response_text
35
 
36
  def update_gallery(text:str = "hell"):
37
  imgs = [
@@ -42,7 +42,7 @@ def update_gallery(text:str = "hell"):
42
 
43
 
44
  def ask_bot(text):
45
- return update_response(text), update_gallery(text)
46
 
47
  # UI Layout
48
  with demo:
 
30
 
31
  # Processing Functions
32
def update_response(query: str = "something"):
    """Answer *query* via the RAG pipeline.

    Delegates to ``llm.perform_rag`` and returns its
    (response_text, response_images) pair unchanged.
    """
    return llm.perform_rag(query)
35
 
36
  def update_gallery(text:str = "hell"):
37
  imgs = [
 
42
 
43
 
44
def ask_bot(text):
    """UI callback: forward the user's *text* to the RAG pipeline.

    Returns whatever ``update_response`` produces (text plus images).
    """
    answer = update_response(text)
    return answer
46
 
47
  # UI Layout
48
  with demo:
z_generate.py CHANGED
@@ -1,5 +1,8 @@
1
  from huggingface_hub import InferenceClient
2
  import os
 
 
 
3
 
4
  class ServerlessInference:
5
  def __init__(self, vector_store_text = None, vector_store_images = None):
@@ -135,5 +138,61 @@ Question: {question}""".format(context=context, question=query),
135
  max_tokens=500
136
  )
137
 
138
- images_list = completion.choices[0].message.content
139
- return response_text + str(images_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from huggingface_hub import InferenceClient
2
  import os
3
+ from typing import List
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
 
7
  class ServerlessInference:
8
  def __init__(self, vector_store_text = None, vector_store_images = None):
 
138
  max_tokens=500
139
  )
140
 
141
+ images_list_str: str = completion.choices[0].message.content
142
+ images_list:list = parse(images_list_str)
143
+ # Create link and caption pair
144
+ response_images = []
145
+ for idx in images_list:
146
+ caption = retrieved_image[idx].page_content
147
+ url = get_wiki_file_to_image_url(retrieved_image[idx].metadata["url"])
148
+ response_images.append(
149
+ (url, caption)
150
+ )
151
+ return response_text, response_images
152
+
153
+
154
+
155
+
156
def parse(value: str) -> List[int]:
    """Parse a model reply like ``"[0, 2, 5]"`` into a list of int indices.

    The LLM is prompted to answer with a Python-style list of image
    indices; this converts that reply safely (no ``eval``) into indices
    usable for list lookup.

    Args:
        value: String expected to contain a Python list literal of numbers.

    Returns:
        The parsed indices as ``int``s. Whole-valued floats (the model
        sometimes emits ``1.0``) are coerced to ``int`` so the result is
        always safe for indexing. Returns ``[]`` when the string is not a
        valid list of numbers.
    """
    # Safe alternative to eval: only parses Python literals.
    from ast import literal_eval

    try:
        parsed_value = literal_eval(value)
    except (ValueError, SyntaxError, MemoryError, RecursionError) as e:
        print(f"Invalid input string: {value}. Error: {e}")
        return []

    if isinstance(parsed_value, list) and all(
        isinstance(item, (int, float)) for item in parsed_value
    ):
        # Coerce to int: downstream code uses these values as list indices.
        return [int(item) for item in parsed_value]

    print("The input string is not a valid list of numbers.")
    return []
170
+
171
+
172
+
173
+
174
def get_wiki_file_to_image_url(file_page_url: str) -> str:
    """Resolve a Wikipedia ``File:...`` page URL to the direct image URL.

    Fetches the file-description page and extracts the ``<a class="internal">``
    link, which Wikipedia uses for the full-resolution media file.

    Fixes a leftover-debug bug: the previous version overwrote the
    ``file_page_url`` argument with a hard-coded URL, so every call
    returned the same image regardless of input.

    Args:
        file_page_url: URL of a Wikipedia file page, e.g.
            ``https://en.wikipedia.org/wiki/File:Example.jpg``.

    Returns:
        The direct image URL when it can be extracted; otherwise the
        original ``file_page_url`` as a usable fallback (never ``None``).
    """
    # Mimic a browser; Wikipedia may reject requests lacking a User-Agent.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Step 1: fetch the file page HTML. A timeout keeps the UI from
    # hanging forever; on any network failure, fall back to the page URL.
    try:
        response = requests.get(file_page_url, headers=headers, timeout=10)
    except requests.RequestException:
        return file_page_url

    if response.status_code == 200:
        soup = BeautifulSoup(response.content, "html.parser")

        # Step 2: the full-resolution media link carries class="internal".
        image_tag = soup.find("a", {"class": "internal"})
        if image_tag and "href" in image_tag.attrs:
            return "https:" + image_tag["href"]

    # Non-200 response, or page had no internal image link: return the
    # page URL itself so callers always receive a string.
    return file_page_url