Spaces:
Sleeping
Sleeping
Deepak Sahu
committed on
Commit
·
2a28d9d
1
Parent(s):
f1fa604
parsing images
Browse files- app.py +3 -3
- z_generate.py +61 -2
app.py
CHANGED
@@ -30,8 +30,8 @@ llm = ServerlessInference(vector_store_text=vector_text, vector_store_images=vec
|
|
30 |
|
31 |
# Processing Functions
|
32 |
def update_response(query:str = "something"):
|
33 |
-
response_text = llm.perform_rag(query)
|
34 |
-
return response_text
|
35 |
|
36 |
def update_gallery(text:str = "hell"):
|
37 |
imgs = [
|
@@ -42,7 +42,7 @@ def update_gallery(text:str = "hell"):
|
|
42 |
|
43 |
|
44 |
def ask_bot(text):
|
45 |
-
return update_response(text)
|
46 |
|
47 |
# UI Layout
|
48 |
with demo:
|
|
|
30 |
|
31 |
# Processing Functions
|
32 |
def update_response(query: str = "something"):
    """Answer *query* with the RAG pipeline.

    Delegates to the module-level ``llm`` (ServerlessInference) instance and
    returns a ``(response_text, response_images)`` pair for the UI to render.
    """
    answer_text, answer_images = llm.perform_rag(query)
    return answer_text, answer_images
|
35 |
|
36 |
def update_gallery(text:str = "hell"):
|
37 |
imgs = [
|
|
|
42 |
|
43 |
|
44 |
def ask_bot(text):
    """UI callback: forward the user's *text* straight to update_response."""
    result = update_response(text)
    return result
|
46 |
|
47 |
# UI Layout
|
48 |
with demo:
|
z_generate.py
CHANGED
@@ -1,5 +1,8 @@
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
import os
|
|
|
|
|
|
|
3 |
|
4 |
class ServerlessInference:
|
5 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
@@ -135,5 +138,61 @@ Question: {question}""".format(context=context, question=query),
|
|
135 |
max_tokens=500
|
136 |
)
|
137 |
|
138 |
-
|
139 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from huggingface_hub import InferenceClient
|
2 |
import os
|
3 |
+
from typing import List
|
4 |
+
import requests
|
5 |
+
from bs4 import BeautifulSoup
|
6 |
|
7 |
class ServerlessInference:
|
8 |
def __init__(self, vector_store_text = None, vector_store_images = None):
|
|
|
138 |
max_tokens=500
|
139 |
)
|
140 |
|
141 |
+
images_list_str: str = completion.choices[0].message.content
|
142 |
+
images_list:list = parse(images_list_str)
|
143 |
+
# Create link and caption pair
|
144 |
+
response_images = []
|
145 |
+
for idx in images_list:
|
146 |
+
caption = retrieved_image[idx].page_content
|
147 |
+
url = get_wiki_file_to_image_url(retrieved_image[idx].metadata["url"])
|
148 |
+
response_images.append(
|
149 |
+
(url, caption)
|
150 |
+
)
|
151 |
+
return response_text, response_images
|
152 |
+
|
153 |
+
|
154 |
+
|
155 |
+
|
156 |
+
def parse(value: str) -> List[int]:
    """Parse an LLM-produced string such as ``"[0, 2, 3]"`` into a list of numbers.

    Returns an empty list when the string is not a valid Python literal, or
    when the parsed value is not a list containing only numbers.
    """
    # literal_eval is the safe alternative to eval for untrusted model output.
    from ast import literal_eval

    try:
        parsed_value = literal_eval(value)
    except Exception as e:
        print(f"Invalid input string: {value}. Error: {e}")
        return []

    # Ensure it's a list of numbers.
    if isinstance(parsed_value, list) and all(isinstance(i, (int, float)) for i in parsed_value):
        return parsed_value

    # Fix: the original printed here and could fall through without an
    # explicit return (implicitly yielding None); always return [] on failure.
    print("The input string is not a valid list of numbers.")
    return []
|
170 |
+
|
171 |
+
|
172 |
+
|
173 |
+
|
174 |
+
def get_wiki_file_to_image_url(file_page_url: str):
    """Resolve a Wikipedia ``File:`` page URL to the direct image URL.

    Fetches the file page and returns the href of the first
    ``<a class="internal">`` anchor (Wikipedia's link to the full-resolution
    file). Falls back to returning *file_page_url* unchanged when the page
    cannot be fetched or the link cannot be found.
    """
    # Headers to mimic a browser; Wikipedia may reject default client UAs.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }

    # Step 1: Get the file page HTML.
    # BUG FIX: the original immediately overwrote file_page_url with a
    # hard-coded test URL, so every caller resolved to the same image.
    response = requests.get(file_page_url, headers=headers)

    if response.status_code == 200:
        # Parse the HTML content.
        soup = BeautifulSoup(response.content, "html.parser")

        # Step 2: Find the link to the full-size image file.
        image_tag = soup.find("a", {"class": "internal"})
        if image_tag and "href" in image_tag.attrs:
            # The href is protocol-relative (//upload.wikimedia.org/...).
            return "https:" + image_tag["href"]

    # Fetch failed or no image link found: fall back to the page URL itself.
    # (Also fixes the original's implicit None return when the page loaded
    # but contained no <a class="internal"> tag.)
    return file_page_url
|