Update app.py
app.py CHANGED
@@ -85,6 +85,7 @@ image_description = ""
 
 
 def check_hallucination(assertion, citation):
+    print("Entering check_hallucination function")
     api_url = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
     header = {"Authorization": f"Bearer {hf_token}"}
     payload = {"inputs": f"{assertion} [SEP] {citation}"}
@@ -93,6 +94,7 @@ def check_hallucination(assertion, citation):
     output = response.json()
     output = output[0][0]["score"]
 
+    print(f"check_hallucination output: {output}")
     return f"**hallucination score:** {output}"
 
 
@@ -104,29 +106,26 @@ headers = {"Authorization": f"Bearer {hf_token}"}
 
 # Function to query the API
 def query(payload):
+    print("Entering query function")
     response = requests.post(vapi_url, headers=headers, json=payload)
+    print(f"API response: {response.json()}")
     return response.json()
 
-
 # Function to evaluate hallucination
 def evaluate_hallucination(input1, input2):
-
-    combined_input = f"{input1}
+    print("Entering evaluate_hallucination function")
+    combined_input = f"{input1}[SEP]{input2}"
 
-    # Make the API call
     output = query({"inputs": combined_input})
-
-    # Extract the score from the output
     score = output[0][0]['score']
-
-    # Generate a label based on the score
+
     if score < 0.5:
         label = f"🔴 High risk. Score: {score:.2f}"
     else:
         label = f"🟢 Low risk. Score: {score:.2f}"
-
-    return label
 
+    print(f"evaluate_hallucination label: {label}")
+    return label
 
 def save_audio(audio_input, output_dir="saved_audio"):
     if not os.path.exists(output_dir):
@@ -146,39 +145,40 @@ def save_audio(audio_input, output_dir="saved_audio"):
 
 
 def save_image(image_input, output_dir="saved_images"):
+    print("Entering save_image function")
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
 
-    # Assuming image_input is a NumPy array
     if isinstance(image_input, np.ndarray):
-        # Convert NumPy arrays to PIL Image
         image = Image.fromarray(image_input)
-
-        # Generate a unique file name
         file_name = f"image_{int(time.time())}.png"
         file_path = os.path.join(output_dir, file_name)
-
-        # Save the image file
         image.save(file_path)
 
+        print(f"Image saved at: {file_path}")
         return file_path
     else:
         raise ValueError("Invalid image input type")
 
+
 def process_image(image_file_path):
+    print("Entering process_image function")
     client = Client("https://tonic1-official-qwen-vl-chat.hf.space/--replicas/t5ccx/") # TruEra
     try:
         result = client.predict(
-            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
+            "Describe this image in detail, identify every detail in this image. Describe the image the best you can.",
             image_file_path,
             fn_index=0
         )
+        print(f"Image processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_image: {e}")
         return f"Error occurred during image processing: {e}"
 
 
 def process_speech(audio_input, source_language, target_language="English"):
+    print("Entering process_speech function")
     if audio_input is None:
         return "No audio input provided."
     try:
@@ -188,11 +188,14 @@ def process_speech(audio_input, source_language, target_language="English"):
             target_language,
             api_name="/s2tt"
         )
+        print(f"Speech processing result: {result}")
         return result
     except Exception as e:
+        print(f"Error in process_speech: {str(e)}")
         return f"Error in speech processing: {str(e)}"
 
 def convert_text_to_speech(input_text, source_language, target_language):
+    print("Entering convert_text_to_speech function")
     try:
         result = seamless_client.predict(
             input_text,
@@ -203,8 +206,10 @@ def convert_text_to_speech(input_text, source_language, target_language):
         audio_file_path = result[0] if result else None
         translated_text = result[1] if result else ""
 
+        print(f"Text-to-speech conversion result: Audio file path: {audio_file_path}, Translated text: {translated_text}")
         return audio_file_path, translated_text
     except Exception as e:
+        print(f"Error in convert_text_to_speech: {str(e)}")
         return None, f"Error in text-to-speech conversion: {str(e)}"
 
 def query_vectara(text):
@@ -310,8 +315,8 @@ def query_vectara(text):
         return f"Error: {response.status_code}"
 
 
-# Functions to Wrap the Prompt Correctly
 def wrap_text(text, width=90):
+    print("Wrapping text...")
     lines = text.split('\n')
     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
     wrapped_text = '\n'.join(wrapped_lines)
@@ -320,96 +325,82 @@ def wrap_text(text, width=90):
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True).eval()
 
-# TruEra
 class ChatBot:
     def __init__(self):
         self.history = None
 
     def predict(self, user_input, system_prompt=""):
+        print("Generating prediction...")
         response, self.history = model.chat(tokenizer, user_input, history=self.history, system=system_prompt)
         return response
 
 bot = ChatBot()
 
-# TruEra
 def multimodal_prompt(user_input, system_prompt="You are an expert medical analyst:"):
+    print("Processing multimodal prompt...")
     return bot.predict(user_input, system_prompt)
 
-
-
-    system_prompt = "You are a medical instructor
+def process_summary_with_qwen(summary):
+    print("Processing summary with Qwen...")
+    system_prompt = "You are a medical instructor. Assess and describe the proper options to your students in minute detail. Propose a course of action for them to base their recommendations on based on your description."
     response_text = bot.predict(summary, system_prompt)
     return response_text
 
 
+
 def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
     try:
-
+        print("Processing and querying...")
         combined_text = ""
-        markdown_output = ""
-        image_text = ""
-        language_code = None
-
-        # Convert input language to its code
-        if input_language and input_language in languages:
-            language_code = languages[input_language]
-
-        # Debugging print statement
+        markdown_output = ""
+        image_text = ""
         print(f"Image Input Type: {type(image_input)}, Audio Input Type: {type(audio_input)}")
-
-        # Process image input
+
         if image_input is not None:
-
+            print("Processing image input...")
             image_file_path = save_image(image_input)
             image_text = process_image(image_file_path)
             combined_text += "\n\n**Image Input:**\n" + image_text
 
-        # Process audio input
         elif audio_input is not None:
+            print("Processing audio input...")
             sample_rate, audio_data = audio_input
             audio_file_path = save_audio(audio_input)
-            audio_text = process_speech(audio_file_path,
+            audio_text = process_speech(audio_file_path, input_language, "English")
             combined_text += "\n\n**Audio Input:**\n" + audio_text
 
-        # Process text input
         elif text_input is not None and text_input.strip():
+            print("Processing text input...")
            combined_text += "The user asks the following to his health adviser: " + text_input
 
-        # Check if combined text is empty
         else:
            return "Error: Please provide some input (text, audio, or image)."
 
-        # Append the original image description in Markdown
        if image_text:
            markdown_output += "\n### Original Image Description\n"
            markdown_output += image_text + "\n"
-
-        # Use the text to query Vectara
-        vectara_response_json = query_vectara(combined_text)
 
-
+        print("Querying Vectara...")
+        vectara_response_json = query_vectara(combined_text)
        vectara_response = json.loads(vectara_response_json)
        summary = vectara_response.get('summary', 'No summary available')
        sources_info = vectara_response.get('sources', [])
 
-        # Format Vectara response in Markdown
        markdown_output = "### Vectara Response Summary\n"
        markdown_output += f"* **Summary**: {summary}\n"
        markdown_output += "### Sources Information\n"
        for source in sources_info:
            markdown_output += f"* {source}\n"
 
-        # Process the summary with Qwen
        final_response = process_summary_with_qwen(summary)
 
-
-        target_language = "English"
+        print("Converting text to speech...")
+        target_language = "English"
        audio_output, translated_text = convert_text_to_speech(final_response, target_language, input_language)
-
-
+
+        print("Evaluating hallucination...")
        hallucination_label = evaluate_hallucination(final_response, summary)
 
-        # Add final response and hallucination label to Markdown output
        markdown_output += "\n### Processed Summary with Qwen\n"
        markdown_output += final_response + "\n"
        markdown_output += "\n### Hallucination Evaluation\n"
@@ -418,8 +409,9 @@ def process_and_query(input_language=None, audio_input=None, image_input=None, text_input=None):
         markdown_output += translated_text + "\n"
 
         return markdown_output, audio_output
-
+
     except Exception as e:
+        print(f"Error occurred: {e}")
         return f"Error occurred during processing: {e}. No hallucination evaluation.", None
 
 
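
Note on the hallucination check touched above: the diff changes the `evaluate_hallucination` payload to the model's `assertion [SEP] citation` format and keeps reading the score from `output[0][0]["score"]`. The snippet below is only a minimal standalone sketch of that call, not part of the commit; it assumes a Hugging Face token is available in an `HF_TOKEN` environment variable and that the hosted vectara/hallucination_evaluation_model endpoint is reachable, and the example strings are purely illustrative.

import os
import requests

API_URL = "https://api-inference.huggingface.co/models/vectara/hallucination_evaluation_model"
HEADERS = {"Authorization": f"Bearer {os.environ['HF_TOKEN']}"}  # assumed env var, not from the commit

def hallucination_score(assertion: str, citation: str) -> float:
    # Join the two texts with [SEP], as the app does in its payload.
    payload = {"inputs": f"{assertion} [SEP] {citation}"}
    response = requests.post(API_URL, headers=HEADERS, json=payload)
    response.raise_for_status()
    output = response.json()
    # Mirror the app's parsing: score of the first returned label.
    return output[0][0]["score"]

if __name__ == "__main__":
    score = hallucination_score("The patient has a fever.", "The report notes an elevated temperature.")
    print("🔴 High risk" if score < 0.5 else "🟢 Low risk", f"(score: {score:.2f})")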
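The main functional fix in this commit is the added `def process_summary_with_qwen(summary):` line, which turns the previously orphaned summary-prompting code into a callable function that forwards the Vectara summary to the `ChatBot` wrapper around Qwen-1_8B-Chat. A minimal sketch of that call path is below; it assumes the model weights download successfully and that the remote-code Qwen chat models expose `model.chat(tokenizer, query, history=..., system=...)` as used in the diff, and the abridged system prompt and example input are illustrative only.

from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen-1_8B-Chat", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen-1_8B-Chat", device_map="auto", trust_remote_code=True
).eval()

def summarize_for_students(summary: str) -> str:
    # Mirrors process_summary_with_qwen: a single chat turn with no prior history
    # and an instructor-style system prompt (abridged from the one in the diff).
    system_prompt = "You are a medical instructor. Assess and describe the proper options to your students in minute detail."
    response, _history = model.chat(tokenizer, summary, history=None, system=system_prompt)
    return response

# Hypothetical usage:
# print(summarize_for_students("The sources describe two treatment options for the reported symptoms."))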