Spaces:

ppaihack
/

ZamaClinik

Sleeping

ZamaClinik / utils.py

b65ff94 about 1 year ago

6.22 kB

	import streamlit as st
	import requests
	from PIL import Image
	import pytesseract
	import os
	from langchain_huggingface import HuggingFaceEndpoint
	from langchain.chains import LLMChain
	from langchain_core.prompts import PromptTemplate
	import re
	import json

	# Read the Hugging Face token from the "HFBearer" environment variable and
	# re-export it as HUGGINGFACEHUB_API_TOKEN (presumably the name the
	# HuggingFaceEndpoint client looks up -- confirm against the library docs).
	# os.environ only accepts str values, so assigning None (variable unset)
	# would raise TypeError at import time; skip the export in that case.
	api_key = os.environ.get("HFBearer")
	if api_key is not None:
	    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

	# URL of the inference endpoint handed to the LLM client
	API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"

	# Function to extract text from image
	def extract_text_from_image(image):
	    """Run OCR on ``image`` with pytesseract and return the recognised text."""
	    return pytesseract.image_to_string(image)

	# Function to extract JSON from text
	def extract_json(text):
	    """Parse the JSON payload wrapped in ``<JSON>...</JSON>`` tags.

	    Args:
	        text: Text expected to contain one tagged JSON payload.

	    Returns:
	        The decoded JSON value (normally a dict) on success. On failure a
	        French error string is returned instead of raising:
	        ``"Aucun JSON trouvé"`` when no tagged section is present, or
	        ``"Erreur de décodage JSON"`` when the section is not valid JSON.
	    """
	    # Non-greedy capture of everything between the first pair of tags;
	    # DOTALL lets the payload span several lines. Whitespace around the
	    # payload is optional, so both "<JSON>{...}</JSON>" and the multi-line
	    # form are accepted.
	    match = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
	    if match is None:
	        return "Aucun JSON trouvé"

	    try:
	        return json.loads(match.group(1))
	    except json.JSONDecodeError:
	        return "Erreur de décodage JSON"

	# Function to get metadata title from image
	def get_image_metadata(image):
	    """Return the title of ``image``: its ``name`` up to the first dot.

	    For a name such as ``"ECG.png"`` this yields ``"ECG"``; a name without
	    a dot is returned unchanged.
	    """
	    stem, _, _ = image.name.partition('.')
	    return stem

	def count_tokens(text):
	    """Return the number of whitespace-separated words in ``text``.

	    This is a plain word count, not a model-tokenizer token count.
	    """
	    words = text.split()
	    return len(words)

	# Maps a document title (the uploaded file's name up to the first dot, as
	# returned by get_image_metadata) to the comma-separated list of parameter
	# names that is substituted into the prompt's {parameters} placeholder.
	image_params = {
	    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
	    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
	    # A stray ")" after ECG_repos_valeur_par_minute was removed: it was
	    # unbalanced and would have been sent to the model as part of the name.
	    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
	    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
	    # NOTE(review): "score calcique" is spelled with a space here but as
	    # "score_calcique" in "bilan-atherosclerose" -- confirm which is intended.
	    "echographie-poumons": "medecin_responsable, score calcique, technique, resultats",
	    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
	    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
	    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
	    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
	}

	# Prompt template (in French) instructing the model to return the requested
	# parameters as a JSON object written between <JSON> and </JSON> tags.
	# {parameters} and {texte} are the placeholders to fill in; the doubled
	# braces around the example object are presumably escapes that render as
	# literal braces when the template is formatted -- confirm against the
	# PromptTemplate documentation.
	user_input = """
	Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
	Liste des paramètres : {parameters}

	Voici un exemple de réponse valide :
	<JSON>
	{{"date_naissance": "", "prenom": "", "nom": ""}}
	</JSON>

	Voici le texte à partir duquel vous devez extraire les paramètres :
	{texte}
	"""

	# prompt = PromptTemplate.from_template(user_input)

	# Module-level LLM client pointed at the inference endpoint in API_URL.
	llm = HuggingFaceEndpoint(endpoint_url=API_URL)

	# llm_chain = prompt \| llm

	# # File uploader for multiple images
	# uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True)

	# # Modify the Streamlit section to extract the JSON for multiple images
	# if st.button("Submit"):
	# if uploaded_images:
	# all_json_data = {} # Dictionary to store JSON data for each image
	# for uploaded_image in uploaded_images:
	# with st.spinner(f"Extracting text from image: {uploaded_image.name}..."):
	# image = Image.open(uploaded_image)
	# extracted_text = extract_text_from_image(image)

	# max_text_length = 500 # Adjust as needed to keep total tokens under 1024
	# if count_tokens(extracted_text) > max_text_length:
	# extracted_text = " ".join(extracted_text.split()[:max_text_length])

	# with st.spinner(f"Fetching response from API for {uploaded_image.name}..."):
	# # Get metadata title from the image
	# title = get_image_metadata(uploaded_image)
	# parameters = image_params[title]
	# output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
	# st.success(f"Response received for {uploaded_image.name}!")

	# # Extract JSON from the API output
	# json_data = extract_json(output) # Extract JSON from the API output
	# all_json_data[title] = json_data # Store JSON data with title as key
	# st.write(title, json_data)

	# # Display all extracted JSON data
	# st.write("Extracted JSON Data for all images.")
	# else:
	# st.warning("Please upload at least one image to extract text.")



	def extract_json_from_images(uploaded_images, max_text_length=500):
	    """OCR each uploaded image and extract its parameters as JSON via the LLM.

	    For every image: run OCR, cap the text at ``max_text_length`` words,
	    look up the parameter list registered in ``image_params`` for the
	    image's title, invoke the LLM chain and parse the ``<JSON>`` section of
	    its output with ``extract_json``.

	    Args:
	        uploaded_images: Iterable of objects that ``Image.open`` accepts and
	            that expose a ``name`` attribute (used to derive the title).
	        max_text_length: Maximum number of whitespace-separated words of
	            OCR text sent to the model. Defaults to 500, the previously
	            hard-coded value, meant to keep total tokens under 1024.

	    Returns:
	        Dict mapping each image title to the result of ``extract_json`` for
	        that image. Images sharing a title overwrite one another.

	    Raises:
	        KeyError: If an image's title has no entry in ``image_params``.
	    """
	    # The template and chain do not depend on the image, so build them once
	    # from the module-level ``user_input`` prompt instead of per iteration.
	    llm_chain = PromptTemplate.from_template(user_input) | llm

	    all_json_data = {}
	    for uploaded_image in uploaded_images:
	        # Release the decoded image as soon as OCR is done.
	        with Image.open(uploaded_image) as image:
	            extracted_text = extract_text_from_image(image)

	        # Truncate on word boundaries so the prompt stays within budget.
	        words = extracted_text.split()
	        if len(words) > max_text_length:
	            extracted_text = " ".join(words[:max_text_length])

	        # The title selects which parameters the model is asked to extract.
	        title = get_image_metadata(uploaded_image)
	        parameters = image_params[title]

	        output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})
	        all_json_data[title] = extract_json(output)

	    return all_json_data