Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| from PIL import Image | |
| import pytesseract | |
| import os | |
| from langchain_huggingface import HuggingFaceEndpoint | |
| from langchain.chains import LLMChain | |
| from langchain_core.prompts import PromptTemplate | |
| import re | |
| import json | |
# Hugging Face access token, read from the HFBearer environment variable.
api_key = os.environ.get("HFBearer")
if api_key is not None:
    # os.environ only accepts str values: assigning None raises TypeError at
    # import time, so the token is exported only when HFBearer is really set.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_key

# API URL of the hosted endpoint
API_URL = "https://pllfc7e5i0rujahy.us-east-1.aws.endpoints.huggingface.cloud"
| # Function to extract text from image | |
def extract_text_from_image(image):
    """Return the text that ``pytesseract.image_to_string`` reads from ``image``."""
    return pytesseract.image_to_string(image)
| # Function to extract JSON from text | |
def extract_json(text):
    """Decode the JSON payload found between ``<JSON>`` and ``</JSON>`` in ``text``.

    Only the first tagged section is considered; whitespace around the
    payload is ignored.

    Returns:
        The decoded JSON value on success, the string
        "Aucun JSON trouvé" when no tagged section exists, or the string
        "Erreur de décodage JSON" when the section is not valid JSON.
    """
    # DOTALL lets the payload span several lines; the lazy group stops at the
    # first closing tag.
    found = re.search(r'<JSON>\s*(.*?)\s*</JSON>', text, re.DOTALL)
    if found is None:
        return "Aucun JSON trouvé"

    payload = found.group(1)
    try:
        return json.loads(payload)
    except json.JSONDecodeError:
        return "Erreur de décodage JSON"
| # Function to get metadata title from image | |
def get_image_metadata(image):
    """Return the title of ``image``: the part of ``image.name`` before the first dot.

    This function can be customized to extract other metadata as needed.
    """
    # Everything from the first "." onward is discarded, so "a.b.png" -> "a".
    stem, _, _ = image.name.partition('.')
    return stem
def count_tokens(text):
    """Return the number of whitespace-separated words in ``text``."""
    words = text.split()
    return len(words)
# Maps a document title (the uploaded file name up to its first dot) to the
# comma-separated list of parameter names inserted into the extraction prompt.
# Names are written in snake_case because they are requested as JSON keys.
image_params = {
    "bilan-atherosclerose": "medecin_responsable, rythme_sinusal, valeur_EIM, score_calcique",
    "bilan-medical": "medecin_responsable, date_naissance, prenom, nom, identifiant_patient, nom_medecin",
    # Fixed: a stray ")" used to trail ECG_repos_valeur_par_minute.
    "ECG": "medecin_responsable, poids, taille, ECG_repos_valeur_par_minute, valeur_FMT, valeur_niveau_atteint, valeur_diminution_frequence_cardiaque_bpm",
    "echo-doppler": "medecin_responsable, sous_clavieres, vertebrales, carotides",
    # Fixed: "score calcique" now matches the score_calcique spelling used above.
    "echographie-poumons": "medecin_responsable, score_calcique, technique, resultats",
    "echotomographie-abdominale": "medecin_responsable, foie, vesicule, pancreas, reins, rate, aorte_abdominale, conclusion",
    "echotomographie-cardiaque": "medecin_responsable, taille, poids, surface_corporelle, conclusion",
    "echotomographie-prostate": "medecin_responsable, vessie, ureteres, prostate, conclusion",
    "hematologie": "medecin_responsable, leucocytes, hematies, hemoglobines, hematocrite",
}
# Prompt template for the extraction request. It has two placeholders,
# {parameters} (the names to extract) and {texte} (the text to read);
# the doubled braces render as literal braces in the example JSON.
user_input = """
Vous allez extraire des paramètres d'un texte à l'intérieur d'un objet JSON, écrit entre <JSON> et </JSON>.
Liste des paramètres : {parameters}
Voici un exemple de réponse valide :
<JSON>
{{"date_naissance": "", "prenom": "", "nom": ""}}
</JSON>
Voici le texte à partir duquel vous devez extraire les paramètres :
{texte}
"""
| # prompt = PromptTemplate.from_template(user_input) | |
# LLM client pointed at the endpoint configured in API_URL.
llm = HuggingFaceEndpoint(endpoint_url=API_URL)
| # llm_chain = prompt | llm | |
| # # File uploader for multiple images | |
| # uploaded_images = st.file_uploader("Upload images", type=["png", "jpg", "jpeg"], accept_multiple_files=True) | |
| # # Modify the Streamlit section to extract the JSON for multiple images | |
| # if st.button("Submit"): | |
| # if uploaded_images: | |
| # all_json_data = {} # Dictionary to store JSON data for each image | |
| # for uploaded_image in uploaded_images: | |
| # with st.spinner(f"Extracting text from image: {uploaded_image.name}..."): | |
| # image = Image.open(uploaded_image) | |
| # extracted_text = extract_text_from_image(image) | |
| # max_text_length = 500 # Adjust as needed to keep total tokens under 1024 | |
| # if count_tokens(extracted_text) > max_text_length: | |
| # extracted_text = " ".join(extracted_text.split()[:max_text_length]) | |
| # with st.spinner(f"Fetching response from API for {uploaded_image.name}..."): | |
| # # Get metadata title from the image | |
| # title = get_image_metadata(uploaded_image) | |
| # parameters = image_params[title] | |
| # output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters}) | |
| # st.success(f"Response received for {uploaded_image.name}!") | |
| # # Extract JSON from the API output | |
| # json_data = extract_json(output) # Extract JSON from the API output | |
| # all_json_data[title] = json_data # Store JSON data with title as key | |
| # st.write(title, json_data) | |
| # # Display all extracted JSON data | |
| # st.write("Extracted JSON Data for all images.") | |
| # else: | |
| # st.warning("Please upload at least one image to extract text.") | |
def extract_json_from_images(uploaded_images):
    """Extract the configured parameters from each uploaded image as JSON.

    For every image the title is derived from its file name, the matching
    parameter list is looked up in ``image_params``, the image is OCR'd, the
    text is truncated to at most 500 words, the LLM chain is invoked and the
    ``<JSON>`` section of its answer is parsed.

    Args:
        uploaded_images: Iterable of uploaded files; each must expose a
            ``name`` attribute and be readable by ``Image.open``.

    Returns:
        A dict mapping each image title to the value returned by
        ``extract_json`` (decoded JSON or one of its error strings), or to
        "Type de document inconnu" when the title has no entry in
        ``image_params``. Images sharing a title overwrite one another.
    """
    max_text_length = 500  # Adjust as needed to keep total tokens under 1024

    # The prompt and chain do not depend on the image, so build them once
    # from the module-level template instead of on every iteration.
    prompt = PromptTemplate.from_template(user_input)
    llm_chain = prompt | llm

    all_json_data = {}  # Dictionary to store JSON data for each image
    for uploaded_image in uploaded_images:
        # Resolve the parameter list first: an unknown title is reported for
        # that image only, without aborting the batch or paying for OCR.
        title = get_image_metadata(uploaded_image)
        parameters = image_params.get(title)
        if parameters is None:
            all_json_data[title] = "Type de document inconnu"
            continue

        image = Image.open(uploaded_image)
        extracted_text = extract_text_from_image(image)

        # Truncate by words; text under the limit is passed through untouched
        # so its original line breaks are kept.
        words = extracted_text.split()
        if len(words) > max_text_length:
            extracted_text = " ".join(words[:max_text_length])

        output = llm_chain.invoke({"texte": extracted_text, "parameters": parameters})

        # Store the parsed JSON (or error string) with the title as key
        all_json_data[title] = extract_json(output)
    return all_json_data