#!/usr/bin/env python
# coding: utf-8

import streamlit as st

st.title("Medical RAG and Reasoning App")
st.write("This app demonstrates Retrieval-Augmented Generation (RAG) for medical question answering.")
# # HuatuoGPT-o1 Medical RAG and Reasoning
#
# _Authored by: [Alan Ponnachan](https://huggingface.co/AlanPonnachan)_
#
# This notebook demonstrates an end-to-end example of using HuatuoGPT-o1 for medical question answering with Retrieval-Augmented Generation (RAG) and reasoning. We'll leverage HuatuoGPT-o1, a medical Large Language Model (LLM) designed for advanced medical reasoning, to provide detailed and well-structured answers to medical queries.
#
# ## Introduction
#
# HuatuoGPT-o1 is a medical LLM that excels at identifying mistakes, exploring alternative strategies, and refining its answers. It uses verifiable medical problems and a specialized medical verifier to strengthen its reasoning. This notebook shows how to use HuatuoGPT-o1 in a RAG setting: we retrieve relevant information from a medical knowledge base, then use the model to generate a reasoned response.
# ## Notebook Setup
#
# **Important:** Before running the code, ensure you are using a GPU runtime for faster performance. Go to **"Runtime" -> "Change runtime type"** and select **"GPU"** under "Hardware accelerator."
#
# Let's start by installing the necessary libraries.

# In[1]:

# get_ipython().system('pip install transformers datasets sentence-transformers scikit-learn --upgrade -q')
# ## Load the Dataset
#
# We'll use the **"ChatDoctor-HealthCareMagic-100k"** dataset from the Hugging Face Datasets library. This dataset contains 100,000 real-world patient-doctor interactions, providing a rich knowledge base for our RAG system.

# In[2]:

from datasets import load_dataset

dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
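
# Before building the index, it's worth peeking at one record to confirm the
# fields the rest of the code relies on: "input" holds the patient's question
# and "output" the doctor's answer. (Truncating to 200 characters is just for
# readable display.)
sample = dataset["train"][0]
st.write("Sample question:", sample["input"][:200])
st.write("Sample answer:", sample["output"][:200])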
# ## Initialize the Models
#
# We need to initialize two models:
#
# 1. **HuatuoGPT-o1**: the medical LLM that generates responses.
# 2. **Sentence Transformer**: an embedding model that creates vector representations of text, which we'll use for retrieval.

# In[3]:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from sentence_transformers import SentenceTransformer

# Initialize HuatuoGPT-o1
model_name = "FreedomIntelligence/HuatuoGPT-o1-7B"
model = AutoModelForCausalLM.from_pretrained(
    model_name, torch_dtype="auto", device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Initialize Sentence Transformer
embed_model = SentenceTransformer("all-MiniLM-L6-v2")
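
# Optional sanity check: all-MiniLM-L6-v2 produces 384-dimensional sentence
# embeddings, which is the vector size the retrieval step below compares against.
st.write("Embedding dimension:", embed_model.get_sentence_embedding_dimension())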
# ## Prepare the Knowledge Base
#
# We'll create a knowledge base by generating embeddings for the combined question-answer pairs from the dataset.

# In[4]:

import pandas as pd
import numpy as np

# Convert dataset to DataFrame
df = pd.DataFrame(dataset["train"])

# Combine question and answer for context
df["combined"] = df["input"] + " " + df["output"]

# Generate embeddings
st.write("Generating embeddings for the knowledge base...")
embeddings = embed_model.encode(
    df["combined"].tolist(), show_progress_bar=True, batch_size=128
)
st.write("Embeddings generated!")
# ## Implement Retrieval
#
# This function retrieves the `k` most relevant contexts to a given query using cosine similarity.

# In[5]:

from sklearn.metrics.pairwise import cosine_similarity


def retrieve_relevant_contexts(query: str, k: int = 3) -> list:
    """
    Retrieves the k most relevant contexts for a given query.

    Args:
        query (str): The user's medical query.
        k (int): The number of relevant contexts to retrieve.

    Returns:
        list: A list of dictionaries, each containing a relevant context.
    """
    # Generate query embedding
    query_embedding = embed_model.encode([query])[0]

    # Calculate cosine similarity between the query and every knowledge-base entry
    similarities = cosine_similarity([query_embedding], embeddings)[0]

    # Get the indices of the top k contexts, highest similarity first
    top_k_indices = np.argsort(similarities)[-k:][::-1]

    contexts = []
    for idx in top_k_indices:
        contexts.append(
            {
                "question": df.iloc[idx]["input"],
                "answer": df.iloc[idx]["output"],
                "similarity": similarities[idx],
            }
        )
    return contexts
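
# Quick smoke test of the retriever with an arbitrary query (k=2 keeps the
# output short); each result carries the matched Q&A pair and its similarity.
for ctx in retrieve_relevant_contexts("What can cause chest pain after exercise?", k=2):
    st.write(f"{ctx['similarity']:.3f} - {ctx['question'][:80]}")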
# ## Implement Response Generation
#
# This function generates a detailed response using the retrieved contexts.

# In[6]:


def generate_structured_response(query: str, contexts: list) -> str:
    """
    Generates a detailed response using the retrieved contexts.

    Args:
        query (str): The user's medical query.
        contexts (list): A list of relevant contexts.

    Returns:
        str: The generated response.
    """
    # Prepare prompt with retrieved contexts
    context_prompt = "\n".join(
        [
            f"Reference {i+1}:"
            f"\nQuestion: {ctx['question']}"
            f"\nAnswer: {ctx['answer']}"
            for i, ctx in enumerate(contexts)
        ]
    )

    prompt = f"""Based on the following references and your medical knowledge, provide a detailed response:

References:
{context_prompt}

Question: {query}

By considering:
1. The key medical concepts in the question.
2. How the reference cases relate to this question.
3. What medical principles should be applied.
4. Any potential complications or considerations.

Give the final response:
"""

    # Generate response
    messages = [{"role": "user", "content": prompt}]
    inputs = tokenizer(
        tokenizer.apply_chat_template(
            messages, tokenize=False, add_generation_prompt=True
        ),
        return_tensors="pt",
    ).to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=1024,
        temperature=0.7,
        num_beams=1,
        do_sample=True,
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text includes the prompt itself, so keep only what follows
    # the final instruction line.
    final_response = response.split("Give the final response:\n")[-1]
    return final_response
# ## Putting It All Together
#
# Let's define a function to process a query end-to-end and then use it with an example.

# In[7]:


def process_query(query: str, k: int = 3) -> tuple:
    """
    Processes a medical query end-to-end.

    Args:
        query (str): The user's medical query.
        k (int): The number of relevant contexts to retrieve.

    Returns:
        tuple: The generated response and the retrieved contexts.
    """
    contexts = retrieve_relevant_contexts(query, k)
    response = generate_structured_response(query, contexts)
    return response, contexts


# Example query
query = "I've been experiencing persistent headaches and dizziness for the past week. What could be the cause?"

# Process query
response, contexts = process_query(query)

# Display results
st.write("Query:", query)
st.write("Relevant Contexts:")
for i, ctx in enumerate(contexts, 1):
    st.write(f"Reference {i} (Similarity: {ctx['similarity']:.3f}):")
    st.write(f"Q: {ctx['question']}")
    st.write(f"A: {ctx['answer']}")
st.write("Generated Response:")
st.write(response)
# ## Conclusion
#
# This notebook demonstrates a practical application of HuatuoGPT-o1 for medical question answering using RAG and reasoning. By combining retrieval from a relevant knowledge base with the advanced reasoning capabilities of HuatuoGPT-o1, we can build a system that provides detailed and well-structured answers to complex medical queries.
#
# You can further enhance this system by:
#
# * Experimenting with different values of `k` (the number of retrieved contexts).
# * Fine-tuning HuatuoGPT-o1 on a specific medical domain.
# * Evaluating the system's performance on medical benchmarks.
# * Adding a user interface for easier interaction.
# * Making the code more robust by handling edge cases (see the sketch below).
#
# Feel free to adapt and expand upon this example to create even more powerful and helpful medical AI applications!
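
# As a starting point for the edge-case handling suggested above, here is a
# minimal sketch. The validation rules (non-empty query, clamped k) are
# illustrative assumptions, not part of the original notebook.
def safe_process_query(query: str, k: int = 3) -> tuple:
    """Wraps process_query with basic input validation."""
    query = (query or "").strip()
    if not query:
        raise ValueError("Query must be a non-empty string.")
    k = max(1, min(k, len(df)))  # clamp k to the size of the knowledge base
    return process_query(query, k)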