# Spaces: Sleeping  (web-page header text, not part of the program)
import os

import pandas as pd
from datasets import load_dataset
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.schema import Document
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    pipeline,
)

# Read at import time: a missing PINECONE_API_KEY raises KeyError as soon as
# the module is loaded instead of on first use.
api_key = os.environ["PINECONE_API_KEY"]
def load_raw_dataset():
    """Load the ChatDoctor HealthCareMagic dataset as a list of Documents.

    The "train" split is read into a DataFrame. Each row becomes one
    ``Document`` whose ``page_content`` is the row's ``input`` and ``output``
    joined by a single space, and whose metadata stores the two parts
    separately under ``"question"`` and ``"answer"``.

    Returns:
        A list of ``Document`` objects, one per row of the train split.
    """
    dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
    df = pd.DataFrame(dataset["train"])

    # Column-wise concatenation, same expression the row loop used to read
    # back from a temporary "combined" column.
    combined = df["input"] + " " + df["output"]

    # Iterate the columns in lockstep instead of DataFrame.iterrows(), which
    # builds a Series per row and is needlessly slow for a large frame.
    return [
        Document(
            page_content=text,
            metadata={"question": question, "answer": answer},
        )
        for text, question, answer in zip(combined, df["input"], df["output"])
    ]
def create_vector_database(model_name, index_name="medical-rag-index"):
    """Return a PineconeVectorStore bound to an existing Pinecone index.

    Despite the name, nothing visible here creates an index or uploads
    vectors: the function opens a handle to ``index_name`` and wraps it
    together with a HuggingFace embedding model.

    Args:
        model_name: Name passed to ``HuggingFaceEmbeddings`` to select the
            embedding model.
        index_name: Name of the Pinecone index to open. Defaults to the
            previously hard-coded ``"medical-rag-index"``.

    Returns:
        A ``PineconeVectorStore`` using the opened index and embedding model.

    Raises:
        KeyError: If the ``PINECONE_API_KEY`` environment variable is unset.
    """
    pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    index = pc.Index(index_name)
    return PineconeVectorStore(index=index, embedding=embedding_model)
def get_llm(model_name):
    """Build a LangChain LLM wrapper around a seq2seq text-generation pipeline.

    Args:
        model_name: Hugging Face model identifier. Used for BOTH the tokenizer
            and the model weights so the two always match.

    Returns:
        A ``HuggingFacePipeline`` wrapping a ``text2text-generation`` pipeline
        that samples up to 512 new tokens at temperature 1.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Previously the weights were always loaded from "google/flan-t5-base"
    # regardless of model_name, which paired a mismatched tokenizer and model
    # whenever a different name was requested.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=1,
        do_sample=True,
    )
    return HuggingFacePipeline(pipeline=pipe)
def get_prompt_template():
    """Return the prompt template used to answer a medical question.

    The template takes two variables: ``context`` (the reference text placed
    under "References:") and ``question`` (the user's question). The template
    lines are kept flush-left inside the literal so no indentation leaks into
    the rendered prompt.

    Returns:
        A ``PromptTemplate`` with input variables ``context`` and ``question``.
    """
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""Based on the following references and your medical knowledge, provide a detailed response:
References:
{context}
Question: {question}
By considering:
1. The key medical concepts in the question.
2. How the reference cases relate to this question.
3. What medical principles should be applied.
4. Any potential complications or considerations.
Give the final response:
""",
    )
    return prompt_template