# Medical_Assistant-RAG / indexer.py
import os

import pandas as pd
from datasets import load_dataset
from pinecone import Pinecone
from transformers import (
    AutoTokenizer,
    AutoModelForSeq2SeqLM,
    pipeline,
)
from langchain.schema import Document
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain_pinecone import PineconeVectorStore

api_key = os.environ["PINECONE_API_KEY"]


def load_raw_dataset():
    """Load the HealthCareMagic Q&A dataset and wrap each row as a LangChain Document."""
    dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")
    df = pd.DataFrame(dataset["train"])
    # Concatenate the patient question and the doctor's answer so both are embedded together.
    df["combined"] = df["input"] + " " + df["output"]
    docs = [
        Document(
            page_content=row["combined"],
            metadata={"question": row["input"], "answer": row["output"]},
        )
        for _, row in df.iterrows()
    ]
    return docs
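
# Illustrative usage (not part of the original file; assumes the dataset
# download above succeeds) to sanity-check the loaded corpus:
#
#   docs = load_raw_dataset()
#   print(len(docs), docs[0].metadata["question"][:80])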


def create_vector_database(model_name):
    """Connect to an existing Pinecone index and wrap it as a LangChain vector store."""
    PINECONE_INDEX_NAME = "medical-rag-index"
    pc = Pinecone(api_key=api_key)
    # The embedding model's output dimension must match the Pinecone index dimension.
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    index = pc.Index(PINECONE_INDEX_NAME)
    vectorstore = PineconeVectorStore(index=index, embedding=embedding_model)
    return vectorstore
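
# Minimal indexing sketch (an assumption, since this file never upserts documents
# itself): PineconeVectorStore.add_documents embeds and uploads the Documents
# returned by load_raw_dataset(). The "medical-rag-index" index is assumed to
# already exist with a dimension matching the chosen embedding model.
#
#   vectorstore = create_vector_database("sentence-transformers/all-MiniLM-L6-v2")
#   vectorstore.add_documents(load_raw_dataset())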


def get_llm(model_name):
    """Build a text2text-generation pipeline for the given checkpoint and wrap it for LangChain."""
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    # Load the model from the same checkpoint as the tokenizer.
    model = AutoModelForSeq2SeqLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )
    pipe = pipeline(
        "text2text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=1.0,
        do_sample=True,
    )
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm
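
# Illustrative usage (assumption: "google/flan-t5-base" is the intended checkpoint
# and fits in local memory):
#
#   llm = get_llm("google/flan-t5-base")
#   print(llm.invoke("Summarize: aspirin is a common NSAID used for pain relief."))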


def get_prompt_template():
    """Prompt that injects retrieved reference cases as context for the user's question."""
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""Based on the following references and your medical knowledge, provide a detailed response.

References:
{context}

Question: {question}

Consider:
1. The key medical concepts in the question.
2. How the reference cases relate to this question.
3. What medical principles should be applied.
4. Any potential complications or considerations.

Give the final response:
""",
    )
    return prompt_template
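

if __name__ == "__main__":
    # End-to-end sketch (an assumption about how these pieces are meant to be wired
    # together; the model names, k=3, and the RetrievalQA chain are illustrative
    # choices, not taken from the original file).
    from langchain.chains import RetrievalQA

    vectorstore = create_vector_database("sentence-transformers/all-MiniLM-L6-v2")
    llm = get_llm("google/flan-t5-base")
    prompt = get_prompt_template()

    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vectorstore.as_retriever(search_kwargs={"k": 3}),
        chain_type_kwargs={"prompt": prompt},
    )
    result = qa_chain.invoke({"query": "What are common causes of a persistent dry cough?"})
    print(result["result"])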