Gary committed on
Commit cca58a9 · 1 Parent(s): 466b291

Initial commit

Files changed (3)
  1. app.py +63 -0
  2. indexer.py +74 -0
  3. requirements.txt +10 -0
app.py ADDED
@@ -0,0 +1,63 @@
from indexer import (
    load_raw_dataset,
    create_vector_database,
    get_llm,
    get_prompt_template,
)
import gradio as gr


def format_contexts(contexts):
    # Render the retrieved Q&A pairs as numbered references for the prompt.
    return "\n".join(
        [
            f"Reference {i+1}:\n{doc.metadata['question']}\n{doc.metadata['answer']}"
            for i, doc in enumerate(contexts)
        ]
    )


class CustomRAG:
    def __init__(self, vector_db, llm, prompt_template):
        self.vector_db = vector_db
        self.llm = llm
        self.prompt_template = prompt_template

    def run(self, query):
        # Retrieve the three most similar reference cases, splice them into
        # the prompt, and generate an answer grounded in those references.
        retriever = self.vector_db.as_retriever(search_kwargs={"k": 3})
        contexts = retriever.invoke(query)
        formatted_context = format_contexts(contexts)
        prompt = self.prompt_template.format(context=formatted_context, question=query)
        return self.llm.invoke(prompt), contexts


# Build the pipeline once at import time: loading the dataset, embedding it
# into FAISS, and loading a 7B model are far too slow to repeat per request.
docs = load_raw_dataset()
rag = CustomRAG(
    create_vector_database(docs, "all-MiniLM-L6-v2"),
    get_llm("FreedomIntelligence/HuatuoGPT-o1-7B"),
    get_prompt_template(),
)


def answer_question(query):
    response, _ = rag.run(query)
    return response


demo = gr.Interface(
    fn=answer_question,
    inputs=[
        gr.Textbox(
            label="Describe your medical concern",
            placeholder="e.g. I've been feeling tired and dizzy lately.",
            lines=3,
        ),
    ],
    outputs="text",
    title="Medical Assistant – Powered by AI & RAG",
    description=(
        "Get helpful insights based on your described symptoms. "
        "This assistant uses medical reference data to provide informative responses. "
        "Note: This is not a substitute for professional medical advice."
    ),
)

if __name__ == "__main__":
    demo.launch()
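
Because demo.launch() only runs when app.py is executed directly, the pipeline can also be exercised from a Python shell for a quick smoke test. A minimal sketch; the example query is illustrative:

from app import rag

# Ask one question directly and inspect which reference cases were retrieved.
response, contexts = rag.run("I've been feeling tired and dizzy lately.")
print(response)
for doc in contexts:
    print(doc.metadata["question"])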
indexer.py ADDED
@@ -0,0 +1,74 @@
from datasets import load_dataset
import pandas as pd
from langchain.schema import Document
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.prompts import PromptTemplate


def load_raw_dataset():
    # Pull the HealthCareMagic patient-doctor Q&A dataset from the Hub.
    dataset = load_dataset("lavita/ChatDoctor-HealthCareMagic-100k")

    df = pd.DataFrame(dataset["train"])

    # Embed question and answer together so retrieval can match on either.
    df["combined"] = df["input"] + " " + df["output"]

    docs = [
        Document(
            page_content=row["combined"],
            metadata={"question": row["input"], "answer": row["output"]},
        )
        for _, row in df.iterrows()
    ]

    return docs


def create_vector_database(docs, model_name):
    # Embed every document with a sentence-transformers model and index
    # the vectors in an in-memory FAISS store.
    embedding_model = HuggingFaceEmbeddings(model_name=model_name)
    vectorstore = FAISS.from_documents(docs, embedding_model)
    return vectorstore


def get_llm(model_name):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(
        model_name, torch_dtype="auto", device_map="auto"
    )

    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=512,
        temperature=0.7,
        do_sample=True,
    )

    # Wrap the transformers pipeline so LangChain can drive it.
    llm = HuggingFacePipeline(pipeline=pipe)
    return llm


def get_prompt_template():
    prompt_template = PromptTemplate(
        input_variables=["context", "question"],
        template="""Based on the following references and your medical knowledge, provide a detailed response.

References:
{context}

Question: {question}

Consider:
1. The key medical concepts in the question.
2. How the reference cases relate to this question.
3. The medical principles that should be applied.
4. Any potential complications or considerations.

Then give the final response:
""",
    )

    return prompt_template
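
FAISS.from_documents re-embeds all ~100k documents on every start, which is slow on a Space. LangChain's FAISS store can be persisted with save_local and load_local; a minimal sketch of an on-disk cache, assuming a writable faiss_index directory (this helper is not part of the commit):

import os

from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

INDEX_DIR = "faiss_index"  # hypothetical on-disk cache location


def load_or_build_index(docs, model_name):
    # Reuse a previously saved index if one exists; otherwise build and save it.
    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    if os.path.isdir(INDEX_DIR):
        # The docstore is pickle-backed, so deserialization must be opted
        # into; safe here only because we wrote this index ourselves.
        return FAISS.load_local(
            INDEX_DIR, embeddings, allow_dangerous_deserialization=True
        )
    vectorstore = FAISS.from_documents(docs, embeddings)
    vectorstore.save_local(INDEX_DIR)
    return vectorstore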
requirements.txt ADDED
@@ -0,0 +1,10 @@
gradio
transformers
sentence-transformers
torch
langchain
faiss-cpu
huggingface-hub
praw
langchain-community
accelerate
datasets
pandas
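
To run the app locally, install the dependencies and start the Gradio server (this assumes a GPU with enough memory for the 7B model):

pip install -r requirements.txt
python app.py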