import os
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain_community.llms import HuggingFacePipeline
from langchain.chains.question_answering import load_qa_chain
from langchain_core.prompts import PromptTemplate

# Load the Mistral model
model_path = "nvidia/Mistral-NeMo-Minitron-8B-instruct"
device = 'cuda' if torch.cuda.is_available() else 'cpu'
dtype = torch.bfloat16
print(f"Using device: {device}")

# Initialize the tokenizer
mistral_tokenizer = AutoTokenizer.from_pretrained(model_path)

# Initialize the model
mistral_model = AutoModelForCausalLM.from_pretrained(
    model_path,
    torch_dtype=dtype,
    device_map=device,
    low_cpu_mem_usage=True
)
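# Note: an 8B-parameter model in bfloat16 needs roughly 16 GB of memory, so a
# GPU-backed Space (or equivalent hardware) is assumed here.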

# Create the text-generation pipeline
# (the model is already placed on `device`, so no device argument is needed here)
text_generation_pipeline = pipeline(
    "text-generation",
    model=mistral_model,
    tokenizer=mistral_tokenizer,
    max_new_tokens=512,  # cap the generated tokens, not the total prompt + output length
    do_sample=True,      # required for temperature/top_p to take effect
    temperature=0.3,
    top_p=0.95
)

# Wrap the pipeline so LangChain chains can use it as an LLM
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

def initialize(file_path, question):
    try:
        prompt_template = """Answer the question as accurately as possible based on the provided context. If the context does not contain the answer, say "The answer is not provided in the context."

Context:
{context}

Question:
{question}

Answer:
"""
        prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
        if os.path.exists(file_path):
            pdf_loader = PyPDFLoader(file_path)
            pages = pdf_loader.load_and_split()
            # Limit the context to avoid exceeding the token limit
            max_pages = 5  # adjust for model capacity and document length
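            # The "stuff" chain below concatenates these pages into the prompt's
            # {context} slot itself, so no separate context string is built here.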
            try:
                # Build the question-answering chain around the Mistral pipeline
                stuff_chain = load_qa_chain(llm, chain_type="stuff", prompt=prompt)
                # Get the answer using the limited set of pages
                stuff_answer = stuff_chain(
                    {"input_documents": pages[:max_pages], "question": question},
                    return_only_outputs=True
                )
                main_answer = stuff_answer['output_text']
                # Generate a follow-up question
                follow_up_prompt = f"Based on this answer: {main_answer}\nGenerate one relevant follow-up question:"
                follow_up_inputs = mistral_tokenizer.encode(follow_up_prompt, return_tensors='pt').to(device)
                with torch.no_grad():
                    follow_up_outputs = mistral_model.generate(
                        follow_up_inputs,
                        max_new_tokens=64,  # a short question is enough; don't cap the total length
                        temperature=0.7,
                        top_p=0.9,
                        do_sample=True
                    )
                follow_up = mistral_tokenizer.decode(follow_up_outputs[0], skip_special_tokens=True)
                # The decoded text includes the prompt, so keep only what follows the final marker
                marker = "follow-up question:"
                if marker in follow_up.lower():
                    follow_up = follow_up[follow_up.lower().rfind(marker) + len(marker):].strip()
                combined_output = f"Answer: {main_answer}\n\nPossible follow-up question: {follow_up}"
                return combined_output
            except Exception as e:
                if "exceeds the maximum token count" in str(e):
                    return "Error: The document is too large to process. Please try a smaller document."
                else:
                    raise e
        else:
            return "Error: Unable to process the document. Please make sure the PDF file exists and is valid."
    except Exception as e:
        return f"An error occurred: {str(e)}"

# Define the Gradio interface handler
def pdf_qa(file, question):
    if file is None:
        return "Please upload a PDF file first."
    # gr.File may hand back a path string or a tempfile-like object, depending on the Gradio version
    file_path = file if isinstance(file, str) else file.name
    return initialize(file_path, question)

# Create the Gradio interface
demo = gr.Interface(
    fn=pdf_qa,
    inputs=[
        gr.File(label="Upload a PDF file", file_types=[".pdf"]),
        gr.Textbox(label="Ask about the document", placeholder="What is this document mainly about?")
    ],
    outputs=gr.Textbox(label="Mistral's answer"),
    title="Mistral-based PDF Question Answering",
    description="Upload a PDF file and ask a question; the Mistral model will analyze the content and provide an answer plus a possible follow-up question."
)

if __name__ == "__main__":
    demo.launch()