Spaces:
Running
Running
| from langchain_community.document_loaders import PyPDFLoader | |
| import os | |
| from langchain_openai import ChatOpenAI | |
| from langchain_chroma import Chroma | |
| from langchain_openai import OpenAIEmbeddings | |
| from langchain_text_splitters import RecursiveCharacterTextSplitter | |
| from langchain.chains.combine_documents import create_stuff_documents_chain | |
| from langchain_core.prompts import ChatPromptTemplate | |
# Looks up OPENAI_API_KEY in the process environment. The returned value is
# discarded, so this statement has no effect on its own.
# NOTE(review): presumably intended to verify the key is configured before
# the OpenAI-backed objects below are built -- confirm and make it explicit.
os.getenv("OPENAI_API_KEY")
def getPDF(file_path="./nike.pdf", *, chunk_size=1000, chunk_overlap=200):
    """Load a PDF and split its contents into overlapping chunks.

    Args:
        file_path: Location of the PDF handed to ``PyPDFLoader``.
            Defaults to ``"./nike.pdf"``.
        chunk_size: ``chunk_size`` passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000.
        chunk_overlap: ``chunk_overlap`` passed to
            ``RecursiveCharacterTextSplitter``. Defaults to 200.

    Returns:
        The list of chunked documents produced by the text splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    # Image extraction is disabled; only the text layer of the PDF is read.
    loader = PyPDFLoader(file_path, extract_images=False)
    # Equivalent to loader.load_and_split(text_splitter), which langchain-core
    # documents as deprecated; loading and splitting are done explicitly.
    return text_splitter.split_documents(loader.load())
def create_retriever(documents, *, k=1):
    """Index documents in a Chroma vector store and return a retriever for it.

    Args:
        documents: Documents to embed with ``OpenAIEmbeddings`` and store.
        k: Number of most-similar documents the retriever returns per query.
            Defaults to 1, the value that was previously hard-coded.

    Returns:
        A similarity-search retriever created from the new vector store.

    Raises:
        ValueError: If ``k`` is less than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k!r}")

    vectorstore = Chroma.from_documents(
        documents,
        embedding=OpenAIEmbeddings(),
    )
    return vectorstore.as_retriever(
        search_type="similarity",
        search_kwargs={"k": k},
    )
def create_prompt_llm_chain(system_prompt):
    """Build a question-answering chain that stuffs documents into the prompt.

    The system message is ``system_prompt`` followed by a blank line and a
    ``{context}`` placeholder; the human message is the ``{input}``
    placeholder. The chain uses a ``ChatOpenAI`` model named "gpt-4o-mini".

    Args:
        system_prompt: Instruction text placed at the start of the system
            message.
            NOTE(review): the text is used as part of a prompt template, so
            literal braces in it are presumably treated as template
            variables -- confirm against callers.

    Returns:
        The chain returned by ``create_stuff_documents_chain``.
    """
    llm = ChatOpenAI(model="gpt-4o-mini")
    # Append the placeholder that the stuff-documents chain fills in.
    system_template = "\n\n".join([system_prompt, "{context}"])
    messages = [
        ("system", system_template),
        ("human", "{input}"),
    ]
    chat_prompt = ChatPromptTemplate.from_messages(messages)
    return create_stuff_documents_chain(llm, chat_prompt)