Spaces:
Sleeping
Sleeping
# * This is for the RAG pipeline
from langchain_community.document_loaders import PyPDFLoader | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.vectorstores import Chroma | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.vectorstores import Chroma | |
import os | |
from dotenv import load_dotenv | |
# Pull variables from a local .env file into the process environment.
load_dotenv()

# Expose the key stored under OPENROUTE_API_KEY as OPENAI_API_KEY.
# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raises TypeError at import time whenever the source
# variable is missing. Copy it only when it is actually set.
# NOTE(review): the variable name reads "OPENROUTE", not "OPENROUTER" —
# left unchanged because the .env file presumably uses this spelling; confirm.
if 'OPENROUTE_API_KEY' in os.environ:
    os.environ['OPENAI_API_KEY'] = os.environ['OPENROUTE_API_KEY']
def dataIngestion(document):
    """Load a PDF with ``PyPDFLoader`` and return what the loader yields.

    Args:
        document: Value handed straight to ``PyPDFLoader`` (presumably the
            path of a PDF file — confirm against callers).

    Returns:
        The result of ``PyPDFLoader(document).load()``.
    """
    return PyPDFLoader(document).load()
def transform(ingested_docs, *, chunk_size=1000, chunk_overlap=200):
    """Split loaded documents into overlapping chunks.

    Args:
        ingested_docs: Documents to split; passed unchanged to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: ``chunk_size`` given to the splitter. Defaults to 1000,
            the value that used to be hard-coded here.
        chunk_overlap: ``chunk_overlap`` given to the splitter. Defaults to
            200, the value that used to be hard-coded here.

    Returns:
        The chunked documents produced by the splitter.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return text_splitter.split_documents(ingested_docs)
def vectorStoreAndEmbeddings(
    docs,
    query,
    *,
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    k=4,
):
    """Embed ``docs`` into a Chroma store and return the best matches for ``query``.

    The embedding model and the vector store are both rebuilt on every call.

    Args:
        docs: Documents to index; passed to ``Chroma.from_documents``.
        query: Search text passed to ``similarity_search``.
        model_name: Hugging Face model used for the embeddings. Defaults to
            the model that used to be hard-coded here.
        k: Number of results requested from ``similarity_search``. The
            default of 4 matches LangChain's documented default, so existing
            callers get the same number of hits as before.

    Returns:
        The list returned by ``similarity_search``, or an empty list when
        ``docs`` is empty or ``None``.
    """
    # Nothing to index means nothing can match; return early instead of
    # loading the embedding model and building an empty collection.
    if not docs:
        return []

    embeddings = HuggingFaceEmbeddings(model_name=model_name)
    db = Chroma.from_documents(documents=docs, embedding=embeddings)
    return db.similarity_search(query, k=k)