Spaces:
Sleeping
Sleeping
import os | |
from langchain.vectorstores import FAISS | |
from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains import RetrievalQA | |
from langchain.chat_models import ChatOpenAI | |
from dotenv import load_dotenv | |
load_dotenv() | |
# Build FAISS retriever from raw text | |
def build_retriever_from_text(text: str, chunk_size: int = 800, overlap: int = 100, k: int = 3): | |
splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=overlap) | |
chunks = splitter.split_text(text) | |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2") | |
vs = FAISS.from_texts(chunks, embeddings) | |
return vs.as_retriever(search_kwargs={"k": k}) | |
# Together.ai LLM (LLaMA) factory | |
def together_llm(model: str = "meta-llama/Llama-Vision-Free", temperature: float = 0.2, max_tokens: int = 512): | |
return ChatOpenAI( | |
model=model, | |
temperature=temperature, | |
max_tokens=max_tokens, | |
openai_api_key=os.getenv("TOGETHER_API_KEY"), | |
openai_api_base="https://api.together.xyz/v1" | |
) | |
# Q&A over PDF (RAG) | |
def rag_qa(text: str, question: str, model: str = "meta-llama/Llama-Vision-Free"): | |
retriever = build_retriever_from_text(text) | |
llm = together_llm(model=model) | |
qa = RetrievalQA.from_chain_type( | |
llm=llm, | |
retriever=retriever, | |
return_source_documents=True, | |
chain_type="stuff" | |
) | |
result = qa({"query": question}) | |
return result["result"], result.get("source_documents", []) | |
# Summarize PDF text | |
def summarize_text(text: str, model: str = "meta-llama/Llama-Vision-Free"): | |
prompt = ( | |
"You are a concise technical summarizer. Summarize the following document in 6-10 bullet points, " | |
"preserving key facts, numbers, and definitions. Text:\n\n" | |
f"{text}" | |
) | |
llm = together_llm(model=model, temperature=0.2, max_tokens=400) | |
output = llm.invoke(prompt) | |
return output.content.strip() | |