Upload 3 files
- app.py +77 -0
- apt.txt +1 -0
- requirements.txt +5 -0
app.py
ADDED
@@ -0,0 +1,77 @@
+# app.py
+
+from pypdf import PdfReader
+import gradio as gr
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+
+# Load embedding model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+
+# Global state to persist embeddings and chunks
+index = None
+chunks = []
+
+# Step 1: Extract text from uploaded PDFs
+def extract_text_from_pdfs(files):
+    all_text = ""
+    for file in files:
+        reader = PdfReader(file.name)
+        for page in reader.pages:
+            text = page.extract_text()
+            if text:
+                all_text += text + "\n"
+    return all_text
+
+# Step 2: Chunk text
+def chunk_text(text, chunk_size=500, overlap=50):
+    words = text.split()
+    result = []
+    for i in range(0, len(words), chunk_size - overlap):
+        chunk = " ".join(words[i:i + chunk_size])
+        result.append(chunk)
+    return result
+
+# Step 3: Embed and store chunks
+def create_index(text_chunks):
+    global index, chunks
+    chunks = text_chunks
+    embeddings = model.encode(chunks)
+    index = faiss.IndexFlatL2(len(embeddings[0]))
+    index.add(np.array(embeddings))
+
+# Step 4: Retrieve top relevant chunks
+def get_top_chunks(query, k=3):
+    query_vec = model.encode([query])
+    D, I = index.search(np.array(query_vec), k)
+    return [chunks[i] for i in I[0]]
+
+# Step 5: Fake LLM response (replace with real API call if needed)
+def call_llm(context, question):
+    return f"Answer (simulated): Based on context:\n\n{context}\n\nQuestion: {question}"
+
+# Step 6: Gradio main function
+def rag_pipeline(files, question):
+    text = extract_text_from_pdfs(files)
+    text_chunks = chunk_text(text)
+    create_index(text_chunks)
+    top_chunks = get_top_chunks(question)
+    context = "\n".join(top_chunks)
+    answer = call_llm(context, question)
+    return answer
+
+# Step 7: Gradio UI
+demo = gr.Interface(
+    fn=rag_pipeline,
+    inputs=[
+        gr.File(file_types=[".pdf"], file_count="multiple", label="Upload PDFs"),
+        gr.Textbox(lines=2, label="Ask a question")
+    ],
+    outputs="text",
+    title="RAG PDF Chatbot",
+    description="Upload PDFs and ask questions based on their content"
+)
+
+if __name__ == "__main__":
+    demo.launch()
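
Note: the call_llm stub above only echoes the retrieved context back, as its comment says. One way to swap in a real model is a chat-completion call; the sketch below is an assumption, not part of this commit. It presumes the openai package is added to requirements.txt, an OPENAI_API_KEY secret is configured on the Space, and the model name is illustrative.

# Hypothetical replacement for call_llm using the OpenAI Python client.
# Assumes `openai` is in requirements.txt and OPENAI_API_KEY is set as a
# Space secret; the model name below is illustrative only.
import os
from openai import OpenAI

client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

def call_llm(context, question):
    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": "Answer using only the provided context."},
            {"role": "user", "content": f"Context:\n{context}\n\nQuestion: {question}"},
        ],
    )
    return response.choices[0].message.content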
apt.txt
ADDED
@@ -0,0 +1 @@
+ffmpeg
requirements.txt
ADDED
@@ -0,0 +1,5 @@
+gradio
+pypdf
+sentence-transformers
+faiss-cpu
+numpy
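
With the dependencies above installed, the chunk-embed-retrieve path can be sanity-checked locally without launching the Gradio UI. A minimal sketch, assuming the first file is saved as app.py next to this script; the sample text is made up for the test.

# Smoke test for the chunk -> embed -> retrieve pipeline, skipping the
# PDF upload and UI. Importing app loads the SentenceTransformer model.
from app import chunk_text, create_index, get_top_chunks

text = "FAISS performs efficient similarity search over dense vectors. " * 200
create_index(chunk_text(text, chunk_size=50, overlap=10))
print(get_top_chunks("What does FAISS do?", k=2))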