Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -8,16 +8,16 @@ from chromadb.config import Settings
|
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
# Device setup
|
| 11 |
-
device = -1 #
|
| 12 |
print("Device set to: CPU")
|
| 13 |
|
| 14 |
# Load CSV data
|
| 15 |
-
df = pd.read_csv("iec_college_data.csv").dropna(subset=["content"]).reset_index(drop=True)
|
| 16 |
|
| 17 |
# Load embedding model on CPU
|
| 18 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
|
| 19 |
|
| 20 |
-
# ChromaDB
|
| 21 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
| 22 |
collection_name = "iec_data"
|
| 23 |
|
|
@@ -45,8 +45,8 @@ if collection.count() == 0:
|
|
| 45 |
print(f"Indexed {idx}/{len(df)}")
|
| 46 |
print("Indexing complete.")
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
qa_pipeline = pipeline("
|
| 50 |
|
| 51 |
# QA function
|
| 52 |
def answer_question(user_question):
|
|
@@ -55,11 +55,10 @@ def answer_question(user_question):
|
|
| 55 |
context = "\n".join(results["documents"][0])
|
| 56 |
if len(context.split()) > 400:
|
| 57 |
context = " ".join(context.split()[:400])
|
| 58 |
-
|
| 59 |
-
result
|
| 60 |
-
return result.strip()
|
| 61 |
|
| 62 |
-
# Gradio
|
| 63 |
iface = gr.Interface(
|
| 64 |
fn=answer_question,
|
| 65 |
inputs=gr.Textbox(lines=2, placeholder="Ask about IEC College..."),
|
|
|
|
| 8 |
from transformers import pipeline
|
| 9 |
|
| 10 |
# Device setup
|
| 11 |
+
device = -1 # Use CPU
|
| 12 |
print("Device set to: CPU")
|
| 13 |
|
| 14 |
# Load CSV data
|
| 15 |
+
df = pd.read_csv("/mnt/data/iec_college_data.csv").dropna(subset=["content"]).reset_index(drop=True)
|
| 16 |
|
| 17 |
# Load embedding model on CPU
|
| 18 |
embed_model = SentenceTransformer("all-MiniLM-L6-v2", device="cpu")
|
| 19 |
|
| 20 |
+
# ChromaDB setup
|
| 21 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
| 22 |
collection_name = "iec_data"
|
| 23 |
|
|
|
|
| 45 |
print(f"Indexed {idx}/{len(df)}")
|
| 46 |
print("Indexing complete.")
|
| 47 |
|
| 48 |
+
# Use lightweight extractive QA model
|
| 49 |
+
qa_pipeline = pipeline("question-answering", model="distilbert-base-uncased", device=device)
|
| 50 |
|
| 51 |
# QA function
|
| 52 |
def answer_question(user_question):
|
|
|
|
| 55 |
context = "\n".join(results["documents"][0])
|
| 56 |
if len(context.split()) > 400:
|
| 57 |
context = " ".join(context.split()[:400])
|
| 58 |
+
result = qa_pipeline(question=user_question, context=context)
|
| 59 |
+
return result["answer"]
|
|
|
|
| 60 |
|
| 61 |
+
# Gradio UI
|
| 62 |
iface = gr.Interface(
|
| 63 |
fn=answer_question,
|
| 64 |
inputs=gr.Textbox(lines=2, placeholder="Ask about IEC College..."),
|