import faiss
import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer
from transformers import pipeline


# --- Corpus ------------------------------------------------------------------
# Rows with a missing abstract are dropped before anything else: a NaN cell
# cannot be embedded, and filtering here keeps the positional rows of `df`
# aligned with the ids FAISS assigns (0..n-1 in insertion order).
df = pd.read_csv("abstracts.csv").dropna(subset=["abstract"]).reset_index(drop=True)
abstracts = df["abstract"].tolist()
if not abstracts:
    raise ValueError("abstracts.csv contains no rows with a non-empty 'abstract' column")

# --- Embeddings --------------------------------------------------------------
# The same encoder instance is reused at query time so that queries and
# abstracts live in the same vector space.
embedder = SentenceTransformer("all-MiniLM-L6-v2")
abstract_embeddings = embedder.encode(abstracts, show_progress_bar=True)

# --- Vector index ------------------------------------------------------------
# Flat L2 index sized from the encoder's output width (second array axis).
index = faiss.IndexFlatL2(abstract_embeddings.shape[1])
index.add(abstract_embeddings)

# --- Generator ---------------------------------------------------------------
# max_new_tokens bounds the length of each generated answer.
llm = pipeline(
    "text-generation",
    model="tiiuae/falcon-7b-instruct",
    max_new_tokens=300,
)


def verify_claim(claim):
    """Check a scientific claim against the closest abstracts using the LLM.

    The claim is embedded with the module-level encoder, its nearest abstracts
    are looked up in the FAISS index, and the LLM is prompted with the claim
    plus that evidence.

    Args:
        claim: Free-text claim typed by the user; may be empty or ``None``.

    Returns:
        A Markdown string with the retrieved abstracts followed by the LLM's
        answer, or a short notice when there is nothing to verify.
    """
    claim = (claim or "").strip()
    if not claim:
        return "Please enter a scientific claim to verify."

    # Never request more neighbours than the index holds, and stop early when
    # there is nothing to retrieve from.
    k = min(3, index.ntotal)
    if k == 0:
        return "No abstracts are indexed, so the claim cannot be checked."

    query_vec = embedder.encode([claim])
    _, neighbor_ids = index.search(query_vec, k)

    # FAISS marks missing hits with -1, which iloc would silently read as the
    # last row of the table; keep only real row positions.
    hit_rows = [int(i) for i in neighbor_ids[0] if i >= 0]
    top_abstracts = df.iloc[hit_rows]["abstract"].tolist()
    context = "\n".join(top_abstracts)

    prompt = (
        f"Claim: {claim}\n\n"
        f"Evidence:\n{context}\n\n"
        "Answer True, False, or Uncertain. Then explain why:\n"
    )

    # The text-generation pipeline echoes the prompt in `generated_text` by
    # default; ask for the continuation only so the evidence is not shown twice.
    output = llm(prompt, return_full_text=False)[0]["generated_text"].strip()

    # NOTE: the section glyphs were mis-encoded in the original source; the
    # first one could not be recovered exactly, so a document emoji stands in.
    return f"📄 **Top Abstracts:**\n{context}\n\n🧠 **LLM Response:**\n{output}"


# Web UI: a single text box feeds verify_claim and its Markdown result is
# rendered as-is. The interface is bound to a name so it can be referenced
# after construction.
demo = gr.Interface(
    fn=verify_claim,
    inputs=gr.Textbox(label="Enter a scientific claim"),
    outputs=gr.Markdown(),
    title="🔬 Scientific Claim Verifier",
    description="Checks the validity of a scientific claim using PubMed abstracts + LLM",
)

# Start serving the app.
demo.launch()