(.*?)", resp.text, flags=re.DOTALL)
if not abstracts:
abstracts = [""] * len(titles)
titles = [re.sub(r"\s+", " ", t).strip() for t in titles]
abstracts = [re.sub(r"\s+", " ", a).strip() for a in abstracts]
return titles, abstracts
def semantic_rerank_claim_abstracts(claim, titles, abstracts, top_k=TOP_ABSTRACTS):
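    """Re-rank PubMed hits by SBERT cosine similarity between the claim and
    each "title. abstract" string; return the top_k title/abstract pairs."""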
doc_texts = [f"{t}. {a}" for t, a in zip(titles, abstracts)]
doc_embs = sbert_model.encode(doc_texts)
claim_emb = sbert_model.encode([claim])
sims = util.pytorch_cos_sim(claim_emb, doc_embs)[0]
idxs = np.argsort(-sims.cpu().numpy())[:top_k]
return [titles[i] for i in idxs], [abstracts[i] for i in idxs]
def extract_evidence_nli(claim, title, abstract):
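    """Score every sentence of the abstract against the claim with the NLI
    model; `title` is accepted for a uniform call signature but unused here."""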
sentences = sent_tokenize(abstract)
evidence = []
for sent in sentences:
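        # The sentence goes in first (premise) and the claim second (hypothesis),
        # the usual ordering for NLI sequence-pair models.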
encoding = nli_tokenizer(
sent, claim,
return_tensors='pt',
truncation=True,
max_length=256,
padding=True
)
with torch.no_grad():
outputs = nli_model(**encoding)
probs = torch.softmax(outputs.logits, dim=1).cpu().numpy().flatten()
max_idx = probs.argmax()
label = NLI_LABELS[max_idx]
score = float(probs[max_idx])
evidence.append({
"sentence": sent,
"label": label,
"score": score
})
return evidence
def get_summarizer(model_choice):
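    """Build a text-generation pipeline for the selected model and cache it,
    so repeated requests reuse the already-loaded weights."""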
model_id = model_options[model_choice]
if model_id in pipe_cache:
return pipe_cache[model_id]
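    # fp16 halves GPU memory; device_map="auto" lets accelerate place model layers.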
kwargs = {
"model": model_id,
"torch_dtype": torch.float16 if torch.cuda.is_available() else torch.float32,
"device_map": "auto",
"max_new_tokens": 128
}
# Add token for gated models (Gemma, Llama)
if any(gated in model_id for gated in ["meta-llama", "gemma"]):
hf_token = os.environ.get("HF_TOKEN", None)
if hf_token:
kwargs["token"] = hf_token
else:
raise RuntimeError(f"Model '{model_choice}' requires a Hugging Face access token. Please set 'HF_TOKEN' as a Space secret or environment variable.")
pipe_cache[model_id] = pipeline("text-generation", **kwargs)
return pipe_cache[model_id]
def summarize_evidence_llm(claim, evidence_list, model_choice):
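    """Bucket the evidence sentences by NLI label, then ask the chat model for
    a short lay-audience verdict (true / false / uncertain) on the claim."""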
support = [ev['sentence'] for ev in evidence_list if ev['label'] == 'ENTAILMENT']
contradict = [ev['sentence'] for ev in evidence_list if ev['label'] == 'CONTRADICTION']
messages = [
{"role": "system", "content": "You are a helpful biomedical assistant. Summarize scientific evidence in plain English for the general public."},
{"role": "user", "content":
f"Claim: {claim}\n"
f"Supporting evidence:\n" + ("\n".join(support) if support else "None") + "\n"
f"Contradicting evidence:\n" + ("\n".join(contradict) if contradict else "None") + "\n"
"Explain to a layperson: Is this claim likely true, false, or uncertain based on the evidence above? Give a brief and simple explanation in 2-3 sentences."
}
]
try:
pipe = get_summarizer(model_choice)
outputs = pipe(
messages,
max_new_tokens=96,
            do_sample=False,  # greedy decoding, so no sampling temperature is needed
)
out = outputs[0]["generated_text"]
if isinstance(out, list) and "content" in out[-1]:
return out[-1]["content"].strip()
return out.strip()
except Exception as e:
return f"Summary could not be generated: {e}"
def format_evidence_html(evidence_list):
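    """Render evidence sentences as color-coded HTML blocks:
    green = support, red = contradiction, gray = neutral."""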
color_map = {"ENTAILMENT":"#e6ffe6", "CONTRADICTION":"#ffe6e6", "NEUTRAL":"#f8f8f8"}
html = ""
for ev in evidence_list:
color = color_map[ev["label"]]
        html += (
            f'<div style="background-color:{color}; padding:4px; margin:2px; border-radius:4px;">'
            f'<b>{ev["label"]}</b> (confidence {ev["score"]:.2f}): {ev["sentence"]}'
            '</div>'
        )
return html
def factcheck_app(article_url, model_choice):
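    """End-to-end pipeline: download the article, pull out headline-matched
    claims, retrieve and re-rank PubMed abstracts, run sentence-level NLI,
    and summarize the evidence. Returns (results_html, all_results)."""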
try:
art = Article(article_url)
art.download()
art.parse()
text = art.text
headline = art.title
except Exception as e:
return f"Error downloading or reading article: {e}", None
claims = extract_claims_pattern(text)
matched_claims = match_claims_to_headline(claims, headline)
if not matched_claims:
return "No check-worthy claims found that match the headline.", None
results_html = ""
all_results = []
for claim in matched_claims:
titles, abstracts = retrieve_pubmed_abstracts(claim)
if not titles:
results_html += f"
Claim: {claim}
No PubMed results found.
"
all_results.append({"claim": claim, "summary": "No PubMed results found.", "evidence": []})
continue
top_titles, top_abstracts = semantic_rerank_claim_abstracts(claim, titles, abstracts)
        # Optionally keep one non-top abstract as a control for the NLI step.
        top_set = set(top_titles)
        non_top = [i for i in range(len(titles)) if titles[i] not in top_set]
        idx_non_top = random.choice(non_top) if non_top else None
evidence_results = []
for title, abstract in zip(top_titles, top_abstracts):
evidence = extract_evidence_nli(claim, title, abstract)
evidence_results.append({"title": title, "evidence": evidence})
if idx_non_top is not None:
control_ev = extract_evidence_nli(claim, titles[idx_non_top], abstracts[idx_non_top])
evidence_results.append({"title": f"(Control) {titles[idx_non_top]}", "evidence": control_ev})
all_evidence_sentences = [ev for abs_res in evidence_results for ev in abs_res["evidence"]]
summary = summarize_evidence_llm(claim, all_evidence_sentences, model_choice)
results_html += f"
Claim: {claim}
Layman summary: {summary}
"
for abs_res in evidence_results:
results_html += f"
Abstract: {abs_res['title']}
{format_evidence_html(abs_res['evidence'])}"
all_results.append({"claim": claim, "summary": summary, "evidence": evidence_results})
return results_html, all_results
description = """
**What does this app do?**
This app extracts key scientific claims from a news article, finds the most relevant PubMed biomedical research papers, checks which sentences in those papers support or contradict each claim, and gives you a plain-English summary verdict.
**How to use it:**
1. Paste the link to a biomedical news article.
2. Choose an AI summarizer model below. If you have no special access, use 'TinyLlama' (works for everyone).
3. Wait for the results.
4. For each claim, you will see:
- A plain summary of what research says.
- Color-coded evidence sentences (green=support, red=contradict, gray=neutral).
- The titles of the most relevant PubMed articles.
Everything is 100% open source and runs on this website; no personal information or cloud API is needed.
"""
iface = gr.Interface(
fn=factcheck_app,
inputs=[
gr.Textbox(lines=2, label="Paste a news article URL"),
gr.Dropdown(
choices=list(model_options.keys()),
value="TinyLlama-1.1B-Chat (Open)",
label="Choose summarizer model"
)
],
outputs=[gr.HTML(label="Fact-Check Results (Summary & Evidence)"), gr.JSON(label="All Results (JSON)")],
title="BioMedical News Fact-Checking & Research Evidence Finder",
description=description,
examples=[["https://www.medicalnewstoday.com/articles/omicron-what-do-we-know-about-the-stealth-variant", "TinyLlama-1.1B-Chat (Open)"]],
allow_flagging="never"
)
iface.launch(share=False, server_name='0.0.0.0', show_error=True)