(.*?)", resp.text, flags=re.DOTALL)
if not abstracts:
abstracts = [""] * len(titles)
titles = [re.sub(r"\s+", " ", t).strip() for t in titles]
abstracts = [re.sub(r"\s+", " ", a).strip() for a in abstracts]
return titles, abstracts
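# Rerank the retrieved abstracts against the claim: each title+abstract is embedded
# with the sentence-transformers model (sbert_model, loaded earlier) and the top_k
# most cosine-similar documents are kept.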
def semantic_rerank_claim_abstracts(claim, titles, abstracts, top_k=TOP_ABSTRACTS):
    doc_texts = [f"{t}. {a}" for t, a in zip(titles, abstracts)]
    doc_embs = sbert_model.encode(doc_texts)
    claim_emb = sbert_model.encode([claim])
    sims = util.pytorch_cos_sim(claim_emb, doc_embs)[0]
    idxs = np.argsort(-sims.cpu().numpy())[:top_k]
    return [titles[i] for i in idxs], [abstracts[i] for i in idxs]
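# Sentence-level NLI over an abstract: each sentence is paired with the claim
# (sentence as premise, claim as hypothesis) and scored by nli_model. NLI_LABELS,
# defined earlier, is assumed to map the model's logit order to the
# ENTAILMENT / NEUTRAL / CONTRADICTION label names used below.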
def extract_evidence_nli(claim, title, abstract):
    sentences = sent_tokenize(abstract)
    evidence = []
    for sent in sentences:
        encoding = nli_tokenizer(
            sent, claim,
            return_tensors='pt',
            truncation=True,
            max_length=256,
            padding=True
        )
        with torch.no_grad():
            outputs = nli_model(**encoding)
        probs = torch.softmax(outputs.logits, dim=1).cpu().numpy().flatten()
        max_idx = probs.argmax()
        label = NLI_LABELS[max_idx]
        score = float(probs[max_idx])
        evidence.append({
            "sentence": sent,
            "label": label,
            "score": score
        })
    return evidence
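# Summarize the NLI evidence with the local LLM: supporting and contradicting
# sentences are folded into a prompt and the model (llm, loaded earlier) is asked
# for a short lay-reader verdict.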
def summarize_evidence_llm(claim, evidence_list):
    support = [ev['sentence'] for ev in evidence_list if ev['label'] == 'ENTAILMENT']
    contradict = [ev['sentence'] for ev in evidence_list if ev['label'] == 'CONTRADICTION']
    prompt = (
        f"Claim: {claim}\n"
        f"Supporting evidence:\n" + ("\n".join(support) if support else "None") + "\n"
        f"Contradicting evidence:\n" + ("\n".join(contradict) if contradict else "None") + "\n"
        "Explain to a layperson: Is this claim likely true, false, or uncertain based on the evidence above? "
        "Give a brief and simple explanation in 2-3 sentences."
    )
    try:
        output = llm(
            prompt,
            max_tokens=128,
            stop=["\n\n"],
            temperature=0.4,
            echo=False
        )
        summary = output['choices'][0]['text'].strip()
        return summary
    except Exception as e:
        return f"Summary could not be generated: {e}"
def format_evidence_html(evidence_list):
    color_map = {"ENTAILMENT": "#e6ffe6", "CONTRADICTION": "#ffe6e6", "NEUTRAL": "#f8f8f8"}
    html = ""
    for ev in evidence_list:
        color = color_map[ev["label"]]
        html += (
            f'<div style="background-color:{color}; padding:6px; margin:4px 0; border-radius:4px;">'
            f'<b>{ev["label"]}</b> (confidence {ev["score"]:.2f}): {ev["sentence"]}'
            '</div>'
        )
    return html
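# End-to-end pipeline for one article URL: download and parse the article, extract
# check-worthy claims, retrieve and rerank PubMed abstracts, run sentence-level NLI
# on the top abstracts (plus one randomly chosen non-top abstract as a control),
# and summarize the evidence for each claim.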
def factcheck_app(article_url):
    try:
        art = Article(article_url)
        art.download()
        art.parse()
        text = art.text
        headline = art.title
    except Exception as e:
        return f"Error downloading or reading article: {e}", None
    claims = extract_claims_pattern(text)
    matched_claims = match_claims_to_headline(claims, headline)
    if not matched_claims:
        return "No check-worthy claims found that match the headline.", None
    results_html = ""
    all_results = []
    for claim in matched_claims:
        titles, abstracts = retrieve_pubmed_abstracts(claim)
        if not titles:
            results_html += (
                f"<div><b>Claim:</b> {claim}<br>"
                "<i>No PubMed results found.</i></div>"
            )
            all_results.append({"claim": claim, "summary": "No PubMed results found.", "evidence": []})
            continue
        top_titles, top_abstracts = semantic_rerank_claim_abstracts(claim, titles, abstracts)
        top_idxs = [titles.index(t) for t in top_titles]
        non_top_idxs = [i for i in range(len(titles)) if i not in top_idxs]
        idx_non_top = random.choice(non_top_idxs) if non_top_idxs else None
        evidence_results = []
        for title, abstract in zip(top_titles, top_abstracts):
            evidence = extract_evidence_nli(claim, title, abstract)
            evidence_results.append({"title": title, "evidence": evidence})
        if idx_non_top is not None:
            control_ev = extract_evidence_nli(claim, titles[idx_non_top], abstracts[idx_non_top])
            evidence_results.append({"title": f"(Control) {titles[idx_non_top]}", "evidence": control_ev})
        all_evidence_sentences = [ev for abs_res in evidence_results for ev in abs_res["evidence"]]
        summary = summarize_evidence_llm(claim, all_evidence_sentences)
        results_html += (
            f"<div><b>Claim:</b> {claim}<br>"
            f"<b>Layman summary:</b> {summary}</div>"
        )
        for abs_res in evidence_results:
            results_html += (
                f"<div><b>Abstract:</b> {abs_res['title']}<br>"
                f"{format_evidence_html(abs_res['evidence'])}</div>"
            )
        all_results.append({"claim": claim, "summary": summary, "evidence": evidence_results})
    return results_html, all_results
description = """
What does this app do?
This app extracts key scientific claims from a news article, finds the most relevant PubMed biomedical research papers, checks which sentences in those papers support or contradict each claim, and gives you a plain-English summary verdict.
How to use it:
1. Paste the link to a biomedical news article.
2. Wait for the results.
3. For each claim, you will see:
   - A plain-language summary of what the research says.
   - Color-coded evidence sentences (green = support, red = contradict, gray = neutral).
   - Links to the original PubMed research.
Everything is 100% open source and runs on this website; no personal info or cloud API is needed.
"""
iface = gr.Interface(
    fn=factcheck_app,
    inputs=gr.Textbox(lines=2, label="Paste a news article URL"),
    outputs=[gr.HTML(label="Fact-Check Results (Summary & Evidence)"), gr.JSON(label="All Results (JSON)")],
    title="BioMedical News Fact-Checking & Research Evidence Finder",
    description=description,
    examples=[["https://www.medicalnewstoday.com/articles/omicron-what-do-we-know-about-the-stealth-variant"]],
    allow_flagging="never"
)
iface.launch(share=False, server_name='0.0.0.0', show_error=True)
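# For quick testing without the web UI, the pipeline can also be called directly, e.g.:
#   html, results = factcheck_app(
#       "https://www.medicalnewstoday.com/articles/omicron-what-do-we-know-about-the-stealth-variant")
#   print(results)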