import streamlit as st
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline

# Browser-tab title and centered page layout for the app.
st.set_page_config(
    page_title="Disease NER",
    layout="centered",
)

# Page heading followed by a one-line description of what the app does.
st.title("🧠 Disease Named Entity Recognition (NER)")
st.write(
    "This app uses a BioBERT model to detect **disease entities** in clinical or medical text."
)

# Load the model
@st.cache_resource
def load_model():
    """Build the disease-NER pipeline used by the app.

    Loads the tokenizer and the token-classification model for the
    ``Ishan0612/biobert-ner-disease-ncbi`` checkpoint and wraps them in a
    ``transformers`` ``"ner"`` pipeline configured with
    ``aggregation_strategy="simple"``.

    The function is wrapped in ``st.cache_resource`` so that Streamlit can
    reuse the returned object instead of rebuilding it on every script run.

    Returns:
        The configured ``transformers`` pipeline object.
    """
    checkpoint = "Ishan0612/biobert-ner-disease-ncbi"
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # loaded before the model.
    return pipeline(
        "ner",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForTokenClassification.from_pretrained(checkpoint),
        aggregation_strategy="simple",
    )

# Obtain the NER pipeline from the loader.
ner = load_model()

# Free-text input, pre-filled with an example sentence.
text_input = st.text_area(
    "Enter a medical sentence below:",
    value="The patient was diagnosed with diabetes mellitus and rheumatoid arthritis.",
)

# Inference is only attempted on the script run where the button was clicked.
run_clicked = st.button("Find Disease Entities")
if run_clicked:
    if not text_input.strip():
        # Blank or whitespace-only input: prompt the user instead of calling the model.
        st.warning("Please enter some text.")
    else:
        entities = ner(text_input)

        if not entities:
            st.info("No disease entities found in the given text.")
        else:
            st.subheader("🧬 Disease Entities Found:")
            # One markdown bullet per entity: text, label, and score to two decimals.
            for e in entities:
                st.markdown(f"- **{e['word']}** ({e['entity_group']}) – Score: `{e['score']:.2f}`")