TharushiPerera's picture
Update app.py
5c91e4c verified
raw
history blame
3.67 kB
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
# ✅ MUST be first Streamlit command
# The page title carries a newspaper emoji; the previous text was the UTF-8
# bytes of that emoji decoded with the wrong codec (mojibake).
st.set_page_config(page_title="📰 News Classifier & Q&A App", layout="wide")
# ----------------- Model Loader -----------------
@st.cache_resource
def load_text_classifier():
    """Build the news text-classification pipeline.

    The tokenizer and the sequence-classification model are both loaded from
    the "MihanTilk/News_Classifier" checkpoint and wrapped in a
    "text-classification" pipeline. The function is decorated with
    ``st.cache_resource``.

    Returns:
        The pipeline object produced by ``transformers.pipeline``.
    """
    checkpoint = "MihanTilk/News_Classifier"
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # still loaded before the model.
    return pipeline(
        "text-classification",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )
# Load Classifier & QA pipeline
@st.cache_resource
def load_qa_pipeline():
    """Build the extractive question-answering pipeline.

    Streamlit re-executes the whole script on every widget interaction.
    Creating the pipeline at module level without caching would therefore
    reload the "deepset/roberta-base-squad2" model on each rerun; wrapping
    the construction in ``st.cache_resource`` keeps a single shared instance.

    Returns:
        The "question-answering" pipeline object.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


# Module-level handles used by the rest of the script.
classifier = load_text_classifier()
qa_pipeline = load_qa_pipeline()
# ----------------- CSS Styling -----------------
# Stylesheet injected as raw HTML: page background, red-bordered inputs,
# red action buttons, green download button and dark heading/paragraph text.
_CUSTOM_CSS = """
<style>
.main { background-color: #f4f4f4; }
.stTextInput, .stFileUploader { border: 2px solid #ff4b4b; border-radius: 10px; }
.stButton>button { background-color: #ff4b4b; color: white; border-radius: 10px; }
.stDownloadButton>button { background-color: #4CAF50; color: white; border-radius: 10px; }
h1, h2, h3, h4, h5, h6, p { color: #333333; }
</style>
"""
# unsafe_allow_html is required for the <style> tag to be emitted verbatim.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ----------------- App Title -----------------
# Main heading (emoji restored from mis-decoded UTF-8 bytes) followed by a
# short HTML-styled call to action.
st.title("📰 News Classification & Q&A App")
st.markdown(
    "<h4 style='color:#ff4b4b;'>Upload a CSV to classify news headlines and ask questions!</h4>",
    unsafe_allow_html=True,
)
# ----------------- Upload CSV -----------------
@st.cache_data(show_spinner=False)
def _classify_texts(texts):
    """Return the predicted label for every string in *texts*.

    Decorated with ``st.cache_data`` so that reruns triggered by widgets
    (typing a question, pressing a button) reuse earlier predictions for the
    same texts instead of running the model over the whole file again.

    Args:
        texts: List of cleaned article strings.

    Returns:
        List of label strings, one per input, in input order.
    """
    # truncation=True clips inputs longer than the model's maximum sequence
    # length; without it an over-long article makes the pipeline call fail.
    return [classifier(text, truncation=True)[0]["label"] for text in texts]


st.subheader("📂 Upload a CSV File")
uploaded_file = st.file_uploader("Choose a CSV file...", type=["csv"])

if uploaded_file:
    # Read and preprocess; report unreadable files instead of crashing.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the uploaded CSV: {exc}")
        st.stop()

    if "content" not in df.columns:
        st.error("❌ The uploaded CSV must contain a 'content' column.")
        st.stop()

    # Preprocess text. Missing cells are replaced with empty strings first so
    # they do not turn into the literal text "nan" after astype(str).
    df["cleaned_text"] = df["content"].fillna("").astype(str).str.lower().str.strip()
    st.write("📊 Preview of Uploaded Data:", df.head())

    # ----------------- Classification -----------------
    with st.spinner("🔍 Classifying news articles..."):
        df["class"] = _classify_texts(df["cleaned_text"].tolist())
    st.success("✅ Classification Complete!")
    st.write("🔎 Classified Results:", df[["content", "class"]].head())

    # ----------------- Download -----------------
    st.subheader("📥 Download Results")
    csv_output = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download Output CSV",
        data=csv_output,
        file_name="output.csv",
        mime="text/csv",
    )

    # All cleaned articles joined into one string; used both as the Q&A
    # context and as the word-cloud input.
    combined_text = " ".join(df["cleaned_text"].tolist())

    # ----------------- Q&A Section -----------------
    st.subheader("💬 Ask a Question")
    question = st.text_input("🔍 What do you want to know about the content?")
    if st.button("Get Answer"):
        # Guard the inputs up front rather than letting the QA pipeline raise
        # on an empty question or an empty context.
        if not question.strip():
            st.warning("Please enter a question first.")
        elif not combined_text.strip():
            st.warning("The uploaded file contains no text to search.")
        else:
            with st.spinner("Answering..."):
                result = qa_pipeline(question=question, context=combined_text)
            st.success(f"📝 **Answer:** {result['answer']}")

    # ----------------- Word Cloud -----------------
    st.subheader("☁️ Word Cloud of News Text")
    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color="white"
        ).generate(combined_text)
    except ValueError:
        # WordCloud raises ValueError when no usable words remain.
        st.info("Not enough text to build a word cloud.")
    else:
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        # Figures created through pyplot stay registered until closed; close
        # explicitly so they do not accumulate across reruns.
        plt.close(fig)
# ----------------- Footer -----------------
# Horizontal rule followed by a centred credit line. The rocket emoji is
# restored from mis-decoded UTF-8 bytes and the dangling "with" is dropped so
# the sentence reads correctly.
st.markdown("---")
st.markdown(
    "<p style='text-align:center; color:#666;'>🚀 Built using Streamlit & Hugging Face</p>",
    unsafe_allow_html=True,
)