# Residue captured along with the source ("Spaces: Sleeping") — not part of the program.
import streamlit as st | |
import pandas as pd | |
import matplotlib.pyplot as plt | |
from wordcloud import WordCloud | |
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification | |
# Page configuration has to be the first Streamlit command issued by the script.
_PAGE_SETTINGS = {
    "page_title": "π° News Classifier & Q&A App",
    "layout": "wide",
}
st.set_page_config(**_PAGE_SETTINGS)
# ----------------- Model Loader -----------------
@st.cache_resource
def load_text_classifier():
    """Build the news text-classification pipeline.

    Loads the tokenizer and the sequence-classification model published as
    ``MihanTilk/News_Classifier`` and wraps both in a ``text-classification``
    pipeline.

    The function is wrapped in ``st.cache_resource`` because Streamlit
    re-executes the whole script on every widget interaction; without the
    cache the model would be re-instantiated on each rerun.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    model_name = "MihanTilk/News_Classifier"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)
# Load Classifier & QA pipeline
@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns.

    Streamlit re-executes the script on every interaction, so constructing the
    pipeline directly at module level would rebuild it each time.

    Returns:
        The ``question-answering`` pipeline backed by
        ``deepset/roberta-base-squad2``.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


classifier = load_text_classifier()
qa_pipeline = _load_qa_pipeline()
# ----------------- CSS Styling -----------------
# Stylesheet for the page background, input borders, buttons and text colour.
# It is rendered with unsafe_allow_html so the <style> tag is passed through.
_CUSTOM_CSS = """
<style>
.main { background-color: #f4f4f4; }
.stTextInput, .stFileUploader { border: 2px solid #ff4b4b; border-radius: 10px; }
.stButton>button { background-color: #ff4b4b; color: white; border-radius: 10px; }
.stDownloadButton>button { background-color: #4CAF50; color: white; border-radius: 10px; }
h1, h2, h3, h4, h5, h6, p { color: #333333; }
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
# ----------------- App Title -----------------
# Main heading followed by a coloured HTML tagline describing the workflow.
_TAGLINE_HTML = (
    "<h4 style='color:#ff4b4b;'>"
    "Upload a CSV to classify news headlines and ask questions!"
    "</h4>"
)
st.title("π° News Classification & Q&A App")
st.markdown(_TAGLINE_HTML, unsafe_allow_html=True)
# ----------------- Upload CSV -----------------
# Workflow for an uploaded CSV: read it, classify every row of the 'content'
# column, offer the labelled table for download, answer a free-text question
# over the combined text, and draw a word cloud of that text.
st.subheader("π Upload a CSV File")
uploaded_file = st.file_uploader("Choose a CSV file...", type=["csv"])

if uploaded_file:
    # Read and preprocess. An unreadable upload is reported in the UI rather
    # than surfacing as a traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    if "content" not in df.columns:
        st.error("β The uploaded CSV must contain a 'content' column.")
        st.stop()

    if df.empty:
        st.warning("The uploaded CSV contains no rows to classify.")
        st.stop()

    # Preprocess text. Missing cells are turned into empty strings first;
    # otherwise astype(str) would produce the literal text "nan".
    df['cleaned_text'] = df['content'].fillna("").astype(str).str.lower().str.strip()
    st.write("π Preview of Uploaded Data:", df.head())

    # Joined once and shared by the Q&A context and the word cloud below.
    combined_text = " ".join(df['cleaned_text'].tolist())

    # ----------------- Classification -----------------
    with st.spinner("π Classifying news articles..."):
        # truncation=True keeps texts longer than the model's maximum input
        # length from raising inside the pipeline.
        df['class'] = [
            classifier(text, truncation=True)[0]['label']
            for text in df['cleaned_text']
        ]
    st.success("β Classification Complete!")
    st.write("π Classified Results:", df[['content', 'class']].head())

    # ----------------- Download -----------------
    st.subheader("π₯ Download Results")
    csv_output = df.to_csv(index=False).encode('utf-8')
    st.download_button("Download Output CSV", data=csv_output, file_name="output.csv", mime="text/csv")

    # ----------------- Q&A Section -----------------
    st.subheader("π¬ Ask a Question")
    question = st.text_input("π What do you want to know about the content?")
    if st.button("Get Answer"):
        # The QA pipeline rejects an empty question or context, so both are
        # checked before it is called.
        if not question.strip():
            st.warning("Please enter a question before requesting an answer.")
        elif not combined_text.strip():
            st.warning("There is no text content to search for an answer.")
        else:
            with st.spinner("Answering..."):
                result = qa_pipeline(question=question, context=combined_text)
            st.success(f"π **Answer:** {result['answer']}")

    # ----------------- Word Cloud -----------------
    st.subheader("βοΈ Word Cloud of News Text")
    try:
        wordcloud = WordCloud(width=800, height=400, background_color="white").generate(combined_text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no words to plot.
        st.info("Not enough text to draw a word cloud.")
    else:
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        # pyplot keeps a reference to every figure it creates; close this one
        # so figures do not accumulate across script reruns.
        plt.close(fig)
# ----------------- Footer -----------------
# Horizontal rule followed by a centred attribution line.
_FOOTER_HTML = "<p style='text-align:center; color:#666;'>π Built with using Streamlit & Hugging Face</p>"
st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)