|
import streamlit as st |
|
import pandas as pd |
|
import matplotlib.pyplot as plt |
|
from wordcloud import WordCloud |
|
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification |
|
|
|
|
|
# Browser-tab title and full-width layout for the whole app.
st.set_page_config(
    page_title="📰 News Classifier & Q&A App",
    layout="wide",
)
|
|
|
|
|
@st.cache_resource
def load_text_classifier():
    """Build the news text-classification pipeline.

    Loads the tokenizer and the sequence-classification weights of the
    ``MihanTilk/News_Classifier`` checkpoint and wraps both in a Hugging Face
    ``text-classification`` pipeline. The function is decorated with
    ``st.cache_resource``.

    Returns:
        The ``text-classification`` pipeline built from that checkpoint.
    """
    checkpoint = "MihanTilk/News_Classifier"
    # Keyword arguments are evaluated left to right, so the tokenizer is
    # fetched before the model weights.
    return pipeline(
        "text-classification",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )
|
|
|
|
|
@st.cache_resource
def load_qa_pipeline():
    """Build the extractive question-answering pipeline.

    Decorated with ``st.cache_resource`` so the ``deepset/roberta-base-squad2``
    model is created once and reused, instead of being reloaded each time
    Streamlit re-executes the script.

    Returns:
        The ``question-answering`` pipeline for ``deepset/roberta-base-squad2``.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


# Module-level handles used by the rest of the script.
classifier = load_text_classifier()
qa_pipeline = load_qa_pipeline()
|
|
|
|
|
# Stylesheet injected into the page: light background, red-bordered inputs,
# red action buttons, green download button and dark-grey text.
_CUSTOM_CSS = """
<style>
.main { background-color: #f4f4f4; }
.stTextInput, .stFileUploader { border: 2px solid #ff4b4b; border-radius: 10px; }
.stButton>button { background-color: #ff4b4b; color: white; border-radius: 10px; }
.stDownloadButton>button { background-color: #4CAF50; color: white; border-radius: 10px; }
h1, h2, h3, h4, h5, h6, p { color: #333333; }
</style>
"""

# unsafe_allow_html is required, otherwise the <style> tag would be escaped.
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
|
|
|
|
|
# Page header: app title followed by a red call-to-action tagline.
st.title("📰 News Classification & Q&A App")
st.markdown(
    "<h4 style='color:#ff4b4b;'>Upload a CSV to classify news headlines and ask questions!</h4>",
    unsafe_allow_html=True,
)
|
|
|
|
|
# Upload section: a heading plus a file picker restricted to .csv files.
# `uploaded_file` is falsy until the user has selected a file.
st.subheader("π Upload a CSV File")
uploaded_file = st.file_uploader(
    "Choose a CSV file...",
    type=["csv"],
)
|
|
|
@st.cache_data(show_spinner=False, max_entries=8)
def _classify_texts(texts):
    """Return the predicted label for every string in *texts*.

    The result is cached on the tuple of texts, so widget interactions that
    re-execute the script (typing a question, pressing a button) do not
    re-run the model on an unchanged upload.

    Args:
        texts: Tuple of strings to classify.

    Returns:
        A list with one label per input text, in the same order.
    """
    if not texts:
        return []
    # truncation=True keeps over-length articles from exceeding the model's
    # maximum sequence length; one batched call replaces a per-row call.
    predictions = classifier(list(texts), truncation=True)
    return [prediction["label"] for prediction in predictions]


# NOTE(review): several labels below still begin with "π" / "β". They look
# like mis-decoded emoji whose original bytes cannot be recovered from this
# file; restore them from the original source.
if uploaded_file:
    # --- Load and validate the upload -------------------------------------
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"Could not read the uploaded CSV: {exc}")
        st.stop()

    if "content" not in df.columns:
        st.error("β The uploaded CSV must contain a 'content' column.")
        st.stop()

    # --- Normalise the text -----------------------------------------------
    content = df["content"]
    # astype(str) would turn missing cells into the literal text "nan";
    # blank them out instead so they do not pollute the Q&A context or the
    # word cloud.
    df["cleaned_text"] = (
        content.astype(str).str.lower().str.strip().mask(content.isna(), "")
    )
    st.write("π Preview of Uploaded Data:", df.head())

    # --- Classify -----------------------------------------------------------
    with st.spinner("π Classifying news articles..."):
        df["class"] = _classify_texts(tuple(df["cleaned_text"]))

    st.success("✅ Classification Complete!")
    st.write("π Classified Results:", df[["content", "class"]].head())

    # --- Download -----------------------------------------------------------
    st.subheader("📥 Download Results")
    csv_output = df.to_csv(index=False).encode("utf-8")
    st.download_button(
        "Download Output CSV",
        data=csv_output,
        file_name="output.csv",
        mime="text/csv",
    )

    # Single joined corpus shared by the Q&A context and the word cloud.
    combined_text = " ".join(df["cleaned_text"])

    # --- Question answering -------------------------------------------------
    st.subheader("💬 Ask a Question")
    question = st.text_input("π What do you want to know about the content?")

    if st.button("Get Answer"):
        # Guard against an empty question or an empty context before calling
        # the QA pipeline.
        if not question.strip():
            st.warning("Please enter a question before requesting an answer.")
        elif not combined_text.strip():
            st.warning("The uploaded file contains no text to answer from.")
        else:
            with st.spinner("Answering..."):
                result = qa_pipeline(question=question, context=combined_text)
            st.success(f"π **Answer:** {result['answer']}")

    # --- Word cloud ---------------------------------------------------------
    st.subheader("βοΈ Word Cloud of News Text")
    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color="white"
        ).generate(combined_text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no words to plot.
        st.info("Not enough text to build a word cloud.")
    else:
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        # Release the figure; pyplot otherwise keeps every figure created
        # across reruns alive.
        plt.close(fig)
|
|
|
|
|
# Footer: horizontal rule followed by a centred, grey credit line.
_FOOTER_HTML = (
    "<p style='text-align:center; color:#666;'>"
    "π Built with using Streamlit & Hugging Face"
    "</p>"
)

st.markdown("---")
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)