import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification

# ✅ MUST be first Streamlit command
st.set_page_config(page_title="📰 News Classifier & Q&A App", layout="wide")


# ----------------- Model Loader -----------------
@st.cache_resource
def load_text_classifier():
    """Build the text-classification pipeline for the news classifier model.

    Wrapped in ``st.cache_resource`` so the tokenizer and model are loaded
    once and reused across Streamlit reruns.

    Returns:
        A ``transformers`` text-classification pipeline backed by the
        ``MihanTilk/News_Classifier`` checkpoint.
    """
    model_name = "MihanTilk/News_Classifier"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForSequenceClassification.from_pretrained(model_name)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline.

    Cached for the same reason as the classifier: without caching the model
    would be re-instantiated on every script rerun (each widget interaction).

    Returns:
        A ``transformers`` question-answering pipeline backed by
        ``deepset/roberta-base-squad2``.
    """
    return pipeline("question-answering", model="deepset/roberta-base-squad2")


@st.cache_data(show_spinner=False)
def classify_texts(texts):
    """Return the predicted label for each text in *texts*.

    Results are cached on the input list so that unrelated reruns (typing a
    question, pressing a button) do not re-run the model over the whole file.

    Args:
        texts: List of strings to classify.

    Returns:
        A list of label strings, one per input text, in the same order.
    """
    if not texts:
        return []
    # truncation=True keeps over-long articles from exceeding the model's
    # maximum sequence length, which would otherwise raise at inference time.
    predictions = load_text_classifier()(texts, truncation=True)
    return [prediction["label"] for prediction in predictions]


# Load Classifier & QA pipeline
classifier = load_text_classifier()
qa_pipeline = load_qa_pipeline()

# ----------------- CSS Styling -----------------
# NOTE(review): the style payload is blank here — presumably the CSS was lost
# when the file was extracted; restore it if custom styling is wanted.
st.markdown(
    """ """,
    unsafe_allow_html=True
)

# ----------------- App Title -----------------
st.title("📰 News Classification & Q&A App")
# NOTE(review): this subtitle appears to have lost its HTML wrapper; only the
# visible text remains.
st.markdown(
    "Upload a CSV to classify news headlines and ask questions!",
    unsafe_allow_html=True,
)

# ----------------- Upload CSV -----------------
st.subheader("📂 Upload a CSV File")
uploaded_file = st.file_uploader("Choose a CSV file...", type=["csv"])

if uploaded_file is not None:
    # Read and preprocess
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"❌ Could not read the uploaded CSV: {exc}")
        st.stop()

    if "content" not in df.columns:
        st.error("❌ The uploaded CSV must contain a 'content' column.")
        st.stop()

    # Preprocess text. Missing values are replaced with "" first; otherwise
    # astype(str) would turn them into the literal text "nan".
    df['cleaned_text'] = df['content'].fillna("").astype(str).str.lower().str.strip()
    st.write("📊 Preview of Uploaded Data:", df.head())

    # ----------------- Classification -----------------
    with st.spinner("🔍 Classifying news articles..."):
        df['class'] = classify_texts(df['cleaned_text'].tolist())
    st.success("✅ Classification Complete!")
    st.write("🔎 Classified Results:", df[['content', 'class']].head())

    # ----------------- Download -----------------
    st.subheader("📥 Download Results")
    csv_output = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        "Download Output CSV",
        data=csv_output,
        file_name="output.csv",
        mime="text/csv",
    )

    # All article text joined once; used as Q&A context and word-cloud input.
    all_text = " ".join(df['cleaned_text'].tolist())

    # ----------------- Q&A Section -----------------
    st.subheader("💬 Ask a Question")
    question = st.text_input("🔍 What do you want to know about the content?")
    if st.button("Get Answer"):
        # The QA pipeline raises on an empty question or context, so check
        # both before calling it.
        if not question.strip():
            st.warning("⚠️ Please enter a question first.")
        elif not all_text.strip():
            st.warning("⚠️ The uploaded file has no text to answer from.")
        else:
            with st.spinner("Answering..."):
                result = qa_pipeline(question=question, context=all_text)
            st.success(f"📝 **Answer:** {result['answer']}")

    # ----------------- Word Cloud -----------------
    st.subheader("☁️ Word Cloud of News Text")
    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color="white"
        ).generate(all_text)
    except ValueError:
        # WordCloud raises ValueError when the text yields no words to plot.
        st.info("No words available to build a word cloud.")
    else:
        fig, ax = plt.subplots()
        ax.imshow(wordcloud, interpolation="bilinear")
        ax.axis("off")
        st.pyplot(fig)
        # Close the figure so figures do not accumulate across reruns.
        plt.close(fig)

# ----------------- Footer -----------------
st.markdown("---")
st.markdown(
    "🚀 Built with Streamlit & Hugging Face",
    unsafe_allow_html=True,
)