"""Streamlit app that classifies uploaded news articles and answers questions.

The page has two independent sections:

* News classification: upload a CSV with a ``content`` column, label every
  row with the fine-tuned classifier, download the result and filter it by
  category.
* Q&A: extractive question answering over a user-supplied passage.
"""

import io
import re

import pandas as pd
import streamlit as st
from transformers import (
    AutoModelForQuestionAnswering,
    AutoModelForSequenceClassification,
    AutoTokenizer,
    pipeline,
)

# Streamlit UI. set_page_config is kept as the first Streamlit command.
st.set_page_config(page_title="News Classifier & Q&A", layout="wide")
st.markdown(""" """, unsafe_allow_html=True)

# Fine-tuned / pre-trained model identifiers.
model_name_classification = "TAgroup5/news-classification-model"
model_name_qa = "distilbert-base-cased-distilled-squad"

# (button label, value stored in the 'class' column) for the category filter.
CATEGORY_BUTTONS = (
    ("All", "All"),
    ("📈 Business", "Business"),
    ("🗣 Opinion", "Opinion"),
    ("🏛 Political Gossip", "Political_gossip"),
    ("⚽ Sports", "Sports"),
    ("🌎 World News", "World_news"),
)


@st.cache_resource
def load_classification_pipeline():
    """Build the text-classification pipeline once per server process.

    Streamlit re-executes this script on every widget interaction; without
    caching the model weights would be reloaded on each rerun.
    """
    model = AutoModelForSequenceClassification.from_pretrained(model_name_classification)
    tokenizer = AutoTokenizer.from_pretrained(model_name_classification)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


@st.cache_resource
def load_qa_pipeline():
    """Build the question-answering pipeline once per server process."""
    model_qa = AutoModelForQuestionAnswering.from_pretrained(model_name_qa)
    tokenizer_qa = AutoTokenizer.from_pretrained(model_name_qa)
    return pipeline("question-answering", model=model_qa, tokenizer=tokenizer_qa)


# Initialize pipelines for both models (cached across reruns).
text_classification_pipeline = load_classification_pipeline()
qa_pipeline = load_qa_pipeline()


def preprocess_text(text):
    """Normalize one CSV cell for the classifier.

    Lower-cases the text, collapses whitespace runs to a single space and
    removes every character that is not ``a-z`` or whitespace.

    Args:
        text: Cell value from the ``content`` column. Missing values
            (``NaN``/``None``) and non-string values are accepted.

    Returns:
        The cleaned string; ``""`` for missing values.
    """
    if not isinstance(text, str):
        # pandas yields float NaN for empty cells; calling .lower() on it
        # would raise AttributeError.
        if pd.isna(text):
            return ""
        text = str(text)
    text = text.lower()
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'[^a-z\s]', '', text)
    return text


@st.cache_data
def classify_texts(texts: list) -> list:
    """Return one predicted label per preprocessed text.

    Results are cached on the input texts so that reruns triggered by the
    filter buttons or the Q&A widgets do not run the model again.

    Args:
        texts: Preprocessed article strings.

    Returns:
        A list of labels, in input order. Blank texts get ``"Unknown"``
        without calling the model.
    """
    classifier = load_classification_pipeline()
    labels = []
    for text in texts:
        if text.strip():
            # truncation=True keeps articles longer than the model's maximum
            # sequence length from raising inside the pipeline.
            labels.append(classifier(text, truncation=True)[0]['label'])
        else:
            labels.append("Unknown")
    return labels


def render_classification_section():
    """Render the upload / classify / download / filter section."""
    st.header("📌 Classify News Articles")
    st.markdown("Upload a CSV file containing a **'content'** column to classify news into pre-defined categories.")

    uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")
    if uploaded_file is None:
        return

    # Defensive rewind in case the upload buffer was already consumed.
    uploaded_file.seek(0)
    try:
        df = pd.read_csv(uploaded_file, encoding="utf-8")
    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as exc:
        st.error(f"❌ Error: Could not read the uploaded CSV: {exc}")
        return

    if 'content' not in df.columns:
        st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
        return

    st.write("✅ Preview of uploaded data:")
    st.dataframe(df.head())

    df['processed_content'] = df['content'].apply(preprocess_text)
    df['class'] = classify_texts(df['processed_content'].tolist())

    st.write("🔍 Classification Results:")
    st.dataframe(df[['content', 'class']])

    # utf-8-sig writes a BOM so spreadsheet tools detect the encoding.
    output = io.BytesIO()
    df.to_csv(output, index=False, encoding="utf-8-sig")
    st.download_button("📥 Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")

    # News filtering: one button per category, laid out side by side.
    st.write("🔍 **Filter by Category**")
    selected_category = 'All'
    columns = st.columns(len(CATEGORY_BUTTONS))
    for column, (label, category) in zip(columns, CATEGORY_BUTTONS):
        with column:
            # st.button is True only on the rerun caused by its click.
            if st.button(label):
                selected_category = category

    if selected_category != 'All':
        filtered_df = df[df['class'] == selected_category]
    else:
        filtered_df = df

    st.write(f"🔎 Showing news articles in category: {selected_category}")
    st.dataframe(filtered_df[['content', 'class']])


def render_qa_section():
    """Render the question-answering section."""
    st.header("💬 Ask a Question About the News")
    question = st.text_input("❓ Ask a question:")
    context = st.text_area("📰 Provide the news article or content:", height=150)

    # Both inputs must contain non-whitespace text before the model is run.
    if question.strip() and context.strip():
        result = qa_pipeline(question=question, context=context)
        st.success(f"✅ Answer: {result['answer']}")


# Streamlit App
st.title(" News Classification and Q&A ")

## ====================== News Classification ====================== ##
render_classification_section()

# Add a separator
st.markdown("---")

## ====================== Q&A ====================== ##
render_qa_section()