Spaces:

TAgroup5
/

demo-News_classifier

Running

App Files Files Community

demo-News_classifier / app.py

TAgroup5

Update app.py

6fcbed4 verified 27 days ago

raw

history blame contribute delete

5.09 kB

	import streamlit as st
	import pandas as pd
	import re
	import io
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
	from transformers import AutoModelForQuestionAnswering

	# ---- Page setup and global styling ----
	# Browser-tab title plus the wide page layout.
	st.set_page_config(layout="wide", page_title="News Classifier & Q&A")

	# Custom CSS injected as raw HTML. It sets the page font and colors, puts a
	# background image on the app container, and recolors the regular buttons
	# and the download button.
	CUSTOM_CSS = """
	<style>
	body {
	background-color: #f4f4f4;
	color: #333333;
	font-family: 'Arial', sans-serif;
	}
	.stApp {
	background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
	background-size: cover;
	background-position: center;
	padding: 20px;
	border-radius: 10px;
	box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
	}
	h1 {
	color: #ff4b4b;
	text-align: center;
	}
	.stButton>button {
	background-color: #088da5 !important;
	color: white !important;
	font-size: 18px !important;
	border-radius: 10px !important;
	width: 100%;
	padding: 10px;
	}
	.stDownloadButton>button {
	background-color: #28a745 !important;
	color: white !important;
	font-size: 16px !important;
	border-radius: 10px !important;
	}
	</style>
	"""

	# unsafe_allow_html is required for the <style> tag to be rendered as markup.
	st.markdown(CUSTOM_CSS, unsafe_allow_html=True)

	# Load fine-tuned models
	model_name_classification = "TAgroup5/news-classification-model"
	model_name_qa = "distilbert-base-cased-distilled-squad"


	@st.cache_resource
	def load_pipelines():
	    """Build the text-classification and question-answering pipelines once.

	    Streamlit re-executes this script on every widget interaction. Without
	    caching, both models would be re-read and re-instantiated on each
	    button click or keystroke. `st.cache_resource` keeps a single shared
	    instance alive for the lifetime of the server process.

	    Returns:
	        A `(text_classification_pipeline, qa_pipeline)` tuple.
	    """
	    classifier = pipeline(
	        "text-classification",
	        model=AutoModelForSequenceClassification.from_pretrained(model_name_classification),
	        tokenizer=AutoTokenizer.from_pretrained(model_name_classification),
	    )
	    answerer = pipeline(
	        "question-answering",
	        model=AutoModelForQuestionAnswering.from_pretrained(model_name_qa),
	        tokenizer=AutoTokenizer.from_pretrained(model_name_qa),
	    )
	    return classifier, answerer


	# Initialize pipelines for both models
	text_classification_pipeline, qa_pipeline = load_pipelines()

	# Keep the original module-level names available; they now point at the
	# cached objects held by the pipelines instead of freshly loaded copies.
	model = text_classification_pipeline.model
	tokenizer = text_classification_pipeline.tokenizer
	model_qa = qa_pipeline.model
	tokenizer_qa = qa_pipeline.tokenizer

	# Streamlit App
	st.title(" News Classification and Q&A ")

	## ====================== News Classification ====================== ##
	st.header("📌 Classify News Articles")
	# The column name is spelled in lowercase on purpose: the upload validation
	# below checks for exactly 'content', so advertising 'Content' would send
	# users straight into the "must contain a 'content' column" error.
	st.markdown("Upload a CSV file containing a 'content' column to classify news into pre-defined categories.")

	uploaded_file = st.file_uploader("📂 Choose a CSV file", type="csv")


	def preprocess_text(text):
	    """Normalize one article body before it is sent to the classifier.

	    The text is lower-cased, whitespace runs are collapsed to a single
	    space, and every character that is not a-z or whitespace is removed.

	    Args:
	        text: Cell value from the 'content' column. Missing values
	            (NaN/None) and non-string values are accepted.

	    Returns:
	        The cleaned string; "" for missing values, so such rows end up
	        labelled "Unknown" instead of raising AttributeError.
	    """
	    if pd.isna(text):
	        return ""
	    text = str(text).lower()
	    text = re.sub(r'\s+', ' ', text)
	    text = re.sub(r'[^a-z\s]', '', text)
	    return text


	@st.cache_data
	def classify_texts(texts):
	    """Return one category label per entry of `texts`.

	    Cached on the input list so that reruns triggered by other widgets
	    (filter buttons, the Q&A inputs) do not run the model over the whole
	    file again.

	    Args:
	        texts: List of already preprocessed strings.

	    Returns:
	        A list of labels, same length and order as `texts`. Blank entries
	        are labelled "Unknown" without calling the model.
	    """
	    labels = ["Unknown"] * len(texts)
	    non_blank = [(position, text) for position, text in enumerate(texts) if text.strip()]
	    if non_blank:
	        # truncation=True cuts over-long articles down to the model's maximum
	        # input length instead of letting the forward pass fail on them.
	        predictions = text_classification_pipeline(
	            [text for _, text in non_blank], truncation=True
	        )
	        for (position, _), prediction in zip(non_blank, predictions):
	            labels[position] = prediction['label']
	    return labels


	if uploaded_file is not None:
	    df = None
	    try:
	        df = pd.read_csv(uploaded_file, encoding="utf-8")
	    except (UnicodeDecodeError, pd.errors.EmptyDataError, pd.errors.ParserError) as read_error:
	        # Show a readable message instead of a traceback for a bad upload.
	        st.error(f"❌ Error: The uploaded file could not be read as a UTF-8 CSV ({read_error}).")

	    if df is not None and 'content' not in df.columns:
	        st.error("❌ Error: The uploaded CSV must contain a 'content' column.")
	    elif df is not None:
	        st.write("✅ Preview of uploaded data:")
	        st.dataframe(df.head())

	        df['processed_content'] = df['content'].apply(preprocess_text)
	        df['class'] = classify_texts(df['processed_content'].tolist())

	        st.write("🔍 Classification Results:")
	        st.dataframe(df[['content', 'class']])

	        # utf-8-sig writes a BOM at the start of the exported file.
	        output = io.BytesIO()
	        df.to_csv(output, index=False, encoding="utf-8-sig")
	        st.download_button("📥 Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")

	        #App Component 3: Think!Think!Think! - Introducing a News Filtering Option
	        # Kept inside the success branch because it needs the classified `df`.
	        st.write("🔍 Filter by Category")
	        categories = ['All', 'Business', 'Opinion', 'Political_gossip', 'Sports', 'World_news']
	        button_labels = ["All", "📈 Business", "🗣 Opinion", "🏛 Political Gossip", "⚽ Sports", "🌎 World News"]

	        # st.button is only True during the single rerun that follows the
	        # click, so the choice is stored in session state; otherwise any
	        # later interaction (e.g. typing a question) resets it to 'All'.
	        if 'selected_category' not in st.session_state:
	            st.session_state['selected_category'] = 'All'

	        for column, category, label in zip(st.columns(len(categories)), categories, button_labels):
	            with column:
	                if st.button(label):
	                    st.session_state['selected_category'] = category

	        selected_category = st.session_state['selected_category']

	        if selected_category != 'All':
	            filtered_df = df[df['class'] == selected_category]
	        else:
	            filtered_df = df

	        st.write(f"🔎 Showing news articles in category: {selected_category}")
	        st.dataframe(filtered_df[['content', 'class']])

	# Add a separator
	st.markdown("---")

	## ====================== Q&A ====================== ##
	st.header("💬 Ask a Question About the News")
	question = st.text_input("❓ Ask a question:")
	context = st.text_area("📰 Provide the news article or content:", height=150)

	# Run the model only when BOTH fields contain real text. The question is
	# stripped just like the context, so a whitespace-only question is treated
	# as "nothing asked" rather than being sent to the pipeline.
	if question.strip() and context.strip():
	    # The pipeline result is a dict; only its 'answer' entry is displayed.
	    result = qa_pipeline(question=question, context=context)
	    st.success(f"✅ Answer: {result['answer']}")