Spaces:

TAgroup5
/

demo-News_classifier

Sleeping

App Files Files Community

demo-News_classifier / app.py

TAgroup5

Update app.py

73c0f99 verified 3 months ago

raw

history blame

4.12 kB

	import streamlit as st
	import pandas as pd
	import re
	import io
	from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
	from transformers import AutoModelForQuestionAnswering
	from streamlit_extras.app_logo import add_logo # For adding a logo

	# ---- Page chrome: browser-tab metadata, global CSS, and the main title ----
	# Stylesheet injected verbatim into the page; kept in a named constant so the
	# rendering call below stays short.
	_PAGE_CSS = """
	<style>
	body {
	background-color: #f5f5f5;
	}
	.stApp {
	background-color: white;
	border-radius: 10px;
	padding: 20px;
	box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
	}
	.stTitle, .stHeader {
	color: #0073e6;
	text-align: center;
	}
	.stButton>button {
	background-color: #0073e6 !important;
	color: white !important;
	border-radius: 8px !important;
	font-size: 16px !important;
	}
	.stDownloadButton>button {
	background-color: #28a745 !important;
	color: white !important;
	border-radius: 8px !important;
	}
	</style>
	"""

	# Page configuration is issued before any other Streamlit call in this script.
	st.set_page_config(
	    page_title="News Classifier & Q&A",
	    page_icon="📰",
	    layout="wide",
	)

	# unsafe_allow_html is needed so the raw <style> element is rendered as HTML
	# rather than shown as text.
	st.markdown(_PAGE_CSS, unsafe_allow_html=True)

	# Optional branding hook: point this at a real logo URL and uncomment.
	# add_logo("https://your-logo-url.png", height=50)

	st.title("📰 News Classification & Q&A")

	## ====================== Component 1: News Classification ====================== ##
	def preprocess_text(text):
	    """Normalise one article body before it is passed to the classifier.

	    Lower-cases the text, collapses each run of whitespace into a single
	    space, then removes every character that is not a-z or whitespace.

	    Args:
	        text: Value of one 'content' cell. pandas yields NaN (a float) for
	            empty CSV cells and numbers for numeric-looking cells, so
	            non-string values are accepted.

	    Returns:
	        The cleaned string; "" when the cell is missing.
	    """
	    if not isinstance(text, str):
	        # Missing cells become "" (later labelled "Unknown"); any other
	        # non-string value is classified by its string form.
	        text = "" if pd.isna(text) else str(text)
	    text = text.lower()
	    text = re.sub(r'\s+', ' ', text)
	    text = re.sub(r'[^a-z\s]', '', text)
	    return text


	@st.cache_resource
	def _load_classification_pipeline(model_name):
	    """Build the text-classification pipeline once and reuse it across reruns.

	    Streamlit re-executes the whole script on every widget interaction;
	    without caching, the model and tokenizer would be loaded again each time.

	    Args:
	        model_name: Hugging Face model id to load the model and tokenizer from.

	    Returns:
	        A transformers "text-classification" pipeline.
	    """
	    model = AutoModelForSequenceClassification.from_pretrained(model_name)
	    tokenizer = AutoTokenizer.from_pretrained(model_name)
	    return pipeline("text-classification", model=model, tokenizer=tokenizer)


	st.header("📌 Classify News Articles")
	st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")

	uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

	if uploaded_file is not None:
	    try:
	        df = pd.read_csv(uploaded_file, encoding="utf-8")
	    except UnicodeDecodeError:
	        # The failed UTF-8 attempt has already consumed part of the upload
	        # buffer; rewind so the fallback decoder parses the file from the start.
	        uploaded_file.seek(0)
	        df = pd.read_csv(uploaded_file, encoding="ISO-8859-1")

	    if 'content' not in df.columns:
	        st.error("❌ The uploaded CSV must contain a 'content' column.")
	    else:
	        st.success("✅ File uploaded successfully!")
	        st.write("Preview of uploaded data:")
	        st.dataframe(df.head())

	        # Apply preprocessing
	        df['processed_content'] = df['content'].apply(preprocess_text)

	        # Load Model (cached across reruns)
	        model_name_classification = "TAgroup5/news-classification-model"
	        text_classification_pipeline = _load_classification_pipeline(model_name_classification)

	        # Classify each record. truncation=True keeps articles longer than the
	        # model's maximum sequence length from raising inside the pipeline.
	        # Rows that are empty after cleaning are labelled "Unknown" without
	        # calling the model.
	        df['class'] = df['processed_content'].apply(
	            lambda x: text_classification_pipeline(x, truncation=True)[0]['label'] if x.strip() else "Unknown"
	        )

	        # Display results
	        st.write("📌 Classification Results:")
	        st.dataframe(df[['content', 'class']])

	        # Provide CSV download. "utf-8-sig" writes a BOM so spreadsheet tools
	        # detect the encoding.
	        output = io.BytesIO()
	        df.to_csv(output, index=False, encoding="utf-8-sig")
	        st.download_button(label="📥 Download Classified News", data=output.getvalue(), file_name="classified_news.csv", mime="text/csv")

	## ====================== Component 2: Q&A ====================== ##
	@st.cache_resource
	def _load_qa_pipeline(model_name):
	    """Build the question-answering pipeline once and reuse it across reruns.

	    Streamlit re-executes the whole script on every widget interaction;
	    without caching, the model would be loaded again each time.

	    Args:
	        model_name: Hugging Face model id used for both model and tokenizer.

	    Returns:
	        A transformers "question-answering" pipeline.
	    """
	    return pipeline("question-answering", model=model_name, tokenizer=model_name)


	st.header("💬 Ask a Question About the News")
	st.markdown("Enter a question and provide a news article to get an answer.")

	question = st.text_input("🔍 Ask a question:")
	context = st.text_area("📝 Provide the news article content:", height=150)

	# Run the model only when both fields hold non-whitespace text; a question
	# made of spaces alone is treated the same as an empty one.
	if question.strip() and context.strip():
	    model_name_qa = "distilbert-base-uncased-distilled-squad"
	    qa_pipeline = _load_qa_pipeline(model_name_qa)
	    result = qa_pipeline(question=question, context=context)

	    # Display answer; a missing or empty 'answer' is reported as a warning.
	    if result.get('answer'):
	        st.success(f"🗣 Answer: {result['answer']}")
	    else:
	        st.warning("⚠️ No answer found in the provided content.")