# --- Hugging Face file-viewer header (page residue, not program code) ---
# TAgroup5's picture
# Update app.py
# 73c0f99 verified
# raw
# history blame
# 4.12 kB
import streamlit as st
import pandas as pd
import re
import io
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from transformers import AutoModelForQuestionAnswering
from streamlit_extras.app_logo import add_logo # For adding a logo
# Custom Styling
# Page metadata (browser-tab title, icon, wide layout). Kept ahead of every
# other st.* call in this script.
st.set_page_config(page_title="News Classifier & Q&A", page_icon="📰", layout="wide")

# CSS for styling: white rounded "card" for the app container, blue primary
# buttons, green download buttons. unsafe_allow_html=True is what lets the
# <style> element through as HTML.
st.markdown(
    """
<style>
body {
background-color: #f5f5f5;
}
.stApp {
background-color: white;
border-radius: 10px;
padding: 20px;
box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
}
.stTitle, .stHeader {
color: #0073e6;
text-align: center;
}
.stButton>button {
background-color: #0073e6 !important;
color: white !important;
border-radius: 8px !important;
font-size: 16px !important;
}
.stDownloadButton>button {
background-color: #28a745 !important;
color: white !important;
border-radius: 8px !important;
}
</style>
""",
    unsafe_allow_html=True,
)

# Add a logo (optional, replace with your logo URL)
# add_logo("https://your-logo-url.png", height=50)

# The emoji literals in this block were mis-decoded UTF-8 ("πŸ“°"); they are
# restored to the intended newspaper glyph.
st.title("📰 News Classification & Q&A")
## ====================== Component 1: News Classification ====================== ##
# Patterns used by preprocess_text, compiled once instead of once per row.
_WHITESPACE_RUN = re.compile(r'\s+')
_NOT_LETTER_OR_SPACE = re.compile(r'[^a-z\s]')
_CLASSIFICATION_MODEL_NAME = "TAgroup5/news-classification-model"


def preprocess_text(text):
    """Normalise one article before it is handed to the classifier.

    Lower-cases the text, collapses every whitespace run to a single space,
    then removes every character that is not ``a-z`` or whitespace.

    Args:
        text: One cell of the uploaded 'content' column.

    Returns:
        The cleaned string. Non-string cells (for example the NaN that pandas
        produces for an empty cell) yield ``""`` so the row is labelled
        "Unknown" instead of crashing on ``.lower()``.
    """
    if not isinstance(text, str):
        return ""
    text = text.lower()
    text = _WHITESPACE_RUN.sub(' ', text)
    # NOTE: removing punctuation after collapsing can leave two adjacent
    # spaces ("a - b" -> "a  b"); the original order is kept on purpose so
    # the exported 'processed_content' column does not change.
    text = _NOT_LETTER_OR_SPACE.sub('', text)
    return text


def _read_uploaded_csv(uploaded):
    """Read the uploaded CSV as UTF-8, falling back to ISO-8859-1.

    Args:
        uploaded: The file-like object returned by ``st.file_uploader``.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    try:
        return pd.read_csv(uploaded, encoding="utf-8")
    except UnicodeDecodeError:
        # The failed attempt has already consumed part of the buffer; rewind
        # so the fallback parse starts at the first byte again.
        uploaded.seek(0)
        return pd.read_csv(uploaded, encoding="ISO-8859-1")


@st.cache_resource
def _load_classification_pipeline():
    """Build the text-classification pipeline once and reuse it across reruns."""
    model = AutoModelForSequenceClassification.from_pretrained(_CLASSIFICATION_MODEL_NAME)
    tokenizer = AutoTokenizer.from_pretrained(_CLASSIFICATION_MODEL_NAME)
    return pipeline("text-classification", model=model, tokenizer=tokenizer)


st.header("📌 Classify News Articles")
st.markdown("Upload a CSV file with a 'content' column to classify news into categories.")
uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

if uploaded_file is not None:
    df = _read_uploaded_csv(uploaded_file)

    if 'content' not in df.columns:
        st.error("❌ The uploaded CSV must contain a 'content' column.")
    else:
        st.success("✅ File uploaded successfully!")
        st.write("Preview of uploaded data:")
        st.dataframe(df.head())

        # Apply preprocessing
        df['processed_content'] = df['content'].apply(preprocess_text)

        # Load Model (cached, so a widget interaction does not reload it)
        text_classification_pipeline = _load_classification_pipeline()

        def _classify(cleaned):
            """Return the predicted label, or "Unknown" for an empty article."""
            if not cleaned.strip():
                return "Unknown"
            # truncation=True asks the tokenizer to cut over-long articles to
            # the model's maximum input length instead of failing on them.
            return text_classification_pipeline(cleaned, truncation=True)[0]['label']

        # Classify each record
        df['class'] = df['processed_content'].apply(_classify)

        # Display results
        st.write("📌 Classification Results:")
        st.dataframe(df[['content', 'class']])

        # Provide CSV download; utf-8-sig writes a BOM at the start of the file.
        output = io.BytesIO()
        df.to_csv(output, index=False, encoding="utf-8-sig")
        st.download_button(
            label="📥 Download Classified News",
            data=output.getvalue(),
            file_name="classified_news.csv",
            mime="text/csv",
        )
## ====================== Component 2: Q&A ====================== ##
_QA_MODEL_NAME = "distilbert-base-uncased-distilled-squad"


@st.cache_resource
def _load_qa_pipeline():
    """Build the question-answering pipeline once and reuse it across reruns."""
    return pipeline("question-answering", model=_QA_MODEL_NAME, tokenizer=_QA_MODEL_NAME)


st.header("💬 Ask a Question About the News")
st.markdown("Enter a question and provide a news article to get an answer.")
question = st.text_input("🔍 Ask a question:")
context = st.text_area("📝 Provide the news article content:", height=150)

# Both fields must hold something other than whitespace before the model runs.
if question.strip() and context.strip():
    qa_pipeline = _load_qa_pipeline()
    result = qa_pipeline(question=question, context=context)

    # Display answer; a missing or empty 'answer' is reported as "not found".
    if result.get('answer'):
        st.success(f"**🗣 Answer:** {result['answer']}")
    else:
        st.warning("⚠️ No answer found in the provided content.")