Spaces:
Running
Running
File size: 5,094 Bytes
246133f 5cdc45c 237a63b 5cdc45c ec68c76 73c0f99 f72fd34 4a1b338 a025d74 f72fd34 4a1b338 f72fd34 4a1b338 f72fd34 4a1b338 f72fd34 4a1b338 f72fd34 4a1b338 f72fd34 3412bc8 73c0f99 3412bc8 6fcbed4 9ff5a0e 4a1b338 2ec8bb5 9ff5a0e 90bfd68 2ec8bb5 4a1b338 f72fd34 4a1b338 9ff5a0e f72fd34 9ff5a0e 4a1b338 9ff5a0e f72fd34 9ff5a0e f72fd34 9ff5a0e 4a1b338 9ff5a0e f72fd34 9ff5a0e 4a1b338 6fcbed4 4a1b338 f72fd34 9ff5a0e 4a1b338 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 |
import streamlit as st
import pandas as pd
import re
import io
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
from transformers import AutoModelForQuestionAnswering
# Streamlit UI
# Custom stylesheet injected into the page as raw HTML. It sets the base
# colours and font, puts a background image on the app container, and
# restyles the regular and download buttons.
_PAGE_CSS = """
<style>
body {
background-color: #f4f4f4;
color: #333333;
font-family: 'Arial', sans-serif;
}
.stApp {
background-image: url('https://i.pinimg.com/474x/9c/68/86/9c6886dd642a4869f3fa4578f9fe34ef.jpg');
background-size: cover;
background-position: center;
padding: 20px;
border-radius: 10px;
box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
}
h1 {
color: #ff4b4b;
text-align: center;
}
.stButton>button {
background-color: #088da5 !important;
color: white !important;
font-size: 18px !important;
border-radius: 10px !important;
width: 100%;
padding: 10px;
}
.stDownloadButton>button {
background-color: #28a745 !important;
color: white !important;
font-size: 16px !important;
border-radius: 10px !important;
}
</style>
"""

# Set the browser-tab title and switch to the wide layout, then apply the CSS.
st.set_page_config(layout="wide", page_title="News Classifier & Q&A")
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# Load fine-tuned models
model_name_classification = "TAgroup5/news-classification-model"
model_name_qa = "distilbert-base-cased-distilled-squad"


@st.cache_resource
def _load_classifier(model_id):
    """Load the sequence-classification model, tokenizer and pipeline.

    Streamlit re-executes the whole script on every widget interaction;
    caching the loaded objects avoids re-instantiating the model each time.

    Args:
        model_id: Hugging Face model identifier to load.

    Returns:
        A ``(model, tokenizer, pipeline)`` tuple.
    """
    clf_model = AutoModelForSequenceClassification.from_pretrained(model_id)
    clf_tokenizer = AutoTokenizer.from_pretrained(model_id)
    clf_pipeline = pipeline("text-classification", model=clf_model, tokenizer=clf_tokenizer)
    return clf_model, clf_tokenizer, clf_pipeline


@st.cache_resource
def _load_question_answerer(model_id):
    """Load the question-answering model, tokenizer and pipeline.

    Cached for the same reason as the classifier loader: the script is
    re-run on every interaction and the model should only be built once.

    Args:
        model_id: Hugging Face model identifier to load.

    Returns:
        A ``(model, tokenizer, pipeline)`` tuple.
    """
    answer_model = AutoModelForQuestionAnswering.from_pretrained(model_id)
    answer_tokenizer = AutoTokenizer.from_pretrained(model_id)
    answer_pipeline = pipeline("question-answering", model=answer_model, tokenizer=answer_tokenizer)
    return answer_model, answer_tokenizer, answer_pipeline


# Initialize pipelines for both models (the same module-level names as before
# are kept so the rest of the script is unaffected).
model, tokenizer, text_classification_pipeline = _load_classifier(model_name_classification)
model_qa, tokenizer_qa, qa_pipeline = _load_question_answerer(model_name_qa)
# Streamlit App
st.title(" News Classification and Q&A ")

## ====================== News Classification ====================== ##
# NOTE(review): several UI strings below start with "π"/"β"-style characters
# that look like mis-decoded emoji. They are kept exactly as found because the
# original glyphs cannot be recovered with certainty — confirm and restore.

_NON_LETTER_RE = re.compile(r'[^a-z\s]')
_WHITESPACE_RE = re.compile(r'\s+')


def preprocess_text(text):
    """Normalise one article body before classification.

    Lower-cases the text, removes every character that is not a-z or
    whitespace, then collapses whitespace runs to a single space.

    Args:
        text: Cell value from the 'content' column. Missing values (NaN/None)
            are treated as empty text; other non-string values are
            converted with ``str``.

    Returns:
        The cleaned string (possibly empty).
    """
    if not isinstance(text, str):
        # Empty CSV cells come back from pandas as NaN, which has no .lower().
        text = "" if pd.isna(text) else str(text)
    text = text.lower()
    # Strip characters first, then collapse whitespace; the reverse order
    # leaves double spaces where a digit or symbol stood between two spaces.
    text = _NON_LETTER_RE.sub('', text)
    return _WHITESPACE_RE.sub(' ', text)


@st.cache_data
def _classify_texts(texts):
    """Return one predicted label per input text.

    Results are cached on the input list so that a rerun triggered by a
    filter button does not repeat inference for an unchanged upload.

    Args:
        texts: List of pre-processed article strings.

    Returns:
        List of label strings; blank inputs get "Unknown" without calling
        the model.
    """
    return [
        # truncation=True keeps long articles within the model's input limit.
        text_classification_pipeline(item, truncation=True)[0]['label'] if item.strip() else "Unknown"
        for item in texts
    ]


st.header("π Classify News Articles")
# The help text names the column exactly as the check below requires it.
st.markdown("Upload a CSV file containing a **'content'** column to classify news into pre-defined categories.")
uploaded_file = st.file_uploader("π Choose a CSV file", type="csv")

if uploaded_file is not None:
    df = pd.read_csv(uploaded_file, encoding="utf-8")
    if 'content' not in df.columns:
        st.error("β Error: The uploaded CSV must contain a 'content' column.")
    else:
        st.write("✅ Preview of uploaded data:")
        st.dataframe(df.head())

        df['processed_content'] = df['content'].apply(preprocess_text)
        df['class'] = _classify_texts(df['processed_content'].tolist())

        st.write("π Classification Results:")
        st.dataframe(df[['content', 'class']])

        # Serialise to an in-memory buffer so the result can be downloaded.
        output = io.BytesIO()
        df.to_csv(output, index=False, encoding="utf-8-sig")
        st.download_button("π₯ Download Classified News", data=output.getvalue(), file_name="output.csv", mime="text/csv")

        #App Component 3: Think!Think!Think! - Introducing a News Filtering Option
        st.write("π **Filter by Category**")
        # (button label, class value) pairs; one button per column.
        category_buttons = [
            ("All", 'All'),
            ("π Business", 'Business'),
            ("π£ Opinion", 'Opinion'),
            ("π Political Gossip", 'Political_gossip'),
            ("β½ Sports", 'Sports'),
            ("π World News", 'World_news'),
        ]
        categories = [category for _, category in category_buttons]

        # A button is only True on the rerun caused by its own click, so the
        # selection falls back to 'All' on any other rerun.
        selected_category = 'All'
        for column, (label, category) in zip(st.columns(len(category_buttons)), category_buttons):
            with column:
                if st.button(label):
                    selected_category = category

        if selected_category != 'All':
            filtered_df = df[df['class'] == selected_category]
        else:
            filtered_df = df

        st.write(f"π Showing news articles in category: {selected_category}")
        st.dataframe(filtered_df[['content', 'class']])
# Add a separator
st.markdown("---")

## ====================== Q&A ====================== ##
# NOTE(review): the leading characters in the three labels below look like
# mis-decoded emoji and are kept as found — confirm the intended glyphs.
st.header("π¬ Ask a Question About the News")
question = st.text_input("β Ask a question:")
context = st.text_area("π° Provide the news article or content:", height=150)

# Query the model only when both fields hold non-whitespace text; a question
# made of spaces alone would otherwise be sent to the pipeline.
if question.strip() and context.strip():
    result = qa_pipeline(question=question, context=context)
    st.success(f"✅ Answer: {result['answer']}")
|