Update app.py
app.py CHANGED
@@ -5,7 +5,7 @@ import io
 from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

 # Load fine-tuned model and tokenizer
-model_name = "TAgroup5/
+model_name = "TAgroup5/news-classification-model"
 model = AutoModelForSequenceClassification.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)

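Since `pipeline` is imported alongside the model and tokenizer, the app presumably wires these objects into a Transformers pipeline. A minimal sketch under that assumption; the `"text-classification"` task string and the sample headline are illustrative, not taken from app.py:

```python
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Same loading step as in the diff above.
model_name = "TAgroup5/news-classification-model"
model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Assumed wiring: the diff shows the `pipeline` import but not this call.
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
print(classifier("Central bank holds interest rates steady amid inflation concerns."))
```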
@@ -36,10 +36,12 @@ if uploaded_file is not None:

 # Preprocessing function
 def preprocess_text(text):
-    text = text.lower() #
-    text = re.sub(r'\s
-
-
+    text = text.lower() # Convert to lowercase
+    text = re.sub(r'[^a-z\s]', '', text) # Remove special characters & numbers
+    tokens = word_tokenize(text) # Tokenization
+    tokens = [word for word in tokens if word not in stop_words] # Remove stopwords
+    tokens = [lemmatizer.lemmatize(word) for word in tokens] # Lemmatization
+    return " ".join(tokens)

 # Apply preprocessing and classification
 df['processed_content'] = df['content'].apply(preprocess_text)
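The added lines call `word_tokenize`, `stop_words`, and `lemmatizer`, which must be defined earlier in app.py, outside the hunks shown here. A minimal NLTK setup sketch consistent with those names; the specific download calls and the English stopword list are assumptions:

```python
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

# Assumed one-time resource downloads; the Space may provision these differently.
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")

stop_words = set(stopwords.words("english"))  # consumed by the stopword filter
lemmatizer = WordNetLemmatizer()              # consumed by the lemmatization step

# Quick check that the pieces fit together the way preprocess_text expects.
print([lemmatizer.lemmatize(w) for w in word_tokenize("markets are rallying") if w not in stop_words])
```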