JainilP30 committed
Commit 01da0ae · verified · 1 Parent(s): 676e1bd

Upload 2 files

Files changed (2)
  1. app.py +101 -0
  2. requirements.txt +5 -0
app.py ADDED
@@ -0,0 +1,101 @@
+ import gradio as gr
+ import pickle
+ import re
+ import string
+ import numpy as np
+ import tensorflow as tf
+ from tensorflow.keras.preprocessing.sequence import pad_sequences
+ from nltk.corpus import stopwords
+ from nltk.stem import WordNetLemmatizer
+ from nltk.tokenize import word_tokenize
+ import nltk
+ nltk.download('punkt')
+ nltk.download('stopwords')
+ nltk.download('wordnet')
+
+ # ============ Load Models and Tokenizers ============
+ with open("logreg_model.pkl", "rb") as f:
+     logreg_model = pickle.load(f)
+
+ with open("nb_model.pkl", "rb") as f:
+     nb_model = pickle.load(f)
+
+ with open("tfidf_vectorizer.pkl", "rb") as f:
+     tfidf_vectorizer = pickle.load(f)
+
+ with open("glove_tokenizer.pkl", "rb") as f:
+     glove_tokenizer = pickle.load(f)
+
+ model_glove = tf.keras.models.load_model("glove_model.h5")
+
+ # ============ Constants ============
+ MAX_LENGTH = 300
+ stop_words = set(stopwords.words('english'))
+ lemmatizer = WordNetLemmatizer()
+
+ # ============ Preprocessing ============
+ def clean_text(text):
+     text = str(text).lower()
+     text = re.sub(r'\[.*?\]', '', text)
+     text = re.sub(r'https?://\S+|www\.\S+', '', text)
+     text = re.sub(r'<.*?>+', '', text)
+     text = re.sub(f"[{re.escape(string.punctuation)}]", '', text)
+     text = re.sub(r'\n', ' ', text)
+     text = re.sub(r'\w*\d\w*', '', text)
+     text = text.replace('“', '').replace('”', '').replace('’', "'").replace('‘', "'")
+     text = re.sub(r"'s\b", '', text)
+
+     tokens = word_tokenize(text)
+     tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and len(word) > 2]
+     return ' '.join(tokens)
+
+ # ============ Prediction ============
+ def predict_ensemble(text):
+     cleaned = clean_text(text)
+
+     # Check if cleaned text is too short
+     if len(cleaned.strip()) < 10:
+         return "Input too short to analyze."
+
+     # TF-IDF-based predictions
+     tfidf_vec = tfidf_vectorizer.transform([cleaned])
+     prob_nb = nb_model.predict_proba(tfidf_vec)[0][1]
+     prob_logreg = logreg_model.predict_proba(tfidf_vec)[0][1]
+
+     # GloVe prediction
+     glove_seq = glove_tokenizer.texts_to_sequences([cleaned])
+     glove_pad = pad_sequences(glove_seq, maxlen=MAX_LENGTH, padding='post', truncating='post')
+     prob_glove = model_glove.predict(glove_pad)[0][0]
+
+     # Weighted ensemble (weights sum to 1.0; 0.45 is the decision threshold)
+     ensemble_score = 0.55 * prob_nb + 0.1 * prob_logreg + 0.35 * prob_glove
+     label = "✅ Real News" if ensemble_score >= 0.45 else "❌ Fake News"
+
+     # Per-model probabilities, shown alongside the final verdict
+     explanation = f"""
+ **Naive Bayes:** {prob_nb:.4f}
+ **Logistic Regression:** {prob_logreg:.4f}
+ **GloVe Model:** {prob_glove:.4f}
+ **Ensemble Score:** {ensemble_score:.4f}
+ **Final Prediction:** {label}
+ """
+     return explanation
+
+ # ============ Gradio Interface ============
+ interface = gr.Interface(
+     fn=predict_ensemble,
+     inputs=gr.Textbox(lines=8, placeholder="Paste your news article here...", label="News Article"),
+     outputs=gr.Markdown(label="Prediction"),
+     title="📰 Fake News Detector",
+     description="This tool uses 3 models (Naive Bayes, Logistic Regression, GloVe-based Deep Learning) to classify news as real or fake using an ensemble method.",
+     allow_flagging="never"
+ )
+
+ if __name__ == "__main__":
+     interface.launch()
+
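For context on the numbers the app reports: the ensemble score is a fixed weighted average of the three per-model probabilities, with 0.45 as the "real" threshold. The sketch below is not part of the commit; it simply replays that arithmetic with made-up model outputs.

```python
# Sketch only (not in the commit): the weighted-average step from predict_ensemble,
# replayed with hypothetical per-model probabilities of the "real" class.
prob_nb, prob_logreg, prob_glove = 0.62, 0.40, 0.55       # made-up example outputs

ensemble_score = 0.55 * prob_nb + 0.10 * prob_logreg + 0.35 * prob_glove
# = 0.3410 + 0.0400 + 0.1925 = 0.5735

label = "Real News" if ensemble_score >= 0.45 else "Fake News"
print(f"{ensemble_score:.4f} -> {label}")                  # 0.5735 -> Real News
```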
requirements.txt ADDED
@@ -0,0 +1,5 @@
+ gradio
+ tensorflow
+ scikit-learn
+ nltk
+ numpy
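Note that this commit uploads only app.py and requirements.txt, while app.py opens five model artifacts at import time (the four pickles plus glove_model.h5). A small guard like the sketch below, which is illustrative only and not part of the commit, fails with a single readable message listing everything that is missing instead of stopping at the first open() that cannot find its file.

```python
# Sketch only (not in the commit): verify that every artifact app.py expects is
# present in the Space before the Gradio app starts loading models.
from pathlib import Path

REQUIRED_ARTIFACTS = [
    "logreg_model.pkl",
    "nb_model.pkl",
    "tfidf_vectorizer.pkl",
    "glove_tokenizer.pkl",
    "glove_model.h5",
]

missing = [name for name in REQUIRED_ARTIFACTS if not Path(name).exists()]
if missing:
    raise FileNotFoundError("Missing model artifacts: " + ", ".join(missing))
```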