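"""Fake News Detector.

Gradio app that labels a news article as real or fake using a weighted
ensemble of three classifiers: Naive Bayes and Logistic Regression over
TF-IDF features, plus a GloVe-based Keras model over padded token sequences.
"""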
import gradio as gr
import pickle
import re
import string
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
import nltk
import os
import zipfile

# Unzip the bundled nltk_data.zip (next to this script) if not already unzipped
base_dir = os.path.dirname(__file__)
nltk_data_path = os.path.join(base_dir, 'nltk_data')
if not os.path.exists(nltk_data_path):
    with zipfile.ZipFile(os.path.join(base_dir, 'nltk_data.zip'), 'r') as zip_ref:
        zip_ref.extractall(nltk_data_path)

# Tell NLTK to use the local data path
nltk.data.path.append(nltk_data_path)
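# The bundled data is assumed to include punkt (for word_tokenize),
# stopwords, and wordnet (for WordNetLemmatizer).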


# ============ Load Models and Tokenizers ============
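# All artifacts below are assumed to come from the same training run, so the
# vectorizer/tokenizer vocabularies match the models they feed.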
with open("logreg_model.pkl", "rb") as f:
    logreg_model = pickle.load(f)

with open("nb_model.pkl", "rb") as f:
    nb_model = pickle.load(f)

with open("tfidf_vectorizer.pkl", "rb") as f:
    tfidf_vectorizer = pickle.load(f)

with open("glove_tokenizer.pkl", "rb") as f:
    glove_tokenizer = pickle.load(f)

model_glove = tf.keras.models.load_model("glove_model.h5")

# ============ Constants ============
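# MAX_LENGTH is assumed to match the padded sequence length the GloVe model
# was trained with; changing it here would break inference.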
MAX_LENGTH = 300
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

# ============ Preprocessing ============
def clean_text(text):
    text = str(text).lower()
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub(f"[{re.escape(string.punctuation)}]", '', text)
    text = re.sub(r'\n', ' ', text)
    text = re.sub(r'\w*\d\w*', '', text)
    text = text.replace('“', '').replace('”', '').replace('’', "'").replace('‘', "'")
    text = re.sub(r"'s\b", '', text)
    
    tokens = word_tokenize(text)
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and len(word) > 2]
    return ' '.join(tokens)
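# Example (assuming the standard NLTK stopword/WordNet data):
#   clean_text("Markets <b>fell</b> by 3% today!") -> "market fell today"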

# ============ Prediction ============
def predict_ensemble(text):
    cleaned = clean_text(text)

    # Check if cleaned text is too short
    if len(cleaned.strip()) <= 10:
        return "Input too short to analyze."

    # TF-IDF-based predictions
    tfidf_vec = tfidf_vectorizer.transform([cleaned])
    prob_nb = nb_model.predict_proba(tfidf_vec)[0][1]
    prob_logreg = logreg_model.predict_proba(tfidf_vec)[0][1]

    # GloVe prediction
    glove_seq = glove_tokenizer.texts_to_sequences([cleaned])
    glove_pad = pad_sequences(glove_seq, maxlen=MAX_LENGTH, padding='post', truncating='post')
    prob_glove = float(model_glove.predict(glove_pad, verbose=0)[0][0])

    # Weighted ensemble
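    # Weights sum to 1.0; the 0.47 threshold was presumably tuned on
    # held-out data rather than using the default 0.5.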
    ensemble_score = 0.50 * prob_nb + 0.10 * prob_logreg + 0.40 * prob_glove
    label = "✅ Real News" if ensemble_score >= 0.47 else "❌ Fake News"

    explanation = f"""
**Naive Bayes:** {prob_nb:.4f}  
**Logistic Regression:** {prob_logreg:.4f}  
**GloVe Model:** {prob_glove:.4f}  
**Ensemble Score:** {ensemble_score:.4f}  
**Final Prediction:** {label}
"""
    return explanation

# ============ Gradio Interface ============
interface = gr.Interface(
    fn=predict_ensemble,
    inputs=gr.Textbox(lines=8, placeholder="Paste your news article here...", label="News Article"),
    outputs=gr.Markdown(label="Prediction"),
    title="📰 Fake News Detector",
    description="This tool uses three models (Naive Bayes, Logistic Regression, and a GloVe-based deep learning model) to classify news as real or fake via a weighted ensemble.",
    allow_flagging="never"
)

if __name__ == "__main__":
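    # launch(share=True) would additionally serve a temporary public URL.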
    interface.launch()