Alexvatti committed on
Commit
868fbbe
·
verified ·
1 Parent(s): d48d645

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +74 -0
app.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.optim as optim
4
+ import pandas as pd
5
+ import numpy as np
6
+ import re
7
+ import torch
8
+ import pickle
9
+ import json
10
+
11
# Locations of the serialized artifacts read at start-up. Both are relative
# paths, so they are resolved against the current working directory.
MODEL_PATH = "spam_model.pth"  # weights passed to torch.load
VOCAB_PATH = "vocab.pkl"  # pickled vocabulary object
14
+
15
class TransformerEncoder(nn.Module):
    """Transformer encoder with a single sigmoid output for binary classification.

    Token ids are embedded, summed with a learned positional parameter, run
    through a stack of ``nn.TransformerEncoderLayer`` blocks, and the vector
    at sequence position 0 is projected to one score in ``[0, 1]``.

    Args:
        d_model: Embedding width and hidden size of the encoder.
        num_heads: Number of attention heads in each encoder layer.
        d_ff: Width of the feed-forward sublayer in each encoder layer.
        num_layers: Number of stacked encoder layers.
        vocab_size: Number of rows in the embedding table.
        max_seq_len: Number of positions covered by the positional parameter;
            ``forward`` rejects longer sequences.
        dropout: Dropout probability used inside each encoder layer.
    """

    def __init__(self, d_model=256, num_heads=1, d_ff=512, num_layers=1, vocab_size=10000, max_seq_len=100, dropout=0.1):
        super().__init__()

        # Embedding & positional encoding. The positional term is a learned
        # parameter (initialised to zeros), not a fixed sinusoidal table.
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = nn.Parameter(torch.zeros(1, max_seq_len, d_model))

        # Transformer encoder layers; batch_first=True means inputs are
        # laid out as (batch, sequence, feature).
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=num_heads,
            dim_feedforward=d_ff,
            dropout=dropout,
            activation='relu',
            batch_first=True,
        )
        self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        # Classification head: one logit squashed to a probability-like score.
        self.fc = nn.Linear(d_model, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Score a batch of token-id sequences.

        Args:
            x: Integer tensor of token ids laid out as ``(batch, seq_len)``.

        Returns:
            Tensor of shape ``(batch, 1)`` with values in ``[0, 1]``.

        Raises:
            ValueError: If ``seq_len`` exceeds the number of positions in
                ``positional_encoding``.
        """
        seq_len = x.size(1)
        max_seq_len = self.positional_encoding.size(1)
        # Without this check an over-long input only fails later with an
        # opaque broadcast error when the positional slice is added.
        if seq_len > max_seq_len:
            raise ValueError(
                f"Input sequence length {seq_len} exceeds max_seq_len {max_seq_len}; "
                "truncate inputs to at most max_seq_len tokens."
            )

        x = self.embedding(x) + self.positional_encoding[:, :seq_len, :]
        # NOTE: no padding mask is passed, so padded positions take part in attention.
        x = self.encoder(x)
        x = x[:, 0, :]  # First token's output (CLS token equivalent)
        x = self.fc(x)
        return self.sigmoid(x)  # Binary classification (spam or not)
45
+
46
# Restore the vocabulary object stored at VOCAB_PATH.
# NOTE(review): pickle.load can execute arbitrary code from the file, so
# VOCAB_PATH must only ever point at a trusted artifact.
with open(VOCAB_PATH, "rb") as vocab_file:
    vocab = pickle.load(vocab_file)

# Build the network with the same hyper-parameters as the saved weights and
# restore them. The embedding table is sized from the loaded vocabulary.
device = torch.device("cpu")  # Change to "cuda" if using GPU
model = TransformerEncoder(
    d_model=256,
    num_heads=1,
    num_layers=1,
    vocab_size=len(vocab),
    max_seq_len=100,
)
model = model.to(device)
model.load_state_dict(torch.load(MODEL_PATH, map_location=device))
model.eval()  # Inference mode: disables dropout

print("✅ Model and vocabulary loaded successfully!")
56
+
57
def simple_tokenize(text):
    """Lower-case ``text`` and return its word tokens in order.

    A token is a maximal run of ``\\w`` characters delimited by word
    boundaries, so punctuation splits words (``"it's"`` -> ``"it"``, ``"s"``).
    """
    lowered = text.lower()
    return [match.group(0) for match in re.finditer(r"\b\w+\b", lowered)]
59
+
60
def predict(text, model, vocab, max_len=1000):
    """Classify ``text`` as ``"Spam"`` or ``"Ham"`` with ``model``.

    The text is tokenized with ``simple_tokenize``, mapped to ids through
    ``vocab`` (unknown words use ``vocab['<UNK>']``), truncated or padded with
    ``vocab['<PAD>']`` to a fixed length, and scored by the model.

    Args:
        text: Raw message to classify.
        model: Module returning one score in ``[0, 1]`` per input sequence.
            If it exposes a ``positional_encoding`` tensor, its second
            dimension caps the sequence length that is fed in.
        vocab: Mapping from token to id; must contain ``'<UNK>'`` and ``'<PAD>'``.
        max_len: Upper bound on the padded sequence length.

    Returns:
        ``"Spam"`` if the model score is above 0.5, otherwise ``"Ham"``.

    Raises:
        ValueError: If the effective sequence length is not positive.
    """
    model.eval()

    # Never feed more positions than the model can encode: the default
    # max_len is larger than the positional table of the model loaded in this
    # file, which previously caused a shape-mismatch crash in forward().
    positional = getattr(model, "positional_encoding", None)
    if positional is not None:
        max_len = min(max_len, positional.size(1))
    if max_len < 1:
        raise ValueError(f"max_len must be positive, got {max_len}")

    unk_id = vocab['<UNK>']
    pad_id = vocab['<PAD>']

    # simple_tokenize already lower-cases its input.
    tokens = simple_tokenize(text)
    token_ids = [vocab.get(word, unk_id) for word in tokens[:max_len]]  # Truncate if needed
    token_ids += [pad_id] * (max_len - len(token_ids))  # Pad if needed

    # Place the input on the model's own device instead of relying on a
    # module-level global; fall back to CPU for parameter-less models.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")
    input_tensor = torch.tensor([token_ids], dtype=torch.long).to(target_device)

    with torch.no_grad():
        output = model(input_tensor).squeeze().item()

    return "Spam" if output > 0.5 else "Ham"
71
+
72
# Smoke test: classify one sample SMS at start-up and print the label.
sample_text = "FreeMsg Hey there darling it's been 3 week's now and no word back! I'd like some fun you up for it still? Tb ok! XxX std chgs to send, £1.50 to rcv"
sample_label = predict(sample_text, model, vocab)
print(f"Prediction: {sample_label}")