bhavyapandya committed on
Commit
3691fcb
·
1 Parent(s): dee479b

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +80 -0
  2. requirements.txt +0 -0
app.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import tensorflow as tf
3
+ from transformers import BertTokenizer, TFBertForSequenceClassification
4
+ import re
5
+ from nltk.corpus import stopwords
6
+ from nltk.stem import WordNetLemmatizer
7
+ from nltk.stem import PorterStemmer
8
+ from tqdm import tqdm
9
+ import nltk
10
# Fetch the NLTK data packages used by the text-cleaning step (the WordNet
# data for the lemmatizer and the stop-word lists) before they are needed.
for _corpus_name in ('wordnet', 'stopwords'):
    nltk.download(_corpus_name)
12
# Matches every character that is not an ASCII letter.
_NON_LETTERS = re.compile(r'[^a-zA-Z]')

# Lazily built (stop_words, stemmer, lemmatizer) triple shared by all calls.
_TEXT_TOOLS = None


def _get_text_tools():
    """Return the shared stop-word set, stemmer and lemmatizer, building them once."""
    global _TEXT_TOOLS
    if _TEXT_TOOLS is None:
        # Assign the finished tuple in one step so a concurrent caller never
        # observes a half-initialised state; at worst it is built twice.
        _TEXT_TOOLS = (
            frozenset(stopwords.words('english')),
            PorterStemmer(),
            WordNetLemmatizer(),
        )
    return _TEXT_TOOLS


def clean_text(raw_text):
    """Normalise free text into a space-separated string of processed tokens.

    Steps, in order: replace every non-letter character with a space,
    lowercase, split on whitespace, drop English stop words, stem each word
    with the Porter stemmer, then lemmatize the stemmed word with WordNet.

    Args:
        raw_text: The text to clean. ``None`` or an empty string is accepted.

    Returns:
        The cleaned words joined by single spaces, or ``""`` when the input
        is empty or contains no letters.
    """
    if not raw_text:
        return ""

    words = _NON_LETTERS.sub(' ', raw_text).lower().split()
    if not words:
        # Nothing left to process, so skip loading the NLTK resources.
        return ""

    stop_words, stemmer, lemmatizer = _get_text_tools()

    # Stemming followed by lemmatization is kept exactly as before so the
    # text handed to the classifier is unchanged.
    # NOTE(review): presumably the model was trained on text cleaned this
    # way -- confirm before simplifying the pipeline.
    kept = (word for word in words if word not in stop_words)
    return ' '.join(lemmatizer.lemmatize(stemmer.stem(word)) for word in kept)
38
# The tokenizer is loaded for the 'bert-base-uncased' checkpoint, while the
# sequence-classification weights are loaded from "troll_model".
# NOTE(review): "troll_model" is presumably a local directory shipped next to
# this script -- confirm.
model_name = "bert-base-uncased"
tokenizer = BertTokenizer.from_pretrained(model_name)
model = TFBertForSequenceClassification.from_pretrained('troll_model')
42
+
43
def predict_text(input_text):
    """Classify ``input_text`` as troll / not troll and report the troll score.

    The text is cleaned with ``clean_text``, encoded with the module-level
    ``tokenizer`` (truncated to 128 tokens) and passed to the module-level
    ``model``. The first element of the model output is treated as the
    per-class scores; a softmax over them gives the confidences.

    Args:
        input_text: The raw text entered by the user.

    Returns:
        A string starting with "Not troll" when class 0 has the highest
        score and "Troll" otherwise, followed by the softmax value of
        class 1 as the "Troll level".
    """
    encoding = tokenizer.encode_plus(
        clean_text(input_text),
        add_special_tokens=True,
        max_length=128,
        padding='longest',
        truncation=True,
        return_tensors='tf'
    )
    token_ids = encoding['input_ids']

    # First element of the prediction output -- used as the raw class scores.
    scores = model.predict(token_ids)[0]

    winning_class = tf.argmax(scores, axis=1).numpy()[0]
    probabilities = tf.nn.softmax(scores, axis=1).numpy()[0]

    # Index 1 is reported as the troll probability in both branches.
    if winning_class == 0:
        return f"Not troll, Troll level: {probabilities[1]}"
    return f"Troll, Troll level: {probabilities[1]}"
68
+
69
# Expose predict_text through a plain text-in / text-out Gradio UI.
iface = gr.Interface(
    title="Text Classification",
    description="Enter a text and the model will predict its class.",
    fn=predict_text,
    inputs="text",
    outputs="text",
    theme="default",
)

# Start serving the UI.
iface.launch()
requirements.txt ADDED
Binary file (3.85 kB). View file