Spaces:

bhavyapandya
/

Troll-detection

Sleeping

App Files Files Community

Troll-detection / app.py

bhavyapandya

Upload 2 files

3691fcb about 2 years ago

raw

history blame contribute delete

2.36 kB

	import gradio as gr
	import tensorflow as tf
	from transformers import BertTokenizer, TFBertForSequenceClassification
	import re
	from nltk.corpus import stopwords
	from nltk.stem import WordNetLemmatizer
	from nltk.stem import PorterStemmer
	from tqdm import tqdm
	import nltk
	nltk.download('wordnet')
	nltk.download('stopwords')
	def clean_text(raw_text):
	# Remove unnecessary symbols and numbers
	cleaned_text = re.sub('[^a-zA-Z]', ' ', raw_text)

	# Convert to lowercase
	cleaned_text = cleaned_text.lower()

	# Tokenize the text
	words = cleaned_text.split()

	# Remove stop words
	stop_words = set(stopwords.words('english'))
	words = [word for word in words if word not in stop_words]

	# Perform stemming
	stemmer = PorterStemmer()
	words = [stemmer.stem(word) for word in words]

	# Perform lemmatization
	lemmatizer = WordNetLemmatizer()
	words = [lemmatizer.lemmatize(word) for word in words]

	# Join the cleaned words back into a single string
	cleaned_text = ' '.join(words)

	return cleaned_text
	# Load the pre-trained BERT model and tokenizer
	model_name = 'bert-base-uncased'
	tokenizer = BertTokenizer.from_pretrained(model_name)
	model = TFBertForSequenceClassification.from_pretrained("troll_model")

	def predict_text(input_text):
	# Tokenize and encode the input text
	input_ids = tokenizer.encode_plus(
	clean_text(input_text),
	add_special_tokens=True,
	max_length=128,
	padding='longest',
	truncation=True,
	return_tensors='tf'
	)['input_ids']

	# Make prediction
	predictions = model.predict(input_ids)[0]

	# Get predicted label and confidence scores
	predicted_label = tf.argmax(predictions, axis=1).numpy()[0]
	confidence_scores = tf.nn.softmax(predictions, axis=1).numpy()[0]

	# Interpret the predicted label
	if predicted_label == 0:
	output_text = f"Not troll, Troll level: {confidence_scores[1]}"
	else:
	output_text = f"Troll, Troll level: {confidence_scores[1]}"

	return output_text

	# Create the Gradio interface
	iface = gr.Interface(
	fn=predict_text,
	inputs="text",
	outputs="text",
	title="Text Classification",
	description="Enter a text and the model will predict its class.",
	theme="default"
	)

	# Launch the interface
	iface.launch()