Spaces:

ifmain
/

moderation-2

Sleeping

App Files Community

ifmain committed on Apr 2

Commit

f9d050d

verified ·

1 Parent(s): 673e117

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -16

app.py CHANGED Viewed

@@ -1,25 +1,60 @@
-import gradio as gr
 import torch
-from moderation import *
-moderation = ModerationModel()
-moderation.load_state_dict(torch.load('moderation_model.pth', map_location=torch.device('cpu'))) # Remove map_location if run on gpu
-moderation.eval()
 def predict_moderation(text):
-    embeddings_for_prediction = getEmb(text)
-    prediction = predict(moderation, embeddings_for_prediction)
-    category_scores = prediction.get('category_scores', {})
-    detected = prediction.get('detected', False)
-    return category_scores, str(detected)
-iface = gr.Interface(fn=predict_moderation,
-                     inputs="text",
-                     outputs=[gr.Label(label="Category Scores"), gr.Label(label="Detected")],
-                     title="Moderation Model",
-                     description="Enter text to check for moderation flags.")
-iface.launch()

+import json
 import torch
+from transformers import BertTokenizer, BertForSequenceClassification
+import gradio as gr
+model_name = "ifmain/ModerationBERT-En-02"
+tokenizer = BertTokenizer.from_pretrained(model_name)
+model = BertForSequenceClassification.from_pretrained(model_name, num_labels=18)
+device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
+model.to(device)
+categories = [
+    'harassment', 'harassment_threatening', 'hate', 'hate_threatening',
+    'self_harm', 'self_harm_instructions', 'self_harm_intent', 'sexual',
+    'sexual_minors', 'violence', 'violence_graphic', 'self-harm',
+    'sexual/minors', 'hate/threatening', 'violence/graphic',
+    'self-harm/intent', 'self-harm/instructions', 'harassment/threatening'
+]
 def predict_moderation(text):
+    encoding = tokenizer.encode_plus(
+        text,
+        add_special_tokens=True,
+        max_length=128,
+        return_token_type_ids=False,
+        padding='max_length',
+        truncation=True,
+        return_attention_mask=True,
+        return_tensors='pt'
+    )
+    input_ids = encoding['input_ids'].to(device)
+    attention_mask = encoding['attention_mask'].to(device)
+    model.eval()
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+    probs = torch.sigmoid(outputs.logits)[0].cpu().numpy()
+    category_scores = {categories[i]: float(probs[i]) for i in range(len(categories))}
+    detected = any(prob > 0.5 for prob in probs)
+    return category_scores, str(detected)
+iface = gr.Interface(
+    fn=predict_moderation,
+    inputs=gr.Textbox(label="Enter text"),
+    outputs=[
+        gr.Label(label="Ratings by category"),
+        gr.Label(label="Was a violation detected?")
+    ],
+    title="Text moderation",
+    description="Enter text to check it for content violations (ModerationBERT-En-02 model)."
+)
+iface.launch()