Spaces:

adlozano1
/

gibberish_detector

Runtime error

App Files Files Community

Al John Lexter Lozano committed on Mar 27, 2022

Commit

9a34627

1 Parent(s): ead2dcb

add DL model, fixed examples, add visual output

Browse files

Files changed (2) hide show

app.py +85 -14
demo_mixed.txt +5 -0

app.py CHANGED Viewed

@@ -1,20 +1,31 @@
 from fastapi import File
 import gradio as gr
 from gib_detect_module import detect
 import csv
 def greet(name):
     return "Hello " + name + "!!"
 def detect_gibberish(line,f):
     if line:
         if detect(line):
-            return "Valid!!!!", None
         else:
-            return "Bollocks Giberrish",None
     elif f:
-        return None, annotate_csv(f)
 def annotate_csv(f):
@@ -25,22 +36,82 @@ def annotate_csv(f):
             cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
             for row in creader:
-                print(row)
                 row.append(str(detect(row[0])))
-                cwriter.writerow(row)
             return "out.csv"
 inputFile=gr.inputs.File(file_count="single", type="file", label="File to Annotate", optional=True)
 outputFile=gr.outputs.File( label="Annotated CSV")
 examples=[
-    ["quetzalcoatl","demo_blank.csv"],
-    ["Shinkansen","demo_blank.csv"],
-    ["aasdf","demo_blank.csv"],
-    ["Covfefe","demo_blank.csv"]
 ]
-iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')
-iface.launch()

+from cProfile import label
 from fastapi import File
 import gradio as gr
 from gib_detect_module import detect
 import csv
+import torch
+import tensorflow as tf
+from transformers import AutoModelForSequenceClassification, AutoTokenizer
+DLmodel = AutoModelForSequenceClassification.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
+tokenizer = AutoTokenizer.from_pretrained("madhurjindal/autonlp-Gibberish-Detector-492513457", use_auth_token=True)
 def greet(name):
     """Return the greeting ``Hello <name>!!`` for the given string *name*."""
     pieces = ("Hello ", name, "!!")
     return "".join(pieces)
 def detect_gibberish(line,f):
     if line:
         if detect(line):
+            return "Valid!!!!", None,None
         else:
+            return "Bollocks Giberrish",None,None
     elif f:
+        return None, annotate_csv(f), None
 def annotate_csv(f):
             cwriter = csv.writer(csvout, delimiter=',',
                                 quotechar='"', quoting=csv.QUOTE_MINIMAL)
             for row in creader:
                 row.append(str(detect(row[0])))
+                cwriter.writerow(row)
+            return "out.csv"
+def annotate_csv_deep(f):
+    labels = DLmodel.config.id2label
+    with open(f.name) as csvfile:
+        creader = csv.reader(csvfile, delimiter=',', quotechar='"')
+        with open('out.csv', 'w', newline='') as csvout:
+            cwriter = csv.writer(csvout, delimiter=',',
+                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
+            for row in creader:
+                inputs = tokenizer(row, return_tensors="pt")
+                outputs = DLmodel(**inputs)
+                probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
+                idx = probs.index(max(probs))
+                row.append(labels[idx])
+                row.append("{:.0%}".format(probs[idx]) )
+                cwriter.writerow(row)
             return "out.csv"
+def detect_gibberish_deep(line,f):
+    if line:
+        inputs = tokenizer(line, return_tensors="pt")
+        labels = DLmodel.config.id2label
+        outputs = DLmodel(**inputs)
+        probs = outputs.logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
+        output=dict(zip(labels.values(), probs))
+        readable_output=""
+        for k,v in output.items():
+            readable_output+=k+" : "+ "{:.0%}".format(v) + "\n"
+        return readable_output, None, output
+    if f:
+        return None, annotate_csv_deep(f),None
+def detect_gibberish_abstract(model, line,f):
+    if model == "Deep Learning Model":
+        return detect_gibberish_deep(line,f)
+    else:
+        return detect_gibberish(line, f)
+inputLine=gr.inputs.Textbox(lines=1, placeholder="Input text here, if both text and file have values, only the text input will be processed.", default="", label="Text", optional=False)
 inputFile=gr.inputs.File(file_count="single", type="file", label="File to Annotate", optional=True)
+choices = ["Deep Learning Model", "Markov Chain"]
+inputModel=gr.inputs.Dropdown(choices)
+outputLine=gr.outputs.Textbox(type="auto", label=None)
 outputFile=gr.outputs.File( label="Annotated CSV")
+label = gr.outputs.Label(num_top_classes=4)
 examples=[
+    ["Deep Learning Model","quetzalcoatl","demo_blank.csv"],
+    ["Deep Learning Model","aasdf","demo_blank.csv"],
+    ["Deep Learning Model","Covfefe","demo_blank.csv"],
+    ["Markov Chain","quetzalcoatl","demo_blank.csv"],
+    ["Markov Chain","aasdf","demo_blank.csv"],
+    ["Markov Chain","Covfefe","demo_blank.csv"],
+    ["Deep Learning Model","","demo_bad.txt"],
+    ["Deep Learning Model","","demo_mixed.txt"],
+    ["Markov Chain","","demo_bad.txt"],
+    ["Markov Chain","","demo_mixed.txt"],
 ]
+#iface = gr.Interface(fn=[detect_gibberish], inputs=["text",inputFile], outputs=["text",outputFile],examples=examples, allow_flagging='never')
+#iface.launch()
+iface = gr.Interface(fn=[detect_gibberish_abstract], inputs=[inputModel,inputLine,inputFile], outputs=["text",outputFile,label],examples=examples, allow_flagging='never')
+iface.launch()

demo_mixed.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+"The quick brown fox."
+"nmnjcviburili,<>"
+"This is a legitimate line"
+"ertrjiloifdfyyoiu"
+"1+1 =2"