Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	app file created
Browse files
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,101 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import gradio as gr
         | 
| 2 | 
            +
            from transformers import AutoTokenizer, AutoModelForSequenceClassification
         | 
| 3 | 
            +
            import torch
         | 
| 4 | 
            +
            import numpy as np
         | 
| 5 | 
            +
            import re
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            from turkish.deasciifier import Deasciifier
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            # Load the tokenizer and the sequence-classification model from the same
            # checkpoint, then move the model to the GPU when CUDA is available and
            # to the CPU otherwise.
            MODEL_ID = "TURKCELL/bert-offensive-lang-detection-tr"
            tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
            model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
            device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
            model.to(device)
         | 
| 14 | 
            +
             | 
| 15 | 
            +
             | 
| 16 | 
            +
            def deasciifier(text):
                """Return *text* after passing it through ``Deasciifier.convert_to_turkish``.

                Presumably this restores Turkish diacritics in ASCII-only text; the
                actual conversion rules live in the ``turkish.deasciifier`` package.
                """
                return Deasciifier(text).convert_to_turkish()
         | 
| 19 | 
            +
             | 
| 20 | 
            +
             | 
| 21 | 
            +
            def remove_circumflex(text):
                """Replace circumflexed vowels with their unaccented counterparts.

                Handles â, î, û, ô and their upper-case forms; every other character
                is passed through unchanged.
                """
                plain_for = dict(zip("âîûôÂÎÛÔ", "aiuoAIUO"))

                pieces = []
                for ch in text:
                    pieces.append(plain_for.get(ch, ch))
                return ''.join(pieces)
         | 
| 34 | 
            +
             | 
| 35 | 
            +
             | 
| 36 | 
            +
            def turkish_lower(text):
                """Lower-case *text* character by character with Turkish casing rules.

                The Turkish upper-case letters (including the dotless/dotted I pair)
                are mapped explicitly, so 'I' becomes 'ı' and 'İ' becomes 'i'; the
                result of the lookup is then passed through ``str.lower()``, which
                takes care of every remaining character.
                """
                special_upper = "IİÇŞĞÜÖ"
                special_lower = "ıiçşğüö"
                lowered = dict(zip(special_upper, special_lower))

                def lower_one(ch):
                    # Explicit Turkish mapping first, generic lower-casing second.
                    return lowered.get(ch, ch).lower()

                return ''.join(map(lower_one, text))
         | 
| 47 | 
            +
             | 
| 48 | 
            +
             | 
| 49 | 
            +
            def clean_text(text):
                """Normalize a raw sentence before it is handed to the tokenizer.

                Steps, in order: strip circumflexes, lower-case with Turkish rules,
                run the deasciifier, then replace @mentions, #hashtags, URLs, text
                emoticons, punctuation and emoji with spaces, and finally collapse
                runs of whitespace.

                Args:
                    text: The raw input string.

                Returns:
                    The cleaned string, single-spaced and without leading or
                    trailing whitespace.
                """
                # Replace circumflexed letters with their plain forms.
                text = remove_circumflex(text)
                # Lower-case the text using Turkish casing rules.
                text = turkish_lower(text)
                # Run the deasciifier.
                text = deasciifier(text)
                # Remove user mentions.
                text = re.sub(r"@\S*", " ", text)
                # Remove hashtags.
                text = re.sub(r'#\S+', ' ', text)
                # Remove URLs.
                text = re.sub(r"http\S+|www\S+|https\S+", ' ', text, flags=re.MULTILINE)
                # Remove text emoticons BEFORE generic punctuation. When both lived in
                # one alternation with the punctuation class first, the ':' / ';' was
                # always consumed as punctuation, so ':D', ':P' and ':o' left a stray
                # letter behind. Matching is case-insensitive because the text has
                # already been lower-cased; the lookahead keeps 'word:other' intact.
                # NOTE(review): this changes the model input for such emoticons --
                # confirm it matches the preprocessing used when the model was trained.
                text = re.sub(r':\)|:\(|;\)|:[dpo](?!\w)', ' ', text, flags=re.IGNORECASE)
                # Remove the remaining punctuation.
                text = re.sub(r'[^\w\s]', ' ', text)
                # Remove emoji. Symbols that are neither \w nor \s were already replaced
                # by the punctuation step above, so this mainly affects word characters
                # inside these ranges. The last range (U+24C2..U+1F251) is very wide and
                # also covers non-emoji characters such as CJK ideographs.
                emoji_pattern = re.compile("["
                                           u"\U0001F600-\U0001F64F"  # emoticons
                                           u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                                           u"\U0001F680-\U0001F6FF"  # transport & map symbols
                                           u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                                           u"\U00002702-\U000027B0"
                                           u"\U000024C2-\U0001F251"
                                           "]+", flags=re.UNICODE)
                text = emoji_pattern.sub(r' ', text)

                # Collapse runs of whitespace into a single space.
                text = re.sub(r'\s+', ' ', text).strip()
                return text
         | 
| 78 | 
            +
             | 
| 79 | 
            +
             | 
| 80 | 
            +
            def is_offensive(sentence):
                """Classify a sentence as 'offensive' or 'non-offensive'.

                The sentence is normalized with ``clean_text``, tokenized (truncated
                to 256 tokens), run through the module-level model on ``device``, and
                the index of the largest logit is mapped to a label string.

                Args:
                    sentence: The raw input text.

                Returns:
                    'offensive' or 'non-offensive'.
                """
                normalized = clean_text(sentence)

                encoded = tokenizer(normalized, padding=True, truncation=True, max_length=256, return_tensors='pt')
                encoded = {name: tensor.to(device) for name, tensor in encoded.items()}

                # Inference only: disable gradient tracking so no autograd graph is
                # built for the forward pass.
                with torch.no_grad():
                    logits = model(**encoded).logits
                predicted = np.argmax(logits.cpu().numpy(), axis=1)

                labels = {0: 'non-offensive', 1: 'offensive'}
                # Cast the NumPy integer to a plain int before the dict lookup.
                return labels[int(predicted[0])]
         | 
| 91 | 
            +
             | 
| 92 | 
            +
             | 
| 93 | 
            +
            # Build the Gradio UI: a two-line textbox as input and the predicted
            # label rendered as plain text, then start the app.
            sentence_box = gr.Textbox(lines=2, placeholder="Enter sentence here...")
            iface = gr.Interface(
                fn=is_offensive,
                inputs=sentence_box,
                outputs="text",
                title="Offensive Language Detection",
                description="Offensive language detection for Turkish",
            )

            iface.launch()
         | 
