Den4ikAI committed on
Commit
cc4aa64
1 Parent(s): dd4d232

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +97 -0
README.md ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ language:
4
+ - ru
5
+ ---
6
+
7
+ RUPunct_big — самая большая модель из семейства RUPunct. Подходит для большинства задач.
8
+
9
+ Код инференса:
10
+ ```py
11
+ from transformers import pipeline
12
+ from transformers import AutoTokenizer
13
+
14
# Checkpoint identifier, shared by the tokenizer and the pipeline model below.
pt = "RUPunct/RUPunct_big"

# Tokenizer is built explicitly so the accent/prefix-space options can be set.
tk = AutoTokenizer.from_pretrained(
    pt,
    strip_accents=False,
    add_prefix_space=True,
)

# Token-classification ("ner") pipeline wired to the tokenizer created above.
classifier = pipeline(
    "ner",
    model=pt,
    tokenizer=tk,
    aggregation_strategy="first",
)
18
+
19
+
20
# Text appended to a token, keyed by the punctuation part of the label.
_PUNCTUATION_BY_SUFFIX = {
    "O": "",
    "PERIOD": ".",
    "COMMA": ",",
    "QUESTION": "?",
    "TIRE": " —",
    "DVOETOCHIE": ":",
    "VOSKL": "!",
    "PERIODCOMMA": ";",
    "DEFIS": "-",
    "MNOGOTOCHIE": "...",
    "QUESTIONVOSKL": "?!",
}

# Case transform keyed by the label prefix. "UPPER_TOTAL_" has to be checked
# before "UPPER_" because the shorter prefix also matches the longer labels.
_CASE_BY_PREFIX = (
    ("UPPER_TOTAL_", str.upper),
    ("UPPER_", str.capitalize),
    ("LOWER_", None),
)


def process_token(token: str, label: str) -> str:
    """Apply the casing and punctuation encoded in ``label`` to ``token``.

    A label is a case prefix (``LOWER_``, ``UPPER_`` or ``UPPER_TOTAL_``)
    followed by a punctuation name such as ``O``, ``PERIOD`` or ``COMMA``.

    Args:
        token: The word to transform.
        label: The label assigned to the word.

    Returns:
        The token with its case adjusted (unchanged for ``LOWER_``,
        ``str.capitalize`` for ``UPPER_``, ``str.upper`` for
        ``UPPER_TOTAL_``) and the punctuation appended. A label that is not
        recognised yields the token unchanged rather than ``None``, so the
        result can always be concatenated.
    """
    for prefix, transform in _CASE_BY_PREFIX:
        if not label.startswith(prefix):
            continue
        punctuation = _PUNCTUATION_BY_SUFFIX.get(label[len(prefix):])
        if punctuation is None:
            # Known prefix but unknown punctuation name: fall back below.
            break
        cased = token if transform is None else transform(token)
        return cased + punctuation
    return token
87
+
88
# Interactive loop: read a line, run the classifier on it, and print the text
# rebuilt from the per-word labels.
while True:
    try:
        input_text = input(":> ")
    except (EOFError, KeyboardInterrupt):
        # End of input or Ctrl-C at the prompt: stop without a traceback.
        break

    preds = classifier(input_text)
    words = []
    for item in preds:
        # A bare "." must not get extra punctuation or casing. "LOWER_O" is
        # the label process_token passes through unchanged; the bare "O" used
        # before is not one of the labels it handles.
        label = "LOWER_O" if item["word"] == "." else item["entity_group"]
        words.append(process_token(item["word"].strip(), label))

    # Join once instead of repeated "+=", which also avoids a leading space.
    print(">>>", " ".join(words))
97
+ ```