yaya36095 committed
Commit 3fb2361 · verified · 1 Parent(s): 0011b35

Update preprocessor_config.json

Files changed (1): preprocessor_config.json (+52 -25)
preprocessor_config.json CHANGED
@@ -5,22 +5,33 @@
   ],
   "model_type": "xlm-roberta",
   "tokenizer_class": "XLMRobertaTokenizer",
-  "max_length": 512,
-  "padding": "max_length",
-  "truncation": true,
-  "num_labels": 2,
-  "id2label": {
-    "0": "HUMAN",
-    "1": "AI"
+  "task_specific_params": {
+    "text-classification": {
+      "num_labels": 2,
+      "id2label": {
+        "0": "HUMAN",
+        "1": "AI"
+      },
+      "label2id": {
+        "HUMAN": 0,
+        "AI": 1
+      }
+    }
   },
-  "label2id": {
-    "HUMAN": 0,
-    "AI": 1
+  "text_config": {
+    "max_length": 512,
+    "padding": "max_length",
+    "truncation": true,
+    "return_tensors": "pt"
+  },
+  "preprocessing": {
+    "do_lower_case": false,
+    "strip_accents": false,
+    "add_special_tokens": true,
+    "padding": true,
+    "truncation": true,
+    "max_length": 512
   },
-  "do_lower_case": false,
-  "strip_accents": false,
-  "use_fast": true,
-  "add_prefix_space": true,
   "special_tokens": {
     "bos_token": "<s>",
     "eos_token": "</s>",
@@ -30,15 +41,31 @@
     "cls_token": "<s>",
     "mask_token": "<mask>"
   },
-  "clean_up_tokenization_spaces": true,
-  "model_max_length": 512,
-  "padding_side": "right",
-  "truncation_side": "right",
-  "return_attention_mask": true,
-  "return_token_type_ids": false,
-  "return_overflowing_tokens": false,
-  "return_special_tokens_mask": false,
-  "return_offsets_mapping": false,
-  "return_length": false,
-  "verbose": true
+  "tokenizer_settings": {
+    "clean_up_tokenization_spaces": true,
+    "model_max_length": 512,
+    "padding_side": "right",
+    "truncation_side": "right",
+    "return_attention_mask": true,
+    "return_token_type_ids": false
+  },
+  "inference_config": {
+    "return_all_scores": true,
+    "output_hidden_states": false,
+    "output_attentions": false,
+    "return_dict": true,
+    "problem_type": "single_label_classification"
+  },
+  "model_params": {
+    "attention_probs_dropout_prob": 0.1,
+    "hidden_dropout_prob": 0.1,
+    "hidden_size": 768,
+    "intermediate_size": 3072,
+    "max_position_embeddings": 514,
+    "num_attention_heads": 12,
+    "num_hidden_layers": 12,
+    "type_vocab_size": 1,
+    "vocab_size": 250002,
+    "layer_norm_eps": 1e-05
+  }
 }
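
Note that the grouped keys introduced by this commit (task_specific_params, text_config, preprocessing, tokenizer_settings, inference_config, model_params) are not fields that transformers reads from preprocessor_config.json automatically; id2label/label2id, for instance, normally live in config.json. A consumer would therefore load this file and apply its values explicitly. Below is a minimal sketch of that under two stated assumptions: the repo id is a placeholder (the commit does not name the model repository), and the file on disk is the updated version from this commit.

    import json

    import torch
    from transformers import AutoModelForSequenceClassification, AutoTokenizer

    # Placeholder repo id -- not confirmed by this commit.
    REPO_ID = "yaya36095/ai-text-detector"

    tokenizer = AutoTokenizer.from_pretrained(REPO_ID)
    model = AutoModelForSequenceClassification.from_pretrained(REPO_ID)
    model.eval()

    # Read the custom sections of the updated preprocessor_config.json.
    with open("preprocessor_config.json") as f:
        cfg = json.load(f)

    prep = cfg["preprocessing"]
    id2label = cfg["task_specific_params"]["text-classification"]["id2label"]

    # Tokenize with the settings from the "preprocessing" section.
    inputs = tokenizer(
        "Example text whose author we want to classify.",
        add_special_tokens=prep["add_special_tokens"],
        padding=prep["padding"],
        truncation=prep["truncation"],
        max_length=prep["max_length"],
        return_tensors="pt",
    )

    with torch.no_grad():
        logits = model(**inputs).logits  # shape (1, 2): HUMAN vs. AI

    pred = logits.argmax(dim=-1).item()
    print(id2label[str(pred)])  # keys in the JSON are strings: "0" / "1"

If the repo's config.json already carries the label mappings (their standard location), model.config.id2label can be used directly instead of re-reading them from this file.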