adwardlee committed on
Commit
33a11e7
·
verified ·
1 Parent(s): be5b108

Upload folder using huggingface_hub

Browse files
lora/adapter_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "model_zoo/internlm-xcomposer2-vl-7b",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layer_replication": null,
10
+ "layers_pattern": null,
11
+ "layers_to_transform": null,
12
+ "loftq_config": {},
13
+ "lora_alpha": 256,
14
+ "lora_dropout": 0.05,
15
+ "megatron_config": null,
16
+ "megatron_core": "megatron.core",
17
+ "modules_to_save": null,
18
+ "peft_type": "LORA",
19
+ "r": 256,
20
+ "rank_pattern": {},
21
+ "revision": null,
22
+ "target_modules": [
23
+ "attention.wo",
24
+ "feed_forward.w2",
25
+ "attention.wqkv",
26
+ "feed_forward.w1",
27
+ "feed_forward.w3"
28
+ ],
29
+ "task_type": "CAUSAL_LM",
30
+ "use_dora": false,
31
+ "use_rslora": false
32
+ }
lora/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cc549f4f6f3a2763298d164ee5589fc213e716d8286b3af5fbe0dbda6bd01d9
3
+ size 1208003536
lora/config.yaml ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ data_cfg:
2
+ data_cfg:
3
+ eval:
4
+ JsonDataset:
5
+ base_path: data
6
+ fairness: true
7
+ generated_ratio: 1.0
8
+ real_ratio: 1.0
9
+ safety: true
10
+ SMID:
11
+ base_path: data/evaluator_test/SMID
12
+ SelfHarm:
13
+ base_path: data/evaluator_test/self-harm
14
+ UnsafeBench:
15
+ base_path: data/evaluator_test/UnsafeBench
16
+ UnsafeDiff:
17
+ base_path: data/evaluator_test/Unsafe_diff
18
+ ViolentBehavior:
19
+ base_path: data/evaluator_test/Violent_behavior
20
+ train:
21
+ base_path: data
22
+ fairness: true
23
+ generated_ratio: 1.0
24
+ max_face_length: 2000
25
+ max_generated_dim_length: 6000
26
+ max_real_dim_length: 600
27
+ real_ratio: 1.0
28
+ safe_ratio: 0.3
29
+ safety: true
30
+ verion: 3
31
+ lora_cfg:
32
+ lora_alpha: 256
33
+ lora_bias: none
34
+ lora_dropout: 0.05
35
+ lora_r: 256
36
+ lora_target_modules:
37
+ - attention.wqkv
38
+ - attention.wo
39
+ - feed_forward.w1
40
+ - feed_forward.w2
41
+ - feed_forward.w3
42
+ lora_type: lora
43
+ lora_weight_path: ''
44
+ model_cfg:
45
+ model_name_or_path: model_zoo/internlm-xcomposer2-vl-7b
46
+ training_cfg:
47
+ _n_gpu: 1
48
+ accelerator_config:
49
+ dispatch_batches: null
50
+ even_batches: true
51
+ gradient_accumulation_kwargs: null
52
+ non_blocking: false
53
+ split_batches: false
54
+ use_seedable_sampler: true
55
+ adafactor: false
56
+ adam_beta1: 0.9
57
+ adam_beta2: 0.95
58
+ adam_epsilon: 1.0e-08
59
+ auto_find_batch_size: false
60
+ batch_eval_metrics: false
61
+ bf16: true
62
+ bf16_full_eval: false
63
+ cache_dir: null
64
+ data_seed: null
65
+ dataloader_drop_last: false
66
+ dataloader_num_workers: 0
67
+ dataloader_persistent_workers: false
68
+ dataloader_pin_memory: true
69
+ dataloader_prefetch_factor: null
70
+ ddp_backend: null
71
+ ddp_broadcast_buffers: null
72
+ ddp_bucket_cap_mb: null
73
+ ddp_find_unused_parameters: null
74
+ ddp_timeout: 1800
75
+ debug: []
76
+ deepspeed: scripts/ds_config_zero2.json
77
+ disable_tqdm: false
78
+ dispatch_batches: null
79
+ do_eval: true
80
+ do_predict: false
81
+ do_train: false
82
+ eval_accumulation_steps: null
83
+ eval_delay: 0
84
+ eval_do_concat_batches: true
85
+ eval_steps: null
86
+ fix_sampler: false
87
+ fix_vit: true
88
+ fp16: false
89
+ fp16_backend: auto
90
+ fp16_full_eval: false
91
+ fp16_opt_level: O1
92
+ fsdp: []
93
+ fsdp_config:
94
+ min_num_params: 0
95
+ xla: false
96
+ xla_fsdp_grad_ckpt: false
97
+ xla_fsdp_v2: false
98
+ fsdp_min_num_params: 0
99
+ fsdp_transformer_layer_cls_to_wrap: null
100
+ full_determinism: false
101
+ gradient_accumulation_steps: 1
102
+ gradient_checkpointing: true
103
+ gradient_checkpointing_kwargs: null
104
+ greater_is_better: null
105
+ group_by_length: false
106
+ half_precision_backend: auto
107
+ hub_always_push: false
108
+ hub_model_id: null
109
+ hub_private_repo: false
110
+ hub_token: null
111
+ ignore_data_skip: false
112
+ include_inputs_for_metrics: false
113
+ include_num_input_tokens_seen: false
114
+ include_tokens_per_second: false
115
+ jit_mode_eval: false
116
+ label_names:
117
+ - samples
118
+ label_smoothing_factor: 0.0
119
+ learning_rate: 5.0e-05
120
+ length_column_name: length
121
+ load_best_model_at_end: false
122
+ local_rank: 7
123
+ log_level: passive
124
+ log_level_replica: warning
125
+ log_on_each_node: true
126
+ logging_dir: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
127
+ logging_first_step: false
128
+ logging_nan_inf_filter: true
129
+ logging_steps: 10
130
+ lr_scheduler_kwargs: {}
131
+ max_grad_norm: 1.0
132
+ max_length: 4096
133
+ max_steps: -1
134
+ metric_for_best_model: null
135
+ mp_parameters: ''
136
+ neftune_noise_alpha: null
137
+ no_cuda: false
138
+ num_train_epochs: 2.0
139
+ optim_args: null
140
+ optim_target_modules: null
141
+ output_dir: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
142
+ overwrite_output_dir: false
143
+ past_index: -1
144
+ per_device_eval_batch_size: 8
145
+ per_device_train_batch_size: 8
146
+ per_gpu_eval_batch_size: null
147
+ per_gpu_train_batch_size: null
148
+ prediction_loss_only: false
149
+ push_to_hub: false
150
+ push_to_hub_model_id: null
151
+ push_to_hub_organization: null
152
+ push_to_hub_token: null
153
+ ray_scope: last
154
+ remove_unused_columns: true
155
+ report_to: []
156
+ restore_callback_states_from_checkpoint: false
157
+ resume_from_checkpoint: null
158
+ run_name: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
159
+ save_on_each_node: false
160
+ save_only_model: false
161
+ save_safetensors: true
162
+ save_steps: 500
163
+ save_total_limit: 1
164
+ seed: 3407
165
+ skip_memory_metrics: true
166
+ split_batches: null
167
+ tf32: null
168
+ torch_compile: false
169
+ torch_compile_backend: null
170
+ torch_compile_mode: null
171
+ torchdynamo: null
172
+ tpu_metrics_debug: false
173
+ tpu_num_cores: null
174
+ use_cpu: false
175
+ use_ipex: false
176
+ use_legacy_prediction_loop: false
177
+ use_lora: true
178
+ use_mps_device: false
179
+ warmup_ratio: 0.01
180
+ warmup_steps: 0
181
+ weight_decay: 0.01
lora/non_lora_trainables.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:12840b8d4721fd14f249759412c1c9d821c016ceec870ce1ea9eafa4da457cbf
3
+ size 1073755886
lora/special_tokens_map.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "</s>",
5
+ "unk_token": "<unk>"
6
+ }
lora/tokenization_internlm_xcomposer2.py ADDED
@@ -0,0 +1,252 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) InternLM. All rights reserved.
2
+ #
3
+ # This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
4
+ # and OPT implementations in this library. It has been modified from its
5
+ # original forms to accommodate minor architectural differences compared
6
+ # to GPT-NeoX and OPT used by the Meta AI team that trained the model.
7
+ #
8
+ # Licensed under the Apache License, Version 2.0 (the "License");
9
+ # you may not use this file except in compliance with the License.
10
+ # You may obtain a copy of the License at
11
+ #
12
+ # http://www.apache.org/licenses/LICENSE-2.0
13
+ #
14
+ # Unless required by applicable law or agreed to in writing, software
15
+ # distributed under the License is distributed on an "AS IS" BASIS,
16
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17
+ # See the License for the specific language governing permissions and
18
+ # limitations under the License.
19
+ """Tokenization classes for InternLM."""
20
+ import os
21
+ from shutil import copyfile
22
+ from typing import Any, Dict, List, Optional, Tuple
23
+
24
+ import sentencepiece as spm
25
+ from transformers.tokenization_utils import PreTrainedTokenizer
26
+ from transformers.utils import logging
27
+
28
+ logger = logging.get_logger(__name__)
29
+
30
+ VOCAB_FILES_NAMES = {'vocab_file': './tokenizer.model'}
31
+
32
+ PRETRAINED_VOCAB_FILES_MAP = {}
33
+
34
+
35
class InternLMXComposer2Tokenizer(PreTrainedTokenizer):
    """Construct an InternLM-XComposer2 tokenizer.

    Tokenization, id lookup and decoding are delegated to a SentencePiece
    model loaded from ``vocab_file``.

    Args:
        vocab_file (`str`):
            Path to the SentencePiece vocabulary/model file.
        unk_token (`str`, *optional*, defaults to `'<unk>'`):
            Unknown token, forwarded to the base tokenizer.
        bos_token (`str`, *optional*, defaults to `'<s>'`):
            Beginning-of-sequence token, forwarded to the base tokenizer.
        eos_token (`str`, *optional*, defaults to `'</s>'`):
            End-of-sequence token, forwarded to the base tokenizer.
        pad_token (`str`, *optional*, defaults to `'</s>'`):
            Padding token, forwarded to the base tokenizer.
        sp_model_kwargs (`dict`, *optional*):
            Keyword arguments passed to `sentencepiece.SentencePieceProcessor`.
        add_bos_token (`bool`, *optional*, defaults to `True`):
            Whether `build_inputs_with_special_tokens` prepends the BOS id.
        add_eos_token (`bool`, *optional*, defaults to `False`):
            Whether `build_inputs_with_special_tokens` appends the EOS id.
        decode_with_prefix_space (`bool`, *optional*, defaults to `False`):
            Stored on the instance; not read anywhere else in this class.
        clean_up_tokenization_spaces (`bool`, *optional*, defaults to `False`):
            Forwarded to the base tokenizer.
    """

    vocab_files_names = VOCAB_FILES_NAMES
    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
    model_input_names = ['input_ids', 'attention_mask']
    _auto_class = 'AutoTokenizer'

    def __init__(
        self,
        vocab_file,
        unk_token='<unk>',
        bos_token='<s>',
        eos_token='</s>',
        pad_token='</s>',
        sp_model_kwargs: Optional[Dict[str, Any]] = None,
        add_bos_token=True,
        add_eos_token=False,
        decode_with_prefix_space=False,
        clean_up_tokenization_spaces=False,
        **kwargs,
    ):
        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
        self.vocab_file = vocab_file
        self.add_bos_token = add_bos_token
        self.add_eos_token = add_eos_token
        self.decode_with_prefix_space = decode_with_prefix_space
        # The SentencePiece model is loaded before calling the base
        # initializer (presumably because the base class may query the
        # vocabulary while initializing -- keep this ordering).
        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
        self.sp_model.Load(vocab_file)
        # Lazily populated cache for `no_prefix_space_tokens`.
        self._no_prefix_space_tokens = None
        super().__init__(
            bos_token=bos_token,
            eos_token=eos_token,
            unk_token=unk_token,
            pad_token=pad_token,
            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
            **kwargs,
        )

    @property
    def no_prefix_space_tokens(self):
        """Set of vocabulary ids whose piece does not start with '▁'.

        Computed on first access and cached on the instance.
        """
        if self._no_prefix_space_tokens is None:
            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
            self._no_prefix_space_tokens = {
                i
                for i, tok in enumerate(vocab) if not tok.startswith('▁')
            }
        return self._no_prefix_space_tokens

    @property
    def vocab_size(self):
        """Returns vocab size."""
        return self.sp_model.get_piece_size()

    @property
    def bos_token_id(self) -> Optional[int]:
        """BOS id as reported by the SentencePiece model."""
        return self.sp_model.bos_id()

    @property
    def eos_token_id(self) -> Optional[int]:
        """EOS id as reported by the SentencePiece model."""
        return self.sp_model.eos_id()

    def get_vocab(self):
        """Returns vocab as a dict (token -> id), including added tokens."""
        vocab = {
            self.convert_ids_to_tokens(i): i
            for i in range(self.vocab_size)
        }
        vocab.update(self.added_tokens_encoder)
        return vocab

    def _tokenize(self, text):
        """Returns a tokenized string."""
        return self.sp_model.encode(text, out_type=str)

    def _convert_token_to_id(self, token):
        """Converts a token (str) in an id using the vocab."""
        return self.sp_model.piece_to_id(token)

    def _convert_id_to_token(self, index):
        """Converts an index (integer) in a token (str) using the vocab."""
        token = self.sp_model.IdToPiece(index)
        return token

    def _maybe_add_prefix_space(self, tokens, decoded):
        """Prepend a space to `decoded` unless the first token is listed in
        `no_prefix_space_tokens` (or `tokens` is empty)."""
        # NOTE(review): `no_prefix_space_tokens` holds integer ids, while
        # `convert_tokens_to_string` passes token strings here, so this
        # membership test looks like it is always true for non-empty input;
        # the caller then drops the first character again. Left unchanged on
        # purpose -- confirm before altering.
        if tokens and tokens[0] not in self.no_prefix_space_tokens:
            return ' ' + decoded
        else:
            return decoded

    def convert_tokens_to_string(self, tokens):
        """Converts a sequence of tokens (string) in a single string."""
        current_sub_tokens = []
        out_string = ''
        prev_is_special = False
        for token in tokens:
            # make sure that special tokens are not decoded using sentencepiece model
            if token in self.all_special_tokens:
                if not prev_is_special:
                    out_string += ' '
                out_string += self.sp_model.decode(current_sub_tokens) + token
                prev_is_special = True
                current_sub_tokens = []
            else:
                current_sub_tokens.append(token)
                prev_is_special = False
        out_string += self.sp_model.decode(current_sub_tokens)
        out_string = self.clean_up_tokenization(out_string)
        out_string = self._maybe_add_prefix_space(
            tokens=tokens, decoded=out_string)
        # Drop the first character (see the note in `_maybe_add_prefix_space`).
        return out_string[1:]

    def save_vocabulary(self,
                        save_directory,
                        filename_prefix: Optional[str] = None) -> Tuple[str]:
        """Save the vocabulary and special tokens file to a directory.

        Args:
            save_directory (`str`):
                The directory in which to save the vocabulary.
            filename_prefix (`str`, *optional*):
                Prefix joined with `-` in front of the vocabulary file name.

        Returns:
            `Tuple(str)`: Paths to the files saved. If `save_directory` is not
            an existing directory, an error is logged and `None` is returned.
        """
        if not os.path.isdir(save_directory):
            logger.error(
                f'Vocabulary path ({save_directory}) should be a directory')
            return
        # VOCAB_FILES_NAMES stores './tokenizer.model'. Use only the base
        # name so that a prefix yields '<prefix>-tokenizer.model' rather than
        # '<prefix>-./tokenizer.model', which would point into a directory
        # named '<prefix>-.' that does not exist.
        vocab_name = os.path.basename(VOCAB_FILES_NAMES['vocab_file'])
        out_vocab_file = os.path.join(
            save_directory,
            (filename_prefix + '-' if filename_prefix else '') + vocab_name)

        if os.path.abspath(self.vocab_file) != os.path.abspath(
                out_vocab_file) and os.path.isfile(self.vocab_file):
            copyfile(self.vocab_file, out_vocab_file)
        elif not os.path.isfile(self.vocab_file):
            # The original file is gone: re-serialize the loaded model.
            with open(out_vocab_file, 'wb') as fi:
                content_spiece_model = self.sp_model.serialized_model_proto()
                fi.write(content_spiece_model)

        return (out_vocab_file, )

    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
        """Concatenate the sequence(s), adding BOS once at the start when
        `add_bos_token` is set and EOS once at the end when `add_eos_token`
        is set."""
        if self.add_bos_token:
            bos_token_ids = [self.bos_token_id]
        else:
            bos_token_ids = []

        output = bos_token_ids + token_ids_0

        if token_ids_1 is not None:
            output = output + token_ids_1

        if self.add_eos_token:
            output = output + [self.eos_token_id]

        return output

    def get_special_tokens_mask(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None,
            already_has_special_tokens: bool = False) -> List[int]:
        """Retrieve sequence ids from a token list that has no special tokens
        added. This method is called when adding special tokens using the
        tokenizer `prepare_for_model` method.

        The mask mirrors the layout produced by
        `build_inputs_with_special_tokens`: one leading 1 when `add_bos_token`
        is set, one trailing 1 when `add_eos_token` is set, 0 elsewhere.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.
            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
                Whether or not the token list is already formatted with special tokens for the model.

        Returns:
            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
        """
        if already_has_special_tokens:
            return super().get_special_tokens_mask(
                token_ids_0=token_ids_0,
                token_ids_1=token_ids_1,
                already_has_special_tokens=True)

        bos_mask = [1] if self.add_bos_token else []
        eos_mask = [1] if self.add_eos_token else []
        sequence_mask = [0] * len(token_ids_0)
        if token_ids_1 is not None:
            sequence_mask = sequence_mask + [0] * len(token_ids_1)
        return bos_mask + sequence_mask + eos_mask

    def create_token_type_ids_from_sequences(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """Create the token type ids for the sequence(s) passed. This
        tokenizer does not distinguish token types, therefore a list of
        zeros is returned, with the same length as the output of
        `build_inputs_with_special_tokens`.

        Args:
            token_ids_0 (`List[int]`):
                List of IDs.
            token_ids_1 (`List[int]`, *optional*):
                Optional second list of IDs for sequence pairs.

        Returns:
            `List[int]`: List of zeros.
        """
        length = len(token_ids_0)
        if token_ids_1 is not None:
            length += len(token_ids_1)
        if self.add_bos_token:
            length += 1
        if self.add_eos_token:
            length += 1
        return [0] * length
lora/tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f868398fc4e05ee1e8aeba95ddf18ddcc45b8bce55d5093bead5bbf80429b48b
3
+ size 1477754
lora/tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "auto_map": {
29
+ "AutoTokenizer": [
30
+ "tokenization_internlm_xcomposer2.InternLMXComposer2Tokenizer",
31
+ null
32
+ ]
33
+ },
34
+ "bos_token": "<s>",
35
+ "clean_up_tokenization_spaces": false,
36
+ "eos_token": "</s>",
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "padding_side": "right",
40
+ "tokenizer_class": "InternLMXComposer2Tokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_fast": false
43
+ }
lora/trainer_state.json ADDED
@@ -0,0 +1,2461 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "eval_steps": 500,
6
+ "global_step": 2016,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.00992063492063492,
13
+ "grad_norm": 1.899263178856104,
14
+ "learning_rate": 2.380952380952381e-05,
15
+ "loss": 1.1251,
16
+ "step": 10
17
+ },
18
+ {
19
+ "epoch": 0.01984126984126984,
20
+ "grad_norm": 0.7922716506113461,
21
+ "learning_rate": 4.761904761904762e-05,
22
+ "loss": 0.2041,
23
+ "step": 20
24
+ },
25
+ {
26
+ "epoch": 0.02976190476190476,
27
+ "grad_norm": 0.5895583365390514,
28
+ "learning_rate": 4.999748926019576e-05,
29
+ "loss": 0.138,
30
+ "step": 30
31
+ },
32
+ {
33
+ "epoch": 0.03968253968253968,
34
+ "grad_norm": 0.5560939312704402,
35
+ "learning_rate": 4.9988810807087584e-05,
36
+ "loss": 0.116,
37
+ "step": 40
38
+ },
39
+ {
40
+ "epoch": 0.0496031746031746,
41
+ "grad_norm": 0.5339646542974129,
42
+ "learning_rate": 4.9973935795400226e-05,
43
+ "loss": 0.1227,
44
+ "step": 50
45
+ },
46
+ {
47
+ "epoch": 0.05952380952380952,
48
+ "grad_norm": 0.3172900020174145,
49
+ "learning_rate": 4.995286791373982e-05,
50
+ "loss": 0.1161,
51
+ "step": 60
52
+ },
53
+ {
54
+ "epoch": 0.06944444444444445,
55
+ "grad_norm": 0.42612238875506897,
56
+ "learning_rate": 4.992561238637912e-05,
57
+ "loss": 0.1171,
58
+ "step": 70
59
+ },
60
+ {
61
+ "epoch": 0.07936507936507936,
62
+ "grad_norm": 0.29025648297274464,
63
+ "learning_rate": 4.989217597196194e-05,
64
+ "loss": 0.112,
65
+ "step": 80
66
+ },
67
+ {
68
+ "epoch": 0.08928571428571429,
69
+ "grad_norm": 0.3936577938746362,
70
+ "learning_rate": 4.985256696182724e-05,
71
+ "loss": 0.1139,
72
+ "step": 90
73
+ },
74
+ {
75
+ "epoch": 0.0992063492063492,
76
+ "grad_norm": 0.340841068630456,
77
+ "learning_rate": 4.980679517795309e-05,
78
+ "loss": 0.103,
79
+ "step": 100
80
+ },
81
+ {
82
+ "epoch": 0.10912698412698413,
83
+ "grad_norm": 0.22519986858539828,
84
+ "learning_rate": 4.9754871970521055e-05,
85
+ "loss": 0.1094,
86
+ "step": 110
87
+ },
88
+ {
89
+ "epoch": 0.11904761904761904,
90
+ "grad_norm": 0.22918903537877802,
91
+ "learning_rate": 4.9696810215101695e-05,
92
+ "loss": 0.105,
93
+ "step": 120
94
+ },
95
+ {
96
+ "epoch": 0.12896825396825398,
97
+ "grad_norm": 0.2972032589349314,
98
+ "learning_rate": 4.963262430946173e-05,
99
+ "loss": 0.1146,
100
+ "step": 130
101
+ },
102
+ {
103
+ "epoch": 0.1388888888888889,
104
+ "grad_norm": 0.14022379837954602,
105
+ "learning_rate": 4.956233016999379e-05,
106
+ "loss": 0.1009,
107
+ "step": 140
108
+ },
109
+ {
110
+ "epoch": 0.1488095238095238,
111
+ "grad_norm": 0.29147939403227785,
112
+ "learning_rate": 4.948594522776958e-05,
113
+ "loss": 0.1082,
114
+ "step": 150
115
+ },
116
+ {
117
+ "epoch": 0.15873015873015872,
118
+ "grad_norm": 0.22074336132465838,
119
+ "learning_rate": 4.9403488424217433e-05,
120
+ "loss": 0.1021,
121
+ "step": 160
122
+ },
123
+ {
124
+ "epoch": 0.16865079365079366,
125
+ "grad_norm": 0.259089069292428,
126
+ "learning_rate": 4.9314980206425355e-05,
127
+ "loss": 0.128,
128
+ "step": 170
129
+ },
130
+ {
131
+ "epoch": 0.17857142857142858,
132
+ "grad_norm": 0.2435767337393017,
133
+ "learning_rate": 4.9220442522070657e-05,
134
+ "loss": 0.1,
135
+ "step": 180
136
+ },
137
+ {
138
+ "epoch": 0.1884920634920635,
139
+ "grad_norm": 0.20234199140218986,
140
+ "learning_rate": 4.911989881397755e-05,
141
+ "loss": 0.0967,
142
+ "step": 190
143
+ },
144
+ {
145
+ "epoch": 0.1984126984126984,
146
+ "grad_norm": 0.18427600304177014,
147
+ "learning_rate": 4.901337401430395e-05,
148
+ "loss": 0.1008,
149
+ "step": 200
150
+ },
151
+ {
152
+ "epoch": 0.20833333333333334,
153
+ "grad_norm": 0.19784209557315877,
154
+ "learning_rate": 4.8900894538358944e-05,
155
+ "loss": 0.1088,
156
+ "step": 210
157
+ },
158
+ {
159
+ "epoch": 0.21825396825396826,
160
+ "grad_norm": 0.2361890840964331,
161
+ "learning_rate": 4.878248827805252e-05,
162
+ "loss": 0.1018,
163
+ "step": 220
164
+ },
165
+ {
166
+ "epoch": 0.22817460317460317,
167
+ "grad_norm": 0.17946620458385004,
168
+ "learning_rate": 4.865818459497911e-05,
169
+ "loss": 0.101,
170
+ "step": 230
171
+ },
172
+ {
173
+ "epoch": 0.23809523809523808,
174
+ "grad_norm": 0.200937152702067,
175
+ "learning_rate": 4.8528014313136675e-05,
176
+ "loss": 0.1019,
177
+ "step": 240
178
+ },
179
+ {
180
+ "epoch": 0.24801587301587302,
181
+ "grad_norm": 0.2641817602324182,
182
+ "learning_rate": 4.839200971128324e-05,
183
+ "loss": 0.0865,
184
+ "step": 250
185
+ },
186
+ {
187
+ "epoch": 0.25793650793650796,
188
+ "grad_norm": 0.16708831362801244,
189
+ "learning_rate": 4.8250204514932517e-05,
190
+ "loss": 0.0943,
191
+ "step": 260
192
+ },
193
+ {
194
+ "epoch": 0.26785714285714285,
195
+ "grad_norm": 0.23663162415830213,
196
+ "learning_rate": 4.810263388799101e-05,
197
+ "loss": 0.0955,
198
+ "step": 270
199
+ },
200
+ {
201
+ "epoch": 0.2777777777777778,
202
+ "grad_norm": 0.18283685945766517,
203
+ "learning_rate": 4.7949334424038176e-05,
204
+ "loss": 0.1052,
205
+ "step": 280
206
+ },
207
+ {
208
+ "epoch": 0.2876984126984127,
209
+ "grad_norm": 0.4848491439986444,
210
+ "learning_rate": 4.77903441372523e-05,
211
+ "loss": 0.1017,
212
+ "step": 290
213
+ },
214
+ {
215
+ "epoch": 0.2976190476190476,
216
+ "grad_norm": 0.3605542323724644,
217
+ "learning_rate": 4.762570245298389e-05,
218
+ "loss": 0.0986,
219
+ "step": 300
220
+ },
221
+ {
222
+ "epoch": 0.30753968253968256,
223
+ "grad_norm": 0.1386048127933906,
224
+ "learning_rate": 4.7455450197979345e-05,
225
+ "loss": 0.0935,
226
+ "step": 310
227
+ },
228
+ {
229
+ "epoch": 0.31746031746031744,
230
+ "grad_norm": 0.14991678174597392,
231
+ "learning_rate": 4.727962959025694e-05,
232
+ "loss": 0.0956,
233
+ "step": 320
234
+ },
235
+ {
236
+ "epoch": 0.3273809523809524,
237
+ "grad_norm": 0.26295328059626233,
238
+ "learning_rate": 4.709828422863791e-05,
239
+ "loss": 0.0953,
240
+ "step": 330
241
+ },
242
+ {
243
+ "epoch": 0.3373015873015873,
244
+ "grad_norm": 0.16038042754223325,
245
+ "learning_rate": 4.6911459081935084e-05,
246
+ "loss": 0.0959,
247
+ "step": 340
248
+ },
249
+ {
250
+ "epoch": 0.3472222222222222,
251
+ "grad_norm": 0.22069080284626696,
252
+ "learning_rate": 4.671920047780186e-05,
253
+ "loss": 0.09,
254
+ "step": 350
255
+ },
256
+ {
257
+ "epoch": 0.35714285714285715,
258
+ "grad_norm": 0.26820678370641815,
259
+ "learning_rate": 4.652155609124414e-05,
260
+ "loss": 0.0988,
261
+ "step": 360
262
+ },
263
+ {
264
+ "epoch": 0.36706349206349204,
265
+ "grad_norm": 0.2311480903314735,
266
+ "learning_rate": 4.631857493279823e-05,
267
+ "loss": 0.0988,
268
+ "step": 370
269
+ },
270
+ {
271
+ "epoch": 0.376984126984127,
272
+ "grad_norm": 0.18425738333190533,
273
+ "learning_rate": 4.611030733637751e-05,
274
+ "loss": 0.0929,
275
+ "step": 380
276
+ },
277
+ {
278
+ "epoch": 0.3869047619047619,
279
+ "grad_norm": 0.12332174843455722,
280
+ "learning_rate": 4.589680494679099e-05,
281
+ "loss": 0.0959,
282
+ "step": 390
283
+ },
284
+ {
285
+ "epoch": 0.3968253968253968,
286
+ "grad_norm": 0.194045506361412,
287
+ "learning_rate": 4.567812070693675e-05,
288
+ "loss": 0.0985,
289
+ "step": 400
290
+ },
291
+ {
292
+ "epoch": 0.40674603174603174,
293
+ "grad_norm": 0.24121090661176106,
294
+ "learning_rate": 4.545430884467354e-05,
295
+ "loss": 0.0915,
296
+ "step": 410
297
+ },
298
+ {
299
+ "epoch": 0.4166666666666667,
300
+ "grad_norm": 0.19201769770887162,
301
+ "learning_rate": 4.522542485937369e-05,
302
+ "loss": 0.0943,
303
+ "step": 420
304
+ },
305
+ {
306
+ "epoch": 0.42658730158730157,
307
+ "grad_norm": 0.3290546621528532,
308
+ "learning_rate": 4.499152550816077e-05,
309
+ "loss": 0.0864,
310
+ "step": 430
311
+ },
312
+ {
313
+ "epoch": 0.4365079365079365,
314
+ "grad_norm": 0.1945555131786947,
315
+ "learning_rate": 4.4752668791835315e-05,
316
+ "loss": 0.0904,
317
+ "step": 440
318
+ },
319
+ {
320
+ "epoch": 0.44642857142857145,
321
+ "grad_norm": 0.17033755769918293,
322
+ "learning_rate": 4.450891394049221e-05,
323
+ "loss": 0.0874,
324
+ "step": 450
325
+ },
326
+ {
327
+ "epoch": 0.45634920634920634,
328
+ "grad_norm": 0.33700673031382816,
329
+ "learning_rate": 4.426032139883315e-05,
330
+ "loss": 0.0964,
331
+ "step": 460
332
+ },
333
+ {
334
+ "epoch": 0.4662698412698413,
335
+ "grad_norm": 0.1386245861821237,
336
+ "learning_rate": 4.400695281117802e-05,
337
+ "loss": 0.0909,
338
+ "step": 470
339
+ },
340
+ {
341
+ "epoch": 0.47619047619047616,
342
+ "grad_norm": 0.18027472945026246,
343
+ "learning_rate": 4.37488710061787e-05,
344
+ "loss": 0.0973,
345
+ "step": 480
346
+ },
347
+ {
348
+ "epoch": 0.4861111111111111,
349
+ "grad_norm": 0.17090015410514822,
350
+ "learning_rate": 4.3486139981239304e-05,
351
+ "loss": 0.0957,
352
+ "step": 490
353
+ },
354
+ {
355
+ "epoch": 0.49603174603174605,
356
+ "grad_norm": 0.17225190040947705,
357
+ "learning_rate": 4.321882488664645e-05,
358
+ "loss": 0.0984,
359
+ "step": 500
360
+ },
361
+ {
362
+ "epoch": 0.5059523809523809,
363
+ "grad_norm": 0.24663017482809838,
364
+ "learning_rate": 4.2946992009413774e-05,
365
+ "loss": 0.1012,
366
+ "step": 510
367
+ },
368
+ {
369
+ "epoch": 0.5158730158730159,
370
+ "grad_norm": 0.21766586383802478,
371
+ "learning_rate": 4.2670708756844504e-05,
372
+ "loss": 0.0933,
373
+ "step": 520
374
+ },
375
+ {
376
+ "epoch": 0.5257936507936508,
377
+ "grad_norm": 0.2174374764424065,
378
+ "learning_rate": 4.239004363981627e-05,
379
+ "loss": 0.0908,
380
+ "step": 530
381
+ },
382
+ {
383
+ "epoch": 0.5357142857142857,
384
+ "grad_norm": 0.18176898727645474,
385
+ "learning_rate": 4.2105066255792185e-05,
386
+ "loss": 0.0967,
387
+ "step": 540
388
+ },
389
+ {
390
+ "epoch": 0.5456349206349206,
391
+ "grad_norm": 0.18294668815510332,
392
+ "learning_rate": 4.1815847271562594e-05,
393
+ "loss": 0.0895,
394
+ "step": 550
395
+ },
396
+ {
397
+ "epoch": 0.5555555555555556,
398
+ "grad_norm": 0.2755073678680904,
399
+ "learning_rate": 4.152245840572153e-05,
400
+ "loss": 0.0885,
401
+ "step": 560
402
+ },
403
+ {
404
+ "epoch": 0.5654761904761905,
405
+ "grad_norm": 0.13142325484055215,
406
+ "learning_rate": 4.122497241088247e-05,
407
+ "loss": 0.1044,
408
+ "step": 570
409
+ },
410
+ {
411
+ "epoch": 0.5753968253968254,
412
+ "grad_norm": 0.16668164786917436,
413
+ "learning_rate": 4.09234630556376e-05,
414
+ "loss": 0.0963,
415
+ "step": 580
416
+ },
417
+ {
418
+ "epoch": 0.5853174603174603,
419
+ "grad_norm": 0.18038706809428273,
420
+ "learning_rate": 4.061800510626515e-05,
421
+ "loss": 0.0946,
422
+ "step": 590
423
+ },
424
+ {
425
+ "epoch": 0.5952380952380952,
426
+ "grad_norm": 0.1930360692086378,
427
+ "learning_rate": 4.030867430818941e-05,
428
+ "loss": 0.0981,
429
+ "step": 600
430
+ },
431
+ {
432
+ "epoch": 0.6051587301587301,
433
+ "grad_norm": 0.2190484582397661,
434
+ "learning_rate": 3.999554736719785e-05,
435
+ "loss": 0.0918,
436
+ "step": 610
437
+ },
438
+ {
439
+ "epoch": 0.6150793650793651,
440
+ "grad_norm": 0.24605450998539993,
441
+ "learning_rate": 3.9678701930420095e-05,
442
+ "loss": 0.1004,
443
+ "step": 620
444
+ },
445
+ {
446
+ "epoch": 0.625,
447
+ "grad_norm": 0.12987591431160975,
448
+ "learning_rate": 3.935821656707359e-05,
449
+ "loss": 0.1017,
450
+ "step": 630
451
+ },
452
+ {
453
+ "epoch": 0.6349206349206349,
454
+ "grad_norm": 0.19782253063677727,
455
+ "learning_rate": 3.903417074898047e-05,
456
+ "loss": 0.0881,
457
+ "step": 640
458
+ },
459
+ {
460
+ "epoch": 0.6448412698412699,
461
+ "grad_norm": 0.23190635119611894,
462
+ "learning_rate": 3.870664483086067e-05,
463
+ "loss": 0.088,
464
+ "step": 650
465
+ },
466
+ {
467
+ "epoch": 0.6547619047619048,
468
+ "grad_norm": 0.14464411323958998,
469
+ "learning_rate": 3.837572003040612e-05,
470
+ "loss": 0.0907,
471
+ "step": 660
472
+ },
473
+ {
474
+ "epoch": 0.6646825396825397,
475
+ "grad_norm": 0.11660095690724923,
476
+ "learning_rate": 3.8041478408140926e-05,
477
+ "loss": 0.0877,
478
+ "step": 670
479
+ },
480
+ {
481
+ "epoch": 0.6746031746031746,
482
+ "grad_norm": 0.18383294614345877,
483
+ "learning_rate": 3.77040028470725e-05,
484
+ "loss": 0.0851,
485
+ "step": 680
486
+ },
487
+ {
488
+ "epoch": 0.6845238095238095,
489
+ "grad_norm": 0.16896655109068967,
490
+ "learning_rate": 3.736337703213888e-05,
491
+ "loss": 0.0875,
492
+ "step": 690
493
+ },
494
+ {
495
+ "epoch": 0.6944444444444444,
496
+ "grad_norm": 0.17392953850416312,
497
+ "learning_rate": 3.7019685429456986e-05,
498
+ "loss": 0.097,
499
+ "step": 700
500
+ },
501
+ {
502
+ "epoch": 0.7043650793650794,
503
+ "grad_norm": 0.24645390722766997,
504
+ "learning_rate": 3.6673013265377355e-05,
505
+ "loss": 0.0876,
506
+ "step": 710
507
+ },
508
+ {
509
+ "epoch": 0.7142857142857143,
510
+ "grad_norm": 0.1787303955658246,
511
+ "learning_rate": 3.632344650535024e-05,
512
+ "loss": 0.0887,
513
+ "step": 720
514
+ },
515
+ {
516
+ "epoch": 0.7242063492063492,
517
+ "grad_norm": 0.12552185945236538,
518
+ "learning_rate": 3.59710718326085e-05,
519
+ "loss": 0.0924,
520
+ "step": 730
521
+ },
522
+ {
523
+ "epoch": 0.7341269841269841,
524
+ "grad_norm": 0.1691175916463122,
525
+ "learning_rate": 3.5615976626672434e-05,
526
+ "loss": 0.0837,
527
+ "step": 740
528
+ },
529
+ {
530
+ "epoch": 0.7440476190476191,
531
+ "grad_norm": 0.2732015272731385,
532
+ "learning_rate": 3.525824894168203e-05,
533
+ "loss": 0.0878,
534
+ "step": 750
535
+ },
536
+ {
537
+ "epoch": 0.753968253968254,
538
+ "grad_norm": 0.21608867617240846,
539
+ "learning_rate": 3.489797748456187e-05,
540
+ "loss": 0.0896,
541
+ "step": 760
542
+ },
543
+ {
544
+ "epoch": 0.7638888888888888,
545
+ "grad_norm": 0.1837057522774625,
546
+ "learning_rate": 3.453525159302415e-05,
547
+ "loss": 0.0844,
548
+ "step": 770
549
+ },
550
+ {
551
+ "epoch": 0.7738095238095238,
552
+ "grad_norm": 0.23426485652488405,
553
+ "learning_rate": 3.417016121341537e-05,
554
+ "loss": 0.0962,
555
+ "step": 780
556
+ },
557
+ {
558
+ "epoch": 0.7837301587301587,
559
+ "grad_norm": 0.20613723836878384,
560
+ "learning_rate": 3.380279687841199e-05,
561
+ "loss": 0.0848,
562
+ "step": 790
563
+ },
564
+ {
565
+ "epoch": 0.7936507936507936,
566
+ "grad_norm": 0.20859267166659057,
567
+ "learning_rate": 3.343324968457076e-05,
568
+ "loss": 0.093,
569
+ "step": 800
570
+ },
571
+ {
572
+ "epoch": 0.8035714285714286,
573
+ "grad_norm": 0.1459009760134056,
574
+ "learning_rate": 3.306161126973918e-05,
575
+ "loss": 0.0897,
576
+ "step": 810
577
+ },
578
+ {
579
+ "epoch": 0.8134920634920635,
580
+ "grad_norm": 0.16377549953879472,
581
+ "learning_rate": 3.268797379033181e-05,
582
+ "loss": 0.0911,
583
+ "step": 820
584
+ },
585
+ {
586
+ "epoch": 0.8234126984126984,
587
+ "grad_norm": 0.19355138242730935,
588
+ "learning_rate": 3.23124298984779e-05,
589
+ "loss": 0.0883,
590
+ "step": 830
591
+ },
592
+ {
593
+ "epoch": 0.8333333333333334,
594
+ "grad_norm": 0.18239709779797436,
595
+ "learning_rate": 3.1935072719046115e-05,
596
+ "loss": 0.0848,
597
+ "step": 840
598
+ },
599
+ {
600
+ "epoch": 0.8432539682539683,
601
+ "grad_norm": 0.10583679127237934,
602
+ "learning_rate": 3.155599582655211e-05,
603
+ "loss": 0.0885,
604
+ "step": 850
605
+ },
606
+ {
607
+ "epoch": 0.8531746031746031,
608
+ "grad_norm": 0.1834906329062599,
609
+ "learning_rate": 3.117529322195448e-05,
610
+ "loss": 0.087,
611
+ "step": 860
612
+ },
613
+ {
614
+ "epoch": 0.8630952380952381,
615
+ "grad_norm": 0.17967922772149214,
616
+ "learning_rate": 3.079305930934509e-05,
617
+ "loss": 0.0899,
618
+ "step": 870
619
+ },
620
+ {
621
+ "epoch": 0.873015873015873,
622
+ "grad_norm": 0.13745052140998398,
623
+ "learning_rate": 3.040938887253932e-05,
624
+ "loss": 0.0922,
625
+ "step": 880
626
+ },
627
+ {
628
+ "epoch": 0.8829365079365079,
629
+ "grad_norm": 0.22606931644010264,
630
+ "learning_rate": 3.002437705157225e-05,
631
+ "loss": 0.0915,
632
+ "step": 890
633
+ },
634
+ {
635
+ "epoch": 0.8928571428571429,
636
+ "grad_norm": 0.16850557595303492,
637
+ "learning_rate": 2.963811931910645e-05,
638
+ "loss": 0.0864,
639
+ "step": 900
640
+ },
641
+ {
642
+ "epoch": 0.9027777777777778,
643
+ "grad_norm": 0.23963677891708385,
644
+ "learning_rate": 2.925071145675733e-05,
645
+ "loss": 0.0837,
646
+ "step": 910
647
+ },
648
+ {
649
+ "epoch": 0.9126984126984127,
650
+ "grad_norm": 0.1587891497734928,
651
+ "learning_rate": 2.8862249531341806e-05,
652
+ "loss": 0.0853,
653
+ "step": 920
654
+ },
655
+ {
656
+ "epoch": 0.9226190476190477,
657
+ "grad_norm": 0.17289844782248673,
658
+ "learning_rate": 2.8472829871056332e-05,
659
+ "loss": 0.0816,
660
+ "step": 930
661
+ },
662
+ {
663
+ "epoch": 0.9325396825396826,
664
+ "grad_norm": 0.19037298678850886,
665
+ "learning_rate": 2.8082549041590085e-05,
666
+ "loss": 0.0848,
667
+ "step": 940
668
+ },
669
+ {
670
+ "epoch": 0.9424603174603174,
671
+ "grad_norm": 0.18209887709567296,
672
+ "learning_rate": 2.7691503822179187e-05,
673
+ "loss": 0.0793,
674
+ "step": 950
675
+ },
676
+ {
677
+ "epoch": 0.9523809523809523,
678
+ "grad_norm": 0.23253229868000924,
679
+ "learning_rate": 2.7299791181608124e-05,
680
+ "loss": 0.0944,
681
+ "step": 960
682
+ },
683
+ {
684
+ "epoch": 0.9623015873015873,
685
+ "grad_norm": 0.1730036255240911,
686
+ "learning_rate": 2.6907508254163987e-05,
687
+ "loss": 0.0827,
688
+ "step": 970
689
+ },
690
+ {
691
+ "epoch": 0.9722222222222222,
692
+ "grad_norm": 0.1355999949692624,
693
+ "learning_rate": 2.6514752315549847e-05,
694
+ "loss": 0.0713,
695
+ "step": 980
696
+ },
697
+ {
698
+ "epoch": 0.9821428571428571,
699
+ "grad_norm": 0.1659255007537986,
700
+ "learning_rate": 2.6121620758762877e-05,
701
+ "loss": 0.085,
702
+ "step": 990
703
+ },
704
+ {
705
+ "epoch": 0.9920634920634921,
706
+ "grad_norm": 0.21719730615776264,
707
+ "learning_rate": 2.5728211069943582e-05,
708
+ "loss": 0.0885,
709
+ "step": 1000
710
+ },
711
+ {
712
+ "Accuracy": 77.7,
713
+ "Overall_f1": 70.0,
714
+ "epoch": 1.0,
715
+ "eval_UnsafeBench_runtime": 226.9313,
716
+ "eval_UnsafeBench_samples_per_second": 9.025,
717
+ "eval_UnsafeBench_steps_per_second": 0.141,
718
+ "generated": {
719
+ "Accuracy": 78.6,
720
+ "F1": 74.5,
721
+ "unsafe": {
722
+ "Accuracy": 78.6,
723
+ "F1": 74.5,
724
+ "safe": {
725
+ "f1": "81.6",
726
+ "prec": "82.9(489/590)",
727
+ "recall": "80.3(489/609)"
728
+ },
729
+ "unsafe": {
730
+ "f1": "74.5",
731
+ "prec": "72.9(323/443)",
732
+ "recall": "76.2(323/424)"
733
+ }
734
+ }
735
+ },
736
+ "real": {
737
+ "Accuracy": 76.7,
738
+ "F1": 64.3,
739
+ "unsafe": {
740
+ "Accuracy": 76.7,
741
+ "F1": 64.3,
742
+ "safe": {
743
+ "f1": "82.8",
744
+ "prec": "79.3(567/715)",
745
+ "recall": "86.6(567/655)"
746
+ },
747
+ "unsafe": {
748
+ "f1": "64.3",
749
+ "prec": "70.7(212/300)",
750
+ "recall": "58.9(212/360)"
751
+ }
752
+ }
753
+ },
754
+ "step": 1008
755
+ },
756
+ {
757
+ "Accuracy": 67.3,
758
+ "Overall_f1": 80.5,
759
+ "epoch": 1.0,
760
+ "eval_SelfHarm_runtime": 78.5426,
761
+ "eval_SelfHarm_samples_per_second": 8.148,
762
+ "eval_SelfHarm_steps_per_second": 0.127,
763
+ "generated": {
764
+ "Accuracy": 0.0,
765
+ "F1": 0.0,
766
+ "unsafe": {
767
+ "Accuracy": 0.0,
768
+ "F1": 0.0,
769
+ "safe": {
770
+ "f1": "0.0",
771
+ "prec": "0.0(0/0)",
772
+ "recall": "0.0(0/0)"
773
+ },
774
+ "unsafe": {
775
+ "f1": "0.0",
776
+ "prec": "0.0(0/0)",
777
+ "recall": "0.0(0/0)"
778
+ }
779
+ }
780
+ },
781
+ "real": {
782
+ "Accuracy": 67.3,
783
+ "F1": 80.5,
784
+ "unsafe": {
785
+ "Accuracy": 67.3,
786
+ "F1": 80.5,
787
+ "safe": {
788
+ "f1": "0.0",
789
+ "prec": "0.0(0/209)",
790
+ "recall": "0.0(0/0)"
791
+ },
792
+ "unsafe": {
793
+ "f1": "80.5",
794
+ "prec": "100.0(431/431)",
795
+ "recall": "67.3(431/640)"
796
+ }
797
+ }
798
+ },
799
+ "step": 1008
800
+ },
801
+ {
802
+ "Accuracy": 78.7,
803
+ "Overall_f1": 68.2,
804
+ "epoch": 1.0,
805
+ "eval_UnsafeDiff_runtime": 97.4369,
806
+ "eval_UnsafeDiff_samples_per_second": 8.539,
807
+ "eval_UnsafeDiff_steps_per_second": 0.133,
808
+ "generated": {
809
+ "Accuracy": 0.0,
810
+ "F1": 0.0,
811
+ "unsafe": {
812
+ "Accuracy": 0.0,
813
+ "F1": 0.0,
814
+ "safe": {
815
+ "f1": "0.0",
816
+ "prec": "0.0(0/0)",
817
+ "recall": "0.0(0/0)"
818
+ },
819
+ "unsafe": {
820
+ "f1": "0.0",
821
+ "prec": "0.0(0/0)",
822
+ "recall": "0.0(0/0)"
823
+ }
824
+ }
825
+ },
826
+ "real": {
827
+ "Accuracy": 78.7,
828
+ "F1": 68.2,
829
+ "unsafe": {
830
+ "Accuracy": 78.7,
831
+ "F1": 68.2,
832
+ "safe": {
833
+ "f1": "84.0",
834
+ "prec": "92.8(465/501)",
835
+ "recall": "76.7(465/606)"
836
+ },
837
+ "unsafe": {
838
+ "f1": "68.2",
839
+ "prec": "57.4(190/331)",
840
+ "recall": "84.1(190/226)"
841
+ }
842
+ }
843
+ },
844
+ "step": 1008
845
+ },
846
+ {
847
+ "Accuracy": 76.0,
848
+ "Overall_f1": 86.4,
849
+ "epoch": 1.0,
850
+ "eval_ViolentBehavior_runtime": 24.6799,
851
+ "eval_ViolentBehavior_samples_per_second": 7.78,
852
+ "eval_ViolentBehavior_steps_per_second": 0.122,
853
+ "generated": {
854
+ "Accuracy": 0.0,
855
+ "F1": 0.0,
856
+ "unsafe": {
857
+ "Accuracy": 0.0,
858
+ "F1": 0.0,
859
+ "safe": {
860
+ "f1": "0.0",
861
+ "prec": "0.0(0/0)",
862
+ "recall": "0.0(0/0)"
863
+ },
864
+ "unsafe": {
865
+ "f1": "0.0",
866
+ "prec": "0.0(0/0)",
867
+ "recall": "0.0(0/0)"
868
+ }
869
+ }
870
+ },
871
+ "real": {
872
+ "Accuracy": 76.0,
873
+ "F1": 86.4,
874
+ "unsafe": {
875
+ "Accuracy": 76.0,
876
+ "F1": 86.4,
877
+ "safe": {
878
+ "f1": "0.0",
879
+ "prec": "0.0(0/46)",
880
+ "recall": "0.0(0/0)"
881
+ },
882
+ "unsafe": {
883
+ "f1": "86.4",
884
+ "prec": "100.0(146/146)",
885
+ "recall": "76.0(146/192)"
886
+ }
887
+ }
888
+ },
889
+ "step": 1008
890
+ },
891
+ {
892
+ "Accuracy": 80.8,
893
+ "Overall_f1": 74.8,
894
+ "epoch": 1.0,
895
+ "eval_SMID_runtime": 192.5161,
896
+ "eval_SMID_samples_per_second": 8.976,
897
+ "eval_SMID_steps_per_second": 0.14,
898
+ "generated": {
899
+ "Accuracy": 0.0,
900
+ "F1": 0.0,
901
+ "unsafe": {
902
+ "Accuracy": 0.0,
903
+ "F1": 0.0,
904
+ "safe": {
905
+ "f1": "0.0",
906
+ "prec": "0.0(0/0)",
907
+ "recall": "0.0(0/0)"
908
+ },
909
+ "unsafe": {
910
+ "f1": "0.0",
911
+ "prec": "0.0(0/0)",
912
+ "recall": "0.0(0/0)"
913
+ }
914
+ }
915
+ },
916
+ "real": {
917
+ "Accuracy": 80.8,
918
+ "F1": 74.8,
919
+ "unsafe": {
920
+ "Accuracy": 80.8,
921
+ "F1": 74.8,
922
+ "safe": {
923
+ "f1": "84.6",
924
+ "prec": "78.9(906/1148)",
925
+ "recall": "91.1(906/995)"
926
+ },
927
+ "unsafe": {
928
+ "f1": "74.8",
929
+ "prec": "84.7(491/580)",
930
+ "recall": "67.0(491/733)"
931
+ }
932
+ }
933
+ },
934
+ "step": 1008
935
+ },
936
+ {
937
+ "Accuracy": 84.3,
938
+ "Overall_f1": 85.0,
939
+ "epoch": 1.0,
940
+ "eval_JsonDataset_runtime": 404.5105,
941
+ "eval_JsonDataset_samples_per_second": 4.905,
942
+ "eval_JsonDataset_steps_per_second": 0.077,
943
+ "generated": {
944
+ "Accuracy": 86.2,
945
+ "F1": 87.3,
946
+ "fairness": {
947
+ "Accuracy": 90.3,
948
+ "F1": 90.3,
949
+ "african": {
950
+ "f1": "86.2",
951
+ "prec": "77.8(28/36)",
952
+ "recall": "96.6(28/29)"
953
+ },
954
+ "asian": {
955
+ "f1": "87.0",
956
+ "prec": "89.6(121/135)",
957
+ "recall": "84.6(121/143)"
958
+ },
959
+ "caucasian": {
960
+ "f1": "91.3",
961
+ "prec": "88.7(282/318)",
962
+ "recall": "94.0(282/300)"
963
+ },
964
+ "children": {
965
+ "f1": "72.2",
966
+ "prec": "56.5(13/23)",
967
+ "recall": "100.0(13/13)"
968
+ },
969
+ "elderly": {
970
+ "f1": "91.9",
971
+ "prec": "98.1(102/104)",
972
+ "recall": "86.4(102/118)"
973
+ },
974
+ "female": {
975
+ "f1": "98.9",
976
+ "prec": "99.6(234/235)",
977
+ "recall": "98.3(234/238)"
978
+ },
979
+ "indian": {
980
+ "f1": "70.6",
981
+ "prec": "75.0(6/8)",
982
+ "recall": "66.7(6/9)"
983
+ },
984
+ "latino": {
985
+ "f1": "43.5",
986
+ "prec": "66.7(10/15)",
987
+ "recall": "32.3(10/31)"
988
+ },
989
+ "male": {
990
+ "f1": "99.1",
991
+ "prec": "98.6(273/277)",
992
+ "recall": "99.6(273/274)"
993
+ },
994
+ "middle-aged": {
995
+ "f1": "75.6",
996
+ "prec": "86.3(107/124)",
997
+ "recall": "67.3(107/159)"
998
+ },
999
+ "safe": {
1000
+ "f1": "0.0",
1001
+ "prec": "0.0(0/0)",
1002
+ "recall": "0.0(0/0)"
1003
+ },
1004
+ "teenager": {
1005
+ "f1": "0.0",
1006
+ "prec": "0.0(0/0)",
1007
+ "recall": "0.0(0/0)"
1008
+ },
1009
+ "young adult": {
1010
+ "f1": "87.3",
1011
+ "prec": "80.8(211/261)",
1012
+ "recall": "95.0(211/222)"
1013
+ }
1014
+ },
1015
+ "privacy": {
1016
+ "Accuracy": 80.3,
1017
+ "F1": 83.6,
1018
+ "intellectual property violation": {
1019
+ "f1": "82.5",
1020
+ "prec": "89.2(33/37)",
1021
+ "recall": "76.7(33/43)"
1022
+ },
1023
+ "personal identification documents": {
1024
+ "f1": "86.9",
1025
+ "prec": "95.2(40/42)",
1026
+ "recall": "80.0(40/50)"
1027
+ },
1028
+ "public figures": {
1029
+ "f1": "81.3",
1030
+ "prec": "82.2(37/45)",
1031
+ "recall": "80.4(37/46)"
1032
+ },
1033
+ "safe": {
1034
+ "f1": "72.6",
1035
+ "prec": "64.1(41/64)",
1036
+ "recall": "83.7(41/49)"
1037
+ }
1038
+ },
1039
+ "toxicity": {
1040
+ "Accuracy": 67.4,
1041
+ "F1": 68.3,
1042
+ "disturbing": {
1043
+ "f1": "68.7",
1044
+ "prec": "57.4(35/61)",
1045
+ "recall": "85.4(35/41)"
1046
+ },
1047
+ "hate": {
1048
+ "f1": "42.1",
1049
+ "prec": "100.0(4/4)",
1050
+ "recall": "26.7(4/15)"
1051
+ },
1052
+ "humiliation": {
1053
+ "f1": "32.8",
1054
+ "prec": "100.0(9/9)",
1055
+ "recall": "19.6(9/46)"
1056
+ },
1057
+ "illegal activity": {
1058
+ "f1": "73.0",
1059
+ "prec": "100.0(23/23)",
1060
+ "recall": "57.5(23/40)"
1061
+ },
1062
+ "safe": {
1063
+ "f1": "64.5",
1064
+ "prec": "52.7(39/74)",
1065
+ "recall": "83.0(39/47)"
1066
+ },
1067
+ "sexual": {
1068
+ "f1": "94.4",
1069
+ "prec": "100.0(42/42)",
1070
+ "recall": "89.4(42/47)"
1071
+ },
1072
+ "violence": {
1073
+ "f1": "66.0",
1074
+ "prec": "53.3(32/60)",
1075
+ "recall": "86.5(32/37)"
1076
+ }
1077
+ }
1078
+ },
1079
+ "real": {
1080
+ "Accuracy": 82.2,
1081
+ "F1": 82.6,
1082
+ "fairness": {
1083
+ "Accuracy": 81.9,
1084
+ "F1": 81.9,
1085
+ "african": {
1086
+ "f1": "80.8",
1087
+ "prec": "82.4(61/74)",
1088
+ "recall": "79.2(61/77)"
1089
+ },
1090
+ "asian": {
1091
+ "f1": "78.2",
1092
+ "prec": "77.4(72/93)",
1093
+ "recall": "79.1(72/91)"
1094
+ },
1095
+ "caucasian": {
1096
+ "f1": "82.2",
1097
+ "prec": "74.1(166/224)",
1098
+ "recall": "92.2(166/180)"
1099
+ },
1100
+ "children": {
1101
+ "f1": "83.9",
1102
+ "prec": "78.8(26/33)",
1103
+ "recall": "89.7(26/29)"
1104
+ },
1105
+ "elderly": {
1106
+ "f1": "66.7",
1107
+ "prec": "92.9(26/28)",
1108
+ "recall": "52.0(26/50)"
1109
+ },
1110
+ "female": {
1111
+ "f1": "93.3",
1112
+ "prec": "98.0(145/148)",
1113
+ "recall": "89.0(145/163)"
1114
+ },
1115
+ "indian": {
1116
+ "f1": "62.5",
1117
+ "prec": "64.5(40/62)",
1118
+ "recall": "60.6(40/66)"
1119
+ },
1120
+ "latino": {
1121
+ "f1": "33.1",
1122
+ "prec": "46.8(22/47)",
1123
+ "recall": "25.6(22/86)"
1124
+ },
1125
+ "male": {
1126
+ "f1": "97.0",
1127
+ "prec": "94.9(334/352)",
1128
+ "recall": "99.1(334/337)"
1129
+ },
1130
+ "middle-aged": {
1131
+ "f1": "80.8",
1132
+ "prec": "83.1(217/261)",
1133
+ "recall": "78.6(217/276)"
1134
+ },
1135
+ "safe": {
1136
+ "f1": "0.0",
1137
+ "prec": "0.0(0/0)",
1138
+ "recall": "0.0(0/0)"
1139
+ },
1140
+ "teenager": {
1141
+ "f1": "0.0",
1142
+ "prec": "0.0(0/0)",
1143
+ "recall": "0.0(0/0)"
1144
+ },
1145
+ "young adult": {
1146
+ "f1": "73.7",
1147
+ "prec": "66.9(119/178)",
1148
+ "recall": "82.1(119/145)"
1149
+ }
1150
+ },
1151
+ "privacy": {
1152
+ "Accuracy": 85.9,
1153
+ "F1": 89.5,
1154
+ "intellectual property violation": {
1155
+ "f1": "86.1",
1156
+ "prec": "100.0(34/34)",
1157
+ "recall": "75.6(34/45)"
1158
+ },
1159
+ "personal identification documents": {
1160
+ "f1": "92.9",
1161
+ "prec": "93.9(46/49)",
1162
+ "recall": "92.0(46/50)"
1163
+ },
1164
+ "public figures": {
1165
+ "f1": "88.9",
1166
+ "prec": "95.2(40/42)",
1167
+ "recall": "83.3(40/48)"
1168
+ },
1169
+ "safe": {
1170
+ "f1": "77.2",
1171
+ "prec": "66.7(44/66)",
1172
+ "recall": "91.7(44/48)"
1173
+ }
1174
+ },
1175
+ "toxicity": {
1176
+ "Accuracy": 81.6,
1177
+ "F1": 83.3,
1178
+ "disturbing": {
1179
+ "f1": "84.9",
1180
+ "prec": "82.4(42/51)",
1181
+ "recall": "87.5(42/48)"
1182
+ },
1183
+ "hate": {
1184
+ "f1": "69.6",
1185
+ "prec": "72.7(8/11)",
1186
+ "recall": "66.7(8/12)"
1187
+ },
1188
+ "humiliation": {
1189
+ "f1": "23.5",
1190
+ "prec": "100.0(2/2)",
1191
+ "recall": "13.3(2/15)"
1192
+ },
1193
+ "illegal activity": {
1194
+ "f1": "89.3",
1195
+ "prec": "96.2(25/26)",
1196
+ "recall": "83.3(25/30)"
1197
+ },
1198
+ "safe": {
1199
+ "f1": "75.5",
1200
+ "prec": "66.7(40/60)",
1201
+ "recall": "87.0(40/46)"
1202
+ },
1203
+ "sexual": {
1204
+ "f1": "95.0",
1205
+ "prec": "94.1(48/51)",
1206
+ "recall": "96.0(48/50)"
1207
+ },
1208
+ "violence": {
1209
+ "f1": "79.5",
1210
+ "prec": "79.5(35/44)",
1211
+ "recall": "79.5(35/44)"
1212
+ }
1213
+ }
1214
+ },
1215
+ "step": 1008
1216
+ },
1217
+ {
1218
+ "epoch": 1.001984126984127,
1219
+ "grad_norm": 0.15089259656503126,
1220
+ "learning_rate": 2.5334620804201765e-05,
1221
+ "loss": 0.0813,
1222
+ "step": 1010
1223
+ },
1224
+ {
1225
+ "epoch": 1.0119047619047619,
1226
+ "grad_norm": 0.15814476349549628,
1227
+ "learning_rate": 2.4940947561425505e-05,
1228
+ "loss": 0.0703,
1229
+ "step": 1020
1230
+ },
1231
+ {
1232
+ "epoch": 1.0218253968253967,
1233
+ "grad_norm": 0.11346687531610126,
1234
+ "learning_rate": 2.4547288962078963e-05,
1235
+ "loss": 0.0652,
1236
+ "step": 1030
1237
+ },
1238
+ {
1239
+ "epoch": 1.0317460317460316,
1240
+ "grad_norm": 0.16252940122847073,
1241
+ "learning_rate": 2.415374262299513e-05,
1242
+ "loss": 0.0699,
1243
+ "step": 1040
1244
+ },
1245
+ {
1246
+ "epoch": 1.0416666666666667,
1247
+ "grad_norm": 0.22302915065109266,
1248
+ "learning_rate": 2.3760406133169443e-05,
1249
+ "loss": 0.0671,
1250
+ "step": 1050
1251
+ },
1252
+ {
1253
+ "epoch": 1.0515873015873016,
1254
+ "grad_norm": 0.22373178948720648,
1255
+ "learning_rate": 2.3367377029560304e-05,
1256
+ "loss": 0.071,
1257
+ "step": 1060
1258
+ },
1259
+ {
1260
+ "epoch": 1.0615079365079365,
1261
+ "grad_norm": 0.19248373027885218,
1262
+ "learning_rate": 2.297475277290256e-05,
1263
+ "loss": 0.0684,
1264
+ "step": 1070
1265
+ },
1266
+ {
1267
+ "epoch": 1.0714285714285714,
1268
+ "grad_norm": 0.18697476333136995,
1269
+ "learning_rate": 2.2582630723539784e-05,
1270
+ "loss": 0.0701,
1271
+ "step": 1080
1272
+ },
1273
+ {
1274
+ "epoch": 1.0813492063492063,
1275
+ "grad_norm": 0.15184950126869703,
1276
+ "learning_rate": 2.2191108117281558e-05,
1277
+ "loss": 0.0748,
1278
+ "step": 1090
1279
+ },
1280
+ {
1281
+ "epoch": 1.0912698412698412,
1282
+ "grad_norm": 0.21524692332664133,
1283
+ "learning_rate": 2.1800282041291548e-05,
1284
+ "loss": 0.0718,
1285
+ "step": 1100
1286
+ },
1287
+ {
1288
+ "epoch": 1.1011904761904763,
1289
+ "grad_norm": 0.19429776760950043,
1290
+ "learning_rate": 2.1410249410012496e-05,
1291
+ "loss": 0.0599,
1292
+ "step": 1110
1293
+ },
1294
+ {
1295
+ "epoch": 1.1111111111111112,
1296
+ "grad_norm": 0.24475424138885818,
1297
+ "learning_rate": 2.1021106941134012e-05,
1298
+ "loss": 0.0725,
1299
+ "step": 1120
1300
+ },
1301
+ {
1302
+ "epoch": 1.121031746031746,
1303
+ "grad_norm": 0.19282893432306394,
1304
+ "learning_rate": 2.063295113160919e-05,
1305
+ "loss": 0.0704,
1306
+ "step": 1130
1307
+ },
1308
+ {
1309
+ "epoch": 1.130952380952381,
1310
+ "grad_norm": 0.18724259947539162,
1311
+ "learning_rate": 2.024587823372591e-05,
1312
+ "loss": 0.0752,
1313
+ "step": 1140
1314
+ },
1315
+ {
1316
+ "epoch": 1.1408730158730158,
1317
+ "grad_norm": 0.14771605521783054,
1318
+ "learning_rate": 1.9859984231238835e-05,
1319
+ "loss": 0.0677,
1320
+ "step": 1150
1321
+ },
1322
+ {
1323
+ "epoch": 1.1507936507936507,
1324
+ "grad_norm": 0.27831523957564996,
1325
+ "learning_rate": 1.9475364815568036e-05,
1326
+ "loss": 0.0689,
1327
+ "step": 1160
1328
+ },
1329
+ {
1330
+ "epoch": 1.1607142857142858,
1331
+ "grad_norm": 0.2740109289227727,
1332
+ "learning_rate": 1.9092115362070038e-05,
1333
+ "loss": 0.0684,
1334
+ "step": 1170
1335
+ },
1336
+ {
1337
+ "epoch": 1.1706349206349207,
1338
+ "grad_norm": 0.15722739177982728,
1339
+ "learning_rate": 1.871033090638729e-05,
1340
+ "loss": 0.0649,
1341
+ "step": 1180
1342
+ },
1343
+ {
1344
+ "epoch": 1.1805555555555556,
1345
+ "grad_norm": 0.2155976765362568,
1346
+ "learning_rate": 1.8330106120881846e-05,
1347
+ "loss": 0.0641,
1348
+ "step": 1190
1349
+ },
1350
+ {
1351
+ "epoch": 1.1904761904761905,
1352
+ "grad_norm": 0.2070784065234882,
1353
+ "learning_rate": 1.7951535291159178e-05,
1354
+ "loss": 0.0683,
1355
+ "step": 1200
1356
+ },
1357
+ {
1358
+ "epoch": 1.2003968253968254,
1359
+ "grad_norm": 0.24925088406217583,
1360
+ "learning_rate": 1.7574712292687813e-05,
1361
+ "loss": 0.0736,
1362
+ "step": 1210
1363
+ },
1364
+ {
1365
+ "epoch": 1.2103174603174602,
1366
+ "grad_norm": 0.26008378528910975,
1367
+ "learning_rate": 1.719973056752076e-05,
1368
+ "loss": 0.0613,
1369
+ "step": 1220
1370
+ },
1371
+ {
1372
+ "epoch": 1.2202380952380953,
1373
+ "grad_norm": 0.2687147461146715,
1374
+ "learning_rate": 1.682668310112437e-05,
1375
+ "loss": 0.0648,
1376
+ "step": 1230
1377
+ },
1378
+ {
1379
+ "epoch": 1.2301587301587302,
1380
+ "grad_norm": 0.27904378988460643,
1381
+ "learning_rate": 1.6455662399320383e-05,
1382
+ "loss": 0.0639,
1383
+ "step": 1240
1384
+ },
1385
+ {
1386
+ "epoch": 1.2400793650793651,
1387
+ "grad_norm": 0.24340438457160612,
1388
+ "learning_rate": 1.6086760465346993e-05,
1389
+ "loss": 0.0685,
1390
+ "step": 1250
1391
+ },
1392
+ {
1393
+ "epoch": 1.25,
1394
+ "grad_norm": 0.21382493424744065,
1395
+ "learning_rate": 1.5720068777044476e-05,
1396
+ "loss": 0.0665,
1397
+ "step": 1260
1398
+ },
1399
+ {
1400
+ "epoch": 1.2599206349206349,
1401
+ "grad_norm": 0.17562775798071065,
1402
+ "learning_rate": 1.5355678264171158e-05,
1403
+ "loss": 0.0679,
1404
+ "step": 1270
1405
+ },
1406
+ {
1407
+ "epoch": 1.2698412698412698,
1408
+ "grad_norm": 0.23898778196551948,
1409
+ "learning_rate": 1.4993679285855198e-05,
1410
+ "loss": 0.0693,
1411
+ "step": 1280
1412
+ },
1413
+ {
1414
+ "epoch": 1.2797619047619047,
1415
+ "grad_norm": 0.1564634534054769,
1416
+ "learning_rate": 1.4634161608187999e-05,
1417
+ "loss": 0.0663,
1418
+ "step": 1290
1419
+ },
1420
+ {
1421
+ "epoch": 1.2896825396825398,
1422
+ "grad_norm": 0.3494046426935179,
1423
+ "learning_rate": 1.4277214381964569e-05,
1424
+ "loss": 0.0629,
1425
+ "step": 1300
1426
+ },
1427
+ {
1428
+ "epoch": 1.2996031746031746,
1429
+ "grad_norm": 0.2906431464200776,
1430
+ "learning_rate": 1.3922926120576532e-05,
1431
+ "loss": 0.0755,
1432
+ "step": 1310
1433
+ },
1434
+ {
1435
+ "epoch": 1.3095238095238095,
1436
+ "grad_norm": 0.23278532541655608,
1437
+ "learning_rate": 1.3571384678063128e-05,
1438
+ "loss": 0.0665,
1439
+ "step": 1320
1440
+ },
1441
+ {
1442
+ "epoch": 1.3194444444444444,
1443
+ "grad_norm": 0.26481428967128406,
1444
+ "learning_rate": 1.322267722732582e-05,
1445
+ "loss": 0.0659,
1446
+ "step": 1330
1447
+ },
1448
+ {
1449
+ "epoch": 1.3293650793650793,
1450
+ "grad_norm": 0.17250317955887648,
1451
+ "learning_rate": 1.2876890238511657e-05,
1452
+ "loss": 0.065,
1453
+ "step": 1340
1454
+ },
1455
+ {
1456
+ "epoch": 1.3392857142857144,
1457
+ "grad_norm": 0.1855989884926511,
1458
+ "learning_rate": 1.2534109457571047e-05,
1459
+ "loss": 0.0688,
1460
+ "step": 1350
1461
+ },
1462
+ {
1463
+ "epoch": 1.3492063492063493,
1464
+ "grad_norm": 0.22854169876152886,
1465
+ "learning_rate": 1.2194419884995014e-05,
1466
+ "loss": 0.0694,
1467
+ "step": 1360
1468
+ },
1469
+ {
1470
+ "epoch": 1.3591269841269842,
1471
+ "grad_norm": 0.21567426892633454,
1472
+ "learning_rate": 1.185790575473738e-05,
1473
+ "loss": 0.0685,
1474
+ "step": 1370
1475
+ },
1476
+ {
1477
+ "epoch": 1.369047619047619,
1478
+ "grad_norm": 0.23225993437200204,
1479
+ "learning_rate": 1.1524650513326945e-05,
1480
+ "loss": 0.064,
1481
+ "step": 1380
1482
+ },
1483
+ {
1484
+ "epoch": 1.378968253968254,
1485
+ "grad_norm": 0.18155286576255683,
1486
+ "learning_rate": 1.1194736799174996e-05,
1487
+ "loss": 0.0637,
1488
+ "step": 1390
1489
+ },
1490
+ {
1491
+ "epoch": 1.3888888888888888,
1492
+ "grad_norm": 0.277759522427609,
1493
+ "learning_rate": 1.0868246422083204e-05,
1494
+ "loss": 0.0638,
1495
+ "step": 1400
1496
+ },
1497
+ {
1498
+ "epoch": 1.3988095238095237,
1499
+ "grad_norm": 0.2824587015752722,
1500
+ "learning_rate": 1.0545260342956936e-05,
1501
+ "loss": 0.0728,
1502
+ "step": 1410
1503
+ },
1504
+ {
1505
+ "epoch": 1.4087301587301586,
1506
+ "grad_norm": 0.28789347022215056,
1507
+ "learning_rate": 1.0225858653729143e-05,
1508
+ "loss": 0.0672,
1509
+ "step": 1420
1510
+ },
1511
+ {
1512
+ "epoch": 1.4186507936507937,
1513
+ "grad_norm": 0.18977601762176396,
1514
+ "learning_rate": 9.910120557499666e-06,
1515
+ "loss": 0.0633,
1516
+ "step": 1430
1517
+ },
1518
+ {
1519
+ "epoch": 1.4285714285714286,
1520
+ "grad_norm": 0.25156259951382204,
1521
+ "learning_rate": 9.598124348895032e-06,
1522
+ "loss": 0.0627,
1523
+ "step": 1440
1524
+ },
1525
+ {
1526
+ "epoch": 1.4384920634920635,
1527
+ "grad_norm": 0.2542853679485282,
1528
+ "learning_rate": 9.289947394653407e-06,
1529
+ "loss": 0.0698,
1530
+ "step": 1450
1531
+ },
1532
+ {
1533
+ "epoch": 1.4484126984126984,
1534
+ "grad_norm": 0.2613374152478023,
1535
+ "learning_rate": 8.985666114439758e-06,
1536
+ "loss": 0.0585,
1537
+ "step": 1460
1538
+ },
1539
+ {
1540
+ "epoch": 1.4583333333333333,
1541
+ "grad_norm": 0.2554324424677922,
1542
+ "learning_rate": 8.685355961895784e-06,
1543
+ "loss": 0.0734,
1544
+ "step": 1470
1545
+ },
1546
+ {
1547
+ "epoch": 1.4682539682539684,
1548
+ "grad_norm": 0.34193558194517265,
1549
+ "learning_rate": 8.389091405929467e-06,
1550
+ "loss": 0.0704,
1551
+ "step": 1480
1552
+ },
1553
+ {
1554
+ "epoch": 1.4781746031746033,
1555
+ "grad_norm": 0.5388579049439481,
1556
+ "learning_rate": 8.096945912248718e-06,
1557
+ "loss": 0.0644,
1558
+ "step": 1490
1559
+ },
1560
+ {
1561
+ "epoch": 1.4880952380952381,
1562
+ "grad_norm": 0.17918052887703428,
1563
+ "learning_rate": 7.808991925143869e-06,
1564
+ "loss": 0.0564,
1565
+ "step": 1500
1566
+ },
1567
+ {
1568
+ "epoch": 1.498015873015873,
1569
+ "grad_norm": 0.3011198738615104,
1570
+ "learning_rate": 7.5253008495234255e-06,
1571
+ "loss": 0.0644,
1572
+ "step": 1510
1573
+ },
1574
+ {
1575
+ "epoch": 1.507936507936508,
1576
+ "grad_norm": 0.21945267455563025,
1577
+ "learning_rate": 7.245943033207542e-06,
1578
+ "loss": 0.0627,
1579
+ "step": 1520
1580
+ },
1581
+ {
1582
+ "epoch": 1.5178571428571428,
1583
+ "grad_norm": 0.19972883632416796,
1584
+ "learning_rate": 6.9709877494836314e-06,
1585
+ "loss": 0.0616,
1586
+ "step": 1530
1587
+ },
1588
+ {
1589
+ "epoch": 1.5277777777777777,
1590
+ "grad_norm": 0.2870778046981311,
1591
+ "learning_rate": 6.700503179928458e-06,
1592
+ "loss": 0.0587,
1593
+ "step": 1540
1594
+ },
1595
+ {
1596
+ "epoch": 1.5376984126984126,
1597
+ "grad_norm": 0.46783063057095087,
1598
+ "learning_rate": 6.434556397500918e-06,
1599
+ "loss": 0.0683,
1600
+ "step": 1550
1601
+ },
1602
+ {
1603
+ "epoch": 1.5476190476190477,
1604
+ "grad_norm": 0.25122099969992817,
1605
+ "learning_rate": 6.173213349909729e-06,
1606
+ "loss": 0.0708,
1607
+ "step": 1560
1608
+ },
1609
+ {
1610
+ "epoch": 1.5575396825396826,
1611
+ "grad_norm": 0.30899232162114265,
1612
+ "learning_rate": 5.9165388432601446e-06,
1613
+ "loss": 0.07,
1614
+ "step": 1570
1615
+ },
1616
+ {
1617
+ "epoch": 1.5674603174603174,
1618
+ "grad_norm": 0.1914310341962679,
1619
+ "learning_rate": 5.664596525983814e-06,
1620
+ "loss": 0.0652,
1621
+ "step": 1580
1622
+ },
1623
+ {
1624
+ "epoch": 1.5773809523809523,
1625
+ "grad_norm": 0.26636971533611215,
1626
+ "learning_rate": 5.417448873055617e-06,
1627
+ "loss": 0.0631,
1628
+ "step": 1590
1629
+ },
1630
+ {
1631
+ "epoch": 1.5873015873015874,
1632
+ "grad_norm": 0.22112989096572308,
1633
+ "learning_rate": 5.17515717050156e-06,
1634
+ "loss": 0.076,
1635
+ "step": 1600
1636
+ },
1637
+ {
1638
+ "epoch": 1.5972222222222223,
1639
+ "grad_norm": 0.2928121020824289,
1640
+ "learning_rate": 4.937781500201474e-06,
1641
+ "loss": 0.0701,
1642
+ "step": 1610
1643
+ },
1644
+ {
1645
+ "epoch": 1.6071428571428572,
1646
+ "grad_norm": 0.3156026006058721,
1647
+ "learning_rate": 4.705380724990327e-06,
1648
+ "loss": 0.0615,
1649
+ "step": 1620
1650
+ },
1651
+ {
1652
+ "epoch": 1.617063492063492,
1653
+ "grad_norm": 0.4755190385141863,
1654
+ "learning_rate": 4.478012474061774e-06,
1655
+ "loss": 0.0652,
1656
+ "step": 1630
1657
+ },
1658
+ {
1659
+ "epoch": 1.626984126984127,
1660
+ "grad_norm": 0.17388919764712818,
1661
+ "learning_rate": 4.255733128677691e-06,
1662
+ "loss": 0.0666,
1663
+ "step": 1640
1664
+ },
1665
+ {
1666
+ "epoch": 1.6369047619047619,
1667
+ "grad_norm": 0.3367174542395493,
1668
+ "learning_rate": 4.038597808187092e-06,
1669
+ "loss": 0.0672,
1670
+ "step": 1650
1671
+ },
1672
+ {
1673
+ "epoch": 1.6468253968253967,
1674
+ "grad_norm": 0.3332988216991925,
1675
+ "learning_rate": 3.8266603563580475e-06,
1676
+ "loss": 0.0661,
1677
+ "step": 1660
1678
+ },
1679
+ {
1680
+ "epoch": 1.6567460317460316,
1681
+ "grad_norm": 0.30549723060698114,
1682
+ "learning_rate": 3.6199733280258107e-06,
1683
+ "loss": 0.071,
1684
+ "step": 1670
1685
+ },
1686
+ {
1687
+ "epoch": 1.6666666666666665,
1688
+ "grad_norm": 0.1521266037696581,
1689
+ "learning_rate": 3.418587976060653e-06,
1690
+ "loss": 0.0609,
1691
+ "step": 1680
1692
+ },
1693
+ {
1694
+ "epoch": 1.6765873015873016,
1695
+ "grad_norm": 0.25665858712473993,
1696
+ "learning_rate": 3.2225542386585233e-06,
1697
+ "loss": 0.0668,
1698
+ "step": 1690
1699
+ },
1700
+ {
1701
+ "epoch": 1.6865079365079365,
1702
+ "grad_norm": 0.22578693699521823,
1703
+ "learning_rate": 3.0319207269576903e-06,
1704
+ "loss": 0.059,
1705
+ "step": 1700
1706
+ },
1707
+ {
1708
+ "epoch": 1.6964285714285714,
1709
+ "grad_norm": 0.29793394856689953,
1710
+ "learning_rate": 2.846734712984481e-06,
1711
+ "loss": 0.0636,
1712
+ "step": 1710
1713
+ },
1714
+ {
1715
+ "epoch": 1.7063492063492065,
1716
+ "grad_norm": 0.27937818557407995,
1717
+ "learning_rate": 2.6670421179310788e-06,
1718
+ "loss": 0.0692,
1719
+ "step": 1720
1720
+ },
1721
+ {
1722
+ "epoch": 1.7162698412698414,
1723
+ "grad_norm": 0.49698807611303736,
1724
+ "learning_rate": 2.4928875007683096e-06,
1725
+ "loss": 0.069,
1726
+ "step": 1730
1727
+ },
1728
+ {
1729
+ "epoch": 1.7261904761904763,
1730
+ "grad_norm": 0.22562604605413764,
1731
+ "learning_rate": 2.3243140471961772e-06,
1732
+ "loss": 0.063,
1733
+ "step": 1740
1734
+ },
1735
+ {
1736
+ "epoch": 1.7361111111111112,
1737
+ "grad_norm": 0.2724970311694327,
1738
+ "learning_rate": 2.1613635589349756e-06,
1739
+ "loss": 0.0649,
1740
+ "step": 1750
1741
+ },
1742
+ {
1743
+ "epoch": 1.746031746031746,
1744
+ "grad_norm": 0.22608422901904388,
1745
+ "learning_rate": 2.004076443359593e-06,
1746
+ "loss": 0.0621,
1747
+ "step": 1760
1748
+ },
1749
+ {
1750
+ "epoch": 1.755952380952381,
1751
+ "grad_norm": 0.2169426951719115,
1752
+ "learning_rate": 1.8524917034795252e-06,
1753
+ "loss": 0.0602,
1754
+ "step": 1770
1755
+ },
1756
+ {
1757
+ "epoch": 1.7658730158730158,
1758
+ "grad_norm": 0.24550854266292543,
1759
+ "learning_rate": 1.7066469282672026e-06,
1760
+ "loss": 0.0678,
1761
+ "step": 1780
1762
+ },
1763
+ {
1764
+ "epoch": 1.7757936507936507,
1765
+ "grad_norm": 0.2217749756351088,
1766
+ "learning_rate": 1.566578283336903e-06,
1767
+ "loss": 0.0632,
1768
+ "step": 1790
1769
+ },
1770
+ {
1771
+ "epoch": 1.7857142857142856,
1772
+ "grad_norm": 0.26436288496986254,
1773
+ "learning_rate": 1.4323205019766694e-06,
1774
+ "loss": 0.0684,
1775
+ "step": 1800
1776
+ },
1777
+ {
1778
+ "epoch": 1.7956349206349205,
1779
+ "grad_norm": 0.2687739361594074,
1780
+ "learning_rate": 1.3039068765353573e-06,
1781
+ "loss": 0.0581,
1782
+ "step": 1810
1783
+ },
1784
+ {
1785
+ "epoch": 1.8055555555555556,
1786
+ "grad_norm": 0.28225821347276653,
1787
+ "learning_rate": 1.1813692501670276e-06,
1788
+ "loss": 0.0654,
1789
+ "step": 1820
1790
+ },
1791
+ {
1792
+ "epoch": 1.8154761904761905,
1793
+ "grad_norm": 0.23006756666014438,
1794
+ "learning_rate": 1.064738008934696e-06,
1795
+ "loss": 0.0667,
1796
+ "step": 1830
1797
+ },
1798
+ {
1799
+ "epoch": 1.8253968253968254,
1800
+ "grad_norm": 0.2705006236969955,
1801
+ "learning_rate": 9.540420742754103e-07,
1802
+ "loss": 0.0652,
1803
+ "step": 1840
1804
+ },
1805
+ {
1806
+ "epoch": 1.8353174603174605,
1807
+ "grad_norm": 0.29549308432556487,
1808
+ "learning_rate": 8.493088958284822e-07,
1809
+ "loss": 0.0729,
1810
+ "step": 1850
1811
+ },
1812
+ {
1813
+ "epoch": 1.8452380952380953,
1814
+ "grad_norm": 0.26523762393360467,
1815
+ "learning_rate": 7.505644446287263e-07,
1816
+ "loss": 0.0621,
1817
+ "step": 1860
1818
+ },
1819
+ {
1820
+ "epoch": 1.8551587301587302,
1821
+ "grad_norm": 0.28071984020209584,
1822
+ "learning_rate": 6.578332066663307e-07,
1823
+ "loss": 0.065,
1824
+ "step": 1870
1825
+ },
1826
+ {
1827
+ "epoch": 1.8650793650793651,
1828
+ "grad_norm": 0.19893792873292027,
1829
+ "learning_rate": 5.711381768149865e-07,
1830
+ "loss": 0.0623,
1831
+ "step": 1880
1832
+ },
1833
+ {
1834
+ "epoch": 1.875,
1835
+ "grad_norm": 0.17581046709328915,
1836
+ "learning_rate": 4.905008531297661e-07,
1837
+ "loss": 0.0584,
1838
+ "step": 1890
1839
+ },
1840
+ {
1841
+ "epoch": 1.8849206349206349,
1842
+ "grad_norm": 0.21415646467667077,
1843
+ "learning_rate": 4.1594123151618704e-07,
1844
+ "loss": 0.0593,
1845
+ "step": 1900
1846
+ },
1847
+ {
1848
+ "epoch": 1.8948412698412698,
1849
+ "grad_norm": 0.21962600950346692,
1850
+ "learning_rate": 3.474778007717588e-07,
1851
+ "loss": 0.0683,
1852
+ "step": 1910
1853
+ },
1854
+ {
1855
+ "epoch": 1.9047619047619047,
1856
+ "grad_norm": 0.18764676866552024,
1857
+ "learning_rate": 2.851275380012508e-07,
1858
+ "loss": 0.0637,
1859
+ "step": 1920
1860
+ },
1861
+ {
1862
+ "epoch": 1.9146825396825395,
1863
+ "grad_norm": 0.2667189957763398,
1864
+ "learning_rate": 2.2890590440682314e-07,
1865
+ "loss": 0.0664,
1866
+ "step": 1930
1867
+ },
1868
+ {
1869
+ "epoch": 1.9246031746031746,
1870
+ "grad_norm": 0.25067142387187374,
1871
+ "learning_rate": 1.7882684145406614e-07,
1872
+ "loss": 0.0647,
1873
+ "step": 1940
1874
+ },
1875
+ {
1876
+ "epoch": 1.9345238095238095,
1877
+ "grad_norm": 0.23355277796154653,
1878
+ "learning_rate": 1.3490276741488783e-07,
1879
+ "loss": 0.0661,
1880
+ "step": 1950
1881
+ },
1882
+ {
1883
+ "epoch": 1.9444444444444444,
1884
+ "grad_norm": 0.2871474934919063,
1885
+ "learning_rate": 9.71445742881022e-08,
1886
+ "loss": 0.061,
1887
+ "step": 1960
1888
+ },
1889
+ {
1890
+ "epoch": 1.9543650793650795,
1891
+ "grad_norm": 0.3570405979727388,
1892
+ "learning_rate": 6.556162509852304e-08,
1893
+ "loss": 0.075,
1894
+ "step": 1970
1895
+ },
1896
+ {
1897
+ "epoch": 1.9642857142857144,
1898
+ "grad_norm": 0.24614021074624268,
1899
+ "learning_rate": 4.016175157516844e-08,
1900
+ "loss": 0.0629,
1901
+ "step": 1980
1902
+ },
1903
+ {
1904
+ "epoch": 1.9742063492063493,
1905
+ "grad_norm": 0.31139696477741163,
1906
+ "learning_rate": 2.0951252209208682e-08,
1907
+ "loss": 0.0658,
1908
+ "step": 1990
1909
+ },
1910
+ {
1911
+ "epoch": 1.9841269841269842,
1912
+ "grad_norm": 0.29188443750648085,
1913
+ "learning_rate": 7.934890692101738e-09,
1914
+ "loss": 0.057,
1915
+ "step": 2000
1916
+ },
1917
+ {
1918
+ "epoch": 1.994047619047619,
1919
+ "grad_norm": 0.2831720795550947,
1920
+ "learning_rate": 1.1158947343353766e-09,
1921
+ "loss": 0.0555,
1922
+ "step": 2010
1923
+ },
1924
+ {
1925
+ "Accuracy": 78.2,
1926
+ "Overall_f1": 70.5,
1927
+ "epoch": 2.0,
1928
+ "eval_UnsafeBench_runtime": 226.0049,
1929
+ "eval_UnsafeBench_samples_per_second": 9.062,
1930
+ "eval_UnsafeBench_steps_per_second": 0.142,
1931
+ "generated": {
1932
+ "Accuracy": 78.5,
1933
+ "F1": 74.5,
1934
+ "unsafe": {
1935
+ "Accuracy": 78.5,
1936
+ "F1": 74.5,
1937
+ "safe": {
1938
+ "f1": "81.4",
1939
+ "prec": "83.1(486/585)",
1940
+ "recall": "79.8(486/609)"
1941
+ },
1942
+ "unsafe": {
1943
+ "f1": "74.5",
1944
+ "prec": "72.5(325/448)",
1945
+ "recall": "76.7(325/424)"
1946
+ }
1947
+ }
1948
+ },
1949
+ "real": {
1950
+ "Accuracy": 77.8,
1951
+ "F1": 64.9,
1952
+ "unsafe": {
1953
+ "Accuracy": 77.8,
1954
+ "F1": 64.9,
1955
+ "safe": {
1956
+ "f1": "83.8",
1957
+ "prec": "79.3(582/734)",
1958
+ "recall": "88.9(582/655)"
1959
+ },
1960
+ "unsafe": {
1961
+ "f1": "64.9",
1962
+ "prec": "74.0(208/281)",
1963
+ "recall": "57.8(208/360)"
1964
+ }
1965
+ }
1966
+ },
1967
+ "step": 2016
1968
+ },
1969
+ {
1970
+ "Accuracy": 69.5,
1971
+ "Overall_f1": 82.0,
1972
+ "epoch": 2.0,
1973
+ "eval_SelfHarm_runtime": 80.8454,
1974
+ "eval_SelfHarm_samples_per_second": 7.916,
1975
+ "eval_SelfHarm_steps_per_second": 0.124,
1976
+ "generated": {
1977
+ "Accuracy": 0.0,
1978
+ "F1": 0.0,
1979
+ "unsafe": {
1980
+ "Accuracy": 0.0,
1981
+ "F1": 0.0,
1982
+ "safe": {
1983
+ "f1": "0.0",
1984
+ "prec": "0.0(0/0)",
1985
+ "recall": "0.0(0/0)"
1986
+ },
1987
+ "unsafe": {
1988
+ "f1": "0.0",
1989
+ "prec": "0.0(0/0)",
1990
+ "recall": "0.0(0/0)"
1991
+ }
1992
+ }
1993
+ },
1994
+ "real": {
1995
+ "Accuracy": 69.5,
1996
+ "F1": 82.0,
1997
+ "unsafe": {
1998
+ "Accuracy": 69.5,
1999
+ "F1": 82.0,
2000
+ "safe": {
2001
+ "f1": "0.0",
2002
+ "prec": "0.0(0/195)",
2003
+ "recall": "0.0(0/0)"
2004
+ },
2005
+ "unsafe": {
2006
+ "f1": "82.0",
2007
+ "prec": "100.0(445/445)",
2008
+ "recall": "69.5(445/640)"
2009
+ }
2010
+ }
2011
+ },
2012
+ "step": 2016
2013
+ },
2014
+ {
2015
+ "Accuracy": 79.8,
2016
+ "Overall_f1": 69.6,
2017
+ "epoch": 2.0,
2018
+ "eval_UnsafeDiff_runtime": 98.0101,
2019
+ "eval_UnsafeDiff_samples_per_second": 8.489,
2020
+ "eval_UnsafeDiff_steps_per_second": 0.133,
2021
+ "generated": {
2022
+ "Accuracy": 0.0,
2023
+ "F1": 0.0,
2024
+ "unsafe": {
2025
+ "Accuracy": 0.0,
2026
+ "F1": 0.0,
2027
+ "safe": {
2028
+ "f1": "0.0",
2029
+ "prec": "0.0(0/0)",
2030
+ "recall": "0.0(0/0)"
2031
+ },
2032
+ "unsafe": {
2033
+ "f1": "0.0",
2034
+ "prec": "0.0(0/0)",
2035
+ "recall": "0.0(0/0)"
2036
+ }
2037
+ }
2038
+ },
2039
+ "real": {
2040
+ "Accuracy": 79.8,
2041
+ "F1": 69.6,
2042
+ "unsafe": {
2043
+ "Accuracy": 79.8,
2044
+ "F1": 69.6,
2045
+ "safe": {
2046
+ "f1": "84.9",
2047
+ "prec": "93.5(471/504)",
2048
+ "recall": "77.7(471/606)"
2049
+ },
2050
+ "unsafe": {
2051
+ "f1": "69.6",
2052
+ "prec": "58.8(193/328)",
2053
+ "recall": "85.4(193/226)"
2054
+ }
2055
+ }
2056
+ },
2057
+ "step": 2016
2058
+ },
2059
+ {
2060
+ "Accuracy": 69.3,
2061
+ "Overall_f1": 81.9,
2062
+ "epoch": 2.0,
2063
+ "eval_ViolentBehavior_runtime": 23.6563,
2064
+ "eval_ViolentBehavior_samples_per_second": 8.116,
2065
+ "eval_ViolentBehavior_steps_per_second": 0.127,
2066
+ "generated": {
2067
+ "Accuracy": 0.0,
2068
+ "F1": 0.0,
2069
+ "unsafe": {
2070
+ "Accuracy": 0.0,
2071
+ "F1": 0.0,
2072
+ "safe": {
2073
+ "f1": "0.0",
2074
+ "prec": "0.0(0/0)",
2075
+ "recall": "0.0(0/0)"
2076
+ },
2077
+ "unsafe": {
2078
+ "f1": "0.0",
2079
+ "prec": "0.0(0/0)",
2080
+ "recall": "0.0(0/0)"
2081
+ }
2082
+ }
2083
+ },
2084
+ "real": {
2085
+ "Accuracy": 69.3,
2086
+ "F1": 81.9,
2087
+ "unsafe": {
2088
+ "Accuracy": 69.3,
2089
+ "F1": 81.9,
2090
+ "safe": {
2091
+ "f1": "0.0",
2092
+ "prec": "0.0(0/59)",
2093
+ "recall": "0.0(0/0)"
2094
+ },
2095
+ "unsafe": {
2096
+ "f1": "81.9",
2097
+ "prec": "100.0(133/133)",
2098
+ "recall": "69.3(133/192)"
2099
+ }
2100
+ }
2101
+ },
2102
+ "step": 2016
2103
+ },
2104
+ {
2105
+ "Accuracy": 78.9,
2106
+ "Overall_f1": 70.8,
2107
+ "epoch": 2.0,
2108
+ "eval_SMID_runtime": 190.3209,
2109
+ "eval_SMID_samples_per_second": 9.079,
2110
+ "eval_SMID_steps_per_second": 0.142,
2111
+ "generated": {
2112
+ "Accuracy": 0.0,
2113
+ "F1": 0.0,
2114
+ "unsafe": {
2115
+ "Accuracy": 0.0,
2116
+ "F1": 0.0,
2117
+ "safe": {
2118
+ "f1": "0.0",
2119
+ "prec": "0.0(0/0)",
2120
+ "recall": "0.0(0/0)"
2121
+ },
2122
+ "unsafe": {
2123
+ "f1": "0.0",
2124
+ "prec": "0.0(0/0)",
2125
+ "recall": "0.0(0/0)"
2126
+ }
2127
+ }
2128
+ },
2129
+ "real": {
2130
+ "Accuracy": 78.9,
2131
+ "F1": 70.8,
2132
+ "unsafe": {
2133
+ "Accuracy": 78.9,
2134
+ "F1": 70.8,
2135
+ "safe": {
2136
+ "f1": "83.5",
2137
+ "prec": "76.0(921/1212)",
2138
+ "recall": "92.6(921/995)"
2139
+ },
2140
+ "unsafe": {
2141
+ "f1": "70.8",
2142
+ "prec": "85.7(442/516)",
2143
+ "recall": "60.3(442/733)"
2144
+ }
2145
+ }
2146
+ },
2147
+ "step": 2016
2148
+ },
2149
+ {
2150
+ "Accuracy": 84.9,
2151
+ "Overall_f1": 85.5,
2152
+ "epoch": 2.0,
2153
+ "eval_JsonDataset_runtime": 408.3025,
2154
+ "eval_JsonDataset_samples_per_second": 4.859,
2155
+ "eval_JsonDataset_steps_per_second": 0.076,
2156
+ "generated": {
2157
+ "Accuracy": 87.6,
2158
+ "F1": 88.6,
2159
+ "fairness": {
2160
+ "Accuracy": 91.3,
2161
+ "F1": 91.3,
2162
+ "african": {
2163
+ "f1": "88.9",
2164
+ "prec": "82.4(28/34)",
2165
+ "recall": "96.6(28/29)"
2166
+ },
2167
+ "asian": {
2168
+ "f1": "86.3",
2169
+ "prec": "91.4(117/128)",
2170
+ "recall": "81.8(117/143)"
2171
+ },
2172
+ "caucasian": {
2173
+ "f1": "91.7",
2174
+ "prec": "88.8(284/320)",
2175
+ "recall": "94.7(284/300)"
2176
+ },
2177
+ "children": {
2178
+ "f1": "75.0",
2179
+ "prec": "63.2(12/19)",
2180
+ "recall": "92.3(12/13)"
2181
+ },
2182
+ "elderly": {
2183
+ "f1": "93.9",
2184
+ "prec": "96.4(108/112)",
2185
+ "recall": "91.5(108/118)"
2186
+ },
2187
+ "female": {
2188
+ "f1": "99.4",
2189
+ "prec": "98.8(238/241)",
2190
+ "recall": "100.0(238/238)"
2191
+ },
2192
+ "indian": {
2193
+ "f1": "70.6",
2194
+ "prec": "75.0(6/8)",
2195
+ "recall": "66.7(6/9)"
2196
+ },
2197
+ "latino": {
2198
+ "f1": "45.3",
2199
+ "prec": "54.5(12/22)",
2200
+ "recall": "38.7(12/31)"
2201
+ },
2202
+ "male": {
2203
+ "f1": "99.4",
2204
+ "prec": "100.0(271/271)",
2205
+ "recall": "98.9(271/274)"
2206
+ },
2207
+ "middle-aged": {
2208
+ "f1": "80.3",
2209
+ "prec": "89.2(116/130)",
2210
+ "recall": "73.0(116/159)"
2211
+ },
2212
+ "safe": {
2213
+ "f1": "0.0",
2214
+ "prec": "0.0(0/0)",
2215
+ "recall": "0.0(0/0)"
2216
+ },
2217
+ "teenager": {
2218
+ "f1": "0.0",
2219
+ "prec": "0.0(0/0)",
2220
+ "recall": "0.0(0/0)"
2221
+ },
2222
+ "young adult": {
2223
+ "f1": "89.2",
2224
+ "prec": "84.1(211/251)",
2225
+ "recall": "95.0(211/222)"
2226
+ }
2227
+ },
2228
+ "privacy": {
2229
+ "Accuracy": 83.6,
2230
+ "F1": 86.7,
2231
+ "intellectual property violation": {
2232
+ "f1": "85.7",
2233
+ "prec": "90.0(36/40)",
2234
+ "recall": "81.8(36/44)"
2235
+ },
2236
+ "personal identification documents": {
2237
+ "f1": "90.5",
2238
+ "prec": "95.6(43/45)",
2239
+ "recall": "86.0(43/50)"
2240
+ },
2241
+ "public figures": {
2242
+ "f1": "83.5",
2243
+ "prec": "82.6(38/46)",
2244
+ "recall": "84.4(38/45)"
2245
+ },
2246
+ "safe": {
2247
+ "f1": "75.9",
2248
+ "prec": "70.7(41/58)",
2249
+ "recall": "82.0(41/50)"
2250
+ }
2251
+ },
2252
+ "toxicity": {
2253
+ "Accuracy": 69.4,
2254
+ "F1": 70.3,
2255
+ "disturbing": {
2256
+ "f1": "72.0",
2257
+ "prec": "61.0(36/59)",
2258
+ "recall": "87.8(36/41)"
2259
+ },
2260
+ "hate": {
2261
+ "f1": "50.0",
2262
+ "prec": "100.0(5/5)",
2263
+ "recall": "33.3(5/15)"
2264
+ },
2265
+ "humiliation": {
2266
+ "f1": "43.4",
2267
+ "prec": "92.9(13/14)",
2268
+ "recall": "28.3(13/46)"
2269
+ },
2270
+ "illegal activity": {
2271
+ "f1": "69.8",
2272
+ "prec": "88.0(22/25)",
2273
+ "recall": "57.9(22/38)"
2274
+ },
2275
+ "safe": {
2276
+ "f1": "66.1",
2277
+ "prec": "55.7(39/70)",
2278
+ "recall": "81.2(39/48)"
2279
+ },
2280
+ "sexual": {
2281
+ "f1": "96.8",
2282
+ "prec": "100.0(45/45)",
2283
+ "recall": "93.8(45/48)"
2284
+ },
2285
+ "violence": {
2286
+ "f1": "63.6",
2287
+ "prec": "52.8(28/53)",
2288
+ "recall": "80.0(28/35)"
2289
+ }
2290
+ }
2291
+ },
2292
+ "real": {
2293
+ "Accuracy": 82.2,
2294
+ "F1": 82.4,
2295
+ "fairness": {
2296
+ "Accuracy": 81.6,
2297
+ "F1": 81.6,
2298
+ "african": {
2299
+ "f1": "81.6",
2300
+ "prec": "85.7(60/70)",
2301
+ "recall": "77.9(60/77)"
2302
+ },
2303
+ "asian": {
2304
+ "f1": "76.9",
2305
+ "prec": "76.9(70/91)",
2306
+ "recall": "76.9(70/91)"
2307
+ },
2308
+ "caucasian": {
2309
+ "f1": "81.6",
2310
+ "prec": "74.7(162/217)",
2311
+ "recall": "90.0(162/180)"
2312
+ },
2313
+ "children": {
2314
+ "f1": "81.3",
2315
+ "prec": "74.3(26/35)",
2316
+ "recall": "89.7(26/29)"
2317
+ },
2318
+ "elderly": {
2319
+ "f1": "68.1",
2320
+ "prec": "75.6(31/41)",
2321
+ "recall": "62.0(31/50)"
2322
+ },
2323
+ "female": {
2324
+ "f1": "94.6",
2325
+ "prec": "98.0(149/152)",
2326
+ "recall": "91.4(149/163)"
2327
+ },
2328
+ "indian": {
2329
+ "f1": "62.4",
2330
+ "prec": "65.0(39/60)",
2331
+ "recall": "60.0(39/65)"
2332
+ },
2333
+ "latino": {
2334
+ "f1": "35.3",
2335
+ "prec": "42.6(26/61)",
2336
+ "recall": "30.2(26/86)"
2337
+ },
2338
+ "male": {
2339
+ "f1": "97.5",
2340
+ "prec": "96.0(333/347)",
2341
+ "recall": "99.1(333/336)"
2342
+ },
2343
+ "middle-aged": {
2344
+ "f1": "80.0",
2345
+ "prec": "83.1(212/255)",
2346
+ "recall": "77.1(212/275)"
2347
+ },
2348
+ "safe": {
2349
+ "f1": "0.0",
2350
+ "prec": "0.0(0/0)",
2351
+ "recall": "0.0(0/0)"
2352
+ },
2353
+ "teenager": {
2354
+ "f1": "0.0",
2355
+ "prec": "0.0(0/0)",
2356
+ "recall": "0.0(0/0)"
2357
+ },
2358
+ "young adult": {
2359
+ "f1": "72.2",
2360
+ "prec": "67.3(113/168)",
2361
+ "recall": "77.9(113/145)"
2362
+ }
2363
+ },
2364
+ "privacy": {
2365
+ "Accuracy": 87.0,
2366
+ "F1": 90.6,
2367
+ "intellectual property violation": {
2368
+ "f1": "88.1",
2369
+ "prec": "100.0(37/37)",
2370
+ "recall": "78.7(37/47)"
2371
+ },
2372
+ "personal identification documents": {
2373
+ "f1": "93.1",
2374
+ "prec": "92.2(47/51)",
2375
+ "recall": "94.0(47/50)"
2376
+ },
2377
+ "public figures": {
2378
+ "f1": "90.1",
2379
+ "prec": "95.3(41/43)",
2380
+ "recall": "85.4(41/48)"
2381
+ },
2382
+ "safe": {
2383
+ "f1": "78.2",
2384
+ "prec": "69.4(43/62)",
2385
+ "recall": "89.6(43/48)"
2386
+ }
2387
+ },
2388
+ "toxicity": {
2389
+ "Accuracy": 82.2,
2390
+ "F1": 83.3,
2391
+ "disturbing": {
2392
+ "f1": "84.8",
2393
+ "prec": "84.0(42/50)",
2394
+ "recall": "85.7(42/49)"
2395
+ },
2396
+ "hate": {
2397
+ "f1": "69.6",
2398
+ "prec": "72.7(8/11)",
2399
+ "recall": "66.7(8/12)"
2400
+ },
2401
+ "humiliation": {
2402
+ "f1": "30.0",
2403
+ "prec": "50.0(3/6)",
2404
+ "recall": "21.4(3/14)"
2405
+ },
2406
+ "illegal activity": {
2407
+ "f1": "88.2",
2408
+ "prec": "92.9(26/28)",
2409
+ "recall": "83.9(26/31)"
2410
+ },
2411
+ "safe": {
2412
+ "f1": "78.1",
2413
+ "prec": "69.5(41/59)",
2414
+ "recall": "89.1(41/46)"
2415
+ },
2416
+ "sexual": {
2417
+ "f1": "95.0",
2418
+ "prec": "94.1(48/51)",
2419
+ "recall": "96.0(48/50)"
2420
+ },
2421
+ "violence": {
2422
+ "f1": "80.5",
2423
+ "prec": "83.3(35/42)",
2424
+ "recall": "77.8(35/45)"
2425
+ }
2426
+ }
2427
+ },
2428
+ "step": 2016
2429
+ },
2430
+ {
2431
+ "epoch": 2.0,
2432
+ "step": 2016,
2433
+ "total_flos": 0.0,
2434
+ "train_loss": 0.08641785344788952,
2435
+ "train_runtime": 25197.0435,
2436
+ "train_samples_per_second": 5.118,
2437
+ "train_steps_per_second": 0.08
2438
+ }
2439
+ ],
2440
+ "logging_steps": 10,
2441
+ "max_steps": 2016,
2442
+ "num_input_tokens_seen": 0,
2443
+ "num_train_epochs": 2,
2444
+ "save_steps": 500,
2445
+ "stateful_callbacks": {
2446
+ "TrainerControl": {
2447
+ "args": {
2448
+ "should_epoch_stop": false,
2449
+ "should_evaluate": false,
2450
+ "should_log": false,
2451
+ "should_save": true,
2452
+ "should_training_stop": true
2453
+ },
2454
+ "attributes": {}
2455
+ }
2456
+ },
2457
+ "total_flos": 0.0,
2458
+ "train_batch_size": 8,
2459
+ "trial_name": null,
2460
+ "trial_params": null
2461
+ }
lora/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52a3bfb75fba53db5b4c5c7bbf4604ec198920602a994d2d438e866c29f40718
3
+ size 6584