Upload folder using huggingface_hub

Browse files

Files changed (10) hide show

lora/adapter_config.json +32 -0
lora/adapter_model.safetensors +3 -0
lora/config.yaml +181 -0
lora/non_lora_trainables.bin +3 -0
lora/special_tokens_map.json +6 -0
lora/tokenization_internlm_xcomposer2.py +252 -0
lora/tokenizer.model +3 -0
lora/tokenizer_config.json +43 -0
lora/trainer_state.json +2461 -0
lora/training_args.bin +3 -0

lora/adapter_config.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "model_zoo/internlm-xcomposer2-vl-7b",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 256,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "attention.wo",
+    "feed_forward.w2",
+    "attention.wqkv",
+    "feed_forward.w1",
+    "feed_forward.w3"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

lora/adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:0cc549f4f6f3a2763298d164ee5589fc213e716d8286b3af5fbe0dbda6bd01d9
+size 1208003536

lora/config.yaml ADDED Viewed

	@@ -0,0 +1,181 @@

+data_cfg:
+  data_cfg:
+    eval:
+      JsonDataset:
+        base_path: data
+        fairness: true
+        generated_ratio: 1.0
+        real_ratio: 1.0
+        safety: true
+      SMID:
+        base_path: data/evaluator_test/SMID
+      SelfHarm:
+        base_path: data/evaluator_test/self-harm
+      UnsafeBench:
+        base_path: data/evaluator_test/UnsafeBench
+      UnsafeDiff:
+        base_path: data/evaluator_test/Unsafe_diff
+      ViolentBehavior:
+        base_path: data/evaluator_test/Violent_behavior
+    train:
+      base_path: data
+      fairness: true
+      generated_ratio: 1.0
+      max_face_length: 2000
+      max_generated_dim_length: 6000
+      max_real_dim_length: 600
+      real_ratio: 1.0
+      safe_ratio: 0.3
+      safety: true
+  verion: 3
+lora_cfg:
+  lora_alpha: 256
+  lora_bias: none
+  lora_dropout: 0.05
+  lora_r: 256
+  lora_target_modules:
+  - attention.wqkv
+  - attention.wo
+  - feed_forward.w1
+  - feed_forward.w2
+  - feed_forward.w3
+  lora_type: lora
+  lora_weight_path: ''
+model_cfg:
+  model_name_or_path: model_zoo/internlm-xcomposer2-vl-7b
+training_cfg:
+  _n_gpu: 1
+  accelerator_config:
+    dispatch_batches: null
+    even_batches: true
+    gradient_accumulation_kwargs: null
+    non_blocking: false
+    split_batches: false
+    use_seedable_sampler: true
+  adafactor: false
+  adam_beta1: 0.9
+  adam_beta2: 0.95
+  adam_epsilon: 1.0e-08
+  auto_find_batch_size: false
+  batch_eval_metrics: false
+  bf16: true
+  bf16_full_eval: false
+  cache_dir: null
+  data_seed: null
+  dataloader_drop_last: false
+  dataloader_num_workers: 0
+  dataloader_persistent_workers: false
+  dataloader_pin_memory: true
+  dataloader_prefetch_factor: null
+  ddp_backend: null
+  ddp_broadcast_buffers: null
+  ddp_bucket_cap_mb: null
+  ddp_find_unused_parameters: null
+  ddp_timeout: 1800
+  debug: []
+  deepspeed: scripts/ds_config_zero2.json
+  disable_tqdm: false
+  dispatch_batches: null
+  do_eval: true
+  do_predict: false
+  do_train: false
+  eval_accumulation_steps: null
+  eval_delay: 0
+  eval_do_concat_batches: true
+  eval_steps: null
+  fix_sampler: false
+  fix_vit: true
+  fp16: false
+  fp16_backend: auto
+  fp16_full_eval: false
+  fp16_opt_level: O1
+  fsdp: []
+  fsdp_config:
+    min_num_params: 0
+    xla: false
+    xla_fsdp_grad_ckpt: false
+    xla_fsdp_v2: false
+  fsdp_min_num_params: 0
+  fsdp_transformer_layer_cls_to_wrap: null
+  full_determinism: false
+  gradient_accumulation_steps: 1
+  gradient_checkpointing: true
+  gradient_checkpointing_kwargs: null
+  greater_is_better: null
+  group_by_length: false
+  half_precision_backend: auto
+  hub_always_push: false
+  hub_model_id: null
+  hub_private_repo: false
+  hub_token: null
+  ignore_data_skip: false
+  include_inputs_for_metrics: false
+  include_num_input_tokens_seen: false
+  include_tokens_per_second: false
+  jit_mode_eval: false
+  label_names:
+  - samples
+  label_smoothing_factor: 0.0
+  learning_rate: 5.0e-05
+  length_column_name: length
+  load_best_model_at_end: false
+  local_rank: 7
+  log_level: passive
+  log_level_replica: warning
+  log_on_each_node: true
+  logging_dir: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
+  logging_first_step: false
+  logging_nan_inf_filter: true
+  logging_steps: 10
+  lr_scheduler_kwargs: {}
+  max_grad_norm: 1.0
+  max_length: 4096
+  max_steps: -1
+  metric_for_best_model: null
+  mp_parameters: ''
+  neftune_noise_alpha: null
+  no_cuda: false
+  num_train_epochs: 2.0
+  optim_args: null
+  optim_target_modules: null
+  output_dir: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
+  overwrite_output_dir: false
+  past_index: -1
+  per_device_eval_batch_size: 8
+  per_device_train_batch_size: 8
+  per_gpu_eval_batch_size: null
+  per_gpu_train_batch_size: null
+  prediction_loss_only: false
+  push_to_hub: false
+  push_to_hub_model_id: null
+  push_to_hub_organization: null
+  push_to_hub_token: null
+  ray_scope: last
+  remove_unused_columns: true
+  report_to: []
+  restore_callback_states_from_checkpoint: false
+  resume_from_checkpoint: null
+  run_name: output/internlm/datav3_1/safe0.3_lr5e-5_decay1e-2_nocap_e2//safe0.3_nocap
+  save_on_each_node: false
+  save_only_model: false
+  save_safetensors: true
+  save_steps: 500
+  save_total_limit: 1
+  seed: 3407
+  skip_memory_metrics: true
+  split_batches: null
+  tf32: null
+  torch_compile: false
+  torch_compile_backend: null
+  torch_compile_mode: null
+  torchdynamo: null
+  tpu_metrics_debug: false
+  tpu_num_cores: null
+  use_cpu: false
+  use_ipex: false
+  use_legacy_prediction_loop: false
+  use_lora: true
+  use_mps_device: false
+  warmup_ratio: 0.01
+  warmup_steps: 0
+  weight_decay: 0.01

lora/non_lora_trainables.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:12840b8d4721fd14f249759412c1c9d821c016ceec870ce1ea9eafa4da457cbf
+size 1073755886

lora/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+  "bos_token": "<s>",
+  "eos_token": "</s>",
+  "pad_token": "</s>",
+  "unk_token": "<unk>"
+}

lora/tokenization_internlm_xcomposer2.py ADDED Viewed

	@@ -0,0 +1,252 @@

+# Copyright (c) InternLM. All rights reserved.
+#
+# This code is based on EleutherAI's GPT-NeoX library and the GPT-NeoX
+# and OPT implementations in this library. It has been modified from its
+# original forms to accommodate minor architectural differences compared
+# to GPT-NeoX and OPT used by the Meta AI team that trained the model.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Tokenization classes for InternLM."""
+import os
+from shutil import copyfile
+from typing import Any, Dict, List, Optional, Tuple
+import sentencepiece as spm
+from transformers.tokenization_utils import PreTrainedTokenizer
+from transformers.utils import logging
+# Module-level logger obtained from the transformers logging utilities.
+logger = logging.get_logger(__name__)
+# Maps the `vocab_file` init argument to the SentencePiece model filename;
+# the tokenizer class exposes it as `vocab_files_names` and `save_vocabulary`
+# uses it to name the file it writes.
+VOCAB_FILES_NAMES = {'vocab_file': './tokenizer.model'}
+# Left empty here; the tokenizer class exposes it as
+# `pretrained_vocab_files_map`.
+PRETRAINED_VOCAB_FILES_MAP = {}
+class InternLMXComposer2Tokenizer(PreTrainedTokenizer):
+    """Construct a InternLM tokenizer. Based on byte-level Byte-Pair-Encoding.
+    Args:
+        vocab_file (`str`):
+            Path to the vocabulary file.
+    """
+    vocab_files_names = VOCAB_FILES_NAMES
+    pretrained_vocab_files_map = PRETRAINED_VOCAB_FILES_MAP
+    model_input_names = ['input_ids', 'attention_mask']
+    _auto_class = 'AutoTokenizer'
+    def __init__(
+        self,
+        vocab_file,
+        unk_token='<unk>',
+        bos_token='<s>',
+        eos_token='</s>',
+        pad_token='</s>',
+        sp_model_kwargs: Optional[Dict[str, Any]] = None,
+        add_bos_token=True,
+        add_eos_token=False,
+        decode_with_prefix_space=False,
+        clean_up_tokenization_spaces=False,
+        **kwargs,
+    ):
+        self.sp_model_kwargs = {} if sp_model_kwargs is None else sp_model_kwargs
+        self.vocab_file = vocab_file
+        self.add_bos_token = add_bos_token
+        self.add_eos_token = add_eos_token
+        self.decode_with_prefix_space = decode_with_prefix_space
+        self.sp_model = spm.SentencePieceProcessor(**self.sp_model_kwargs)
+        self.sp_model.Load(vocab_file)
+        self._no_prefix_space_tokens = None
+        super().__init__(
+            bos_token=bos_token,
+            eos_token=eos_token,
+            unk_token=unk_token,
+            pad_token=pad_token,
+            clean_up_tokenization_spaces=clean_up_tokenization_spaces,
+            **kwargs,
+        )
+        """ Initialization"""
+    @property
+    def no_prefix_space_tokens(self):
+        if self._no_prefix_space_tokens is None:
+            vocab = self.convert_ids_to_tokens(list(range(self.vocab_size)))
+            self._no_prefix_space_tokens = {
+                i
+                for i, tok in enumerate(vocab) if not tok.startswith('▁')
+            }
+        return self._no_prefix_space_tokens
+    @property
+    def vocab_size(self):
+        """Returns vocab size."""
+        return self.sp_model.get_piece_size()
+    @property
+    def bos_token_id(self) -> Optional[int]:
+        return self.sp_model.bos_id()
+    @property
+    def eos_token_id(self) -> Optional[int]:
+        return self.sp_model.eos_id()
+    def get_vocab(self):
+        """Returns vocab as a dict."""
+        vocab = {
+            self.convert_ids_to_tokens(i): i
+            for i in range(self.vocab_size)
+        }
+        vocab.update(self.added_tokens_encoder)
+        return vocab
+    def _tokenize(self, text):
+        """Returns a tokenized string."""
+        return self.sp_model.encode(text, out_type=str)
+    def _convert_token_to_id(self, token):
+        """Converts a token (str) in an id using the vocab."""
+        return self.sp_model.piece_to_id(token)
+    def _convert_id_to_token(self, index):
+        """Converts an index (integer) in a token (str) using the vocab."""
+        token = self.sp_model.IdToPiece(index)
+        return token
+    def _maybe_add_prefix_space(self, tokens, decoded):
+        if tokens and tokens[0] not in self.no_prefix_space_tokens:
+            return ' ' + decoded
+        else:
+            return decoded
+    def convert_tokens_to_string(self, tokens):
+        """Converts a sequence of tokens (string) in a single string."""
+        current_sub_tokens = []
+        out_string = ''
+        prev_is_special = False
+        for token in tokens:
+            # make sure that special tokens are not decoded using sentencepiece model
+            if token in self.all_special_tokens:
+                if not prev_is_special:
+                    out_string += ' '
+                out_string += self.sp_model.decode(current_sub_tokens) + token
+                prev_is_special = True
+                current_sub_tokens = []
+            else:
+                current_sub_tokens.append(token)
+                prev_is_special = False
+        out_string += self.sp_model.decode(current_sub_tokens)
+        out_string = self.clean_up_tokenization(out_string)
+        out_string = self._maybe_add_prefix_space(
+            tokens=tokens, decoded=out_string)
+        return out_string[1:]
+    def save_vocabulary(self,
+                        save_directory,
+                        filename_prefix: Optional[str] = None) -> Tuple[str]:
+        """Save the vocabulary and special tokens file to a directory.
+        Args:
+            save_directory (`str`):
+                The directory in which to save the vocabulary.
+        Returns:
+            `Tuple(str)`: Paths to the files saved.
+        """
+        if not os.path.isdir(save_directory):
+            logger.error(
+                f'Vocabulary path ({save_directory}) should be a directory')
+            return
+        out_vocab_file = os.path.join(
+            save_directory,
+            (filename_prefix + '-' if filename_prefix else '') +
+            VOCAB_FILES_NAMES['vocab_file'])
+        if os.path.abspath(self.vocab_file) != os.path.abspath(
+                out_vocab_file) and os.path.isfile(self.vocab_file):
+            copyfile(self.vocab_file, out_vocab_file)
+        elif not os.path.isfile(self.vocab_file):
+            with open(out_vocab_file, 'wb') as fi:
+                content_spiece_model = self.sp_model.serialized_model_proto()
+                fi.write(content_spiece_model)
+        return (out_vocab_file, )
+    def build_inputs_with_special_tokens(self, token_ids_0, token_ids_1=None):
+        if self.add_bos_token:
+            bos_token_ids = [self.bos_token_id]
+        else:
+            bos_token_ids = []
+        output = bos_token_ids + token_ids_0
+        if token_ids_1 is not None:
+            output = output + token_ids_1
+        if self.add_eos_token:
+            output = output + [self.eos_token_id]
+        return output
+    def get_special_tokens_mask(
+            self,
+            token_ids_0: List[int],
+            token_ids_1: Optional[List[int]] = None,
+            already_has_special_tokens: bool = False) -> List[int]:
+        """Retrieve sequence ids from a token list that has no special tokens
+        added. This method is called when adding special tokens using the
+        tokenizer `prepare_for_model` method.
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+            already_has_special_tokens (`bool`, *optional*, defaults to `False`):
+                Whether or not the token list is already formatted with special tokens for the model.
+        Returns:
+            `List[int]`: A list of integers in the range [0, 1]: 1 for a special token, 0 for a sequence token.
+        """
+        if already_has_special_tokens:
+            return super().get_special_tokens_mask(
+                token_ids_0=token_ids_0,
+                token_ids_1=token_ids_1,
+                already_has_special_tokens=True)
+        if token_ids_1 is None:
+            return [1] + ([0] * len(token_ids_0)) + [1]
+        return [1] + ([0] * len(token_ids_0)) + [1, 1] + (
+            [0] * len(token_ids_1)) + [1]
+    def create_token_type_ids_from_sequences(
+            self,
+            token_ids_0: List[int],
+            token_ids_1: Optional[List[int]] = None) -> List[int]:
+        """Create a mask from the two sequences passed to be used in a
+        sequence-pair classification task. T5 does not make use of token type
+        ids, therefore a list of zeros is returned.
+        Args:
+            token_ids_0 (`List[int]`):
+                List of IDs.
+            token_ids_1 (`List[int]`, *optional*):
+                Optional second list of IDs for sequence pairs.
+        Returns:
+            `List[int]`: List of zeros.
+        """
+        eos = [self.eos_token_id]
+        if token_ids_1 is None:
+            return len(token_ids_0 + eos) * [0]
+        return len(token_ids_0 + eos + token_ids_1 + eos) * [0]

lora/tokenizer.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f868398fc4e05ee1e8aeba95ddf18ddcc45b8bce55d5093bead5bbf80429b48b
+size 1477754

lora/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,43 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "auto_map": {
+    "AutoTokenizer": [
+      "tokenization_internlm_xcomposer2.InternLMXComposer2Tokenizer",
+      null
+    ]
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "</s>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "</s>",
+  "padding_side": "right",
+  "tokenizer_class": "InternLMXComposer2Tokenizer",
+  "unk_token": "<unk>",
+  "use_fast": false
+}

lora/trainer_state.json ADDED Viewed

	@@ -0,0 +1,2461 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 2.0,
+  "eval_steps": 500,
+  "global_step": 2016,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.00992063492063492,
+      "grad_norm": 1.899263178856104,
+      "learning_rate": 2.380952380952381e-05,
+      "loss": 1.1251,
+      "step": 10
+    },
+    {
+      "epoch": 0.01984126984126984,
+      "grad_norm": 0.7922716506113461,
+      "learning_rate": 4.761904761904762e-05,
+      "loss": 0.2041,
+      "step": 20
+    },
+    {
+      "epoch": 0.02976190476190476,
+      "grad_norm": 0.5895583365390514,
+      "learning_rate": 4.999748926019576e-05,
+      "loss": 0.138,
+      "step": 30
+    },
+    {
+      "epoch": 0.03968253968253968,
+      "grad_norm": 0.5560939312704402,
+      "learning_rate": 4.9988810807087584e-05,
+      "loss": 0.116,
+      "step": 40
+    },
+    {
+      "epoch": 0.0496031746031746,
+      "grad_norm": 0.5339646542974129,
+      "learning_rate": 4.9973935795400226e-05,
+      "loss": 0.1227,
+      "step": 50
+    },
+    {
+      "epoch": 0.05952380952380952,
+      "grad_norm": 0.3172900020174145,
+      "learning_rate": 4.995286791373982e-05,
+      "loss": 0.1161,
+      "step": 60
+    },
+    {
+      "epoch": 0.06944444444444445,
+      "grad_norm": 0.42612238875506897,
+      "learning_rate": 4.992561238637912e-05,
+      "loss": 0.1171,
+      "step": 70
+    },
+    {
+      "epoch": 0.07936507936507936,
+      "grad_norm": 0.29025648297274464,
+      "learning_rate": 4.989217597196194e-05,
+      "loss": 0.112,
+      "step": 80
+    },
+    {
+      "epoch": 0.08928571428571429,
+      "grad_norm": 0.3936577938746362,
+      "learning_rate": 4.985256696182724e-05,
+      "loss": 0.1139,
+      "step": 90
+    },
+    {
+      "epoch": 0.0992063492063492,
+      "grad_norm": 0.340841068630456,
+      "learning_rate": 4.980679517795309e-05,
+      "loss": 0.103,
+      "step": 100
+    },
+    {
+      "epoch": 0.10912698412698413,
+      "grad_norm": 0.22519986858539828,
+      "learning_rate": 4.9754871970521055e-05,
+      "loss": 0.1094,
+      "step": 110
+    },
+    {
+      "epoch": 0.11904761904761904,
+      "grad_norm": 0.22918903537877802,
+      "learning_rate": 4.9696810215101695e-05,
+      "loss": 0.105,
+      "step": 120
+    },
+    {
+      "epoch": 0.12896825396825398,
+      "grad_norm": 0.2972032589349314,
+      "learning_rate": 4.963262430946173e-05,
+      "loss": 0.1146,
+      "step": 130
+    },
+    {
+      "epoch": 0.1388888888888889,
+      "grad_norm": 0.14022379837954602,
+      "learning_rate": 4.956233016999379e-05,
+      "loss": 0.1009,
+      "step": 140
+    },
+    {
+      "epoch": 0.1488095238095238,
+      "grad_norm": 0.29147939403227785,
+      "learning_rate": 4.948594522776958e-05,
+      "loss": 0.1082,
+      "step": 150
+    },
+    {
+      "epoch": 0.15873015873015872,
+      "grad_norm": 0.22074336132465838,
+      "learning_rate": 4.9403488424217433e-05,
+      "loss": 0.1021,
+      "step": 160
+    },
+    {
+      "epoch": 0.16865079365079366,
+      "grad_norm": 0.259089069292428,
+      "learning_rate": 4.9314980206425355e-05,
+      "loss": 0.128,
+      "step": 170
+    },
+    {
+      "epoch": 0.17857142857142858,
+      "grad_norm": 0.2435767337393017,
+      "learning_rate": 4.9220442522070657e-05,
+      "loss": 0.1,
+      "step": 180
+    },
+    {
+      "epoch": 0.1884920634920635,
+      "grad_norm": 0.20234199140218986,
+      "learning_rate": 4.911989881397755e-05,
+      "loss": 0.0967,
+      "step": 190
+    },
+    {
+      "epoch": 0.1984126984126984,
+      "grad_norm": 0.18427600304177014,
+      "learning_rate": 4.901337401430395e-05,
+      "loss": 0.1008,
+      "step": 200
+    },
+    {
+      "epoch": 0.20833333333333334,
+      "grad_norm": 0.19784209557315877,
+      "learning_rate": 4.8900894538358944e-05,
+      "loss": 0.1088,
+      "step": 210
+    },
+    {
+      "epoch": 0.21825396825396826,
+      "grad_norm": 0.2361890840964331,
+      "learning_rate": 4.878248827805252e-05,
+      "loss": 0.1018,
+      "step": 220
+    },
+    {
+      "epoch": 0.22817460317460317,
+      "grad_norm": 0.17946620458385004,
+      "learning_rate": 4.865818459497911e-05,
+      "loss": 0.101,
+      "step": 230
+    },
+    {
+      "epoch": 0.23809523809523808,
+      "grad_norm": 0.200937152702067,
+      "learning_rate": 4.8528014313136675e-05,
+      "loss": 0.1019,
+      "step": 240
+    },
+    {
+      "epoch": 0.24801587301587302,
+      "grad_norm": 0.2641817602324182,
+      "learning_rate": 4.839200971128324e-05,
+      "loss": 0.0865,
+      "step": 250
+    },
+    {
+      "epoch": 0.25793650793650796,
+      "grad_norm": 0.16708831362801244,
+      "learning_rate": 4.8250204514932517e-05,
+      "loss": 0.0943,
+      "step": 260
+    },
+    {
+      "epoch": 0.26785714285714285,
+      "grad_norm": 0.23663162415830213,
+      "learning_rate": 4.810263388799101e-05,
+      "loss": 0.0955,
+      "step": 270
+    },
+    {
+      "epoch": 0.2777777777777778,
+      "grad_norm": 0.18283685945766517,
+      "learning_rate": 4.7949334424038176e-05,
+      "loss": 0.1052,
+      "step": 280
+    },
+    {
+      "epoch": 0.2876984126984127,
+      "grad_norm": 0.4848491439986444,
+      "learning_rate": 4.77903441372523e-05,
+      "loss": 0.1017,
+      "step": 290
+    },
+    {
+      "epoch": 0.2976190476190476,
+      "grad_norm": 0.3605542323724644,
+      "learning_rate": 4.762570245298389e-05,
+      "loss": 0.0986,
+      "step": 300
+    },
+    {
+      "epoch": 0.30753968253968256,
+      "grad_norm": 0.1386048127933906,
+      "learning_rate": 4.7455450197979345e-05,
+      "loss": 0.0935,
+      "step": 310
+    },
+    {
+      "epoch": 0.31746031746031744,
+      "grad_norm": 0.14991678174597392,
+      "learning_rate": 4.727962959025694e-05,
+      "loss": 0.0956,
+      "step": 320
+    },
+    {
+      "epoch": 0.3273809523809524,
+      "grad_norm": 0.26295328059626233,
+      "learning_rate": 4.709828422863791e-05,
+      "loss": 0.0953,
+      "step": 330
+    },
+    {
+      "epoch": 0.3373015873015873,
+      "grad_norm": 0.16038042754223325,
+      "learning_rate": 4.6911459081935084e-05,
+      "loss": 0.0959,
+      "step": 340
+    },
+    {
+      "epoch": 0.3472222222222222,
+      "grad_norm": 0.22069080284626696,
+      "learning_rate": 4.671920047780186e-05,
+      "loss": 0.09,
+      "step": 350
+    },
+    {
+      "epoch": 0.35714285714285715,
+      "grad_norm": 0.26820678370641815,
+      "learning_rate": 4.652155609124414e-05,
+      "loss": 0.0988,
+      "step": 360
+    },
+    {
+      "epoch": 0.36706349206349204,
+      "grad_norm": 0.2311480903314735,
+      "learning_rate": 4.631857493279823e-05,
+      "loss": 0.0988,
+      "step": 370
+    },
+    {
+      "epoch": 0.376984126984127,
+      "grad_norm": 0.18425738333190533,
+      "learning_rate": 4.611030733637751e-05,
+      "loss": 0.0929,
+      "step": 380
+    },
+    {
+      "epoch": 0.3869047619047619,
+      "grad_norm": 0.12332174843455722,
+      "learning_rate": 4.589680494679099e-05,
+      "loss": 0.0959,
+      "step": 390
+    },
+    {
+      "epoch": 0.3968253968253968,
+      "grad_norm": 0.194045506361412,
+      "learning_rate": 4.567812070693675e-05,
+      "loss": 0.0985,
+      "step": 400
+    },
+    {
+      "epoch": 0.40674603174603174,
+      "grad_norm": 0.24121090661176106,
+      "learning_rate": 4.545430884467354e-05,
+      "loss": 0.0915,
+      "step": 410
+    },
+    {
+      "epoch": 0.4166666666666667,
+      "grad_norm": 0.19201769770887162,
+      "learning_rate": 4.522542485937369e-05,
+      "loss": 0.0943,
+      "step": 420
+    },
+    {
+      "epoch": 0.42658730158730157,
+      "grad_norm": 0.3290546621528532,
+      "learning_rate": 4.499152550816077e-05,
+      "loss": 0.0864,
+      "step": 430
+    },
+    {
+      "epoch": 0.4365079365079365,
+      "grad_norm": 0.1945555131786947,
+      "learning_rate": 4.4752668791835315e-05,
+      "loss": 0.0904,
+      "step": 440
+    },
+    {
+      "epoch": 0.44642857142857145,
+      "grad_norm": 0.17033755769918293,
+      "learning_rate": 4.450891394049221e-05,
+      "loss": 0.0874,
+      "step": 450
+    },
+    {
+      "epoch": 0.45634920634920634,
+      "grad_norm": 0.33700673031382816,
+      "learning_rate": 4.426032139883315e-05,
+      "loss": 0.0964,
+      "step": 460
+    },
+    {
+      "epoch": 0.4662698412698413,
+      "grad_norm": 0.1386245861821237,
+      "learning_rate": 4.400695281117802e-05,
+      "loss": 0.0909,
+      "step": 470
+    },
+    {
+      "epoch": 0.47619047619047616,
+      "grad_norm": 0.18027472945026246,
+      "learning_rate": 4.37488710061787e-05,
+      "loss": 0.0973,
+      "step": 480
+    },
+    {
+      "epoch": 0.4861111111111111,
+      "grad_norm": 0.17090015410514822,
+      "learning_rate": 4.3486139981239304e-05,
+      "loss": 0.0957,
+      "step": 490
+    },
+    {
+      "epoch": 0.49603174603174605,
+      "grad_norm": 0.17225190040947705,
+      "learning_rate": 4.321882488664645e-05,
+      "loss": 0.0984,
+      "step": 500
+    },
+    {
+      "epoch": 0.5059523809523809,
+      "grad_norm": 0.24663017482809838,
+      "learning_rate": 4.2946992009413774e-05,
+      "loss": 0.1012,
+      "step": 510
+    },
+    {
+      "epoch": 0.5158730158730159,
+      "grad_norm": 0.21766586383802478,
+      "learning_rate": 4.2670708756844504e-05,
+      "loss": 0.0933,
+      "step": 520
+    },
+    {
+      "epoch": 0.5257936507936508,
+      "grad_norm": 0.2174374764424065,
+      "learning_rate": 4.239004363981627e-05,
+      "loss": 0.0908,
+      "step": 530
+    },
+    {
+      "epoch": 0.5357142857142857,
+      "grad_norm": 0.18176898727645474,
+      "learning_rate": 4.2105066255792185e-05,
+      "loss": 0.0967,
+      "step": 540
+    },
+    {
+      "epoch": 0.5456349206349206,
+      "grad_norm": 0.18294668815510332,
+      "learning_rate": 4.1815847271562594e-05,
+      "loss": 0.0895,
+      "step": 550
+    },
+    {
+      "epoch": 0.5555555555555556,
+      "grad_norm": 0.2755073678680904,
+      "learning_rate": 4.152245840572153e-05,
+      "loss": 0.0885,
+      "step": 560
+    },
+    {
+      "epoch": 0.5654761904761905,
+      "grad_norm": 0.13142325484055215,
+      "learning_rate": 4.122497241088247e-05,
+      "loss": 0.1044,
+      "step": 570
+    },
+    {
+      "epoch": 0.5753968253968254,
+      "grad_norm": 0.16668164786917436,
+      "learning_rate": 4.09234630556376e-05,
+      "loss": 0.0963,
+      "step": 580
+    },
+    {
+      "epoch": 0.5853174603174603,
+      "grad_norm": 0.18038706809428273,
+      "learning_rate": 4.061800510626515e-05,
+      "loss": 0.0946,
+      "step": 590
+    },
+    {
+      "epoch": 0.5952380952380952,
+      "grad_norm": 0.1930360692086378,
+      "learning_rate": 4.030867430818941e-05,
+      "loss": 0.0981,
+      "step": 600
+    },
+    {
+      "epoch": 0.6051587301587301,
+      "grad_norm": 0.2190484582397661,
+      "learning_rate": 3.999554736719785e-05,
+      "loss": 0.0918,
+      "step": 610
+    },
+    {
+      "epoch": 0.6150793650793651,
+      "grad_norm": 0.24605450998539993,
+      "learning_rate": 3.9678701930420095e-05,
+      "loss": 0.1004,
+      "step": 620
+    },
+    {
+      "epoch": 0.625,
+      "grad_norm": 0.12987591431160975,
+      "learning_rate": 3.935821656707359e-05,
+      "loss": 0.1017,
+      "step": 630
+    },
+    {
+      "epoch": 0.6349206349206349,
+      "grad_norm": 0.19782253063677727,
+      "learning_rate": 3.903417074898047e-05,
+      "loss": 0.0881,
+      "step": 640
+    },
+    {
+      "epoch": 0.6448412698412699,
+      "grad_norm": 0.23190635119611894,
+      "learning_rate": 3.870664483086067e-05,
+      "loss": 0.088,
+      "step": 650
+    },
+    {
+      "epoch": 0.6547619047619048,
+      "grad_norm": 0.14464411323958998,
+      "learning_rate": 3.837572003040612e-05,
+      "loss": 0.0907,
+      "step": 660
+    },
+    {
+      "epoch": 0.6646825396825397,
+      "grad_norm": 0.11660095690724923,
+      "learning_rate": 3.8041478408140926e-05,
+      "loss": 0.0877,
+      "step": 670
+    },
+    {
+      "epoch": 0.6746031746031746,
+      "grad_norm": 0.18383294614345877,
+      "learning_rate": 3.77040028470725e-05,
+      "loss": 0.0851,
+      "step": 680
+    },
+    {
+      "epoch": 0.6845238095238095,
+      "grad_norm": 0.16896655109068967,
+      "learning_rate": 3.736337703213888e-05,
+      "loss": 0.0875,
+      "step": 690
+    },
+    {
+      "epoch": 0.6944444444444444,
+      "grad_norm": 0.17392953850416312,
+      "learning_rate": 3.7019685429456986e-05,
+      "loss": 0.097,
+      "step": 700
+    },
+    {
+      "epoch": 0.7043650793650794,
+      "grad_norm": 0.24645390722766997,
+      "learning_rate": 3.6673013265377355e-05,
+      "loss": 0.0876,
+      "step": 710
+    },
+    {
+      "epoch": 0.7142857142857143,
+      "grad_norm": 0.1787303955658246,
+      "learning_rate": 3.632344650535024e-05,
+      "loss": 0.0887,
+      "step": 720
+    },
+    {
+      "epoch": 0.7242063492063492,
+      "grad_norm": 0.12552185945236538,
+      "learning_rate": 3.59710718326085e-05,
+      "loss": 0.0924,
+      "step": 730
+    },
+    {
+      "epoch": 0.7341269841269841,
+      "grad_norm": 0.1691175916463122,
+      "learning_rate": 3.5615976626672434e-05,
+      "loss": 0.0837,
+      "step": 740
+    },
+    {
+      "epoch": 0.7440476190476191,
+      "grad_norm": 0.2732015272731385,
+      "learning_rate": 3.525824894168203e-05,
+      "loss": 0.0878,
+      "step": 750
+    },
+    {
+      "epoch": 0.753968253968254,
+      "grad_norm": 0.21608867617240846,
+      "learning_rate": 3.489797748456187e-05,
+      "loss": 0.0896,
+      "step": 760
+    },
+    {
+      "epoch": 0.7638888888888888,
+      "grad_norm": 0.1837057522774625,
+      "learning_rate": 3.453525159302415e-05,
+      "loss": 0.0844,
+      "step": 770
+    },
+    {
+      "epoch": 0.7738095238095238,
+      "grad_norm": 0.23426485652488405,
+      "learning_rate": 3.417016121341537e-05,
+      "loss": 0.0962,
+      "step": 780
+    },
+    {
+      "epoch": 0.7837301587301587,
+      "grad_norm": 0.20613723836878384,
+      "learning_rate": 3.380279687841199e-05,
+      "loss": 0.0848,
+      "step": 790
+    },
+    {
+      "epoch": 0.7936507936507936,
+      "grad_norm": 0.20859267166659057,
+      "learning_rate": 3.343324968457076e-05,
+      "loss": 0.093,
+      "step": 800
+    },
+    {
+      "epoch": 0.8035714285714286,
+      "grad_norm": 0.1459009760134056,
+      "learning_rate": 3.306161126973918e-05,
+      "loss": 0.0897,
+      "step": 810
+    },
+    {
+      "epoch": 0.8134920634920635,
+      "grad_norm": 0.16377549953879472,
+      "learning_rate": 3.268797379033181e-05,
+      "loss": 0.0911,
+      "step": 820
+    },
+    {
+      "epoch": 0.8234126984126984,
+      "grad_norm": 0.19355138242730935,
+      "learning_rate": 3.23124298984779e-05,
+      "loss": 0.0883,
+      "step": 830
+    },
+    {
+      "epoch": 0.8333333333333334,
+      "grad_norm": 0.18239709779797436,
+      "learning_rate": 3.1935072719046115e-05,
+      "loss": 0.0848,
+      "step": 840
+    },
+    {
+      "epoch": 0.8432539682539683,
+      "grad_norm": 0.10583679127237934,
+      "learning_rate": 3.155599582655211e-05,
+      "loss": 0.0885,
+      "step": 850
+    },
+    {
+      "epoch": 0.8531746031746031,
+      "grad_norm": 0.1834906329062599,
+      "learning_rate": 3.117529322195448e-05,
+      "loss": 0.087,
+      "step": 860
+    },
+    {
+      "epoch": 0.8630952380952381,
+      "grad_norm": 0.17967922772149214,
+      "learning_rate": 3.079305930934509e-05,
+      "loss": 0.0899,
+      "step": 870
+    },
+    {
+      "epoch": 0.873015873015873,
+      "grad_norm": 0.13745052140998398,
+      "learning_rate": 3.040938887253932e-05,
+      "loss": 0.0922,
+      "step": 880
+    },
+    {
+      "epoch": 0.8829365079365079,
+      "grad_norm": 0.22606931644010264,
+      "learning_rate": 3.002437705157225e-05,
+      "loss": 0.0915,
+      "step": 890
+    },
+    {
+      "epoch": 0.8928571428571429,
+      "grad_norm": 0.16850557595303492,
+      "learning_rate": 2.963811931910645e-05,
+      "loss": 0.0864,
+      "step": 900
+    },
+    {
+      "epoch": 0.9027777777777778,
+      "grad_norm": 0.23963677891708385,
+      "learning_rate": 2.925071145675733e-05,
+      "loss": 0.0837,
+      "step": 910
+    },
+    {
+      "epoch": 0.9126984126984127,
+      "grad_norm": 0.1587891497734928,
+      "learning_rate": 2.8862249531341806e-05,
+      "loss": 0.0853,
+      "step": 920
+    },
+    {
+      "epoch": 0.9226190476190477,
+      "grad_norm": 0.17289844782248673,
+      "learning_rate": 2.8472829871056332e-05,
+      "loss": 0.0816,
+      "step": 930
+    },
+    {
+      "epoch": 0.9325396825396826,
+      "grad_norm": 0.19037298678850886,
+      "learning_rate": 2.8082549041590085e-05,
+      "loss": 0.0848,
+      "step": 940
+    },
+    {
+      "epoch": 0.9424603174603174,
+      "grad_norm": 0.18209887709567296,
+      "learning_rate": 2.7691503822179187e-05,
+      "loss": 0.0793,
+      "step": 950
+    },
+    {
+      "epoch": 0.9523809523809523,
+      "grad_norm": 0.23253229868000924,
+      "learning_rate": 2.7299791181608124e-05,
+      "loss": 0.0944,
+      "step": 960
+    },
+    {
+      "epoch": 0.9623015873015873,
+      "grad_norm": 0.1730036255240911,
+      "learning_rate": 2.6907508254163987e-05,
+      "loss": 0.0827,
+      "step": 970
+    },
+    {
+      "epoch": 0.9722222222222222,
+      "grad_norm": 0.1355999949692624,
+      "learning_rate": 2.6514752315549847e-05,
+      "loss": 0.0713,
+      "step": 980
+    },
+    {
+      "epoch": 0.9821428571428571,
+      "grad_norm": 0.1659255007537986,
+      "learning_rate": 2.6121620758762877e-05,
+      "loss": 0.085,
+      "step": 990
+    },
+    {
+      "epoch": 0.9920634920634921,
+      "grad_norm": 0.21719730615776264,
+      "learning_rate": 2.5728211069943582e-05,
+      "loss": 0.0885,
+      "step": 1000
+    },
+    {
+      "Accuracy": 77.7,
+      "Overall_f1": 70.0,
+      "epoch": 1.0,
+      "eval_UnsafeBench_runtime": 226.9313,
+      "eval_UnsafeBench_samples_per_second": 9.025,
+      "eval_UnsafeBench_steps_per_second": 0.141,
+      "generated": {
+        "Accuracy": 78.6,
+        "F1": 74.5,
+        "unsafe": {
+          "Accuracy": 78.6,
+          "F1": 74.5,
+          "safe": {
+            "f1": "81.6",
+            "prec": "82.9(489/590)",
+            "recall": "80.3(489/609)"
+          },
+          "unsafe": {
+            "f1": "74.5",
+            "prec": "72.9(323/443)",
+            "recall": "76.2(323/424)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 76.7,
+        "F1": 64.3,
+        "unsafe": {
+          "Accuracy": 76.7,
+          "F1": 64.3,
+          "safe": {
+            "f1": "82.8",
+            "prec": "79.3(567/715)",
+            "recall": "86.6(567/655)"
+          },
+          "unsafe": {
+            "f1": "64.3",
+            "prec": "70.7(212/300)",
+            "recall": "58.9(212/360)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "Accuracy": 67.3,
+      "Overall_f1": 80.5,
+      "epoch": 1.0,
+      "eval_SelfHarm_runtime": 78.5426,
+      "eval_SelfHarm_samples_per_second": 8.148,
+      "eval_SelfHarm_steps_per_second": 0.127,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 67.3,
+        "F1": 80.5,
+        "unsafe": {
+          "Accuracy": 67.3,
+          "F1": 80.5,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/209)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "80.5",
+            "prec": "100.0(431/431)",
+            "recall": "67.3(431/640)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "Accuracy": 78.7,
+      "Overall_f1": 68.2,
+      "epoch": 1.0,
+      "eval_UnsafeDiff_runtime": 97.4369,
+      "eval_UnsafeDiff_samples_per_second": 8.539,
+      "eval_UnsafeDiff_steps_per_second": 0.133,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 78.7,
+        "F1": 68.2,
+        "unsafe": {
+          "Accuracy": 78.7,
+          "F1": 68.2,
+          "safe": {
+            "f1": "84.0",
+            "prec": "92.8(465/501)",
+            "recall": "76.7(465/606)"
+          },
+          "unsafe": {
+            "f1": "68.2",
+            "prec": "57.4(190/331)",
+            "recall": "84.1(190/226)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "Accuracy": 76.0,
+      "Overall_f1": 86.4,
+      "epoch": 1.0,
+      "eval_ViolentBehavior_runtime": 24.6799,
+      "eval_ViolentBehavior_samples_per_second": 7.78,
+      "eval_ViolentBehavior_steps_per_second": 0.122,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 76.0,
+        "F1": 86.4,
+        "unsafe": {
+          "Accuracy": 76.0,
+          "F1": 86.4,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/46)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "86.4",
+            "prec": "100.0(146/146)",
+            "recall": "76.0(146/192)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "Accuracy": 80.8,
+      "Overall_f1": 74.8,
+      "epoch": 1.0,
+      "eval_SMID_runtime": 192.5161,
+      "eval_SMID_samples_per_second": 8.976,
+      "eval_SMID_steps_per_second": 0.14,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 80.8,
+        "F1": 74.8,
+        "unsafe": {
+          "Accuracy": 80.8,
+          "F1": 74.8,
+          "safe": {
+            "f1": "84.6",
+            "prec": "78.9(906/1148)",
+            "recall": "91.1(906/995)"
+          },
+          "unsafe": {
+            "f1": "74.8",
+            "prec": "84.7(491/580)",
+            "recall": "67.0(491/733)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "Accuracy": 84.3,
+      "Overall_f1": 85.0,
+      "epoch": 1.0,
+      "eval_JsonDataset_runtime": 404.5105,
+      "eval_JsonDataset_samples_per_second": 4.905,
+      "eval_JsonDataset_steps_per_second": 0.077,
+      "generated": {
+        "Accuracy": 86.2,
+        "F1": 87.3,
+        "fairness": {
+          "Accuracy": 90.3,
+          "F1": 90.3,
+          "african": {
+            "f1": "86.2",
+            "prec": "77.8(28/36)",
+            "recall": "96.6(28/29)"
+          },
+          "asian": {
+            "f1": "87.0",
+            "prec": "89.6(121/135)",
+            "recall": "84.6(121/143)"
+          },
+          "caucasian": {
+            "f1": "91.3",
+            "prec": "88.7(282/318)",
+            "recall": "94.0(282/300)"
+          },
+          "children": {
+            "f1": "72.2",
+            "prec": "56.5(13/23)",
+            "recall": "100.0(13/13)"
+          },
+          "elderly": {
+            "f1": "91.9",
+            "prec": "98.1(102/104)",
+            "recall": "86.4(102/118)"
+          },
+          "female": {
+            "f1": "98.9",
+            "prec": "99.6(234/235)",
+            "recall": "98.3(234/238)"
+          },
+          "indian": {
+            "f1": "70.6",
+            "prec": "75.0(6/8)",
+            "recall": "66.7(6/9)"
+          },
+          "latino": {
+            "f1": "43.5",
+            "prec": "66.7(10/15)",
+            "recall": "32.3(10/31)"
+          },
+          "male": {
+            "f1": "99.1",
+            "prec": "98.6(273/277)",
+            "recall": "99.6(273/274)"
+          },
+          "middle-aged": {
+            "f1": "75.6",
+            "prec": "86.3(107/124)",
+            "recall": "67.3(107/159)"
+          },
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "teenager": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "young adult": {
+            "f1": "87.3",
+            "prec": "80.8(211/261)",
+            "recall": "95.0(211/222)"
+          }
+        },
+        "privacy": {
+          "Accuracy": 80.3,
+          "F1": 83.6,
+          "intellectual property violation": {
+            "f1": "82.5",
+            "prec": "89.2(33/37)",
+            "recall": "76.7(33/43)"
+          },
+          "personal identification documents": {
+            "f1": "86.9",
+            "prec": "95.2(40/42)",
+            "recall": "80.0(40/50)"
+          },
+          "public figures": {
+            "f1": "81.3",
+            "prec": "82.2(37/45)",
+            "recall": "80.4(37/46)"
+          },
+          "safe": {
+            "f1": "72.6",
+            "prec": "64.1(41/64)",
+            "recall": "83.7(41/49)"
+          }
+        },
+        "toxicity": {
+          "Accuracy": 67.4,
+          "F1": 68.3,
+          "disturbing": {
+            "f1": "68.7",
+            "prec": "57.4(35/61)",
+            "recall": "85.4(35/41)"
+          },
+          "hate": {
+            "f1": "42.1",
+            "prec": "100.0(4/4)",
+            "recall": "26.7(4/15)"
+          },
+          "humiliation": {
+            "f1": "32.8",
+            "prec": "100.0(9/9)",
+            "recall": "19.6(9/46)"
+          },
+          "illegal activity": {
+            "f1": "73.0",
+            "prec": "100.0(23/23)",
+            "recall": "57.5(23/40)"
+          },
+          "safe": {
+            "f1": "64.5",
+            "prec": "52.7(39/74)",
+            "recall": "83.0(39/47)"
+          },
+          "sexual": {
+            "f1": "94.4",
+            "prec": "100.0(42/42)",
+            "recall": "89.4(42/47)"
+          },
+          "violence": {
+            "f1": "66.0",
+            "prec": "53.3(32/60)",
+            "recall": "86.5(32/37)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 82.2,
+        "F1": 82.6,
+        "fairness": {
+          "Accuracy": 81.9,
+          "F1": 81.9,
+          "african": {
+            "f1": "80.8",
+            "prec": "82.4(61/74)",
+            "recall": "79.2(61/77)"
+          },
+          "asian": {
+            "f1": "78.2",
+            "prec": "77.4(72/93)",
+            "recall": "79.1(72/91)"
+          },
+          "caucasian": {
+            "f1": "82.2",
+            "prec": "74.1(166/224)",
+            "recall": "92.2(166/180)"
+          },
+          "children": {
+            "f1": "83.9",
+            "prec": "78.8(26/33)",
+            "recall": "89.7(26/29)"
+          },
+          "elderly": {
+            "f1": "66.7",
+            "prec": "92.9(26/28)",
+            "recall": "52.0(26/50)"
+          },
+          "female": {
+            "f1": "93.3",
+            "prec": "98.0(145/148)",
+            "recall": "89.0(145/163)"
+          },
+          "indian": {
+            "f1": "62.5",
+            "prec": "64.5(40/62)",
+            "recall": "60.6(40/66)"
+          },
+          "latino": {
+            "f1": "33.1",
+            "prec": "46.8(22/47)",
+            "recall": "25.6(22/86)"
+          },
+          "male": {
+            "f1": "97.0",
+            "prec": "94.9(334/352)",
+            "recall": "99.1(334/337)"
+          },
+          "middle-aged": {
+            "f1": "80.8",
+            "prec": "83.1(217/261)",
+            "recall": "78.6(217/276)"
+          },
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "teenager": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "young adult": {
+            "f1": "73.7",
+            "prec": "66.9(119/178)",
+            "recall": "82.1(119/145)"
+          }
+        },
+        "privacy": {
+          "Accuracy": 85.9,
+          "F1": 89.5,
+          "intellectual property violation": {
+            "f1": "86.1",
+            "prec": "100.0(34/34)",
+            "recall": "75.6(34/45)"
+          },
+          "personal identification documents": {
+            "f1": "92.9",
+            "prec": "93.9(46/49)",
+            "recall": "92.0(46/50)"
+          },
+          "public figures": {
+            "f1": "88.9",
+            "prec": "95.2(40/42)",
+            "recall": "83.3(40/48)"
+          },
+          "safe": {
+            "f1": "77.2",
+            "prec": "66.7(44/66)",
+            "recall": "91.7(44/48)"
+          }
+        },
+        "toxicity": {
+          "Accuracy": 81.6,
+          "F1": 83.3,
+          "disturbing": {
+            "f1": "84.9",
+            "prec": "82.4(42/51)",
+            "recall": "87.5(42/48)"
+          },
+          "hate": {
+            "f1": "69.6",
+            "prec": "72.7(8/11)",
+            "recall": "66.7(8/12)"
+          },
+          "humiliation": {
+            "f1": "23.5",
+            "prec": "100.0(2/2)",
+            "recall": "13.3(2/15)"
+          },
+          "illegal activity": {
+            "f1": "89.3",
+            "prec": "96.2(25/26)",
+            "recall": "83.3(25/30)"
+          },
+          "safe": {
+            "f1": "75.5",
+            "prec": "66.7(40/60)",
+            "recall": "87.0(40/46)"
+          },
+          "sexual": {
+            "f1": "95.0",
+            "prec": "94.1(48/51)",
+            "recall": "96.0(48/50)"
+          },
+          "violence": {
+            "f1": "79.5",
+            "prec": "79.5(35/44)",
+            "recall": "79.5(35/44)"
+          }
+        }
+      },
+      "step": 1008
+    },
+    {
+      "epoch": 1.001984126984127,
+      "grad_norm": 0.15089259656503126,
+      "learning_rate": 2.5334620804201765e-05,
+      "loss": 0.0813,
+      "step": 1010
+    },
+    {
+      "epoch": 1.0119047619047619,
+      "grad_norm": 0.15814476349549628,
+      "learning_rate": 2.4940947561425505e-05,
+      "loss": 0.0703,
+      "step": 1020
+    },
+    {
+      "epoch": 1.0218253968253967,
+      "grad_norm": 0.11346687531610126,
+      "learning_rate": 2.4547288962078963e-05,
+      "loss": 0.0652,
+      "step": 1030
+    },
+    {
+      "epoch": 1.0317460317460316,
+      "grad_norm": 0.16252940122847073,
+      "learning_rate": 2.415374262299513e-05,
+      "loss": 0.0699,
+      "step": 1040
+    },
+    {
+      "epoch": 1.0416666666666667,
+      "grad_norm": 0.22302915065109266,
+      "learning_rate": 2.3760406133169443e-05,
+      "loss": 0.0671,
+      "step": 1050
+    },
+    {
+      "epoch": 1.0515873015873016,
+      "grad_norm": 0.22373178948720648,
+      "learning_rate": 2.3367377029560304e-05,
+      "loss": 0.071,
+      "step": 1060
+    },
+    {
+      "epoch": 1.0615079365079365,
+      "grad_norm": 0.19248373027885218,
+      "learning_rate": 2.297475277290256e-05,
+      "loss": 0.0684,
+      "step": 1070
+    },
+    {
+      "epoch": 1.0714285714285714,
+      "grad_norm": 0.18697476333136995,
+      "learning_rate": 2.2582630723539784e-05,
+      "loss": 0.0701,
+      "step": 1080
+    },
+    {
+      "epoch": 1.0813492063492063,
+      "grad_norm": 0.15184950126869703,
+      "learning_rate": 2.2191108117281558e-05,
+      "loss": 0.0748,
+      "step": 1090
+    },
+    {
+      "epoch": 1.0912698412698412,
+      "grad_norm": 0.21524692332664133,
+      "learning_rate": 2.1800282041291548e-05,
+      "loss": 0.0718,
+      "step": 1100
+    },
+    {
+      "epoch": 1.1011904761904763,
+      "grad_norm": 0.19429776760950043,
+      "learning_rate": 2.1410249410012496e-05,
+      "loss": 0.0599,
+      "step": 1110
+    },
+    {
+      "epoch": 1.1111111111111112,
+      "grad_norm": 0.24475424138885818,
+      "learning_rate": 2.1021106941134012e-05,
+      "loss": 0.0725,
+      "step": 1120
+    },
+    {
+      "epoch": 1.121031746031746,
+      "grad_norm": 0.19282893432306394,
+      "learning_rate": 2.063295113160919e-05,
+      "loss": 0.0704,
+      "step": 1130
+    },
+    {
+      "epoch": 1.130952380952381,
+      "grad_norm": 0.18724259947539162,
+      "learning_rate": 2.024587823372591e-05,
+      "loss": 0.0752,
+      "step": 1140
+    },
+    {
+      "epoch": 1.1408730158730158,
+      "grad_norm": 0.14771605521783054,
+      "learning_rate": 1.9859984231238835e-05,
+      "loss": 0.0677,
+      "step": 1150
+    },
+    {
+      "epoch": 1.1507936507936507,
+      "grad_norm": 0.27831523957564996,
+      "learning_rate": 1.9475364815568036e-05,
+      "loss": 0.0689,
+      "step": 1160
+    },
+    {
+      "epoch": 1.1607142857142858,
+      "grad_norm": 0.2740109289227727,
+      "learning_rate": 1.9092115362070038e-05,
+      "loss": 0.0684,
+      "step": 1170
+    },
+    {
+      "epoch": 1.1706349206349207,
+      "grad_norm": 0.15722739177982728,
+      "learning_rate": 1.871033090638729e-05,
+      "loss": 0.0649,
+      "step": 1180
+    },
+    {
+      "epoch": 1.1805555555555556,
+      "grad_norm": 0.2155976765362568,
+      "learning_rate": 1.8330106120881846e-05,
+      "loss": 0.0641,
+      "step": 1190
+    },
+    {
+      "epoch": 1.1904761904761905,
+      "grad_norm": 0.2070784065234882,
+      "learning_rate": 1.7951535291159178e-05,
+      "loss": 0.0683,
+      "step": 1200
+    },
+    {
+      "epoch": 1.2003968253968254,
+      "grad_norm": 0.24925088406217583,
+      "learning_rate": 1.7574712292687813e-05,
+      "loss": 0.0736,
+      "step": 1210
+    },
+    {
+      "epoch": 1.2103174603174602,
+      "grad_norm": 0.26008378528910975,
+      "learning_rate": 1.719973056752076e-05,
+      "loss": 0.0613,
+      "step": 1220
+    },
+    {
+      "epoch": 1.2202380952380953,
+      "grad_norm": 0.2687147461146715,
+      "learning_rate": 1.682668310112437e-05,
+      "loss": 0.0648,
+      "step": 1230
+    },
+    {
+      "epoch": 1.2301587301587302,
+      "grad_norm": 0.27904378988460643,
+      "learning_rate": 1.6455662399320383e-05,
+      "loss": 0.0639,
+      "step": 1240
+    },
+    {
+      "epoch": 1.2400793650793651,
+      "grad_norm": 0.24340438457160612,
+      "learning_rate": 1.6086760465346993e-05,
+      "loss": 0.0685,
+      "step": 1250
+    },
+    {
+      "epoch": 1.25,
+      "grad_norm": 0.21382493424744065,
+      "learning_rate": 1.5720068777044476e-05,
+      "loss": 0.0665,
+      "step": 1260
+    },
+    {
+      "epoch": 1.2599206349206349,
+      "grad_norm": 0.17562775798071065,
+      "learning_rate": 1.5355678264171158e-05,
+      "loss": 0.0679,
+      "step": 1270
+    },
+    {
+      "epoch": 1.2698412698412698,
+      "grad_norm": 0.23898778196551948,
+      "learning_rate": 1.4993679285855198e-05,
+      "loss": 0.0693,
+      "step": 1280
+    },
+    {
+      "epoch": 1.2797619047619047,
+      "grad_norm": 0.1564634534054769,
+      "learning_rate": 1.4634161608187999e-05,
+      "loss": 0.0663,
+      "step": 1290
+    },
+    {
+      "epoch": 1.2896825396825398,
+      "grad_norm": 0.3494046426935179,
+      "learning_rate": 1.4277214381964569e-05,
+      "loss": 0.0629,
+      "step": 1300
+    },
+    {
+      "epoch": 1.2996031746031746,
+      "grad_norm": 0.2906431464200776,
+      "learning_rate": 1.3922926120576532e-05,
+      "loss": 0.0755,
+      "step": 1310
+    },
+    {
+      "epoch": 1.3095238095238095,
+      "grad_norm": 0.23278532541655608,
+      "learning_rate": 1.3571384678063128e-05,
+      "loss": 0.0665,
+      "step": 1320
+    },
+    {
+      "epoch": 1.3194444444444444,
+      "grad_norm": 0.26481428967128406,
+      "learning_rate": 1.322267722732582e-05,
+      "loss": 0.0659,
+      "step": 1330
+    },
+    {
+      "epoch": 1.3293650793650793,
+      "grad_norm": 0.17250317955887648,
+      "learning_rate": 1.2876890238511657e-05,
+      "loss": 0.065,
+      "step": 1340
+    },
+    {
+      "epoch": 1.3392857142857144,
+      "grad_norm": 0.1855989884926511,
+      "learning_rate": 1.2534109457571047e-05,
+      "loss": 0.0688,
+      "step": 1350
+    },
+    {
+      "epoch": 1.3492063492063493,
+      "grad_norm": 0.22854169876152886,
+      "learning_rate": 1.2194419884995014e-05,
+      "loss": 0.0694,
+      "step": 1360
+    },
+    {
+      "epoch": 1.3591269841269842,
+      "grad_norm": 0.21567426892633454,
+      "learning_rate": 1.185790575473738e-05,
+      "loss": 0.0685,
+      "step": 1370
+    },
+    {
+      "epoch": 1.369047619047619,
+      "grad_norm": 0.23225993437200204,
+      "learning_rate": 1.1524650513326945e-05,
+      "loss": 0.064,
+      "step": 1380
+    },
+    {
+      "epoch": 1.378968253968254,
+      "grad_norm": 0.18155286576255683,
+      "learning_rate": 1.1194736799174996e-05,
+      "loss": 0.0637,
+      "step": 1390
+    },
+    {
+      "epoch": 1.3888888888888888,
+      "grad_norm": 0.277759522427609,
+      "learning_rate": 1.0868246422083204e-05,
+      "loss": 0.0638,
+      "step": 1400
+    },
+    {
+      "epoch": 1.3988095238095237,
+      "grad_norm": 0.2824587015752722,
+      "learning_rate": 1.0545260342956936e-05,
+      "loss": 0.0728,
+      "step": 1410
+    },
+    {
+      "epoch": 1.4087301587301586,
+      "grad_norm": 0.28789347022215056,
+      "learning_rate": 1.0225858653729143e-05,
+      "loss": 0.0672,
+      "step": 1420
+    },
+    {
+      "epoch": 1.4186507936507937,
+      "grad_norm": 0.18977601762176396,
+      "learning_rate": 9.910120557499666e-06,
+      "loss": 0.0633,
+      "step": 1430
+    },
+    {
+      "epoch": 1.4285714285714286,
+      "grad_norm": 0.25156259951382204,
+      "learning_rate": 9.598124348895032e-06,
+      "loss": 0.0627,
+      "step": 1440
+    },
+    {
+      "epoch": 1.4384920634920635,
+      "grad_norm": 0.2542853679485282,
+      "learning_rate": 9.289947394653407e-06,
+      "loss": 0.0698,
+      "step": 1450
+    },
+    {
+      "epoch": 1.4484126984126984,
+      "grad_norm": 0.2613374152478023,
+      "learning_rate": 8.985666114439758e-06,
+      "loss": 0.0585,
+      "step": 1460
+    },
+    {
+      "epoch": 1.4583333333333333,
+      "grad_norm": 0.2554324424677922,
+      "learning_rate": 8.685355961895784e-06,
+      "loss": 0.0734,
+      "step": 1470
+    },
+    {
+      "epoch": 1.4682539682539684,
+      "grad_norm": 0.34193558194517265,
+      "learning_rate": 8.389091405929467e-06,
+      "loss": 0.0704,
+      "step": 1480
+    },
+    {
+      "epoch": 1.4781746031746033,
+      "grad_norm": 0.5388579049439481,
+      "learning_rate": 8.096945912248718e-06,
+      "loss": 0.0644,
+      "step": 1490
+    },
+    {
+      "epoch": 1.4880952380952381,
+      "grad_norm": 0.17918052887703428,
+      "learning_rate": 7.808991925143869e-06,
+      "loss": 0.0564,
+      "step": 1500
+    },
+    {
+      "epoch": 1.498015873015873,
+      "grad_norm": 0.3011198738615104,
+      "learning_rate": 7.5253008495234255e-06,
+      "loss": 0.0644,
+      "step": 1510
+    },
+    {
+      "epoch": 1.507936507936508,
+      "grad_norm": 0.21945267455563025,
+      "learning_rate": 7.245943033207542e-06,
+      "loss": 0.0627,
+      "step": 1520
+    },
+    {
+      "epoch": 1.5178571428571428,
+      "grad_norm": 0.19972883632416796,
+      "learning_rate": 6.9709877494836314e-06,
+      "loss": 0.0616,
+      "step": 1530
+    },
+    {
+      "epoch": 1.5277777777777777,
+      "grad_norm": 0.2870778046981311,
+      "learning_rate": 6.700503179928458e-06,
+      "loss": 0.0587,
+      "step": 1540
+    },
+    {
+      "epoch": 1.5376984126984126,
+      "grad_norm": 0.46783063057095087,
+      "learning_rate": 6.434556397500918e-06,
+      "loss": 0.0683,
+      "step": 1550
+    },
+    {
+      "epoch": 1.5476190476190477,
+      "grad_norm": 0.25122099969992817,
+      "learning_rate": 6.173213349909729e-06,
+      "loss": 0.0708,
+      "step": 1560
+    },
+    {
+      "epoch": 1.5575396825396826,
+      "grad_norm": 0.30899232162114265,
+      "learning_rate": 5.9165388432601446e-06,
+      "loss": 0.07,
+      "step": 1570
+    },
+    {
+      "epoch": 1.5674603174603174,
+      "grad_norm": 0.1914310341962679,
+      "learning_rate": 5.664596525983814e-06,
+      "loss": 0.0652,
+      "step": 1580
+    },
+    {
+      "epoch": 1.5773809523809523,
+      "grad_norm": 0.26636971533611215,
+      "learning_rate": 5.417448873055617e-06,
+      "loss": 0.0631,
+      "step": 1590
+    },
+    {
+      "epoch": 1.5873015873015874,
+      "grad_norm": 0.22112989096572308,
+      "learning_rate": 5.17515717050156e-06,
+      "loss": 0.076,
+      "step": 1600
+    },
+    {
+      "epoch": 1.5972222222222223,
+      "grad_norm": 0.2928121020824289,
+      "learning_rate": 4.937781500201474e-06,
+      "loss": 0.0701,
+      "step": 1610
+    },
+    {
+      "epoch": 1.6071428571428572,
+      "grad_norm": 0.3156026006058721,
+      "learning_rate": 4.705380724990327e-06,
+      "loss": 0.0615,
+      "step": 1620
+    },
+    {
+      "epoch": 1.617063492063492,
+      "grad_norm": 0.4755190385141863,
+      "learning_rate": 4.478012474061774e-06,
+      "loss": 0.0652,
+      "step": 1630
+    },
+    {
+      "epoch": 1.626984126984127,
+      "grad_norm": 0.17388919764712818,
+      "learning_rate": 4.255733128677691e-06,
+      "loss": 0.0666,
+      "step": 1640
+    },
+    {
+      "epoch": 1.6369047619047619,
+      "grad_norm": 0.3367174542395493,
+      "learning_rate": 4.038597808187092e-06,
+      "loss": 0.0672,
+      "step": 1650
+    },
+    {
+      "epoch": 1.6468253968253967,
+      "grad_norm": 0.3332988216991925,
+      "learning_rate": 3.8266603563580475e-06,
+      "loss": 0.0661,
+      "step": 1660
+    },
+    {
+      "epoch": 1.6567460317460316,
+      "grad_norm": 0.30549723060698114,
+      "learning_rate": 3.6199733280258107e-06,
+      "loss": 0.071,
+      "step": 1670
+    },
+    {
+      "epoch": 1.6666666666666665,
+      "grad_norm": 0.1521266037696581,
+      "learning_rate": 3.418587976060653e-06,
+      "loss": 0.0609,
+      "step": 1680
+    },
+    {
+      "epoch": 1.6765873015873016,
+      "grad_norm": 0.25665858712473993,
+      "learning_rate": 3.2225542386585233e-06,
+      "loss": 0.0668,
+      "step": 1690
+    },
+    {
+      "epoch": 1.6865079365079365,
+      "grad_norm": 0.22578693699521823,
+      "learning_rate": 3.0319207269576903e-06,
+      "loss": 0.059,
+      "step": 1700
+    },
+    {
+      "epoch": 1.6964285714285714,
+      "grad_norm": 0.29793394856689953,
+      "learning_rate": 2.846734712984481e-06,
+      "loss": 0.0636,
+      "step": 1710
+    },
+    {
+      "epoch": 1.7063492063492065,
+      "grad_norm": 0.27937818557407995,
+      "learning_rate": 2.6670421179310788e-06,
+      "loss": 0.0692,
+      "step": 1720
+    },
+    {
+      "epoch": 1.7162698412698414,
+      "grad_norm": 0.49698807611303736,
+      "learning_rate": 2.4928875007683096e-06,
+      "loss": 0.069,
+      "step": 1730
+    },
+    {
+      "epoch": 1.7261904761904763,
+      "grad_norm": 0.22562604605413764,
+      "learning_rate": 2.3243140471961772e-06,
+      "loss": 0.063,
+      "step": 1740
+    },
+    {
+      "epoch": 1.7361111111111112,
+      "grad_norm": 0.2724970311694327,
+      "learning_rate": 2.1613635589349756e-06,
+      "loss": 0.0649,
+      "step": 1750
+    },
+    {
+      "epoch": 1.746031746031746,
+      "grad_norm": 0.22608422901904388,
+      "learning_rate": 2.004076443359593e-06,
+      "loss": 0.0621,
+      "step": 1760
+    },
+    {
+      "epoch": 1.755952380952381,
+      "grad_norm": 0.2169426951719115,
+      "learning_rate": 1.8524917034795252e-06,
+      "loss": 0.0602,
+      "step": 1770
+    },
+    {
+      "epoch": 1.7658730158730158,
+      "grad_norm": 0.24550854266292543,
+      "learning_rate": 1.7066469282672026e-06,
+      "loss": 0.0678,
+      "step": 1780
+    },
+    {
+      "epoch": 1.7757936507936507,
+      "grad_norm": 0.2217749756351088,
+      "learning_rate": 1.566578283336903e-06,
+      "loss": 0.0632,
+      "step": 1790
+    },
+    {
+      "epoch": 1.7857142857142856,
+      "grad_norm": 0.26436288496986254,
+      "learning_rate": 1.4323205019766694e-06,
+      "loss": 0.0684,
+      "step": 1800
+    },
+    {
+      "epoch": 1.7956349206349205,
+      "grad_norm": 0.2687739361594074,
+      "learning_rate": 1.3039068765353573e-06,
+      "loss": 0.0581,
+      "step": 1810
+    },
+    {
+      "epoch": 1.8055555555555556,
+      "grad_norm": 0.28225821347276653,
+      "learning_rate": 1.1813692501670276e-06,
+      "loss": 0.0654,
+      "step": 1820
+    },
+    {
+      "epoch": 1.8154761904761905,
+      "grad_norm": 0.23006756666014438,
+      "learning_rate": 1.064738008934696e-06,
+      "loss": 0.0667,
+      "step": 1830
+    },
+    {
+      "epoch": 1.8253968253968254,
+      "grad_norm": 0.2705006236969955,
+      "learning_rate": 9.540420742754103e-07,
+      "loss": 0.0652,
+      "step": 1840
+    },
+    {
+      "epoch": 1.8353174603174605,
+      "grad_norm": 0.29549308432556487,
+      "learning_rate": 8.493088958284822e-07,
+      "loss": 0.0729,
+      "step": 1850
+    },
+    {
+      "epoch": 1.8452380952380953,
+      "grad_norm": 0.26523762393360467,
+      "learning_rate": 7.505644446287263e-07,
+      "loss": 0.0621,
+      "step": 1860
+    },
+    {
+      "epoch": 1.8551587301587302,
+      "grad_norm": 0.28071984020209584,
+      "learning_rate": 6.578332066663307e-07,
+      "loss": 0.065,
+      "step": 1870
+    },
+    {
+      "epoch": 1.8650793650793651,
+      "grad_norm": 0.19893792873292027,
+      "learning_rate": 5.711381768149865e-07,
+      "loss": 0.0623,
+      "step": 1880
+    },
+    {
+      "epoch": 1.875,
+      "grad_norm": 0.17581046709328915,
+      "learning_rate": 4.905008531297661e-07,
+      "loss": 0.0584,
+      "step": 1890
+    },
+    {
+      "epoch": 1.8849206349206349,
+      "grad_norm": 0.21415646467667077,
+      "learning_rate": 4.1594123151618704e-07,
+      "loss": 0.0593,
+      "step": 1900
+    },
+    {
+      "epoch": 1.8948412698412698,
+      "grad_norm": 0.21962600950346692,
+      "learning_rate": 3.474778007717588e-07,
+      "loss": 0.0683,
+      "step": 1910
+    },
+    {
+      "epoch": 1.9047619047619047,
+      "grad_norm": 0.18764676866552024,
+      "learning_rate": 2.851275380012508e-07,
+      "loss": 0.0637,
+      "step": 1920
+    },
+    {
+      "epoch": 1.9146825396825395,
+      "grad_norm": 0.2667189957763398,
+      "learning_rate": 2.2890590440682314e-07,
+      "loss": 0.0664,
+      "step": 1930
+    },
+    {
+      "epoch": 1.9246031746031746,
+      "grad_norm": 0.25067142387187374,
+      "learning_rate": 1.7882684145406614e-07,
+      "loss": 0.0647,
+      "step": 1940
+    },
+    {
+      "epoch": 1.9345238095238095,
+      "grad_norm": 0.23355277796154653,
+      "learning_rate": 1.3490276741488783e-07,
+      "loss": 0.0661,
+      "step": 1950
+    },
+    {
+      "epoch": 1.9444444444444444,
+      "grad_norm": 0.2871474934919063,
+      "learning_rate": 9.71445742881022e-08,
+      "loss": 0.061,
+      "step": 1960
+    },
+    {
+      "epoch": 1.9543650793650795,
+      "grad_norm": 0.3570405979727388,
+      "learning_rate": 6.556162509852304e-08,
+      "loss": 0.075,
+      "step": 1970
+    },
+    {
+      "epoch": 1.9642857142857144,
+      "grad_norm": 0.24614021074624268,
+      "learning_rate": 4.016175157516844e-08,
+      "loss": 0.0629,
+      "step": 1980
+    },
+    {
+      "epoch": 1.9742063492063493,
+      "grad_norm": 0.31139696477741163,
+      "learning_rate": 2.0951252209208682e-08,
+      "loss": 0.0658,
+      "step": 1990
+    },
+    {
+      "epoch": 1.9841269841269842,
+      "grad_norm": 0.29188443750648085,
+      "learning_rate": 7.934890692101738e-09,
+      "loss": 0.057,
+      "step": 2000
+    },
+    {
+      "epoch": 1.994047619047619,
+      "grad_norm": 0.2831720795550947,
+      "learning_rate": 1.1158947343353766e-09,
+      "loss": 0.0555,
+      "step": 2010
+    },
+    {
+      "Accuracy": 78.2,
+      "Overall_f1": 70.5,
+      "epoch": 2.0,
+      "eval_UnsafeBench_runtime": 226.0049,
+      "eval_UnsafeBench_samples_per_second": 9.062,
+      "eval_UnsafeBench_steps_per_second": 0.142,
+      "generated": {
+        "Accuracy": 78.5,
+        "F1": 74.5,
+        "unsafe": {
+          "Accuracy": 78.5,
+          "F1": 74.5,
+          "safe": {
+            "f1": "81.4",
+            "prec": "83.1(486/585)",
+            "recall": "79.8(486/609)"
+          },
+          "unsafe": {
+            "f1": "74.5",
+            "prec": "72.5(325/448)",
+            "recall": "76.7(325/424)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 77.8,
+        "F1": 64.9,
+        "unsafe": {
+          "Accuracy": 77.8,
+          "F1": 64.9,
+          "safe": {
+            "f1": "83.8",
+            "prec": "79.3(582/734)",
+            "recall": "88.9(582/655)"
+          },
+          "unsafe": {
+            "f1": "64.9",
+            "prec": "74.0(208/281)",
+            "recall": "57.8(208/360)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "Accuracy": 69.5,
+      "Overall_f1": 82.0,
+      "epoch": 2.0,
+      "eval_SelfHarm_runtime": 80.8454,
+      "eval_SelfHarm_samples_per_second": 7.916,
+      "eval_SelfHarm_steps_per_second": 0.124,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 69.5,
+        "F1": 82.0,
+        "unsafe": {
+          "Accuracy": 69.5,
+          "F1": 82.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/195)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "82.0",
+            "prec": "100.0(445/445)",
+            "recall": "69.5(445/640)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "Accuracy": 79.8,
+      "Overall_f1": 69.6,
+      "epoch": 2.0,
+      "eval_UnsafeDiff_runtime": 98.0101,
+      "eval_UnsafeDiff_samples_per_second": 8.489,
+      "eval_UnsafeDiff_steps_per_second": 0.133,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 79.8,
+        "F1": 69.6,
+        "unsafe": {
+          "Accuracy": 79.8,
+          "F1": 69.6,
+          "safe": {
+            "f1": "84.9",
+            "prec": "93.5(471/504)",
+            "recall": "77.7(471/606)"
+          },
+          "unsafe": {
+            "f1": "69.6",
+            "prec": "58.8(193/328)",
+            "recall": "85.4(193/226)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "Accuracy": 69.3,
+      "Overall_f1": 81.9,
+      "epoch": 2.0,
+      "eval_ViolentBehavior_runtime": 23.6563,
+      "eval_ViolentBehavior_samples_per_second": 8.116,
+      "eval_ViolentBehavior_steps_per_second": 0.127,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 69.3,
+        "F1": 81.9,
+        "unsafe": {
+          "Accuracy": 69.3,
+          "F1": 81.9,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/59)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "81.9",
+            "prec": "100.0(133/133)",
+            "recall": "69.3(133/192)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "Accuracy": 78.9,
+      "Overall_f1": 70.8,
+      "epoch": 2.0,
+      "eval_SMID_runtime": 190.3209,
+      "eval_SMID_samples_per_second": 9.079,
+      "eval_SMID_steps_per_second": 0.142,
+      "generated": {
+        "Accuracy": 0.0,
+        "F1": 0.0,
+        "unsafe": {
+          "Accuracy": 0.0,
+          "F1": 0.0,
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "unsafe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 78.9,
+        "F1": 70.8,
+        "unsafe": {
+          "Accuracy": 78.9,
+          "F1": 70.8,
+          "safe": {
+            "f1": "83.5",
+            "prec": "76.0(921/1212)",
+            "recall": "92.6(921/995)"
+          },
+          "unsafe": {
+            "f1": "70.8",
+            "prec": "85.7(442/516)",
+            "recall": "60.3(442/733)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "Accuracy": 84.9,
+      "Overall_f1": 85.5,
+      "epoch": 2.0,
+      "eval_JsonDataset_runtime": 408.3025,
+      "eval_JsonDataset_samples_per_second": 4.859,
+      "eval_JsonDataset_steps_per_second": 0.076,
+      "generated": {
+        "Accuracy": 87.6,
+        "F1": 88.6,
+        "fairness": {
+          "Accuracy": 91.3,
+          "F1": 91.3,
+          "african": {
+            "f1": "88.9",
+            "prec": "82.4(28/34)",
+            "recall": "96.6(28/29)"
+          },
+          "asian": {
+            "f1": "86.3",
+            "prec": "91.4(117/128)",
+            "recall": "81.8(117/143)"
+          },
+          "caucasian": {
+            "f1": "91.7",
+            "prec": "88.8(284/320)",
+            "recall": "94.7(284/300)"
+          },
+          "children": {
+            "f1": "75.0",
+            "prec": "63.2(12/19)",
+            "recall": "92.3(12/13)"
+          },
+          "elderly": {
+            "f1": "93.9",
+            "prec": "96.4(108/112)",
+            "recall": "91.5(108/118)"
+          },
+          "female": {
+            "f1": "99.4",
+            "prec": "98.8(238/241)",
+            "recall": "100.0(238/238)"
+          },
+          "indian": {
+            "f1": "70.6",
+            "prec": "75.0(6/8)",
+            "recall": "66.7(6/9)"
+          },
+          "latino": {
+            "f1": "45.3",
+            "prec": "54.5(12/22)",
+            "recall": "38.7(12/31)"
+          },
+          "male": {
+            "f1": "99.4",
+            "prec": "100.0(271/271)",
+            "recall": "98.9(271/274)"
+          },
+          "middle-aged": {
+            "f1": "80.3",
+            "prec": "89.2(116/130)",
+            "recall": "73.0(116/159)"
+          },
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "teenager": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "young adult": {
+            "f1": "89.2",
+            "prec": "84.1(211/251)",
+            "recall": "95.0(211/222)"
+          }
+        },
+        "privacy": {
+          "Accuracy": 83.6,
+          "F1": 86.7,
+          "intellectual property violation": {
+            "f1": "85.7",
+            "prec": "90.0(36/40)",
+            "recall": "81.8(36/44)"
+          },
+          "personal identification documents": {
+            "f1": "90.5",
+            "prec": "95.6(43/45)",
+            "recall": "86.0(43/50)"
+          },
+          "public figures": {
+            "f1": "83.5",
+            "prec": "82.6(38/46)",
+            "recall": "84.4(38/45)"
+          },
+          "safe": {
+            "f1": "75.9",
+            "prec": "70.7(41/58)",
+            "recall": "82.0(41/50)"
+          }
+        },
+        "toxicity": {
+          "Accuracy": 69.4,
+          "F1": 70.3,
+          "disturbing": {
+            "f1": "72.0",
+            "prec": "61.0(36/59)",
+            "recall": "87.8(36/41)"
+          },
+          "hate": {
+            "f1": "50.0",
+            "prec": "100.0(5/5)",
+            "recall": "33.3(5/15)"
+          },
+          "humiliation": {
+            "f1": "43.4",
+            "prec": "92.9(13/14)",
+            "recall": "28.3(13/46)"
+          },
+          "illegal activity": {
+            "f1": "69.8",
+            "prec": "88.0(22/25)",
+            "recall": "57.9(22/38)"
+          },
+          "safe": {
+            "f1": "66.1",
+            "prec": "55.7(39/70)",
+            "recall": "81.2(39/48)"
+          },
+          "sexual": {
+            "f1": "96.8",
+            "prec": "100.0(45/45)",
+            "recall": "93.8(45/48)"
+          },
+          "violence": {
+            "f1": "63.6",
+            "prec": "52.8(28/53)",
+            "recall": "80.0(28/35)"
+          }
+        }
+      },
+      "real": {
+        "Accuracy": 82.2,
+        "F1": 82.4,
+        "fairness": {
+          "Accuracy": 81.6,
+          "F1": 81.6,
+          "african": {
+            "f1": "81.6",
+            "prec": "85.7(60/70)",
+            "recall": "77.9(60/77)"
+          },
+          "asian": {
+            "f1": "76.9",
+            "prec": "76.9(70/91)",
+            "recall": "76.9(70/91)"
+          },
+          "caucasian": {
+            "f1": "81.6",
+            "prec": "74.7(162/217)",
+            "recall": "90.0(162/180)"
+          },
+          "children": {
+            "f1": "81.3",
+            "prec": "74.3(26/35)",
+            "recall": "89.7(26/29)"
+          },
+          "elderly": {
+            "f1": "68.1",
+            "prec": "75.6(31/41)",
+            "recall": "62.0(31/50)"
+          },
+          "female": {
+            "f1": "94.6",
+            "prec": "98.0(149/152)",
+            "recall": "91.4(149/163)"
+          },
+          "indian": {
+            "f1": "62.4",
+            "prec": "65.0(39/60)",
+            "recall": "60.0(39/65)"
+          },
+          "latino": {
+            "f1": "35.3",
+            "prec": "42.6(26/61)",
+            "recall": "30.2(26/86)"
+          },
+          "male": {
+            "f1": "97.5",
+            "prec": "96.0(333/347)",
+            "recall": "99.1(333/336)"
+          },
+          "middle-aged": {
+            "f1": "80.0",
+            "prec": "83.1(212/255)",
+            "recall": "77.1(212/275)"
+          },
+          "safe": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "teenager": {
+            "f1": "0.0",
+            "prec": "0.0(0/0)",
+            "recall": "0.0(0/0)"
+          },
+          "young adult": {
+            "f1": "72.2",
+            "prec": "67.3(113/168)",
+            "recall": "77.9(113/145)"
+          }
+        },
+        "privacy": {
+          "Accuracy": 87.0,
+          "F1": 90.6,
+          "intellectual property violation": {
+            "f1": "88.1",
+            "prec": "100.0(37/37)",
+            "recall": "78.7(37/47)"
+          },
+          "personal identification documents": {
+            "f1": "93.1",
+            "prec": "92.2(47/51)",
+            "recall": "94.0(47/50)"
+          },
+          "public figures": {
+            "f1": "90.1",
+            "prec": "95.3(41/43)",
+            "recall": "85.4(41/48)"
+          },
+          "safe": {
+            "f1": "78.2",
+            "prec": "69.4(43/62)",
+            "recall": "89.6(43/48)"
+          }
+        },
+        "toxicity": {
+          "Accuracy": 82.2,
+          "F1": 83.3,
+          "disturbing": {
+            "f1": "84.8",
+            "prec": "84.0(42/50)",
+            "recall": "85.7(42/49)"
+          },
+          "hate": {
+            "f1": "69.6",
+            "prec": "72.7(8/11)",
+            "recall": "66.7(8/12)"
+          },
+          "humiliation": {
+            "f1": "30.0",
+            "prec": "50.0(3/6)",
+            "recall": "21.4(3/14)"
+          },
+          "illegal activity": {
+            "f1": "88.2",
+            "prec": "92.9(26/28)",
+            "recall": "83.9(26/31)"
+          },
+          "safe": {
+            "f1": "78.1",
+            "prec": "69.5(41/59)",
+            "recall": "89.1(41/46)"
+          },
+          "sexual": {
+            "f1": "95.0",
+            "prec": "94.1(48/51)",
+            "recall": "96.0(48/50)"
+          },
+          "violence": {
+            "f1": "80.5",
+            "prec": "83.3(35/42)",
+            "recall": "77.8(35/45)"
+          }
+        }
+      },
+      "step": 2016
+    },
+    {
+      "epoch": 2.0,
+      "step": 2016,
+      "total_flos": 0.0,
+      "train_loss": 0.08641785344788952,
+      "train_runtime": 25197.0435,
+      "train_samples_per_second": 5.118,
+      "train_steps_per_second": 0.08
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 2016,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 2,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 0.0,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}

lora/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52a3bfb75fba53db5b4c5c7bbf4604ec198920602a994d2d438e866c29f40718
+size 6584