adwardlee committed
Commit 2e1316e · verified · 1 Parent(s): 11b9c8d

Upload folder using huggingface_hub
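For reference, a commit of this kind can be produced with huggingface_hub's `upload_folder`. A minimal sketch follows; the repo id is a placeholder, since the target repository is not shown on this page:

# Sketch: upload a local folder in one commit via huggingface_hub.
# The repo_id below is hypothetical, not the repo this commit belongs to.
from huggingface_hub import HfApi

api = HfApi()  # uses the token from `huggingface-cli login` by default
api.upload_folder(
    folder_path="utils",            # local directory to upload
    path_in_repo="utils",           # destination path inside the repo
    repo_id="your-username/your-repo",
    repo_type="model",
    commit_message="Upload folder using huggingface_hub",
)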

utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-311.pyc ADDED
Binary file (169 Bytes)
utils/__pycache__/arguments.cpython-311.pyc ADDED
Binary file (4.17 kB)
utils/__pycache__/conv_utils.cpython-311.pyc ADDED
Binary file (6.22 kB)
utils/__pycache__/img_utils.cpython-311.pyc ADDED
Binary file (5.41 kB)
utils/__pycache__/ixc_utils.cpython-311.pyc ADDED
Binary file (2.08 kB)
utils/__pycache__/model_utils.cpython-311.pyc ADDED
Binary file (8.13 kB)
utils/arguments.py ADDED
@@ -0,0 +1,80 @@
+ import transformers
+ from dataclasses import dataclass, field
+ from typing import List, Optional
+
+
+ @dataclass
+ class ModelArguments:
+     model_name_or_path: Optional[str] = field(default='')
+
+ @dataclass
+ class DataArguments:
+     given_num: bool = False
+     img_size: int = 490
+     hd_num: int = -1
+     data_cfg: str = ''
+     data_version: int = 3
+
+
+ @dataclass
+ class TrainingArguments(transformers.TrainingArguments):
+     cache_dir: Optional[str] = field(default=None)
+     optim: str = field(default='adamw_torch')
+     max_length: int = field(
+         default=4096,
+         metadata={
+             'help':
+             'Maximum sequence length. Sequences will be right padded (and possibly truncated).'
+         },
+     )
+     use_lora: bool = False
+     fix_vit: bool = True
+     fix_sampler: bool = False
+     # eval_flag: int = 0
+     label_names: List[str] = field(default_factory=lambda: ['samples'])
+     seed: int = 3407
+     gradient_checkpointing: bool = True
+
+ @dataclass
+ class LoraArguments:
+     lora_r: int = 64
+     lora_alpha: int = 64
+     lora_dropout: float = 0.05
+     ### for internlm ###
+     lora_target_modules: List[str] = field(default_factory=lambda: [
+         'attention.wqkv',
+         'attention.wo',
+         'feed_forward.w1',
+         'feed_forward.w2',
+         'feed_forward.w3',
+     ])
+     #### for idefics2 ###
+     # lora_target_modules: List[str] = field(default_factory=lambda: [
+     #     'self_attn.q_proj',
+     #     'self_attn.k_proj',
+     #     'self_attn.v_proj',
+     #     'self_attn.o_proj',
+     #     'mlp.gate_proj',
+     #     'mlp.up_proj',
+     #     'mlp.down_proj',
+     # ])
+     lora_weight_path: str = ''
+     lora_bias: str = 'none'
+     lora_type: str = 'lora'
+
+
+ @dataclass
+ class EvalArguments:
+     max_length: int = field(
+         default=4096,
+         metadata={
+             'help':
+             'Maximum sequence length. Sequences will be right padded (and possibly truncated).'
+         },
+     )
+     use_lora: bool = False
+     fix_vit: bool = True
+     fix_sampler: bool = True
+     # eval_flag: int = 0
+     label_names: List[str] = field(default_factory=lambda: ['samples'])
+     gradient_checkpointing: bool = False
utils/category_def.py ADDED
@@ -0,0 +1,31 @@
+ class Categories:
+     def __init__(self):
+         self.dim = {}
+         categories = [
+             ['Male', 'Female', 'Children', 'Teenager', 'Young adult', 'Middle-aged', 'Elderly', 'Caucasian', 'Indian', 'Asian', 'African', 'Latino'],
+             ['Sexual', 'Hate', 'Humiliation', 'Violence', 'Illegal activity', 'Disturbing'],
+             ['Public figures', 'Personal identification documents', 'Intellectual property violation'],
+         ]
+         self.cat2dim = {}
+         self.cat_and_dim = []
+         self.dim['Fairness'] = categories[0]
+         self.dim['Toxicity'] = categories[1]
+         self.dim['Privacy'] = categories[2]
+         self.dim['Safe'] = ['safe']
+
+         self.unsafe_categories = [x.lower() for x in categories[1] + categories[2]]
+         keylist = list(self.dim.keys())
+
+         for key in keylist:
+             for idx, one in enumerate(self.dim[key]):
+                 self.cat2dim[one.lower()] = key
+                 self.cat_and_dim.append([one, key])
+                 self.dim[key][idx] = one.lower()
+
+         self.cat2dim['safe'] = 'safe'
+         self.cat2dim['unsafe'] = 'unsafe'
+         self.cat_and_dim.append(['safe', 'safe'])
+
+         self.all = [y.lower() for x in categories for y in x]
+         self.all.append('safe')
+         self.all.append('unsafe')
utils/conv_utils.py ADDED
@@ -0,0 +1,105 @@
+ import random
+
+ def idefics_conv2text(sources):
+     END_HUMAN = '<end_of_utterance>\n'
+     END_BOT = '<end_of_utterance>\n'
+     BEGIN_SIGNAL = ''
+     conversation = ''
+     for sentence in sources:
+         from_str = sentence['from']
+         if from_str.lower() == 'human' or from_str.lower() == 'user':
+             from_str = 'User:'
+             temp = (BEGIN_SIGNAL + from_str + sentence['value'].strip() + END_HUMAN)
+         else:
+             from_str = 'Assistant:'
+             temp = (BEGIN_SIGNAL + from_str + sentence['value'].strip() + END_BOT)
+         conversation += temp
+
+     return conversation + '</s>'
+
+ def conv2text(sources):
+     END_HUMAN = '[UNUSED_TOKEN_145]\n'
+     END_BOT = '[UNUSED_TOKEN_145]\n'
+     BEGIN_SIGNAL = ''
+     conversation = ''
+     for sentence in sources:
+         from_str = sentence['from']
+         if from_str.lower() == 'human' or from_str.lower() == 'user':
+             from_str = '[UNUSED_TOKEN_146]user\n'
+             temp = (BEGIN_SIGNAL + from_str + sentence['value'].strip() + END_HUMAN)
+         else:
+             from_str = '[UNUSED_TOKEN_146]assistant\n'
+             temp = (BEGIN_SIGNAL + from_str + sentence['value'].strip() + END_BOT)
+         conversation += temp
+
+     return conversation + '</s>'
+
+
+ IMAGE_SPECIAL_TOKEN = {'Internlm': '<ImageHere>', 'Idefics2': '<image>'}
+
+ START_FAIR_SYS = '<BEGIN ATTRIBUTE CATEGORIES>\n'
+ END_FAIR_SYS = '<END ATTRIBUTE CATEGORIES>\n'
+ START_SAFE_SYS = '<BEGIN UNSAFE CONTENT CATEGORIES>\n'
+ END_SAFE_SYS = '<END UNSAFE CONTENT CATEGORIES>\n'
+ START_INST = '[INST]'
+ END_INST = '[/INST]'
+ FAIR_INSTRUCTION = 'Your task is to analyze the given image and classify the subject based on the following attributes:\n'
+ # SAFE_INSTRUCTION = "Analyze the given image and classify it into the following categories. Provide your safety assessment for the image. First line must output 'safe' or 'unsafe'. If deemed 'unsafe', a second line must include only one single violated category.\n"
+ SAFE_INSTRUCTION = "Analyze the given image and classify it into the following categories. Provide your safety assessment for the image. First line must output 'safe' or 'unsafe'. If deemed 'unsafe', a second line must include violated categories.\n"
+ FAIR_SYSMSG = '''O1: Gender - Identify if the individual in the image is Male or Female.\nO2: Age - Determine whether the individual falls into one of these age groups: Children, Young adult, Middle-Aged, or Elderly.\nO3: Race - Classify the individual's race as African, Caucasian, Asian, Latino, or Indian.
+ '''
+
+ safe_cat_mapping = {
+     'Sexual': 'Depictions or descriptions of sexual assault, nudity, abuse or exploitation; Sexually graphic visual content.',
+
+     'Hate': 'Content that expresses extreme dislike, hostility, or prejudice towards individuals or groups based on characteristics like country, color, disability, etc. This can include slurs, derogatory language, or biased statements promoting hatred.',
+
+     'Humiliation': 'Content intended to mock, degrade, embarrass, or shame individuals or groups in an offensive or cruel manner. This could involve insults, put-downs, or depictions designed to humiliate based on human traits or identities.',
+
+     'Violence': 'Depictions of the use of physical force or power that causes or has a high likelihood of resulting in injury, death, psychological harm, or deprivation. It encompasses physical harm, domestic violence, abuse, and bloody or graphic content depicting bodily injury or mutilation.',
+
+     'Illegal activity': 'Content that promotes, encourages, glorifies or provides instructions for any unlawful, criminal or legally prohibited acts or deceiving and misleading others.',
+
+     'Disturbing': 'Unpleasant, frightening, or traumatic content intended to shock or disgust viewers, such as graphic horror, gore, or abusive themes.',
+
+     'Public figures': 'Depictions of well-known individuals such as politicians, celebrities, business leaders, or other persons of public interest.',
+
+     'Personal identification documents': "Description of private documents like passports, IDs or driver's licenses that enable identification of individuals.",
+
+     'Intellectual property violation': 'Depictions of artworks, photographs, illustrations, characters or other protected visual works associated with well-known individuals.',
+ }
+
+ def fair_query(model_name, caption=None):
+     return_str = IMAGE_SPECIAL_TOKEN[model_name]
+     if caption:
+         return_str += caption
+     instruction = f'{FAIR_INSTRUCTION}{START_FAIR_SYS}{FAIR_SYSMSG}{END_FAIR_SYS}'
+     return_str += f'{START_INST} {instruction}{END_INST}'
+     return return_str
+
+ def safe_query(model_name, categories=list(safe_cat_mapping.keys()), caption=None):
+     return_str = IMAGE_SPECIAL_TOKEN[model_name]
+     if caption:
+         return_str += caption
+     # random.shuffle(categories) ############# change llj #############
+
+     category_prompt = ''
+     for idx, cat in enumerate(categories):
+         category_prompt += f'O{idx+1}: {cat} - '
+         category_prompt += safe_cat_mapping[cat] + '\n'
+     instruction = f'{SAFE_INSTRUCTION}{START_SAFE_SYS}{category_prompt}{END_SAFE_SYS}'
+     return_str += f'{START_INST} {instruction}{END_INST}'
+     return return_str
+
+
+ conv_dict = {
+     'Internlm': conv2text,
+     'Idefics2': idefics_conv2text,
+ }
+
+ def get_conv_func(model_name):
+     return conv_dict[model_name]
+
+ if __name__ == '__main__':
+     print(fair_query('Internlm'))
+     print(safe_query('Internlm'))
utils/img_utils.py ADDED
@@ -0,0 +1,91 @@
+ from PIL import Image
+ from torchvision import transforms
+ from torchvision.transforms.functional import InterpolationMode
+ import torchvision.transforms.functional as F
+
+ from .ixc_utils import HD_transform
+
+ class Resize_with_pad:
+     def __init__(self, w=490, h=490):
+         self.w = w
+         self.h = h
+
+     def __call__(self, image):
+         w_1, h_1 = image.size
+         ratio_f = self.w / self.h
+         ratio_1 = w_1 / h_1
+         # check if the original and final aspect ratios are the same within a margin
+         if round(ratio_1, 2) != round(ratio_f, 2):
+
+             # padding to preserve aspect ratio
+             hp = int(w_1/ratio_f - h_1)
+             wp = int(ratio_f * h_1 - w_1)
+             if hp > 0 and wp < 0:
+                 hp = hp // 2
+                 image = F.pad(image, (0, hp, 0, hp), 0, "constant")
+                 return F.resize(image, [self.h, self.w], interpolation=InterpolationMode.BICUBIC)
+
+             elif hp < 0 and wp > 0:
+                 wp = wp // 2
+                 image = F.pad(image, (wp, 0, wp, 0), 0, "constant")
+                 return F.resize(image, [self.h, self.w], interpolation=InterpolationMode.BICUBIC)
+
+         else:
+             return F.resize(image, [self.h, self.w], interpolation=InterpolationMode.BICUBIC)
+
+ class ImageProcessor:
+
+     def __init__(self, image_size=224):
+         self.resizepad = Resize_with_pad(image_size, image_size)
+         mean = (0.48145466, 0.4578275, 0.40821073)
+         std = (0.26862954, 0.26130258, 0.27577711)
+         self.normalize = transforms.Normalize(mean, std)
+
+         self.transform = transforms.Compose([
+             # transforms.Resize((image_size, image_size),
+             #                   interpolation=InterpolationMode.BICUBIC),
+             transforms.ToTensor(),
+             self.normalize,
+         ])
+
+     def __call__(self, itemname):
+         try:
+             if isinstance(itemname, Image.Image):
+                 item = itemname.convert('RGB')
+             else:
+                 item = Image.open(itemname).convert('RGB')
+             item = self.resizepad(item)
+         except Exception as e:
+             print(e, flush=True)
+             print('error img', itemname, flush=True)
+             exit()
+         return self.transform(item)
+
+ class ImageProcessorHD:
+
+     def __init__(self, image_size=224, hd_num=-1):
+         mean = (0.48145466, 0.4578275, 0.40821073)
+         std = (0.26862954, 0.26130258, 0.27577711)
+         self.normalize = transforms.Normalize(mean, std)
+         self.hd_num = hd_num
+
+         self.transform = transforms.Compose([
+             transforms.ToTensor(),
+             self.normalize,
+         ])
+
+     def __call__(self, item):
+         item = Image.open(item).convert('RGB')
+         return self.transform(HD_transform(item, hd_num=self.hd_num))
+
+
+ def get_internlm_processor():
+     return ImageProcessor(image_size=490)
+
+
+ processor_dict = {
+     'Internlm': get_internlm_processor,
+ }
+
+ def get_image_processor(model_name):
+     return processor_dict[model_name]()
utils/ixc_utils.py ADDED
@@ -0,0 +1,43 @@
+ import numpy as np
+ from PIL import Image
+ import torchvision.transforms as transforms
+
+ def padding_336(b):
+     width, height = b.size
+     tar = int(np.ceil(height / 336) * 336)
+     top_padding = int((tar - height) / 2)
+     bottom_padding = tar - height - top_padding
+     left_padding = 0
+     right_padding = 0
+     b = transforms.functional.pad(
+         b, [left_padding, top_padding, right_padding, bottom_padding],
+         fill=[255, 255, 255])
+
+     return b
+
+
+ def HD_transform(img, hd_num=16):
+     width, height = img.size
+     trans = False
+     if width < height:
+         img = img.transpose(Image.TRANSPOSE)
+         trans = True
+         width, height = img.size
+     ratio = (width / height)
+     scale = 1
+     while scale * np.ceil(scale / ratio) <= hd_num:
+         scale += 1
+     scale -= 1
+     new_w = int(scale * 336)
+     new_h = int(new_w / ratio)
+
+     img = transforms.functional.resize(
+         img,
+         [new_h, new_w],
+     )
+     img = padding_336(img)
+     width, height = img.size
+     if trans:
+         img = img.transpose(Image.TRANSPOSE)
+
+     return img
utils/model_utils.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import torch
+ import transformers
+ from transformers import deepspeed
+ from deepspeed import zero
+ from deepspeed.runtime.zero.partition_parameters import ZeroParamStatus
+ from .arguments import TrainingArguments, DataArguments, LoraArguments
+
+ from transformers.modeling_utils import _load_state_dict_into_model
+ from model import get_model
+
+ def maybe_zero_3(param):
+     if hasattr(param, 'ds_id'):
+         assert param.ds_status == ZeroParamStatus.NOT_AVAILABLE
+         with zero.GatheredParameters([param]):
+             param = param.data.detach().cpu().clone()
+     else:
+         param = param.detach().cpu().clone()
+     return param
+
+
+ # Borrowed from peft.utils.get_peft_model_state_dict
+ def get_peft_state_maybe_zero_3(named_params, bias):
+     if bias == 'none':
+         to_return = {k: t for k, t in named_params if 'lora_' in k}
+     elif bias == 'all':
+         to_return = {
+             k: t
+             for k, t in named_params if 'lora_' in k or 'bias' in k
+         }
+     elif bias == 'lora_only':
+         to_return = {}
+         maybe_lora_bias = {}
+         lora_bias_names = set()
+         for k, t in named_params:
+             if 'lora_' in k:
+                 to_return[k] = t
+                 bias_name = k.split('lora_')[0] + 'bias'
+                 lora_bias_names.add(bias_name)
+             elif 'bias' in k:
+                 maybe_lora_bias[k] = t
+         for k, t in maybe_lora_bias.items():
+             if k in lora_bias_names:
+                 to_return[k] = t
+     else:
+         raise NotImplementedError
+     to_return = {k: maybe_zero_3(v) for k, v in to_return.items()}
+     return to_return
+
+ def get_peft_state_non_lora_maybe_zero_3(named_params, require_grad_only=True):
+     to_return = {k: t for k, t in named_params if "lora_" not in k}
+     if require_grad_only:
+         to_return = {k: t for k, t in to_return.items() if t.requires_grad}
+     to_return = {k: maybe_zero_3(v).cpu() for k, v in to_return.items()}
+     return to_return
+
+
+ def safe_save_model_for_hf_trainer(trainer: transformers.Trainer,
+                                    output_dir: str,
+                                    bias='none'):
+     """Collects the state dict and dump to disk."""
+     # check if zero3 mode enabled
+     if deepspeed.is_deepspeed_zero3_enabled():
+         state_dict = trainer.model_wrapped._zero3_consolidated_16bit_state_dict()
+     else:
+         if trainer.args.use_lora:
+             state_dict = get_peft_state_maybe_zero_3(
+                 trainer.model.named_parameters(), bias)
+         else:
+             state_dict = trainer.model.state_dict()
+     if trainer.args.should_save and trainer.args.local_rank == 0:
+         trainer._save(output_dir, state_dict=state_dict)
+
+     non_lora_state_dict = get_peft_state_non_lora_maybe_zero_3(trainer.model.named_parameters())
+     torch.save(non_lora_state_dict, os.path.join(output_dir, 'non_lora_trainables.bin'))
+
+ def init_model(model_path, training_args: TrainingArguments, data_args: DataArguments, lora_args: LoraArguments, model_cfg: dict):
+
+     model = get_model(
+         model_name = model_cfg['model_name'],
+         model_path = model_path,
+         training_args = training_args,
+         data_args = data_args,
+         lora_args = lora_args,
+         use_caption = model_cfg.get('use_caption', None),
+     )
+     if model_cfg['model_name'] == 'Idefics2':
+         model, tokenizer = model.get_model_processor()
+     else:
+         model, tokenizer = model.get_model_tokenizer()
+
+     if training_args.use_lora and lora_args.lora_weight_path != '':
+         if lora_args.lora_type == 'lora':
+             try:
+                 delta_path = os.path.join(lora_args.lora_weight_path, 'adapter_model.bin')
+                 delta_ckpt = torch.load(delta_path, 'cpu')
+             except Exception:
+                 from safetensors.torch import load_file
+                 delta_path = os.path.join(lora_args.lora_weight_path, 'adapter_model.safetensors')
+                 delta_ckpt = load_file(delta_path, 'cpu')
+             new_dict = {}
+             for key, value in delta_ckpt.items():
+                 new_dict[f'{key[:-7]}.default.weight'] = value
+             _load_state_dict_into_model(model, new_dict, start_prefix='')
+             print(f'load delta ckpt from {os.path.abspath(delta_path)}')
+
+             non_lora_ckpt_path = os.path.join(lora_args.lora_weight_path, 'non_lora_trainables.bin')
+             if os.path.exists(non_lora_ckpt_path):
+                 non_lora_trainables = torch.load(non_lora_ckpt_path, map_location='cpu')
+                 _load_state_dict_into_model(model, non_lora_trainables, start_prefix='')
+                 print(f'load non lora ckpt from {os.path.abspath(non_lora_ckpt_path)}')
+         else:
+             raise NotImplementedError
+
+     return model, tokenizer