ImageGuard

ImageGuard is a vision-language model (VLM) based on InternLM-XComposer2 for advanced image safety evaluation.

Import from Transformers

ImageGuard works with transformers>=4.42.
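
To check that the installed version meets this requirement, here is a minimal sketch (the packaging library is an extra dependency assumed to be available):

from packaging import version
import transformers

# fail fast if the installed transformers is older than 4.42
assert version.parse(transformers.__version__) >= version.parse("4.42"), (
    f"transformers>=4.42 required, found {transformers.__version__}"
)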

Quickstart

We provide a simple example to show how to use ImageGuard with 🤗 Transformers.

import os
import json
import torch
import time
import numpy as np
import argparse
import yaml

from PIL import Image
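# local helper modules (assumed to ship with the ImageGuard repository)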
from utils.img_utils import ImageProcessor
from utils.arguments import ModelArguments, DataArguments, EvalArguments, LoraArguments
from utils.model_utils import init_model
from utils.conv_utils import fair_query, safe_query

def load_yaml(cfg_path):
    with open(cfg_path, 'r', encoding='utf-8') as f:
        result = yaml.load(f.read(), Loader=yaml.FullLoader)
    return result

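# build the evaluation prompt: safe=True selects the toxicity/privacy query, safe=False the fairness query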
def textprocess(safe=True):
    if safe:
        conversation = safe_query('Internlm')
    else:
        conversation = fair_query('Internlm')
    return conversation

def model_init(
    model_args: ModelArguments, 
    data_args: DataArguments, 
    training_args: EvalArguments,
    lora_args: LoraArguments,
    model_cfg):
    model, tokenizer = init_model(model_args.model_name_or_path, training_args, data_args, lora_args, model_cfg)
    # move to GPU, cast to fp16, and switch to inference mode
    model = model.cuda().half().eval()
    model.tokenizer = tokenizer
    return model

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_dir', required=False, type=str, default='lora/')  # directory containing the LoRA weights and config.yaml
    parser.add_argument('--base_model', type=str, default='internlm/internlm-xcomposer2-vl-7b')  # base model checkpoint on the Hugging Face Hub
    args = parser.parse_args()
    load_dir = args.load_dir
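    # read the config saved alongside the LoRA checkpoint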
    config = load_yaml(os.path.join(load_dir, 'config.yaml'))
    model_cfg = config['model_cfg']
    data_cfg = config['data_cfg']['data_cfg']
    model_cfg['model_name'] = 'Internlm'
    data_cfg['train']['model_name'] = 'Internlm'
    lora_cfg = config['lora_cfg']
    training_cfg = config['training_cfg']
    
    model_args = ModelArguments()
    model_args.model_name_or_path = args.base_model
    Lora_args = LoraArguments()
    Lora_args.lora_alpha = lora_cfg['lora_alpha']
    Lora_args.lora_bias = lora_cfg['lora_bias']
    Lora_args.lora_dropout = lora_cfg['lora_dropout']
    Lora_args.lora_r = lora_cfg['lora_r']
    Lora_args.lora_target_modules = lora_cfg['lora_target_modules']
    Lora_args.lora_weight_path = load_dir  # comment this line out to evaluate the base model without LoRA weights
    train_args = EvalArguments()
    train_args.max_length = training_cfg['max_length']
    train_args.fix_vit = training_cfg['fix_vit']
    train_args.fix_sampler = training_cfg['fix_sampler']
    train_args.use_lora = training_cfg['use_lora']
    train_args.gradient_checkpointing = training_cfg['gradient_checkpointing']
    data_args = DataArguments()

    model = model_init(model_args, data_args, train_args, Lora_args, model_cfg)
    print('model device:', model.device, flush=True)

    # load an example image and choose the evaluation mode
    img = Image.open('punch.png')
    safe = True  # True for toxicity/privacy evaluation, False for fairness evaluation
    prompt = textprocess(safe=safe)
    vis_processor = ImageProcessor(image_size=490)
    image = vis_processor(img)[None, :, :, :]
    with torch.cuda.amp.autocast():
        response, _ = model.chat(model.tokenizer, prompt, image, history=[], do_sample=False, meta_instruction=None)
    print(response)
    # expected output: unsafe\n violence
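
The reply is a verdict line optionally followed by a category line, as in the `unsafe\n violence` example above. A minimal sketch for splitting such a reply into a (verdict, category) pair; the two-line format is assumed from the example output:

def parse_response(response: str):
    # first line: 'safe' or 'unsafe'; second line (if present): the violated category
    lines = [line.strip() for line in response.strip().splitlines()]
    verdict = lines[0] if lines else ''
    category = lines[1] if len(lines) > 1 else None
    return verdict, category

print(parse_response('unsafe\n violence'))  # ('unsafe', 'violence')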

Open Source License

The code is licensed under Apache-2.0. The model weights are fully open for academic research, and free commercial use is also permitted.
