adwardlee committed on
Commit e40dad3 · verified · 1 Parent(s): 626c8bf

Update README.md

Files changed (1): README.md +124 -121
README.md CHANGED
---
license: apache-2.0
tags:
- vision
- image-classification
- image-to-text
- image-captioning
base_model:
- internlm/internlm-xcomposer2-vl-7b
pipeline_tag: image-to-text
---

<p align="center">
<img src="logo_en.png" width="400"/>
</p>

<p align="center">
<b><font size="6">ImageGuard</font></b>
</p>

<div align="center">

[💻Github Repo](https://github.com/adwardlee/t2i_safety)

[Paper](https://arxiv.org/abs/)

</div>

**ImageGuard** is a vision-language model (VLM) based on [InternLM-XComposer2](https://github.com/InternLM/InternLM-XComposer) for advanced image safety evaluation.

### Import from Transformers
ImageGuard works with `transformers>=4.42`.
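
As a quick sanity check before running the quickstart (a minimal sketch of ours, not from the original README), you can verify the installed version:

```python
# Assumes transformers is installed, e.g. via `pip install "transformers>=4.42"`.
from packaging import version
import transformers

assert version.parse(transformers.__version__) >= version.parse('4.42'), transformers.__version__
```
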
## Quickstart
We provide a simple example showing how to use ImageGuard with 🤗 Transformers. It loads the base InternLM-XComposer2 model and applies the ImageGuard LoRA weights from `--load_dir`.
```python
import os
import argparse

import torch
import yaml
from PIL import Image

# Helper modules from the ImageGuard repository (see the GitHub link above).
from utils.img_utils import ImageProcessor
from utils.arguments import ModelArguments, DataArguments, EvalArguments, LoraArguments
from utils.model_utils import init_model
from utils.conv_utils import fair_query, safe_query


def load_yaml(cfg_path):
    with open(cfg_path, 'r', encoding='utf-8') as f:
        result = yaml.load(f.read(), Loader=yaml.FullLoader)
    return result


def textprocess(safe=True):
    # Build the evaluation prompt: safe_query covers toxicity/privacy, fair_query covers fairness.
    if safe:
        conversation = safe_query('Internlm')
    else:
        conversation = fair_query('Internlm')
    return conversation


def model_init(
        model_args: ModelArguments,
        data_args: DataArguments,
        training_args: EvalArguments,
        lora_args: LoraArguments,
        model_cfg):
    # Load the base model, attach the LoRA weights, and move it to GPU in half precision.
    model, tokenizer = init_model(model_args.model_name_or_path, training_args, data_args, lora_args, model_cfg)
    model.cuda().eval().half()
    model.tokenizer = tokenizer
    return model


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--load_dir', required=False, type=str, default='lora/')
    parser.add_argument('--base_model', type=str, default='internlm/internlm-xcomposer2-vl-7b')
    args = parser.parse_args()

    # Read model / LoRA / training settings from the checkpoint's config.yaml.
    load_dir = args.load_dir
    config = load_yaml(os.path.join(load_dir, 'config.yaml'))
    model_cfg = config['model_cfg']
    data_cfg = config['data_cfg']['data_cfg']
    model_cfg['model_name'] = 'Internlm'
    data_cfg['train']['model_name'] = 'Internlm'
    lora_cfg = config['lora_cfg']
    training_cfg = config['training_cfg']

    model_args = ModelArguments()
    model_args.model_name_or_path = args.base_model

    lora_args = LoraArguments()
    lora_args.lora_alpha = lora_cfg['lora_alpha']
    lora_args.lora_bias = lora_cfg['lora_bias']
    lora_args.lora_dropout = lora_cfg['lora_dropout']
    lora_args.lora_r = lora_cfg['lora_r']
    lora_args.lora_target_modules = lora_cfg['lora_target_modules']
    lora_args.lora_weight_path = load_dir  # comment out this line to evaluate the base model without LoRA weights

    train_args = EvalArguments()
    train_args.max_length = training_cfg['max_length']
    train_args.fix_vit = training_cfg['fix_vit']
    train_args.fix_sampler = training_cfg['fix_sampler']
    train_args.use_lora = training_cfg['use_lora']
    train_args.gradient_checkpointing = training_cfg['gradient_checkpointing']
    data_args = DataArguments()

    model = model_init(model_args, data_args, train_args, lora_args, model_cfg)
    print('model device:', model.device, flush=True)

    img = Image.open('punch.png')
    safe = True  # True for toxicity and privacy, False for fairness
    prompt = textprocess(safe=safe)

    # Preprocess the image to the 490x490 input size expected by the model and add a batch dimension.
    vis_processor = ImageProcessor(image_size=490)
    image = vis_processor(img)[None, :, :, :]

    with torch.cuda.amp.autocast():
        response, _ = model.chat(model.tokenizer, prompt, image, history=[],
                                 do_sample=False, meta_instruction=None)
    print(response)
    # expected output: unsafe\n violence
```
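
The reply is plain text, as in the `unsafe\n violence` example above. As a minimal sketch (ours, not part of the repository), it can be split into a verdict and a category list:

```python
def parse_response(response: str):
    """Split ImageGuard's plain-text reply into (verdict, categories).

    Assumes replies like the example above: the verdict ('safe' or 'unsafe')
    on the first line, with any categories on the following lines.
    """
    lines = [line.strip() for line in response.splitlines() if line.strip()]
    return lines[0], lines[1:]


print(parse_response('unsafe\n violence'))  # ('unsafe', ['violence'])
```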
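
The same pipeline can also audit a folder of images. The sketch below is illustrative (the `images/` directory is a placeholder) and reuses the `model`, `vis_processor`, and `prompt` objects built in the quickstart:

```python
import glob

import torch
from PIL import Image

results = {}
for path in sorted(glob.glob('images/*.png')):  # placeholder folder of images to audit
    image = vis_processor(Image.open(path))[None, :, :, :]
    with torch.cuda.amp.autocast():
        response, _ = model.chat(model.tokenizer, prompt, image, history=[],
                                 do_sample=False, meta_instruction=None)
    results[path] = response

for path, response in sorted(results.items()):
    print(path, '->', response.replace('\n', ' '))
```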

### Open Source License
The code is licensed under Apache-2.0. The model weights are fully open for academic research and also allow free commercial use.