calico-1226 committed on
Commit
839e100
·
0 Parent(s):
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *
config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/juntao/Models/LanguageBind/Video-LLaVA-7B",
3
+ "architectures": [
4
+ "LlavaLlamaForScore"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "freeze_mm_mlp_adapter": false,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 4096,
13
+ "image_aspect_ratio": "pad",
14
+ "image_grid_pinpoints": null,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 11008,
17
+ "max_position_embeddings": 4096,
18
+ "mlp_bias": false,
19
+ "mm_hidden_size": 1024,
20
+ "mm_image_tower": "LanguageBind/LanguageBind_Image",
21
+ "mm_projector_lr": null,
22
+ "mm_projector_type": "mlp2x_gelu",
23
+ "mm_use_im_patch_token": false,
24
+ "mm_use_im_start_end": false,
25
+ "mm_video_tower": "LanguageBind/LanguageBind_Video_merge",
26
+ "mm_vision_select_feature": "patch",
27
+ "mm_vision_select_layer": -2,
28
+ "model_type": "llava_score",
29
+ "num_attention_heads": 32,
30
+ "num_hidden_layers": 32,
31
+ "num_key_value_heads": 32,
32
+ "pad_token_id": 0,
33
+ "pretraining_tp": 1,
34
+ "rms_norm_eps": 1e-05,
35
+ "rope_scaling": null,
36
+ "rope_theta": 10000.0,
37
+ "tie_word_embeddings": false,
38
+ "tokenizer_model_max_length": 3072,
39
+ "tokenizer_padding_side": "right",
40
+ "torch_dtype": "bfloat16",
41
+ "transformers_version": "4.44.1",
42
+ "tune_mm_mlp_adapter": false,
43
+ "use_cache": true,
44
+ "use_mm_proj": true,
45
+ "vocab_size": 32000
46
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 1,
3
+ "eos_token_id": 2,
4
+ "max_length": 4096,
5
+ "pad_token_id": 0,
6
+ "temperature": null,
7
+ "top_p": null,
8
+ "transformers_version": "4.44.1"
9
+ }
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:365c45380f2477dcfce97b585c346e11d252a6758a6200c08cdb23d936ff3632
3
+ size 4938985352
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2ad385bef4a52546c760da667fefbc3c9e758e2c7d3c1fc70e1047e98364eee
3
+ size 4947390880
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f4f79e3b856e275be82ad5f385cc2ec7bdda70db4e7278761997809ad0c8cf0
3
+ size 4785196224
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
script.sh ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
#!/bin/bash
#
# Copyright 2024 PKU-Alignment Team. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
#
# Launches examples/reward_model/train_cost.py through `deepspeed`.
#
# Usage:
#   script.sh [--video_dir DIR] [--train_data_path FILE] [--eval_data_path FILE]
#             [--model_name_or_path PATH] [--mm_mlp_adapter_path FILE]
#             [--output_dir DIR] [--dimension NAME]
#
# Every option also accepts the `--name=value` spelling. Options that are not
# given keep the defaults assigned below.
#
# NOTE(review): the training entry point and the DeepSpeed config are referenced
# by relative path, so this presumably has to be started from the repository
# root -- confirm.

if [ -z "${BASH_VERSION}" ]; then
	echo "Please use bash to run this script." >&2
	exit 1
fi

# Defaults; each one can be overridden from the command line.
VIDEO_DIR="/home/juntao/Data/safe-sora/videos"
TRAIN_DATA_PATH="/home/juntao/Data/safe-sora/unsafe_pairs/config-train.json"
EVAL_DATA_PATH="/home/juntao/Data/safe-sora/unsafe_pairs/config-test.json"
MODEL_NAME_OR_PATH="/home/juntao/Models/LanguageBind/Video-LLaVA-7B"
MM_MLP_ADAPTER_PATH="/home/juntao/Models/LanguageBind/Video-LLaVA-Pretrain-7B/mm_projector.bin"
OUTPUT_DIR="./outputs/cost"
DIMENSION="harmlessness"

# Alternative set of defaults, kept disabled.
# VIDEO_DIR="/home/juntao/Projects/safe-sora/data/SafeSora/videos"
# TRAIN_DATA_PATH="/home/juntao/Projects/safe-sora/data/SafeSora/config-train.json.gz"
# EVAL_DATA_PATH="/home/juntao/Projects/safe-sora/data/SafeSora/config-test.json.gz"
# MODEL_NAME_OR_PATH="/home/juntao/Models/LanguageBind/Video-LLaVA-7B"
# MM_MLP_ADAPTER_PATH="/home/juntao/Models/LanguageBind/Video-LLaVA-Pretrain-7B/mm_projector.bin"
# OUTPUT_DIR="/home/juntao/Projects/Learning/safe-sora/examples/outputs/goodgood"
# DIMENSION="helpfulness"

#######################################
# Abort when an option that takes a value is the last word on the command line.
# Arguments:
#   $1 - the option as typed by the user (used in the message)
#   $2 - number of arguments still left after the option itself
# Outputs:
#   Writes a diagnostic to stderr and exits with status 1 when no value is left.
#######################################
require_value() {
	if [[ "$2" -lt 1 ]]; then
		echo "Missing value for parameter: '$1'" >&2
		exit 1
	fi
}

# Command-line parsing: `--name value` consumes two words, `--name=value` one.
while [[ "$#" -gt 0 ]]; do
	arg="$1"
	shift
	case "${arg}" in
		--video_dir)
			require_value "${arg}" "$#"
			VIDEO_DIR="$1"
			shift
			;;
		--video_dir=*)
			VIDEO_DIR="${arg#*=}"
			;;
		--train_data_path)
			require_value "${arg}" "$#"
			TRAIN_DATA_PATH="$1"
			shift
			;;
		--train_data_path=*)
			TRAIN_DATA_PATH="${arg#*=}"
			;;
		--eval_data_path)
			require_value "${arg}" "$#"
			EVAL_DATA_PATH="$1"
			shift
			;;
		--eval_data_path=*)
			EVAL_DATA_PATH="${arg#*=}"
			;;
		--model_name_or_path)
			require_value "${arg}" "$#"
			MODEL_NAME_OR_PATH="$1"
			shift
			;;
		--model_name_or_path=*)
			MODEL_NAME_OR_PATH="${arg#*=}"
			;;
		--mm_mlp_adapter_path)
			require_value "${arg}" "$#"
			MM_MLP_ADAPTER_PATH="$1"
			shift
			;;
		--mm_mlp_adapter_path=*)
			MM_MLP_ADAPTER_PATH="${arg#*=}"
			;;
		--output_dir)
			require_value "${arg}" "$#"
			OUTPUT_DIR="$1"
			shift
			;;
		--output_dir=*)
			OUTPUT_DIR="${arg#*=}"
			;;
		--dimension)
			require_value "${arg}" "$#"
			DIMENSION="$1"
			shift
			;;
		--dimension=*)
			DIMENSION="${arg#*=}"
			;;
		*)
			echo "Unknown parameter passed: '${arg}'" >&2
			exit 1
			;;
	esac
done

# Exact match against the allowed names. A `case` pattern cannot be satisfied
# by a value that merely contains several allowed names separated by spaces.
case "${DIMENSION}" in
	helpfulness | harmlessness | instruction_following | correctness | informativeness | aesthetics) ;;
	*)
		echo "Invalid dimension: ${DIMENSION}, should be one of 'helpfulness', 'harmlessness', 'instruction_following', 'correctness', 'informativeness', 'aesthetics'." >&2
		exit 1
		;;
esac

IMAGE_DIR="${VIDEO_DIR}"
RUN_NAME="reward-${DIMENSION}"
OUTPUT_DIR="${OUTPUT_DIR}/${RUN_NAME}"

# Create the run directory and turn it into an absolute path. Both steps are
# checked: an empty OUTPUT_DIR would make the writes below land in "/".
if ! mkdir -p "${OUTPUT_DIR}"; then
	echo "Failed to create output directory: '${OUTPUT_DIR}'" >&2
	exit 1
fi
if ! resolved_output_dir="$(cd "${OUTPUT_DIR}" &>/dev/null && pwd)" || [[ -z "${resolved_output_dir}" ]]; then
	echo "Failed to resolve output directory: '${OUTPUT_DIR}'" >&2
	exit 1
fi
OUTPUT_DIR="${resolved_output_dir}"

# Drop a catch-all .gitignore into the run directory unless one already exists.
if [[ ! -f "${OUTPUT_DIR}/.gitignore" ]]; then
	echo '*' >"${OUTPUT_DIR}/.gitignore"
fi

# Keep a copy of this launcher next to the run's outputs.
cp -f "$0" "${OUTPUT_DIR}/script.sh"

# Pick a random port from [MASTER_PORT_START, MASTER_PORT_END] that does not
# show up as the port part (text after the last ':') of field 4 of `ss -Htan`.
# Both inputs of `comm` go through `sort`, so they share one collation order.
MASTER_PORT_START=10000
MASTER_PORT_END=65535
MASTER_PORT="$(
	comm -23 \
		<(seq "${MASTER_PORT_START}" "${MASTER_PORT_END}" | sort) \
		<(ss -Htan | awk '{ print $4 }' | awk -F ':' '{ print $NF }' | sort -u) |
		shuf -n 1
)"
if [[ -z "${MASTER_PORT}" ]]; then
	echo "Failed to find a free port in range ${MASTER_PORT_START}-${MASTER_PORT_END}." >&2
	exit 1
fi

# From here on, mirror stdout and stderr into log files inside the run
# directory while still forwarding them to the original streams.
exec 1> >(tee "${OUTPUT_DIR}/stdout.log" >&1) 2> >(tee "${OUTPUT_DIR}/stderr.log" >&2)

deepspeed --master_port="${MASTER_PORT}" examples/reward_model/train_cost.py \
	--deepspeed examples/scripts/ds_zero2.json \
	--version v1 \
	--run_name "${RUN_NAME}" \
	--model_name_or_path "${MODEL_NAME_OR_PATH}" \
	--train_data_path "${TRAIN_DATA_PATH}" \
	--eval_data_path "${EVAL_DATA_PATH}" \
	--preference_dimension "${DIMENSION}" \
	--image_dir "${IMAGE_DIR}" \
	--video_dir "${VIDEO_DIR}" \
	--image_tower LanguageBind/LanguageBind_Image \
	--video_tower LanguageBind/LanguageBind_Video_merge \
	--mm_projector_type mlp2x_gelu \
	--pretrain_mm_mlp_adapter "${MM_MLP_ADAPTER_PATH}" \
	--mm_vision_select_layer -2 \
	--mm_use_im_start_end False \
	--mm_use_im_patch_token False \
	--image_aspect_ratio pad \
	--group_by_modality_length True \
	--output_dir "${OUTPUT_DIR}" \
	--cache_dir "./models/cache_dir" \
	--num_train_epochs 4 \
	--per_device_train_batch_size 8 \
	--per_device_eval_batch_size 8 \
	--gradient_accumulation_steps 1 \
	--evaluation_strategy "steps" \
	--eval_steps 0.0499 \
	--load_best_model_at_end True \
	--metric_for_best_model "accuracy" \
	--greater_is_better True \
	--logging_first_step True \
	--save_strategy "steps" \
	--save_steps 0.0499 \
	--save_total_limit 1 \
	--learning_rate 2e-5 \
	--weight_decay 0.1 \
	--warmup_ratio 0.03 \
	--lr_scheduler_type "cosine" \
	--logging_steps 1 \
	--model_max_length 2048 \
	--tokenizer_model_max_length 3072 \
	--gradient_checkpointing True \
	--dataloader_num_workers 8 \
	--report_to wandb \
	--bf16 True \
	--tf32 True \
	--num_frames 8
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "<unk>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
stderr.log ADDED
The diff for this file is too large to render. See raw diff
 
stdout.log ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
+ size 499723
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "add_prefix_space": true,
5
+ "added_tokens_decoder": {
6
+ "0": {
7
+ "content": "<unk>",
8
+ "lstrip": false,
9
+ "normalized": false,
10
+ "rstrip": false,
11
+ "single_word": false,
12
+ "special": true
13
+ },
14
+ "1": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false,
20
+ "special": true
21
+ },
22
+ "2": {
23
+ "content": "</s>",
24
+ "lstrip": false,
25
+ "normalized": false,
26
+ "rstrip": false,
27
+ "single_word": false,
28
+ "special": true
29
+ }
30
+ },
31
+ "bos_token": "<s>",
32
+ "clean_up_tokenization_spaces": false,
33
+ "eos_token": "</s>",
34
+ "legacy": false,
35
+ "model_max_length": 2048,
36
+ "pad_token": "<unk>",
37
+ "padding_side": "right",
38
+ "sp_model_kwargs": {},
39
+ "spaces_between_special_tokens": false,
40
+ "tokenizer_class": "LlamaTokenizer",
41
+ "unk_token": "<unk>",
42
+ "use_default_system_prompt": false
43
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32d0cbb2c81098a0718e1bb3c5fdd12551f4df69815179f5f7286b9af0288021
3
+ size 7224