CodeZzz committed on
Commit 90939c2 · 1 Parent(s): 840bea0

Upload the model

.gitattributes CHANGED
@@ -33,4 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
- split_part_* filter=lfs diff=lfs merge=lfs -text
+ ckpts/* filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -1,3 +1,30 @@
  ---
- license: apache-2.0
+ license: other
+ license_name: cogvlm2
+ license_link: https://huggingface.co/THUDM/cogvlm2-llama3-chat-19B/blob/main/LICENSE
+
+ language:
+ - en
+ pipeline_tag: text-generation
+ tags:
+ - chat
+ - cogvlm2
+
+ inference: false
  ---
+ # VisionReward-Image
+
+ ## Introduction
+ We present VisionReward, a general strategy for aligning visual generation models (both image and video generation) with human preferences through a fine-grained, multi-dimensional framework. We decompose human preferences in images and videos into multiple dimensions, each represented by a series of judgment questions, which are linearly weighted and summed to produce an interpretable and accurate score. To address the challenges of video quality assessment, we systematically analyze various dynamic features of videos, which helps VisionReward surpass VideoScore by 17.2% and achieve top performance in video preference prediction.
+ Here, we present the VisionReward-Image model.
+
+ ## Merging and Extracting Checkpoint Files
+ Use the following commands to merge the split files into a single `.tar` archive and then extract it:
+
+ ```sh
+ cat ckpts/split_part_* > ckpts/visionreward_image.tar
+ tar -xvf ckpts/visionreward_image.tar
+ ```
+
+ ## Using this model
+ You can quickly install the Python package dependencies and run model inference by following the instructions in our [GitHub repository](https://github.com/THUDM/VisionReward).
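For orientation, here is a minimal end-to-end sketch of the workflow the README describes. The repository URL and the checkpoint-merging commands come from the README itself; the clone-then-install steps and the `requirements.txt` file name are assumptions about a typical layout, so check the VisionReward GitHub README for the exact commands.

```sh
# Sketch only: the exact script names and flags are defined in the VisionReward GitHub repo.
git clone https://github.com/THUDM/VisionReward
cd VisionReward
pip install -r requirements.txt   # assumed dependency file name

# Merge and unpack the split checkpoint downloaded from this model repo.
cat ckpts/split_part_* > ckpts/visionreward_image.tar
tar -xvf ckpts/visionreward_image.tar
```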
ckpts/split_part_aa ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e1b17eb0d861c70dab3e6226cdacc3bfea8eb59feb897f8728152f5bdb6afd5e
+ size 5221908480
ckpts/split_part_ab ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:db459381667c7d5bb35a1cada7ce74387924e6397c2b8ee16d6d9f6b0939dc6c
+ size 5221908480
ckpts/split_part_ac ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:40e6b652af3ef13bb2d15c23f2ee2d31c97916686dbe43788dd23fe42723704e
+ size 5221908480
ckpts/split_part_ad ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8b3991f16bd6dd917088c5a869dffe9f993879b687412afe934d2b30063d2749
+ size 5221908480
ckpts/split_part_ae ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c9fb496a7a83cb6df4cd728cafc3c058c296bbaaa1d2efc943cec9b1982a7850
+ size 5221908480
ckpts/split_part_af ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b479a4e489269868e7734c03bd20f4bcad75c74244ece15fcf759faf0c7e443c
+ size 5221908480
ckpts/split_part_ag ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75fa73a5cacf6254ac7eb4a536cdd93231474c1aeb93930a15adacb470da77ef
+ size 5221908480
ckpts/split_part_ah ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:14d377163aac293220b3ee49b85c0c5698de9af70e0197b7c27e52145066b1f6
+ size 5221908480
ckpts/split_part_ai ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba1afa5c84efed4e4e03f96d77f31c7a08a4b17c0197f941d962a961ecabeb00
+ size 5221908480
ckpts/split_part_aj ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5a44c61804c077ff9df1023fded4b40084cc1dd4c8ad0fc9d2aa00e88e04c214
+ size 5221908480
ckpts/split_part_ak ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:de84a206568f4aaef88defc77166b46fb9c6c2b6e9358f9dd5f4d1f3907b4f27
+ size 5221908480
ckpts/split_part_al ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d19506291b67359369c4f4222c596314ee32c826d3df1f6d92242dd8d432362f
+ size 5221908480
ckpts/split_part_am ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:35dcc0a761dbb854f25fc1bedeeffa2315542b71a6c01c269572e11b916bf548
+ size 5221908480
ckpts/split_part_an ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:182e41c50e7a7233e831e43fb9e717ed97357339446d1d85852b6cf3997316ca
+ size 5221908480
ckpts/split_part_ao ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58e74b4bc0d5c976ea441a68aec3a05f920c1ed6724c0c719911f553df7d9c94
+ size 4906260480
latest ADDED
@@ -0,0 +1 @@
+ 1
model_config.json ADDED
@@ -0,0 +1,41 @@
+ {
+     "model_class": "VisualChatModel",
+     "tokenizer_type": "Meta-Llama-3-8B-Instruct",
+     "num_layers": 32,
+     "hidden_size": 4096,
+     "num_attention_heads": 32,
+     "vocab_size": 128256,
+     "layernorm_order": "pre",
+     "model_parallel_size": 1,
+     "max_sequence_length": 8192,
+     "use_bias": false,
+     "inner_hidden_size": 14336,
+     "num_multi_query_heads": 8,
+     "image_length": 2304,
+     "image_size": 1344,
+     "eva_args": {
+         "model_class": "EVA2CLIPModel",
+         "num_layers": 63,
+         "hidden_size": 1792,
+         "num_attention_heads": 16,
+         "vocab_size": 1,
+         "layernorm_order": "post",
+         "model_parallel_size": 1,
+         "max_sequence_length": 257,
+         "inner_hidden_size": 15360,
+         "use_final_layernorm": false,
+         "layernorm_epsilon": 1e-06,
+         "row_parallel_linear_final_bias": false,
+         "image_size": [
+             1344,
+             1344
+         ],
+         "pre_len": 1,
+         "post_len": 0,
+         "in_channels": 3,
+         "patch_size": 14
+     },
+     "bos_token_id": 128000,
+     "eos_token_id": 128001,
+     "pad_token_id": null
+ }
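As a quick sanity check after downloading, the key architecture fields can be read straight out of `model_config.json`. This is a convenience sketch only (it assumes `jq` is installed); the field names are exactly those in the file above.

```sh
# Print the main architecture parameters of the language model and the vision tower.
jq '{model_class, tokenizer_type, num_layers, hidden_size, image_size,
     vision_model: .eva_args.model_class, vision_layers: .eva_args.num_layers}' model_config.json
```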