MohamedAhmedAE committed
Commit e3aefa8 · verified · 1 Parent(s): 8c9f44f

Training in progress, step 200

Files changed (3)
  1. config.json +44 -24
  2. model.safetensors +2 -2
  3. training_args.bin +2 -2
config.json CHANGED
@@ -1,44 +1,64 @@
 {
-  "_name_or_path": "/root/.cache/huggingface/hub/OpenAI_Clip_Roco_ALL_V1",
   "architectures": [
-    "CLIPModel"
+    "CustomVisionTextModel"
   ],
-  "initializer_factor": 1.0,
   "logit_scale_init_value": 2.6592,
-  "model_type": "clip",
-  "projection_dim": 512,
+  "model_type": "vision-text-dual-encoder",
+  "projection_dim": 1024,
   "text_config": {
-    "_name_or_path": "/kaggle/working/TextModel",
+    "_name_or_path": "meta-llama/Llama-3.2-3B-Instruct",
     "architectures": [
-      "RobertaModel"
+      "LlamaForCausalLM"
     ],
-    "attention_probs_dropout_prob": 0.1,
-    "bos_token_id": 0,
-    "classifier_dropout": null,
-    "eos_token_id": 2,
-    "hidden_act": "gelu",
-    "hidden_dropout_prob": 0.1,
-    "hidden_size": 768,
-    "intermediate_size": 3072,
-    "max_position_embeddings": 1026,
-    "model_type": "clip_text_model",
-    "num_attention_heads": 12,
-    "position_embedding_type": "absolute",
-    "torch_dtype": "float32",
-    "type_vocab_size": 1,
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 128000,
+    "eos_token_id": [
+      128001,
+      128008,
+      128009
+    ],
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 3072,
+    "initializer_range": 0.02,
+    "intermediate_size": 8192,
+    "max_position_embeddings": 131072,
+    "mlp_bias": false,
+    "model_type": "llama",
+    "num_attention_heads": 24,
+    "num_hidden_layers": 28,
+    "num_key_value_heads": 8,
+    "pretraining_tp": 1,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+      "factor": 32.0,
+      "high_freq_factor": 4.0,
+      "low_freq_factor": 1.0,
+      "original_max_position_embeddings": 8192,
+      "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "tie_word_embeddings": true,
+    "torch_dtype": "bfloat16",
     "use_cache": true,
-    "vocab_size": 50265
+    "vocab_size": 128256
   },
   "torch_dtype": "float32",
-  "transformers_version": "4.44.0",
+  "transformers_version": "4.52.4",
   "vision_config": {
-    "_name_or_path": "openai/clip-vit-large-patch14-336",
+    "attention_dropout": 0.0,
     "dropout": 0.0,
+    "hidden_act": "quick_gelu",
     "hidden_size": 1024,
     "image_size": 336,
+    "initializer_factor": 1.0,
+    "initializer_range": 0.02,
     "intermediate_size": 4096,
+    "layer_norm_eps": 1e-05,
     "model_type": "clip_vision_model",
     "num_attention_heads": 16,
+    "num_channels": 3,
     "num_hidden_layers": 24,
     "patch_size": 14,
     "projection_dim": 768
model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2c45dfc162774ddbec26ef765bf4bd84758c88e2ec06c30ee07bdd1b393ba98c
-size 1715561468
+oid sha256:a8e1d29c30ce74c9631203df66094201e3f170e3df25fdaf282660f4b423e9fc
+size 2410717252
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7cc20bec6f924b34961dab5381cd049283537f937818f0168241e5becc4862f0
-size 5240
+oid sha256:00d28d176492d90cb5ed9c6500071e63972afe80ff0b7f0460ddd136ecc10a5c
+size 5368
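
Both model.safetensors and training_args.bin are stored via Git LFS, so the diff only records each object's new sha256 and byte size. A minimal sketch, assuming the actual files have been pulled locally (the path below is hypothetical), of how a downloaded file could be checked against the pointer fields shown above:

```python
import hashlib

def lfs_fingerprint(path: str, chunk_size: int = 1 << 20) -> tuple[str, int]:
    """Return (sha256 hex digest, size in bytes) for a local file,
    i.e. the two fields a Git LFS pointer records."""
    h = hashlib.sha256()
    size = 0
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
            size += len(chunk)
    return h.hexdigest(), size

# Hypothetical local path; compare against the new model.safetensors pointer above.
digest, size = lfs_fingerprint("model.safetensors")
print(digest == "a8e1d29c30ce74c9631203df66094201e3f170e3df25fdaf282660f4b423e9fc",
      size == 2410717252)
```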