daekeun-ml committed
Commit a4e17fa · verified · 1 parent: 99edef4

Upload Phi4MMForCausalLM


Updated weights (Speech full fine-tuning, 4 epochs)

config.json CHANGED
@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "./Phi-4-multimodal-finetune-ko-speech",
+  "_name_or_path": "./output",
   "architectures": [
     "Phi4MMForCausalLM"
   ],
@@ -209,7 +209,7 @@
   },
   "tie_word_embeddings": true,
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.48.2",
+  "transformers_version": "4.46.3",
   "use_cache": true,
   "vision_lora": {
     "dp": 0.0,
generation_config.json CHANGED
@@ -6,5 +6,5 @@
     199999
   ],
   "pad_token_id": 199999,
-  "transformers_version": "4.48.2"
+  "transformers_version": "4.46.3"
 }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a1e57956624d5a96c5dcaab437261353efbcfaf5eff49d58d92329311020802
-size 4997504848
+oid sha256:b93562f8e2135e7d780834a9f2300699e9e71d56d64e986386e92906ae571ca9
+size 4998420448
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a7ae17c45b5ab41c7659784e8cb2d6a79e6f886783b3c2af0297448a1f240c1c
-size 4952333128
+oid sha256:9c974d2962a9fa092d2219161c87234175e860328c5978591548f0886e929f33
+size 4983891952
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7277ccd89f1b88a99dd7ce37095510f2036e18e6289b732164704ee0d2934b92
-size 1199389232
+oid sha256:d44c5ed024d7f64060d50ad89a41380def480e396d6ae71b774ee9f6125ec38a
+size 1905111704
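
Each .safetensors entry above is a Git LFS pointer, so the diff records only a new content hash and byte size, not the weights themselves. A downloaded shard can be checked against its pointer with a short hash comparison; a minimal sketch, assuming the shard sits in the current directory, with the expected values taken from the new pointer for shard 1 above:

# Sketch: verify a downloaded shard against the sha256 oid in its LFS pointer.
import hashlib
import os

def sha256_of(path, chunk_size=1 << 20):
    # Stream the file in 1 MiB chunks to avoid loading ~5 GB into memory.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

path = "model-00001-of-00003.safetensors"
assert os.path.getsize(path) == 4998420448
assert sha256_of(path) == "b93562f8e2135e7d780834a9f2300699e9e71d56d64e986386e92906ae571ca9"
print("shard 1 matches its LFS pointer")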
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
 {
   "metadata": {
-    "total_size": 11148920768
+    "total_size": 11887118272
   },
   "weight_map": {
     "model.embed_tokens.weight": "model-00001-of-00003.safetensors",
@@ -1741,50 +1741,50 @@
     "model.layers.24.self_attn.qkv_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.24.self_attn.qkv_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
     "model.layers.24.self_attn.qkv_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.input_layernorm.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.down_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.gate_up_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.gate_up_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.gate_up_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.gate_up_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.mlp.gate_up_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.down_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_up_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_up_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_up_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_up_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.mlp.gate_up_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.25.self_attn.o_proj.base_layer.weight": "model-00002-of-00003.safetensors",
     "model.layers.25.self_attn.o_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
     "model.layers.25.self_attn.o_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.25.self_attn.o_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
     "model.layers.25.self_attn.o_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.self_attn.qkv_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.self_attn.qkv_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.self_attn.qkv_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.self_attn.qkv_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.25.self_attn.qkv_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.input_layernorm.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.down_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.gate_up_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.gate_up_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.gate_up_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.gate_up_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.mlp.gate_up_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.o_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.qkv_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.qkv_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.qkv_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.qkv_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.26.self_attn.qkv_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
+    "model.layers.25.self_attn.qkv_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.qkv_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.qkv_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.qkv_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.25.self_attn.qkv_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.input_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.down_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_up_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_up_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_up_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_up_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.mlp.gate_up_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.o_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.qkv_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.qkv_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.qkv_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.qkv_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.26.self_attn.qkv_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
     "model.layers.27.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.27.mlp.down_proj.base_layer.weight": "model-00003-of-00003.safetensors",
     "model.layers.27.mlp.down_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
@@ -1797,16 +1797,16 @@
     "model.layers.27.mlp.gate_up_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
     "model.layers.27.mlp.gate_up_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
     "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.o_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.qkv_proj.base_layer.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.qkv_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.qkv_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.qkv_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.27.self_attn.qkv_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.o_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.qkv_proj.base_layer.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.qkv_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.qkv_proj.lora_A.vision.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.qkv_proj.lora_B.speech.weight": "model-00003-of-00003.safetensors",
+    "model.layers.27.self_attn.qkv_proj.lora_B.vision.weight": "model-00003-of-00003.safetensors",
     "model.layers.28.input_layernorm.weight": "model-00003-of-00003.safetensors",
     "model.layers.28.mlp.down_proj.base_layer.weight": "model-00003-of-00003.safetensors",
     "model.layers.28.mlp.down_proj.lora_A.speech.weight": "model-00003-of-00003.safetensors",
@@ -1989,22 +1989,22 @@
     "model.layers.7.mlp.down_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.7.mlp.down_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
     "model.layers.7.mlp.down_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
-    "model.layers.7.mlp.gate_up_proj.base_layer.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.mlp.gate_up_proj.lora_A.speech.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.mlp.gate_up_proj.lora_A.vision.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.gate_up_proj.base_layer.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.mlp.gate_up_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.mlp.gate_up_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.7.mlp.gate_up_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
-    "model.layers.7.mlp.gate_up_proj.lora_B.vision.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.mlp.gate_up_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.7.self_attn.o_proj.base_layer.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.o_proj.lora_A.speech.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.o_proj.lora_A.vision.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.o_proj.lora_B.speech.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.o_proj.lora_B.vision.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.qkv_proj.base_layer.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.qkv_proj.lora_A.speech.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.qkv_proj.lora_A.vision.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.qkv_proj.lora_B.speech.weight": "model-00001-of-00003.safetensors",
-    "model.layers.7.self_attn.qkv_proj.lora_B.vision.weight": "model-00001-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.o_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.qkv_proj.base_layer.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.qkv_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.qkv_proj.lora_A.vision.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.qkv_proj.lora_B.speech.weight": "model-00002-of-00003.safetensors",
+    "model.layers.7.self_attn.qkv_proj.lora_B.vision.weight": "model-00002-of-00003.safetensors",
     "model.layers.8.input_layernorm.weight": "model-00002-of-00003.safetensors",
     "model.layers.8.mlp.down_proj.base_layer.weight": "model-00002-of-00003.safetensors",
     "model.layers.8.mlp.down_proj.lora_A.speech.weight": "model-00002-of-00003.safetensors",