ArthurZ HF staff committed on
Commit
4bd8b75
1 Parent(s): 9431f76

Upload model

Browse files
config.json ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/arthur_huggingface_co/fairseq/weights/checkpoints/hf-converted-moe-54b",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "relu",
5
+ "architectures": [
6
+ "NllbMoeModel"
7
+ ],
8
+ "attention_dropout": 0.1,
9
+ "bos_token_id": 0,
10
+ "d_model": 2048,
11
+ "decoder_attention_heads": 16,
12
+ "decoder_ffn_dim": 8192,
13
+ "decoder_layerdrop": 0,
14
+ "decoder_layers": 24,
15
+ "decoder_sparse_step": 4,
16
+ "decoder_start_token_id": 2,
17
+ "dropout": 0.1,
18
+ "encoder_attention_heads": 16,
19
+ "encoder_ffn_dim": 8192,
20
+ "encoder_layerdrop": 0,
21
+ "encoder_layers": 24,
22
+ "encoder_sparse_step": 4,
23
+ "eos_token_id": 2,
24
+ "expert_capacity": 64,
25
+ "init_std": 0.02,
26
+ "is_encoder_decoder": true,
27
+ "max_length": 200,
28
+ "max_position_embeddings": 1024,
29
+ "model_type": "nllb_moe",
30
+ "num_experts": 128,
31
+ "num_hidden_layers": 24,
32
+ "pad_token_id": 1,
33
+ "router_aux_loss_coef": 0.001,
34
+ "router_bias": false,
35
+ "router_dtype": "float32",
36
+ "router_ignore_padding_tokens": false,
37
+ "router_jitter_noise": 0.01,
38
+ "router_type": "tokens_masked",
39
+ "router_z_loss_coef": 0.001,
40
+ "scale_embedding": true,
41
+ "torch_dtype": "float32",
42
+ "transformers_version": "4.27.0.dev0",
43
+ "use_cache": true,
44
+ "vocab_size": 256206
45
+ }
pytorch_model-00001-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8afff1d6a3675b15d3d695b63d90dd28e68516c7acca4996268897ac5561662b
3
+ size 7881651219
pytorch_model-00002-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1f600c504a48015beb784863f667ca37b752487ad737bf0c874e9ba746f8509
3
+ size 9935242109
pytorch_model-00003-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:77a840d95cc925c73f0323000a5928c6a8ad68c0b8928b37e3215fd6190c4a26
3
+ size 9936483839
pytorch_model-00004-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed67290d8b59fef453e0dc2bd16f7f36990c8964a608218a574e28459551451
3
+ size 9935242173
pytorch_model-00005-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33953306f6361678a926c0537582fdd9ce705de1344cc959a1ac7348dc8721d6
3
+ size 9936484139
pytorch_model-00006-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6afde2d3b0d796c600572a202953474cb763060ed8543838a136854de0b4d50
3
+ size 9936484439
pytorch_model-00007-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a8556c3d533514ba8765b3fdc1cfa336c91b6d5784c71c77e59dfc9c4fe425f
3
+ size 9935242365
pytorch_model-00008-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc98da3972700d562c05661fcfd505f6af641b3c05dbb14d85848c78571388f8
3
+ size 9936483959
pytorch_model-00009-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:437f34d3b4217efea0b59d4bacfa10151e80d33eb7dc1e93161c4c53a7eff814
3
+ size 9935242365
pytorch_model-00010-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ffd7d7a25457657d83f0203505b775c49119e088d1a9bcbb875b15dec9d8207
3
+ size 9936483959
pytorch_model-00011-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a9a547c6296de0902a42895ec9a3457db0d087ac315d2e555fc44d7baba43519
3
+ size 9935242429
pytorch_model-00012-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d9ebfa0143256c234700a02620c79132c15415832790374bca608269628de29
3
+ size 9962852511
pytorch_model-00013-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cae1903e0823a905e578fcc2b71d9176904e26894e8921ff91888db8d2a95677
3
+ size 9935242109
pytorch_model-00014-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a06bcd65e5326b04cca8a5615ee5dad5cd66a16b35072455702af97d6b93d7d
3
+ size 9936608795
pytorch_model-00015-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1b403c0a0bba6ac541d06c6ec3f0cce11df8fc445e9ac8aa519e48fc348e6fa
3
+ size 9935242173
pytorch_model-00016-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1548b470c85185890455ab04da7d35cbc67fb60a8aa2a205820186513c5ec02d
3
+ size 9936609383
pytorch_model-00017-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56cdd329970fce5d761dbffc96bdd0b021da41480aff4baff26ed7e176fed697
3
+ size 9936609879
pytorch_model-00018-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2f48290f05efaee4ec7afe5c1ae0accbf7bf4e83d2a2d8ab6ae36baca2b418b
3
+ size 9935242301
pytorch_model-00019-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e78293e76a2f6f1a747748219c4060aebdd517f1834b4eec68c68ff28d02b695
3
+ size 9936609719
pytorch_model-00020-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b26ddc9df8fb72f421713a0a75c077eb8a8e8b018b31ff03e5d5d26a15d5421
3
+ size 9935242365
pytorch_model-00021-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02cba09c99a8e6f49de53ee3c4e6c8bc81569e9692228e8ad3e7b0d0d1cf1c7a
3
+ size 9936608939
pytorch_model-00022-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e694227e40ad0e6633ea2bd482a6b1ed1a1f79642fc804aaf8d589528f976c
3
+ size 9935242365
pytorch_model-00023-of-00023.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09a4e4d2c098c2295634446a49154d0745084ae5f3664fbff6077faa2de75c96
3
+ size 3557911077
pytorch_model.bin.index.json ADDED
The diff for this file is too large to render. See raw diff