Upload model
Browse files- config.json +45 -0
- pytorch_model-00001-of-00023.bin +3 -0
- pytorch_model-00002-of-00023.bin +3 -0
- pytorch_model-00003-of-00023.bin +3 -0
- pytorch_model-00004-of-00023.bin +3 -0
- pytorch_model-00005-of-00023.bin +3 -0
- pytorch_model-00006-of-00023.bin +3 -0
- pytorch_model-00007-of-00023.bin +3 -0
- pytorch_model-00008-of-00023.bin +3 -0
- pytorch_model-00009-of-00023.bin +3 -0
- pytorch_model-00010-of-00023.bin +3 -0
- pytorch_model-00011-of-00023.bin +3 -0
- pytorch_model-00012-of-00023.bin +3 -0
- pytorch_model-00013-of-00023.bin +3 -0
- pytorch_model-00014-of-00023.bin +3 -0
- pytorch_model-00015-of-00023.bin +3 -0
- pytorch_model-00016-of-00023.bin +3 -0
- pytorch_model-00017-of-00023.bin +3 -0
- pytorch_model-00018-of-00023.bin +3 -0
- pytorch_model-00019-of-00023.bin +3 -0
- pytorch_model-00020-of-00023.bin +3 -0
- pytorch_model-00021-of-00023.bin +3 -0
- pytorch_model-00022-of-00023.bin +3 -0
- pytorch_model-00023-of-00023.bin +3 -0
- pytorch_model.bin.index.json +0 -0
config.json
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/home/arthur_huggingface_co/fairseq/weights/checkpoints/hf-converted-moe-54b",
|
3 |
+
"activation_dropout": 0.0,
|
4 |
+
"activation_function": "relu",
|
5 |
+
"architectures": [
|
6 |
+
"NllbMoeModel"
|
7 |
+
],
|
8 |
+
"attention_dropout": 0.1,
|
9 |
+
"bos_token_id": 0,
|
10 |
+
"d_model": 2048,
|
11 |
+
"decoder_attention_heads": 16,
|
12 |
+
"decoder_ffn_dim": 8192,
|
13 |
+
"decoder_layerdrop": 0,
|
14 |
+
"decoder_layers": 24,
|
15 |
+
"decoder_sparse_step": 4,
|
16 |
+
"decoder_start_token_id": 2,
|
17 |
+
"dropout": 0.1,
|
18 |
+
"encoder_attention_heads": 16,
|
19 |
+
"encoder_ffn_dim": 8192,
|
20 |
+
"encoder_layerdrop": 0,
|
21 |
+
"encoder_layers": 24,
|
22 |
+
"encoder_sparse_step": 4,
|
23 |
+
"eos_token_id": 2,
|
24 |
+
"expert_capacity": 64,
|
25 |
+
"init_std": 0.02,
|
26 |
+
"is_encoder_decoder": true,
|
27 |
+
"max_length": 200,
|
28 |
+
"max_position_embeddings": 1024,
|
29 |
+
"model_type": "nllb_moe",
|
30 |
+
"num_experts": 128,
|
31 |
+
"num_hidden_layers": 24,
|
32 |
+
"pad_token_id": 1,
|
33 |
+
"router_aux_loss_coef": 0.001,
|
34 |
+
"router_bias": false,
|
35 |
+
"router_dtype": "float32",
|
36 |
+
"router_ignore_padding_tokens": false,
|
37 |
+
"router_jitter_noise": 0.01,
|
38 |
+
"router_type": "tokens_masked",
|
39 |
+
"router_z_loss_coef": 0.001,
|
40 |
+
"scale_embedding": true,
|
41 |
+
"torch_dtype": "float32",
|
42 |
+
"transformers_version": "4.27.0.dev0",
|
43 |
+
"use_cache": true,
|
44 |
+
"vocab_size": 256206
|
45 |
+
}
|
pytorch_model-00001-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8afff1d6a3675b15d3d695b63d90dd28e68516c7acca4996268897ac5561662b
|
3 |
+
size 7881651219
|
pytorch_model-00002-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f1f600c504a48015beb784863f667ca37b752487ad737bf0c874e9ba746f8509
|
3 |
+
size 9935242109
|
pytorch_model-00003-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77a840d95cc925c73f0323000a5928c6a8ad68c0b8928b37e3215fd6190c4a26
|
3 |
+
size 9936483839
|
pytorch_model-00004-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3ed67290d8b59fef453e0dc2bd16f7f36990c8964a608218a574e28459551451
|
3 |
+
size 9935242173
|
pytorch_model-00005-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33953306f6361678a926c0537582fdd9ce705de1344cc959a1ac7348dc8721d6
|
3 |
+
size 9936484139
|
pytorch_model-00006-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6afde2d3b0d796c600572a202953474cb763060ed8543838a136854de0b4d50
|
3 |
+
size 9936484439
|
pytorch_model-00007-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a8556c3d533514ba8765b3fdc1cfa336c91b6d5784c71c77e59dfc9c4fe425f
|
3 |
+
size 9935242365
|
pytorch_model-00008-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc98da3972700d562c05661fcfd505f6af641b3c05dbb14d85848c78571388f8
|
3 |
+
size 9936483959
|
pytorch_model-00009-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:437f34d3b4217efea0b59d4bacfa10151e80d33eb7dc1e93161c4c53a7eff814
|
3 |
+
size 9935242365
|
pytorch_model-00010-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2ffd7d7a25457657d83f0203505b775c49119e088d1a9bcbb875b15dec9d8207
|
3 |
+
size 9936483959
|
pytorch_model-00011-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a9a547c6296de0902a42895ec9a3457db0d087ac315d2e555fc44d7baba43519
|
3 |
+
size 9935242429
|
pytorch_model-00012-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d9ebfa0143256c234700a02620c79132c15415832790374bca608269628de29
|
3 |
+
size 9962852511
|
pytorch_model-00013-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cae1903e0823a905e578fcc2b71d9176904e26894e8921ff91888db8d2a95677
|
3 |
+
size 9935242109
|
pytorch_model-00014-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a06bcd65e5326b04cca8a5615ee5dad5cd66a16b35072455702af97d6b93d7d
|
3 |
+
size 9936608795
|
pytorch_model-00015-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e1b403c0a0bba6ac541d06c6ec3f0cce11df8fc445e9ac8aa519e48fc348e6fa
|
3 |
+
size 9935242173
|
pytorch_model-00016-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1548b470c85185890455ab04da7d35cbc67fb60a8aa2a205820186513c5ec02d
|
3 |
+
size 9936609383
|
pytorch_model-00017-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:56cdd329970fce5d761dbffc96bdd0b021da41480aff4baff26ed7e176fed697
|
3 |
+
size 9936609879
|
pytorch_model-00018-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2f48290f05efaee4ec7afe5c1ae0accbf7bf4e83d2a2d8ab6ae36baca2b418b
|
3 |
+
size 9935242301
|
pytorch_model-00019-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e78293e76a2f6f1a747748219c4060aebdd517f1834b4eec68c68ff28d02b695
|
3 |
+
size 9936609719
|
pytorch_model-00020-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b26ddc9df8fb72f421713a0a75c077eb8a8e8b018b31ff03e5d5d26a15d5421
|
3 |
+
size 9935242365
|
pytorch_model-00021-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02cba09c99a8e6f49de53ee3c4e6c8bc81569e9692228e8ad3e7b0d0d1cf1c7a
|
3 |
+
size 9936608939
|
pytorch_model-00022-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63e694227e40ad0e6633ea2bd482a6b1ed1a1f79642fc804aaf8d589528f976c
|
3 |
+
size 9935242365
|
pytorch_model-00023-of-00023.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:09a4e4d2c098c2295634446a49154d0745084ae5f3664fbff6077faa2de75c96
|
3 |
+
size 3557911077
|
pytorch_model.bin.index.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|