Synchronizing local compiler cache.
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +20 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json +79 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json +79 -0
- neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json +79 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff +1 -1
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo +1 -1
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff +0 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb +3 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log +1 -0
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb +1 -1
- neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json +1 -0
.gitattributes
CHANGED
@@ -4008,3 +4008,23 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff f
|
|
4008 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4009 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4010 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
4008 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4009 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4010 |
neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4011 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4012 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4013 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4014 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4015 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4016 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4017 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4018 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4019 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4020 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4021 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4022 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4023 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4024 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4025 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
|
4026 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
4027 |
+
neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
|
4028 |
+
neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
|
4029 |
+
neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
|
4030 |
+
neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "tengomucho/tiny-random-gpt-oss",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GptOssForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": true,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 384,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 512,
|
15 |
+
"layer_types": [
|
16 |
+
"sliding_attention",
|
17 |
+
"full_attention"
|
18 |
+
],
|
19 |
+
"max_position_embeddings": 8192,
|
20 |
+
"mlp_bias": false,
|
21 |
+
"model_type": "gpt_oss",
|
22 |
+
"neuron": {
|
23 |
+
"_serialized_key": "NxDNeuronConfig",
|
24 |
+
"async_mode": false,
|
25 |
+
"attn_kernel_enabled": false,
|
26 |
+
"batch_size": 1,
|
27 |
+
"capacity_factor": null,
|
28 |
+
"cc_pipeline_tiling_factor": 2,
|
29 |
+
"checkpoint_id": "tengomucho/tiny-random-gpt-oss",
|
30 |
+
"checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
|
31 |
+
"continuous_batching": false,
|
32 |
+
"enable_bucketing": false,
|
33 |
+
"ep_degree": 1,
|
34 |
+
"flash_decoding_enabled": false,
|
35 |
+
"fused_qkv": false,
|
36 |
+
"glu_mlp": true,
|
37 |
+
"is_chunked_prefill": false,
|
38 |
+
"local_ranks_size": 2,
|
39 |
+
"logical_nc_config": 1,
|
40 |
+
"max_batch_size": 1,
|
41 |
+
"max_context_length": 100,
|
42 |
+
"max_topk": 256,
|
43 |
+
"mlp_kernel_enabled": false,
|
44 |
+
"mlp_kernel_fuse_residual_add": false,
|
45 |
+
"n_active_tokens": 100,
|
46 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
47 |
+
"num_cores_per_group": 1,
|
48 |
+
"on_device_sampling": false,
|
49 |
+
"optimum_neuron_version": "0.3.1.dev0",
|
50 |
+
"output_logits": false,
|
51 |
+
"padding_side": "right",
|
52 |
+
"pp_degree": 1,
|
53 |
+
"qk_layernorm": false,
|
54 |
+
"qkv_kernel_enabled": false,
|
55 |
+
"rpl_reduce_dtype": "bfloat16",
|
56 |
+
"sequence_length": 100,
|
57 |
+
"sequence_parallel_enabled": false,
|
58 |
+
"speculation_length": 0,
|
59 |
+
"start_rank_id": 0,
|
60 |
+
"target": null,
|
61 |
+
"torch_dtype": "bfloat16",
|
62 |
+
"tp_degree": 2,
|
63 |
+
"vocab_parallel": false
|
64 |
+
},
|
65 |
+
"num_attention_heads": 18,
|
66 |
+
"num_experts_per_tok": 4,
|
67 |
+
"num_hidden_layers": 2,
|
68 |
+
"num_key_value_heads": 2,
|
69 |
+
"num_local_experts": 64,
|
70 |
+
"output_router_logits": false,
|
71 |
+
"rms_norm_eps": 1e-05,
|
72 |
+
"rope_scaling": null,
|
73 |
+
"rope_theta": 150000.0,
|
74 |
+
"router_aux_loss_coef": 0.9,
|
75 |
+
"sliding_window": 128,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 201088
|
79 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "tengomucho/tiny-random-gpt-oss",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GptOssForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": true,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 384,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 512,
|
15 |
+
"layer_types": [
|
16 |
+
"sliding_attention",
|
17 |
+
"full_attention"
|
18 |
+
],
|
19 |
+
"max_position_embeddings": 8192,
|
20 |
+
"mlp_bias": false,
|
21 |
+
"model_type": "gpt_oss",
|
22 |
+
"neuron": {
|
23 |
+
"_serialized_key": "NxDNeuronConfig",
|
24 |
+
"async_mode": false,
|
25 |
+
"attn_kernel_enabled": false,
|
26 |
+
"batch_size": 1,
|
27 |
+
"capacity_factor": null,
|
28 |
+
"cc_pipeline_tiling_factor": 2,
|
29 |
+
"checkpoint_id": "tengomucho/tiny-random-gpt-oss",
|
30 |
+
"checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
|
31 |
+
"continuous_batching": false,
|
32 |
+
"enable_bucketing": false,
|
33 |
+
"ep_degree": 1,
|
34 |
+
"flash_decoding_enabled": false,
|
35 |
+
"fused_qkv": false,
|
36 |
+
"glu_mlp": true,
|
37 |
+
"is_chunked_prefill": false,
|
38 |
+
"local_ranks_size": 2,
|
39 |
+
"logical_nc_config": 1,
|
40 |
+
"max_batch_size": 1,
|
41 |
+
"max_context_length": 100,
|
42 |
+
"max_topk": 256,
|
43 |
+
"mlp_kernel_enabled": false,
|
44 |
+
"mlp_kernel_fuse_residual_add": false,
|
45 |
+
"n_active_tokens": 100,
|
46 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
47 |
+
"num_cores_per_group": 1,
|
48 |
+
"on_device_sampling": false,
|
49 |
+
"optimum_neuron_version": "0.3.1.dev0",
|
50 |
+
"output_logits": false,
|
51 |
+
"padding_side": "right",
|
52 |
+
"pp_degree": 1,
|
53 |
+
"qk_layernorm": false,
|
54 |
+
"qkv_kernel_enabled": false,
|
55 |
+
"rpl_reduce_dtype": "float16",
|
56 |
+
"sequence_length": 100,
|
57 |
+
"sequence_parallel_enabled": false,
|
58 |
+
"speculation_length": 0,
|
59 |
+
"start_rank_id": 0,
|
60 |
+
"target": null,
|
61 |
+
"torch_dtype": "float16",
|
62 |
+
"tp_degree": 2,
|
63 |
+
"vocab_parallel": false
|
64 |
+
},
|
65 |
+
"num_attention_heads": 18,
|
66 |
+
"num_experts_per_tok": 4,
|
67 |
+
"num_hidden_layers": 2,
|
68 |
+
"num_key_value_heads": 2,
|
69 |
+
"num_local_experts": 64,
|
70 |
+
"output_router_logits": false,
|
71 |
+
"rms_norm_eps": 1e-05,
|
72 |
+
"rope_scaling": null,
|
73 |
+
"rope_theta": 150000.0,
|
74 |
+
"router_aux_loss_coef": 0.9,
|
75 |
+
"sliding_window": 128,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 201088
|
79 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json
ADDED
@@ -0,0 +1,79 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_entry_class": "SingleModelCacheEntry",
|
3 |
+
"_model_id": "tengomucho/tiny-random-gpt-oss",
|
4 |
+
"_task": "text-generation",
|
5 |
+
"architectures": [
|
6 |
+
"GptOssForCausalLM"
|
7 |
+
],
|
8 |
+
"attention_bias": true,
|
9 |
+
"attention_dropout": 0.0,
|
10 |
+
"head_dim": 64,
|
11 |
+
"hidden_act": "silu",
|
12 |
+
"hidden_size": 384,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 512,
|
15 |
+
"layer_types": [
|
16 |
+
"sliding_attention",
|
17 |
+
"full_attention"
|
18 |
+
],
|
19 |
+
"max_position_embeddings": 8192,
|
20 |
+
"mlp_bias": false,
|
21 |
+
"model_type": "gpt_oss",
|
22 |
+
"neuron": {
|
23 |
+
"_serialized_key": "NxDNeuronConfig",
|
24 |
+
"async_mode": false,
|
25 |
+
"attn_kernel_enabled": false,
|
26 |
+
"batch_size": 2,
|
27 |
+
"capacity_factor": null,
|
28 |
+
"cc_pipeline_tiling_factor": 2,
|
29 |
+
"checkpoint_id": "tengomucho/tiny-random-gpt-oss",
|
30 |
+
"checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
|
31 |
+
"continuous_batching": false,
|
32 |
+
"enable_bucketing": false,
|
33 |
+
"ep_degree": 1,
|
34 |
+
"flash_decoding_enabled": false,
|
35 |
+
"fused_qkv": false,
|
36 |
+
"glu_mlp": true,
|
37 |
+
"is_chunked_prefill": false,
|
38 |
+
"local_ranks_size": 2,
|
39 |
+
"logical_nc_config": 1,
|
40 |
+
"max_batch_size": 2,
|
41 |
+
"max_context_length": 100,
|
42 |
+
"max_topk": 256,
|
43 |
+
"mlp_kernel_enabled": false,
|
44 |
+
"mlp_kernel_fuse_residual_add": false,
|
45 |
+
"n_active_tokens": 100,
|
46 |
+
"neuronxcc_version": "2.19.8089.0+8ab9f450",
|
47 |
+
"num_cores_per_group": 1,
|
48 |
+
"on_device_sampling": false,
|
49 |
+
"optimum_neuron_version": "0.3.1.dev0",
|
50 |
+
"output_logits": false,
|
51 |
+
"padding_side": "right",
|
52 |
+
"pp_degree": 1,
|
53 |
+
"qk_layernorm": false,
|
54 |
+
"qkv_kernel_enabled": false,
|
55 |
+
"rpl_reduce_dtype": "bfloat16",
|
56 |
+
"sequence_length": 100,
|
57 |
+
"sequence_parallel_enabled": false,
|
58 |
+
"speculation_length": 0,
|
59 |
+
"start_rank_id": 0,
|
60 |
+
"target": null,
|
61 |
+
"torch_dtype": "bfloat16",
|
62 |
+
"tp_degree": 2,
|
63 |
+
"vocab_parallel": false
|
64 |
+
},
|
65 |
+
"num_attention_heads": 18,
|
66 |
+
"num_experts_per_tok": 4,
|
67 |
+
"num_hidden_layers": 2,
|
68 |
+
"num_key_value_heads": 2,
|
69 |
+
"num_local_experts": 64,
|
70 |
+
"output_router_logits": false,
|
71 |
+
"rms_norm_eps": 1e-05,
|
72 |
+
"rope_scaling": null,
|
73 |
+
"rope_theta": 150000.0,
|
74 |
+
"router_aux_loss_coef": 0.9,
|
75 |
+
"sliding_window": 128,
|
76 |
+
"tie_word_embeddings": false,
|
77 |
+
"use_cache": true,
|
78 |
+
"vocab_size": 201088
|
79 |
+
}
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab9a32677a94a651ba145fbae9fe9f1ce8a545105e8012934444f8d846730cce
|
3 |
+
size 238434
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19d20cba0ee881669a3e5c30bf11aadc2278ee7c0b66b0eca0c2a662c4883cfc
|
3 |
+
size 533504
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:835b74ad54c0544e394e68d445455f7a11a561917054309d0fb7951aad890107
|
3 |
+
size 87986
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2b9b7191800edac2a1ff9fcc7b0c4625a5a0c10255db060bb85d21a6ca7c3019
|
3 |
+
size 369664
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a601af92eb5bf2b3dcb027f5f6ac831565988baf7b0b579f970a33108966a122
|
3 |
+
size 385657
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cb7ee995efc83075286c18ae0bd6aa668df249e6541fb44f7e8e467499146ba
|
3 |
+
size 106146
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c94f5009fea941ff353050a32c72276e9611eb5c5985f03c7616eed0b33eb599
|
3 |
+
size 410624
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:edbf230dd0eb742b4e0753cb960f76abebe45d1364bcd947eef076af3ce83290
|
3 |
+
size 426940
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:45058e55d159e1dddc929e5556e384548310357a373fff6c2bf022f58df66e8a
|
3 |
+
size 16598
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff
ADDED
Binary file (62.5 kB). View file
|
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d12cb605b076ecbbaf380bd5f6c104ee598d4fd16e51573ece04daba033ff668
|
3 |
+
size 87988
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44d6a56e03a8fa0612b686f0709604b9d6aef458bb0acf0c68554d2604886fbb
|
3 |
+
size 369664
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:613cf200ecbd7dbd999d81e6aef20164355a7ac95854fdcfca88d9258b7ca27a
|
3 |
+
size 385657
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2f1fff59be3626706ff573da8beefbc210c912f375d56de5d3090eb3d87b7ddc
|
3 |
+
size 369664
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7f8318dd6061234a500d6b0717ebd6695551a71e51ac3816a4686cce3507c4f
|
3 |
+
size 385657
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 748544
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e890a716146d9a59af4da09e25f3d578e4354e38287ece0c077c2d7ac1621a9
|
3 |
size 748544
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 772992
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64f5ba65cf7bb975a1835dd3098775dde405e5ab85c3e0bdf894b5f01cfb06df
|
3 |
size 772992
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:acf09e474390d8972978858d6b4d6e1e40777439b6e9201c08cff2453fbe210b
|
3 |
+
size 106150
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7cb3675ee770f420c945a0564f3d3fadac9e748a323a64cdd065c5b12ceb1d26
|
3 |
+
size 410624
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf2454e4949622f1a33bbeafba975a592e566c6eb633bca25947ff4dafea710e
|
3 |
+
size 426940
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f0b78950a0e796b44eff2c591c16a280522b577d183b3301fde8aeeb6552d2b6
|
3 |
+
size 1844268
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c5f8396058c368617651d6a4392d0215b5ed6033f220cfd9f01c5c9b1799f28
|
3 |
+
size 1475584
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
"--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done
ADDED
File without changes
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd1fe65aedfce2d2d9807ea789de5abf6c60bf1c6371d83aee40cd09028963c9
|
3 |
+
size 16917
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff
ADDED
Binary file (62.5 kB). View file
|
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:380a1631f162813ccfe1c8ed3b96e083e269fc0c9dfb0cbb41a55469d5a02eb5
|
3 |
+
size 1844268
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']:
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 40790698
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c17d50397e7f14e98ec7a192d28f00431bee79435138647151a044684d4e3da8
|
3 |
size 40790698
|
neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
|