tengomucho HF Staff commited on
Commit
fa31572
·
verified ·
1 Parent(s): e9a801a

Synchronizing local compiler cache.

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +20 -0
  2. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json +79 -0
  3. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json +79 -0
  4. neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json +79 -0
  5. neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json +1 -0
  6. neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done +0 -0
  7. neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb +3 -0
  8. neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff +3 -0
  9. neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json +1 -0
  10. neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done +0 -0
  11. neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb +3 -0
  12. neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff +3 -0
  13. neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo +3 -0
  14. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json +1 -0
  15. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done +0 -0
  16. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb +3 -0
  17. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff +3 -0
  18. neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo +3 -0
  19. neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json +1 -0
  20. neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done +0 -0
  21. neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb +3 -0
  22. neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff +0 -0
  23. neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json +1 -0
  24. neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done +0 -0
  25. neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb +3 -0
  26. neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff +3 -0
  27. neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo +3 -0
  28. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done +0 -0
  29. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff +3 -0
  30. neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo +3 -0
  31. neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff +1 -1
  32. neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo +1 -1
  33. neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json +1 -0
  34. neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done +0 -0
  35. neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb +3 -0
  36. neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff +3 -0
  37. neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo +3 -0
  38. neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json +1 -0
  39. neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done +0 -0
  40. neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb +3 -0
  41. neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff +3 -0
  42. neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json +1 -0
  43. neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done +0 -0
  44. neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb +3 -0
  45. neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff +0 -0
  46. neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json +1 -0
  47. neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb +3 -0
  48. neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log +1 -0
  49. neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb +1 -1
  50. neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json +1 -0
.gitattributes CHANGED
@@ -4008,3 +4008,23 @@ neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/model.neff f
4008
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4009
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4010
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4008
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_af00345a8f045c9c3128+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4009
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_c95f9cae22467b36d97b+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4010
  neuronxcc-2.19.8089.0+8ab9f450/MODULE_cc4e26d5285848960dd0+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4011
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4012
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4013
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4014
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4015
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4016
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4017
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4018
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4019
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4020
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4021
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4022
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4023
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4024
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_d7c4e83194e27d3697b6+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4025
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/model.neff filter=lfs diff=lfs merge=lfs -text
4026
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_ec19c89824e181106367+a9d440f5/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
4027
+ neuronxcc-2.19.8089.0+8ab9f450/MODULE_f2a2327bb7995d727d3d+ed72d204/model.neff filter=lfs diff=lfs merge=lfs -text
4028
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_9160407a52b79b3d964e+c2248236/model.neff filter=lfs diff=lfs merge=lfs -text
4029
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/model.neff filter=lfs diff=lfs merge=lfs -text
4030
+ neuronxcc-2.20.9961.0+0acef03a/MODULE_cd478201f76aa054815b+ca355898/wrapped_neff.hlo filter=lfs diff=lfs merge=lfs -text
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/0c827e2846bed9ddd8a6.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tengomucho/tiny-random-gpt-oss",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GptOssForCausalLM"
7
+ ],
8
+ "attention_bias": true,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 384,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "layer_types": [
16
+ "sliding_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 8192,
20
+ "mlp_bias": false,
21
+ "model_type": "gpt_oss",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "async_mode": false,
25
+ "attn_kernel_enabled": false,
26
+ "batch_size": 1,
27
+ "capacity_factor": null,
28
+ "cc_pipeline_tiling_factor": 2,
29
+ "checkpoint_id": "tengomucho/tiny-random-gpt-oss",
30
+ "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
31
+ "continuous_batching": false,
32
+ "enable_bucketing": false,
33
+ "ep_degree": 1,
34
+ "flash_decoding_enabled": false,
35
+ "fused_qkv": false,
36
+ "glu_mlp": true,
37
+ "is_chunked_prefill": false,
38
+ "local_ranks_size": 2,
39
+ "logical_nc_config": 1,
40
+ "max_batch_size": 1,
41
+ "max_context_length": 100,
42
+ "max_topk": 256,
43
+ "mlp_kernel_enabled": false,
44
+ "mlp_kernel_fuse_residual_add": false,
45
+ "n_active_tokens": 100,
46
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
47
+ "num_cores_per_group": 1,
48
+ "on_device_sampling": false,
49
+ "optimum_neuron_version": "0.3.1.dev0",
50
+ "output_logits": false,
51
+ "padding_side": "right",
52
+ "pp_degree": 1,
53
+ "qk_layernorm": false,
54
+ "qkv_kernel_enabled": false,
55
+ "rpl_reduce_dtype": "bfloat16",
56
+ "sequence_length": 100,
57
+ "sequence_parallel_enabled": false,
58
+ "speculation_length": 0,
59
+ "start_rank_id": 0,
60
+ "target": null,
61
+ "torch_dtype": "bfloat16",
62
+ "tp_degree": 2,
63
+ "vocab_parallel": false
64
+ },
65
+ "num_attention_heads": 18,
66
+ "num_experts_per_tok": 4,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 2,
69
+ "num_local_experts": 64,
70
+ "output_router_logits": false,
71
+ "rms_norm_eps": 1e-05,
72
+ "rope_scaling": null,
73
+ "rope_theta": 150000.0,
74
+ "router_aux_loss_coef": 0.9,
75
+ "sliding_window": 128,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 201088
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/7e28884f7ff1f765a20d.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tengomucho/tiny-random-gpt-oss",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GptOssForCausalLM"
7
+ ],
8
+ "attention_bias": true,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 384,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "layer_types": [
16
+ "sliding_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 8192,
20
+ "mlp_bias": false,
21
+ "model_type": "gpt_oss",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "async_mode": false,
25
+ "attn_kernel_enabled": false,
26
+ "batch_size": 1,
27
+ "capacity_factor": null,
28
+ "cc_pipeline_tiling_factor": 2,
29
+ "checkpoint_id": "tengomucho/tiny-random-gpt-oss",
30
+ "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
31
+ "continuous_batching": false,
32
+ "enable_bucketing": false,
33
+ "ep_degree": 1,
34
+ "flash_decoding_enabled": false,
35
+ "fused_qkv": false,
36
+ "glu_mlp": true,
37
+ "is_chunked_prefill": false,
38
+ "local_ranks_size": 2,
39
+ "logical_nc_config": 1,
40
+ "max_batch_size": 1,
41
+ "max_context_length": 100,
42
+ "max_topk": 256,
43
+ "mlp_kernel_enabled": false,
44
+ "mlp_kernel_fuse_residual_add": false,
45
+ "n_active_tokens": 100,
46
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
47
+ "num_cores_per_group": 1,
48
+ "on_device_sampling": false,
49
+ "optimum_neuron_version": "0.3.1.dev0",
50
+ "output_logits": false,
51
+ "padding_side": "right",
52
+ "pp_degree": 1,
53
+ "qk_layernorm": false,
54
+ "qkv_kernel_enabled": false,
55
+ "rpl_reduce_dtype": "float16",
56
+ "sequence_length": 100,
57
+ "sequence_parallel_enabled": false,
58
+ "speculation_length": 0,
59
+ "start_rank_id": 0,
60
+ "target": null,
61
+ "torch_dtype": "float16",
62
+ "tp_degree": 2,
63
+ "vocab_parallel": false
64
+ },
65
+ "num_attention_heads": 18,
66
+ "num_experts_per_tok": 4,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 2,
69
+ "num_local_experts": 64,
70
+ "output_router_logits": false,
71
+ "rms_norm_eps": 1e-05,
72
+ "rope_scaling": null,
73
+ "rope_theta": 150000.0,
74
+ "router_aux_loss_coef": 0.9,
75
+ "sliding_window": 128,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 201088
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/0_REGISTRY/0.3.1.dev0/gpt_oss/tengomucho/tiny-random-gpt-oss/a963e10d150c7358e338.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_entry_class": "SingleModelCacheEntry",
3
+ "_model_id": "tengomucho/tiny-random-gpt-oss",
4
+ "_task": "text-generation",
5
+ "architectures": [
6
+ "GptOssForCausalLM"
7
+ ],
8
+ "attention_bias": true,
9
+ "attention_dropout": 0.0,
10
+ "head_dim": 64,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 384,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 512,
15
+ "layer_types": [
16
+ "sliding_attention",
17
+ "full_attention"
18
+ ],
19
+ "max_position_embeddings": 8192,
20
+ "mlp_bias": false,
21
+ "model_type": "gpt_oss",
22
+ "neuron": {
23
+ "_serialized_key": "NxDNeuronConfig",
24
+ "async_mode": false,
25
+ "attn_kernel_enabled": false,
26
+ "batch_size": 2,
27
+ "capacity_factor": null,
28
+ "cc_pipeline_tiling_factor": 2,
29
+ "checkpoint_id": "tengomucho/tiny-random-gpt-oss",
30
+ "checkpoint_revision": "2109d1ee033bb4191af6dd950fda542c9c351dc8",
31
+ "continuous_batching": false,
32
+ "enable_bucketing": false,
33
+ "ep_degree": 1,
34
+ "flash_decoding_enabled": false,
35
+ "fused_qkv": false,
36
+ "glu_mlp": true,
37
+ "is_chunked_prefill": false,
38
+ "local_ranks_size": 2,
39
+ "logical_nc_config": 1,
40
+ "max_batch_size": 2,
41
+ "max_context_length": 100,
42
+ "max_topk": 256,
43
+ "mlp_kernel_enabled": false,
44
+ "mlp_kernel_fuse_residual_add": false,
45
+ "n_active_tokens": 100,
46
+ "neuronxcc_version": "2.19.8089.0+8ab9f450",
47
+ "num_cores_per_group": 1,
48
+ "on_device_sampling": false,
49
+ "optimum_neuron_version": "0.3.1.dev0",
50
+ "output_logits": false,
51
+ "padding_side": "right",
52
+ "pp_degree": 1,
53
+ "qk_layernorm": false,
54
+ "qkv_kernel_enabled": false,
55
+ "rpl_reduce_dtype": "bfloat16",
56
+ "sequence_length": 100,
57
+ "sequence_parallel_enabled": false,
58
+ "speculation_length": 0,
59
+ "start_rank_id": 0,
60
+ "target": null,
61
+ "torch_dtype": "bfloat16",
62
+ "tp_degree": 2,
63
+ "vocab_parallel": false
64
+ },
65
+ "num_attention_heads": 18,
66
+ "num_experts_per_tok": 4,
67
+ "num_hidden_layers": 2,
68
+ "num_key_value_heads": 2,
69
+ "num_local_experts": 64,
70
+ "output_router_logits": false,
71
+ "rms_norm_eps": 1e-05,
72
+ "rope_scaling": null,
73
+ "rope_theta": 150000.0,
74
+ "router_aux_loss_coef": 0.9,
75
+ "sliding_window": 128,
76
+ "tie_word_embeddings": false,
77
+ "use_cache": true,
78
+ "vocab_size": 201088
79
+ }
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab9a32677a94a651ba145fbae9fe9f1ce8a545105e8012934444f8d846730cce
3
+ size 238434
neuronxcc-2.19.8089.0+8ab9f450/MODULE_081d68c668721c1151f8+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:19d20cba0ee881669a3e5c30bf11aadc2278ee7c0b66b0eca0c2a662c4883cfc
3
+ size 533504
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835b74ad54c0544e394e68d445455f7a11a561917054309d0fb7951aad890107
3
+ size 87986
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b9b7191800edac2a1ff9fcc7b0c4625a5a0c10255db060bb85d21a6ca7c3019
3
+ size 369664
neuronxcc-2.19.8089.0+8ab9f450/MODULE_12f1fb2b3b8d96cc17eb+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a601af92eb5bf2b3dcb027f5f6ac831565988baf7b0b579f970a33108966a122
3
+ size 385657
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cb7ee995efc83075286c18ae0bd6aa668df249e6541fb44f7e8e467499146ba
3
+ size 106146
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c94f5009fea941ff353050a32c72276e9611eb5c5985f03c7616eed0b33eb599
3
+ size 410624
neuronxcc-2.19.8089.0+8ab9f450/MODULE_1326452b1bf03b9192f7+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:edbf230dd0eb742b4e0753cb960f76abebe45d1364bcd947eef076af3ce83290
3
+ size 426940
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:45058e55d159e1dddc929e5556e384548310357a373fff6c2bf022f58df66e8a
3
+ size 16598
neuronxcc-2.19.8089.0+8ab9f450/MODULE_276796e69c32251f53d1+431f5505/model.neff ADDED
Binary file (62.5 kB). View file
 
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d12cb605b076ecbbaf380bd5f6c104ee598d4fd16e51573ece04daba033ff668
3
+ size 87988
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44d6a56e03a8fa0612b686f0709604b9d6aef458bb0acf0c68554d2604886fbb
3
+ size 369664
neuronxcc-2.19.8089.0+8ab9f450/MODULE_27d704a88404585e55bc+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:613cf200ecbd7dbd999d81e6aef20164355a7ac95854fdcfca88d9258b7ca27a
3
+ size 385657
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f1fff59be3626706ff573da8beefbc210c912f375d56de5d3090eb3d87b7ddc
3
+ size 369664
neuronxcc-2.19.8089.0+8ab9f450/MODULE_2bcbc6c2917ca30ebb3d+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7f8318dd6061234a500d6b0717ebd6695551a71e51ac3816a4686cce3507c4f
3
+ size 385657
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/model.neff CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6d47b92669be6842cd9c1da3d600b7d21b63bc2e2910874ea1a32101a79f18b
3
  size 748544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e890a716146d9a59af4da09e25f3d578e4354e38287ece0c077c2d7ac1621a9
3
  size 748544
neuronxcc-2.19.8089.0+8ab9f450/MODULE_4ed1e4ca4ea70401ca1c+a9d440f5/wrapped_neff.hlo CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1b6cc78f946f46795f8565376f97193c2ad1edbcd3685ec8cd98292cb036038a
3
  size 772992
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64f5ba65cf7bb975a1835dd3098775dde405e5ab85c3e0bdf894b5f01cfb06df
3
  size 772992
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/token_generation_model/_tp0_bk0/log-neuron-cc.txt", "--enable-internal-neff-wrapper"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acf09e474390d8972978858d6b4d6e1e40777439b6e9201c08cff2453fbe210b
3
+ size 106150
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cb3675ee770f420c945a0564f3d3fadac9e748a323a64cdd065c5b12ceb1d26
3
+ size 410624
neuronxcc-2.19.8089.0+8ab9f450/MODULE_6490fe1fcba01bbd78a6+a9d440f5/wrapped_neff.hlo ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf2454e4949622f1a33bbeafba975a592e566c6eb633bca25947ff4dafea710e
3
+ size 426940
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0b78950a0e796b44eff2c591c16a280522b577d183b3301fde8aeeb6552d2b6
3
+ size 1844268
neuronxcc-2.19.8089.0+8ab9f450/MODULE_687ca63877eb31e6c039+ed72d204/model.neff ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c5f8396058c368617651d6a4392d0215b5ed6033f220cfd9f01c5c9b1799f28
3
+ size 1475584
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ "--model-type=transformer -O1 --lnc=1 --internal-hlo2tensorizer-options=--experimental-unsafe-fp8e4m3fn-as-fp8e4m3 --logfile=/tmp/nxd_model/layout_opt/log-neuron-cc.txt"
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.done ADDED
File without changes
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd1fe65aedfce2d2d9807ea789de5abf6c60bf1c6371d83aee40cd09028963c9
3
+ size 16917
neuronxcc-2.19.8089.0+8ab9f450/MODULE_8883e3ddc45d542e0187+431f5505/model.neff ADDED
Binary file (62.5 kB). View file
 
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.hlo_module.pb ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:380a1631f162813ccfe1c8ed3b96e083e269fc0c9dfb0cbb41a55469d5a02eb5
3
+ size 1844268
neuronxcc-2.19.8089.0+8ab9f450/MODULE_a2c3cdd4ef3f8549a44e+ed72d204/model.log ADDED
@@ -0,0 +1 @@
 
 
1
+ Failed compilation with ['neuronx-cc', 'compile', '--framework=XLA', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.hlo_module.pb', '--output', '/tmp/nxd_model/context_encoding_model/_tp0_bk0/model.MODULE_a2c3cdd4ef3f8549a44e+ed72d204.neff', '--target=trn1', '--auto-cast=none', '--model-type=transformer', '--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ', '-O2', '--lnc=1', '--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt', '--verbose=35']:
neuronxcc-2.19.8089.0+8ab9f450/MODULE_b695d5cc0521af5a71d2+ed72d204/model.hlo_module.pb CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:87fdd0a28b51aede94ee64146624c6d123505cb5c5f67d5279671e9441277fec
3
  size 40790698
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c17d50397e7f14e98ec7a192d28f00431bee79435138647151a044684d4e3da8
3
  size 40790698
neuronxcc-2.19.8089.0+8ab9f450/MODULE_be84edb3798c056b44df+ed72d204/compile_flags.json ADDED
@@ -0,0 +1 @@
 
 
1
+ ["--target=trn1", "--auto-cast=none", "--model-type=transformer", "--tensorizer-options=--enable-ccop-compute-overlap --cc-pipeline-tiling-factor=2 --vectorize-strided-dma ", "-O2", "--lnc=1", "--logfile=/tmp/nxd_model/context_encoding_model/_tp0_bk0/log-neuron-cc.txt"]