{
  "architectures": [
    "PerceiverCausalLanguageModel"
  ],
  "is_decoder": true,
  "model_config": {
    "abs_pos_emb": false,
    "activation_checkpointing": false,
    "activation_offloading": false,
    "cross_attention_dropout": 0.0,
    "cross_attention_widening_factor": 4,
    "init_scale": 0.02,
    "max_heads_parallel": 2,
    "max_latents": 512,
    "max_seq_len": 1024,
    "num_channels": 1280,
    "num_heads": 10,
    "num_self_attention_layers": 20,
    "output_bias": false,
    "output_norm": true,
    "post_attention_dropout": 0.0,
    "self_attention_widening_factor": 4,
    "vocab_size": 32000
  },
  "model_type": "perceiver-ar-causal-language-model",
  "tokenizer_class": "XLNetTokenizerFast",
  "torch_dtype": "float32",
  "transformers_version": "4.28.0"
}
|
|
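Since this is a plain JSON file, it can be inspected without any model code. Below is a minimal Python sketch that reads the file and reports the main hyperparameters; the filename config.json and the derived prefix length (based on how Perceiver AR splits a sequence into a cross-attended prefix and max_latents latent positions) are illustrative assumptions, not something stated in the file itself.

import json

# Load the configuration shown above (the filename is an assumption).
with open("config.json") as f:
    cfg = json.load(f)

mc = cfg["model_config"]

# In a Perceiver AR decoder, the final `max_latents` positions act as
# latents that cross-attend to the full input sequence, so the longest
# prefix handled by cross-attention alone is max_seq_len - max_latents.
prefix_len = mc["max_seq_len"] - mc["max_latents"]

print(f"model type:        {cfg['model_type']}")
print(f"vocab size:        {mc['vocab_size']}")
print(f"latents:           {mc['max_latents']} of {mc['max_seq_len']} positions")
print(f"cross-attn prefix: up to {prefix_len} tokens")
print(f"self-attn stack:   {mc['num_self_attention_layers']} layers, "
      f"{mc['num_heads']} heads, {mc['num_channels']} channels")

Note that PerceiverCausalLanguageModel is not part of the transformers core library, so actually instantiating the model from this config typically requires the code that defines the perceiver-ar-causal-language-model model type, for example via trust_remote_code=True in the AutoModel-style loaders or via the package that registered the class.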