{ "architectures": [ "PerceiverCausalLanguageModel" ], "is_decoder": true, "model_config": { "abs_pos_emb": false, "activation_checkpointing": false, "activation_offloading": false, "cross_attention_dropout": 0.0, "cross_attention_widening_factor": 4, "init_scale": 0.02, "max_heads_parallel": 2, "max_latents": 512, "max_seq_len": 1024, "num_channels": 1280, "num_heads": 10, "num_self_attention_layers": 20, "output_bias": false, "output_norm": true, "post_attention_dropout": 0.0, "self_attention_widening_factor": 4, "vocab_size": 32000 }, "model_type": "perceiver-ar-causal-language-model", "tokenizer_class": "XLNetTokenizerFast", "torch_dtype": "float32", "transformers_version": "4.28.0" }