|
{
  "_name_or_path": "../fairseq-moe/fairseq-moe-15b",
  "activation_function": "gelu",
  "architectures": [
    "FSGPTMoEForCausalLM"
  ],
  "attention_dropout": 0.1,
  "attention_heads": 12,
  "batch_prioritized_routing": false,
  "bos_token_id": 50257,
  "classifier_dropout": 0.1,
  "d_model": 768,
  "embed_dropout": 0.0,
  "eos_token_id": 50259,
  "expert_capacity": null,
  "ffn_dim": 3072,
  "hidden_size": 768,
  "intermediate_size": 3072,
  "max_position_embeddings": 2048,
  "moe_eval_capacity_token_fraction": -1.0,
  "moe_token_dropout": 0.2,
  "no_scale_embedding": false,
  "normalize_router_prob_before_dropping": false,
  "num_experts": 512,
  "num_heads": 12,
  "num_layers": 12,
  "output_router_logits": true,
  "pad_token_id": 50258,
  "resid_dropout": 0.1,
  "router_bias": false,
  "router_dtype": "float32",
  "router_ignore_padding_tokens": true,
  "second_expert_policy": "all",
  "sparse_step": 2,
  "tokenizer_class": "GPT2Tokenizer",
  "torch_dtype": "bfloat16",
  "transformers_version": "4.33.2",
  "unk_token_id": 3,
  "use_cache": true,
  "vocab_size": 51200
}
|
|