{ "architectures": [ "CustomTransformerForCausalLM" ], "d_model": 512, "dim_feedforward": 2048, "dropout": 0.1, "group_size": 16, "model_type": "custom_transformer", "n_heads": 8, "num_layers": 6, "torch_dtype": "float32", "transformers_version": "4.42.4", "vocab_size": 32000 }