{
  "architectures": [
    "SnowflakeCoreG1"
  ],
  "auto_map": {
    "AutoConfig": "configuration_snowflake_core.SnowflakeCoreConfig",
    "AutoModelForCausalLM": "modeling_snowflake_core.SnowflakeCoreG1"
  },
  "bos_token_id": 50256,
  "dropout": 0.1,
  "embed_dim": 1024,
  "eos_token_id": 50256,
  "ffn_dim": 4096,
  "max_length": 2048,
  "model_type": "snowflake_core",
  "num_heads": 16,
  "num_layers": 24,
  "pad_token_id": 50256,
  "torch_dtype": "float32",
  "training_config": {
    "actual_epochs": 0,
    "batch_size": 1,
    "early_stopping": {
      "min_delta": 0.001,
      "patience": 3,
      "triggered": false
    },
    "epochs": 2,
    "grad_accum_steps": 32,
    "learning_rate": 0.0002,
    "max_length": 2048,
    "val_split_ratio": 0.1
  },
  "training_metrics": {
    "best_val_loss": null,
    "best_val_perplexity": null,
    "final_train_loss": null,
    "final_train_perplexity": null,
    "final_val_loss": null,
    "final_val_perplexity": null
  },
  "transformers_version": "4.53.1",
  "unk_token_id": 50256,
  "vocab_size": 50257
}