mlfoundations-dev
/

mlfoundations-dev_stackoverflow_100000_samples

Text Generation

Generated from Trainer

text-generation-inference

Model card Files Files and versions

Metrics Training metrics Community

mlfoundations-dev_stackoverflow_100000_samples / trainer_state.json

marianna13's picture

Upload folder using huggingface_hub

54d77c8 verified 7 months ago

history blame contribute delete

4.46 kB

	{
	"best_metric": null,
	"best_model_checkpoint": null,
	"epoch": 2.9869281045751634,
	"eval_steps": 500,
	"global_step": 228,
	"is_hyper_param_search": false,
	"is_local_process_zero": true,
	"is_world_process_zero": true,
	"log_history": [
	{
	"epoch": 0.13071895424836602,
	"grad_norm": 13.640703803287584,
	"learning_rate": 5e-06,
	"loss": 0.9844,
	"step": 10
	},
	{
	"epoch": 0.26143790849673204,
	"grad_norm": 0.9447823765191696,
	"learning_rate": 5e-06,
	"loss": 0.8863,
	"step": 20
	},
	{
	"epoch": 0.39215686274509803,
	"grad_norm": 1.038467083575288,
	"learning_rate": 5e-06,
	"loss": 0.8449,
	"step": 30
	},
	{
	"epoch": 0.5228758169934641,
	"grad_norm": 1.6427820915875102,
	"learning_rate": 5e-06,
	"loss": 0.8257,
	"step": 40
	},
	{
	"epoch": 0.6535947712418301,
	"grad_norm": 1.7936488531165335,
	"learning_rate": 5e-06,
	"loss": 0.8169,
	"step": 50
	},
	{
	"epoch": 0.7843137254901961,
	"grad_norm": 0.7697816321414731,
	"learning_rate": 5e-06,
	"loss": 0.8056,
	"step": 60
	},
	{
	"epoch": 0.9150326797385621,
	"grad_norm": 1.250994290613137,
	"learning_rate": 5e-06,
	"loss": 0.7971,
	"step": 70
	},
	{
	"epoch": 1.0490196078431373,
	"grad_norm": 0.843433753244107,
	"learning_rate": 5e-06,
	"loss": 0.8367,
	"step": 80
	},
	{
	"epoch": 1.1797385620915033,
	"grad_norm": 0.6027656670101825,
	"learning_rate": 5e-06,
	"loss": 0.7483,
	"step": 90
	},
	{
	"epoch": 1.3104575163398693,
	"grad_norm": 0.8024576817469242,
	"learning_rate": 5e-06,
	"loss": 0.7428,
	"step": 100
	},
	{
	"epoch": 1.4411764705882353,
	"grad_norm": 0.7594730875779195,
	"learning_rate": 5e-06,
	"loss": 0.7401,
	"step": 110
	},
	{
	"epoch": 1.5718954248366013,
	"grad_norm": 0.5293998500966177,
	"learning_rate": 5e-06,
	"loss": 0.7426,
	"step": 120
	},
	{
	"epoch": 1.7026143790849673,
	"grad_norm": 0.9249042901353932,
	"learning_rate": 5e-06,
	"loss": 0.7406,
	"step": 130
	},
	{
	"epoch": 1.8333333333333335,
	"grad_norm": 0.5767932991870924,
	"learning_rate": 5e-06,
	"loss": 0.7379,
	"step": 140
	},
	{
	"epoch": 1.9640522875816995,
	"grad_norm": 0.5974379920218519,
	"learning_rate": 5e-06,
	"loss": 0.7337,
	"step": 150
	},
	{
	"epoch": 2.0980392156862746,
	"grad_norm": 1.3799060310730653,
	"learning_rate": 5e-06,
	"loss": 0.7541,
	"step": 160
	},
	{
	"epoch": 2.2287581699346406,
	"grad_norm": 0.8440697657265467,
	"learning_rate": 5e-06,
	"loss": 0.6849,
	"step": 170
	},
	{
	"epoch": 2.3594771241830066,
	"grad_norm": 0.6523664577578698,
	"learning_rate": 5e-06,
	"loss": 0.6828,
	"step": 180
	},
	{
	"epoch": 2.4901960784313726,
	"grad_norm": 0.5604368514967889,
	"learning_rate": 5e-06,
	"loss": 0.6833,
	"step": 190
	},
	{
	"epoch": 2.6209150326797386,
	"grad_norm": 0.651015676014187,
	"learning_rate": 5e-06,
	"loss": 0.6825,
	"step": 200
	},
	{
	"epoch": 2.7516339869281046,
	"grad_norm": 0.6331718263692562,
	"learning_rate": 5e-06,
	"loss": 0.6826,
	"step": 210
	},
	{
	"epoch": 2.8823529411764706,
	"grad_norm": 0.6748382635791591,
	"learning_rate": 5e-06,
	"loss": 0.6867,
	"step": 220
	},
	{
	"epoch": 2.9869281045751634,
	"step": 228,
	"total_flos": 381489732648960.0,
	"train_loss": 0.7633350188272041,
	"train_runtime": 3333.6415,
	"train_samples_per_second": 35.22,
	"train_steps_per_second": 0.068
	}
	],
	"logging_steps": 10,
	"max_steps": 228,
	"num_input_tokens_seen": 0,
	"num_train_epochs": 3,
	"save_steps": 500,
	"stateful_callbacks": {
	"TrainerControl": {
	"args": {
	"should_epoch_stop": false,
	"should_evaluate": false,
	"should_log": false,
	"should_save": true,
	"should_training_stop": true
	},
	"attributes": {}
	}
	},
	"total_flos": 381489732648960.0,
	"train_batch_size": 4,
	"trial_name": null,
	"trial_params": null
	}