{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 3039, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09871668311944719, "grad_norm": 7.595918655395508, "learning_rate": 6.578947368421054e-06, "loss": 0.4785, "step": 100 }, { "epoch": 0.09871668311944719, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.48153268812956745, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8239186253209559, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.6804266245309105, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7476298637171638, "eval_custom_dataset_evaluation_cosine_map@100": 0.6014421644194009, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.5954963687693909, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.650756003676077, "eval_custom_dataset_evaluation_cosine_precision@1": 0.48153268812956745, "eval_custom_dataset_evaluation_cosine_precision@10": 0.0823918625320956, "eval_custom_dataset_evaluation_cosine_precision@3": 0.22680887484363688, "eval_custom_dataset_evaluation_cosine_precision@5": 0.14952597274343274, "eval_custom_dataset_evaluation_cosine_recall@1": 0.48153268812956745, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8239186253209559, "eval_custom_dataset_evaluation_cosine_recall@3": 0.6804266245309105, "eval_custom_dataset_evaluation_cosine_recall@5": 0.7476298637171638, "eval_loss": 0.4484286308288574, "eval_runtime": 204.03, "eval_samples_per_second": 9.925, "eval_steps_per_second": 0.622, "step": 100 }, { "epoch": 0.19743336623889438, "grad_norm": 10.92927074432373, "learning_rate": 1.3157894736842108e-05, "loss": 0.4112, "step": 200 }, { "epoch": 0.19743336623889438, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5033083152281256, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8463855421686747, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7041279873592732, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.772615050365396, "eval_custom_dataset_evaluation_cosine_map@100": 0.624250951720462, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6187061477761295, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6738741593374042, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5033083152281256, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08463855421686747, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2347093291197577, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1545230100730792, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5033083152281256, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8463855421686747, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7041279873592732, "eval_custom_dataset_evaluation_cosine_recall@5": 0.772615050365396, "eval_loss": 0.3707010746002197, "eval_runtime": 203.8165, "eval_samples_per_second": 9.935, "eval_steps_per_second": 0.623, "step": 200 }, { "epoch": 0.29615004935834155, "grad_norm": 33.57826232910156, "learning_rate": 1.9736842105263158e-05, "loss": 0.2838, "step": 300 }, { "epoch": 0.29615004935834155, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5243432747382974, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.860655737704918, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7234841003357693, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.7913292514319573, "eval_custom_dataset_evaluation_cosine_map@100": 0.6432678473897245, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6380460561998335, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.6920347683997495, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5243432747382974, "eval_custom_dataset_evaluation_cosine_precision@10": 0.0860655737704918, "eval_custom_dataset_evaluation_cosine_precision@3": 0.24116136677858976, "eval_custom_dataset_evaluation_cosine_precision@5": 0.15826585028639148, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5243432747382974, "eval_custom_dataset_evaluation_cosine_recall@10": 0.860655737704918, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7234841003357693, "eval_custom_dataset_evaluation_cosine_recall@5": 0.7913292514319573, "eval_loss": 0.32815688848495483, "eval_runtime": 203.3881, "eval_samples_per_second": 9.956, "eval_steps_per_second": 0.624, "step": 300 }, { "epoch": 0.39486673247778875, "grad_norm": 2.814175605773926, "learning_rate": 1.9297989031078612e-05, "loss": 0.2422, "step": 400 }, { "epoch": 0.39486673247778875, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5371321350977681, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8726051747975508, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7371123839620778, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8044143788267826, "eval_custom_dataset_evaluation_cosine_map@100": 0.6563831674366017, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6514166541262787, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7051169138321652, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5371321350977681, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08726051747975509, "eval_custom_dataset_evaluation_cosine_precision@3": 0.24570412798735927, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1608828757653565, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5371321350977681, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8726051747975508, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7371123839620778, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8044143788267826, "eval_loss": 0.28870803117752075, "eval_runtime": 203.6125, "eval_samples_per_second": 9.945, "eval_steps_per_second": 0.624, "step": 400 }, { "epoch": 0.49358341559723595, "grad_norm": 7.553986072540283, "learning_rate": 1.856672760511883e-05, "loss": 0.2369, "step": 500 }, { "epoch": 0.49358341559723595, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5465139245506616, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8817894528935414, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7465929290934229, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8143393245111594, "eval_custom_dataset_evaluation_cosine_map@100": 0.6652980546980072, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6607006284309909, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.714385034123883, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5465139245506616, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08817894528935415, "eval_custom_dataset_evaluation_cosine_precision@3": 0.24886430969780762, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16286786490223187, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5465139245506616, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8817894528935414, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7465929290934229, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8143393245111594, "eval_loss": 0.28374621272087097, "eval_runtime": 204.4559, "eval_samples_per_second": 9.904, "eval_steps_per_second": 0.621, "step": 500 }, { "epoch": 0.5923000987166831, "grad_norm": 32.98969268798828, "learning_rate": 1.783546617915905e-05, "loss": 0.2899, "step": 600 }, { "epoch": 0.5923000987166831, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5526367766146554, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8908749753110804, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7583448548291527, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8260912502468892, "eval_custom_dataset_evaluation_cosine_map@100": 0.6738304576911812, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.669296996730104, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7232271970632966, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5526367766146554, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08908749753110803, "eval_custom_dataset_evaluation_cosine_precision@3": 0.25278161827638423, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16521825004937787, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5526367766146554, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8908749753110804, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7583448548291527, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8260912502468892, "eval_loss": 0.26842400431632996, "eval_runtime": 203.2737, "eval_samples_per_second": 9.962, "eval_steps_per_second": 0.625, "step": 600 }, { "epoch": 0.6910167818361304, "grad_norm": 9.886919021606445, "learning_rate": 1.710420475319927e-05, "loss": 0.1801, "step": 700 }, { "epoch": 0.6910167818361304, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5653268812956745, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8955164921983014, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7669365988544341, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8332510369346238, "eval_custom_dataset_evaluation_cosine_map@100": 0.683737510172223, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6793955094382355, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7320247334733945, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5653268812956745, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08955164921983014, "eval_custom_dataset_evaluation_cosine_precision@3": 0.255645532951478, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16665020738692476, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5653268812956745, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8955164921983014, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7669365988544341, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8332510369346238, "eval_loss": 0.2664617896080017, "eval_runtime": 203.9076, "eval_samples_per_second": 9.931, "eval_steps_per_second": 0.623, "step": 700 }, { "epoch": 0.7897334649555775, "grad_norm": 17.79576873779297, "learning_rate": 1.637294332723949e-05, "loss": 0.2279, "step": 800 }, { "epoch": 0.7897334649555775, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5654256369741261, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8918131542563698, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7636282836263085, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8272763183883073, "eval_custom_dataset_evaluation_cosine_map@100": 0.6824116708533701, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6777940607081065, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7298590424609255, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5654256369741261, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08918131542563698, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2545427612087695, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16545526367766147, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5654256369741261, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8918131542563698, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7636282836263085, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8272763183883073, "eval_loss": 0.2617240250110626, "eval_runtime": 203.217, "eval_samples_per_second": 9.965, "eval_steps_per_second": 0.625, "step": 800 }, { "epoch": 0.8884501480750246, "grad_norm": 29.348552703857422, "learning_rate": 1.564168190127971e-05, "loss": 0.2051, "step": 900 }, { "epoch": 0.8884501480750246, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5677463954177365, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.8971953387319771, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7670353545328856, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8318190795970768, "eval_custom_dataset_evaluation_cosine_map@100": 0.6851778091536691, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6808611691104925, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7334481505960582, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5677463954177365, "eval_custom_dataset_evaluation_cosine_precision@10": 0.08971953387319773, "eval_custom_dataset_evaluation_cosine_precision@3": 0.25567845151096186, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16636381591941535, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5677463954177365, "eval_custom_dataset_evaluation_cosine_recall@10": 0.8971953387319771, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7670353545328856, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8318190795970768, "eval_loss": 0.26834186911582947, "eval_runtime": 203.4633, "eval_samples_per_second": 9.953, "eval_steps_per_second": 0.624, "step": 900 }, { "epoch": 0.9871668311944719, "grad_norm": 0.5696656703948975, "learning_rate": 1.491042047531993e-05, "loss": 0.2097, "step": 1000 }, { "epoch": 0.9871668311944719, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5728323128579893, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9064783725064192, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7784910132332609, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8429290934228718, "eval_custom_dataset_evaluation_cosine_map@100": 0.692767963880535, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6886411963231704, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.741697294005231, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5728323128579893, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09064783725064192, "eval_custom_dataset_evaluation_cosine_precision@3": 0.259497004411087, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1685858186845744, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5728323128579893, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9064783725064192, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7784910132332609, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8429290934228718, "eval_loss": 0.2445395439863205, "eval_runtime": 203.8284, "eval_samples_per_second": 9.935, "eval_steps_per_second": 0.623, "step": 1000 }, { "epoch": 1.085883514313919, "grad_norm": 5.538768291473389, "learning_rate": 1.4179159049360148e-05, "loss": 0.1047, "step": 1100 }, { "epoch": 1.085883514313919, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5813253012048193, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9092435315030615, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7825399960497729, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8449535848311278, "eval_custom_dataset_evaluation_cosine_map@100": 0.6984402538800855, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6944231453526992, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7466897096167746, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5813253012048193, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09092435315030614, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2608466653499243, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16899071696622553, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5813253012048193, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9092435315030615, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7825399960497729, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8449535848311278, "eval_loss": 0.23973241448402405, "eval_runtime": 203.3277, "eval_samples_per_second": 9.959, "eval_steps_per_second": 0.625, "step": 1100 }, { "epoch": 1.1846001974333662, "grad_norm": 6.3217902183532715, "learning_rate": 1.3447897623400368e-05, "loss": 0.0984, "step": 1200 }, { "epoch": 1.1846001974333662, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5921390479952597, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151688722101521, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7911317400750543, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8541378629271184, "eval_custom_dataset_evaluation_cosine_map@100": 0.7083777420863926, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7045087061752225, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.755835586028989, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5921390479952597, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09151688722101521, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2637105800250181, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1708275725854237, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5921390479952597, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9151688722101521, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7911317400750543, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8541378629271184, "eval_loss": 0.2229250818490982, "eval_runtime": 203.6954, "eval_samples_per_second": 9.941, "eval_steps_per_second": 0.623, "step": 1200 }, { "epoch": 1.2833168805528135, "grad_norm": 11.505134582519531, "learning_rate": 1.2716636197440586e-05, "loss": 0.0498, "step": 1300 }, { "epoch": 1.2833168805528135, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5782638751728224, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9055895714003556, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7789354137862927, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8386825992494569, "eval_custom_dataset_evaluation_cosine_map@100": 0.6954513367155198, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.6912109797503966, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7433337553688117, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5782638751728224, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09055895714003556, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2596451379287642, "eval_custom_dataset_evaluation_cosine_precision@5": 0.16773651984989138, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5782638751728224, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9055895714003556, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7789354137862927, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8386825992494569, "eval_loss": 0.2586788237094879, "eval_runtime": 204.0885, "eval_samples_per_second": 9.922, "eval_steps_per_second": 0.622, "step": 1300 }, { "epoch": 1.3820335636722607, "grad_norm": 1.1614787578582764, "learning_rate": 1.1985374771480804e-05, "loss": 0.0993, "step": 1400 }, { "epoch": 1.3820335636722607, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.59411416156429, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9151194943709263, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7925636974126012, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8543353742840213, "eval_custom_dataset_evaluation_cosine_map@100": 0.7096847447019631, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.705814299351989, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7568020206373794, "eval_custom_dataset_evaluation_cosine_precision@1": 0.59411416156429, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09151194943709264, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2641878991375337, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17086707485680427, "eval_custom_dataset_evaluation_cosine_recall@1": 0.59411416156429, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9151194943709263, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7925636974126012, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8543353742840213, "eval_loss": 0.2147156298160553, "eval_runtime": 204.4732, "eval_samples_per_second": 9.903, "eval_steps_per_second": 0.621, "step": 1400 }, { "epoch": 1.4807502467917077, "grad_norm": 1.6429851055145264, "learning_rate": 1.1254113345521024e-05, "loss": 0.0621, "step": 1500 }, { "epoch": 1.4807502467917077, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5962867864902232, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9217855026664034, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7997234841003358, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.860853249061821, "eval_custom_dataset_evaluation_cosine_map@100": 0.7136002941229976, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7100783500904636, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7616828218263099, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5962867864902232, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09217855026664035, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2665744947001119, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1721706498123642, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5962867864902232, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9217855026664034, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7997234841003358, "eval_custom_dataset_evaluation_cosine_recall@5": 0.860853249061821, "eval_loss": 0.20447228848934174, "eval_runtime": 204.051, "eval_samples_per_second": 9.924, "eval_steps_per_second": 0.622, "step": 1500 }, { "epoch": 1.579466929911155, "grad_norm": 15.424310684204102, "learning_rate": 1.0522851919561243e-05, "loss": 0.0922, "step": 1600 }, { "epoch": 1.579466929911155, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.5950029626703536, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9201560339719533, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.7980446375666601, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.858038712225953, "eval_custom_dataset_evaluation_cosine_map@100": 0.7121969229630567, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7085377027234775, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7601050276866773, "eval_custom_dataset_evaluation_cosine_precision@1": 0.5950029626703536, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09201560339719535, "eval_custom_dataset_evaluation_cosine_precision@3": 0.26601487918888667, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17160774244519061, "eval_custom_dataset_evaluation_cosine_recall@1": 0.5950029626703536, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9201560339719533, "eval_custom_dataset_evaluation_cosine_recall@3": 0.7980446375666601, "eval_custom_dataset_evaluation_cosine_recall@5": 0.858038712225953, "eval_loss": 0.20542284846305847, "eval_runtime": 203.1105, "eval_samples_per_second": 9.97, "eval_steps_per_second": 0.625, "step": 1600 }, { "epoch": 1.678183613030602, "grad_norm": 4.0268096923828125, "learning_rate": 9.791590493601464e-06, "loss": 0.1093, "step": 1700 }, { "epoch": 1.678183613030602, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6026071499111199, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9248469286984001, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8095496741062611, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8681117914280071, "eval_custom_dataset_evaluation_cosine_map@100": 0.7204789219182511, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7169305441908291, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7677587208908659, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6026071499111199, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09248469286984001, "eval_custom_dataset_evaluation_cosine_precision@3": 0.26984989136875365, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1736223582856014, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6026071499111199, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9248469286984001, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8095496741062611, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8681117914280071, "eval_loss": 0.1992715299129486, "eval_runtime": 203.9449, "eval_samples_per_second": 9.929, "eval_steps_per_second": 0.623, "step": 1700 }, { "epoch": 1.7769002961500493, "grad_norm": 0.6978006958961487, "learning_rate": 9.060329067641682e-06, "loss": 0.0795, "step": 1800 }, { "epoch": 1.7769002961500493, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.605816709460794, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9286984001580091, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8117222990321944, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8722101520837449, "eval_custom_dataset_evaluation_cosine_map@100": 0.7237388658743508, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7203004837460861, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7712328450315743, "eval_custom_dataset_evaluation_cosine_precision@1": 0.605816709460794, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09286984001580093, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2705740996773981, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17444203041674897, "eval_custom_dataset_evaluation_cosine_recall@1": 0.605816709460794, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9286984001580091, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8117222990321944, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8722101520837449, "eval_loss": 0.19426828622817993, "eval_runtime": 204.0488, "eval_samples_per_second": 9.924, "eval_steps_per_second": 0.622, "step": 1800 }, { "epoch": 1.8756169792694966, "grad_norm": 2.5955662727355957, "learning_rate": 8.329067641681902e-06, "loss": 0.1181, "step": 1900 }, { "epoch": 1.8756169792694966, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6043353742840213, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9297847126209757, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8089077621963263, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8701362828362631, "eval_custom_dataset_evaluation_cosine_map@100": 0.7219374022367849, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7185761194661708, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7700958544922684, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6043353742840213, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09297847126209759, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2696359207321088, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17402725656725263, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6043353742840213, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9297847126209757, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8089077621963263, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8701362828362631, "eval_loss": 0.20659147202968597, "eval_runtime": 204.0716, "eval_samples_per_second": 9.923, "eval_steps_per_second": 0.622, "step": 1900 }, { "epoch": 1.9743336623889438, "grad_norm": 0.8803901672363281, "learning_rate": 7.597806215722121e-06, "loss": 0.0709, "step": 2000 }, { "epoch": 1.9743336623889438, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.603199683981829, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9302784910132332, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8102903416946474, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8703831720323919, "eval_custom_dataset_evaluation_cosine_map@100": 0.7214873578820934, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7181931668908422, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7699451899898953, "eval_custom_dataset_evaluation_cosine_precision@1": 0.603199683981829, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09302784910132333, "eval_custom_dataset_evaluation_cosine_precision@3": 0.27009678056488245, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1740766344064784, "eval_custom_dataset_evaluation_cosine_recall@1": 0.603199683981829, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9302784910132332, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8102903416946474, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8703831720323919, "eval_loss": 0.19996753334999084, "eval_runtime": 203.9747, "eval_samples_per_second": 9.928, "eval_steps_per_second": 0.623, "step": 2000 }, { "epoch": 2.073050345508391, "grad_norm": 0.9776083827018738, "learning_rate": 6.866544789762341e-06, "loss": 0.0423, "step": 2100 }, { "epoch": 2.073050345508391, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6150503653960103, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9359075646849694, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8209065771281848, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8796168279676081, "eval_custom_dataset_evaluation_cosine_map@100": 0.7317976394363255, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.728681000348013, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7793256141306921, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6150503653960103, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09359075646849695, "eval_custom_dataset_evaluation_cosine_precision@3": 0.27363552570939487, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17592336559352165, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6150503653960103, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9359075646849694, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8209065771281848, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8796168279676081, "eval_loss": 0.19277189671993256, "eval_runtime": 203.5377, "eval_samples_per_second": 9.949, "eval_steps_per_second": 0.624, "step": 2100 }, { "epoch": 2.171767028627838, "grad_norm": 0.3466501235961914, "learning_rate": 6.13528336380256e-06, "loss": 0.0365, "step": 2200 }, { "epoch": 2.171767028627838, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6160379221805253, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9352162749358088, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8210547106458621, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8787280268615445, "eval_custom_dataset_evaluation_cosine_map@100": 0.7324401240286031, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7293587895689079, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7796830803735836, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6160379221805253, "eval_custom_dataset_evaluation_cosine_precision@10": 0.0935216274935809, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2736849035486207, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17574560537230893, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6160379221805253, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9352162749358088, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8210547106458621, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8787280268615445, "eval_loss": 0.2016632854938507, "eval_runtime": 203.3562, "eval_samples_per_second": 9.958, "eval_steps_per_second": 0.625, "step": 2200 }, { "epoch": 2.270483711747285, "grad_norm": 11.319628715515137, "learning_rate": 5.40402193784278e-06, "loss": 0.0488, "step": 2300 }, { "epoch": 2.270483711747285, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6060635986569227, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9303772466916848, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.810784120086905, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8715188623345842, "eval_custom_dataset_evaluation_cosine_map@100": 0.7233919610739875, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7200670950782208, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7713978686997449, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6060635986569227, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09303772466916849, "eval_custom_dataset_evaluation_cosine_precision@3": 0.27026137336230166, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17430377246691683, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6060635986569227, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9303772466916848, "eval_custom_dataset_evaluation_cosine_recall@3": 0.810784120086905, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8715188623345842, "eval_loss": 0.2220190316438675, "eval_runtime": 203.7981, "eval_samples_per_second": 9.936, "eval_steps_per_second": 0.623, "step": 2300 }, { "epoch": 2.3692003948667324, "grad_norm": 1.2127763032913208, "learning_rate": 4.672760511882998e-06, "loss": 0.0405, "step": 2400 }, { "epoch": 2.3692003948667324, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.612482717756271, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9345249851866483, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8190302192376062, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8769010468101915, "eval_custom_dataset_evaluation_cosine_map@100": 0.7299302543873705, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7267631415592252, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7775318729565346, "eval_custom_dataset_evaluation_cosine_precision@1": 0.612482717756271, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09345249851866484, "eval_custom_dataset_evaluation_cosine_precision@3": 0.27301007307920205, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17538020936203833, "eval_custom_dataset_evaluation_cosine_recall@1": 0.612482717756271, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9345249851866483, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8190302192376062, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8769010468101915, "eval_loss": 0.20903073251247406, "eval_runtime": 204.2344, "eval_samples_per_second": 9.915, "eval_steps_per_second": 0.622, "step": 2400 }, { "epoch": 2.4679170779861797, "grad_norm": 0.3473336398601532, "learning_rate": 3.941499085923218e-06, "loss": 0.0327, "step": 2500 }, { "epoch": 2.4679170779861797, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6181611692672329, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8215484890381197, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8832214102310882, "eval_custom_dataset_evaluation_cosine_map@100": 0.7347609073841157, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7317605707764854, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7821769875234544, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6181611692672329, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324512, "eval_custom_dataset_evaluation_cosine_precision@3": 0.27384949634603983, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17664428204621765, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6181611692672329, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8215484890381197, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8832214102310882, "eval_loss": 0.1959671527147293, "eval_runtime": 204.2804, "eval_samples_per_second": 9.913, "eval_steps_per_second": 0.622, "step": 2500 }, { "epoch": 2.566633761105627, "grad_norm": 3.084174871444702, "learning_rate": 3.210237659963437e-06, "loss": 0.0369, "step": 2600 }, { "epoch": 2.566633761105627, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6188524590163934, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9379814339324511, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.882678253999605, "eval_custom_dataset_evaluation_cosine_map@100": 0.7355447502017791, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7324816047954649, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7827478877040044, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6188524590163934, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09379814339324513, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17653565079992103, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6188524590163934, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9379814339324511, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453, "eval_custom_dataset_evaluation_cosine_recall@5": 0.882678253999605, "eval_loss": 0.19852839410305023, "eval_runtime": 203.3999, "eval_samples_per_second": 9.956, "eval_steps_per_second": 0.624, "step": 2600 }, { "epoch": 2.665350444225074, "grad_norm": 0.2841149568557739, "learning_rate": 2.4789762340036565e-06, "loss": 0.0493, "step": 2700 }, { "epoch": 2.665350444225074, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6227039304760024, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9392652577523207, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8246099150701165, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8839126999802489, "eval_custom_dataset_evaluation_cosine_map@100": 0.7377982498848669, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7347973078888687, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847760881684369, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6227039304760024, "eval_custom_dataset_evaluation_cosine_precision@10": 0.0939265257752321, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2748699716900388, "eval_custom_dataset_evaluation_cosine_precision@5": 0.1767825399960498, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6227039304760024, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9392652577523207, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8246099150701165, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8839126999802489, "eval_loss": 0.1986970454454422, "eval_runtime": 203.1067, "eval_samples_per_second": 9.97, "eval_steps_per_second": 0.625, "step": 2700 }, { "epoch": 2.7640671273445214, "grad_norm": 35.21987533569336, "learning_rate": 1.7477148080438758e-06, "loss": 0.0466, "step": 2800 }, { "epoch": 2.7640671273445214, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6232964645467114, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9393640134307722, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262887616037922, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8854434129962473, "eval_custom_dataset_evaluation_cosine_map@100": 0.7388327302504635, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7358363978944812, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7856277834747031, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6232964645467114, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09393640134307724, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2754295872012641, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17708868259924945, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6232964645467114, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9393640134307722, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8262887616037922, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8854434129962473, "eval_loss": 0.20075508952140808, "eval_runtime": 203.5383, "eval_samples_per_second": 9.949, "eval_steps_per_second": 0.624, "step": 2800 }, { "epoch": 2.8627838104639682, "grad_norm": 1.2376320362091064, "learning_rate": 1.0164533820840951e-06, "loss": 0.03, "step": 2900 }, { "epoch": 2.8627838104639682, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.6220620185660676, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9396109026269011, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8248568042662453, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8844558562117322, "eval_custom_dataset_evaluation_cosine_map@100": 0.7376002793094979, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346429237637674, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7847496074277119, "eval_custom_dataset_evaluation_cosine_precision@1": 0.6220620185660676, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09396109026269013, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2749522680887484, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17689117124234646, "eval_custom_dataset_evaluation_cosine_recall@1": 0.6220620185660676, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9396109026269011, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8248568042662453, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8844558562117322, "eval_loss": 0.2035462111234665, "eval_runtime": 202.781, "eval_samples_per_second": 9.986, "eval_steps_per_second": 0.626, "step": 2900 }, { "epoch": 2.9615004935834155, "grad_norm": 0.5317863821983337, "learning_rate": 2.8519195612431445e-07, "loss": 0.0633, "step": 3000 }, { "epoch": 2.9615004935834155, "eval_custom_dataset_evaluation_cosine_accuracy@1": 0.621963262887616, "eval_custom_dataset_evaluation_cosine_accuracy@10": 0.9401046810191586, "eval_custom_dataset_evaluation_cosine_accuracy@3": 0.8262393837645665, "eval_custom_dataset_evaluation_cosine_accuracy@5": 0.8843077226940549, "eval_custom_dataset_evaluation_cosine_map@100": 0.7376242387041089, "eval_custom_dataset_evaluation_cosine_mrr@10": 0.7346985522104195, "eval_custom_dataset_evaluation_cosine_ndcg@10": 0.7849111131501391, "eval_custom_dataset_evaluation_cosine_precision@1": 0.621963262887616, "eval_custom_dataset_evaluation_cosine_precision@10": 0.09401046810191586, "eval_custom_dataset_evaluation_cosine_precision@3": 0.2754131279215221, "eval_custom_dataset_evaluation_cosine_precision@5": 0.17686154453881098, "eval_custom_dataset_evaluation_cosine_recall@1": 0.621963262887616, "eval_custom_dataset_evaluation_cosine_recall@10": 0.9401046810191586, "eval_custom_dataset_evaluation_cosine_recall@3": 0.8262393837645665, "eval_custom_dataset_evaluation_cosine_recall@5": 0.8843077226940549, "eval_loss": 0.20363624393939972, "eval_runtime": 203.946, "eval_samples_per_second": 9.929, "eval_steps_per_second": 0.623, "step": 3000 } ], "logging_steps": 100, "max_steps": 3039, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }