Training in progress, step 250, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500770656
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b091bf5f1ce9e8388e64b336bdb4cf8f02f0eb007585067a4b0747d3b743c3aa
|
3 |
size 500770656
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 254917780
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d24339e9d80ca13375893d5df939b469a5841bd9782322090d796d0025d923d0
|
3 |
size 254917780
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14244
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:98fa1f833b77a5bef15319c574c6083893d7c2840ec5da7147454424b67d975e
|
3 |
size 14244
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:477e1ab9e7e387f392e0bb68fb7cd86779a760a788b2ed973ec470f1c83dd5f7
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
-
"best_metric": 0.
|
3 |
-
"best_model_checkpoint": "miner_id_24/checkpoint-
|
4 |
-
"epoch": 0.
|
5 |
"eval_steps": 50,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -1447,6 +1447,364 @@
|
|
1447 |
"eval_samples_per_second": 2.678,
|
1448 |
"eval_steps_per_second": 2.678,
|
1449 |
"step": 200
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1450 |
}
|
1451 |
],
|
1452 |
"logging_steps": 1,
|
@@ -1475,7 +1833,7 @@
|
|
1475 |
"attributes": {}
|
1476 |
}
|
1477 |
},
|
1478 |
-
"total_flos": 1.
|
1479 |
"train_batch_size": 1,
|
1480 |
"trial_name": null,
|
1481 |
"trial_params": null
|
|
|
1 |
{
|
2 |
+
"best_metric": 0.4805048406124115,
|
3 |
+
"best_model_checkpoint": "miner_id_24/checkpoint-250",
|
4 |
+
"epoch": 0.08072490968900728,
|
5 |
"eval_steps": 50,
|
6 |
+
"global_step": 250,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
1447 |
"eval_samples_per_second": 2.678,
|
1448 |
"eval_steps_per_second": 2.678,
|
1449 |
"step": 200
|
1450 |
+
},
|
1451 |
+
{
|
1452 |
+
"epoch": 0.06490282738996185,
|
1453 |
+
"grad_norm": 0.318142294883728,
|
1454 |
+
"learning_rate": 7.830721146206451e-05,
|
1455 |
+
"loss": 0.5384,
|
1456 |
+
"step": 201
|
1457 |
+
},
|
1458 |
+
{
|
1459 |
+
"epoch": 0.06522572702871789,
|
1460 |
+
"grad_norm": 0.288631409406662,
|
1461 |
+
"learning_rate": 7.688410249570214e-05,
|
1462 |
+
"loss": 0.5078,
|
1463 |
+
"step": 202
|
1464 |
+
},
|
1465 |
+
{
|
1466 |
+
"epoch": 0.06554862666747392,
|
1467 |
+
"grad_norm": 0.280100554227829,
|
1468 |
+
"learning_rate": 7.54695740040912e-05,
|
1469 |
+
"loss": 0.4788,
|
1470 |
+
"step": 203
|
1471 |
+
},
|
1472 |
+
{
|
1473 |
+
"epoch": 0.06587152630622994,
|
1474 |
+
"grad_norm": 0.279681533575058,
|
1475 |
+
"learning_rate": 7.406379198842189e-05,
|
1476 |
+
"loss": 0.4447,
|
1477 |
+
"step": 204
|
1478 |
+
},
|
1479 |
+
{
|
1480 |
+
"epoch": 0.06619442594498598,
|
1481 |
+
"grad_norm": 0.2892783284187317,
|
1482 |
+
"learning_rate": 7.266692142344672e-05,
|
1483 |
+
"loss": 0.4932,
|
1484 |
+
"step": 205
|
1485 |
+
},
|
1486 |
+
{
|
1487 |
+
"epoch": 0.066517325583742,
|
1488 |
+
"grad_norm": 0.2658500075340271,
|
1489 |
+
"learning_rate": 7.127912623811993e-05,
|
1490 |
+
"loss": 0.4682,
|
1491 |
+
"step": 206
|
1492 |
+
},
|
1493 |
+
{
|
1494 |
+
"epoch": 0.06684022522249804,
|
1495 |
+
"grad_norm": 0.2946866452693939,
|
1496 |
+
"learning_rate": 6.990056929635957e-05,
|
1497 |
+
"loss": 0.4838,
|
1498 |
+
"step": 207
|
1499 |
+
},
|
1500 |
+
{
|
1501 |
+
"epoch": 0.06716312486125406,
|
1502 |
+
"grad_norm": 0.2683822214603424,
|
1503 |
+
"learning_rate": 6.853141237793506e-05,
|
1504 |
+
"loss": 0.4408,
|
1505 |
+
"step": 208
|
1506 |
+
},
|
1507 |
+
{
|
1508 |
+
"epoch": 0.0674860245000101,
|
1509 |
+
"grad_norm": 0.3225007653236389,
|
1510 |
+
"learning_rate": 6.717181615948126e-05,
|
1511 |
+
"loss": 0.4949,
|
1512 |
+
"step": 209
|
1513 |
+
},
|
1514 |
+
{
|
1515 |
+
"epoch": 0.06780892413876612,
|
1516 |
+
"grad_norm": 0.25332513451576233,
|
1517 |
+
"learning_rate": 6.582194019564266e-05,
|
1518 |
+
"loss": 0.4141,
|
1519 |
+
"step": 210
|
1520 |
+
},
|
1521 |
+
{
|
1522 |
+
"epoch": 0.06813182377752215,
|
1523 |
+
"grad_norm": 0.2799530625343323,
|
1524 |
+
"learning_rate": 6.448194290034848e-05,
|
1525 |
+
"loss": 0.4445,
|
1526 |
+
"step": 211
|
1527 |
+
},
|
1528 |
+
{
|
1529 |
+
"epoch": 0.06845472341627817,
|
1530 |
+
"grad_norm": 0.27327555418014526,
|
1531 |
+
"learning_rate": 6.315198152822272e-05,
|
1532 |
+
"loss": 0.4138,
|
1533 |
+
"step": 212
|
1534 |
+
},
|
1535 |
+
{
|
1536 |
+
"epoch": 0.06877762305503421,
|
1537 |
+
"grad_norm": 0.3778553903102875,
|
1538 |
+
"learning_rate": 6.183221215612904e-05,
|
1539 |
+
"loss": 0.4804,
|
1540 |
+
"step": 213
|
1541 |
+
},
|
1542 |
+
{
|
1543 |
+
"epoch": 0.06910052269379023,
|
1544 |
+
"grad_norm": 0.3077884614467621,
|
1545 |
+
"learning_rate": 6.052278966485491e-05,
|
1546 |
+
"loss": 0.4657,
|
1547 |
+
"step": 214
|
1548 |
+
},
|
1549 |
+
{
|
1550 |
+
"epoch": 0.06942342233254627,
|
1551 |
+
"grad_norm": 0.29660362005233765,
|
1552 |
+
"learning_rate": 5.922386772093526e-05,
|
1553 |
+
"loss": 0.4297,
|
1554 |
+
"step": 215
|
1555 |
+
},
|
1556 |
+
{
|
1557 |
+
"epoch": 0.06974632197130229,
|
1558 |
+
"grad_norm": 0.3540116548538208,
|
1559 |
+
"learning_rate": 5.793559875861938e-05,
|
1560 |
+
"loss": 0.466,
|
1561 |
+
"step": 216
|
1562 |
+
},
|
1563 |
+
{
|
1564 |
+
"epoch": 0.07006922161005832,
|
1565 |
+
"grad_norm": 0.2957676351070404,
|
1566 |
+
"learning_rate": 5.6658133961981894e-05,
|
1567 |
+
"loss": 0.4421,
|
1568 |
+
"step": 217
|
1569 |
+
},
|
1570 |
+
{
|
1571 |
+
"epoch": 0.07039212124881435,
|
1572 |
+
"grad_norm": 0.3042965233325958,
|
1573 |
+
"learning_rate": 5.5391623247180744e-05,
|
1574 |
+
"loss": 0.441,
|
1575 |
+
"step": 218
|
1576 |
+
},
|
1577 |
+
{
|
1578 |
+
"epoch": 0.07071502088757038,
|
1579 |
+
"grad_norm": 0.36982765793800354,
|
1580 |
+
"learning_rate": 5.413621524486363e-05,
|
1581 |
+
"loss": 0.4114,
|
1582 |
+
"step": 219
|
1583 |
+
},
|
1584 |
+
{
|
1585 |
+
"epoch": 0.07103792052632642,
|
1586 |
+
"grad_norm": 0.3452307879924774,
|
1587 |
+
"learning_rate": 5.289205728272586e-05,
|
1588 |
+
"loss": 0.4562,
|
1589 |
+
"step": 220
|
1590 |
+
},
|
1591 |
+
{
|
1592 |
+
"epoch": 0.07136082016508244,
|
1593 |
+
"grad_norm": 0.3854043483734131,
|
1594 |
+
"learning_rate": 5.165929536822059e-05,
|
1595 |
+
"loss": 0.5003,
|
1596 |
+
"step": 221
|
1597 |
+
},
|
1598 |
+
{
|
1599 |
+
"epoch": 0.07168371980383847,
|
1600 |
+
"grad_norm": 0.3237496018409729,
|
1601 |
+
"learning_rate": 5.043807417142436e-05,
|
1602 |
+
"loss": 0.4592,
|
1603 |
+
"step": 222
|
1604 |
+
},
|
1605 |
+
{
|
1606 |
+
"epoch": 0.0720066194425945,
|
1607 |
+
"grad_norm": 0.32223159074783325,
|
1608 |
+
"learning_rate": 4.922853700805909e-05,
|
1609 |
+
"loss": 0.4553,
|
1610 |
+
"step": 223
|
1611 |
+
},
|
1612 |
+
{
|
1613 |
+
"epoch": 0.07232951908135053,
|
1614 |
+
"grad_norm": 0.40129488706588745,
|
1615 |
+
"learning_rate": 4.8030825822673814e-05,
|
1616 |
+
"loss": 0.4276,
|
1617 |
+
"step": 224
|
1618 |
+
},
|
1619 |
+
{
|
1620 |
+
"epoch": 0.07265241872010655,
|
1621 |
+
"grad_norm": 0.34809187054634094,
|
1622 |
+
"learning_rate": 4.684508117198648e-05,
|
1623 |
+
"loss": 0.4856,
|
1624 |
+
"step": 225
|
1625 |
+
},
|
1626 |
+
{
|
1627 |
+
"epoch": 0.07297531835886259,
|
1628 |
+
"grad_norm": 0.3367185592651367,
|
1629 |
+
"learning_rate": 4.567144220838923e-05,
|
1630 |
+
"loss": 0.4555,
|
1631 |
+
"step": 226
|
1632 |
+
},
|
1633 |
+
{
|
1634 |
+
"epoch": 0.07329821799761861,
|
1635 |
+
"grad_norm": 0.35933539271354675,
|
1636 |
+
"learning_rate": 4.4510046663617996e-05,
|
1637 |
+
"loss": 0.4837,
|
1638 |
+
"step": 227
|
1639 |
+
},
|
1640 |
+
{
|
1641 |
+
"epoch": 0.07362111763637465,
|
1642 |
+
"grad_norm": 0.3718101382255554,
|
1643 |
+
"learning_rate": 4.336103083258942e-05,
|
1644 |
+
"loss": 0.4789,
|
1645 |
+
"step": 228
|
1646 |
+
},
|
1647 |
+
{
|
1648 |
+
"epoch": 0.07394401727513067,
|
1649 |
+
"grad_norm": 0.3542415201663971,
|
1650 |
+
"learning_rate": 4.2224529557405645e-05,
|
1651 |
+
"loss": 0.5075,
|
1652 |
+
"step": 229
|
1653 |
+
},
|
1654 |
+
{
|
1655 |
+
"epoch": 0.0742669169138867,
|
1656 |
+
"grad_norm": 0.3407626748085022,
|
1657 |
+
"learning_rate": 4.1100676211530404e-05,
|
1658 |
+
"loss": 0.4803,
|
1659 |
+
"step": 230
|
1660 |
+
},
|
1661 |
+
{
|
1662 |
+
"epoch": 0.07458981655264273,
|
1663 |
+
"grad_norm": 0.39396294951438904,
|
1664 |
+
"learning_rate": 3.998960268413666e-05,
|
1665 |
+
"loss": 0.5117,
|
1666 |
+
"step": 231
|
1667 |
+
},
|
1668 |
+
{
|
1669 |
+
"epoch": 0.07491271619139876,
|
1670 |
+
"grad_norm": 0.3785285949707031,
|
1671 |
+
"learning_rate": 3.889143936462914e-05,
|
1672 |
+
"loss": 0.4925,
|
1673 |
+
"step": 232
|
1674 |
+
},
|
1675 |
+
{
|
1676 |
+
"epoch": 0.07523561583015478,
|
1677 |
+
"grad_norm": 0.36613747477531433,
|
1678 |
+
"learning_rate": 3.780631512734241e-05,
|
1679 |
+
"loss": 0.4434,
|
1680 |
+
"step": 233
|
1681 |
+
},
|
1682 |
+
{
|
1683 |
+
"epoch": 0.07555851546891082,
|
1684 |
+
"grad_norm": 0.3978104591369629,
|
1685 |
+
"learning_rate": 3.673435731641691e-05,
|
1686 |
+
"loss": 0.4613,
|
1687 |
+
"step": 234
|
1688 |
+
},
|
1689 |
+
{
|
1690 |
+
"epoch": 0.07588141510766684,
|
1691 |
+
"grad_norm": 0.43552708625793457,
|
1692 |
+
"learning_rate": 3.567569173085454e-05,
|
1693 |
+
"loss": 0.4177,
|
1694 |
+
"step": 235
|
1695 |
+
},
|
1696 |
+
{
|
1697 |
+
"epoch": 0.07620431474642288,
|
1698 |
+
"grad_norm": 0.3718654215335846,
|
1699 |
+
"learning_rate": 3.463044260975566e-05,
|
1700 |
+
"loss": 0.4611,
|
1701 |
+
"step": 236
|
1702 |
+
},
|
1703 |
+
{
|
1704 |
+
"epoch": 0.07652721438517891,
|
1705 |
+
"grad_norm": 0.41485676169395447,
|
1706 |
+
"learning_rate": 3.3598732617739036e-05,
|
1707 |
+
"loss": 0.5586,
|
1708 |
+
"step": 237
|
1709 |
+
},
|
1710 |
+
{
|
1711 |
+
"epoch": 0.07685011402393493,
|
1712 |
+
"grad_norm": 0.37860673666000366,
|
1713 |
+
"learning_rate": 3.258068283054666e-05,
|
1714 |
+
"loss": 0.4256,
|
1715 |
+
"step": 238
|
1716 |
+
},
|
1717 |
+
{
|
1718 |
+
"epoch": 0.07717301366269097,
|
1719 |
+
"grad_norm": 0.4362449645996094,
|
1720 |
+
"learning_rate": 3.1576412720834746e-05,
|
1721 |
+
"loss": 0.5763,
|
1722 |
+
"step": 239
|
1723 |
+
},
|
1724 |
+
{
|
1725 |
+
"epoch": 0.07749591330144699,
|
1726 |
+
"grad_norm": 0.3914451003074646,
|
1727 |
+
"learning_rate": 3.058604014415343e-05,
|
1728 |
+
"loss": 0.4739,
|
1729 |
+
"step": 240
|
1730 |
+
},
|
1731 |
+
{
|
1732 |
+
"epoch": 0.07781881294020303,
|
1733 |
+
"grad_norm": 0.3677349388599396,
|
1734 |
+
"learning_rate": 2.960968132511567e-05,
|
1735 |
+
"loss": 0.4716,
|
1736 |
+
"step": 241
|
1737 |
+
},
|
1738 |
+
{
|
1739 |
+
"epoch": 0.07814171257895905,
|
1740 |
+
"grad_norm": 0.3888345956802368,
|
1741 |
+
"learning_rate": 2.8647450843757897e-05,
|
1742 |
+
"loss": 0.5218,
|
1743 |
+
"step": 242
|
1744 |
+
},
|
1745 |
+
{
|
1746 |
+
"epoch": 0.07846461221771509,
|
1747 |
+
"grad_norm": 0.37700045108795166,
|
1748 |
+
"learning_rate": 2.7699461622093304e-05,
|
1749 |
+
"loss": 0.4978,
|
1750 |
+
"step": 243
|
1751 |
+
},
|
1752 |
+
{
|
1753 |
+
"epoch": 0.0787875118564711,
|
1754 |
+
"grad_norm": 0.41537439823150635,
|
1755 |
+
"learning_rate": 2.67658249108603e-05,
|
1756 |
+
"loss": 0.4907,
|
1757 |
+
"step": 244
|
1758 |
+
},
|
1759 |
+
{
|
1760 |
+
"epoch": 0.07911041149522714,
|
1761 |
+
"grad_norm": 0.40000054240226746,
|
1762 |
+
"learning_rate": 2.584665027646643e-05,
|
1763 |
+
"loss": 0.488,
|
1764 |
+
"step": 245
|
1765 |
+
},
|
1766 |
+
{
|
1767 |
+
"epoch": 0.07943331113398316,
|
1768 |
+
"grad_norm": 0.395548552274704,
|
1769 |
+
"learning_rate": 2.49420455881305e-05,
|
1770 |
+
"loss": 0.4847,
|
1771 |
+
"step": 246
|
1772 |
+
},
|
1773 |
+
{
|
1774 |
+
"epoch": 0.0797562107727392,
|
1775 |
+
"grad_norm": 0.4183206558227539,
|
1776 |
+
"learning_rate": 2.4052117005223455e-05,
|
1777 |
+
"loss": 0.5261,
|
1778 |
+
"step": 247
|
1779 |
+
},
|
1780 |
+
{
|
1781 |
+
"epoch": 0.08007911041149522,
|
1782 |
+
"grad_norm": 0.37241002917289734,
|
1783 |
+
"learning_rate": 2.317696896481024e-05,
|
1784 |
+
"loss": 0.499,
|
1785 |
+
"step": 248
|
1786 |
+
},
|
1787 |
+
{
|
1788 |
+
"epoch": 0.08040201005025126,
|
1789 |
+
"grad_norm": 0.4700750410556793,
|
1790 |
+
"learning_rate": 2.231670416939364e-05,
|
1791 |
+
"loss": 0.435,
|
1792 |
+
"step": 249
|
1793 |
+
},
|
1794 |
+
{
|
1795 |
+
"epoch": 0.08072490968900728,
|
1796 |
+
"grad_norm": 0.47890686988830566,
|
1797 |
+
"learning_rate": 2.147142357486164e-05,
|
1798 |
+
"loss": 0.6928,
|
1799 |
+
"step": 250
|
1800 |
+
},
|
1801 |
+
{
|
1802 |
+
"epoch": 0.08072490968900728,
|
1803 |
+
"eval_loss": 0.4805048406124115,
|
1804 |
+
"eval_runtime": 93.118,
|
1805 |
+
"eval_samples_per_second": 2.674,
|
1806 |
+
"eval_steps_per_second": 2.674,
|
1807 |
+
"step": 250
|
1808 |
}
|
1809 |
],
|
1810 |
"logging_steps": 1,
|
|
|
1833 |
"attributes": {}
|
1834 |
}
|
1835 |
},
|
1836 |
+
"total_flos": 1.4707264776044544e+17,
|
1837 |
"train_batch_size": 1,
|
1838 |
"trial_name": null,
|
1839 |
"trial_params": null
|