mlplayer-top10 / eval.log
davidquarel's picture
Upload folder using huggingface_hub
09ebadb verified
type eval | step 0 | loss 168.1098 272.7849 581.8813 833.2634 | checkpoint False | ce_loss 1.5684 | sae_losses 167.5743 0.5353 268.5416 4.2431 572.0223 9.8590 779.2783 53.9850 | ce_loss_increases 3.0187 3.5602 3.3668 2.4859 | compound_ce_loss_increase 4.3647 | l0s 10.1774 10.1507 10.1342 10.1511 10.1575 10.1400 10.1654 10.1365 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 167.5743 0.5353 268.5416 4.2431 572.0223 9.8590 779.2783 53.9850
type eval | step 250 | loss 22.9183 47.8590 124.4966 189.7251 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 22.9013 0.0171 47.2923 0.5668 122.4917 2.0050 182.1994 7.5258 | ce_loss_increases 0.6032 2.4329 2.6599 2.1753 | compound_ce_loss_increase 4.0605 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 22.9013 0.0171 47.2923 0.5668 122.4917 2.0050 182.1994 7.5258
type eval | step 500 | loss 1.3615 6.9050 21.9898 41.8133 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 1.3600 0.0015 6.8214 0.0837 21.5443 0.4455 40.4955 1.3178 | ce_loss_increases 0.0245 0.3712 0.7480 0.6704 | compound_ce_loss_increase 1.4771 | l0s 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 1.3600 0.0015 6.8214 0.0837 21.5443 0.4455 40.4955 1.3178
type eval | step 750 | loss 0.2225 2.4260 10.0496 27.8999 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.2221 0.0004 2.3874 0.0386 9.7767 0.2729 27.0278 0.8721 | ce_loss_increases 0.0021 0.1390 0.3861 0.4228 | compound_ce_loss_increase 0.7694 | l0s 9.9998 9.9979 10.0000 10.0000 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.2221 0.0004 2.3874 0.0386 9.7767 0.2729 27.0278 0.8721
type eval | step 1000 | loss 0.1528 1.5595 7.3178 24.4172 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1525 0.0002 1.5301 0.0294 7.0889 0.2289 23.6454 0.7718 | ce_loss_increases 0.0014 0.0887 0.3078 0.3579 | compound_ce_loss_increase 0.6174 | l0s 10.0000 9.9957 10.0000 9.9998 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1525 0.0002 1.5301 0.0294 7.0889 0.2289 23.6454 0.7718
type eval | step 1250 | loss 0.1295 1.2767 6.4260 22.9479 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1293 0.0002 1.2507 0.0260 6.2123 0.2137 22.2141 0.7338 | ce_loss_increases 0.0008 0.0757 0.2810 0.3334 | compound_ce_loss_increase 0.5539 | l0s 10.0000 9.9909 9.9999 9.9995 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1293 0.0002 1.2507 0.0260 6.2123 0.2137 22.2141 0.7338
type eval | step 1500 | loss 0.1170 1.1319 5.9754 22.1373 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1168 0.0002 1.1077 0.0242 5.7696 0.2058 21.4258 0.7115 | ce_loss_increases 0.0002 0.0654 0.2675 0.3179 | compound_ce_loss_increase 0.5209 | l0s 9.9999 9.9878 9.9998 9.9994 10.0000 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1168 0.0002 1.1077 0.0242 5.7696 0.2058 21.4258 0.7115
type eval | step 1750 | loss 0.1071 1.0436 5.7051 21.6486 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.1070 0.0001 1.0205 0.0231 5.5038 0.2014 20.9486 0.7001 | ce_loss_increases 0.0004 0.0590 0.2544 0.3120 | compound_ce_loss_increase 0.4968 | l0s 10.0000 9.9853 9.9999 9.9993 9.9999 10.0000 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.1070 0.0001 1.0205 0.0231 5.5038 0.2014 20.9486 0.7001
type eval | step 2000 | loss 0.0994 0.9797 5.5101 21.2640 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0993 0.0001 0.9574 0.0222 5.3118 0.1983 20.5717 0.6924 | ce_loss_increases 0.0003 0.0571 0.2497 0.3090 | compound_ce_loss_increase 0.4972 | l0s 10.0000 9.9869 9.9999 9.9992 10.0000 9.9999 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0993 0.0001 0.9574 0.0222 5.3118 0.1983 20.5717 0.6924
type eval | step 2250 | loss 0.0931 0.9465 5.3812 20.9064 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0930 0.0001 0.9249 0.0216 5.1854 0.1958 20.2193 0.6870 | ce_loss_increases 0.0003 0.0559 0.2429 0.3034 | compound_ce_loss_increase 0.4841 | l0s 9.9999 9.9844 9.9999 9.9990 10.0000 9.9999 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0930 0.0001 0.9249 0.0216 5.1854 0.1958 20.2193 0.6870
type eval | step 2500 | loss 0.0881 0.9210 5.2853 20.6242 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0880 0.0001 0.9000 0.0211 5.0917 0.1937 19.9405 0.6837 | ce_loss_increases 0.0005 0.0529 0.2394 0.3026 | compound_ce_loss_increase 0.4768 | l0s 10.0000 9.9872 9.9999 9.9991 9.9999 9.9999 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0880 0.0001 0.9000 0.0211 5.0917 0.1937 19.9405 0.6837
type eval | step 2750 | loss 0.0840 0.8982 5.2004 20.3624 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0839 0.0001 0.8776 0.0207 5.0089 0.1915 19.6832 0.6792 | ce_loss_increases 0.0003 0.0502 0.2372 0.2953 | compound_ce_loss_increase 0.4662 | l0s 10.0000 9.9806 9.9999 9.9989 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0839 0.0001 0.8776 0.0207 5.0089 0.1915 19.6832 0.6792
type eval | step 3000 | loss 0.0803 0.8846 5.1537 20.2101 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0802 0.0001 0.8642 0.0204 4.9636 0.1901 19.5323 0.6778 | ce_loss_increases 0.0004 0.0510 0.2324 0.2918 | compound_ce_loss_increase 0.4651 | l0s 10.0000 9.9783 9.9999 9.9988 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0802 0.0001 0.8642 0.0204 4.9636 0.1901 19.5323 0.6778
type eval | step 3250 | loss 0.0779 0.8748 5.1142 20.0800 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0778 0.0001 0.8546 0.0201 4.9249 0.1893 19.4033 0.6767 | ce_loss_increases 0.0002 0.0505 0.2327 0.2960 | compound_ce_loss_increase 0.4629 | l0s 10.0000 9.9794 9.9999 9.9987 10.0000 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0778 0.0001 0.8546 0.0201 4.9249 0.1893 19.4033 0.6767
type eval | step 3500 | loss 0.0748 0.8655 5.0961 19.9682 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0747 0.0001 0.8455 0.0200 4.9071 0.1890 19.2914 0.6768 | ce_loss_increases 0.0004 0.0494 0.2314 0.2938 | compound_ce_loss_increase 0.4568 | l0s 10.0000 9.9794 9.9999 9.9986 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0747 0.0001 0.8455 0.0200 4.9071 0.1890 19.2914 0.6768
type eval | step 3750 | loss 0.0717 0.8599 5.0671 19.8584 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0716 0.0001 0.8401 0.0198 4.8784 0.1887 19.1818 0.6767 | ce_loss_increases 0.0004 0.0495 0.2263 0.2952 | compound_ce_loss_increase 0.4553 | l0s 10.0000 9.9760 10.0000 9.9986 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0716 0.0001 0.8401 0.0198 4.8784 0.1887 19.1818 0.6767
type eval | step 4000 | loss 0.0699 0.8519 5.0313 19.7217 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0698 0.0001 0.8322 0.0197 4.8434 0.1879 19.0465 0.6752 | ce_loss_increases 0.0004 0.0479 0.2238 0.2940 | compound_ce_loss_increase 0.4556 | l0s 10.0000 9.9733 9.9999 9.9986 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0698 0.0001 0.8322 0.0197 4.8434 0.1879 19.0465 0.6752
type eval | step 4250 | loss 0.0680 0.8522 5.0099 19.6328 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.0679 0.0001 0.8326 0.0196 4.8224 0.1875 18.9579 0.6750 | ce_loss_increases 0.0004 0.0487 0.2271 0.2948 | compound_ce_loss_increase 0.4520 | l0s 10.0000 9.9760 9.9999 9.9987 10.0000 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0679 0.0001 0.8326 0.0196 4.8224 0.1875 18.9579 0.6750
type eval | step 4500 | loss 0.0663 0.8437 5.0006 19.5670 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0663 0.0001 0.8242 0.0195 4.8133 0.1873 18.8916 0.6754 | ce_loss_increases 0.0004 0.0468 0.2301 0.2982 | compound_ce_loss_increase 0.4466 | l0s 10.0000 9.9786 9.9999 9.9986 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0663 0.0001 0.8242 0.0195 4.8133 0.1873 18.8916 0.6754
type eval | step 4750 | loss 0.0646 0.8410 4.9807 19.4882 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0645 0.0001 0.8216 0.0194 4.7939 0.1869 18.8137 0.6744 | ce_loss_increases 0.0004 0.0479 0.2308 0.2946 | compound_ce_loss_increase 0.4431 | l0s 10.0000 9.9768 9.9999 9.9988 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0645 0.0001 0.8216 0.0194 4.7939 0.1869 18.8137 0.6744
type eval | step 5000 | loss 0.0634 0.8368 4.9585 19.4179 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0633 0.0001 0.8175 0.0192 4.7722 0.1863 18.7446 0.6733 | ce_loss_increases 0.0004 0.0479 0.2308 0.2927 | compound_ce_loss_increase 0.4429 | l0s 10.0000 9.9816 9.9999 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0633 0.0001 0.8175 0.0192 4.7722 0.1863 18.7446 0.6733
type eval | step 5250 | loss 0.0619 0.8281 4.9185 19.3174 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0619 0.0001 0.8091 0.0191 4.7333 0.1852 18.6464 0.6710 | ce_loss_increases 0.0005 0.0459 0.2323 0.2906 | compound_ce_loss_increase 0.4439 | l0s 10.0000 9.9810 9.9998 9.9983 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0619 0.0001 0.8091 0.0191 4.7333 0.1852 18.6464 0.6710
type eval | step 5500 | loss 0.0607 0.8254 4.9029 19.2816 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0606 0.0001 0.8064 0.0190 4.7179 0.1850 18.6109 0.6707 | ce_loss_increases 0.0006 0.0454 0.2311 0.2922 | compound_ce_loss_increase 0.4428 | l0s 10.0000 9.9800 9.9999 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0606 0.0001 0.8064 0.0190 4.7179 0.1850 18.6109 0.6707
type eval | step 5750 | loss 0.0604 0.8260 4.8949 19.2572 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.0603 0.0001 0.8070 0.0190 4.7101 0.1848 18.5869 0.6703 | ce_loss_increases 0.0005 0.0460 0.2293 0.2948 | compound_ce_loss_increase 0.4432 | l0s 10.0000 9.9758 9.9999 9.9985 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0603 0.0001 0.8070 0.0190 4.7101 0.1848 18.5869 0.6703
type eval | step 6000 | loss 0.0589 0.8227 4.8973 19.2551 | checkpoint True True False True | ce_loss 1.5684 | sae_losses 0.0588 0.0001 0.8037 0.0190 4.7126 0.1847 18.5844 0.6707 | ce_loss_increases 0.0003 0.0452 0.2243 0.2952 | compound_ce_loss_increase 0.4497 | l0s 10.0000 9.9778 9.9999 9.9984 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0588 0.0001 0.8037 0.0190 4.7126 0.1847 18.5844 0.6707
type eval | step 6250 | loss 0.0577 0.8182 4.8942 19.2669 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0576 0.0001 0.7992 0.0190 4.7092 0.1850 18.5960 0.6709 | ce_loss_increases 0.0002 0.0457 0.2225 0.2943 | compound_ce_loss_increase 0.4482 | l0s 10.0000 9.9836 9.9999 9.9983 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0576 0.0001 0.7992 0.0190 4.7092 0.1850 18.5960 0.6709
type eval | step 6500 | loss 0.0568 0.8121 4.8741 19.2402 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0567 0.0001 0.7932 0.0189 4.6895 0.1846 18.5702 0.6700 | ce_loss_increases 0.0002 0.0441 0.2209 0.2920 | compound_ce_loss_increase 0.4524 | l0s 10.0000 9.9852 9.9999 9.9978 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0567 0.0001 0.7932 0.0189 4.6895 0.1846 18.5702 0.6700
type eval | step 6750 | loss 0.0563 0.8108 4.8613 19.2203 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0562 0.0001 0.7919 0.0189 4.6767 0.1845 18.5506 0.6697 | ce_loss_increases 0.0002 0.0441 0.2172 0.2906 | compound_ce_loss_increase 0.4520 | l0s 10.0000 9.9817 9.9999 9.9979 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0562 0.0001 0.7919 0.0189 4.6767 0.1845 18.5506 0.6697
type eval | step 7000 | loss 0.0555 0.8104 4.8550 19.2265 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0554 0.0001 0.7916 0.0189 4.6702 0.1848 18.5573 0.6693 | ce_loss_increases 0.0000 0.0437 0.2151 0.2892 | compound_ce_loss_increase 0.4478 | l0s 10.0000 9.9826 9.9999 9.9982 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0554 0.0001 0.7916 0.0189 4.6702 0.1848 18.5573 0.6693
type eval | step 7250 | loss 0.0550 0.8072 4.8374 19.2168 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0550 0.0001 0.7884 0.0188 4.6529 0.1845 18.5482 0.6687 | ce_loss_increases -0.0003 0.0438 0.2148 0.2890 | compound_ce_loss_increase 0.4449 | l0s 10.0000 9.9819 9.9999 9.9980 9.9999 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0550 0.0001 0.7884 0.0188 4.6529 0.1845 18.5482 0.6687
type eval | step 7500 | loss 0.0544 0.8011 4.8251 19.1991 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0543 0.0001 0.7824 0.0187 4.6408 0.1843 18.5308 0.6683 | ce_loss_increases 0.0001 0.0435 0.2151 0.2881 | compound_ce_loss_increase 0.4432 | l0s 10.0000 9.9822 9.9999 9.9978 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0543 0.0001 0.7824 0.0187 4.6408 0.1843 18.5308 0.6683
type eval | step 7750 | loss 0.0538 0.7944 4.7945 19.1503 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0538 0.0001 0.7758 0.0186 4.6110 0.1835 18.4838 0.6664 | ce_loss_increases 0.0004 0.0430 0.2132 0.2892 | compound_ce_loss_increase 0.4394 | l0s 10.0000 9.9851 9.9999 9.9976 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0538 0.0001 0.7758 0.0186 4.6110 0.1835 18.4838 0.6664
type eval | step 8000 | loss 0.0533 0.7960 4.7875 19.1361 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.0532 0.0001 0.7774 0.0186 4.6042 0.1833 18.4698 0.6663 | ce_loss_increases 0.0002 0.0423 0.2128 0.2884 | compound_ce_loss_increase 0.4376 | l0s 10.0000 9.9797 9.9999 9.9975 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0532 0.0001 0.7774 0.0186 4.6042 0.1833 18.4698 0.6663
type eval | step 8250 | loss 0.0542 0.7989 4.7787 19.1375 | checkpoint False False True False | ce_loss 1.5684 | sae_losses 0.0541 0.0001 0.7802 0.0187 4.5955 0.1833 18.4717 0.6659 | ce_loss_increases 0.0002 0.0433 0.2107 0.2903 | compound_ce_loss_increase 0.4423 | l0s 10.0000 9.9783 9.9999 9.9979 9.9999 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0541 0.0001 0.7802 0.0187 4.5955 0.1833 18.4717 0.6659
type eval | step 8500 | loss 0.0529 0.8011 4.7837 19.1452 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0529 0.0001 0.7824 0.0187 4.6002 0.1835 18.4793 0.6659 | ce_loss_increases 0.0002 0.0428 0.2097 0.2900 | compound_ce_loss_increase 0.4467 | l0s 10.0000 9.9825 9.9999 9.9982 10.0000 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0529 0.0001 0.7824 0.0187 4.6002 0.1835 18.4793 0.6659
type eval | step 8750 | loss 0.0525 0.7999 4.7846 19.1646 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0524 0.0001 0.7812 0.0187 4.6009 0.1837 18.4980 0.6666 | ce_loss_increases 0.0001 0.0437 0.2079 0.2909 | compound_ce_loss_increase 0.4455 | l0s 10.0000 9.9795 10.0000 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0524 0.0001 0.7812 0.0187 4.6009 0.1837 18.4980 0.6666
type eval | step 9000 | loss 0.0517 0.7989 4.7757 19.1540 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.0516 0.0001 0.7803 0.0186 4.5922 0.1835 18.4875 0.6665 | ce_loss_increases 0.0000 0.0439 0.2091 0.2910 | compound_ce_loss_increase 0.4483 | l0s 10.0000 9.9845 10.0000 9.9981 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0516 0.0001 0.7803 0.0186 4.5922 0.1835 18.4875 0.6665
type eval | step 9250 | loss 0.0513 0.7937 4.7696 19.1385 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0513 0.0001 0.7750 0.0186 4.5862 0.1834 18.4723 0.6662 | ce_loss_increases 0.0002 0.0448 0.2065 0.2900 | compound_ce_loss_increase 0.4443 | l0s 10.0000 9.9847 10.0000 9.9983 10.0000 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0513 0.0001 0.7750 0.0186 4.5862 0.1834 18.4723 0.6662
type eval | step 9500 | loss 0.0508 0.7958 4.7673 19.1424 | checkpoint True False True False | ce_loss 1.5684 | sae_losses 0.0507 0.0001 0.7772 0.0186 4.5838 0.1835 18.4760 0.6664 | ce_loss_increases -0.0000 0.0452 0.2055 0.2922 | compound_ce_loss_increase 0.4430 | l0s 10.0000 9.9848 10.0000 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0507 0.0001 0.7772 0.0186 4.5838 0.1835 18.4760 0.6664
type eval | step 9750 | loss 0.0509 0.7914 4.7581 19.1319 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.0508 0.0001 0.7729 0.0186 4.5748 0.1833 18.4654 0.6664 | ce_loss_increases -0.0001 0.0450 0.2051 0.2920 | compound_ce_loss_increase 0.4445 | l0s 10.0000 9.9858 10.0000 9.9980 10.0000 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0508 0.0001 0.7729 0.0186 4.5748 0.1833 18.4654 0.6664
type eval | step 10000 | loss 0.0503 0.7873 4.7461 19.1205 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0502 0.0001 0.7688 0.0185 4.5630 0.1831 18.4546 0.6659 | ce_loss_increases 0.0001 0.0446 0.2053 0.2921 | compound_ce_loss_increase 0.4420 | l0s 10.0000 9.9839 10.0000 9.9979 9.9999 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0502 0.0001 0.7688 0.0185 4.5630 0.1831 18.4546 0.6659
type eval | step 10250 | loss 0.0497 0.7823 4.7289 19.0921 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0496 0.0001 0.7639 0.0185 4.5463 0.1826 18.4271 0.6650 | ce_loss_increases 0.0001 0.0439 0.2053 0.2925 | compound_ce_loss_increase 0.4417 | l0s 10.0000 9.9818 10.0000 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0496 0.0001 0.7639 0.0185 4.5463 0.1826 18.4271 0.6650
type eval | step 10500 | loss 0.0494 0.7826 4.7208 19.0669 | checkpoint True False True True | ce_loss 1.5684 | sae_losses 0.0493 0.0001 0.7641 0.0184 4.5385 0.1822 18.4022 0.6646 | ce_loss_increases 0.0001 0.0422 0.2049 0.2929 | compound_ce_loss_increase 0.4423 | l0s 10.0000 9.9857 10.0000 9.9981 10.0000 9.9996 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0493 0.0001 0.7641 0.0184 4.5385 0.1822 18.4022 0.6646
type eval | step 10750 | loss 0.0500 0.7826 4.7174 19.0702 | checkpoint False False True False | ce_loss 1.5684 | sae_losses 0.0499 0.0001 0.7641 0.0185 4.5351 0.1823 18.4058 0.6645 | ce_loss_increases 0.0000 0.0425 0.2058 0.2928 | compound_ce_loss_increase 0.4458 | l0s 10.0000 9.9833 10.0000 9.9981 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0499 0.0001 0.7641 0.0185 4.5351 0.1823 18.4058 0.6645
type eval | step 11000 | loss 0.0489 0.7865 4.7229 19.0775 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0488 0.0001 0.7679 0.0185 4.5405 0.1824 18.4130 0.6645 | ce_loss_increases 0.0002 0.0423 0.2052 0.2943 | compound_ce_loss_increase 0.4453 | l0s 10.0000 9.9831 10.0000 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0488 0.0001 0.7679 0.0185 4.5405 0.1824 18.4130 0.6645
type eval | step 11250 | loss 0.0487 0.7857 4.7317 19.0920 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0486 0.0001 0.7671 0.0185 4.5491 0.1826 18.4269 0.6652 | ce_loss_increases 0.0002 0.0431 0.2044 0.2942 | compound_ce_loss_increase 0.4456 | l0s 10.0000 9.9823 10.0000 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0486 0.0001 0.7671 0.0185 4.5491 0.1826 18.4269 0.6652
type eval | step 11500 | loss 0.0479 0.7833 4.7260 19.0932 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0478 0.0001 0.7649 0.0185 4.5434 0.1826 18.4279 0.6654 | ce_loss_increases 0.0001 0.0427 0.2045 0.2939 | compound_ce_loss_increase 0.4444 | l0s 9.9999 9.9835 10.0000 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0478 0.0001 0.7649 0.0185 4.5434 0.1826 18.4279 0.6654
type eval | step 11750 | loss 0.0473 0.7804 4.7204 19.0848 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.0472 0.0001 0.7620 0.0185 4.5379 0.1825 18.4197 0.6651 | ce_loss_increases 0.0002 0.0432 0.2039 0.2926 | compound_ce_loss_increase 0.4476 | l0s 9.9999 9.9812 10.0000 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0472 0.0001 0.7620 0.0185 4.5379 0.1825 18.4197 0.6651
type eval | step 12000 | loss 0.0472 0.7773 4.7205 19.0853 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.0471 0.0001 0.7587 0.0185 4.5379 0.1826 18.4203 0.6649 | ce_loss_increases 0.0001 0.0435 0.2028 0.2918 | compound_ce_loss_increase 0.4473 | l0s 9.9998 9.9833 10.0000 9.9981 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0471 0.0001 0.7587 0.0185 4.5379 0.1826 18.4203 0.6649
type eval | step 12250 | loss 0.0470 0.7747 4.7178 19.0871 | checkpoint True True False False | ce_loss 1.5684 | sae_losses 0.0469 0.0001 0.7562 0.0185 4.5353 0.1825 18.4223 0.6648 | ce_loss_increases 0.0002 0.0434 0.2027 0.2904 | compound_ce_loss_increase 0.4472 | l0s 9.9999 9.9843 10.0000 9.9981 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0469 0.0001 0.7562 0.0185 4.5353 0.1825 18.4223 0.6648
type eval | step 12500 | loss 0.0463 0.7735 4.7101 19.0819 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0463 0.0001 0.7550 0.0185 4.5276 0.1824 18.4174 0.6645 | ce_loss_increases 0.0000 0.0429 0.2019 0.2907 | compound_ce_loss_increase 0.4478 | l0s 9.9999 9.9827 10.0000 9.9980 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0463 0.0001 0.7550 0.0185 4.5276 0.1824 18.4174 0.6645
type eval | step 12750 | loss 0.0464 0.7695 4.7006 19.0653 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.0463 0.0001 0.7510 0.0184 4.5183 0.1822 18.4014 0.6639 | ce_loss_increases 0.0000 0.0444 0.2024 0.2909 | compound_ce_loss_increase 0.4480 | l0s 9.9999 9.9852 9.9999 9.9980 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0463 0.0001 0.7510 0.0184 4.5183 0.1822 18.4014 0.6639
type eval | step 13000 | loss 0.0462 0.7672 4.6928 19.0483 | checkpoint True True True True | ce_loss 1.5684 | sae_losses 0.0461 0.0001 0.7488 0.0184 4.5108 0.1819 18.3850 0.6633 | ce_loss_increases 0.0000 0.0435 0.2025 0.2900 | compound_ce_loss_increase 0.4478 | l0s 9.9999 9.9812 10.0000 9.9979 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0461 0.0001 0.7488 0.0184 4.5108 0.1819 18.3850 0.6633
type eval | step 13250 | loss 0.0467 0.7665 4.6924 19.0481 | checkpoint False True True True | ce_loss 1.5684 | sae_losses 0.0466 0.0001 0.7481 0.0184 4.5105 0.1819 18.3849 0.6632 | ce_loss_increases -0.0001 0.0435 0.2018 0.2905 | compound_ce_loss_increase 0.4495 | l0s 9.9999 9.9841 10.0000 9.9978 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0466 0.0001 0.7481 0.0184 4.5105 0.1819 18.3849 0.6632
type eval | step 13500 | loss 0.0464 0.7690 4.6959 19.0542 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0464 0.0001 0.7505 0.0184 4.5139 0.1820 18.3911 0.6631 | ce_loss_increases 0.0000 0.0432 0.2033 0.2911 | compound_ce_loss_increase 0.4494 | l0s 9.9999 9.9851 10.0000 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0464 0.0001 0.7505 0.0184 4.5139 0.1820 18.3911 0.6631
type eval | step 13750 | loss 0.0459 0.7718 4.7052 19.0673 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0459 0.0001 0.7534 0.0185 4.5231 0.1821 18.4039 0.6634 | ce_loss_increases 0.0002 0.0433 0.2043 0.2918 | compound_ce_loss_increase 0.4493 | l0s 9.9998 9.9820 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0459 0.0001 0.7534 0.0185 4.5231 0.1821 18.4039 0.6634
type eval | step 14000 | loss 0.0457 0.7698 4.7050 19.0779 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0457 0.0001 0.7514 0.0184 4.5229 0.1822 18.4145 0.6634 | ce_loss_increases 0.0001 0.0428 0.2035 0.2922 | compound_ce_loss_increase 0.4502 | l0s 9.9999 9.9828 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0457 0.0001 0.7514 0.0184 4.5229 0.1822 18.4145 0.6634
type eval | step 14250 | loss 0.0455 0.7687 4.7004 19.0730 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0454 0.0001 0.7503 0.0184 4.5183 0.1821 18.4102 0.6628 | ce_loss_increases 0.0000 0.0428 0.2032 0.2933 | compound_ce_loss_increase 0.4504 | l0s 10.0000 9.9844 9.9999 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0454 0.0001 0.7503 0.0184 4.5183 0.1821 18.4102 0.6628
type eval | step 14500 | loss 0.0459 0.7677 4.7017 19.0782 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0458 0.0001 0.7493 0.0184 4.5196 0.1821 18.4152 0.6630 | ce_loss_increases -0.0000 0.0428 0.2021 0.2925 | compound_ce_loss_increase 0.4511 | l0s 9.9999 9.9796 10.0000 9.9985 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0458 0.0001 0.7493 0.0184 4.5196 0.1821 18.4152 0.6630
type eval | step 14750 | loss 0.0454 0.7675 4.6989 19.0797 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0454 0.0001 0.7491 0.0184 4.5168 0.1821 18.4169 0.6628 | ce_loss_increases 0.0001 0.0432 0.2028 0.2926 | compound_ce_loss_increase 0.4506 | l0s 9.9999 9.9839 9.9999 9.9985 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0454 0.0001 0.7491 0.0184 4.5168 0.1821 18.4169 0.6628
type eval | step 15000 | loss 0.0454 0.7669 4.6961 19.0762 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0454 0.0001 0.7485 0.0184 4.5140 0.1820 18.4135 0.6627 | ce_loss_increases 0.0001 0.0433 0.2018 0.2921 | compound_ce_loss_increase 0.4507 | l0s 9.9999 9.9839 9.9999 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0454 0.0001 0.7485 0.0184 4.5140 0.1820 18.4135 0.6627
type eval | step 15250 | loss 0.0452 0.7654 4.6917 19.0676 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0451 0.0001 0.7471 0.0184 4.5098 0.1819 18.4052 0.6624 | ce_loss_increases 0.0002 0.0431 0.2016 0.2929 | compound_ce_loss_increase 0.4513 | l0s 9.9999 9.9833 9.9999 9.9981 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0451 0.0001 0.7471 0.0184 4.5098 0.1819 18.4052 0.6624
type eval | step 15500 | loss 0.0450 0.7636 4.6846 19.0570 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0449 0.0001 0.7453 0.0183 4.5029 0.1817 18.3949 0.6621 | ce_loss_increases 0.0001 0.0435 0.2012 0.2922 | compound_ce_loss_increase 0.4534 | l0s 9.9998 9.9845 9.9999 9.9981 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0449 0.0001 0.7453 0.0183 4.5029 0.1817 18.3949 0.6621
type eval | step 15750 | loss 0.0453 0.7628 4.6851 19.0576 | checkpoint False True False False | ce_loss 1.5684 | sae_losses 0.0452 0.0001 0.7445 0.0183 4.5035 0.1816 18.3955 0.6621 | ce_loss_increases 0.0001 0.0434 0.2017 0.2926 | compound_ce_loss_increase 0.4531 | l0s 9.9998 9.9827 9.9999 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0452 0.0001 0.7445 0.0183 4.5035 0.1816 18.3955 0.6621
type eval | step 16000 | loss 0.0455 0.7642 4.6876 19.0632 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0454 0.0001 0.7459 0.0183 4.5060 0.1816 18.4009 0.6622 | ce_loss_increases 0.0001 0.0432 0.2027 0.2932 | compound_ce_loss_increase 0.4546 | l0s 9.9999 9.9793 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0454 0.0001 0.7459 0.0183 4.5060 0.1816 18.4009 0.6622
type eval | step 16250 | loss 0.0451 0.7667 4.6912 19.0676 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0450 0.0001 0.7484 0.0184 4.5094 0.1817 18.4052 0.6624 | ce_loss_increases 0.0003 0.0428 0.2029 0.2935 | compound_ce_loss_increase 0.4563 | l0s 9.9999 9.9810 10.0000 9.9985 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0450 0.0001 0.7484 0.0184 4.5094 0.1817 18.4052 0.6624
type eval | step 16500 | loss 0.0450 0.7670 4.6942 19.0760 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0449 0.0001 0.7486 0.0184 4.5124 0.1818 18.4133 0.6627 | ce_loss_increases 0.0003 0.0426 0.2026 0.2946 | compound_ce_loss_increase 0.4544 | l0s 9.9998 9.9830 10.0000 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0449 0.0001 0.7486 0.0184 4.5124 0.1818 18.4133 0.6627
type eval | step 16750 | loss 0.0448 0.7658 4.6928 19.0710 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0447 0.0001 0.7475 0.0184 4.5110 0.1818 18.4085 0.6625 | ce_loss_increases 0.0003 0.0422 0.2025 0.2938 | compound_ce_loss_increase 0.4564 | l0s 9.9999 9.9841 10.0000 9.9982 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0447 0.0001 0.7475 0.0184 4.5110 0.1818 18.4085 0.6625
type eval | step 17000 | loss 0.0450 0.7650 4.6930 19.0730 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0450 0.0001 0.7467 0.0184 4.5113 0.1818 18.4104 0.6625 | ce_loss_increases -0.0002 0.0423 0.2019 0.2936 | compound_ce_loss_increase 0.4567 | l0s 9.9998 9.9822 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0450 0.0001 0.7467 0.0184 4.5113 0.1818 18.4104 0.6625
type eval | step 17250 | loss 0.0445 0.7644 4.6934 19.0765 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0445 0.0001 0.7460 0.0183 4.5117 0.1817 18.4140 0.6624 | ce_loss_increases 0.0000 0.0416 0.2015 0.2935 | compound_ce_loss_increase 0.4565 | l0s 9.9999 9.9843 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0445 0.0001 0.7460 0.0183 4.5117 0.1817 18.4140 0.6624
type eval | step 17500 | loss 0.0451 0.7639 4.6909 19.0755 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0450 0.0001 0.7456 0.0183 4.5091 0.1817 18.4132 0.6622 | ce_loss_increases 0.0002 0.0424 0.2023 0.2943 | compound_ce_loss_increase 0.4557 | l0s 9.9999 9.9827 10.0000 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0450 0.0001 0.7456 0.0183 4.5091 0.1817 18.4132 0.6622
type eval | step 17750 | loss 0.0447 0.7632 4.6871 19.0713 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0447 0.0001 0.7449 0.0183 4.5055 0.1817 18.4092 0.6621 | ce_loss_increases 0.0001 0.0428 0.2017 0.2949 | compound_ce_loss_increase 0.4566 | l0s 9.9999 9.9837 9.9999 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0447 0.0001 0.7449 0.0183 4.5055 0.1817 18.4092 0.6621
type eval | step 18000 | loss 0.0444 0.7616 4.6827 19.0639 | checkpoint True True True False | ce_loss 1.5684 | sae_losses 0.0444 0.0001 0.7433 0.0183 4.5012 0.1815 18.4021 0.6618 | ce_loss_increases 0.0002 0.0431 0.2009 0.2943 | compound_ce_loss_increase 0.4569 | l0s 9.9999 9.9834 9.9999 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0444 0.0001 0.7433 0.0183 4.5012 0.1815 18.4021 0.6618
type eval | step 18250 | loss 0.0444 0.7616 4.6823 19.0625 | checkpoint False True True False | ce_loss 1.5684 | sae_losses 0.0444 0.0001 0.7433 0.0183 4.5008 0.1815 18.4010 0.6615 | ce_loss_increases 0.0001 0.0428 0.2018 0.2946 | compound_ce_loss_increase 0.4556 | l0s 9.9999 9.9824 10.0000 9.9981 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0444 0.0001 0.7433 0.0183 4.5008 0.1815 18.4010 0.6615
type eval | step 18500 | loss 0.0446 0.7613 4.6825 19.0634 | checkpoint False True False False | ce_loss 1.5684 | sae_losses 0.0445 0.0001 0.7430 0.0183 4.5010 0.1815 18.4020 0.6614 | ce_loss_increases 0.0001 0.0433 0.2021 0.2938 | compound_ce_loss_increase 0.4587 | l0s 9.9998 9.9824 10.0000 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0445 0.0001 0.7430 0.0183 4.5010 0.1815 18.4020 0.6614
type eval | step 18750 | loss 0.0448 0.7629 4.6847 19.0700 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0447 0.0001 0.7446 0.0183 4.5032 0.1815 18.4087 0.6613 | ce_loss_increases 0.0001 0.0432 0.2028 0.2949 | compound_ce_loss_increase 0.4557 | l0s 9.9999 9.9839 10.0000 9.9984 9.9999 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0447 0.0001 0.7446 0.0183 4.5032 0.1815 18.4087 0.6613
type eval | step 19000 | loss 0.0444 0.7639 4.6879 19.0715 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0443 0.0001 0.7455 0.0183 4.5063 0.1816 18.4102 0.6613 | ce_loss_increases 0.0002 0.0431 0.2025 0.2953 | compound_ce_loss_increase 0.4589 | l0s 9.9999 9.9824 10.0000 9.9985 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0443 0.0001 0.7455 0.0183 4.5063 0.1816 18.4102 0.6613
type eval | step 19250 | loss 0.0444 0.7637 4.6876 19.0698 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0443 0.0001 0.7454 0.0183 4.5061 0.1815 18.4087 0.6611 | ce_loss_increases 0.0003 0.0432 0.2023 0.2965 | compound_ce_loss_increase 0.4570 | l0s 9.9999 9.9830 10.0000 9.9983 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0443 0.0001 0.7454 0.0183 4.5061 0.1815 18.4087 0.6611
type eval | step 19500 | loss 0.0444 0.7635 4.6891 19.0711 | checkpoint False False False False | ce_loss 1.5684 | sae_losses 0.0444 0.0001 0.7452 0.0183 4.5076 0.1816 18.4102 0.6608 | ce_loss_increases 0.0001 0.0428 0.2017 0.2955 | compound_ce_loss_increase 0.4577 | l0s 9.9999 9.9836 10.0000 9.9984 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0444 0.0001 0.7452 0.0183 4.5076 0.1816 18.4102 0.6608
type eval | step 19750 | loss 0.0442 0.7629 4.6895 19.0707 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0441 0.0001 0.7445 0.0183 4.5080 0.1816 18.4101 0.6606 | ce_loss_increases -0.0001 0.0425 0.2015 0.2955 | compound_ce_loss_increase 0.4561 | l0s 9.9999 9.9841 9.9999 9.9984 10.0000 9.9998 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0441 0.0001 0.7445 0.0183 4.5080 0.1816 18.4101 0.6606
type eval | step 20000 | loss 0.0441 0.7626 4.6893 19.0727 | checkpoint True False False False | ce_loss 1.5684 | sae_losses 0.0440 0.0001 0.7443 0.0183 4.5077 0.1816 18.4123 0.6604 | ce_loss_increases 0.0003 0.0424 0.2017 0.2952 | compound_ce_loss_increase 0.4580 | l0s 9.9999 9.9828 10.0000 9.9985 10.0000 9.9997 10.0000 10.0000 | stream_l1s 37.2125 2.0307 51.7214 6.3245 74.0042 9.5952 85.4173 19.7454 | βˆ‡_l1 0.0000 0.0000 0.0000 0.0000 | recon_l2 0.0440 0.0001 0.7443 0.0183 4.5077 0.1816 18.4123 0.6604