diff --git "a/evals/core_9mcqa/task-007-csqa-predictions.jsonl" "b/evals/core_9mcqa/task-007-csqa-predictions.jsonl" new file mode 100644--- /dev/null +++ "b/evals/core_9mcqa/task-007-csqa-predictions.jsonl" @@ -0,0 +1,1221 @@ +{"doc_id": 0, "native_id": "1afa02df02c908a558b4036e80242fac", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3672780990600586, "incorrect_loss_raw": 8.486880421638489, "correct_loss_per_char": 0.2734556198120117, "incorrect_loss_per_char": 1.015405047426816, "correct_loss_per_token": 1.3672780990600586, "incorrect_loss_per_token": 5.264063318570455, "correct_loss_uncond": -11.464608192443848, "incorrect_loss_uncond": -7.036345362663269}, "model_output": [{"sum_logits": -1.3672780990600586, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -1.3672780990600586, "logits_per_char": -0.2734556198120117, "num_chars": 5}, {"sum_logits": -6.355949878692627, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -6.355949878692627, "logits_per_char": -0.7944937348365784, "num_chars": 8}, {"sum_logits": -7.144725799560547, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -3.5723628997802734, "logits_per_char": -0.42027798820944395, "num_chars": 17}, {"sum_logits": -6.468487739562988, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -6.468487739562988, "logits_per_char": -1.2936975479125976, "num_chars": 5}, {"sum_logits": -13.978358268737793, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.659452756245931, "logits_per_char": -1.5531509187486436, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1, "native_id": "a7ab086045575bb497933726e4e6ad28", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.404781341552734, "incorrect_loss_raw": 14.799781560897827, "correct_loss_per_char": 0.8003677955040565, "incorrect_loss_per_char": 1.0243811614029892, "correct_loss_per_token": 5.202390670776367, "incorrect_loss_per_token": 5.513587594032288, "correct_loss_uncond": -8.944704055786133, "incorrect_loss_uncond": -5.0227577686309814}, "model_output": [{"sum_logits": -10.404781341552734, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.349485397338867, "logits_per_token": -5.202390670776367, "logits_per_char": -0.8003677955040565, "num_chars": 13}, {"sum_logits": -16.05098533630371, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.444162368774414, "logits_per_token": -4.012746334075928, "logits_per_char": -0.7295902425592596, "num_chars": 22}, {"sum_logits": -12.028044700622559, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.203275680541992, "logits_per_token": -6.014022350311279, "logits_per_char": -0.9252342077401968, "num_chars": 13}, {"sum_logits": -16.990230560302734, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.56057357788086, "logits_per_token": -8.495115280151367, "logits_per_char": -1.6990230560302735, "num_chars": 10}, {"sum_logits": -14.129865646362305, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -3.532466411590576, "logits_per_char": -0.7436771392822266, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 2, "native_id": "b8c0a4703079cf661d7261a60a1bcbff", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9773478507995605, "incorrect_loss_raw": 11.109384775161743, "correct_loss_per_char": 0.29773478507995604, "incorrect_loss_per_char": 1.2740338870457242, "correct_loss_per_token": 1.4886739253997803, "incorrect_loss_per_token": 7.883079171180725, "correct_loss_uncond": -13.74675989151001, "incorrect_loss_uncond": -4.125202417373657}, "model_output": [{"sum_logits": -11.597760200500488, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.656052589416504, "logits_per_token": -11.597760200500488, "logits_per_char": -1.656822885785784, "num_chars": 7}, {"sum_logits": -2.9773478507995605, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -1.4886739253997803, "logits_per_char": -0.29773478507995604, "num_chars": 10}, {"sum_logits": -7.02933406829834, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -7.02933406829834, "logits_per_char": -1.0041905811854772, "num_chars": 7}, {"sum_logits": -10.904169082641602, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -5.452084541320801, "logits_per_char": -0.7788692201886859, "num_chars": 14}, {"sum_logits": -14.906275749206543, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.38226318359375, "logits_per_token": -7.4531378746032715, "logits_per_char": -1.6562528610229492, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 3, "native_id": "e68fb2448fd74e402aae9982aa76e527", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7722601890563965, "incorrect_loss_raw": 17.030321836471558, "correct_loss_per_char": 0.1320123899550665, "incorrect_loss_per_char": 1.6730378923994123, "correct_loss_per_token": 0.9240867296854655, "incorrect_loss_per_token": 8.380525350570679, "correct_loss_uncond": -15.163937091827393, "incorrect_loss_uncond": -6.102617263793945}, "model_output": [{"sum_logits": -2.7722601890563965, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -17.93619728088379, "logits_per_token": -0.9240867296854655, "logits_per_char": -0.1320123899550665, "num_chars": 21}, {"sum_logits": -7.878630638122559, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.856280326843262, "logits_per_token": -7.878630638122559, "logits_per_char": -1.3131051063537598, "num_chars": 6}, {"sum_logits": -23.20880126953125, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -32.787208557128906, "logits_per_token": -5.8022003173828125, "logits_per_char": -1.1604400634765626, "num_chars": 20}, {"sum_logits": -11.244977951049805, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.016199111938477, "logits_per_token": -11.244977951049805, "logits_per_char": -1.874162991841634, "num_chars": 6}, {"sum_logits": -25.788877487182617, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -31.872068405151367, "logits_per_token": -8.596292495727539, "logits_per_char": -2.3444434079256924, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 4, "native_id": "2435de612dd69f2012b9e40d6af4ce38", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.423169136047363, "incorrect_loss_raw": 7.793874740600586, "correct_loss_per_char": 1.1778961420059204, "incorrect_loss_per_char": 0.7925317287445068, "correct_loss_per_token": 4.711584568023682, "incorrect_loss_per_token": 5.250807523727417, "correct_loss_uncond": -10.191315650939941, "incorrect_loss_uncond": -9.23627233505249}, "model_output": [{"sum_logits": -9.423169136047363, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.614484786987305, "logits_per_token": -4.711584568023682, "logits_per_char": -1.1778961420059204, "num_chars": 8}, {"sum_logits": -5.035953521728516, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -5.035953521728516, "logits_per_char": -0.4196627934773763, "num_chars": 12}, {"sum_logits": -5.795007705688477, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.047285079956055, "logits_per_token": -5.795007705688477, "logits_per_char": -0.8278582436697823, "num_chars": 7}, {"sum_logits": -8.515037536621094, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.914615631103516, "logits_per_token": -4.257518768310547, "logits_per_char": -0.6082169669015067, "num_chars": 14}, {"sum_logits": -11.829500198364258, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.43558692932129, "logits_per_token": -5.914750099182129, "logits_per_char": -1.314388910929362, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 5, "native_id": "a4892551cb4beb279653ae52d0de4c89", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.009472846984863, "incorrect_loss_raw": 14.94875454902649, "correct_loss_per_char": 0.7863909176417759, "incorrect_loss_per_char": 1.7188903437720402, "correct_loss_per_token": 2.752368211746216, "incorrect_loss_per_token": 8.551096022129059, "correct_loss_uncond": -7.132912635803223, "incorrect_loss_uncond": -2.886972427368164}, "model_output": [{"sum_logits": -18.276437759399414, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -9.138218879699707, "logits_per_char": -2.030715306599935, "num_chars": 9}, {"sum_logits": -12.493374824523926, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.084766387939453, "logits_per_token": -3.1233437061309814, "logits_per_char": -0.8328916549682617, "num_chars": 15}, {"sum_logits": -11.009472846984863, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.142385482788086, "logits_per_token": -2.752368211746216, "logits_per_char": -0.7863909176417759, "num_chars": 14}, {"sum_logits": -14.16476821899414, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.438270568847656, "logits_per_token": -7.08238410949707, "logits_per_char": -2.36079470316569, "num_chars": 6}, {"sum_logits": -14.860437393188477, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.825310707092285, "logits_per_token": -14.860437393188477, "logits_per_char": -1.6511597103542752, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 6, "native_id": "118a9093a30695622363455e4d911866", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.658804893493652, "incorrect_loss_raw": 17.72214651107788, "correct_loss_per_char": 1.2369721276419503, "incorrect_loss_per_char": 1.7830472299546907, "correct_loss_per_token": 4.329402446746826, "incorrect_loss_per_token": 7.5279097080230715, "correct_loss_uncond": -9.007201194763184, "incorrect_loss_uncond": -4.078982353210449}, "model_output": [{"sum_logits": -29.136327743530273, "num_tokens": 5, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -36.91728210449219, "logits_per_token": -5.8272655487060545, "logits_per_char": -1.3243785337968306, "num_chars": 22}, {"sum_logits": -8.658804893493652, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.666006088256836, "logits_per_token": -4.329402446746826, "logits_per_char": -1.2369721276419503, "num_chars": 7}, {"sum_logits": -14.130674362182617, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.090351104736328, "logits_per_token": -7.065337181091309, "logits_per_char": -2.355112393697103, "num_chars": 6}, {"sum_logits": -12.017762184143066, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -12.017762184143066, "logits_per_char": -1.5022202730178833, "num_chars": 8}, {"sum_logits": -15.603821754455566, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.99258041381836, "logits_per_token": -5.2012739181518555, "logits_per_char": -1.9504777193069458, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 7, "native_id": "05ea49b82e8ec519e82d6633936ab8bf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.219839096069336, "incorrect_loss_raw": 13.918318510055542, "correct_loss_per_char": 0.6771351950509208, "incorrect_loss_per_char": 1.4687396492276874, "correct_loss_per_token": 3.554959774017334, "incorrect_loss_per_token": 8.167177557945251, "correct_loss_uncond": -6.390573501586914, "incorrect_loss_uncond": -3.955005645751953}, "model_output": [{"sum_logits": -17.893524169921875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.239540100097656, "logits_per_token": -8.946762084960938, "logits_per_char": -1.278108869280134, "num_chars": 14}, {"sum_logits": -11.886214256286621, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.07366180419922, "logits_per_token": -5.9431071281433105, "logits_per_char": -1.188621425628662, "num_chars": 10}, {"sum_logits": -16.229389190673828, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.773582458496094, "logits_per_token": -8.114694595336914, "logits_per_char": -1.4753990173339844, "num_chars": 11}, {"sum_logits": -14.219839096069336, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.61041259765625, "logits_per_token": -3.554959774017334, "logits_per_char": -0.6771351950509208, "num_chars": 21}, {"sum_logits": -9.664146423339844, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.406512260437012, "logits_per_token": -9.664146423339844, "logits_per_char": -1.9328292846679687, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 8, "native_id": "c0c07ce781653b2a2c01871ba2bcba93", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.453479290008545, "incorrect_loss_raw": 14.124382495880127, "correct_loss_per_char": 0.38371992111206055, "incorrect_loss_per_char": 1.499846508492639, "correct_loss_per_token": 3.453479290008545, "incorrect_loss_per_token": 9.045385003089905, "correct_loss_uncond": -11.002273082733154, "incorrect_loss_uncond": -1.3084490299224854}, "model_output": [{"sum_logits": -3.453479290008545, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.4557523727417, "logits_per_token": -3.453479290008545, "logits_per_char": -0.38371992111206055, "num_chars": 9}, {"sum_logits": -13.436365127563477, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.596572875976562, "logits_per_token": -3.359091281890869, "logits_per_char": -0.6718182563781738, "num_chars": 20}, {"sum_logits": -10.068891525268555, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -10.5106201171875, "logits_per_token": -10.068891525268555, "logits_per_char": -1.678148587544759, "num_chars": 6}, {"sum_logits": -12.514841079711914, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.940627098083496, "logits_per_token": -12.514841079711914, "logits_per_char": -1.7878344399588448, "num_chars": 7}, {"sum_logits": -20.477432250976562, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.68350601196289, "logits_per_token": -10.238716125488281, "logits_per_char": -1.8615847500887783, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 9, "native_id": "1d24f406b6828492040b405d3f35119c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.150252342224121, "incorrect_loss_raw": 10.78638780117035, "correct_loss_per_char": 0.7687815427780151, "incorrect_loss_per_char": 1.2686924920632288, "correct_loss_per_token": 6.150252342224121, "incorrect_loss_per_token": 8.039772868156433, "correct_loss_uncond": -9.102336883544922, "incorrect_loss_uncond": -5.764476418495178}, "model_output": [{"sum_logits": -7.8286004066467285, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -7.8286004066467285, "logits_per_char": -1.9571501016616821, "num_chars": 4}, {"sum_logits": -14.506776809692383, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.072710037231445, "logits_per_token": -7.253388404846191, "logits_per_char": -1.2088980674743652, "num_chars": 12}, {"sum_logits": -6.150252342224121, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.252589225769043, "logits_per_token": -6.150252342224121, "logits_per_char": -0.7687815427780151, "num_chars": 8}, {"sum_logits": -13.34403133392334, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.92566204071045, "logits_per_token": -13.34403133392334, "logits_per_char": -1.334403133392334, "num_chars": 10}, {"sum_logits": -7.466142654418945, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.23056411743164, "logits_per_token": -3.7330713272094727, "logits_per_char": -0.5743186657245343, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 10, "native_id": "57f92025d860e32c4e780c0d51c1c20c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.483027458190918, "incorrect_loss_raw": 8.726415634155273, "correct_loss_per_char": 0.5483027458190918, "incorrect_loss_per_char": 1.000546103868729, "correct_loss_per_token": 2.741513729095459, "incorrect_loss_per_token": 6.531249046325684, "correct_loss_uncond": -10.822773933410645, "incorrect_loss_uncond": -9.76528286933899}, "model_output": [{"sum_logits": -9.032111167907715, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.15460205078125, "logits_per_token": -9.032111167907715, "logits_per_char": -1.5053518613179524, "num_chars": 6}, {"sum_logits": -7.552818298339844, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.626628875732422, "logits_per_token": -3.776409149169922, "logits_per_char": -0.5394870213099888, "num_chars": 14}, {"sum_logits": -10.008514404296875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.247882843017578, "logits_per_token": -5.0042572021484375, "logits_per_char": -0.7698857234074519, "num_chars": 13}, {"sum_logits": -8.31221866607666, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.9376802444458, "logits_per_token": -8.31221866607666, "logits_per_char": -1.1874598094395228, "num_chars": 7}, {"sum_logits": -5.483027458190918, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.305801391601562, "logits_per_token": -2.741513729095459, "logits_per_char": -0.5483027458190918, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 11, "native_id": "81eb4b2ee66edd8bc91ee944697c4e9f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.788999557495117, "incorrect_loss_raw": 11.312066078186035, "correct_loss_per_char": 0.43536359613591974, "incorrect_loss_per_char": 1.1058703239517982, "correct_loss_per_token": 2.3944997787475586, "incorrect_loss_per_token": 5.656033039093018, "correct_loss_uncond": -12.563779830932617, "incorrect_loss_uncond": -7.111650466918945}, "model_output": [{"sum_logits": -9.403209686279297, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.18486785888672, "logits_per_token": -4.701604843139648, "logits_per_char": -0.9403209686279297, "num_chars": 10}, {"sum_logits": -10.686245918273926, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.97815704345703, "logits_per_token": -5.343122959136963, "logits_per_char": -0.971476901661266, "num_chars": 11}, {"sum_logits": -11.113690376281738, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.03679084777832, "logits_per_token": -5.556845188140869, "logits_per_char": -1.2348544862535265, "num_chars": 9}, {"sum_logits": -4.788999557495117, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.352779388427734, "logits_per_token": -2.3944997787475586, "logits_per_char": -0.43536359613591974, "num_chars": 11}, {"sum_logits": -14.04511833190918, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.49505043029785, "logits_per_token": -7.02255916595459, "logits_per_char": -1.276828939264471, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 12, "native_id": "d807e7ae60976324920c8d29eb42dad6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.017200946807861, "incorrect_loss_raw": 11.719826698303223, "correct_loss_per_char": 0.5470182678916238, "incorrect_loss_per_char": 1.1449264536301293, "correct_loss_per_token": 3.0086004734039307, "incorrect_loss_per_token": 5.859913349151611, "correct_loss_uncond": -13.835822582244873, "incorrect_loss_uncond": -7.75592565536499}, "model_output": [{"sum_logits": -6.017200946807861, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.853023529052734, "logits_per_token": -3.0086004734039307, "logits_per_char": -0.5470182678916238, "num_chars": 11}, {"sum_logits": -7.5878801345825195, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.4078369140625, "logits_per_token": -3.7939400672912598, "logits_per_char": -0.8430977927313911, "num_chars": 9}, {"sum_logits": -8.101081848144531, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.36882781982422, "logits_per_token": -4.050540924072266, "logits_per_char": -0.8101081848144531, "num_chars": 10}, {"sum_logits": -11.089934349060059, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.649335861206055, "logits_per_token": -5.544967174530029, "logits_per_char": -0.6931208968162537, "num_chars": 16}, {"sum_logits": -20.10041046142578, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -10.05020523071289, "logits_per_char": -2.23337894015842, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 13, "native_id": "7ea9f721ffc662918bb0c0937a487f04", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.203407287597656, "incorrect_loss_raw": 10.168920040130615, "correct_loss_per_char": 0.4557448493109809, "incorrect_loss_per_char": 1.62796825000218, "correct_loss_per_token": 2.7344690958658853, "incorrect_loss_per_token": 8.73998236656189, "correct_loss_uncond": -12.3985595703125, "incorrect_loss_uncond": -5.818233251571655}, "model_output": [{"sum_logits": -7.596261978149414, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.987686157226562, "logits_per_token": -7.596261978149414, "logits_per_char": -1.519252395629883, "num_chars": 5}, {"sum_logits": -11.213556289672852, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.055574417114258, "logits_per_token": -11.213556289672852, "logits_per_char": -1.8689260482788086, "num_chars": 6}, {"sum_logits": -11.431501388549805, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.866344451904297, "logits_per_token": -5.715750694274902, "logits_per_char": -1.6330716269356864, "num_chars": 7}, {"sum_logits": -8.203407287597656, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.601966857910156, "logits_per_token": -2.7344690958658853, "logits_per_char": -0.4557448493109809, "num_chars": 18}, {"sum_logits": -10.43436050415039, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.039008140563965, "logits_per_token": -10.43436050415039, "logits_per_char": -1.4906229291643416, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 14, "native_id": "fc1d33a2301a30214523c12573f81aba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.424084663391113, "incorrect_loss_raw": 14.118366479873657, "correct_loss_per_char": 0.5791158146328397, "incorrect_loss_per_char": 1.9460530530838738, "correct_loss_per_token": 5.212042331695557, "incorrect_loss_per_token": 8.57267173131307, "correct_loss_uncond": -9.673811912536621, "incorrect_loss_uncond": -2.61215877532959}, "model_output": [{"sum_logits": -17.333906173706055, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.378948211669922, "logits_per_token": -8.666953086853027, "logits_per_char": -1.9259895748562283, "num_chars": 9}, {"sum_logits": -9.125886917114258, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -9.125886917114258, "logits_per_char": -1.0139874352349176, "num_chars": 9}, {"sum_logits": -10.424084663391113, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.097896575927734, "logits_per_token": -5.212042331695557, "logits_per_char": -0.5791158146328397, "num_chars": 18}, {"sum_logits": -9.739933967590332, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -9.739933967590332, "logits_per_char": -1.9479867935180664, "num_chars": 5}, {"sum_logits": -20.273738861083984, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.317607879638672, "logits_per_token": -6.757912953694661, "logits_per_char": -2.8962484087262834, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 15, "native_id": "3b8e1d236f5169b6c833a994d6d9c39a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.691155433654785, "incorrect_loss_raw": 9.847700595855713, "correct_loss_per_char": 0.6701650619506836, "incorrect_loss_per_char": 1.434034196157304, "correct_loss_per_token": 4.691155433654785, "incorrect_loss_per_token": 9.847700595855713, "correct_loss_uncond": -5.51860237121582, "incorrect_loss_uncond": -5.722314834594727}, "model_output": [{"sum_logits": -10.906463623046875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.682865142822266, "logits_per_token": -10.906463623046875, "logits_per_char": -1.8177439371744792, "num_chars": 6}, {"sum_logits": -6.757045745849609, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.263690948486328, "logits_per_token": -6.757045745849609, "logits_per_char": -1.1261742909749348, "num_chars": 6}, {"sum_logits": -9.817934036254883, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -9.817934036254883, "logits_per_char": -1.090881559583876, "num_chars": 9}, {"sum_logits": -4.691155433654785, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.209757804870605, "logits_per_token": -4.691155433654785, "logits_per_char": -0.6701650619506836, "num_chars": 7}, {"sum_logits": -11.909358978271484, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.258374214172363, "logits_per_token": -11.909358978271484, "logits_per_char": -1.7013369968959264, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 16, "native_id": "c5c4166f2ed3c2b3517b79e6848e9ae2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 19.5153865814209, "incorrect_loss_raw": 18.337308883666992, "correct_loss_per_char": 1.393956184387207, "incorrect_loss_per_char": 1.323599236133771, "correct_loss_per_token": 9.75769329071045, "incorrect_loss_per_token": 9.323870340983072, "correct_loss_uncond": -4.118865966796875, "incorrect_loss_uncond": -2.84538197517395}, "model_output": [{"sum_logits": -28.026180267333984, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -29.177127838134766, "logits_per_token": -9.342060089111328, "logits_per_char": -1.0779300102820764, "num_chars": 26}, {"sum_logits": -9.861078262329102, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.44467544555664, "logits_per_token": -3.2870260874430337, "logits_per_char": -0.8217565218607584, "num_chars": 12}, {"sum_logits": -21.591163635253906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.00868034362793, "logits_per_token": -10.795581817626953, "logits_per_char": -1.6608587411733775, "num_chars": 13}, {"sum_logits": -19.5153865814209, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.634252548217773, "logits_per_token": -9.75769329071045, "logits_per_char": -1.393956184387207, "num_chars": 14}, {"sum_logits": -13.870813369750977, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.100279808044434, "logits_per_token": -13.870813369750977, "logits_per_char": -1.733851671218872, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 17, "native_id": "6dc5b2884737e66543ce65f8dc40c992", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.617681503295898, "incorrect_loss_raw": 9.202287793159485, "correct_loss_per_char": 0.5745121002197265, "incorrect_loss_per_char": 1.0666938524741632, "correct_loss_per_token": 4.308840751647949, "incorrect_loss_per_token": 7.843064904212952, "correct_loss_uncond": -10.876476287841797, "incorrect_loss_uncond": -5.25580108165741}, "model_output": [{"sum_logits": -2.783968448638916, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -2.783968448638916, "logits_per_char": -0.397709778376988, "num_chars": 7}, {"sum_logits": -10.873783111572266, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.887245178222656, "logits_per_token": -5.436891555786133, "logits_per_char": -0.9885257374156605, "num_chars": 11}, {"sum_logits": -11.415079116821289, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.279019355773926, "logits_per_token": -11.415079116821289, "logits_per_char": -1.9025131861368816, "num_chars": 6}, {"sum_logits": -11.736320495605469, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.382490158081055, "logits_per_token": -11.736320495605469, "logits_per_char": -0.9780267079671224, "num_chars": 12}, {"sum_logits": -8.617681503295898, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.494157791137695, "logits_per_token": -4.308840751647949, "logits_per_char": -0.5745121002197265, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 18, "native_id": "8af63d58cc35061dec38e5448c325988", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.026421546936035, "incorrect_loss_raw": 8.460538268089294, "correct_loss_per_char": 0.4473801718817817, "incorrect_loss_per_char": 1.1662452528401026, "correct_loss_per_token": 2.0132107734680176, "incorrect_loss_per_token": 8.460538268089294, "correct_loss_uncond": -12.280362129211426, "incorrect_loss_uncond": -7.08250367641449}, "model_output": [{"sum_logits": -10.261244773864746, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.857513427734375, "logits_per_token": -10.261244773864746, "logits_per_char": -1.2826555967330933, "num_chars": 8}, {"sum_logits": -10.69238567352295, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.27749252319336, "logits_per_token": -10.69238567352295, "logits_per_char": -1.7820642789204915, "num_chars": 6}, {"sum_logits": -5.298396587371826, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.582313537597656, "logits_per_token": -5.298396587371826, "logits_per_char": -0.7569137981959752, "num_chars": 7}, {"sum_logits": -7.590126037597656, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.454848289489746, "logits_per_token": -7.590126037597656, "logits_per_char": -0.8433473375108507, "num_chars": 9}, {"sum_logits": -4.026421546936035, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.30678367614746, "logits_per_token": -2.0132107734680176, "logits_per_char": -0.4473801718817817, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 19, "native_id": "768fb09deab56046e1565b6a2556ad5c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.788905143737793, "incorrect_loss_raw": 8.702348530292511, "correct_loss_per_char": 0.826986449105399, "incorrect_loss_per_char": 0.7141553620497385, "correct_loss_per_token": 5.788905143737793, "incorrect_loss_per_token": 5.1422533094882965, "correct_loss_uncond": -7.917279243469238, "incorrect_loss_uncond": -8.882490813732147}, "model_output": [{"sum_logits": -6.328632354736328, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -6.328632354736328, "logits_per_char": -0.6328632354736328, "num_chars": 10}, {"sum_logits": -12.203250885009766, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -6.101625442504883, "logits_per_char": -1.0169375737508137, "num_chars": 12}, {"sum_logits": -3.649597406387329, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.067410469055176, "logits_per_token": -1.8247987031936646, "logits_per_char": -0.36495974063873293, "num_chars": 10}, {"sum_logits": -5.788905143737793, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -5.788905143737793, "logits_per_char": -0.826986449105399, "num_chars": 7}, {"sum_logits": -12.627913475036621, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.607328414916992, "logits_per_token": -6.3139567375183105, "logits_per_char": -0.8418608983357747, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 20, "native_id": "cd639cf3ff82f825ace7dd2b087562bd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.097110748291016, "incorrect_loss_raw": 11.476619720458984, "correct_loss_per_char": 1.2995872497558594, "incorrect_loss_per_char": 1.133678941293196, "correct_loss_per_token": 9.097110748291016, "incorrect_loss_per_token": 6.530752658843994, "correct_loss_uncond": -6.532464027404785, "incorrect_loss_uncond": -6.089935779571533}, "model_output": [{"sum_logits": -13.85792350769043, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.188400268554688, "logits_per_token": -6.928961753845215, "logits_per_char": -1.2598112279718572, "num_chars": 11}, {"sum_logits": -15.13139820098877, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -7.565699100494385, "logits_per_char": -1.513139820098877, "num_chars": 10}, {"sum_logits": -6.339542388916016, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -6.339542388916016, "logits_per_char": -1.0565903981526692, "num_chars": 6}, {"sum_logits": -9.097110748291016, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.6295747756958, "logits_per_token": -9.097110748291016, "logits_per_char": -1.2995872497558594, "num_chars": 7}, {"sum_logits": -10.577614784240723, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -5.288807392120361, "logits_per_char": -0.7051743189493815, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 21, "native_id": "8d79cc5e4eea11f50fab18fdea20fd4f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.969923973083496, "incorrect_loss_raw": 9.98867917060852, "correct_loss_per_char": 0.330826997756958, "incorrect_loss_per_char": 1.1162301858868262, "correct_loss_per_token": 3.969923973083496, "incorrect_loss_per_token": 9.98867917060852, "correct_loss_uncond": -9.685464859008789, "incorrect_loss_uncond": -4.260904312133789}, "model_output": [{"sum_logits": -8.632339477539062, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.3347749710083, "logits_per_token": -8.632339477539062, "logits_per_char": -1.0790424346923828, "num_chars": 8}, {"sum_logits": -8.830527305603027, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.727229118347168, "logits_per_token": -8.830527305603027, "logits_per_char": -1.1038159132003784, "num_chars": 8}, {"sum_logits": -3.969923973083496, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.655388832092285, "logits_per_token": -3.969923973083496, "logits_per_char": -0.330826997756958, "num_chars": 12}, {"sum_logits": -10.743085861206055, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.006147384643555, "logits_per_token": -10.743085861206055, "logits_per_char": -0.9766441692005504, "num_chars": 11}, {"sum_logits": -11.748764038085938, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.930182456970215, "logits_per_token": -11.748764038085938, "logits_per_char": -1.305418226453993, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 22, "native_id": "e5ad2184e37ae88b2bf46bf6bc0ed2f4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.510142803192139, "incorrect_loss_raw": 6.927777588367462, "correct_loss_per_char": 0.3006761868794759, "incorrect_loss_per_char": 0.5563647906695093, "correct_loss_per_token": 1.1275357007980347, "incorrect_loss_per_token": 2.556715498367945, "correct_loss_uncond": -13.034374713897705, "incorrect_loss_uncond": -11.676863133907318}, "model_output": [{"sum_logits": -4.510142803192139, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.544517517089844, "logits_per_token": -1.1275357007980347, "logits_per_char": -0.3006761868794759, "num_chars": 15}, {"sum_logits": -3.8028995990753174, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.12869930267334, "logits_per_token": -3.8028995990753174, "logits_per_char": -0.4753624498844147, "num_chars": 8}, {"sum_logits": -14.035151481628418, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.27353858947754, "logits_per_token": -3.5087878704071045, "logits_per_char": -1.0025108201163155, "num_chars": 14}, {"sum_logits": -4.510142803192139, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.544517517089844, "logits_per_token": -1.1275357007980347, "logits_per_char": -0.3006761868794759, "num_chars": 15}, {"sum_logits": -5.362916469573975, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.4718074798584, "logits_per_token": -1.787638823191325, "logits_per_char": -0.44690970579783124, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 23, "native_id": "b8b287b6277fccd4b7c9c72577177328", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.782081604003906, "incorrect_loss_raw": 7.70557713508606, "correct_loss_per_char": 0.7782081604003906, "incorrect_loss_per_char": 0.9004831918648311, "correct_loss_per_token": 7.782081604003906, "incorrect_loss_per_token": 6.605637550354004, "correct_loss_uncond": -6.970837593078613, "incorrect_loss_uncond": -7.647804260253906}, "model_output": [{"sum_logits": -8.799516677856445, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.71932601928711, "logits_per_token": -4.399758338928223, "logits_per_char": -0.8799516677856445, "num_chars": 10}, {"sum_logits": -4.640169143676758, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -4.640169143676758, "logits_per_char": -0.6628813062395368, "num_chars": 7}, {"sum_logits": -9.19420337677002, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.139057159423828, "logits_per_token": -9.19420337677002, "logits_per_char": -1.1492754220962524, "num_chars": 8}, {"sum_logits": -7.782081604003906, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -7.782081604003906, "logits_per_char": -0.7782081604003906, "num_chars": 10}, {"sum_logits": -8.188419342041016, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -8.188419342041016, "logits_per_char": -0.9098243713378906, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 24, "native_id": "f646f3e064f06423fc25b98500796cf0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.1150405406951904, "incorrect_loss_raw": 6.313105344772339, "correct_loss_per_char": 0.15929150581359863, "incorrect_loss_per_char": 0.8007506699789138, "correct_loss_per_token": 1.1150405406951904, "incorrect_loss_per_token": 3.126367370287577, "correct_loss_uncond": -10.989896059036255, "incorrect_loss_uncond": -10.005295276641846}, "model_output": [{"sum_logits": -6.312105655670166, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.587493896484375, "logits_per_token": -3.156052827835083, "logits_per_char": -0.9017293793814523, "num_chars": 7}, {"sum_logits": -9.65226936340332, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.333717346191406, "logits_per_token": -3.21742312113444, "logits_per_char": -0.80435578028361, "num_chars": 12}, {"sum_logits": -2.975940704345703, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.764897346496582, "logits_per_token": -2.975940704345703, "logits_per_char": -0.5951881408691406, "num_chars": 5}, {"sum_logits": -6.312105655670166, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.587493896484375, "logits_per_token": -3.156052827835083, "logits_per_char": -0.9017293793814523, "num_chars": 7}, {"sum_logits": -1.1150405406951904, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -12.104936599731445, "logits_per_token": -1.1150405406951904, "logits_per_char": -0.15929150581359863, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 25, "native_id": "b0f7d7978ac41c465108a92660d70e84", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.254195213317871, "incorrect_loss_raw": 20.892027616500854, "correct_loss_per_char": 0.1254195213317871, "incorrect_loss_per_char": 1.179787227627519, "correct_loss_per_token": 1.254195213317871, "incorrect_loss_per_token": 7.678144907951355, "correct_loss_uncond": -13.9308443069458, "incorrect_loss_uncond": -4.719038724899292}, "model_output": [{"sum_logits": -16.63762664794922, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.358997344970703, "logits_per_token": -8.31881332397461, "logits_per_char": -1.1091751098632812, "num_chars": 15}, {"sum_logits": -14.008955955505371, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.43581199645996, "logits_per_token": -7.0044779777526855, "logits_per_char": -1.0006397111075265, "num_chars": 14}, {"sum_logits": -16.01660919189453, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.513267517089844, "logits_per_token": -8.008304595947266, "logits_per_char": -1.456055381081321, "num_chars": 11}, {"sum_logits": -1.254195213317871, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -15.185039520263672, "logits_per_token": -1.254195213317871, "logits_per_char": -0.1254195213317871, "num_chars": 10}, {"sum_logits": -36.9049186706543, "num_tokens": 5, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -43.13618850708008, "logits_per_token": -7.38098373413086, "logits_per_char": -1.1532787084579468, "num_chars": 32}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 26, "native_id": "54075de8b8b89ecef2e4eb4eaee2713d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.031987190246582, "incorrect_loss_raw": 13.92211627960205, "correct_loss_per_char": 1.0063974380493164, "incorrect_loss_per_char": 1.2700053852993054, "correct_loss_per_token": 5.031987190246582, "incorrect_loss_per_token": 6.7999452749888105, "correct_loss_uncond": -9.094817161560059, "incorrect_loss_uncond": -4.615973949432373}, "model_output": [{"sum_logits": -14.458486557006836, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.135677337646484, "logits_per_token": -7.229243278503418, "logits_per_char": -1.8073108196258545, "num_chars": 8}, {"sum_logits": -5.031987190246582, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.12680435180664, "logits_per_token": -5.031987190246582, "logits_per_char": -1.0063974380493164, "num_chars": 5}, {"sum_logits": -19.864627838134766, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.97578239440918, "logits_per_token": -9.932313919067383, "logits_per_char": -1.4189019884381975, "num_chars": 14}, {"sum_logits": -16.990690231323242, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.322839736938477, "logits_per_token": -5.663563410441081, "logits_per_char": -1.3069761716402495, "num_chars": 13}, {"sum_logits": -4.374660491943359, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -4.374660491943359, "logits_per_char": -0.5468325614929199, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 27, "native_id": "65435b996ce9d1685bebb74b49c1ba7f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.007382392883301, "incorrect_loss_raw": 12.456117153167725, "correct_loss_per_char": 0.3754613995552063, "incorrect_loss_per_char": 1.2115033369559747, "correct_loss_per_token": 3.0036911964416504, "incorrect_loss_per_token": 9.923354864120483, "correct_loss_uncond": -15.32907772064209, "incorrect_loss_uncond": -2.983555793762207}, "model_output": [{"sum_logits": -6.007382392883301, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.33646011352539, "logits_per_token": -3.0036911964416504, "logits_per_char": -0.3754613995552063, "num_chars": 16}, {"sum_logits": -15.345949172973633, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.277997970581055, "logits_per_token": -15.345949172973633, "logits_per_char": -1.2788290977478027, "num_chars": 12}, {"sum_logits": -4.280387878417969, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.862573623657227, "logits_per_token": -4.280387878417969, "logits_per_char": -0.3057419913155692, "num_chars": 14}, {"sum_logits": -9.936033248901367, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.245857238769531, "logits_per_token": -9.936033248901367, "logits_per_char": -1.419433321271624, "num_chars": 7}, {"sum_logits": -20.26209831237793, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.372262954711914, "logits_per_token": -10.131049156188965, "logits_per_char": -1.8420089374889026, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 28, "native_id": "9889e5389917d812c09d6e5d382d333d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.467101812362671, "incorrect_loss_raw": 11.940814733505249, "correct_loss_per_char": 0.43338772654533386, "incorrect_loss_per_char": 1.3155140553638613, "correct_loss_per_token": 1.7335509061813354, "incorrect_loss_per_token": 7.422139883041382, "correct_loss_uncond": -15.301496744155884, "incorrect_loss_uncond": -3.1863298416137695}, "model_output": [{"sum_logits": -7.663146018981934, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.696717262268066, "logits_per_token": -3.831573009490967, "logits_per_char": -0.8514606687757704, "num_chars": 9}, {"sum_logits": -3.467101812362671, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.768598556518555, "logits_per_token": -1.7335509061813354, "logits_per_char": -0.43338772654533386, "num_chars": 8}, {"sum_logits": -16.386507034301758, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -8.193253517150879, "logits_per_char": -1.8207230038113065, "num_chars": 9}, {"sum_logits": -12.099745750427246, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.652633666992188, "logits_per_token": -6.049872875213623, "logits_per_char": -0.9307496731097882, "num_chars": 13}, {"sum_logits": -11.613860130310059, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -11.613860130310059, "logits_per_char": -1.6591228757585799, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 29, "native_id": "a651ffa44ac5febf0aede6748899b981", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7883157134056091, "incorrect_loss_raw": 9.364095330238342, "correct_loss_per_char": 0.1313859522342682, "incorrect_loss_per_char": 1.1188506282339192, "correct_loss_per_token": 0.7883157134056091, "incorrect_loss_per_token": 7.436410705248515, "correct_loss_uncond": -12.746909201145172, "incorrect_loss_uncond": -4.910383820533752}, "model_output": [{"sum_logits": -9.12442398071289, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.729527473449707, "logits_per_token": -9.12442398071289, "logits_per_char": -0.8294930891557173, "num_chars": 11}, {"sum_logits": -0.7883157134056091, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.535224914550781, "logits_per_token": -0.7883157134056091, "logits_per_char": -0.1313859522342682, "num_chars": 6}, {"sum_logits": -11.49852466583252, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.649933815002441, "logits_per_token": -11.49852466583252, "logits_per_char": -1.2776138517591689, "num_chars": 9}, {"sum_logits": -5.267324924468994, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.782803535461426, "logits_per_token": -5.267324924468994, "logits_per_char": -1.3168312311172485, "num_chars": 4}, {"sum_logits": -11.566107749938965, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.935651779174805, "logits_per_token": -3.855369249979655, "logits_per_char": -1.0514643409035422, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 30, "native_id": "bdcfbe2132295d437e4c5701085f19c0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.905559539794922, "incorrect_loss_raw": 9.911217093467712, "correct_loss_per_char": 1.5579370771135603, "incorrect_loss_per_char": 1.2184520221891857, "correct_loss_per_token": 5.452779769897461, "incorrect_loss_per_token": 6.093395113945007, "correct_loss_uncond": -5.0849456787109375, "incorrect_loss_uncond": -6.4592589139938354}, "model_output": [{"sum_logits": -6.294780254364014, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -6.294780254364014, "logits_per_char": -1.5736950635910034, "num_chars": 4}, {"sum_logits": -10.905559539794922, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.99050521850586, "logits_per_token": -5.452779769897461, "logits_per_char": -1.5579370771135603, "num_chars": 7}, {"sum_logits": -13.339688301086426, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.28934097290039, "logits_per_token": -4.446562767028809, "logits_per_char": -1.4821875890096028, "num_chars": 9}, {"sum_logits": -12.756324768066406, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.109777450561523, "logits_per_token": -6.378162384033203, "logits_per_char": -0.9111660548618862, "num_chars": 14}, {"sum_logits": -7.254075050354004, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.618680953979492, "logits_per_token": -7.254075050354004, "logits_per_char": -0.9067593812942505, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 31, "native_id": "8d3dc21a53523850ec80771daaa5ff20", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.5794849395751953, "incorrect_loss_raw": 10.444183468818665, "correct_loss_per_char": 0.3224356174468994, "incorrect_loss_per_char": 0.7936182827827258, "correct_loss_per_token": 2.5794849395751953, "incorrect_loss_per_token": 4.809146285057068, "correct_loss_uncond": -11.521364212036133, "incorrect_loss_uncond": -9.580437541007996}, "model_output": [{"sum_logits": -2.5794849395751953, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -2.5794849395751953, "logits_per_char": -0.3224356174468994, "num_chars": 8}, {"sum_logits": -8.37525749206543, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.461427688598633, "logits_per_token": -4.187628746032715, "logits_per_char": -0.5583504994710287, "num_chars": 15}, {"sum_logits": -18.41818618774414, "num_tokens": 4, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -28.15085220336914, "logits_per_token": -4.604546546936035, "logits_per_char": -0.7367274475097656, "num_chars": 25}, {"sum_logits": -9.077760696411133, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.649930953979492, "logits_per_token": -4.538880348205566, "logits_per_char": -0.6982892843393179, "num_chars": 13}, {"sum_logits": -5.905529499053955, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -5.905529499053955, "logits_per_char": -1.181105899810791, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 32, "native_id": "a80ee7775e934c423012fe98e20ba28b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.7049784660339355, "incorrect_loss_raw": 10.402788639068604, "correct_loss_per_char": 0.3704978466033936, "incorrect_loss_per_char": 1.3611159926369076, "correct_loss_per_token": 1.8524892330169678, "incorrect_loss_per_token": 6.441507975260416, "correct_loss_uncond": -11.078285694122314, "incorrect_loss_uncond": -6.986420154571533}, "model_output": [{"sum_logits": -9.245431900024414, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.18486785888672, "logits_per_token": -4.622715950012207, "logits_per_char": -0.9245431900024415, "num_chars": 10}, {"sum_logits": -9.556862831115723, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -22.12228775024414, "logits_per_token": -3.1856209437052407, "logits_per_char": -1.3652661187308175, "num_chars": 7}, {"sum_logits": -9.702329635620117, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.293865203857422, "logits_per_token": -4.851164817810059, "logits_per_char": -0.9702329635620117, "num_chars": 10}, {"sum_logits": -13.10653018951416, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -13.10653018951416, "logits_per_char": -2.18442169825236, "num_chars": 6}, {"sum_logits": -3.7049784660339355, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.78326416015625, "logits_per_token": -1.8524892330169678, "logits_per_char": -0.3704978466033936, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 33, "native_id": "48a315cfa3ce11f7a9d615bc854331d5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.320844650268555, "incorrect_loss_raw": 11.975667953491211, "correct_loss_per_char": 0.5229174750191825, "incorrect_loss_per_char": 1.36080417519524, "correct_loss_per_token": 2.440281550089518, "incorrect_loss_per_token": 6.960641304651896, "correct_loss_uncond": -12.101131439208984, "incorrect_loss_uncond": -4.422184467315674}, "model_output": [{"sum_logits": -11.022594451904297, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.968094825744629, "logits_per_token": -5.511297225952148, "logits_per_char": -0.7873281751360212, "num_chars": 14}, {"sum_logits": -11.378095626831055, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.92650032043457, "logits_per_token": -3.792698542277018, "logits_per_char": -0.9481746355692545, "num_chars": 12}, {"sum_logits": -11.575157165527344, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.406512260437012, "logits_per_token": -11.575157165527344, "logits_per_char": -2.3150314331054687, "num_chars": 5}, {"sum_logits": -7.320844650268555, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.42197608947754, "logits_per_token": -2.440281550089518, "logits_per_char": -0.5229174750191825, "num_chars": 14}, {"sum_logits": -13.926824569702148, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.290302276611328, "logits_per_token": -6.963412284851074, "logits_per_char": -1.3926824569702148, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 34, "native_id": "4acd496cc78d96c2431279a5fd87de7c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.2563271522521973, "incorrect_loss_raw": 7.738215446472168, "correct_loss_per_char": 0.22563271522521972, "incorrect_loss_per_char": 1.1515852289322095, "correct_loss_per_token": 2.2563271522521973, "incorrect_loss_per_token": 6.210054874420166, "correct_loss_uncond": -11.23390531539917, "incorrect_loss_uncond": -5.959859609603882}, "model_output": [{"sum_logits": -2.3263988494873047, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -2.3263988494873047, "logits_per_char": -0.17895375765286958, "num_chars": 13}, {"sum_logits": -8.30017375946045, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.50505542755127, "logits_per_token": -8.30017375946045, "logits_per_char": -1.3833622932434082, "num_chars": 6}, {"sum_logits": -12.225284576416016, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.29062843322754, "logits_per_token": -6.112642288208008, "logits_per_char": -1.0187737147013347, "num_chars": 12}, {"sum_logits": -8.101004600524902, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.35445499420166, "logits_per_token": -8.101004600524902, "logits_per_char": -2.0252511501312256, "num_chars": 4}, {"sum_logits": -2.2563271522521973, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -2.2563271522521973, "logits_per_char": -0.22563271522521972, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 35, "native_id": "91e0f4ab62c9d2fd440d73a3f5308d96", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.186290740966797, "incorrect_loss_raw": 17.15788507461548, "correct_loss_per_char": 0.4491431713104248, "incorrect_loss_per_char": 1.8836150481587366, "correct_loss_per_token": 3.5931453704833984, "incorrect_loss_per_token": 9.87405252456665, "correct_loss_uncond": -10.86268424987793, "incorrect_loss_uncond": -3.2450568675994873}, "model_output": [{"sum_logits": -38.84710693359375, "num_tokens": 4, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -35.86824035644531, "logits_per_token": -9.711776733398438, "logits_per_char": -3.2372589111328125, "num_chars": 12}, {"sum_logits": -8.799943923950195, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -8.799943923950195, "logits_per_char": -1.466657320658366, "num_chars": 6}, {"sum_logits": -7.186290740966797, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.048974990844727, "logits_per_token": -3.5931453704833984, "logits_per_char": -0.4491431713104248, "num_chars": 16}, {"sum_logits": -11.619035720825195, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -11.619035720825195, "logits_per_char": -1.6598622458321708, "num_chars": 7}, {"sum_logits": -9.365453720092773, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.02054786682129, "logits_per_token": -9.365453720092773, "logits_per_char": -1.1706817150115967, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 36, "native_id": "b61e849e44db16a581f0b65e28ab95dc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.4814960956573486, "incorrect_loss_raw": 9.396814823150635, "correct_loss_per_char": 0.49629921913146974, "incorrect_loss_per_char": 1.1376759336108253, "correct_loss_per_token": 2.4814960956573486, "incorrect_loss_per_token": 7.691512942314148, "correct_loss_uncond": -10.18481993675232, "incorrect_loss_uncond": -5.65567946434021}, "model_output": [{"sum_logits": -9.364418029785156, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.585418701171875, "logits_per_token": -9.364418029785156, "logits_per_char": -1.8728836059570313, "num_chars": 5}, {"sum_logits": -13.642415046691895, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.45631217956543, "logits_per_token": -6.821207523345947, "logits_per_char": -0.9094943364461263, "num_chars": 15}, {"sum_logits": -2.4814960956573486, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -2.4814960956573486, "logits_per_char": -0.49629921913146974, "num_chars": 5}, {"sum_logits": -7.239940643310547, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -7.239940643310547, "logits_per_char": -1.0342772347586495, "num_chars": 7}, {"sum_logits": -7.340485572814941, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.346670150756836, "logits_per_token": -7.340485572814941, "logits_per_char": -0.7340485572814941, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 37, "native_id": "ba6bd1bdef02d0ebfe5370f92365ae18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2806355953216553, "incorrect_loss_raw": 10.592134237289429, "correct_loss_per_char": 0.175433507332435, "incorrect_loss_per_char": 1.294659794512249, "correct_loss_per_token": 2.2806355953216553, "incorrect_loss_per_token": 5.859739661216736, "correct_loss_uncond": -13.112443208694458, "incorrect_loss_uncond": -7.610830783843994}, "model_output": [{"sum_logits": -16.678701400756836, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.645370483398438, "logits_per_token": -8.339350700378418, "logits_per_char": -2.0848376750946045, "num_chars": 8}, {"sum_logits": -2.2806355953216553, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -2.2806355953216553, "logits_per_char": -0.175433507332435, "num_chars": 13}, {"sum_logits": -4.509380340576172, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -4.509380340576172, "logits_per_char": -0.9018760681152344, "num_chars": 5}, {"sum_logits": -14.008745193481445, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.085739135742188, "logits_per_token": -7.004372596740723, "logits_per_char": -1.1673954327901204, "num_chars": 12}, {"sum_logits": -7.171710014343262, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.10690689086914, "logits_per_token": -3.585855007171631, "logits_per_char": -1.0245300020490373, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 38, "native_id": "dc55d473c22b04877b11d584f9548194", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.990279197692871, "incorrect_loss_raw": 10.400483131408691, "correct_loss_per_char": 0.7326852798461914, "incorrect_loss_per_char": 1.3695707638542376, "correct_loss_per_token": 3.663426399230957, "incorrect_loss_per_token": 6.174956917762756, "correct_loss_uncond": -9.431082725524902, "incorrect_loss_uncond": -5.760245323181152}, "model_output": [{"sum_logits": -10.990279197692871, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.421361923217773, "logits_per_token": -3.663426399230957, "logits_per_char": -0.7326852798461914, "num_chars": 15}, {"sum_logits": -13.460911750793457, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.379703521728516, "logits_per_token": -6.7304558753967285, "logits_per_char": -1.2237192500721326, "num_chars": 11}, {"sum_logits": -7.797722816467285, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.968931198120117, "logits_per_token": -7.797722816467285, "logits_per_char": -1.9494307041168213, "num_chars": 4}, {"sum_logits": -11.570257186889648, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.13169288635254, "logits_per_token": -5.785128593444824, "logits_per_char": -1.0518415624445134, "num_chars": 11}, {"sum_logits": -8.773040771484375, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.162586212158203, "logits_per_token": -4.3865203857421875, "logits_per_char": -1.2532915387834822, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 39, "native_id": "113aaea2b1a27a976547f54e531d99bb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.4681708812713623, "incorrect_loss_raw": 8.38317883014679, "correct_loss_per_char": 0.3152882619337602, "incorrect_loss_per_char": 0.9271839018378938, "correct_loss_per_token": 1.7340854406356812, "incorrect_loss_per_token": 5.507351040840149, "correct_loss_uncond": -12.9300377368927, "incorrect_loss_uncond": -10.503911852836609}, "model_output": [{"sum_logits": -11.276988983154297, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.90937042236328, "logits_per_token": -5.638494491577148, "logits_per_char": -1.6109984261648995, "num_chars": 7}, {"sum_logits": -3.4681708812713623, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.398208618164062, "logits_per_token": -1.7340854406356812, "logits_per_char": -0.3152882619337602, "num_chars": 11}, {"sum_logits": -5.815576553344727, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.336503982543945, "logits_per_token": -5.815576553344727, "logits_per_char": -0.7269470691680908, "num_chars": 8}, {"sum_logits": -11.729633331298828, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.089393615722656, "logits_per_token": -5.864816665649414, "logits_per_char": -0.7819755554199219, "num_chars": 15}, {"sum_logits": -4.710516452789307, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.21309471130371, "logits_per_token": -4.710516452789307, "logits_per_char": -0.5888145565986633, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 40, "native_id": "ba640b9634ad6b4ad98b17b4f152e562", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.215444564819336, "incorrect_loss_raw": 12.310803174972534, "correct_loss_per_char": 0.2607722282409668, "incorrect_loss_per_char": 1.3365184643885473, "correct_loss_per_token": 1.7384815216064453, "incorrect_loss_per_token": 6.490040063858032, "correct_loss_uncond": -12.259550094604492, "incorrect_loss_uncond": -6.11664342880249}, "model_output": [{"sum_logits": -7.715679168701172, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.124066352844238, "logits_per_token": -7.715679168701172, "logits_per_char": -1.5431358337402343, "num_chars": 5}, {"sum_logits": -5.215444564819336, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.474994659423828, "logits_per_token": -1.7384815216064453, "logits_per_char": -0.2607722282409668, "num_chars": 20}, {"sum_logits": -17.20783042907715, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.843685150146484, "logits_per_token": -8.603915214538574, "logits_per_char": -1.7207830429077149, "num_chars": 10}, {"sum_logits": -15.115714073181152, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.026775360107422, "logits_per_token": -5.038571357727051, "logits_per_char": -1.3741558248346502, "num_chars": 11}, {"sum_logits": -9.203989028930664, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.715259552001953, "logits_per_token": -4.601994514465332, "logits_per_char": -0.7079991560715896, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 41, "native_id": "750ebdf36a0b3b407be0fe2163e3700b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.903484344482422, "incorrect_loss_raw": 11.99277114868164, "correct_loss_per_char": 0.5903484344482421, "incorrect_loss_per_char": 0.9737423885952343, "correct_loss_per_token": 2.951742172241211, "incorrect_loss_per_token": 5.99638557434082, "correct_loss_uncond": -11.618452072143555, "incorrect_loss_uncond": -7.014824390411377}, "model_output": [{"sum_logits": -18.04365348815918, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -21.09711456298828, "logits_per_token": -9.02182674407959, "logits_per_char": -1.2029102325439454, "num_chars": 15}, {"sum_logits": -5.903484344482422, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.521936416625977, "logits_per_token": -2.951742172241211, "logits_per_char": -0.5903484344482421, "num_chars": 10}, {"sum_logits": -14.946898460388184, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.533336639404297, "logits_per_token": -7.473449230194092, "logits_per_char": -1.2455748716990154, "num_chars": 12}, {"sum_logits": -4.1885833740234375, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.20847511291504, "logits_per_token": -2.0942916870117188, "logits_per_char": -0.4653981526692708, "num_chars": 9}, {"sum_logits": -10.791949272155762, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.191455841064453, "logits_per_token": -5.395974636077881, "logits_per_char": -0.9810862974687056, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 42, "native_id": "8f01273422a370a8dbda6bf473a395a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.775785207748413, "incorrect_loss_raw": 9.201988220214844, "correct_loss_per_char": 0.39654074396405903, "incorrect_loss_per_char": 1.1083419534895156, "correct_loss_per_token": 2.775785207748413, "incorrect_loss_per_token": 9.201988220214844, "correct_loss_uncond": -10.647773027420044, "incorrect_loss_uncond": -3.7100958824157715}, "model_output": [{"sum_logits": -15.889877319335938, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.39954662322998, "logits_per_token": -15.889877319335938, "logits_per_char": -1.7655419243706598, "num_chars": 9}, {"sum_logits": -7.481097221374512, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.093122482299805, "logits_per_token": -7.481097221374512, "logits_per_char": -1.0687281744820731, "num_chars": 7}, {"sum_logits": -7.47764778137207, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -7.47764778137207, "logits_per_char": -0.747764778137207, "num_chars": 10}, {"sum_logits": -5.9593305587768555, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.306968688964844, "logits_per_token": -5.9593305587768555, "logits_per_char": -0.8513329369681222, "num_chars": 7}, {"sum_logits": -2.775785207748413, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -2.775785207748413, "logits_per_char": -0.39654074396405903, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 43, "native_id": "e6586bba9fe96d38792e6e6d4f2703dc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.10084867477417, "incorrect_loss_raw": 9.018452763557434, "correct_loss_per_char": 1.0168081124623616, "incorrect_loss_per_char": 1.3879159967104595, "correct_loss_per_token": 6.10084867477417, "incorrect_loss_per_token": 7.405772805213928, "correct_loss_uncond": -7.2735371589660645, "incorrect_loss_uncond": -5.9974285364151}, "model_output": [{"sum_logits": -12.901439666748047, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.8222713470459, "logits_per_token": -6.450719833374023, "logits_per_char": -1.4334932963053386, "num_chars": 9}, {"sum_logits": -3.1677889823913574, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.062310218811035, "logits_per_token": -3.1677889823913574, "logits_per_char": -0.6335577964782715, "num_chars": 5}, {"sum_logits": -10.110218048095703, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -10.110218048095703, "logits_per_char": -1.0110218048095703, "num_chars": 10}, {"sum_logits": -9.894364356994629, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -9.894364356994629, "logits_per_char": -2.4735910892486572, "num_chars": 4}, {"sum_logits": -6.10084867477417, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -6.10084867477417, "logits_per_char": -1.0168081124623616, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 44, "native_id": "6e433471d0e2590b8c73ceef275022b1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.827770233154297, "incorrect_loss_raw": 13.763253211975098, "correct_loss_per_char": 0.9843427484685724, "incorrect_loss_per_char": 1.6364402670364875, "correct_loss_per_token": 5.413885116577148, "incorrect_loss_per_token": 9.625939567883808, "correct_loss_uncond": -10.568504333496094, "incorrect_loss_uncond": -2.064084529876709}, "model_output": [{"sum_logits": -11.105561256408691, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.612686157226562, "logits_per_token": -5.552780628204346, "logits_per_char": -1.0095964778553357, "num_chars": 11}, {"sum_logits": -11.705679893493652, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -11.705679893493652, "logits_per_char": -2.926419973373413, "num_chars": 4}, {"sum_logits": -16.49471092224121, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.340621948242188, "logits_per_token": -5.498236974080403, "logits_per_char": -1.1781936373029436, "num_chars": 14}, {"sum_logits": -10.827770233154297, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -21.39627456665039, "logits_per_token": -5.413885116577148, "logits_per_char": -0.9843427484685724, "num_chars": 11}, {"sum_logits": -15.747060775756836, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -15.747060775756836, "logits_per_char": -1.4315509796142578, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 45, "native_id": "1bc986f8aea88d6927d8a45367855a94", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.998526573181152, "incorrect_loss_raw": 15.614537477493286, "correct_loss_per_char": 0.5332351048787435, "incorrect_loss_per_char": 1.2965664759959519, "correct_loss_per_token": 3.999263286590576, "incorrect_loss_per_token": 8.689869840939838, "correct_loss_uncond": -10.279953956604004, "incorrect_loss_uncond": -2.009470224380493}, "model_output": [{"sum_logits": -17.1828670501709, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.068204879760742, "logits_per_token": -5.727622350056966, "logits_per_char": -1.0107568853041704, "num_chars": 17}, {"sum_logits": -15.799351692199707, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.73453712463379, "logits_per_token": -7.8996758460998535, "logits_per_char": -0.8315448259052477, "num_chars": 19}, {"sum_logits": -12.788431167602539, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.336091995239258, "logits_per_token": -12.788431167602539, "logits_per_char": -1.8269187382289342, "num_chars": 7}, {"sum_logits": -16.6875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.357196807861328, "logits_per_token": -8.34375, "logits_per_char": -1.5170454545454546, "num_chars": 11}, {"sum_logits": -7.998526573181152, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.278480529785156, "logits_per_token": -3.999263286590576, "logits_per_char": -0.5332351048787435, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 46, "native_id": "8d1563697d751a364d688d6701ebdb39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.621414184570312, "incorrect_loss_raw": 8.436942338943481, "correct_loss_per_char": 0.8621414184570313, "incorrect_loss_per_char": 0.7390210428437987, "correct_loss_per_token": 4.310707092285156, "incorrect_loss_per_token": 3.8851177295049033, "correct_loss_uncond": -9.159297943115234, "incorrect_loss_uncond": -10.48024296760559}, "model_output": [{"sum_logits": -8.621414184570312, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.780712127685547, "logits_per_token": -4.310707092285156, "logits_per_char": -0.8621414184570313, "num_chars": 10}, {"sum_logits": -6.608351707458496, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.604267120361328, "logits_per_token": -3.304175853729248, "logits_per_char": -0.6007592461325906, "num_chars": 11}, {"sum_logits": -9.569467544555664, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.92786407470703, "logits_per_token": -4.784733772277832, "logits_per_char": -0.8699515949596058, "num_chars": 11}, {"sum_logits": -8.000482559204102, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.2087459564209, "logits_per_token": -2.6668275197347007, "logits_per_char": -0.6154217353233924, "num_chars": 13}, {"sum_logits": -9.569467544555664, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.92786407470703, "logits_per_token": -4.784733772277832, "logits_per_char": -0.8699515949596058, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 47, "native_id": "91f512273a2da7ae796919069b20d6cf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6660804748535156, "incorrect_loss_raw": 15.043211936950684, "correct_loss_per_char": 0.30550670623779297, "incorrect_loss_per_char": 1.4644012435486442, "correct_loss_per_token": 1.8330402374267578, "incorrect_loss_per_token": 8.143225034077961, "correct_loss_uncond": -17.705110549926758, "incorrect_loss_uncond": -4.753885984420776}, "model_output": [{"sum_logits": -13.538646697998047, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -6.769323348999023, "logits_per_char": -1.128220558166504, "num_chars": 12}, {"sum_logits": -15.388264656066895, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -15.388264656066895, "logits_per_char": -1.9235330820083618, "num_chars": 8}, {"sum_logits": -11.387221336364746, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.782602310180664, "logits_per_token": -3.7957404454549155, "logits_per_char": -0.5993274387560392, "num_chars": 19}, {"sum_logits": -19.858715057373047, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -24.945661544799805, "logits_per_token": -6.619571685791016, "logits_per_char": -2.206523895263672, "num_chars": 9}, {"sum_logits": -3.6660804748535156, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -21.371191024780273, "logits_per_token": -1.8330402374267578, "logits_per_char": -0.30550670623779297, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 48, "native_id": "49cda7eedbf63b3f38e59ba72f1ee1f9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.914997100830078, "incorrect_loss_raw": 6.779270529747009, "correct_loss_per_char": 1.152499516805013, "incorrect_loss_per_char": 0.7337845580445396, "correct_loss_per_token": 6.914997100830078, "incorrect_loss_per_token": 4.640841245651245, "correct_loss_uncond": -5.669367790222168, "incorrect_loss_uncond": -9.966612458229065}, "model_output": [{"sum_logits": -12.64561939239502, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -6.32280969619751, "logits_per_char": -1.4050688213772244, "num_chars": 9}, {"sum_logits": -7.2891387939453125, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -7.2891387939453125, "logits_per_char": -0.9111423492431641, "num_chars": 8}, {"sum_logits": -2.7205090522766113, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -2.7205090522766113, "logits_per_char": -0.3400636315345764, "num_chars": 8}, {"sum_logits": -4.461814880371094, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.565406799316406, "logits_per_token": -2.230907440185547, "logits_per_char": -0.27886343002319336, "num_chars": 16}, {"sum_logits": -6.914997100830078, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -6.914997100830078, "logits_per_char": -1.152499516805013, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 49, "native_id": "a588407ecaecf0f30c2241c30b470fe2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.9825439453125, "incorrect_loss_raw": 12.882202863693237, "correct_loss_per_char": 0.831878662109375, "incorrect_loss_per_char": 1.2288928975661595, "correct_loss_per_token": 3.3275146484375, "incorrect_loss_per_token": 7.288030922412872, "correct_loss_uncond": -10.806875228881836, "incorrect_loss_uncond": -4.504456996917725}, "model_output": [{"sum_logits": -11.333730697631836, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -5.666865348815918, "logits_per_char": -0.9444775581359863, "num_chars": 12}, {"sum_logits": -12.210988998413086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.050827026367188, "logits_per_token": -6.105494499206543, "logits_per_char": -1.3567765553792317, "num_chars": 9}, {"sum_logits": -9.9825439453125, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.789419174194336, "logits_per_token": -3.3275146484375, "logits_per_char": -0.831878662109375, "num_chars": 12}, {"sum_logits": -14.139103889465332, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.3708553314209, "logits_per_token": -3.534775972366333, "logits_per_char": -0.8836939930915833, "num_chars": 16}, {"sum_logits": -13.844987869262695, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -13.844987869262695, "logits_per_char": -1.730623483657837, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 50, "native_id": "011096bcfff30fd38046cf9db3a411c5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.148697853088379, "incorrect_loss_raw": 12.845611810684204, "correct_loss_per_char": 0.7407907139171254, "incorrect_loss_per_char": 1.1258191230235162, "correct_loss_per_token": 4.0743489265441895, "incorrect_loss_per_token": 6.737136363983154, "correct_loss_uncond": -13.426444053649902, "incorrect_loss_uncond": -6.464136362075806}, "model_output": [{"sum_logits": -11.320791244506836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -26.39179039001465, "logits_per_token": -5.660395622253418, "logits_per_char": -1.0291628404097124, "num_chars": 11}, {"sum_logits": -14.151163101196289, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.10388946533203, "logits_per_token": -4.71705436706543, "logits_per_char": -0.8844476938247681, "num_chars": 16}, {"sum_logits": -8.148697853088379, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.57514190673828, "logits_per_token": -4.0743489265441895, "logits_per_char": -0.7407907139171254, "num_chars": 11}, {"sum_logits": -7.231698036193848, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -7.231698036193848, "logits_per_char": -1.0330997194562639, "num_chars": 7}, {"sum_logits": -18.678794860839844, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.263301849365234, "logits_per_token": -9.339397430419922, "logits_per_char": -1.5565662384033203, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 51, "native_id": "435a728f45d32faa4b3c4553c966fd6b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.7591447830200195, "incorrect_loss_raw": 8.810214757919312, "correct_loss_per_char": 0.5172763188680013, "incorrect_loss_per_char": 0.98465586200593, "correct_loss_per_token": 2.5863815943400064, "incorrect_loss_per_token": 5.546911597251892, "correct_loss_uncond": -12.333830833435059, "incorrect_loss_uncond": -6.087571144104004}, "model_output": [{"sum_logits": -5.304771423339844, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.69635772705078, "logits_per_token": -2.652385711669922, "logits_per_char": -0.5304771423339844, "num_chars": 10}, {"sum_logits": -7.7591447830200195, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.092975616455078, "logits_per_token": -2.5863815943400064, "logits_per_char": -0.5172763188680013, "num_chars": 15}, {"sum_logits": -9.13443374633789, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -9.13443374633789, "logits_per_char": -1.5224056243896484, "num_chars": 6}, {"sum_logits": -10.723965644836426, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.672050476074219, "logits_per_token": -5.361982822418213, "logits_per_char": -0.7659975460597447, "num_chars": 14}, {"sum_logits": -10.077688217163086, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.914724349975586, "logits_per_token": -5.038844108581543, "logits_per_char": -1.1197431352403429, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 52, "native_id": "e953dee48c70159ad879143a319ec607", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.144206047058105, "incorrect_loss_raw": 10.961963653564453, "correct_loss_per_char": 1.0160228941175673, "incorrect_loss_per_char": 1.2776135725634439, "correct_loss_per_token": 9.144206047058105, "incorrect_loss_per_token": 7.364463806152344, "correct_loss_uncond": -6.195188522338867, "incorrect_loss_uncond": -2.852964162826538}, "model_output": [{"sum_logits": -9.478238105773926, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -10.325328826904297, "logits_per_token": -9.478238105773926, "logits_per_char": -1.8956476211547852, "num_chars": 5}, {"sum_logits": -12.666339874267578, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.808551788330078, "logits_per_token": -6.333169937133789, "logits_per_char": -0.9047385624476841, "num_chars": 14}, {"sum_logits": -5.589617729187012, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.153367042541504, "logits_per_token": -5.589617729187012, "logits_per_char": -0.6987022161483765, "num_chars": 8}, {"sum_logits": -9.144206047058105, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.339394569396973, "logits_per_token": -9.144206047058105, "logits_per_char": -1.0160228941175673, "num_chars": 9}, {"sum_logits": -16.113658905029297, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -8.056829452514648, "logits_per_char": -1.6113658905029298, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 53, "native_id": "9c784727afd7176b54764055df7a7927", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.695625305175781, "incorrect_loss_raw": 17.97191095352173, "correct_loss_per_char": 0.9661805894639757, "incorrect_loss_per_char": 1.2893237338932504, "correct_loss_per_token": 4.347812652587891, "incorrect_loss_per_token": 7.7984729409217834, "correct_loss_uncond": -11.677703857421875, "incorrect_loss_uncond": -4.586260795593262}, "model_output": [{"sum_logits": -12.172181129455566, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -6.086090564727783, "logits_per_char": -1.2172181129455566, "num_chars": 10}, {"sum_logits": -13.613226890563965, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.879924774169922, "logits_per_token": -13.613226890563965, "logits_per_char": -1.7016533613204956, "num_chars": 8}, {"sum_logits": -8.695625305175781, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.373329162597656, "logits_per_token": -4.347812652587891, "logits_per_char": -0.9661805894639757, "num_chars": 9}, {"sum_logits": -30.81744956970215, "num_tokens": 8, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -38.47568893432617, "logits_per_token": -3.8521811962127686, "logits_per_char": -1.0626706748173154, "num_chars": 29}, {"sum_logits": -15.284786224365234, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.914505004882812, "logits_per_token": -7.642393112182617, "logits_per_char": -1.1757527864896333, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 54, "native_id": "b47d912136e3304cb5e5890b6b879551", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.07954216003418, "incorrect_loss_raw": 16.26680827140808, "correct_loss_per_char": 0.7753493969257061, "incorrect_loss_per_char": 1.2231255223116047, "correct_loss_per_token": 3.35984738667806, "incorrect_loss_per_token": 8.911580284436544, "correct_loss_uncond": -10.993570327758789, "incorrect_loss_uncond": -1.2303593158721924}, "model_output": [{"sum_logits": -18.0889892578125, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.519874572753906, "logits_per_token": -9.04449462890625, "logits_per_char": -1.507415771484375, "num_chars": 12}, {"sum_logits": -13.050594329833984, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.459878921508789, "logits_per_token": -13.050594329833984, "logits_per_char": -1.186417666348544, "num_chars": 11}, {"sum_logits": -13.452094078063965, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.059886932373047, "logits_per_token": -6.726047039031982, "logits_per_char": -1.1210078398386638, "num_chars": 12}, {"sum_logits": -10.07954216003418, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.07311248779297, "logits_per_token": -3.35984738667806, "logits_per_char": -0.7753493969257061, "num_chars": 13}, {"sum_logits": -20.475555419921875, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.94902992248535, "logits_per_token": -6.825185139973958, "logits_per_char": -1.0776608115748356, "num_chars": 19}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 55, "native_id": "49b4c9e1bd7946a819e173ce8fa4c7c9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6377139687538147, "incorrect_loss_raw": 11.298288822174072, "correct_loss_per_char": 0.06377139687538147, "incorrect_loss_per_char": 1.1407117559796287, "correct_loss_per_token": 0.6377139687538147, "incorrect_loss_per_token": 7.847082138061523, "correct_loss_uncond": -13.467744290828705, "incorrect_loss_uncond": -5.265962600708008}, "model_output": [{"sum_logits": -12.73864459991455, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.859416961669922, "logits_per_token": -6.369322299957275, "logits_per_char": -0.9099031857081822, "num_chars": 14}, {"sum_logits": -14.87100887298584, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -7.43550443649292, "logits_per_char": -1.0622149194989885, "num_chars": 14}, {"sum_logits": -8.156513214111328, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.997017860412598, "logits_per_token": -8.156513214111328, "logits_per_char": -1.019564151763916, "num_chars": 8}, {"sum_logits": -9.42698860168457, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -9.42698860168457, "logits_per_char": -1.5711647669474285, "num_chars": 6}, {"sum_logits": -0.6377139687538147, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -0.6377139687538147, "logits_per_char": -0.06377139687538147, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 56, "native_id": "950af0b765c298960ce3dada66df8db1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.583464622497559, "incorrect_loss_raw": 11.685147762298584, "correct_loss_per_char": 0.4652887185414632, "incorrect_loss_per_char": 1.3360682554317242, "correct_loss_per_token": 5.583464622497559, "incorrect_loss_per_token": 5.334630131721497, "correct_loss_uncond": -10.139636039733887, "incorrect_loss_uncond": -5.376479864120483}, "model_output": [{"sum_logits": -9.581765174865723, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -4.790882587432861, "logits_per_char": -0.8710695613514293, "num_chars": 11}, {"sum_logits": -5.583464622497559, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -5.583464622497559, "logits_per_char": -0.4652887185414632, "num_chars": 12}, {"sum_logits": -12.358783721923828, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.039682388305664, "logits_per_token": -6.179391860961914, "logits_per_char": -1.3731981913248699, "num_chars": 9}, {"sum_logits": -12.609392166137695, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.94958209991455, "logits_per_token": -6.304696083068848, "logits_per_char": -1.576174020767212, "num_chars": 8}, {"sum_logits": -12.19064998626709, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.875415802001953, "logits_per_token": -4.063549995422363, "logits_per_char": -1.5238312482833862, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 57, "native_id": "63cf1adb5fe302b9867ead8bc8103d0b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.147971153259277, "incorrect_loss_raw": 10.86702573299408, "correct_loss_per_char": 0.5431980768839518, "incorrect_loss_per_char": 1.1862428165617442, "correct_loss_per_token": 2.7159903844197593, "incorrect_loss_per_token": 7.619866967201233, "correct_loss_uncond": -13.063462257385254, "incorrect_loss_uncond": -4.391125798225403}, "model_output": [{"sum_logits": -14.625471115112305, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.328392028808594, "logits_per_token": -7.312735557556152, "logits_per_char": -0.7312735557556153, "num_chars": 20}, {"sum_logits": -9.957921028137207, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -9.46829605102539, "logits_per_token": -9.957921028137207, "logits_per_char": -1.9915842056274413, "num_chars": 5}, {"sum_logits": -7.532911777496338, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.096864700317383, "logits_per_token": -7.532911777496338, "logits_per_char": -1.0761302539280482, "num_chars": 7}, {"sum_logits": -8.147971153259277, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -2.7159903844197593, "logits_per_char": -0.5431980768839518, "num_chars": 15}, {"sum_logits": -11.351799011230469, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -5.675899505615234, "logits_per_char": -0.9459832509358724, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 58, "native_id": "ede4d302fc2ffe07703158f83c1493f2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.079994201660156, "incorrect_loss_raw": 11.535688400268555, "correct_loss_per_char": 1.3422215779622395, "incorrect_loss_per_char": 1.614304158422682, "correct_loss_per_token": 12.079994201660156, "incorrect_loss_per_token": 11.535688400268555, "correct_loss_uncond": -5.601320266723633, "incorrect_loss_uncond": -2.580571413040161}, "model_output": [{"sum_logits": -13.565238952636719, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.931857109069824, "logits_per_token": -13.565238952636719, "logits_per_char": -2.2608731587727866, "num_chars": 6}, {"sum_logits": -12.079994201660156, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.68131446838379, "logits_per_token": -12.079994201660156, "logits_per_char": -1.3422215779622395, "num_chars": 9}, {"sum_logits": -6.919353485107422, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.685956954956055, "logits_per_token": -6.919353485107422, "logits_per_char": -1.153225580851237, "num_chars": 6}, {"sum_logits": -13.83919906616211, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.901671409606934, "logits_per_token": -13.83919906616211, "logits_per_char": -1.7298998832702637, "num_chars": 8}, {"sum_logits": -11.818962097167969, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.94555377960205, "logits_per_token": -11.818962097167969, "logits_per_char": -1.313218010796441, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 59, "native_id": "74ad13a03634e79c85382f1b90969b74", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.586376190185547, "incorrect_loss_raw": 15.085850715637207, "correct_loss_per_char": 1.6586376190185548, "incorrect_loss_per_char": 1.1130997024030769, "correct_loss_per_token": 8.293188095092773, "incorrect_loss_per_token": 6.50733462969462, "correct_loss_uncond": -6.981410980224609, "incorrect_loss_uncond": -6.164205551147461}, "model_output": [{"sum_logits": -16.586376190185547, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.567787170410156, "logits_per_token": -8.293188095092773, "logits_per_char": -1.6586376190185548, "num_chars": 10}, {"sum_logits": -11.523380279541016, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.941198348999023, "logits_per_token": -5.761690139770508, "logits_per_char": -1.1523380279541016, "num_chars": 10}, {"sum_logits": -24.854177474975586, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -30.614816665649414, "logits_per_token": -8.284725824991861, "logits_per_char": -1.3807876374986436, "num_chars": 18}, {"sum_logits": -11.816469192504883, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.06946563720703, "logits_per_token": -5.908234596252441, "logits_per_char": -0.9847057660420736, "num_chars": 12}, {"sum_logits": -12.149375915527344, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.374744415283203, "logits_per_token": -6.074687957763672, "logits_per_char": -0.934567378117488, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 60, "native_id": "49e466b1782aa4837dae53ff891fcdee", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.531917572021484, "incorrect_loss_raw": 13.403625249862671, "correct_loss_per_char": 1.0591019524468317, "incorrect_loss_per_char": 1.043409509735031, "correct_loss_per_token": 4.765958786010742, "incorrect_loss_per_token": 6.336981534957886, "correct_loss_uncond": -6.203751564025879, "incorrect_loss_uncond": -4.524399995803833}, "model_output": [{"sum_logits": -14.375081062316895, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.371414184570312, "logits_per_token": -4.791693687438965, "logits_per_char": -1.105775466332069, "num_chars": 13}, {"sum_logits": -9.531917572021484, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -4.765958786010742, "logits_per_char": -1.0591019524468317, "num_chars": 9}, {"sum_logits": -15.51498031616211, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -3.8787450790405273, "logits_per_char": -1.108212879725865, "num_chars": 14}, {"sum_logits": -9.630535125732422, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -9.630535125732422, "logits_per_char": -0.875503193248402, "num_chars": 11}, {"sum_logits": -14.093904495239258, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.654294967651367, "logits_per_token": -7.046952247619629, "logits_per_char": -1.084146499633789, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 61, "native_id": "a8a8ae7792901c7179ff5538c701af1f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.91660737991333, "incorrect_loss_raw": 8.050634145736694, "correct_loss_per_char": 0.986101229985555, "incorrect_loss_per_char": 1.2053851791790553, "correct_loss_per_token": 5.91660737991333, "incorrect_loss_per_token": 7.327435553073883, "correct_loss_uncond": -6.667757511138916, "incorrect_loss_uncond": -6.623471260070801}, "model_output": [{"sum_logits": -13.349810600280762, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.021522521972656, "logits_per_token": -13.349810600280762, "logits_per_char": -2.6699621200561525, "num_chars": 5}, {"sum_logits": -5.8760857582092285, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -5.8760857582092285, "logits_per_char": -0.8394408226013184, "num_chars": 7}, {"sum_logits": -7.191051483154297, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.118462562561035, "logits_per_token": -7.191051483154297, "logits_per_char": -0.8988814353942871, "num_chars": 8}, {"sum_logits": -5.78558874130249, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.076425552368164, "logits_per_token": -2.892794370651245, "logits_per_char": -0.4132563386644636, "num_chars": 14}, {"sum_logits": -5.91660737991333, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -5.91660737991333, "logits_per_char": -0.986101229985555, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 62, "native_id": "2ffa3808ce26181926990b454e429c85", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.535844326019287, "incorrect_loss_raw": 11.411988854408264, "correct_loss_per_char": 0.35358443260192873, "incorrect_loss_per_char": 1.0361704606148932, "correct_loss_per_token": 1.7679221630096436, "incorrect_loss_per_token": 5.507690827051798, "correct_loss_uncond": -10.999441623687744, "incorrect_loss_uncond": -5.581454396247864}, "model_output": [{"sum_logits": -7.97029972076416, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.709622383117676, "logits_per_token": -7.97029972076416, "logits_per_char": -0.8855888578626845, "num_chars": 9}, {"sum_logits": -6.23799467086792, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.377809524536133, "logits_per_token": -2.079331556955973, "logits_per_char": -0.389874666929245, "num_chars": 16}, {"sum_logits": -3.535844326019287, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.535285949707031, "logits_per_token": -1.7679221630096436, "logits_per_char": -0.35358443260192873, "num_chars": 10}, {"sum_logits": -14.954793930053711, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.948440551757812, "logits_per_token": -3.7386984825134277, "logits_per_char": -1.4954793930053711, "num_chars": 10}, {"sum_logits": -16.484867095947266, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -8.242433547973633, "logits_per_char": -1.3737389246622722, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 63, "native_id": "4319eaa36d256a92b72445c0392f9c94", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.044137001037598, "incorrect_loss_raw": 12.305595874786377, "correct_loss_per_char": 2.174022833506266, "incorrect_loss_per_char": 1.3504723058806525, "correct_loss_per_token": 6.522068500518799, "incorrect_loss_per_token": 6.613416751225789, "correct_loss_uncond": -5.234145164489746, "incorrect_loss_uncond": -5.0599963665008545}, "model_output": [{"sum_logits": -7.834751129150391, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -7.834751129150391, "logits_per_char": -1.566950225830078, "num_chars": 5}, {"sum_logits": -12.44940185546875, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.630592346191406, "logits_per_token": -4.149800618489583, "logits_per_char": -1.3832668728298612, "num_chars": 9}, {"sum_logits": -17.147539138793945, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.208097457885742, "logits_per_token": -8.573769569396973, "logits_per_char": -1.7147539138793946, "num_chars": 10}, {"sum_logits": -13.044137001037598, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.278282165527344, "logits_per_token": -6.522068500518799, "logits_per_char": -2.174022833506266, "num_chars": 6}, {"sum_logits": -11.790691375732422, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.654029846191406, "logits_per_token": -5.895345687866211, "logits_per_char": -0.7369182109832764, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 64, "native_id": "ec79ef747bb89281923edb89ba26786d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.47293758392334, "incorrect_loss_raw": 11.988849401473999, "correct_loss_per_char": 0.9520852349021218, "incorrect_loss_per_char": 1.2499676496263534, "correct_loss_per_token": 5.23646879196167, "incorrect_loss_per_token": 6.54096519947052, "correct_loss_uncond": -9.422348976135254, "incorrect_loss_uncond": -5.297980785369873}, "model_output": [{"sum_logits": -10.47293758392334, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -5.23646879196167, "logits_per_char": -0.9520852349021218, "num_chars": 11}, {"sum_logits": -9.505109786987305, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -4.752554893493652, "logits_per_char": -1.056123309665256, "num_chars": 9}, {"sum_logits": -16.89525032043457, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -5.631750106811523, "logits_per_char": -1.689525032043457, "num_chars": 10}, {"sum_logits": -10.004074096679688, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.272040367126465, "logits_per_token": -10.004074096679688, "logits_per_char": -1.4291534423828125, "num_chars": 7}, {"sum_logits": -11.550963401794434, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -5.775481700897217, "logits_per_char": -0.8250688144138881, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 65, "native_id": "2d33cde5e3987adc8fa2bca0af4dd3dd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.59087085723877, "incorrect_loss_raw": 12.837596893310547, "correct_loss_per_char": 0.7550483809577094, "incorrect_loss_per_char": 1.0782846127237593, "correct_loss_per_token": 4.530290285746257, "incorrect_loss_per_token": 6.418798446655273, "correct_loss_uncond": -11.97819995880127, "incorrect_loss_uncond": -7.082311630249023}, "model_output": [{"sum_logits": -13.59087085723877, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -25.56907081604004, "logits_per_token": -4.530290285746257, "logits_per_char": -0.7550483809577094, "num_chars": 18}, {"sum_logits": -13.495884895324707, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.79824447631836, "logits_per_token": -6.7479424476623535, "logits_per_char": -1.2268986268477007, "num_chars": 11}, {"sum_logits": -12.561367988586426, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.090572357177734, "logits_per_token": -6.280683994293213, "logits_per_char": -1.1419425444169478, "num_chars": 11}, {"sum_logits": -11.562163352966309, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.57262420654297, "logits_per_token": -5.781081676483154, "logits_per_char": -0.9635136127471924, "num_chars": 12}, {"sum_logits": -13.730971336364746, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.21819305419922, "logits_per_token": -6.865485668182373, "logits_per_char": -0.9807836668831962, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 66, "native_id": "cc46d936bf69d69a3863b0cb85d75c17", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.126954555511475, "incorrect_loss_raw": 10.11930251121521, "correct_loss_per_char": 0.7126954555511474, "incorrect_loss_per_char": 1.8008400064248304, "correct_loss_per_token": 7.126954555511475, "incorrect_loss_per_token": 10.11930251121521, "correct_loss_uncond": -6.59894323348999, "incorrect_loss_uncond": -3.3187663555145264}, "model_output": [{"sum_logits": -9.186328887939453, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -9.186328887939453, "logits_per_char": -2.2965822219848633, "num_chars": 4}, {"sum_logits": -9.662663459777832, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.196586608886719, "logits_per_token": -9.662663459777832, "logits_per_char": -2.415665864944458, "num_chars": 4}, {"sum_logits": -12.408585548400879, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -12.408585548400879, "logits_per_char": -0.9545065806462214, "num_chars": 13}, {"sum_logits": -7.126954555511475, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.725897789001465, "logits_per_token": -7.126954555511475, "logits_per_char": -0.7126954555511474, "num_chars": 10}, {"sum_logits": -9.219632148742676, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.351434707641602, "logits_per_token": -9.219632148742676, "logits_per_char": -1.5366053581237793, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 67, "native_id": "46bc1a50eeead10509a43a048e01194e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.530757904052734, "incorrect_loss_raw": 7.09831690788269, "correct_loss_per_char": 0.8163447380065918, "incorrect_loss_per_char": 0.6142624290280752, "correct_loss_per_token": 2.1769193013509116, "incorrect_loss_per_token": 4.135972678661346, "correct_loss_uncond": -9.320060729980469, "incorrect_loss_uncond": -11.396038293838501}, "model_output": [{"sum_logits": -6.530757904052734, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.850818634033203, "logits_per_token": -2.1769193013509116, "logits_per_char": -0.8163447380065918, "num_chars": 8}, {"sum_logits": -10.742237091064453, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.92497444152832, "logits_per_token": -5.371118545532227, "logits_per_char": -0.826325930081881, "num_chars": 13}, {"sum_logits": -4.69451379776001, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.036821365356445, "logits_per_token": -4.69451379776001, "logits_per_char": -0.782418966293335, "num_chars": 6}, {"sum_logits": -4.760161876678467, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.746349334716797, "logits_per_token": -2.3800809383392334, "logits_per_char": -0.3661662982060359, "num_chars": 13}, {"sum_logits": -8.196354866027832, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -4.098177433013916, "logits_per_char": -0.48213852153104897, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 68, "native_id": "4336a8c55b7cb17275d1c60206cd2f18", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.424005508422852, "incorrect_loss_raw": 9.735740303993225, "correct_loss_per_char": 0.9040009180704752, "incorrect_loss_per_char": 1.2366683268099807, "correct_loss_per_token": 5.424005508422852, "incorrect_loss_per_token": 8.083608746528625, "correct_loss_uncond": -7.1603593826293945, "incorrect_loss_uncond": -4.513638615608215}, "model_output": [{"sum_logits": -13.217052459716797, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.494548797607422, "logits_per_token": -6.608526229858398, "logits_per_char": -1.201550223610618, "num_chars": 11}, {"sum_logits": -7.068021297454834, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.947824478149414, "logits_per_token": -7.068021297454834, "logits_per_char": -1.4136042594909668, "num_chars": 5}, {"sum_logits": -5.424005508422852, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -5.424005508422852, "logits_per_char": -0.9040009180704752, "num_chars": 6}, {"sum_logits": -10.517650604248047, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -10.517650604248047, "logits_per_char": -1.1686278449164496, "num_chars": 9}, {"sum_logits": -8.140236854553223, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -8.140236854553223, "logits_per_char": -1.1628909792218889, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 69, "native_id": "a287575d3ba4b9f958536fc14a1f5b5a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.629601240158081, "incorrect_loss_raw": 13.165266036987305, "correct_loss_per_char": 0.5185144628797259, "incorrect_loss_per_char": 1.3404095427556473, "correct_loss_per_token": 3.629601240158081, "incorrect_loss_per_token": 6.0970689455668134, "correct_loss_uncond": -11.507774591445923, "incorrect_loss_uncond": -3.9256629943847656}, "model_output": [{"sum_logits": -13.85171127319336, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.136791229248047, "logits_per_token": -6.92585563659668, "logits_per_char": -1.2592464793812146, "num_chars": 11}, {"sum_logits": -14.059908866882324, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.86774253845215, "logits_per_token": -7.029954433441162, "logits_per_char": -1.4059908866882325, "num_chars": 10}, {"sum_logits": -13.095906257629395, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.921577453613281, "logits_per_token": -6.547953128814697, "logits_per_char": -1.6369882822036743, "num_chars": 8}, {"sum_logits": -3.629601240158081, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.137375831604004, "logits_per_token": -3.629601240158081, "logits_per_char": -0.5185144628797259, "num_chars": 7}, {"sum_logits": -11.65353775024414, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.437604904174805, "logits_per_token": -3.8845125834147134, "logits_per_char": -1.0594125227494673, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 70, "native_id": "f481dc35b0a97a20dc5cdfe1a59746e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.848369836807251, "incorrect_loss_raw": 12.529569864273071, "correct_loss_per_char": 0.3080616394678752, "incorrect_loss_per_char": 1.3102272885186332, "correct_loss_per_token": 1.848369836807251, "incorrect_loss_per_token": 8.791945338249207, "correct_loss_uncond": -12.65068793296814, "incorrect_loss_uncond": -3.344139337539673}, "model_output": [{"sum_logits": -14.424935340881348, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.510005950927734, "logits_per_token": -7.212467670440674, "logits_per_char": -1.4424935340881349, "num_chars": 10}, {"sum_logits": -5.34522819519043, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.57123851776123, "logits_per_token": -5.34522819519043, "logits_per_char": -0.7636040278843471, "num_chars": 7}, {"sum_logits": -15.47606086730957, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.651203155517578, "logits_per_token": -7.738030433654785, "logits_per_char": -1.5476060867309571, "num_chars": 10}, {"sum_logits": -14.872055053710938, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.762389183044434, "logits_per_token": -14.872055053710938, "logits_per_char": -1.4872055053710938, "num_chars": 10}, {"sum_logits": -1.848369836807251, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.49905776977539, "logits_per_token": -1.848369836807251, "logits_per_char": -0.3080616394678752, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 71, "native_id": "c1c7a9efa379b8a7024a71cf364a144c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.415093421936035, "incorrect_loss_raw": 12.748359441757202, "correct_loss_per_char": 0.48787048884800505, "incorrect_loss_per_char": 1.2406866923727171, "correct_loss_per_token": 3.415093421936035, "incorrect_loss_per_token": 6.466986060142517, "correct_loss_uncond": -9.06491756439209, "incorrect_loss_uncond": -5.289307594299316}, "model_output": [{"sum_logits": -3.913339614868164, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -3.913339614868164, "logits_per_char": -0.7826679229736329, "num_chars": 5}, {"sum_logits": -23.25082015991211, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.221445083618164, "logits_per_token": -11.625410079956055, "logits_per_char": -2.1137109236283735, "num_chars": 11}, {"sum_logits": -9.512666702270508, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -3.170888900756836, "logits_per_char": -0.4756333351135254, "num_chars": 20}, {"sum_logits": -14.316611289978027, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.15237045288086, "logits_per_token": -7.158305644989014, "logits_per_char": -1.5907345877753363, "num_chars": 9}, {"sum_logits": -3.415093421936035, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -3.415093421936035, "logits_per_char": -0.48787048884800505, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 72, "native_id": "821b32d39f57396979069b948030afe9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.178234100341797, "incorrect_loss_raw": 11.77065920829773, "correct_loss_per_char": 0.5452156066894531, "incorrect_loss_per_char": 1.0998416219974732, "correct_loss_per_token": 2.7260780334472656, "incorrect_loss_per_token": 6.556298732757568, "correct_loss_uncond": -12.607141494750977, "incorrect_loss_uncond": -4.803992748260498}, "model_output": [{"sum_logits": -15.536413192749023, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.413169860839844, "logits_per_token": -7.768206596374512, "logits_per_char": -1.1951087071345403, "num_chars": 13}, {"sum_logits": -8.176288604736328, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.689945220947266, "logits_per_token": -4.088144302368164, "logits_per_char": -0.743298964066939, "num_chars": 11}, {"sum_logits": -13.501636505126953, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.27172088623047, "logits_per_token": -4.500545501708984, "logits_per_char": -1.2274215004660867, "num_chars": 11}, {"sum_logits": -8.178234100341797, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.785375595092773, "logits_per_token": -2.7260780334472656, "logits_per_char": -0.5452156066894531, "num_chars": 15}, {"sum_logits": -9.868298530578613, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.923771858215332, "logits_per_token": -9.868298530578613, "logits_per_char": -1.2335373163223267, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 73, "native_id": "c68b4082a6872cf8198502651d0f3352", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.214326858520508, "incorrect_loss_raw": 10.196187138557434, "correct_loss_per_char": 0.47402971441095526, "incorrect_loss_per_char": 0.9982739363397871, "correct_loss_per_token": 2.607163429260254, "incorrect_loss_per_token": 5.282450973987579, "correct_loss_uncond": -13.228036880493164, "incorrect_loss_uncond": -7.195456147193909}, "model_output": [{"sum_logits": -5.214326858520508, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.442363739013672, "logits_per_token": -2.607163429260254, "logits_per_char": -0.47402971441095526, "num_chars": 11}, {"sum_logits": -10.837682723999023, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.367205619812012, "logits_per_token": -10.837682723999023, "logits_per_char": -1.5482403891427177, "num_chars": 7}, {"sum_logits": -4.531088352203369, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.854202270507812, "logits_per_token": -2.2655441761016846, "logits_per_char": -0.4531088352203369, "num_chars": 10}, {"sum_logits": -6.690330505371094, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.260398864746094, "logits_per_token": -3.345165252685547, "logits_per_char": -0.7433700561523438, "num_chars": 9}, {"sum_logits": -18.72564697265625, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.084766387939453, "logits_per_token": -4.6814117431640625, "logits_per_char": -1.24837646484375, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 74, "native_id": "dd11fea36d89aa09f9a6069545ba4c9c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.156794548034668, "incorrect_loss_raw": 13.429149866104126, "correct_loss_per_char": 1.0130662123362224, "incorrect_loss_per_char": 1.415288523623818, "correct_loss_per_token": 4.05226484934489, "incorrect_loss_per_token": 6.978155422210693, "correct_loss_uncond": -7.302802085876465, "incorrect_loss_uncond": -3.3664462566375732}, "model_output": [{"sum_logits": -11.270055770874023, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.10810661315918, "logits_per_token": -5.635027885437012, "logits_per_char": -1.1270055770874023, "num_chars": 10}, {"sum_logits": -25.2111873626709, "num_tokens": 5, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -23.767004013061523, "logits_per_token": -5.04223747253418, "logits_per_char": -1.3269045980353105, "num_chars": 19}, {"sum_logits": -12.156794548034668, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.459596633911133, "logits_per_token": -4.05226484934489, "logits_per_char": -1.0130662123362224, "num_chars": 12}, {"sum_logits": -10.040535926818848, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.979119300842285, "logits_per_token": -10.040535926818848, "logits_per_char": -2.0081071853637695, "num_chars": 5}, {"sum_logits": -7.194820404052734, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.328154563903809, "logits_per_token": -7.194820404052734, "logits_per_char": -1.199136734008789, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 75, "native_id": "7792b2c6518ecf9775efba6d41253312", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.747218132019043, "incorrect_loss_raw": 13.094290971755981, "correct_loss_per_char": 0.4315652847290039, "incorrect_loss_per_char": 1.0435934576603871, "correct_loss_per_token": 4.747218132019043, "incorrect_loss_per_token": 8.87997442483902, "correct_loss_uncond": -7.996241569519043, "incorrect_loss_uncond": -2.2742459774017334}, "model_output": [{"sum_logits": -12.558759689331055, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.32103157043457, "logits_per_token": -6.279379844665527, "logits_per_char": -0.9660584376408503, "num_chars": 13}, {"sum_logits": -4.747218132019043, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.743459701538086, "logits_per_token": -4.747218132019043, "logits_per_char": -0.4315652847290039, "num_chars": 11}, {"sum_logits": -14.103848457336426, "num_tokens": 4, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.656736373901367, "logits_per_token": -3.5259621143341064, "logits_per_char": -0.7423078135440224, "num_chars": 19}, {"sum_logits": -14.115276336669922, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -14.115276336669922, "logits_per_char": -1.4115276336669922, "num_chars": 10}, {"sum_logits": -11.599279403686523, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.006147384643555, "logits_per_token": -11.599279403686523, "logits_per_char": -1.0544799457896838, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 76, "native_id": "1feb4c2a0e8ed638259f5d27b16eae9a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.01434850692749, "incorrect_loss_raw": 8.272851943969727, "correct_loss_per_char": 0.5845290422439575, "incorrect_loss_per_char": 1.2992386893792585, "correct_loss_per_token": 7.01434850692749, "incorrect_loss_per_token": 6.653107643127441, "correct_loss_uncond": -8.708752155303955, "incorrect_loss_uncond": -6.037508964538574}, "model_output": [{"sum_logits": -6.482151031494141, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.407280921936035, "logits_per_token": -6.482151031494141, "logits_per_char": -1.2964302062988282, "num_chars": 5}, {"sum_logits": -7.208531379699707, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -7.208531379699707, "logits_per_char": -1.8021328449249268, "num_chars": 4}, {"sum_logits": -7.01434850692749, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -7.01434850692749, "logits_per_char": -0.5845290422439575, "num_chars": 12}, {"sum_logits": -6.442770957946777, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -6.442770957946777, "logits_per_char": -0.9203958511352539, "num_chars": 7}, {"sum_logits": -12.957954406738281, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.36029624938965, "logits_per_token": -6.478977203369141, "logits_per_char": -1.1779958551580256, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 77, "native_id": "2de08c7a518b7c226e19bdc8fc10ef1d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.161832809448242, "incorrect_loss_raw": 10.205735445022583, "correct_loss_per_char": 0.832893891768022, "incorrect_loss_per_char": 1.310919219917721, "correct_loss_per_token": 9.161832809448242, "incorrect_loss_per_token": 7.507365465164185, "correct_loss_uncond": -5.539874076843262, "incorrect_loss_uncond": -7.351134538650513}, "model_output": [{"sum_logits": -21.586959838867188, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -25.15966033935547, "logits_per_token": -10.793479919433594, "logits_per_char": -2.398551093207465, "num_chars": 9}, {"sum_logits": -7.887996196746826, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.389422416687012, "logits_per_token": -7.887996196746826, "logits_per_char": -1.5775992393493652, "num_chars": 5}, {"sum_logits": -3.862332820892334, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.955296516418457, "logits_per_token": -3.862332820892334, "logits_per_char": -0.643722136815389, "num_chars": 6}, {"sum_logits": -7.485652923583984, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -7.485652923583984, "logits_per_char": -0.6238044102986654, "num_chars": 12}, {"sum_logits": -9.161832809448242, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -9.161832809448242, "logits_per_char": -0.832893891768022, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 78, "native_id": "ea8664e77205224154f8519f922220e1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9683260917663574, "incorrect_loss_raw": 10.771219253540039, "correct_loss_per_char": 0.13833229882376535, "incorrect_loss_per_char": 1.4680336892604828, "correct_loss_per_token": 0.9683260917663574, "incorrect_loss_per_token": 9.661900997161865, "correct_loss_uncond": -10.894452571868896, "incorrect_loss_uncond": -1.372401237487793}, "model_output": [{"sum_logits": -8.87454605102539, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.867948532104492, "logits_per_token": -4.437273025512695, "logits_per_char": -0.887454605102539, "num_chars": 10}, {"sum_logits": -0.9683260917663574, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.862778663635254, "logits_per_token": -0.9683260917663574, "logits_per_char": -0.13833229882376535, "num_chars": 7}, {"sum_logits": -9.445183753967285, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -10.181456565856934, "logits_per_token": -9.445183753967285, "logits_per_char": -1.8890367507934571, "num_chars": 5}, {"sum_logits": -11.832174301147461, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.312068939208984, "logits_per_token": -11.832174301147461, "logits_per_char": -1.4790217876434326, "num_chars": 8}, {"sum_logits": -12.93297290802002, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.213007926940918, "logits_per_token": -12.93297290802002, "logits_per_char": -1.6166216135025024, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 79, "native_id": "a64d45cecde84fdcf5f0a79805a0c6fe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.54996109008789, "incorrect_loss_raw": 9.320265173912048, "correct_loss_per_char": 1.8388845655653212, "incorrect_loss_per_char": 1.2132947101737512, "correct_loss_per_token": 8.274980545043945, "incorrect_loss_per_token": 6.869517763455709, "correct_loss_uncond": -3.8850669860839844, "incorrect_loss_uncond": -6.750601172447205}, "model_output": [{"sum_logits": -7.285854816436768, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.935651779174805, "logits_per_token": -2.4286182721455893, "logits_per_char": -0.6623504378578879, "num_chars": 11}, {"sum_logits": -10.13440227508545, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -10.13440227508545, "logits_per_char": -2.5336005687713623, "num_chars": 4}, {"sum_logits": -16.54996109008789, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.435028076171875, "logits_per_token": -8.274980545043945, "logits_per_char": -1.8388845655653212, "num_chars": 9}, {"sum_logits": -9.89150619506836, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.434167861938477, "logits_per_token": -4.94575309753418, "logits_per_char": -0.5495281219482422, "num_chars": 18}, {"sum_logits": -9.969297409057617, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.939125061035156, "logits_per_token": -9.969297409057617, "logits_per_char": -1.107699712117513, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 80, "native_id": "60e92cd2f35c345872d1a898e1718d55", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2234328985214233, "incorrect_loss_raw": 13.384741067886353, "correct_loss_per_char": 0.24468657970428467, "incorrect_loss_per_char": 1.7578801434449476, "correct_loss_per_token": 1.2234328985214233, "incorrect_loss_per_token": 8.397046446800232, "correct_loss_uncond": -11.98844587802887, "incorrect_loss_uncond": -2.5510642528533936}, "model_output": [{"sum_logits": -15.802929878234863, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.901464939117432, "logits_per_char": -1.7558810975816515, "num_chars": 9}, {"sum_logits": -1.2234328985214233, "num_tokens": 1, "num_tokens_all": 167, "is_greedy": true, "sum_logits_uncond": -13.211878776550293, "logits_per_token": -1.2234328985214233, "logits_per_char": -0.24468657970428467, "num_chars": 5}, {"sum_logits": -13.380834579467773, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -18.961448669433594, "logits_per_token": -6.690417289733887, "logits_per_char": -1.216439507224343, "num_chars": 11}, {"sum_logits": -10.717792510986328, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -16.33707046508789, "logits_per_token": -5.358896255493164, "logits_per_char": -1.7862987518310547, "num_chars": 6}, {"sum_logits": -13.637407302856445, "num_tokens": 1, "num_tokens_all": 167, "is_greedy": false, "sum_logits_uncond": -12.709033012390137, "logits_per_token": -13.637407302856445, "logits_per_char": -2.2729012171427407, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 81, "native_id": "08f3c187908646997b9080c7e9ea7da4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.495028018951416, "incorrect_loss_raw": 7.115285754203796, "correct_loss_per_char": 0.24972377883063424, "incorrect_loss_per_char": 1.176597136921353, "correct_loss_per_token": 2.247514009475708, "incorrect_loss_per_token": 4.658934473991394, "correct_loss_uncond": -13.74750280380249, "incorrect_loss_uncond": -7.95550811290741}, "model_output": [{"sum_logits": -12.836310386657715, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.856157302856445, "logits_per_token": -6.418155193328857, "logits_per_char": -2.1393850644429526, "num_chars": 6}, {"sum_logits": -5.088685989379883, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.335503578186035, "logits_per_token": -5.088685989379883, "logits_per_char": -1.0177371978759766, "num_chars": 5}, {"sum_logits": -6.814499855041504, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.9560546875, "logits_per_token": -3.407249927520752, "logits_per_char": -1.1357499758402507, "num_chars": 6}, {"sum_logits": -4.495028018951416, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.242530822753906, "logits_per_token": -2.247514009475708, "logits_per_char": -0.24972377883063424, "num_chars": 18}, {"sum_logits": -3.721646785736084, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -3.721646785736084, "logits_per_char": -0.41351630952623153, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 82, "native_id": "9aff72f0c480c2b4edde45bd2e7e4870", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.067880153656006, "incorrect_loss_raw": 10.647313594818115, "correct_loss_per_char": 0.5889900128046671, "incorrect_loss_per_char": 0.8913872987796099, "correct_loss_per_token": 3.533940076828003, "incorrect_loss_per_token": 4.713302691777547, "correct_loss_uncond": -15.141489505767822, "incorrect_loss_uncond": -10.940245151519775}, "model_output": [{"sum_logits": -8.887361526489258, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.957321166992188, "logits_per_token": -4.443680763244629, "logits_per_char": -0.6836431943453275, "num_chars": 13}, {"sum_logits": -9.191719055175781, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.81662368774414, "logits_per_token": -4.595859527587891, "logits_per_char": -0.9191719055175781, "num_chars": 10}, {"sum_logits": -9.861675262451172, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.90944480895996, "logits_per_token": -4.930837631225586, "logits_per_char": -0.9861675262451172, "num_chars": 10}, {"sum_logits": -14.64849853515625, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -25.666845321655273, "logits_per_token": -4.882832845052083, "logits_per_char": -0.9765665690104167, "num_chars": 15}, {"sum_logits": -7.067880153656006, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.209369659423828, "logits_per_token": -3.533940076828003, "logits_per_char": -0.5889900128046671, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 83, "native_id": "fd243c96edec5b1b8520d5bfeddc6622", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0914061069488525, "incorrect_loss_raw": 7.47300124168396, "correct_loss_per_char": 0.19012782790444113, "incorrect_loss_per_char": 1.3194063561303275, "correct_loss_per_token": 0.6971353689829508, "incorrect_loss_per_token": 7.47300124168396, "correct_loss_uncond": -14.741840124130249, "incorrect_loss_uncond": -7.129134654998779}, "model_output": [{"sum_logits": -2.0914061069488525, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.8332462310791, "logits_per_token": -0.6971353689829508, "logits_per_char": -0.19012782790444113, "num_chars": 11}, {"sum_logits": -7.2213544845581055, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.05290412902832, "logits_per_token": -7.2213544845581055, "logits_per_char": -1.0316220692225866, "num_chars": 7}, {"sum_logits": -9.401830673217773, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.196922302246094, "logits_per_token": -9.401830673217773, "logits_per_char": -2.3504576683044434, "num_chars": 4}, {"sum_logits": -8.192795753479004, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -8.192795753479004, "logits_per_char": -1.1703993933541434, "num_chars": 7}, {"sum_logits": -5.076024055480957, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -5.076024055480957, "logits_per_char": -0.7251462936401367, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 84, "native_id": "f5ec4fdfd0e37e733bfc1606b986f1e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.598957061767578, "incorrect_loss_raw": 15.38544774055481, "correct_loss_per_char": 0.9554396735297309, "incorrect_loss_per_char": 1.540554093746912, "correct_loss_per_token": 4.299478530883789, "incorrect_loss_per_token": 5.6178514162699384, "correct_loss_uncond": -12.38914680480957, "incorrect_loss_uncond": -5.760404825210571}, "model_output": [{"sum_logits": -11.744852066040039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.15974235534668, "logits_per_token": -5.8724260330200195, "logits_per_char": -1.9574753443400066, "num_chars": 6}, {"sum_logits": -15.739258766174316, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.195404052734375, "logits_per_token": -5.246419588724772, "logits_per_char": -1.311604897181193, "num_chars": 12}, {"sum_logits": -16.115564346313477, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.524616241455078, "logits_per_token": -5.371854782104492, "logits_per_char": -1.6115564346313476, "num_chars": 10}, {"sum_logits": -17.942115783691406, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.70364761352539, "logits_per_token": -5.980705261230469, "logits_per_char": -1.2815796988351005, "num_chars": 14}, {"sum_logits": -8.598957061767578, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.98810386657715, "logits_per_token": -4.299478530883789, "logits_per_char": -0.9554396735297309, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 85, "native_id": "e3c6d147f8a727d314046e70e9579ba0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.2743611335754395, "incorrect_loss_raw": 6.7824835777282715, "correct_loss_per_char": 0.3561967611312866, "incorrect_loss_per_char": 0.5444104608562257, "correct_loss_per_token": 2.1371805667877197, "incorrect_loss_per_token": 3.0405580202738443, "correct_loss_uncond": -14.108211040496826, "incorrect_loss_uncond": -10.104615449905396}, "model_output": [{"sum_logits": -4.2743611335754395, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.382572174072266, "logits_per_token": -2.1371805667877197, "logits_per_char": -0.3561967611312866, "num_chars": 12}, {"sum_logits": -2.5663328170776367, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -2.5663328170776367, "logits_per_char": -0.2851480907864041, "num_chars": 9}, {"sum_logits": -8.448192596435547, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.777921676635742, "logits_per_token": -4.224096298217773, "logits_per_char": -0.5632128397623698, "num_chars": 15}, {"sum_logits": -8.588476181030273, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.818557739257812, "logits_per_token": -2.862825393676758, "logits_per_char": -0.8588476181030273, "num_chars": 10}, {"sum_logits": -7.526932716369629, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.876785278320312, "logits_per_token": -2.5089775721232095, "logits_per_char": -0.4704332947731018, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 86, "native_id": "8ce13c6e08bf38d4cd4af756b661e47c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7088394165039062, "incorrect_loss_raw": 6.9022451639175415, "correct_loss_per_char": 0.30098215738932294, "incorrect_loss_per_char": 0.8199646626199995, "correct_loss_per_token": 2.7088394165039062, "incorrect_loss_per_token": 5.427818655967712, "correct_loss_uncond": -13.261272430419922, "incorrect_loss_uncond": -8.732232689857483}, "model_output": [{"sum_logits": -5.904325485229492, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.518326759338379, "logits_per_token": -5.904325485229492, "logits_per_char": -0.7380406856536865, "num_chars": 8}, {"sum_logits": -11.795412063598633, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.163862228393555, "logits_per_token": -5.897706031799316, "logits_per_char": -1.1795412063598634, "num_chars": 10}, {"sum_logits": -2.9864463806152344, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -2.9864463806152344, "logits_per_char": -0.3733057975769043, "num_chars": 8}, {"sum_logits": -2.7088394165039062, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -2.7088394165039062, "logits_per_char": -0.30098215738932294, "num_chars": 9}, {"sum_logits": -6.922796726226807, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -6.922796726226807, "logits_per_char": -0.9889709608895438, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 87, "native_id": "0f4159e80f8dbf682819215bbf0f5b5a_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.212794303894043, "incorrect_loss_raw": 10.205695748329163, "correct_loss_per_char": 1.2765992879867554, "incorrect_loss_per_char": 1.027109759865385, "correct_loss_per_token": 10.212794303894043, "incorrect_loss_per_token": 8.812347412109375, "correct_loss_uncond": -2.662745475769043, "incorrect_loss_uncond": -3.260354161262512}, "model_output": [{"sum_logits": -5.857981204986572, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.297904014587402, "logits_per_token": -5.857981204986572, "logits_per_char": -0.5857981204986572, "num_chars": 10}, {"sum_logits": -10.212794303894043, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.875539779663086, "logits_per_token": -10.212794303894043, "logits_per_char": -1.2765992879867554, "num_chars": 8}, {"sum_logits": -11.1467866897583, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.80309009552002, "logits_per_token": -5.57339334487915, "logits_per_char": -1.1146786689758301, "num_chars": 10}, {"sum_logits": -12.013063430786133, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -12.013063430786133, "logits_per_char": -1.3347848256429036, "num_chars": 9}, {"sum_logits": -11.804951667785645, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.743459701538086, "logits_per_token": -11.804951667785645, "logits_per_char": -1.0731774243441494, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 88, "native_id": "1a8b3c2a46efabcbd506f9cf70886ed0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.986991882324219, "incorrect_loss_raw": 20.586259126663208, "correct_loss_per_char": 0.7214995490180122, "incorrect_loss_per_char": 1.3635125639674428, "correct_loss_per_token": 4.328997294108073, "incorrect_loss_per_token": 7.637358983357748, "correct_loss_uncond": -9.36292839050293, "incorrect_loss_uncond": -2.3088161945343018}, "model_output": [{"sum_logits": -10.991755485534668, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.412267684936523, "logits_per_token": -3.6639184951782227, "logits_per_char": -0.845519652733436, "num_chars": 13}, {"sum_logits": -18.77834701538086, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -6.259449005126953, "logits_per_char": -1.2518898010253907, "num_chars": 15}, {"sum_logits": -33.96839141845703, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.609716415405273, "logits_per_token": -11.322797139485678, "logits_per_char": -2.426313672746931, "num_chars": 14}, {"sum_logits": -18.606542587280273, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.34688377380371, "logits_per_token": -9.303271293640137, "logits_per_char": -0.9303271293640136, "num_chars": 20}, {"sum_logits": -12.986991882324219, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.34992027282715, "logits_per_token": -4.328997294108073, "logits_per_char": -0.7214995490180122, "num_chars": 18}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 89, "native_id": "db0cfd52ca6b2bbfcf26d1a898fd929b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.370284080505371, "incorrect_loss_raw": 7.905206799507141, "correct_loss_per_char": 0.7808570067087809, "incorrect_loss_per_char": 0.997813677125507, "correct_loss_per_token": 4.6851420402526855, "incorrect_loss_per_token": 6.090434670448303, "correct_loss_uncond": -8.711403846740723, "incorrect_loss_uncond": -6.912246823310852}, "model_output": [{"sum_logits": -5.680386066436768, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.239700317382812, "logits_per_token": -5.680386066436768, "logits_per_char": -1.1360772132873536, "num_chars": 5}, {"sum_logits": -11.422264099121094, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.496543884277344, "logits_per_token": -11.422264099121094, "logits_per_char": -1.2691404554578993, "num_chars": 9}, {"sum_logits": -9.370284080505371, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.081687927246094, "logits_per_token": -4.6851420402526855, "logits_per_char": -0.7808570067087809, "num_chars": 12}, {"sum_logits": -5.489642143249512, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.439910888671875, "logits_per_token": -2.744821071624756, "logits_per_char": -0.457470178604126, "num_chars": 12}, {"sum_logits": -9.028534889221191, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.093659400939941, "logits_per_token": -4.514267444610596, "logits_per_char": -1.128566861152649, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 90, "native_id": "400fb2e196e71abb70e5b3f9aab4b9ee", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.252973556518555, "incorrect_loss_raw": 10.351722955703735, "correct_loss_per_char": 1.4066216945648193, "incorrect_loss_per_char": 1.2175707548965902, "correct_loss_per_token": 11.252973556518555, "incorrect_loss_per_token": 6.362178206443787, "correct_loss_uncond": -7.800434112548828, "incorrect_loss_uncond": -5.63556694984436}, "model_output": [{"sum_logits": -9.490533828735352, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.546567916870117, "logits_per_token": -9.490533828735352, "logits_per_char": -1.3557905469621931, "num_chars": 7}, {"sum_logits": -12.906366348266602, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -6.453183174133301, "logits_per_char": -1.2906366348266602, "num_chars": 10}, {"sum_logits": -11.252973556518555, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.053407669067383, "logits_per_token": -11.252973556518555, "logits_per_char": -1.4066216945648193, "num_chars": 8}, {"sum_logits": -10.524162292480469, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.329479217529297, "logits_per_token": -5.262081146240234, "logits_per_char": -0.8095509455754206, "num_chars": 13}, {"sum_logits": -8.48582935333252, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.110544204711914, "logits_per_token": -4.24291467666626, "logits_per_char": -1.4143048922220867, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 91, "native_id": "3fb36127a61903029a363911a1d2b1e9_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.806629657745361, "incorrect_loss_raw": 8.353435754776001, "correct_loss_per_char": 0.7806629657745361, "incorrect_loss_per_char": 1.1322020071640349, "correct_loss_per_token": 3.9033148288726807, "incorrect_loss_per_token": 7.017141938209534, "correct_loss_uncond": -11.343936443328857, "incorrect_loss_uncond": -6.383565902709961}, "model_output": [{"sum_logits": -6.86311149597168, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -6.86311149597168, "logits_per_char": -1.14385191599528, "num_chars": 6}, {"sum_logits": -10.690350532531738, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -5.345175266265869, "logits_per_char": -0.5626500280279862, "num_chars": 19}, {"sum_logits": -7.806629657745361, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.15056610107422, "logits_per_token": -3.9033148288726807, "logits_per_char": -0.7806629657745361, "num_chars": 10}, {"sum_logits": -4.663334846496582, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -4.663334846496582, "logits_per_char": -0.5829168558120728, "num_chars": 8}, {"sum_logits": -11.196946144104004, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.534889221191406, "logits_per_token": -11.196946144104004, "logits_per_char": -2.2393892288208006, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 92, "native_id": "8494b0b95533dcedbd76ae2916c481d4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.92435359954834, "incorrect_loss_raw": 26.12009072303772, "correct_loss_per_char": 0.743696133295695, "incorrect_loss_per_char": 1.6900018042291118, "correct_loss_per_token": 4.46217679977417, "incorrect_loss_per_token": 9.304121780395509, "correct_loss_uncond": -9.903658866882324, "incorrect_loss_uncond": -0.94195556640625}, "model_output": [{"sum_logits": -11.568135261535645, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.890442848205566, "logits_per_token": -11.568135261535645, "logits_per_char": -1.6525907516479492, "num_chars": 7}, {"sum_logits": -29.74276351928711, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -31.10651206970215, "logits_per_token": -7.435690879821777, "logits_per_char": -1.652375751071506, "num_chars": 18}, {"sum_logits": -18.59589385986328, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.609106063842773, "logits_per_token": -9.29794692993164, "logits_per_char": -0.9787312557822779, "num_chars": 19}, {"sum_logits": -8.92435359954834, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.828012466430664, "logits_per_token": -4.46217679977417, "logits_per_char": -0.743696133295695, "num_chars": 12}, {"sum_logits": -44.573570251464844, "num_tokens": 5, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -46.64212417602539, "logits_per_token": -8.914714050292968, "logits_per_char": -2.4763094584147134, "num_chars": 18}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 93, "native_id": "1531f1523f5fd24bbdb42c311dbf90e8", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.768716335296631, "incorrect_loss_raw": 9.161691308021545, "correct_loss_per_char": 0.5298573705885146, "incorrect_loss_per_char": 1.0316411107431203, "correct_loss_per_token": 2.3843581676483154, "incorrect_loss_per_token": 5.413870334625244, "correct_loss_uncond": -12.049065113067627, "incorrect_loss_uncond": -10.722424626350403}, "model_output": [{"sum_logits": -6.008927345275879, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -3.0044636726379395, "logits_per_char": -0.5007439454396566, "num_chars": 12}, {"sum_logits": -15.705026626586914, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.961870193481445, "logits_per_token": -7.852513313293457, "logits_per_char": -1.2080789712759166, "num_chars": 13}, {"sum_logits": -6.6641974449157715, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -6.6641974449157715, "logits_per_char": -1.6660493612289429, "num_chars": 4}, {"sum_logits": -8.268613815307617, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -4.134306907653809, "logits_per_char": -0.7516921650279652, "num_chars": 11}, {"sum_logits": -4.768716335296631, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.817781448364258, "logits_per_token": -2.3843581676483154, "logits_per_char": -0.5298573705885146, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 94, "native_id": "716ce4404a84b42dd64e561390c4b53b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.41658878326416, "incorrect_loss_raw": 7.426076889038086, "correct_loss_per_char": 1.55207359790802, "incorrect_loss_per_char": 0.7003232283251626, "correct_loss_per_token": 6.20829439163208, "incorrect_loss_per_token": 3.2254455983638763, "correct_loss_uncond": -4.006131172180176, "incorrect_loss_uncond": -9.564440250396729}, "model_output": [{"sum_logits": -7.531942367553711, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.046459197998047, "logits_per_token": -3.7659711837768555, "logits_per_char": -0.6847220334139738, "num_chars": 11}, {"sum_logits": -6.953115940093994, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.27696990966797, "logits_per_token": -3.476557970046997, "logits_per_char": -0.632101449099454, "num_chars": 11}, {"sum_logits": -7.417763710021973, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.915063858032227, "logits_per_token": -3.7088818550109863, "logits_per_char": -0.9272204637527466, "num_chars": 8}, {"sum_logits": -7.801485538482666, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -1.9503713846206665, "logits_per_char": -0.5572489670344761, "num_chars": 14}, {"sum_logits": -12.41658878326416, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.422719955444336, "logits_per_token": -6.20829439163208, "logits_per_char": -1.55207359790802, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 95, "native_id": "5169f7ae0781b15161551de3a189ebef", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.689884185791016, "incorrect_loss_raw": 13.18685245513916, "correct_loss_per_char": 0.7635631561279297, "incorrect_loss_per_char": 1.0701049843473298, "correct_loss_per_token": 10.689884185791016, "incorrect_loss_per_token": 9.256340463956196, "correct_loss_uncond": -1.172689437866211, "incorrect_loss_uncond": -2.037234306335449}, "model_output": [{"sum_logits": -16.8309326171875, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.031421661376953, "logits_per_token": -5.610310872395833, "logits_per_char": -0.8858385587993421, "num_chars": 19}, {"sum_logits": -14.239179611206055, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.856307983398438, "logits_per_token": -14.239179611206055, "logits_per_char": -1.4239179611206054, "num_chars": 10}, {"sum_logits": -9.002852439880371, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.43341827392578, "logits_per_token": -4.5014262199401855, "logits_per_char": -0.8184411308982156, "num_chars": 11}, {"sum_logits": -12.674445152282715, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.575199127197266, "logits_per_token": -12.674445152282715, "logits_per_char": -1.152222286571156, "num_chars": 11}, {"sum_logits": -10.689884185791016, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.862573623657227, "logits_per_token": -10.689884185791016, "logits_per_char": -0.7635631561279297, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 96, "native_id": "ef22ef7aeec70aaa688720f805c1cf38", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.149791717529297, "incorrect_loss_raw": 12.436461448669434, "correct_loss_per_char": 0.6535565512520927, "incorrect_loss_per_char": 1.5229572640524969, "correct_loss_per_token": 4.574895858764648, "incorrect_loss_per_token": 10.178195476531982, "correct_loss_uncond": -7.546897888183594, "incorrect_loss_uncond": -3.156489133834839}, "model_output": [{"sum_logits": -8.687171936035156, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -8.687171936035156, "logits_per_char": -0.8687171936035156, "num_chars": 10}, {"sum_logits": -9.149791717529297, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.69668960571289, "logits_per_token": -4.574895858764648, "logits_per_char": -0.6535565512520927, "num_chars": 14}, {"sum_logits": -7.480654716491699, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -7.480654716491699, "logits_per_char": -0.8311838573879666, "num_chars": 9}, {"sum_logits": -15.51189136505127, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -15.51189136505127, "logits_per_char": -2.585315227508545, "num_chars": 6}, {"sum_logits": -18.06612777709961, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.906009674072266, "logits_per_token": -9.033063888549805, "logits_per_char": -1.806612777709961, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 97, "native_id": "514310637fb43a252bfadc8cbf79b277", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5275323390960693, "incorrect_loss_raw": 8.556281507015228, "correct_loss_per_char": 0.22977566719055176, "incorrect_loss_per_char": 0.9513845273426601, "correct_loss_per_token": 2.5275323390960693, "incorrect_loss_per_token": 4.667102018992106, "correct_loss_uncond": -12.055818796157837, "incorrect_loss_uncond": -8.39486700296402}, "model_output": [{"sum_logits": -6.850563049316406, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -16.188400268554688, "logits_per_token": -6.850563049316406, "logits_per_char": -0.7611736721462674, "num_chars": 9}, {"sum_logits": -2.3924124240875244, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": true, "sum_logits_uncond": -12.736413955688477, "logits_per_token": -2.3924124240875244, "logits_per_char": -0.3417732034410749, "num_chars": 7}, {"sum_logits": -18.393856048583984, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -24.440689086914062, "logits_per_token": -6.131285349527995, "logits_per_char": -2.043761783175998, "num_chars": 9}, {"sum_logits": -2.5275323390960693, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -14.583351135253906, "logits_per_token": -2.5275323390960693, "logits_per_char": -0.22977566719055176, "num_chars": 11}, {"sum_logits": -6.588294506072998, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.439090728759766, "logits_per_token": -3.294147253036499, "logits_per_char": -0.6588294506072998, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 98, "native_id": "9370b2b0897b796dec4a40f107854c8d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.81389856338501, "incorrect_loss_raw": 11.98295545578003, "correct_loss_per_char": 0.5241460433373084, "incorrect_loss_per_char": 1.226241857291543, "correct_loss_per_token": 3.406949281692505, "incorrect_loss_per_token": 6.987281680107117, "correct_loss_uncond": -9.625314235687256, "incorrect_loss_uncond": -7.07792592048645}, "model_output": [{"sum_logits": -9.93533706665039, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.79820442199707, "logits_per_token": -4.967668533325195, "logits_per_char": -0.8279447555541992, "num_chars": 12}, {"sum_logits": -6.81389856338501, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.439212799072266, "logits_per_token": -3.406949281692505, "logits_per_char": -0.5241460433373084, "num_chars": 13}, {"sum_logits": -9.940329551696777, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.400543212890625, "logits_per_token": -4.970164775848389, "logits_per_char": -0.9036663228815253, "num_chars": 11}, {"sum_logits": -20.089723587036133, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -26.261974334716797, "logits_per_token": -10.044861793518066, "logits_per_char": -1.181748446296243, "num_chars": 17}, {"sum_logits": -7.966431617736816, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.782803535461426, "logits_per_token": -7.966431617736816, "logits_per_char": -1.991607904434204, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 99, "native_id": "49902e768c45aa41a0f9f95be81114e5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.8734458684921265, "incorrect_loss_raw": 14.35238802433014, "correct_loss_per_char": 0.3746891736984253, "incorrect_loss_per_char": 1.1489256113692932, "correct_loss_per_token": 1.8734458684921265, "incorrect_loss_per_token": 4.19312125047048, "correct_loss_uncond": -10.792870163917542, "incorrect_loss_uncond": -6.905384659767151}, "model_output": [{"sum_logits": -19.628238677978516, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -25.546749114990234, "logits_per_token": -4.907059669494629, "logits_per_char": -1.0330651935778166, "num_chars": 19}, {"sum_logits": -20.835935592651367, "num_tokens": 5, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -27.638874053955078, "logits_per_token": -4.167187118530274, "logits_per_char": -1.7363279660542805, "num_chars": 12}, {"sum_logits": -3.0746684074401855, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.147025108337402, "logits_per_token": -3.0746684074401855, "logits_per_char": -0.43923834392002653, "num_chars": 7}, {"sum_logits": -13.870709419250488, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.698442459106445, "logits_per_token": -4.623569806416829, "logits_per_char": -1.3870709419250489, "num_chars": 10}, {"sum_logits": -1.8734458684921265, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -1.8734458684921265, "logits_per_char": -0.3746891736984253, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 100, "native_id": "e1f90cd664a6b150291e6d8444d85c54", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.356288909912109, "incorrect_loss_raw": 9.225565433502197, "correct_loss_per_char": 0.6130240758260092, "incorrect_loss_per_char": 1.1306590129028666, "correct_loss_per_token": 3.6781444549560547, "incorrect_loss_per_token": 6.567518591880798, "correct_loss_uncond": -10.543174743652344, "incorrect_loss_uncond": -5.333948850631714}, "model_output": [{"sum_logits": -8.517386436462402, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.213007926940918, "logits_per_token": -8.517386436462402, "logits_per_char": -1.0646733045578003, "num_chars": 8}, {"sum_logits": -7.356288909912109, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.899463653564453, "logits_per_token": -3.6781444549560547, "logits_per_char": -0.6130240758260092, "num_chars": 12}, {"sum_logits": -10.183232307434082, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.973922729492188, "logits_per_token": -5.091616153717041, "logits_per_char": -0.9257483915849165, "num_chars": 11}, {"sum_logits": -11.08114242553711, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.2942533493042, "logits_per_token": -5.540571212768555, "logits_per_char": -1.108114242553711, "num_chars": 10}, {"sum_logits": -7.120500564575195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.75687313079834, "logits_per_token": -7.120500564575195, "logits_per_char": -1.424100112915039, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 101, "native_id": "320ec9b68fdefe13d59cc8b628083790", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9109129905700684, "incorrect_loss_raw": 18.860511779785156, "correct_loss_per_char": 0.5587018557957241, "incorrect_loss_per_char": 1.830955866404942, "correct_loss_per_token": 3.9109129905700684, "incorrect_loss_per_token": 7.598691082000732, "correct_loss_uncond": -9.5681471824646, "incorrect_loss_uncond": -3.2460663318634033}, "model_output": [{"sum_logits": -13.947298049926758, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -6.973649024963379, "logits_per_char": -1.5496997833251953, "num_chars": 9}, {"sum_logits": -17.34107208251953, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.065139770507812, "logits_per_token": -8.670536041259766, "logits_per_char": -1.734107208251953, "num_chars": 10}, {"sum_logits": -19.732812881469727, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.082653045654297, "logits_per_token": -9.866406440734863, "logits_per_char": -2.8189732687813893, "num_chars": 7}, {"sum_logits": -3.9109129905700684, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -3.9109129905700684, "logits_per_char": -0.5587018557957241, "num_chars": 7}, {"sum_logits": -24.42086410522461, "num_tokens": 5, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -33.542850494384766, "logits_per_token": -4.884172821044922, "logits_per_char": -1.2210432052612306, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 102, "native_id": "964185aed0e381853332bca1a4d91f46", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.9292707443237305, "incorrect_loss_raw": 13.657415628433228, "correct_loss_per_char": 0.592927074432373, "incorrect_loss_per_char": 1.212914215477686, "correct_loss_per_token": 2.9646353721618652, "incorrect_loss_per_token": 7.220773259798686, "correct_loss_uncond": -13.581219673156738, "incorrect_loss_uncond": -4.075907945632935}, "model_output": [{"sum_logits": -14.672528266906738, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.580930709838867, "logits_per_token": -7.336264133453369, "logits_per_char": -1.8340660333633423, "num_chars": 8}, {"sum_logits": -16.463523864746094, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -20.382665634155273, "logits_per_token": -5.487841288248698, "logits_per_char": -0.9146402147081163, "num_chars": 18}, {"sum_logits": -8.624364852905273, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.072769165039062, "logits_per_token": -8.624364852905273, "logits_per_char": -0.6160260609218052, "num_chars": 14}, {"sum_logits": -5.9292707443237305, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.51049041748047, "logits_per_token": -2.9646353721618652, "logits_per_char": -0.592927074432373, "num_chars": 10}, {"sum_logits": -14.869245529174805, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.896928787231445, "logits_per_token": -7.434622764587402, "logits_per_char": -1.4869245529174804, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 103, "native_id": "db8e010754c532d78635e5b7cf81a147", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.1086859703063965, "incorrect_loss_raw": 10.131912589073181, "correct_loss_per_char": 0.5108685970306397, "incorrect_loss_per_char": 0.9143289870126939, "correct_loss_per_token": 1.7028953234354656, "incorrect_loss_per_token": 5.065956294536591, "correct_loss_uncond": -9.570287227630615, "incorrect_loss_uncond": -8.322568774223328}, "model_output": [{"sum_logits": -9.332564353942871, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.062774658203125, "logits_per_token": -4.6662821769714355, "logits_per_char": -0.7777136961619059, "num_chars": 12}, {"sum_logits": -10.604317665100098, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -21.293292999267578, "logits_per_token": -5.302158832550049, "logits_per_char": -0.7574512617928642, "num_chars": 14}, {"sum_logits": -7.3410468101501465, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.080073356628418, "logits_per_token": -3.6705234050750732, "logits_per_char": -0.9176308512687683, "num_chars": 8}, {"sum_logits": -13.24972152709961, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.381784439086914, "logits_per_token": -6.624860763549805, "logits_per_char": -1.2045201388272373, "num_chars": 11}, {"sum_logits": -5.1086859703063965, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.678973197937012, "logits_per_token": -1.7028953234354656, "logits_per_char": -0.5108685970306397, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 104, "native_id": "998381f854f51da2a6ccde45909e5168", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.862683296203613, "incorrect_loss_raw": 9.319839715957642, "correct_loss_per_char": 0.7586679458618164, "incorrect_loss_per_char": 1.008601775324733, "correct_loss_per_token": 4.931341648101807, "incorrect_loss_per_token": 7.062628348668416, "correct_loss_uncond": -8.700295448303223, "incorrect_loss_uncond": -6.432995319366455}, "model_output": [{"sum_logits": -13.543268203735352, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.67224884033203, "logits_per_token": -4.51442273457845, "logits_per_char": -0.7128035896702817, "num_chars": 19}, {"sum_logits": -8.116020202636719, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.776552200317383, "logits_per_token": -8.116020202636719, "logits_per_char": -0.6763350168863932, "num_chars": 12}, {"sum_logits": -8.378047943115234, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -8.378047943115234, "logits_per_char": -1.1968639918736048, "num_chars": 7}, {"sum_logits": -9.862683296203613, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.562978744506836, "logits_per_token": -4.931341648101807, "logits_per_char": -0.7586679458618164, "num_chars": 13}, {"sum_logits": -7.242022514343262, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.485799789428711, "logits_per_token": -7.242022514343262, "logits_per_char": -1.4484045028686523, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 105, "native_id": "bc38ad28e99cff7a65771233f734a007", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.785030841827393, "incorrect_loss_raw": 9.09323787689209, "correct_loss_per_char": 0.9570061683654785, "incorrect_loss_per_char": 0.9896724522113799, "correct_loss_per_token": 4.785030841827393, "incorrect_loss_per_token": 5.322122931480408, "correct_loss_uncond": -10.289517879486084, "incorrect_loss_uncond": -6.970377683639526}, "model_output": [{"sum_logits": -6.204031944274902, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.540929794311523, "logits_per_token": -6.204031944274902, "logits_per_char": -0.7755039930343628, "num_chars": 8}, {"sum_logits": -6.080988883972168, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.519742965698242, "logits_per_token": -3.040494441986084, "logits_per_char": -0.5067490736643473, "num_chars": 12}, {"sum_logits": -10.192941665649414, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.660015106201172, "logits_per_token": -5.096470832824707, "logits_per_char": -1.132549073961046, "num_chars": 9}, {"sum_logits": -4.785030841827393, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.074548721313477, "logits_per_token": -4.785030841827393, "logits_per_char": -0.9570061683654785, "num_chars": 5}, {"sum_logits": -13.894989013671875, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -6.9474945068359375, "logits_per_char": -1.5438876681857638, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 106, "native_id": "e3949997bf9d02048cfa5d8dd0f287aa", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.624589920043945, "incorrect_loss_raw": 11.692807793617249, "correct_loss_per_char": 0.374972661336263, "incorrect_loss_per_char": 1.6309655484699068, "correct_loss_per_token": 2.8122949600219727, "incorrect_loss_per_token": 6.501825332641602, "correct_loss_uncond": -13.611255645751953, "incorrect_loss_uncond": -5.904353737831116}, "model_output": [{"sum_logits": -10.978066444396973, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.985862731933594, "logits_per_token": -5.489033222198486, "logits_per_char": -1.8296777407328289, "num_chars": 6}, {"sum_logits": -16.770740509033203, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.398082733154297, "logits_per_token": -8.385370254516602, "logits_per_char": -1.6770740509033204, "num_chars": 10}, {"sum_logits": -5.243371486663818, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -5.243371486663818, "logits_per_char": -1.0486742973327636, "num_chars": 5}, {"sum_logits": -13.779052734375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.3383846282959, "logits_per_token": -6.8895263671875, "logits_per_char": -1.9684361049107142, "num_chars": 7}, {"sum_logits": -5.624589920043945, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.2358455657959, "logits_per_token": -2.8122949600219727, "logits_per_char": -0.374972661336263, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 107, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.473308563232422, "incorrect_loss_raw": 12.281817436218262, "correct_loss_per_char": 0.37277571360270184, "incorrect_loss_per_char": 0.9260694305102031, "correct_loss_per_token": 2.236654281616211, "incorrect_loss_per_token": 4.327732841173808, "correct_loss_uncond": -11.055830001831055, "incorrect_loss_uncond": -6.912540435791016}, "model_output": [{"sum_logits": -4.473308563232422, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.529138565063477, "logits_per_token": -2.236654281616211, "logits_per_char": -0.37277571360270184, "num_chars": 12}, {"sum_logits": -10.271639823913574, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.488744735717773, "logits_per_token": -5.135819911956787, "logits_per_char": -0.6847759882609049, "num_chars": 15}, {"sum_logits": -11.591065406799316, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.31532096862793, "logits_per_token": -3.8636884689331055, "logits_per_char": -0.9659221172332764, "num_chars": 12}, {"sum_logits": -17.943382263183594, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.562074661254883, "logits_per_token": -5.981127421061198, "logits_per_char": -1.1214613914489746, "num_chars": 16}, {"sum_logits": -9.321182250976562, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.411291122436523, "logits_per_token": -2.3302955627441406, "logits_per_char": -0.9321182250976563, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 108, "native_id": "3e4b326aff96e9adbb52ba18cfa877b2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.752749443054199, "incorrect_loss_raw": 11.023967981338501, "correct_loss_per_char": 0.7503054936726888, "incorrect_loss_per_char": 1.1835940382697365, "correct_loss_per_token": 6.752749443054199, "incorrect_loss_per_token": 6.7231398820877075, "correct_loss_uncond": -7.052711486816406, "incorrect_loss_uncond": -5.591537952423096}, "model_output": [{"sum_logits": -6.752749443054199, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -6.752749443054199, "logits_per_char": -0.7503054936726888, "num_chars": 9}, {"sum_logits": -9.689247131347656, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -9.689247131347656, "logits_per_char": -1.6148745218912761, "num_chars": 6}, {"sum_logits": -7.023785591125488, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.483875274658203, "logits_per_token": -3.511892795562744, "logits_per_char": -0.7023785591125489, "num_chars": 10}, {"sum_logits": -9.533824920654297, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -4.766912460327148, "logits_per_char": -0.7944854100545248, "num_chars": 12}, {"sum_logits": -17.849014282226562, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.554561614990234, "logits_per_token": -8.924507141113281, "logits_per_char": -1.6226376620205967, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 109, "native_id": "5ac83e9e6fa9851ad3cccb0d57c1d88f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.335201263427734, "incorrect_loss_raw": 10.016547679901123, "correct_loss_per_char": 0.7919001579284668, "incorrect_loss_per_char": 0.9258272802145635, "correct_loss_per_token": 6.335201263427734, "incorrect_loss_per_token": 6.856399893760681, "correct_loss_uncond": -7.9710845947265625, "incorrect_loss_uncond": -5.670428991317749}, "model_output": [{"sum_logits": -14.66329574584961, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.540481567382812, "logits_per_token": -7.331647872924805, "logits_per_char": -0.9775530497233073, "num_chars": 15}, {"sum_logits": -10.617886543273926, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.159805297851562, "logits_per_token": -5.308943271636963, "logits_per_char": -0.8167605033287635, "num_chars": 13}, {"sum_logits": -6.399177551269531, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -6.399177551269531, "logits_per_char": -0.7110197279188368, "num_chars": 9}, {"sum_logits": -8.385830879211426, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.245857238769531, "logits_per_token": -8.385830879211426, "logits_per_char": -1.1979758398873466, "num_chars": 7}, {"sum_logits": -6.335201263427734, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.306285858154297, "logits_per_token": -6.335201263427734, "logits_per_char": -0.7919001579284668, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 110, "native_id": "2c0030cc14a27be2401dcfdaa501f0fc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.045931816101074, "incorrect_loss_raw": 10.991699934005737, "correct_loss_per_char": 0.5038276513417562, "incorrect_loss_per_char": 0.932180148210281, "correct_loss_per_token": 3.022965908050537, "incorrect_loss_per_token": 5.82202410697937, "correct_loss_uncond": -11.02828311920166, "incorrect_loss_uncond": -6.164353370666504}, "model_output": [{"sum_logits": -8.366058349609375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.520639419555664, "logits_per_token": -4.1830291748046875, "logits_per_char": -0.5577372233072917, "num_chars": 15}, {"sum_logits": -8.995267868041992, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.134942054748535, "logits_per_token": -8.995267868041992, "logits_per_char": -1.124408483505249, "num_chars": 8}, {"sum_logits": -6.045931816101074, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -3.022965908050537, "logits_per_char": -0.5038276513417562, "num_chars": 12}, {"sum_logits": -12.771749496459961, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.106761932373047, "logits_per_token": -3.1929373741149902, "logits_per_char": -0.9824422689584585, "num_chars": 13}, {"sum_logits": -13.833724021911621, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.86186981201172, "logits_per_token": -6.9168620109558105, "logits_per_char": -1.0641326170701246, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 111, "native_id": "feb83263e6be392351db0794004efc3f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.225861549377441, "incorrect_loss_raw": 9.813788056373596, "correct_loss_per_char": 0.48557166049354955, "incorrect_loss_per_char": 1.225950026512146, "correct_loss_per_token": 4.612930774688721, "incorrect_loss_per_token": 6.824137449264526, "correct_loss_uncond": -11.021773338317871, "incorrect_loss_uncond": -6.561946511268616}, "model_output": [{"sum_logits": -11.110197067260742, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.594663619995117, "logits_per_token": -5.555098533630371, "logits_per_char": -0.5555098533630372, "num_chars": 20}, {"sum_logits": -12.807007789611816, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.72327423095703, "logits_per_token": -6.403503894805908, "logits_per_char": -1.2807007789611817, "num_chars": 10}, {"sum_logits": -9.225861549377441, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.247634887695312, "logits_per_token": -4.612930774688721, "logits_per_char": -0.48557166049354955, "num_chars": 19}, {"sum_logits": -8.415611267089844, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.097515106201172, "logits_per_token": -8.415611267089844, "logits_per_char": -1.6831222534179688, "num_chars": 5}, {"sum_logits": -6.922336101531982, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -6.922336101531982, "logits_per_char": -1.3844672203063966, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 112, "native_id": "80697d599280d994d8a584c95824ef1f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.696502685546875, "incorrect_loss_raw": 7.90337061882019, "correct_loss_per_char": 0.5218336317274306, "incorrect_loss_per_char": 0.8427930487526789, "correct_loss_per_token": 2.3482513427734375, "incorrect_loss_per_token": 4.263156533241272, "correct_loss_uncond": -12.274768829345703, "incorrect_loss_uncond": -9.648657083511353}, "model_output": [{"sum_logits": -8.349458694458008, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.371767044067383, "logits_per_token": -4.174729347229004, "logits_per_char": -0.8349458694458007, "num_chars": 10}, {"sum_logits": -14.03676700592041, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.018383502960205, "logits_per_char": -1.5596407784356012, "num_chars": 9}, {"sum_logits": -6.73548698425293, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -3.367743492126465, "logits_per_char": -0.5612905820210775, "num_chars": 12}, {"sum_logits": -2.491769790649414, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.955296516418457, "logits_per_token": -2.491769790649414, "logits_per_char": -0.41529496510823566, "num_chars": 6}, {"sum_logits": -4.696502685546875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -2.3482513427734375, "logits_per_char": -0.5218336317274306, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 113, "native_id": "3c1800e7dd96d37fdd3c51b9fe502342", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.605677604675293, "incorrect_loss_raw": 8.182599544525146, "correct_loss_per_char": 0.7007097005844116, "incorrect_loss_per_char": 1.1253812483378818, "correct_loss_per_token": 5.605677604675293, "incorrect_loss_per_token": 7.00352931022644, "correct_loss_uncond": -9.75312328338623, "incorrect_loss_uncond": -7.5623626708984375}, "model_output": [{"sum_logits": -9.173271179199219, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -15.906532287597656, "logits_per_token": -9.173271179199219, "logits_per_char": -1.310467311314174, "num_chars": 7}, {"sum_logits": -8.210561752319336, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -15.294832229614258, "logits_per_token": -8.210561752319336, "logits_per_char": -1.026320219039917, "num_chars": 8}, {"sum_logits": -5.914003372192383, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.903877258300781, "logits_per_token": -5.914003372192383, "logits_per_char": -0.9856672286987305, "num_chars": 6}, {"sum_logits": -5.605677604675293, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -15.358800888061523, "logits_per_token": -5.605677604675293, "logits_per_char": -0.7007097005844116, "num_chars": 8}, {"sum_logits": -9.432561874389648, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -16.87460708618164, "logits_per_token": -4.716280937194824, "logits_per_char": -1.179070234298706, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 114, "native_id": "4da33e6f4b789776acb1bc10195baa83", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.706443786621094, "incorrect_loss_raw": 7.648547172546387, "correct_loss_per_char": 0.7844072977701823, "incorrect_loss_per_char": 1.1972277990409308, "correct_loss_per_token": 4.706443786621094, "incorrect_loss_per_token": 6.6189799308776855, "correct_loss_uncond": -7.877921104431152, "incorrect_loss_uncond": -7.3793113231658936}, "model_output": [{"sum_logits": -6.135349273681641, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -6.135349273681641, "logits_per_char": -1.5338373184204102, "num_chars": 4}, {"sum_logits": -4.706443786621094, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -4.706443786621094, "logits_per_char": -0.7844072977701823, "num_chars": 6}, {"sum_logits": -7.702807426452637, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.616548538208008, "logits_per_token": -7.702807426452637, "logits_per_char": -0.9628509283065796, "num_chars": 8}, {"sum_logits": -8.51949405670166, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -8.51949405670166, "logits_per_char": -1.703898811340332, "num_chars": 5}, {"sum_logits": -8.23653793334961, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.859416961669922, "logits_per_token": -4.118268966674805, "logits_per_char": -0.5883241380964007, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 115, "native_id": "ae038e9af9d5a511ada7456b5e73b15e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.075037002563477, "incorrect_loss_raw": 11.673371195793152, "correct_loss_per_char": 0.5075037002563476, "incorrect_loss_per_char": 1.4888088769382901, "correct_loss_per_token": 5.075037002563477, "incorrect_loss_per_token": 6.822923004627228, "correct_loss_uncond": -12.09489631652832, "incorrect_loss_uncond": -7.635032534599304}, "model_output": [{"sum_logits": -21.039196014404297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -25.523466110229492, "logits_per_token": -10.519598007202148, "logits_per_char": -2.337688446044922, "num_chars": 9}, {"sum_logits": -5.075037002563477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -5.075037002563477, "logits_per_char": -0.5075037002563476, "num_chars": 10}, {"sum_logits": -11.834957122802734, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -5.917478561401367, "logits_per_char": -0.9862464269002279, "num_chars": 12}, {"sum_logits": -7.889899253845215, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.619296073913574, "logits_per_token": -7.889899253845215, "logits_per_char": -1.9724748134613037, "num_chars": 4}, {"sum_logits": -5.929432392120361, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.817781448364258, "logits_per_token": -2.9647161960601807, "logits_per_char": -0.6588258213467069, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 116, "native_id": "a400b9fd1e319f901471c4b42d401c52", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.514601707458496, "incorrect_loss_raw": 12.692814946174622, "correct_loss_per_char": 0.5514601707458496, "incorrect_loss_per_char": 1.074246184994476, "correct_loss_per_token": 2.757300853729248, "incorrect_loss_per_token": 5.823183596134186, "correct_loss_uncond": -15.034768104553223, "incorrect_loss_uncond": -8.374691843986511}, "model_output": [{"sum_logits": -14.984109878540039, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.91109848022461, "logits_per_token": -7.4920549392700195, "logits_per_char": -1.24867582321167, "num_chars": 12}, {"sum_logits": -7.507922649383545, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.09969711303711, "logits_per_token": -3.7539613246917725, "logits_per_char": -0.8342136277092828, "num_chars": 9}, {"sum_logits": -15.721854209899902, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -24.963428497314453, "logits_per_token": -7.860927104949951, "logits_per_char": -1.429259473627264, "num_chars": 11}, {"sum_logits": -12.557373046875, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.29580307006836, "logits_per_token": -4.185791015625, "logits_per_char": -0.7848358154296875, "num_chars": 16}, {"sum_logits": -5.514601707458496, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.54936981201172, "logits_per_token": -2.757300853729248, "logits_per_char": -0.5514601707458496, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 117, "native_id": "9dffd2021771e0ecddb19031acf3701b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.551956176757812, "incorrect_loss_raw": 11.85301685333252, "correct_loss_per_char": 1.2551956176757812, "incorrect_loss_per_char": 1.1918710623296653, "correct_loss_per_token": 6.275978088378906, "incorrect_loss_per_token": 6.994116544723511, "correct_loss_uncond": -3.3253660202026367, "incorrect_loss_uncond": -5.4832923412323}, "model_output": [{"sum_logits": -12.389633178710938, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -6.194816589355469, "logits_per_char": -1.3766259087456598, "num_chars": 9}, {"sum_logits": -8.540864944458008, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.590502738952637, "logits_per_token": -8.540864944458008, "logits_per_char": -1.220123563494001, "num_chars": 7}, {"sum_logits": -12.551956176757812, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -6.275978088378906, "logits_per_char": -1.2551956176757812, "num_chars": 10}, {"sum_logits": -14.331964492797852, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.136791229248047, "logits_per_token": -7.165982246398926, "logits_per_char": -1.3029058629816228, "num_chars": 11}, {"sum_logits": -12.149604797363281, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.88495445251465, "logits_per_token": -6.074802398681641, "logits_per_char": -0.8678289140973773, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 118, "native_id": "3730c646fdf54472ab873aac9ff7852e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.095240592956543, "incorrect_loss_raw": 15.797478437423706, "correct_loss_per_char": 0.6496600423540387, "incorrect_loss_per_char": 1.3531202932198843, "correct_loss_per_token": 3.0317468643188477, "incorrect_loss_per_token": 9.721384048461914, "correct_loss_uncond": -10.185250282287598, "incorrect_loss_uncond": -1.5688834190368652}, "model_output": [{"sum_logits": -16.02515983581543, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.778059005737305, "logits_per_token": -8.012579917907715, "logits_per_char": -1.3354299863179524, "num_chars": 12}, {"sum_logits": -20.079063415527344, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.36058807373047, "logits_per_token": -10.039531707763672, "logits_per_char": -1.0039531707763671, "num_chars": 20}, {"sum_logits": -12.504531860351562, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -6.252265930175781, "logits_per_char": -1.2504531860351562, "num_chars": 10}, {"sum_logits": -14.581158638000488, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -14.581158638000488, "logits_per_char": -1.822644829750061, "num_chars": 8}, {"sum_logits": -9.095240592956543, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.28049087524414, "logits_per_token": -3.0317468643188477, "logits_per_char": -0.6496600423540387, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 119, "native_id": "175e7dcdded13d5adafaebf2264c3abd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.519286155700684, "incorrect_loss_raw": 7.993313431739807, "correct_loss_per_char": 0.5012857437133789, "incorrect_loss_per_char": 0.680845708506448, "correct_loss_per_token": 3.759643077850342, "incorrect_loss_per_token": 4.923684537410736, "correct_loss_uncond": -10.955073356628418, "incorrect_loss_uncond": -10.338119149208069}, "model_output": [{"sum_logits": -3.3411946296691895, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -1.6705973148345947, "logits_per_char": -0.33411946296691897, "num_chars": 10}, {"sum_logits": -7.41622257232666, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -7.41622257232666, "logits_per_char": -0.7416222572326661, "num_chars": 10}, {"sum_logits": -7.519286155700684, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.4743595123291, "logits_per_token": -3.759643077850342, "logits_per_char": -0.5012857437133789, "num_chars": 15}, {"sum_logits": -11.106834411621094, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.07915496826172, "logits_per_token": -5.553417205810547, "logits_per_char": -0.9255695343017578, "num_chars": 12}, {"sum_logits": -10.109002113342285, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.417009353637695, "logits_per_token": -5.054501056671143, "logits_per_char": -0.7220715795244489, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 120, "native_id": "11d7db1d8e1cff2f40d4184f15cf7ae7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.471790313720703, "incorrect_loss_raw": 17.20068621635437, "correct_loss_per_char": 0.4314526875813802, "incorrect_loss_per_char": 1.2991105046578322, "correct_loss_per_token": 3.2358951568603516, "incorrect_loss_per_token": 9.611714482307434, "correct_loss_uncond": -12.445236206054688, "incorrect_loss_uncond": -1.6406912803649902}, "model_output": [{"sum_logits": -19.753921508789062, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.076425552368164, "logits_per_token": -9.876960754394531, "logits_per_char": -1.410994393484933, "num_chars": 14}, {"sum_logits": -6.471790313720703, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -3.2358951568603516, "logits_per_char": -0.4314526875813802, "num_chars": 15}, {"sum_logits": -24.6119384765625, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -24.45120620727539, "logits_per_token": -8.2039794921875, "logits_per_char": -1.0700842815896738, "num_chars": 23}, {"sum_logits": -16.294950485229492, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -16.294950485229492, "logits_per_char": -2.0368688106536865, "num_chars": 8}, {"sum_logits": -8.141934394836426, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -4.070967197418213, "logits_per_char": -0.6784945329030355, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 121, "native_id": "08db69edf0ec5848c1a53dca8fc1601a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.106025695800781, "incorrect_loss_raw": 12.824942588806152, "correct_loss_per_char": 1.3451139662000868, "incorrect_loss_per_char": 1.3185838227922266, "correct_loss_per_token": 6.053012847900391, "incorrect_loss_per_token": 9.380826234817505, "correct_loss_uncond": -6.74595832824707, "incorrect_loss_uncond": -3.715658664703369}, "model_output": [{"sum_logits": -12.875760078430176, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.32713794708252, "logits_per_token": -12.875760078430176, "logits_per_char": -1.1705236434936523, "num_chars": 11}, {"sum_logits": -13.683603286743164, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.732112884521484, "logits_per_token": -4.561201095581055, "logits_per_char": -1.2439639351584695, "num_chars": 11}, {"sum_logits": -12.106025695800781, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.85198402404785, "logits_per_token": -6.053012847900391, "logits_per_char": -1.3451139662000868, "num_chars": 9}, {"sum_logits": -15.432280540466309, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.907557487487793, "logits_per_token": -15.432280540466309, "logits_per_char": -1.9290350675582886, "num_chars": 8}, {"sum_logits": -9.308126449584961, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.19559669494629, "logits_per_token": -4.6540632247924805, "logits_per_char": -0.9308126449584961, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 122, "native_id": "855ab6ba47f6311104c4d29e24ef0234", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.526041030883789, "incorrect_loss_raw": 24.11449885368347, "correct_loss_per_char": 0.5328775644302368, "incorrect_loss_per_char": 1.390219676728342, "correct_loss_per_token": 4.2630205154418945, "incorrect_loss_per_token": 7.815098279998416, "correct_loss_uncond": -11.298103332519531, "incorrect_loss_uncond": -2.8857314586639404}, "model_output": [{"sum_logits": -25.0377197265625, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -27.46965217590332, "logits_per_token": -8.345906575520834, "logits_per_char": -1.4728070427389706, "num_chars": 17}, {"sum_logits": -13.23261547088623, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -21.468921661376953, "logits_per_token": -6.616307735443115, "logits_per_char": -0.9451868193490165, "num_chars": 14}, {"sum_logits": -35.827823638916016, "num_tokens": 7, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -40.337100982666016, "logits_per_token": -5.118260519845145, "logits_per_char": -1.2795651299612862, "num_chars": 28}, {"sum_logits": -8.526041030883789, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.82414436340332, "logits_per_token": -4.2630205154418945, "logits_per_char": -0.5328775644302368, "num_chars": 16}, {"sum_logits": -22.35983657836914, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.72524642944336, "logits_per_token": -11.17991828918457, "logits_per_char": -1.863319714864095, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 123, "native_id": "7ec11eeca4221795c117943ca2639e86", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.362364768981934, "incorrect_loss_raw": 15.33751392364502, "correct_loss_per_char": 1.0329422517256304, "incorrect_loss_per_char": 1.2338037811792812, "correct_loss_per_token": 5.681182384490967, "incorrect_loss_per_token": 5.990523815155029, "correct_loss_uncond": -6.586556434631348, "incorrect_loss_uncond": -4.113937616348267}, "model_output": [{"sum_logits": -15.92184829711914, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.3708553314209, "logits_per_token": -3.980462074279785, "logits_per_char": -0.9951155185699463, "num_chars": 16}, {"sum_logits": -15.570640563964844, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.439861297607422, "logits_per_token": -7.785320281982422, "logits_per_char": -1.5570640563964844, "num_chars": 10}, {"sum_logits": -13.462743759155273, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.486662864685059, "logits_per_token": -6.731371879577637, "logits_per_char": -1.1218953132629395, "num_chars": 12}, {"sum_logits": -11.362364768981934, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.94892120361328, "logits_per_token": -5.681182384490967, "logits_per_char": -1.0329422517256304, "num_chars": 11}, {"sum_logits": -16.39482307434082, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.508426666259766, "logits_per_token": -5.464941024780273, "logits_per_char": -1.2611402364877553, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 124, "native_id": "e9389b08fdd17f14b148d498d6ff4dfe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.2115583419799805, "incorrect_loss_raw": 13.98195195198059, "correct_loss_per_char": 0.35096319516499835, "incorrect_loss_per_char": 1.5230082644356622, "correct_loss_per_token": 2.1057791709899902, "incorrect_loss_per_token": 7.394914547602335, "correct_loss_uncond": -11.924836158752441, "incorrect_loss_uncond": -4.413628816604614}, "model_output": [{"sum_logits": -4.2115583419799805, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -2.1057791709899902, "logits_per_char": -0.35096319516499835, "num_chars": 12}, {"sum_logits": -21.351177215576172, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -27.615482330322266, "logits_per_token": -7.117059071858724, "logits_per_char": -1.186176511976454, "num_chars": 18}, {"sum_logits": -18.17104721069336, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.003143310546875, "logits_per_token": -6.057015736897786, "logits_per_char": -2.27138090133667, "num_chars": 8}, {"sum_logits": -7.274711608886719, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -7.274711608886719, "logits_per_char": -0.8083012898763021, "num_chars": 9}, {"sum_logits": -9.130871772766113, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.158236503601074, "logits_per_token": -9.130871772766113, "logits_per_char": -1.8261743545532227, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 125, "native_id": "afa2899cc21e204fa64e63e7839e8c1e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.879323959350586, "incorrect_loss_raw": 9.457319855690002, "correct_loss_per_char": 0.4522556891808143, "incorrect_loss_per_char": 0.7510215029830024, "correct_loss_per_token": 1.959774653116862, "incorrect_loss_per_token": 5.7722819447517395, "correct_loss_uncond": -16.755979537963867, "incorrect_loss_uncond": -8.883805632591248}, "model_output": [{"sum_logits": -8.417158126831055, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.321617126464844, "logits_per_token": -2.8057193756103516, "logits_per_char": -0.7014298439025879, "num_chars": 12}, {"sum_logits": -5.879323959350586, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.635303497314453, "logits_per_token": -1.959774653116862, "logits_per_char": -0.4522556891808143, "num_chars": 13}, {"sum_logits": -5.795694828033447, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.71073341369629, "logits_per_token": -2.8978474140167236, "logits_per_char": -0.4829745690027873, "num_chars": 12}, {"sum_logits": -12.46173095703125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.86435317993164, "logits_per_token": -6.230865478515625, "logits_per_char": -0.8901236397879464, "num_chars": 14}, {"sum_logits": -11.154695510864258, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.467798233032227, "logits_per_token": -11.154695510864258, "logits_per_char": -0.9295579592386881, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 126, "native_id": "f898eb5b789d2dc6804edba269f051f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.636995792388916, "incorrect_loss_raw": 9.179176926612854, "correct_loss_per_char": 0.376713684626988, "incorrect_loss_per_char": 0.9869324735232762, "correct_loss_per_token": 2.636995792388916, "incorrect_loss_per_token": 5.997718135515848, "correct_loss_uncond": -11.916475772857666, "incorrect_loss_uncond": -6.031116604804993}, "model_output": [{"sum_logits": -9.209056854248047, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.807899475097656, "logits_per_token": -3.069685618082682, "logits_per_char": -0.657789775303432, "num_chars": 14}, {"sum_logits": -5.385166645050049, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -5.385166645050049, "logits_per_char": -0.8975277741750082, "num_chars": 6}, {"sum_logits": -8.949556350708008, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.51531982421875, "logits_per_token": -8.949556350708008, "logits_per_char": -0.7457963625590006, "num_chars": 12}, {"sum_logits": -2.636995792388916, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.553471565246582, "logits_per_token": -2.636995792388916, "logits_per_char": -0.376713684626988, "num_chars": 7}, {"sum_logits": -13.172927856445312, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.708337783813477, "logits_per_token": -6.586463928222656, "logits_per_char": -1.646615982055664, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 127, "native_id": "7ed7379fc51fd35a47be022f6c56ce51", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.189052104949951, "incorrect_loss_raw": 11.053917109966278, "correct_loss_per_char": 0.5315086841583252, "incorrect_loss_per_char": 1.063061834155739, "correct_loss_per_token": 3.189052104949951, "incorrect_loss_per_token": 5.941909492015839, "correct_loss_uncond": -10.857240200042725, "incorrect_loss_uncond": -6.848535120487213}, "model_output": [{"sum_logits": -10.387161254882812, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -5.193580627441406, "logits_per_char": -0.8655967712402344, "num_chars": 12}, {"sum_logits": -3.189052104949951, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -3.189052104949951, "logits_per_char": -0.5315086841583252, "num_chars": 6}, {"sum_logits": -15.231595039367676, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.041534423828125, "logits_per_token": -7.615797519683838, "logits_per_char": -1.3846904581243342, "num_chars": 11}, {"sum_logits": -3.3196074962615967, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.6295747756958, "logits_per_token": -3.3196074962615967, "logits_per_char": -0.47422964232308523, "num_chars": 7}, {"sum_logits": -15.277304649353027, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.79332160949707, "logits_per_token": -7.638652324676514, "logits_per_char": -1.5277304649353027, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 128, "native_id": "15798a23ee6952fedd6d202064069126", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.38216495513916, "incorrect_loss_raw": 16.480395078659058, "correct_loss_per_char": 1.029397304241474, "incorrect_loss_per_char": 1.3983485562460762, "correct_loss_per_token": 4.460721651713054, "incorrect_loss_per_token": 9.12194162607193, "correct_loss_uncond": -6.867091178894043, "incorrect_loss_uncond": -1.564427137374878}, "model_output": [{"sum_logits": -19.927547454833984, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.246797561645508, "logits_per_token": -9.963773727416992, "logits_per_char": -0.9963773727416992, "num_chars": 20}, {"sum_logits": -22.25812339782715, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -11.129061698913574, "logits_per_char": -2.2258123397827148, "num_chars": 10}, {"sum_logits": -11.121304512023926, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.30910301208496, "logits_per_token": -2.7803261280059814, "logits_per_char": -0.7943788937159947, "num_chars": 14}, {"sum_logits": -12.614604949951172, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.483644485473633, "logits_per_token": -12.614604949951172, "logits_per_char": -1.5768256187438965, "num_chars": 8}, {"sum_logits": -13.38216495513916, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.249256134033203, "logits_per_token": -4.460721651713054, "logits_per_char": -1.029397304241474, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 129, "native_id": "273d0134e8ce53d4ebcf41ca7fde02af", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.514989852905273, "incorrect_loss_raw": 10.470585107803345, "correct_loss_per_char": 0.8088453733004056, "incorrect_loss_per_char": 1.163855079268208, "correct_loss_per_token": 3.5049966176350913, "incorrect_loss_per_token": 7.292377710342407, "correct_loss_uncond": -8.318254470825195, "incorrect_loss_uncond": -5.545802354812622}, "model_output": [{"sum_logits": -10.142793655395508, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -10.142793655395508, "logits_per_char": -1.6904656092325847, "num_chars": 6}, {"sum_logits": -6.313887596130371, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -6.313887596130371, "logits_per_char": -0.48568366124079776, "num_chars": 13}, {"sum_logits": -19.512441635131836, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.178415298461914, "logits_per_token": -9.756220817565918, "logits_per_char": -2.168049070570204, "num_chars": 9}, {"sum_logits": -10.514989852905273, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.83324432373047, "logits_per_token": -3.5049966176350913, "logits_per_char": -0.8088453733004056, "num_chars": 13}, {"sum_logits": -5.913217544555664, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -2.956608772277832, "logits_per_char": -0.31122197602924545, "num_chars": 19}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 130, "native_id": "2f0931adc3d0d422d9ab6264395e89d8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6751015186309814, "incorrect_loss_raw": 10.400098085403442, "correct_loss_per_char": 0.0964430740901402, "incorrect_loss_per_char": 1.452147735475184, "correct_loss_per_token": 0.6751015186309814, "incorrect_loss_per_token": 9.041395783424377, "correct_loss_uncond": -12.60849928855896, "incorrect_loss_uncond": -4.360308408737183}, "model_output": [{"sum_logits": -10.86961841583252, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.69713020324707, "logits_per_token": -5.43480920791626, "logits_per_char": -0.8361244935255784, "num_chars": 13}, {"sum_logits": -8.85830307006836, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.292170524597168, "logits_per_token": -8.85830307006836, "logits_per_char": -1.2654718671526228, "num_chars": 7}, {"sum_logits": -11.74082088470459, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -11.74082088470459, "logits_per_char": -1.174082088470459, "num_chars": 10}, {"sum_logits": -0.6751015186309814, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -0.6751015186309814, "logits_per_char": -0.0964430740901402, "num_chars": 7}, {"sum_logits": -10.1316499710083, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -10.1316499710083, "logits_per_char": -2.532912492752075, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 131, "native_id": "d00d3ba777cb3889a45799d72fca0a50", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.404085636138916, "incorrect_loss_raw": 11.900070309638977, "correct_loss_per_char": 0.30946233055808325, "incorrect_loss_per_char": 1.1035244822502137, "correct_loss_per_token": 3.404085636138916, "incorrect_loss_per_token": 5.740774075190226, "correct_loss_uncond": -9.834859371185303, "incorrect_loss_uncond": -8.357210040092468}, "model_output": [{"sum_logits": -14.461158752441406, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.251323699951172, "logits_per_token": -4.820386250813802, "logits_per_char": -0.9640772501627605, "num_chars": 15}, {"sum_logits": -14.461158752441406, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.251323699951172, "logits_per_token": -4.820386250813802, "logits_per_char": -0.9640772501627605, "num_chars": 15}, {"sum_logits": -3.404085636138916, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -3.404085636138916, "logits_per_char": -0.30946233055808325, "num_chars": 11}, {"sum_logits": -10.71127986907959, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.296783447265625, "logits_per_token": -5.355639934539795, "logits_per_char": -0.8926066557566324, "num_chars": 12}, {"sum_logits": -7.966683864593506, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -7.966683864593506, "logits_per_char": -1.5933367729187011, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 132, "native_id": "b1f36d1c8ab7e5a28783cb38e8709c27", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6035041809082031, "incorrect_loss_raw": 9.027492046356201, "correct_loss_per_char": 0.2004380226135254, "incorrect_loss_per_char": 1.0447502596811815, "correct_loss_per_token": 1.6035041809082031, "incorrect_loss_per_token": 5.205595970153809, "correct_loss_uncond": -14.515727996826172, "incorrect_loss_uncond": -6.869949817657471}, "model_output": [{"sum_logits": -1.6035041809082031, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -16.119232177734375, "logits_per_token": -1.6035041809082031, "logits_per_char": -0.2004380226135254, "num_chars": 8}, {"sum_logits": -11.775227546691895, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.78504753112793, "logits_per_token": -5.887613773345947, "logits_per_char": -1.4719034433364868, "num_chars": 8}, {"sum_logits": -5.534799575805664, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.8895263671875, "logits_per_token": -5.534799575805664, "logits_per_char": -0.691849946975708, "num_chars": 8}, {"sum_logits": -9.819186210632324, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.20064353942871, "logits_per_token": -4.909593105316162, "logits_per_char": -0.8926532918756659, "num_chars": 11}, {"sum_logits": -8.980754852294922, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.714550018310547, "logits_per_token": -4.490377426147461, "logits_per_char": -1.1225943565368652, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 133, "native_id": "a5e76dd088aab4f89e2fe93f6de6e46d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.4777703285217285, "incorrect_loss_raw": 11.998775959014893, "correct_loss_per_char": 0.34444387142474836, "incorrect_loss_per_char": 1.6646507910319737, "correct_loss_per_token": 4.4777703285217285, "incorrect_loss_per_token": 10.893399953842163, "correct_loss_uncond": -10.164391040802002, "incorrect_loss_uncond": -1.4757678508758545}, "model_output": [{"sum_logits": -8.843008041381836, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.0657958984375, "logits_per_token": -4.421504020690918, "logits_per_char": -0.7369173367818197, "num_chars": 12}, {"sum_logits": -13.88965129852295, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.537956237792969, "logits_per_token": -13.88965129852295, "logits_per_char": -1.5432945887247722, "num_chars": 9}, {"sum_logits": -11.796709060668945, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.03989028930664, "logits_per_token": -11.796709060668945, "logits_per_char": -1.685244151524135, "num_chars": 7}, {"sum_logits": -4.4777703285217285, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -4.4777703285217285, "logits_per_char": -0.34444387142474836, "num_chars": 13}, {"sum_logits": -13.46573543548584, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.254532814025879, "logits_per_token": -13.46573543548584, "logits_per_char": -2.693147087097168, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 134, "native_id": "ac6f0e24dd6203cda43e1089dcf081d6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.060955047607422, "incorrect_loss_raw": 10.181202411651611, "correct_loss_per_char": 0.6717462539672852, "incorrect_loss_per_char": 0.9351373311852207, "correct_loss_per_token": 4.030477523803711, "incorrect_loss_per_token": 5.180774529774983, "correct_loss_uncond": -9.854270935058594, "incorrect_loss_uncond": -9.692288160324097}, "model_output": [{"sum_logits": -13.493417739868164, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -23.283273696899414, "logits_per_token": -4.497805913289388, "logits_per_char": -0.7937304552863625, "num_chars": 17}, {"sum_logits": -5.2191925048828125, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.823437690734863, "logits_per_token": -5.2191925048828125, "logits_per_char": -0.7455989292689732, "num_chars": 7}, {"sum_logits": -8.395753860473633, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.941822052001953, "logits_per_token": -4.197876930236816, "logits_per_char": -0.8395753860473633, "num_chars": 10}, {"sum_logits": -8.060955047607422, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.915225982666016, "logits_per_token": -4.030477523803711, "logits_per_char": -0.6717462539672852, "num_chars": 12}, {"sum_logits": -13.616445541381836, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -22.4454288482666, "logits_per_token": -6.808222770690918, "logits_per_char": -1.3616445541381836, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 135, "native_id": "1ab746bcd100ccf513055fe93c61010b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.133854866027832, "incorrect_loss_raw": 11.286617517471313, "correct_loss_per_char": 0.9037616517808702, "incorrect_loss_per_char": 1.1466271224476043, "correct_loss_per_token": 2.711284955342611, "incorrect_loss_per_token": 4.883409897486368, "correct_loss_uncond": -8.39020824432373, "incorrect_loss_uncond": -7.1075756549835205}, "model_output": [{"sum_logits": -8.614916801452637, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.798885345458984, "logits_per_token": -2.8716389338175454, "logits_per_char": -0.6153512001037598, "num_chars": 14}, {"sum_logits": -8.133854866027832, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.524063110351562, "logits_per_token": -2.711284955342611, "logits_per_char": -0.9037616517808702, "num_chars": 9}, {"sum_logits": -15.828202247619629, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.027477264404297, "logits_per_token": -7.9141011238098145, "logits_per_char": -1.3190168539683025, "num_chars": 12}, {"sum_logits": -9.622655868530273, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.520139694213867, "logits_per_token": -3.207551956176758, "logits_per_char": -1.0691839853922527, "num_chars": 9}, {"sum_logits": -11.080695152282715, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.230270385742188, "logits_per_token": -5.540347576141357, "logits_per_char": -1.5829564503261022, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 136, "native_id": "af836abc58e0daf36df1d8d6830b70c5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.110363960266113, "incorrect_loss_raw": 12.026050567626953, "correct_loss_per_char": 0.5359037623685949, "incorrect_loss_per_char": 0.9984764881301346, "correct_loss_per_token": 3.036787986755371, "incorrect_loss_per_token": 8.152610182762146, "correct_loss_uncond": -8.957840919494629, "incorrect_loss_uncond": -4.713182687759399}, "model_output": [{"sum_logits": -10.76317024230957, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.95476245880127, "logits_per_token": -10.76317024230957, "logits_per_char": -1.5375957489013672, "num_chars": 7}, {"sum_logits": -9.110363960266113, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.068204879760742, "logits_per_token": -3.036787986755371, "logits_per_char": -0.5359037623685949, "num_chars": 17}, {"sum_logits": -6.353508949279785, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.989152908325195, "logits_per_token": -6.353508949279785, "logits_per_char": -0.6353508949279785, "num_chars": 10}, {"sum_logits": -17.447383880615234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.73453712463379, "logits_per_token": -8.723691940307617, "logits_per_char": -0.9182833621376439, "num_chars": 19}, {"sum_logits": -13.540139198303223, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.278480529785156, "logits_per_token": -6.770069599151611, "logits_per_char": -0.9026759465535482, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 137, "native_id": "2ed66cfd206723a006b37599b516ad6e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.516818046569824, "incorrect_loss_raw": 12.813561081886292, "correct_loss_per_char": 0.342990423503675, "incorrect_loss_per_char": 1.6527319940236898, "correct_loss_per_token": 2.1722726821899414, "incorrect_loss_per_token": 11.036292433738708, "correct_loss_uncond": -14.155430793762207, "incorrect_loss_uncond": -2.2003201246261597}, "model_output": [{"sum_logits": -7.865499973297119, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.776552200317383, "logits_per_token": -7.865499973297119, "logits_per_char": -0.6554583311080933, "num_chars": 12}, {"sum_logits": -14.218149185180664, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.562978744506836, "logits_per_token": -7.109074592590332, "logits_per_char": -1.0937037834754357, "num_chars": 13}, {"sum_logits": -14.574361801147461, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -14.574361801147461, "logits_per_char": -2.4290603001912436, "num_chars": 6}, {"sum_logits": -14.596233367919922, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.473284721374512, "logits_per_token": -14.596233367919922, "logits_per_char": -2.432705561319987, "num_chars": 6}, {"sum_logits": -6.516818046569824, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.67224884033203, "logits_per_token": -2.1722726821899414, "logits_per_char": -0.342990423503675, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 138, "native_id": "e89a2762d578cb7bc2cc0a5b2a16d933", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.36463451385498, "incorrect_loss_raw": 9.149573802947998, "correct_loss_per_char": 1.1240576830777256, "incorrect_loss_per_char": 1.5336714245024183, "correct_loss_per_token": 6.18231725692749, "incorrect_loss_per_token": 9.149573802947998, "correct_loss_uncond": -7.513182640075684, "incorrect_loss_uncond": -1.7573411464691162}, "model_output": [{"sum_logits": -10.361907958984375, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.443742752075195, "logits_per_token": -10.361907958984375, "logits_per_char": -1.7269846598307292, "num_chars": 6}, {"sum_logits": -9.973798751831055, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -9.106559753417969, "logits_per_token": -9.973798751831055, "logits_per_char": -1.424828393118722, "num_chars": 7}, {"sum_logits": -8.173236846923828, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -9.830394744873047, "logits_per_token": -8.173236846923828, "logits_per_char": -1.6346473693847656, "num_chars": 5}, {"sum_logits": -8.089351654052734, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -10.246962547302246, "logits_per_token": -8.089351654052734, "logits_per_char": -1.3482252756754558, "num_chars": 6}, {"sum_logits": -12.36463451385498, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.877817153930664, "logits_per_token": -6.18231725692749, "logits_per_char": -1.1240576830777256, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 139, "native_id": "43cec0fff43a976fade9112d02b66021", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.524804592132568, "incorrect_loss_raw": 12.168889999389648, "correct_loss_per_char": 0.46040038267771405, "incorrect_loss_per_char": 1.0601446314172431, "correct_loss_per_token": 5.524804592132568, "incorrect_loss_per_token": 6.389941334724426, "correct_loss_uncond": -10.198296070098877, "incorrect_loss_uncond": -5.86844801902771}, "model_output": [{"sum_logits": -5.524804592132568, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -5.524804592132568, "logits_per_char": -0.46040038267771405, "num_chars": 12}, {"sum_logits": -13.49173355102539, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.628097534179688, "logits_per_token": -6.745866775512695, "logits_per_char": -1.037825657771184, "num_chars": 13}, {"sum_logits": -14.43991470336914, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.994152069091797, "logits_per_token": -7.21995735168457, "logits_per_char": -1.110762669489934, "num_chars": 13}, {"sum_logits": -8.543951034545898, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -8.543951034545898, "logits_per_char": -1.2205644335065569, "num_chars": 7}, {"sum_logits": -12.199960708618164, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.616445541381836, "logits_per_token": -3.049990177154541, "logits_per_char": -0.8714257649012974, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 140, "native_id": "30e66db11e0257a14a17108b90cd69fb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5157155990600586, "incorrect_loss_raw": 13.234062194824219, "correct_loss_per_char": 0.22870141809636896, "incorrect_loss_per_char": 2.2849472165107727, "correct_loss_per_token": 2.5157155990600586, "incorrect_loss_per_token": 13.234062194824219, "correct_loss_uncond": -11.722911834716797, "incorrect_loss_uncond": 2.003288984298706}, "model_output": [{"sum_logits": -14.677913665771484, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.617708206176758, "logits_per_token": -14.677913665771484, "logits_per_char": -2.4463189442952475, "num_chars": 6}, {"sum_logits": -15.253942489624023, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.228611946105957, "logits_per_token": -15.253942489624023, "logits_per_char": -1.906742811203003, "num_chars": 8}, {"sum_logits": -11.572452545166016, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -10.757699966430664, "logits_per_token": -11.572452545166016, "logits_per_char": -1.9287420908610027, "num_chars": 6}, {"sum_logits": -2.5157155990600586, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.238627433776855, "logits_per_token": -2.5157155990600586, "logits_per_char": -0.22870141809636896, "num_chars": 11}, {"sum_logits": -11.431940078735352, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -10.319072723388672, "logits_per_token": -11.431940078735352, "logits_per_char": -2.857985019683838, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 141, "native_id": "f21ef67b31bd36a3174b6b4c7b4bbc7b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.145413398742676, "incorrect_loss_raw": 13.47152042388916, "correct_loss_per_char": 0.831401218067516, "incorrect_loss_per_char": 1.468309598220022, "correct_loss_per_token": 4.572706699371338, "incorrect_loss_per_token": 9.380147457122803, "correct_loss_uncond": -9.252068519592285, "incorrect_loss_uncond": -2.0320472717285156}, "model_output": [{"sum_logits": -16.51483917236328, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.050079345703125, "logits_per_token": -8.25741958618164, "logits_per_char": -0.8692020617033306, "num_chars": 19}, {"sum_logits": -8.485977172851562, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.18120002746582, "logits_per_token": -8.485977172851562, "logits_per_char": -0.8485977172851562, "num_chars": 10}, {"sum_logits": -12.669120788574219, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -12.669120788574219, "logits_per_char": -2.533824157714844, "num_chars": 5}, {"sum_logits": -16.216144561767578, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.271726608276367, "logits_per_token": -8.108072280883789, "logits_per_char": -1.6216144561767578, "num_chars": 10}, {"sum_logits": -9.145413398742676, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.39748191833496, "logits_per_token": -4.572706699371338, "logits_per_char": -0.831401218067516, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 142, "native_id": "e476e2c8c278eaecfe1a8b884b6aeb8e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.1825919151306152, "incorrect_loss_raw": 9.52826851606369, "correct_loss_per_char": 0.4546559878758022, "incorrect_loss_per_char": 1.2602634504892305, "correct_loss_per_token": 3.1825919151306152, "incorrect_loss_per_token": 6.6061301827430725, "correct_loss_uncond": -8.657000064849854, "incorrect_loss_uncond": -6.2881603837013245}, "model_output": [{"sum_logits": -1.3010280132293701, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.551077842712402, "logits_per_token": -1.3010280132293701, "logits_per_char": -0.18586114474705287, "num_chars": 7}, {"sum_logits": -3.1825919151306152, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.839591979980469, "logits_per_token": -3.1825919151306152, "logits_per_char": -0.4546559878758022, "num_chars": 7}, {"sum_logits": -12.555606842041016, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.97003936767578, "logits_per_token": -6.277803421020508, "logits_per_char": -0.965815910926232, "num_chars": 13}, {"sum_logits": -13.43493938446045, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -13.43493938446045, "logits_per_char": -2.68698787689209, "num_chars": 5}, {"sum_logits": -10.821499824523926, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.439374923706055, "logits_per_token": -5.410749912261963, "logits_per_char": -1.2023888693915472, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 143, "native_id": "191e3c676f05a11d6b2565d8c27d2001", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.620909690856934, "incorrect_loss_raw": 12.029042720794678, "correct_loss_per_char": 0.5517424742380778, "incorrect_loss_per_char": 2.160898586114248, "correct_loss_per_token": 3.310454845428467, "incorrect_loss_per_token": 7.879281520843506, "correct_loss_uncond": -17.39342975616455, "incorrect_loss_uncond": -3.79315447807312}, "model_output": [{"sum_logits": -6.620909690856934, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -24.014339447021484, "logits_per_token": -3.310454845428467, "logits_per_char": -0.5517424742380778, "num_chars": 12}, {"sum_logits": -10.51001262664795, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -10.51001262664795, "logits_per_char": -2.6275031566619873, "num_chars": 4}, {"sum_logits": -19.61604118347168, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -19.08209228515625, "logits_per_token": -9.80802059173584, "logits_per_char": -3.923208236694336, "num_chars": 5}, {"sum_logits": -13.582048416137695, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -17.71338653564453, "logits_per_token": -6.791024208068848, "logits_per_char": -1.3582048416137695, "num_chars": 10}, {"sum_logits": -4.408068656921387, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -4.408068656921387, "logits_per_char": -0.7346781094868978, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 144, "native_id": "99098375c7b651d524eebac72e358238", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.49079132080078, "incorrect_loss_raw": 13.993649959564209, "correct_loss_per_char": 1.1779136657714844, "incorrect_loss_per_char": 1.1450934809523743, "correct_loss_per_token": 8.24539566040039, "incorrect_loss_per_token": 8.03154706954956, "correct_loss_uncond": -2.724668502807617, "incorrect_loss_uncond": -3.844385862350464}, "model_output": [{"sum_logits": -8.277776718139648, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -8.277776718139648, "logits_per_char": -1.1825395311628069, "num_chars": 7}, {"sum_logits": -13.38786506652832, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.88188362121582, "logits_per_token": -6.69393253326416, "logits_per_char": -0.669393253326416, "num_chars": 20}, {"sum_logits": -14.260465621948242, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.637351989746094, "logits_per_token": -7.130232810974121, "logits_per_char": -1.2964059656316584, "num_chars": 11}, {"sum_logits": -16.49079132080078, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.2154598236084, "logits_per_token": -8.24539566040039, "logits_per_char": -1.1779136657714844, "num_chars": 14}, {"sum_logits": -20.048492431640625, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.40934944152832, "logits_per_token": -10.024246215820312, "logits_per_char": -1.432035173688616, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 145, "native_id": "290fac9f881a83d8bfb34355f8e71044", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.385258674621582, "incorrect_loss_raw": 7.898970603942871, "correct_loss_per_char": 0.5520740396836225, "incorrect_loss_per_char": 0.6337306254949324, "correct_loss_per_token": 3.128419558207194, "incorrect_loss_per_token": 5.001391649246216, "correct_loss_uncond": -10.540316581726074, "incorrect_loss_uncond": -9.617589235305786}, "model_output": [{"sum_logits": -7.27397346496582, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.740436553955078, "logits_per_token": -3.63698673248291, "logits_per_char": -0.5595364203819861, "num_chars": 13}, {"sum_logits": -8.748488426208496, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.510189056396484, "logits_per_token": -4.374244213104248, "logits_per_char": -0.583232561747233, "num_chars": 15}, {"sum_logits": -7.158169746398926, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.9293155670166, "logits_per_token": -3.579084873199463, "logits_per_char": -0.5506284420306866, "num_chars": 13}, {"sum_logits": -8.415250778198242, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.886298179626465, "logits_per_token": -8.415250778198242, "logits_per_char": -0.8415250778198242, "num_chars": 10}, {"sum_logits": -9.385258674621582, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.925575256347656, "logits_per_token": -3.128419558207194, "logits_per_char": -0.5520740396836225, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 146, "native_id": "6c36226b23377a0dd0188bf56840e22a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.5818850994110107, "incorrect_loss_raw": 11.88963532447815, "correct_loss_per_char": 0.17576501104566786, "incorrect_loss_per_char": 0.8512589036968858, "correct_loss_per_token": 1.5818850994110107, "incorrect_loss_per_token": 5.229746262232463, "correct_loss_uncond": -10.530910730361938, "incorrect_loss_uncond": -7.296797037124634}, "model_output": [{"sum_logits": -14.833245277404785, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.82364273071289, "logits_per_token": -4.944415092468262, "logits_per_char": -0.8240691820780436, "num_chars": 18}, {"sum_logits": -8.456583023071289, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.093063354492188, "logits_per_token": -2.8188610076904297, "logits_per_char": -0.6505063863900992, "num_chars": 13}, {"sum_logits": -1.5818850994110107, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -1.5818850994110107, "logits_per_char": -0.17576501104566786, "num_chars": 9}, {"sum_logits": -16.669506072998047, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -25.020069122314453, "logits_per_token": -5.556502024332683, "logits_per_char": -0.980559180764591, "num_chars": 17}, {"sum_logits": -7.599206924438477, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.808954238891602, "logits_per_token": -7.599206924438477, "logits_per_char": -0.9499008655548096, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 147, "native_id": "aa5aa36557a5fbb93391506182f1025c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.921992301940918, "incorrect_loss_raw": 8.857040643692017, "correct_loss_per_char": 0.32466581132676864, "incorrect_loss_per_char": 1.124813195258852, "correct_loss_per_token": 2.921992301940918, "incorrect_loss_per_token": 8.857040643692017, "correct_loss_uncond": -8.879770278930664, "incorrect_loss_uncond": -4.689310550689697}, "model_output": [{"sum_logits": -8.60129451751709, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.045044898986816, "logits_per_token": -8.60129451751709, "logits_per_char": -1.2287563596452986, "num_chars": 7}, {"sum_logits": -11.560270309448242, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.808954238891602, "logits_per_token": -11.560270309448242, "logits_per_char": -1.4450337886810303, "num_chars": 8}, {"sum_logits": -2.921992301940918, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -2.921992301940918, "logits_per_char": -0.32466581132676864, "num_chars": 9}, {"sum_logits": -5.966070175170898, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.944307327270508, "logits_per_token": -5.966070175170898, "logits_per_char": -0.6628966861300998, "num_chars": 9}, {"sum_logits": -9.300527572631836, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.38709831237793, "logits_per_token": -9.300527572631836, "logits_per_char": -1.1625659465789795, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 148, "native_id": "a38df3e750b1edd30f905e17af803c61", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.7040886878967285, "incorrect_loss_raw": 17.562121152877808, "correct_loss_per_char": 0.33520443439483644, "incorrect_loss_per_char": 2.0379402213626436, "correct_loss_per_token": 3.3520443439483643, "incorrect_loss_per_token": 10.945948481559753, "correct_loss_uncond": -12.832787990570068, "incorrect_loss_uncond": -1.1114423274993896}, "model_output": [{"sum_logits": -17.372961044311523, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.08889389038086, "logits_per_token": -8.686480522155762, "logits_per_char": -2.895493507385254, "num_chars": 6}, {"sum_logits": -17.319103240966797, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.578399658203125, "logits_per_token": -17.319103240966797, "logits_per_char": -1.1546068827311198, "num_chars": 15}, {"sum_logits": -6.7040886878967285, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.536876678466797, "logits_per_token": -3.3520443439483643, "logits_per_char": -0.33520443439483644, "num_chars": 20}, {"sum_logits": -14.720564842224121, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.475858688354492, "logits_per_token": -7.3602824211120605, "logits_per_char": -2.9441129684448244, "num_chars": 5}, {"sum_logits": -20.83585548400879, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.551101684570312, "logits_per_token": -10.417927742004395, "logits_per_char": -1.1575475268893771, "num_chars": 18}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 149, "native_id": "dba51270f789c75a2e38a5201b124d99", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.895684242248535, "incorrect_loss_raw": 8.062831997871399, "correct_loss_per_char": 0.49478421211242674, "incorrect_loss_per_char": 0.8589194294006106, "correct_loss_per_token": 2.473921060562134, "incorrect_loss_per_token": 6.519381403923035, "correct_loss_uncond": -9.700888633728027, "incorrect_loss_uncond": -6.798473000526428}, "model_output": [{"sum_logits": -6.584572792053223, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.025790214538574, "logits_per_token": -6.584572792053223, "logits_per_char": -0.7316191991170248, "num_chars": 9}, {"sum_logits": -12.347604751586914, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -6.173802375793457, "logits_per_char": -0.9498157501220703, "num_chars": 13}, {"sum_logits": -4.677529811859131, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.4557523727417, "logits_per_token": -4.677529811859131, "logits_per_char": -0.5197255346510146, "num_chars": 9}, {"sum_logits": -8.641620635986328, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.940627098083496, "logits_per_token": -8.641620635986328, "logits_per_char": -1.2345172337123327, "num_chars": 7}, {"sum_logits": -9.895684242248535, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.596572875976562, "logits_per_token": -2.473921060562134, "logits_per_char": -0.49478421211242674, "num_chars": 20}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 150, "native_id": "1be8ec824eb0c7218b6bc160fd191428", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9856781959533691, "incorrect_loss_raw": 11.593335270881653, "correct_loss_per_char": 0.0758213996887207, "incorrect_loss_per_char": 1.0856997865722293, "correct_loss_per_token": 0.9856781959533691, "incorrect_loss_per_token": 9.137499511241913, "correct_loss_uncond": -13.656483173370361, "incorrect_loss_uncond": -3.840680956840515}, "model_output": [{"sum_logits": -0.9856781959533691, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -0.9856781959533691, "logits_per_char": -0.0758213996887207, "num_chars": 13}, {"sum_logits": -14.78547477722168, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.708595275878906, "logits_per_token": -14.78547477722168, "logits_per_char": -1.05610534123012, "num_chars": 14}, {"sum_logits": -13.706619262695312, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -13.706619262695312, "logits_per_char": -2.2844365437825522, "num_chars": 6}, {"sum_logits": -13.097790718078613, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.541488647460938, "logits_per_token": -3.2744476795196533, "logits_per_char": -0.5239116287231446, "num_chars": 25}, {"sum_logits": -4.783456325531006, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.414422035217285, "logits_per_token": -4.783456325531006, "logits_per_char": -0.4783456325531006, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 151, "native_id": "0e80f2afe5c4f652e8720b52d7c06c87", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.70225715637207, "incorrect_loss_raw": 15.790796041488647, "correct_loss_per_char": 0.6092961051247336, "incorrect_loss_per_char": 1.4370583935389443, "correct_loss_per_token": 6.70225715637207, "incorrect_loss_per_token": 8.810010313987732, "correct_loss_uncond": -7.684761047363281, "incorrect_loss_uncond": -3.6414685249328613}, "model_output": [{"sum_logits": -6.84958553314209, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.869179725646973, "logits_per_token": -6.84958553314209, "logits_per_char": -0.7610650592380099, "num_chars": 9}, {"sum_logits": -6.70225715637207, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.387018203735352, "logits_per_token": -6.70225715637207, "logits_per_char": -0.6092961051247336, "num_chars": 11}, {"sum_logits": -14.666996955871582, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.156875610351562, "logits_per_token": -14.666996955871582, "logits_per_char": -2.095285279410226, "num_chars": 7}, {"sum_logits": -13.247233390808105, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.441146850585938, "logits_per_token": -6.623616695404053, "logits_per_char": -1.4719148212009006, "num_chars": 9}, {"sum_logits": -28.399368286132812, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -31.261856079101562, "logits_per_token": -7.099842071533203, "logits_per_char": -1.4199684143066407, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 152, "native_id": "b67971747e95ba425a5b81e0ba8d0b28", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.557836532592773, "incorrect_loss_raw": 10.811365604400635, "correct_loss_per_char": 0.40986478328704834, "incorrect_loss_per_char": 1.2680228611779591, "correct_loss_per_token": 3.2789182662963867, "incorrect_loss_per_token": 6.959473133087158, "correct_loss_uncond": -12.911346435546875, "incorrect_loss_uncond": -6.228713274002075}, "model_output": [{"sum_logits": -19.333538055419922, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -9.666769027709961, "logits_per_char": -2.148170895046658, "num_chars": 9}, {"sum_logits": -4.957798957824707, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -4.957798957824707, "logits_per_char": -0.7082569939749581, "num_chars": 7}, {"sum_logits": -7.4725236892700195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -7.4725236892700195, "logits_per_char": -1.0675033841814314, "num_chars": 7}, {"sum_logits": -6.557836532592773, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -3.2789182662963867, "logits_per_char": -0.40986478328704834, "num_chars": 16}, {"sum_logits": -11.48160171508789, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -5.740800857543945, "logits_per_char": -1.148160171508789, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 153, "native_id": "fcd39cfa321728fea069a6ae4285b06f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.76524829864502, "incorrect_loss_raw": 9.875724792480469, "correct_loss_per_char": 0.7968407544222745, "incorrect_loss_per_char": 0.7869641450735239, "correct_loss_per_token": 4.38262414932251, "incorrect_loss_per_token": 5.492211739222208, "correct_loss_uncond": -10.284506797790527, "incorrect_loss_uncond": -6.34090256690979}, "model_output": [{"sum_logits": -6.9846343994140625, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.343454360961914, "logits_per_token": -2.328211466471354, "logits_per_char": -0.5372795691856971, "num_chars": 13}, {"sum_logits": -9.934581756591797, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.66762924194336, "logits_per_token": -3.3115272521972656, "logits_per_char": -0.7096129826136998, "num_chars": 14}, {"sum_logits": -10.074533462524414, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.297904014587402, "logits_per_token": -10.074533462524414, "logits_per_char": -1.0074533462524413, "num_chars": 10}, {"sum_logits": -8.76524829864502, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.049755096435547, "logits_per_token": -4.38262414932251, "logits_per_char": -0.7968407544222745, "num_chars": 11}, {"sum_logits": -12.509149551391602, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.55752182006836, "logits_per_token": -6.254574775695801, "logits_per_char": -0.8935106822422573, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 154, "native_id": "cb6766fb25daee911fc8e9816b98938c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.375420570373535, "incorrect_loss_raw": 13.863940954208374, "correct_loss_per_char": 1.1250382336703213, "incorrect_loss_per_char": 1.4224532008171082, "correct_loss_per_token": 6.187710285186768, "incorrect_loss_per_token": 6.270156025886536, "correct_loss_uncond": -2.0794763565063477, "incorrect_loss_uncond": -5.620112657546997}, "model_output": [{"sum_logits": -20.120054244995117, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.595016479492188, "logits_per_token": -6.706684748331706, "logits_per_char": -1.3413369496663412, "num_chars": 15}, {"sum_logits": -8.42300033569336, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.904632568359375, "logits_per_token": -8.42300033569336, "logits_per_char": -2.10575008392334, "num_chars": 4}, {"sum_logits": -5.880215644836426, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.54056167602539, "logits_per_token": -2.940107822418213, "logits_per_char": -0.49001797040303546, "num_chars": 12}, {"sum_logits": -21.032493591308594, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.89600372314453, "logits_per_token": -7.010831197102864, "logits_per_char": -1.752707799275716, "num_chars": 12}, {"sum_logits": -12.375420570373535, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.454896926879883, "logits_per_token": -6.187710285186768, "logits_per_char": -1.1250382336703213, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 155, "native_id": "54231f875bb7fe4d3e4afb6eae64387c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.499960899353027, "incorrect_loss_raw": 14.433603465557098, "correct_loss_per_char": 0.9545418999411843, "incorrect_loss_per_char": 1.4046857844269465, "correct_loss_per_token": 5.249980449676514, "incorrect_loss_per_token": 7.25187732776006, "correct_loss_uncond": -8.362711906433105, "incorrect_loss_uncond": -3.175755798816681}, "model_output": [{"sum_logits": -11.756858825683594, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.246232032775879, "logits_per_token": -11.756858825683594, "logits_per_char": -2.3513717651367188, "num_chars": 5}, {"sum_logits": -24.712566375732422, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.86172866821289, "logits_per_token": -8.23752212524414, "logits_per_char": -1.7651833125523158, "num_chars": 14}, {"sum_logits": -18.377790451049805, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.679433822631836, "logits_per_token": -6.125930150349935, "logits_per_char": -1.020988358391656, "num_chars": 18}, {"sum_logits": -10.499960899353027, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.862672805786133, "logits_per_token": -5.249980449676514, "logits_per_char": -0.9545418999411843, "num_chars": 11}, {"sum_logits": -2.8871982097625732, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.650042533874512, "logits_per_token": -2.8871982097625732, "logits_per_char": -0.4811997016270955, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 156, "native_id": "7d7f7d7a8ae3b20ca9fc0da6efe467b4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2107839584350586, "incorrect_loss_raw": 13.996542572975159, "correct_loss_per_char": 0.2009803598577326, "incorrect_loss_per_char": 1.186738336086273, "correct_loss_per_token": 2.2107839584350586, "incorrect_loss_per_token": 7.1636059284210205, "correct_loss_uncond": -12.583166122436523, "incorrect_loss_uncond": -4.688517451286316}, "model_output": [{"sum_logits": -5.66494607925415, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -5.66494607925415, "logits_per_char": -1.1329892158508301, "num_chars": 5}, {"sum_logits": -2.2107839584350586, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -2.2107839584350586, "logits_per_char": -0.2009803598577326, "num_chars": 11}, {"sum_logits": -11.400018692016602, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.05655288696289, "logits_per_token": -11.400018692016602, "logits_per_char": -1.4250023365020752, "num_chars": 8}, {"sum_logits": -31.484575271606445, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -35.14986038208008, "logits_per_token": -7.871143817901611, "logits_per_char": -1.2593830108642579, "num_chars": 25}, {"sum_logits": -7.4366302490234375, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -3.7183151245117188, "logits_per_char": -0.9295787811279297, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 157, "native_id": "31b72d4e4ae7c672c20e27e42499ec79", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.921066284179688, "incorrect_loss_raw": 7.5251710414886475, "correct_loss_per_char": 1.4868443806966145, "incorrect_loss_per_char": 1.1964574821411618, "correct_loss_per_token": 4.460533142089844, "incorrect_loss_per_token": 7.5251710414886475, "correct_loss_uncond": -5.869792938232422, "incorrect_loss_uncond": -6.5311598777771}, "model_output": [{"sum_logits": -8.976086616516113, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.535295486450195, "logits_per_token": -8.976086616516113, "logits_per_char": -0.9973429573906792, "num_chars": 9}, {"sum_logits": -5.234994888305664, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.124066352844238, "logits_per_token": -5.234994888305664, "logits_per_char": -1.0469989776611328, "num_chars": 5}, {"sum_logits": -8.252033233642578, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.76504135131836, "logits_per_token": -8.252033233642578, "logits_per_char": -1.6504066467285157, "num_chars": 5}, {"sum_logits": -7.637569427490234, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.800920486450195, "logits_per_token": -7.637569427490234, "logits_per_char": -1.0910813467843192, "num_chars": 7}, {"sum_logits": -8.921066284179688, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.79085922241211, "logits_per_token": -4.460533142089844, "logits_per_char": -1.4868443806966145, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 158, "native_id": "26ce83b8e9a263079aa8cdbd5258d667", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.423788070678711, "incorrect_loss_raw": 6.261418581008911, "correct_loss_per_char": 0.9359764522976346, "incorrect_loss_per_char": 0.9730848310485719, "correct_loss_per_token": 8.423788070678711, "incorrect_loss_per_token": 6.261418581008911, "correct_loss_uncond": -6.656965255737305, "incorrect_loss_uncond": -4.305882453918457}, "model_output": [{"sum_logits": -8.423788070678711, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.080753326416016, "logits_per_token": -8.423788070678711, "logits_per_char": -0.9359764522976346, "num_chars": 9}, {"sum_logits": -4.8052778244018555, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.506307601928711, "logits_per_token": -4.8052778244018555, "logits_per_char": -0.6864682606288365, "num_chars": 7}, {"sum_logits": -9.603894233703613, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -9.592881202697754, "logits_per_token": -9.603894233703613, "logits_per_char": -1.0670993593004015, "num_chars": 9}, {"sum_logits": -4.856636047363281, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -9.153438568115234, "logits_per_token": -4.856636047363281, "logits_per_char": -0.6938051496233258, "num_chars": 7}, {"sum_logits": -5.7798662185668945, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.016576766967773, "logits_per_token": -5.7798662185668945, "logits_per_char": -1.4449665546417236, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 159, "native_id": "30138608d4934a75cf0911a06b021374", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.809492111206055, "incorrect_loss_raw": 17.298755645751953, "correct_loss_per_char": 0.9727845873151507, "incorrect_loss_per_char": 1.4130857761411368, "correct_loss_per_token": 6.809492111206055, "incorrect_loss_per_token": 8.649377822875977, "correct_loss_uncond": -5.080950736999512, "incorrect_loss_uncond": -4.90968132019043}, "model_output": [{"sum_logits": -17.49555015563965, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.609106063842773, "logits_per_token": -8.747775077819824, "logits_per_char": -0.920818429244192, "num_chars": 19}, {"sum_logits": -19.028949737548828, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -27.251564025878906, "logits_per_token": -9.514474868774414, "logits_per_char": -2.1143277486165366, "num_chars": 9}, {"sum_logits": -16.236448287963867, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.828012466430664, "logits_per_token": -8.118224143981934, "logits_per_char": -1.3530373573303223, "num_chars": 12}, {"sum_logits": -16.43407440185547, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.145065307617188, "logits_per_token": -8.217037200927734, "logits_per_char": -1.2641595693734975, "num_chars": 13}, {"sum_logits": -6.809492111206055, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.890442848205566, "logits_per_token": -6.809492111206055, "logits_per_char": -0.9727845873151507, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 160, "native_id": "01abce8c4964371d85a5be2019f75827", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.787588596343994, "incorrect_loss_raw": 8.431498527526855, "correct_loss_per_char": 0.7234485745429993, "incorrect_loss_per_char": 0.8998745414945815, "correct_loss_per_token": 5.787588596343994, "incorrect_loss_per_token": 5.073432087898254, "correct_loss_uncond": -10.603217601776123, "incorrect_loss_uncond": -7.564343452453613}, "model_output": [{"sum_logits": -5.787588596343994, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.390806198120117, "logits_per_token": -5.787588596343994, "logits_per_char": -0.7234485745429993, "num_chars": 8}, {"sum_logits": -13.878990173339844, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.6516056060791, "logits_per_token": -6.939495086669922, "logits_per_char": -1.156582514444987, "num_chars": 12}, {"sum_logits": -12.985541343688965, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -6.492770671844482, "logits_per_char": -1.4428379270765517, "num_chars": 9}, {"sum_logits": -4.967270851135254, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -4.967270851135254, "logits_per_char": -0.827878475189209, "num_chars": 6}, {"sum_logits": -1.8941917419433594, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -1.8941917419433594, "logits_per_char": -0.17219924926757812, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 161, "native_id": "3e2222c99e11fca2ad4af2d470eb8ea2_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8355066776275635, "incorrect_loss_raw": 11.599915742874146, "correct_loss_per_char": 0.2739647626876831, "incorrect_loss_per_char": 1.0993905877019023, "correct_loss_per_token": 1.9177533388137817, "incorrect_loss_per_token": 6.4424954652786255, "correct_loss_uncond": -14.556398153305054, "incorrect_loss_uncond": -5.804229259490967}, "model_output": [{"sum_logits": -11.505701065063477, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.273624420166016, "logits_per_token": -5.752850532531738, "logits_per_char": -1.1505701065063476, "num_chars": 10}, {"sum_logits": -12.705615043640137, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -6.352807521820068, "logits_per_char": -0.9075439316885812, "num_chars": 14}, {"sum_logits": -5.140300750732422, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -5.140300750732422, "logits_per_char": -1.0280601501464843, "num_chars": 5}, {"sum_logits": -3.8355066776275635, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.391904830932617, "logits_per_token": -1.9177533388137817, "logits_per_char": -0.2739647626876831, "num_chars": 14}, {"sum_logits": -17.048046112060547, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.735116958618164, "logits_per_token": -8.524023056030273, "logits_per_char": -1.3113881624661958, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 162, "native_id": "847dbf5b73c3e8d49bb9a36491d95e79", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.9923095703125, "incorrect_loss_raw": 6.940752029418945, "correct_loss_per_char": 0.42802211216517855, "incorrect_loss_per_char": 0.798538523060935, "correct_loss_per_token": 2.99615478515625, "incorrect_loss_per_token": 5.777899384498596, "correct_loss_uncond": -12.949213027954102, "incorrect_loss_uncond": -8.02703070640564}, "model_output": [{"sum_logits": -6.020151138305664, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -6.020151138305664, "logits_per_char": -0.752518892288208, "num_chars": 8}, {"sum_logits": -9.302821159362793, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.156919479370117, "logits_per_token": -4.6514105796813965, "logits_per_char": -0.6644872256687709, "num_chars": 14}, {"sum_logits": -5.9923095703125, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -2.99615478515625, "logits_per_char": -0.42802211216517855, "num_chars": 14}, {"sum_logits": -7.990995407104492, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -7.990995407104492, "logits_per_char": -1.141570772443499, "num_chars": 7}, {"sum_logits": -4.449040412902832, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -4.449040412902832, "logits_per_char": -0.6355772018432617, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 163, "native_id": "fa031cff8e11e75c68d6a99ef0e5ca3a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.95949649810791, "incorrect_loss_raw": 12.467764854431152, "correct_loss_per_char": 1.991899299621582, "incorrect_loss_per_char": 1.2209461473283314, "correct_loss_per_token": 9.95949649810791, "incorrect_loss_per_token": 7.53158962726593, "correct_loss_uncond": -3.0673465728759766, "incorrect_loss_uncond": -3.672440767288208}, "model_output": [{"sum_logits": -11.716338157653809, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.985848426818848, "logits_per_token": -3.9054460525512695, "logits_per_char": -1.301815350850423, "num_chars": 9}, {"sum_logits": -9.95949649810791, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.026843070983887, "logits_per_token": -9.95949649810791, "logits_per_char": -1.991899299621582, "num_chars": 5}, {"sum_logits": -12.300153732299805, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.07139778137207, "logits_per_token": -12.300153732299805, "logits_per_char": -1.0250128110249836, "num_chars": 12}, {"sum_logits": -9.942822456359863, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.780000686645508, "logits_per_token": -9.942822456359863, "logits_per_char": -1.420403208051409, "num_chars": 7}, {"sum_logits": -15.911745071411133, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -3.977936267852783, "logits_per_char": -1.1365532193865096, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 164, "native_id": "c592258c88295756833e9796e881057b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.728231430053711, "incorrect_loss_raw": 10.895438194274902, "correct_loss_per_char": 0.14401928583780924, "incorrect_loss_per_char": 1.045359887860038, "correct_loss_per_token": 0.8641157150268555, "incorrect_loss_per_token": 6.563732385635376, "correct_loss_uncond": -20.54483985900879, "incorrect_loss_uncond": -7.679683446884155}, "model_output": [{"sum_logits": -11.207490921020508, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.63726806640625, "logits_per_token": -5.603745460510254, "logits_per_char": -0.933957576751709, "num_chars": 12}, {"sum_logits": -8.928106307983398, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.605059623718262, "logits_per_token": -8.928106307983398, "logits_per_char": -1.1160132884979248, "num_chars": 8}, {"sum_logits": -1.728231430053711, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -0.8641157150268555, "logits_per_char": -0.14401928583780924, "num_chars": 12}, {"sum_logits": -9.770254135131836, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -4.885127067565918, "logits_per_char": -0.8882049213756215, "num_chars": 11}, {"sum_logits": -13.675901412963867, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.056079864501953, "logits_per_token": -6.837950706481934, "logits_per_char": -1.243263764814897, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 165, "native_id": "e1403a7c581bc263aea2ed8d179826d1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.429039478302002, "incorrect_loss_raw": 6.617487221956253, "correct_loss_per_char": 0.3690866231918335, "incorrect_loss_per_char": 0.6935555366651407, "correct_loss_per_token": 2.214519739151001, "incorrect_loss_per_token": 4.139220267534256, "correct_loss_uncond": -12.716338634490967, "incorrect_loss_uncond": -10.251785963773727}, "model_output": [{"sum_logits": -4.429039478302002, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -2.214519739151001, "logits_per_char": -0.3690866231918335, "num_chars": 12}, {"sum_logits": -9.648290634155273, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.770475387573242, "logits_per_token": -4.824145317077637, "logits_per_char": -1.3783272334507533, "num_chars": 7}, {"sum_logits": -10.177845001220703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.393962860107422, "logits_per_token": -5.088922500610352, "logits_per_char": -0.636115312576294, "num_chars": 16}, {"sum_logits": -5.090188026428223, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -5.090188026428223, "logits_per_char": -0.5655764473809136, "num_chars": 9}, {"sum_logits": -1.553625226020813, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -1.553625226020813, "logits_per_char": -0.19420315325260162, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 166, "native_id": "15c38f66e811d6ed68cde931bc31d93c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.1566667556762695, "incorrect_loss_raw": 13.100656032562256, "correct_loss_per_char": 0.38479167222976685, "incorrect_loss_per_char": 1.2777706483999889, "correct_loss_per_token": 3.0783333778381348, "incorrect_loss_per_token": 7.468972086906433, "correct_loss_uncond": -13.325556755065918, "incorrect_loss_uncond": -3.524362802505493}, "model_output": [{"sum_logits": -20.846162796020508, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.676286697387695, "logits_per_token": -10.423081398010254, "logits_per_char": -1.3028851747512817, "num_chars": 16}, {"sum_logits": -12.449518203735352, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.734373092651367, "logits_per_token": -6.224759101867676, "logits_per_char": -1.7785026005336217, "num_chars": 7}, {"sum_logits": -7.349152565002441, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.532423973083496, "logits_per_token": -7.349152565002441, "logits_per_char": -1.0498789378574915, "num_chars": 7}, {"sum_logits": -11.757790565490723, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.556991577148438, "logits_per_token": -5.878895282745361, "logits_per_char": -0.9798158804575602, "num_chars": 12}, {"sum_logits": -6.1566667556762695, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.482223510742188, "logits_per_token": -3.0783333778381348, "logits_per_char": -0.38479167222976685, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 167, "native_id": "1ac54dbf6b67f27daa3d456416047584", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.39385986328125, "incorrect_loss_raw": 12.919029235839844, "correct_loss_per_char": 0.339385986328125, "incorrect_loss_per_char": 1.549153886948313, "correct_loss_per_token": 1.696929931640625, "incorrect_loss_per_token": 7.259755452473958, "correct_loss_uncond": -16.95589828491211, "incorrect_loss_uncond": -3.722945213317871}, "model_output": [{"sum_logits": -10.312861442565918, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.654029846191406, "logits_per_token": -5.156430721282959, "logits_per_char": -0.6445538401603699, "num_chars": 16}, {"sum_logits": -15.511448860168457, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.3958740234375, "logits_per_token": -7.7557244300842285, "logits_per_char": -2.215921265738351, "num_chars": 7}, {"sum_logits": -11.264396667480469, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -11.264396667480469, "logits_per_char": -1.8773994445800781, "num_chars": 6}, {"sum_logits": -3.39385986328125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.34975814819336, "logits_per_token": -1.696929931640625, "logits_per_char": -0.339385986328125, "num_chars": 10}, {"sum_logits": -14.587409973144531, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.903663635253906, "logits_per_token": -4.862469991048177, "logits_per_char": -1.4587409973144532, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 168, "native_id": "21763a65765b5405c9a54484c2e54a72", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.067882537841797, "incorrect_loss_raw": 12.449128866195679, "correct_loss_per_char": 1.0056568781534831, "incorrect_loss_per_char": 1.1508720755577087, "correct_loss_per_token": 4.022627512613933, "incorrect_loss_per_token": 9.62433409690857, "correct_loss_uncond": -4.8153533935546875, "incorrect_loss_uncond": -3.8724029064178467}, "model_output": [{"sum_logits": -12.067882537841797, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.883235931396484, "logits_per_token": -4.022627512613933, "logits_per_char": -1.0056568781534831, "num_chars": 12}, {"sum_logits": -9.693512916564941, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.745762825012207, "logits_per_token": -9.693512916564941, "logits_per_char": -0.9693512916564941, "num_chars": 10}, {"sum_logits": -10.285144805908203, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -10.285144805908203, "logits_per_char": -1.0285144805908204, "num_chars": 10}, {"sum_logits": -16.948768615722656, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -5.649589538574219, "logits_per_char": -0.9969863891601562, "num_chars": 17}, {"sum_logits": -12.869089126586914, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.065410614013672, "logits_per_token": -12.869089126586914, "logits_per_char": -1.6086361408233643, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 169, "native_id": "c492b8b9754a181c924c1df19998cbc7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.145188331604004, "incorrect_loss_raw": 10.644418716430664, "correct_loss_per_char": 0.4677443937821822, "incorrect_loss_per_char": 1.2579317905686118, "correct_loss_per_token": 5.145188331604004, "incorrect_loss_per_token": 8.94032073020935, "correct_loss_uncond": -9.57544231414795, "incorrect_loss_uncond": -4.6044182777404785}, "model_output": [{"sum_logits": -11.302698135375977, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.794787406921387, "logits_per_token": -11.302698135375977, "logits_per_char": -1.412837266921997, "num_chars": 8}, {"sum_logits": -9.121381759643555, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.192404747009277, "logits_per_token": -9.121381759643555, "logits_per_char": -1.1401727199554443, "num_chars": 8}, {"sum_logits": -5.145188331604004, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.720630645751953, "logits_per_token": -5.145188331604004, "logits_per_char": -0.4677443937821822, "num_chars": 11}, {"sum_logits": -13.632783889770508, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.419578552246094, "logits_per_token": -6.816391944885254, "logits_per_char": -1.7040979862213135, "num_chars": 8}, {"sum_logits": -8.520811080932617, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.588577270507812, "logits_per_token": -8.520811080932617, "logits_per_char": -0.7746191891756925, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 170, "native_id": "fff554fffa1a0adc64b8d1e21d55534b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7933671474456787, "incorrect_loss_raw": 12.551133036613464, "correct_loss_per_char": 0.08815190527174208, "incorrect_loss_per_char": 1.401925112116031, "correct_loss_per_token": 0.7933671474456787, "incorrect_loss_per_token": 8.00849727789561, "correct_loss_uncond": -11.090965509414673, "incorrect_loss_uncond": -3.0315552949905396}, "model_output": [{"sum_logits": -13.067767143249512, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.29326057434082, "logits_per_token": -4.355922381083171, "logits_per_char": -1.3067767143249511, "num_chars": 10}, {"sum_logits": -4.579728603363037, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.641383171081543, "logits_per_token": -4.579728603363037, "logits_per_char": -0.5724660754203796, "num_chars": 8}, {"sum_logits": -0.7933671474456787, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -11.884332656860352, "logits_per_token": -0.7933671474456787, "logits_per_char": -0.08815190527174208, "num_chars": 9}, {"sum_logits": -18.917396545410156, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.813152313232422, "logits_per_token": -9.458698272705078, "logits_per_char": -1.455184349646935, "num_chars": 13}, {"sum_logits": -13.639639854431152, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.58295726776123, "logits_per_token": -13.639639854431152, "logits_per_char": -2.273273309071859, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 171, "native_id": "8ea5720718c0e122efa6277edb511569", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.395872116088867, "incorrect_loss_raw": 12.866452932357788, "correct_loss_per_char": 0.9328746795654297, "incorrect_loss_per_char": 0.9780416774816539, "correct_loss_per_token": 4.197936058044434, "incorrect_loss_per_token": 7.182734489440918, "correct_loss_uncond": -6.13790225982666, "incorrect_loss_uncond": -5.293789863586426}, "model_output": [{"sum_logits": -14.101083755493164, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.91646957397461, "logits_per_token": -4.700361251831055, "logits_per_char": -0.8294755150290096, "num_chars": 17}, {"sum_logits": -16.64277458190918, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.830303192138672, "logits_per_token": -8.32138729095459, "logits_per_char": -1.1887696129935128, "num_chars": 14}, {"sum_logits": -10.025527954101562, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.571563720703125, "logits_per_token": -5.012763977050781, "logits_per_char": -1.0025527954101563, "num_chars": 10}, {"sum_logits": -8.395872116088867, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -4.197936058044434, "logits_per_char": -0.9328746795654297, "num_chars": 9}, {"sum_logits": -10.696425437927246, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.32263469696045, "logits_per_token": -10.696425437927246, "logits_per_char": -0.8913687864939371, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 172, "native_id": "23e4257a49972efd8a97672f060be1c1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.795361518859863, "incorrect_loss_raw": 11.672531604766846, "correct_loss_per_char": 0.6177601380781694, "incorrect_loss_per_char": 1.1280676362860915, "correct_loss_per_token": 3.3976807594299316, "incorrect_loss_per_token": 4.887726187705994, "correct_loss_uncond": -14.290871620178223, "incorrect_loss_uncond": -7.724113941192627}, "model_output": [{"sum_logits": -6.795361518859863, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.086233139038086, "logits_per_token": -3.3976807594299316, "logits_per_char": -0.6177601380781694, "num_chars": 11}, {"sum_logits": -9.092535018920898, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.83324432373047, "logits_per_token": -3.0308450063069663, "logits_per_char": -0.6994257706862229, "num_chars": 13}, {"sum_logits": -13.672415733337402, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.680492401123047, "logits_per_token": -4.557471911112468, "logits_per_char": -0.9114943822224935, "num_chars": 15}, {"sum_logits": -6.429352760314941, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.433429718017578, "logits_per_token": -3.2146763801574707, "logits_per_char": -0.7143725289238824, "num_chars": 9}, {"sum_logits": -17.49582290649414, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.639415740966797, "logits_per_token": -8.74791145324707, "logits_per_char": -2.1869778633117676, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 173, "native_id": "a018d65a74b9e77d81014fd8f6d78f77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.3467183113098145, "incorrect_loss_raw": 13.499344825744629, "correct_loss_per_char": 0.41128602394690883, "incorrect_loss_per_char": 1.195348236018035, "correct_loss_per_token": 2.6733591556549072, "incorrect_loss_per_token": 7.744956016540527, "correct_loss_uncond": -13.763621807098389, "incorrect_loss_uncond": -6.735712766647339}, "model_output": [{"sum_logits": -13.629873275756836, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -6.814936637878418, "logits_per_char": -1.1358227729797363, "num_chars": 12}, {"sum_logits": -13.231576919555664, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.33397674560547, "logits_per_token": -6.615788459777832, "logits_per_char": -1.0178136091965895, "num_chars": 13}, {"sum_logits": -19.173660278320312, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.94070816040039, "logits_per_token": -9.586830139160156, "logits_per_char": -1.7430600253018467, "num_chars": 11}, {"sum_logits": -5.3467183113098145, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.110340118408203, "logits_per_token": -2.6733591556549072, "logits_per_char": -0.41128602394690883, "num_chars": 13}, {"sum_logits": -7.962268829345703, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -7.962268829345703, "logits_per_char": -0.884696536593967, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 174, "native_id": "24ceaf5c10863e73919b5f1b0f2db38e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.41226863861084, "incorrect_loss_raw": 8.234389781951904, "correct_loss_per_char": 1.0687114397684734, "incorrect_loss_per_char": 1.7997644662857055, "correct_loss_per_token": 6.41226863861084, "incorrect_loss_per_token": 8.234389781951904, "correct_loss_uncond": -8.971264839172363, "incorrect_loss_uncond": -6.08738374710083}, "model_output": [{"sum_logits": -14.995864868164062, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -14.995864868164062, "logits_per_char": -3.7489662170410156, "num_chars": 4}, {"sum_logits": -6.005175590515137, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.38680648803711, "logits_per_token": -6.005175590515137, "logits_per_char": -1.5012938976287842, "num_chars": 4}, {"sum_logits": -6.0897722244262695, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.3965425491333, "logits_per_token": -6.0897722244262695, "logits_per_char": -1.217954444885254, "num_chars": 5}, {"sum_logits": -5.846746444702148, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -5.846746444702148, "logits_per_char": -0.7308433055877686, "num_chars": 8}, {"sum_logits": -6.41226863861084, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.383533477783203, "logits_per_token": -6.41226863861084, "logits_per_char": -1.0687114397684734, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 175, "native_id": "900492bd731f8f615ed7c08155737d44", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.947257995605469, "incorrect_loss_raw": 10.232577800750732, "correct_loss_per_char": 0.7456048329671224, "incorrect_loss_per_char": 0.9756390267894383, "correct_loss_per_token": 4.473628997802734, "incorrect_loss_per_token": 6.500808000564575, "correct_loss_uncond": -8.92715835571289, "incorrect_loss_uncond": -5.848041534423828}, "model_output": [{"sum_logits": -15.215998649597168, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.924055099487305, "logits_per_token": -5.071999549865723, "logits_per_char": -1.0868570463997977, "num_chars": 14}, {"sum_logits": -8.716896057128906, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.777267456054688, "logits_per_token": -8.716896057128906, "logits_per_char": -1.0896120071411133, "num_chars": 8}, {"sum_logits": -9.566160202026367, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.741230010986328, "logits_per_token": -4.783080101013184, "logits_per_char": -0.7971800168355306, "num_chars": 12}, {"sum_logits": -7.431256294250488, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.879924774169922, "logits_per_token": -7.431256294250488, "logits_per_char": -0.928907036781311, "num_chars": 8}, {"sum_logits": -8.947257995605469, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.87441635131836, "logits_per_token": -4.473628997802734, "logits_per_char": -0.7456048329671224, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 176, "native_id": "4e3f85dc92eaad4ae6bc6529d62e382c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.694571495056152, "incorrect_loss_raw": 10.794261455535889, "correct_loss_per_char": 0.7904155904596503, "incorrect_loss_per_char": 1.2809534921512735, "correct_loss_per_token": 4.347285747528076, "incorrect_loss_per_token": 7.628376841545105, "correct_loss_uncond": -8.678324699401855, "incorrect_loss_uncond": -7.492821931838989}, "model_output": [{"sum_logits": -10.5574312210083, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.40142822265625, "logits_per_token": -10.5574312210083, "logits_per_char": -2.11148624420166, "num_chars": 5}, {"sum_logits": -8.694571495056152, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.372896194458008, "logits_per_token": -4.347285747528076, "logits_per_char": -0.7904155904596503, "num_chars": 11}, {"sum_logits": -14.421623229980469, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.87954330444336, "logits_per_token": -7.210811614990234, "logits_per_char": -1.1093556330754206, "num_chars": 13}, {"sum_logits": -7.292537689208984, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.143855094909668, "logits_per_token": -7.292537689208984, "logits_per_char": -0.911567211151123, "num_chars": 8}, {"sum_logits": -10.9054536819458, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.723506927490234, "logits_per_token": -5.4527268409729, "logits_per_char": -0.991404880176891, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 177, "native_id": "fa1f17ca535c7e875f4f58510dc2f430", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.0785911083221436, "incorrect_loss_raw": 4.884858310222626, "correct_loss_per_char": 0.4397987297603062, "incorrect_loss_per_char": 0.8255454364277068, "correct_loss_per_token": 3.0785911083221436, "incorrect_loss_per_token": 4.884858310222626, "correct_loss_uncond": -12.056246042251587, "incorrect_loss_uncond": -8.713561356067657}, "model_output": [{"sum_logits": -3.0785911083221436, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.13483715057373, "logits_per_token": -3.0785911083221436, "logits_per_char": -0.4397987297603062, "num_chars": 7}, {"sum_logits": -7.050910949707031, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.371214866638184, "logits_per_token": -7.050910949707031, "logits_per_char": -1.1751518249511719, "num_chars": 6}, {"sum_logits": -5.8426513671875, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -5.8426513671875, "logits_per_char": -0.9737752278645834, "num_chars": 6}, {"sum_logits": -3.567279815673828, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.579615592956543, "logits_per_token": -3.567279815673828, "logits_per_char": -0.7134559631347657, "num_chars": 5}, {"sum_logits": -3.0785911083221436, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.13483715057373, "logits_per_token": -3.0785911083221436, "logits_per_char": -0.4397987297603062, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 178, "native_id": "76b6f0765a3b2fba71021f902142edc0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.3750224113464355, "incorrect_loss_raw": 12.072947263717651, "correct_loss_per_char": 0.8194469345940484, "incorrect_loss_per_char": 1.2383848569332025, "correct_loss_per_token": 2.458340803782145, "incorrect_loss_per_token": 9.137670159339905, "correct_loss_uncond": -7.1276421546936035, "incorrect_loss_uncond": -4.006597518920898}, "model_output": [{"sum_logits": -12.262285232543945, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -12.262285232543945, "logits_per_char": -1.3624761369493272, "num_chars": 9}, {"sum_logits": -12.547286987304688, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -12.547286987304688, "logits_per_char": -1.3941429985894098, "num_chars": 9}, {"sum_logits": -7.3750224113464355, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.502664566040039, "logits_per_token": -2.458340803782145, "logits_per_char": -0.8194469345940484, "num_chars": 9}, {"sum_logits": -16.925823211669922, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.808048248291016, "logits_per_token": -8.462911605834961, "logits_per_char": -1.6925823211669921, "num_chars": 10}, {"sum_logits": -6.556393623352051, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.189672470092773, "logits_per_token": -3.2781968116760254, "logits_per_char": -0.5043379710270808, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 179, "native_id": "f1368ab1d4ee05d72d555474fcd737d7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.398716926574707, "incorrect_loss_raw": 11.076088547706604, "correct_loss_per_char": 0.7635197205977007, "incorrect_loss_per_char": 0.8640555295137949, "correct_loss_per_token": 4.1993584632873535, "incorrect_loss_per_token": 5.501704136530559, "correct_loss_uncond": -10.164654731750488, "incorrect_loss_uncond": -6.967553734779358}, "model_output": [{"sum_logits": -9.421134948730469, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.526161193847656, "logits_per_token": -4.710567474365234, "logits_per_char": -0.8564668135209517, "num_chars": 11}, {"sum_logits": -8.398716926574707, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.563371658325195, "logits_per_token": -4.1993584632873535, "logits_per_char": -0.7635197205977007, "num_chars": 11}, {"sum_logits": -5.33691930770874, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.521513938903809, "logits_per_token": -5.33691930770874, "logits_per_char": -0.7624170439583915, "num_chars": 7}, {"sum_logits": -12.663378715515137, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.23167610168457, "logits_per_token": -6.331689357757568, "logits_per_char": -0.8442252477010092, "num_chars": 15}, {"sum_logits": -16.88292121887207, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.895217895507812, "logits_per_token": -5.62764040629069, "logits_per_char": -0.9931130128748277, "num_chars": 17}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 180, "native_id": "3dee8fc7f0a3fbf4de111b6686fca157", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6767739057540894, "incorrect_loss_raw": 10.615555942058563, "correct_loss_per_char": 0.16767739057540892, "incorrect_loss_per_char": 1.1034593071256364, "correct_loss_per_token": 1.6767739057540894, "incorrect_loss_per_token": 6.9519577622413635, "correct_loss_uncond": -15.493159413337708, "incorrect_loss_uncond": -7.22802072763443}, "model_output": [{"sum_logits": -14.348673820495605, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -7.174336910247803, "logits_per_char": -1.1957228183746338, "num_chars": 12}, {"sum_logits": -1.616576910018921, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -12.351519584655762, "logits_per_token": -1.616576910018921, "logits_per_char": -0.3233153820037842, "num_chars": 5}, {"sum_logits": -11.536861419677734, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.943571090698242, "logits_per_token": -11.536861419677734, "logits_per_char": -1.648123059953962, "num_chars": 7}, {"sum_logits": -1.6767739057540894, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -1.6767739057540894, "logits_per_char": -0.16767739057540892, "num_chars": 10}, {"sum_logits": -14.960111618041992, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.80614471435547, "logits_per_token": -7.480055809020996, "logits_per_char": -1.246675968170166, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 181, "native_id": "ea0e7771afd86a59fd9f7764b77e3fa4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.016902923583984, "incorrect_loss_raw": 10.16084361076355, "correct_loss_per_char": 1.377112865447998, "incorrect_loss_per_char": 0.9265641990552778, "correct_loss_per_token": 5.508451461791992, "incorrect_loss_per_token": 6.678480625152588, "correct_loss_uncond": -5.1187744140625, "incorrect_loss_uncond": -5.177096366882324}, "model_output": [{"sum_logits": -17.134218215942383, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.333938598632812, "logits_per_token": -8.567109107971191, "logits_per_char": -0.9018009587338096, "num_chars": 19}, {"sum_logits": -6.353133201599121, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -6.353133201599121, "logits_per_char": -0.9075904573713031, "num_chars": 7}, {"sum_logits": -11.016902923583984, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.135677337646484, "logits_per_token": -5.508451461791992, "logits_per_char": -1.377112865447998, "num_chars": 8}, {"sum_logits": -6.431337356567383, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -6.431337356567383, "logits_per_char": -1.0718895594278972, "num_chars": 6}, {"sum_logits": -10.724685668945312, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.993436813354492, "logits_per_token": -5.362342834472656, "logits_per_char": -0.8249758206881009, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 182, "native_id": "2c845646032bbf27fb3904330d59d324", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.80116081237793, "incorrect_loss_raw": 11.51611852645874, "correct_loss_per_char": 0.8167634010314941, "incorrect_loss_per_char": 1.1051476710984687, "correct_loss_per_token": 4.900580406188965, "incorrect_loss_per_token": 4.903072436650594, "correct_loss_uncond": -9.337892532348633, "incorrect_loss_uncond": -7.461309909820557}, "model_output": [{"sum_logits": -10.695460319519043, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -5.3477301597595215, "logits_per_char": -1.5279229027884347, "num_chars": 7}, {"sum_logits": -12.842384338378906, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -23.56149673461914, "logits_per_token": -6.421192169189453, "logits_per_char": -1.2842384338378907, "num_chars": 10}, {"sum_logits": -20.519683837890625, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -6.839894612630208, "logits_per_char": -1.207040225758272, "num_chars": 17}, {"sum_logits": -2.0069456100463867, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.136329650878906, "logits_per_token": -1.0034728050231934, "logits_per_char": -0.40138912200927734, "num_chars": 5}, {"sum_logits": -9.80116081237793, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -4.900580406188965, "logits_per_char": -0.8167634010314941, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 183, "native_id": "bc08c354e5bead6863ea4a29cb8fa359", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.834891319274902, "incorrect_loss_raw": 15.209248065948486, "correct_loss_per_char": 0.4020524305455825, "incorrect_loss_per_char": 1.1265950281541426, "correct_loss_per_token": 2.2782971064249673, "incorrect_loss_per_token": 6.113425413767497, "correct_loss_uncond": -13.203343391418457, "incorrect_loss_uncond": -4.7233662605285645}, "model_output": [{"sum_logits": -20.110960006713867, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.2967472076416, "logits_per_token": -6.703653335571289, "logits_per_char": -1.4364971433367049, "num_chars": 14}, {"sum_logits": -11.445751190185547, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.2061767578125, "logits_per_token": -5.722875595092773, "logits_per_char": -0.8175536564418248, "num_chars": 14}, {"sum_logits": -15.677806854248047, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.252992630004883, "logits_per_token": -5.225935618082683, "logits_per_char": -1.2059851426344652, "num_chars": 13}, {"sum_logits": -13.602474212646484, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.97454071044922, "logits_per_token": -6.801237106323242, "logits_per_char": -1.0463441702035756, "num_chars": 13}, {"sum_logits": -6.834891319274902, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.03823471069336, "logits_per_token": -2.2782971064249673, "logits_per_char": -0.4020524305455825, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 184, "native_id": "fb35c7aa5694bab2cde4b7257bfae003", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.194067001342773, "incorrect_loss_raw": 12.052064299583435, "correct_loss_per_char": 0.5630970001220703, "incorrect_loss_per_char": 1.5378101269404092, "correct_loss_per_token": 6.194067001342773, "incorrect_loss_per_token": 8.139138261477152, "correct_loss_uncond": -7.113712310791016, "incorrect_loss_uncond": -2.8598934412002563}, "model_output": [{"sum_logits": -13.033004760742188, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -13.033004760742188, "logits_per_char": -1.6291255950927734, "num_chars": 8}, {"sum_logits": -7.972255229949951, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.863107681274414, "logits_per_token": -7.972255229949951, "logits_per_char": -1.3287092049916585, "num_chars": 6}, {"sum_logits": -6.194067001342773, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.307779312133789, "logits_per_token": -6.194067001342773, "logits_per_char": -0.5630970001220703, "num_chars": 11}, {"sum_logits": -14.901763916015625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.186935424804688, "logits_per_token": -7.4508819580078125, "logits_per_char": -1.6557515462239583, "num_chars": 9}, {"sum_logits": -12.301233291625977, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.875415802001953, "logits_per_token": -4.100411097208659, "logits_per_char": -1.537654161453247, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 185, "native_id": "e2a9f0041d17a9944377a91bef5e0d0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.426046371459961, "incorrect_loss_raw": 10.095435380935669, "correct_loss_per_char": 0.771302318572998, "incorrect_loss_per_char": 1.3788457829456826, "correct_loss_per_token": 5.14201545715332, "incorrect_loss_per_token": 7.465379118919373, "correct_loss_uncond": -6.938325881958008, "incorrect_loss_uncond": -3.9438796043395996}, "model_output": [{"sum_logits": -6.962733268737793, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.123089790344238, "logits_per_token": -3.4813666343688965, "logits_per_char": -0.8703416585922241, "num_chars": 8}, {"sum_logits": -8.802661895751953, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -8.802661895751953, "logits_per_char": -1.2575231279645647, "num_chars": 7}, {"sum_logits": -15.426046371459961, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.36437225341797, "logits_per_token": -5.14201545715332, "logits_per_char": -0.771302318572998, "num_chars": 20}, {"sum_logits": -10.538629531860352, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.941996574401855, "logits_per_token": -10.538629531860352, "logits_per_char": -2.10772590637207, "num_chars": 5}, {"sum_logits": -14.077716827392578, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.568596839904785, "logits_per_token": -7.038858413696289, "logits_per_char": -1.2797924388538708, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 186, "native_id": "ae56eff01d05422ddbcb26be7181356a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.10069465637207, "incorrect_loss_raw": 11.08111023902893, "correct_loss_per_char": 0.7000534351055439, "incorrect_loss_per_char": 1.0637890810918327, "correct_loss_per_token": 4.550347328186035, "incorrect_loss_per_token": 9.492539167404175, "correct_loss_uncond": -8.922355651855469, "incorrect_loss_uncond": -4.237613201141357}, "model_output": [{"sum_logits": -11.731249809265137, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.904325485229492, "logits_per_token": -11.731249809265137, "logits_per_char": -1.3034722010294597, "num_chars": 9}, {"sum_logits": -9.10069465637207, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -4.550347328186035, "logits_per_char": -0.7000534351055439, "num_chars": 13}, {"sum_logits": -11.693695068359375, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -11.693695068359375, "logits_per_char": -1.2992994520399306, "num_chars": 9}, {"sum_logits": -8.190927505493164, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -8.190927505493164, "logits_per_char": -0.7446297732266512, "num_chars": 11}, {"sum_logits": -12.708568572998047, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.031539916992188, "logits_per_token": -6.354284286499023, "logits_per_char": -0.9077548980712891, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 187, "native_id": "895aa97bb84d874d71b2aed572cebfdd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.058460235595703, "incorrect_loss_raw": 11.42510998249054, "correct_loss_per_char": 1.5620511372884114, "incorrect_loss_per_char": 1.2886135825089045, "correct_loss_per_token": 7.029230117797852, "incorrect_loss_per_token": 8.072538018226624, "correct_loss_uncond": -2.489473342895508, "incorrect_loss_uncond": -5.4424415826797485}, "model_output": [{"sum_logits": -5.601466655731201, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -5.601466655731201, "logits_per_char": -1.4003666639328003, "num_chars": 4}, {"sum_logits": -13.278397560119629, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.45858383178711, "logits_per_token": -13.278397560119629, "logits_per_char": -1.896913937159947, "num_chars": 7}, {"sum_logits": -16.497676849365234, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.34688377380371, "logits_per_token": -8.248838424682617, "logits_per_char": -0.8248838424682617, "num_chars": 20}, {"sum_logits": -10.322898864746094, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.36529541015625, "logits_per_token": -5.161449432373047, "logits_per_char": -1.0322898864746093, "num_chars": 10}, {"sum_logits": -14.058460235595703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.54793357849121, "logits_per_token": -7.029230117797852, "logits_per_char": -1.5620511372884114, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 188, "native_id": "9d625e948e9c3777e7cc54ed8ffea135", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.99839448928833, "incorrect_loss_raw": 12.627337217330933, "correct_loss_per_char": 0.43739965558052063, "incorrect_loss_per_char": 1.7760161257925486, "correct_loss_per_token": 3.499197244644165, "incorrect_loss_per_token": 12.627337217330933, "correct_loss_uncond": -13.948523998260498, "incorrect_loss_uncond": -1.1396219730377197}, "model_output": [{"sum_logits": -6.99839448928833, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.946918487548828, "logits_per_token": -3.499197244644165, "logits_per_char": -0.43739965558052063, "num_chars": 16}, {"sum_logits": -12.980222702026367, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -12.980222702026367, "logits_per_char": -1.8543175288609095, "num_chars": 7}, {"sum_logits": -12.807968139648438, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -12.807968139648438, "logits_per_char": -2.5615936279296876, "num_chars": 5}, {"sum_logits": -15.0733642578125, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.90210247039795, "logits_per_token": -15.0733642578125, "logits_per_char": -1.8841705322265625, "num_chars": 8}, {"sum_logits": -9.647793769836426, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -9.647793769836426, "logits_per_char": -0.8039828141530355, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 189, "native_id": "d107d67d525a686fbd8282314d2ea33c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.434645652770996, "incorrect_loss_raw": 18.7612886428833, "correct_loss_per_char": 0.48692913055419923, "incorrect_loss_per_char": 1.6721040663265048, "correct_loss_per_token": 2.434645652770996, "incorrect_loss_per_token": 12.819326400756836, "correct_loss_uncond": -12.690500259399414, "incorrect_loss_uncond": 0.38156747817993164}, "model_output": [{"sum_logits": -2.434645652770996, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.12514591217041, "logits_per_token": -2.434645652770996, "logits_per_char": -0.48692913055419923, "num_chars": 5}, {"sum_logits": -24.1319580078125, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.782716751098633, "logits_per_token": -12.06597900390625, "logits_per_char": -1.7237112862723214, "num_chars": 14}, {"sum_logits": -23.40373992919922, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.778446197509766, "logits_per_token": -11.70186996459961, "logits_per_char": -1.3002077738444011, "num_chars": 18}, {"sum_logits": -14.863809585571289, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -14.863809585571289, "logits_per_char": -1.8579761981964111, "num_chars": 8}, {"sum_logits": -12.645647048950195, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -12.645647048950195, "logits_per_char": -1.806521006992885, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 190, "native_id": "fee5ff19811750ad019665af7b36b3c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0686306953430176, "incorrect_loss_raw": 13.511085867881775, "correct_loss_per_char": 0.6137261390686035, "incorrect_loss_per_char": 1.7041942497094473, "correct_loss_per_token": 3.0686306953430176, "incorrect_loss_per_token": 9.338278651237488, "correct_loss_uncond": -11.753090381622314, "incorrect_loss_uncond": -3.2545498609542847}, "model_output": [{"sum_logits": -3.0686306953430176, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.821721076965332, "logits_per_token": -3.0686306953430176, "logits_per_char": -0.6137261390686035, "num_chars": 5}, {"sum_logits": -6.672848224639893, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.545310020446777, "logits_per_token": -6.672848224639893, "logits_per_char": -1.3345696449279785, "num_chars": 5}, {"sum_logits": -13.98903751373291, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.96087646484375, "logits_per_token": -13.98903751373291, "logits_per_char": -1.7486296892166138, "num_chars": 8}, {"sum_logits": -15.080804824829102, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.029136657714844, "logits_per_token": -7.540402412414551, "logits_per_char": -2.51346747080485, "num_chars": 6}, {"sum_logits": -18.301652908325195, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.527219772338867, "logits_per_token": -9.150826454162598, "logits_per_char": -1.2201101938883463, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 191, "native_id": "e69da59cbcf2a302e4523571eba8186b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.003950119018555, "incorrect_loss_raw": 8.128708124160767, "correct_loss_per_char": 0.5717107227870396, "incorrect_loss_per_char": 0.9043591016814823, "correct_loss_per_token": 8.003950119018555, "incorrect_loss_per_token": 8.128708124160767, "correct_loss_uncond": -6.392856597900391, "incorrect_loss_uncond": -5.423824787139893}, "model_output": [{"sum_logits": -6.810842990875244, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -6.810842990875244, "logits_per_char": -0.6810842990875244, "num_chars": 10}, {"sum_logits": -10.066028594970703, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.871743202209473, "logits_per_token": -10.066028594970703, "logits_per_char": -1.1184476216634114, "num_chars": 9}, {"sum_logits": -5.929195880889893, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -5.929195880889893, "logits_per_char": -0.8470279829842704, "num_chars": 7}, {"sum_logits": -9.708765029907227, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -9.708765029907227, "logits_per_char": -0.9708765029907227, "num_chars": 10}, {"sum_logits": -8.003950119018555, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.396806716918945, "logits_per_token": -8.003950119018555, "logits_per_char": -0.5717107227870396, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 192, "native_id": "2dd138a63b5895cf737ced793cc668e7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.56561279296875, "incorrect_loss_raw": 9.180469989776611, "correct_loss_per_char": 0.608716462787829, "incorrect_loss_per_char": 0.7662681189321336, "correct_loss_per_token": 3.8552042643229165, "incorrect_loss_per_token": 4.0020566781361895, "correct_loss_uncond": -8.737602233886719, "incorrect_loss_uncond": -9.270776271820068}, "model_output": [{"sum_logits": -5.497189044952393, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.79379653930664, "logits_per_token": -2.7485945224761963, "logits_per_char": -0.6871486306190491, "num_chars": 8}, {"sum_logits": -9.59009075164795, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.838199615478516, "logits_per_token": -4.795045375823975, "logits_per_char": -0.6850064822605678, "num_chars": 14}, {"sum_logits": -14.116279602050781, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.336471557617188, "logits_per_token": -4.705426534016927, "logits_per_char": -0.9410853068033854, "num_chars": 15}, {"sum_logits": -7.518320560455322, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.836517333984375, "logits_per_token": -3.759160280227661, "logits_per_char": -0.7518320560455323, "num_chars": 10}, {"sum_logits": -11.56561279296875, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.30321502685547, "logits_per_token": -3.8552042643229165, "logits_per_char": -0.608716462787829, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 193, "native_id": "b33047f46db680a9b630c13e8ca115cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.419710159301758, "incorrect_loss_raw": 4.703460335731506, "correct_loss_per_char": 0.7016425132751465, "incorrect_loss_per_char": 0.8255469451347988, "correct_loss_per_token": 4.209855079650879, "incorrect_loss_per_token": 4.115732729434967, "correct_loss_uncond": -8.654504776000977, "incorrect_loss_uncond": -8.168603777885437}, "model_output": [{"sum_logits": -6.472099781036377, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -6.472099781036377, "logits_per_char": -1.6180249452590942, "num_chars": 4}, {"sum_logits": -4.961956977844238, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -4.961956977844238, "logits_per_char": -0.8269928296407064, "num_chars": 6}, {"sum_logits": -2.6779637336730957, "num_tokens": 1, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -12.333724975585938, "logits_per_token": -2.6779637336730957, "logits_per_char": -0.33474546670913696, "num_chars": 8}, {"sum_logits": -4.7018208503723145, "num_tokens": 2, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -15.832003593444824, "logits_per_token": -2.3509104251861572, "logits_per_char": -0.5224245389302572, "num_chars": 9}, {"sum_logits": -8.419710159301758, "num_tokens": 2, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -4.209855079650879, "logits_per_char": -0.7016425132751465, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 194, "native_id": "f20d40bc4af588223e880e0bb58b27b8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.79066276550293, "incorrect_loss_raw": 14.758977890014648, "correct_loss_per_char": 0.48255523045857746, "incorrect_loss_per_char": 1.403750590844588, "correct_loss_per_token": 2.895331382751465, "incorrect_loss_per_token": 7.379488945007324, "correct_loss_uncond": -14.45930290222168, "incorrect_loss_uncond": -3.445159912109375}, "model_output": [{"sum_logits": -16.26254653930664, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.381784439086914, "logits_per_token": -8.13127326965332, "logits_per_char": -1.4784133217551492, "num_chars": 11}, {"sum_logits": -10.562300682067871, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.78326416015625, "logits_per_token": -5.2811503410339355, "logits_per_char": -1.056230068206787, "num_chars": 10}, {"sum_logits": -5.79066276550293, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.24996566772461, "logits_per_token": -2.895331382751465, "logits_per_char": -0.48255523045857746, "num_chars": 12}, {"sum_logits": -15.482220649719238, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.434581756591797, "logits_per_token": -7.741110324859619, "logits_per_char": -1.4074746045199307, "num_chars": 11}, {"sum_logits": -16.728843688964844, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.216920852661133, "logits_per_token": -8.364421844482422, "logits_per_char": -1.6728843688964843, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 195, "native_id": "b6b66d4519a84b8331ea55f84767e9df", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.267025470733643, "incorrect_loss_raw": 21.507288217544556, "correct_loss_per_char": 0.3047875336238316, "incorrect_loss_per_char": 2.063921623880213, "correct_loss_per_token": 2.1335127353668213, "incorrect_loss_per_token": 8.763396739959717, "correct_loss_uncond": -11.541480541229248, "incorrect_loss_uncond": -4.218889474868774}, "model_output": [{"sum_logits": -4.267025470733643, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.80850601196289, "logits_per_token": -2.1335127353668213, "logits_per_char": -0.3047875336238316, "num_chars": 14}, {"sum_logits": -13.333564758300781, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.32390785217285, "logits_per_token": -6.666782379150391, "logits_per_char": -1.2121422507546165, "num_chars": 11}, {"sum_logits": -34.20506286621094, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -34.03660202026367, "logits_per_token": -11.401687622070312, "logits_per_char": -3.4205062866210936, "num_chars": 10}, {"sum_logits": -13.560873985290527, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.429122924804688, "logits_per_token": -4.520291328430176, "logits_per_char": -1.130072832107544, "num_chars": 12}, {"sum_logits": -24.929651260375977, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -29.11507797241211, "logits_per_token": -12.464825630187988, "logits_per_char": -2.4929651260375976, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 196, "native_id": "952cf4b2f7a434b2eeae9f4c7ed89c0a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.967855930328369, "incorrect_loss_raw": 10.620104789733887, "correct_loss_per_char": 0.5668365614754813, "incorrect_loss_per_char": 1.961630827188492, "correct_loss_per_token": 3.967855930328369, "incorrect_loss_per_token": 10.620104789733887, "correct_loss_uncond": -7.3730788230896, "incorrect_loss_uncond": -2.6780059337615967}, "model_output": [{"sum_logits": -7.89699649810791, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -10.796917915344238, "logits_per_token": -7.89699649810791, "logits_per_char": -1.9742491245269775, "num_chars": 4}, {"sum_logits": -8.72091293334961, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.950617790222168, "logits_per_token": -8.72091293334961, "logits_per_char": -1.7441825866699219, "num_chars": 5}, {"sum_logits": -11.93703842163086, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -11.93703842163086, "logits_per_char": -2.387407684326172, "num_chars": 5}, {"sum_logits": -13.925471305847168, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.139683723449707, "logits_per_token": -13.925471305847168, "logits_per_char": -1.740683913230896, "num_chars": 8}, {"sum_logits": -3.967855930328369, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.340934753417969, "logits_per_token": -3.967855930328369, "logits_per_char": -0.5668365614754813, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 197, "native_id": "b63e5cd88bfe75d29ff9fdc6dd97fed6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.678044319152832, "incorrect_loss_raw": 6.907666504383087, "correct_loss_per_char": 0.4678044319152832, "incorrect_loss_per_char": 0.837748003922976, "correct_loss_per_token": 2.339022159576416, "incorrect_loss_per_token": 4.681252062320709, "correct_loss_uncond": -10.105219841003418, "incorrect_loss_uncond": -9.45567911863327}, "model_output": [{"sum_logits": -4.678044319152832, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.78326416015625, "logits_per_token": -2.339022159576416, "logits_per_char": -0.4678044319152832, "num_chars": 10}, {"sum_logits": -7.39189338684082, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.576138496398926, "logits_per_token": -7.39189338684082, "logits_per_char": -1.2319822311401367, "num_chars": 6}, {"sum_logits": -7.704952239990234, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.947349548339844, "logits_per_token": -3.852476119995117, "logits_per_char": -0.8561058044433594, "num_chars": 9}, {"sum_logits": -2.427457094192505, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.910294532775879, "logits_per_token": -2.427457094192505, "logits_per_char": -0.485491418838501, "num_chars": 5}, {"sum_logits": -10.106363296508789, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.01959991455078, "logits_per_token": -5.0531816482543945, "logits_per_char": -0.7774125612699069, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 198, "native_id": "ec5a336080e37fbe95d72ad5f9c65ba7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.943675994873047, "incorrect_loss_raw": 15.5497887134552, "correct_loss_per_char": 1.657279332478841, "incorrect_loss_per_char": 1.3669305086135863, "correct_loss_per_token": 9.943675994873047, "incorrect_loss_per_token": 11.37197494506836, "correct_loss_uncond": -6.559963226318359, "incorrect_loss_uncond": -1.9896693229675293}, "model_output": [{"sum_logits": -13.0263671875, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.510114669799805, "logits_per_token": -13.0263671875, "logits_per_char": -1.6282958984375, "num_chars": 8}, {"sum_logits": -15.750277519226074, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.527297019958496, "logits_per_token": -15.750277519226074, "logits_per_char": -1.9687846899032593, "num_chars": 8}, {"sum_logits": -9.943675994873047, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.503639221191406, "logits_per_token": -9.943675994873047, "logits_per_char": -1.657279332478841, "num_chars": 6}, {"sum_logits": -17.46123504638672, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.38828468322754, "logits_per_token": -8.73061752319336, "logits_per_char": -0.8730617523193359, "num_chars": 20}, {"sum_logits": -15.961275100708008, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.732135772705078, "logits_per_token": -7.980637550354004, "logits_per_char": -0.9975796937942505, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 199, "native_id": "6386bcf080633bc3eeb3317a5435b7b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.561152458190918, "incorrect_loss_raw": 9.616764068603516, "correct_loss_per_char": 0.937307494027274, "incorrect_loss_per_char": 1.6529258330663046, "correct_loss_per_token": 6.561152458190918, "incorrect_loss_per_token": 9.616764068603516, "correct_loss_uncond": -6.515586853027344, "incorrect_loss_uncond": -4.566860914230347}, "model_output": [{"sum_logits": -9.89726734161377, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.035384178161621, "logits_per_token": -9.89726734161377, "logits_per_char": -1.9794534683227538, "num_chars": 5}, {"sum_logits": -9.355281829833984, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.632002830505371, "logits_per_token": -9.355281829833984, "logits_per_char": -1.169410228729248, "num_chars": 8}, {"sum_logits": -11.401853561401367, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.709630012512207, "logits_per_token": -11.401853561401367, "logits_per_char": -1.9003089269002278, "num_chars": 6}, {"sum_logits": -7.812653541564941, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.35748291015625, "logits_per_token": -7.812653541564941, "logits_per_char": -1.5625307083129882, "num_chars": 5}, {"sum_logits": -6.561152458190918, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -6.561152458190918, "logits_per_char": -0.937307494027274, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 200, "native_id": "43ab0ff711e60d51f943bbd2cdd6515a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.016365051269531, "incorrect_loss_raw": 8.46304178237915, "correct_loss_per_char": 0.5010228157043457, "incorrect_loss_per_char": 1.0956567869041907, "correct_loss_per_token": 4.008182525634766, "incorrect_loss_per_token": 8.46304178237915, "correct_loss_uncond": -10.941871643066406, "incorrect_loss_uncond": -5.735113143920898}, "model_output": [{"sum_logits": -9.208588600158691, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -9.208588600158691, "logits_per_char": -1.3155126571655273, "num_chars": 7}, {"sum_logits": -7.282909393310547, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -7.282909393310547, "logits_per_char": -1.2138182322184246, "num_chars": 6}, {"sum_logits": -9.29242992401123, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -9.29242992401123, "logits_per_char": -0.8447663567282937, "num_chars": 11}, {"sum_logits": -8.068239212036133, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -8.068239212036133, "logits_per_char": -1.0085299015045166, "num_chars": 8}, {"sum_logits": -8.016365051269531, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.958236694335938, "logits_per_token": -4.008182525634766, "logits_per_char": -0.5010228157043457, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 201, "native_id": "11c4c78d61e8212f0984fd07eb22b669", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.608494758605957, "incorrect_loss_raw": 10.825685977935791, "correct_loss_per_char": 0.6583563940865653, "incorrect_loss_per_char": 1.1531984348334108, "correct_loss_per_token": 4.608494758605957, "incorrect_loss_per_token": 7.0614480177561445, "correct_loss_uncond": -12.042925834655762, "incorrect_loss_uncond": -5.043600797653198}, "model_output": [{"sum_logits": -4.608494758605957, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -4.608494758605957, "logits_per_char": -0.6583563940865653, "num_chars": 7}, {"sum_logits": -6.451306343078613, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -6.451306343078613, "logits_per_char": -1.2902612686157227, "num_chars": 5}, {"sum_logits": -12.765385627746582, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.990612030029297, "logits_per_token": -4.255128542582194, "logits_per_char": -0.9819527405958909, "num_chars": 13}, {"sum_logits": -10.99266242980957, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -10.99266242980957, "logits_per_char": -1.570380347115653, "num_chars": 7}, {"sum_logits": -13.093389511108398, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.938308715820312, "logits_per_token": -6.546694755554199, "logits_per_char": -0.7701993830063764, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 202, "native_id": "e61891746aa94ab57aaa754614034aef", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.879292488098145, "incorrect_loss_raw": 11.067314505577087, "correct_loss_per_char": 0.5439646244049072, "incorrect_loss_per_char": 1.0720070559870112, "correct_loss_per_token": 2.719823122024536, "incorrect_loss_per_token": 9.12142562866211, "correct_loss_uncond": -3.640528678894043, "incorrect_loss_uncond": -4.988125443458557}, "model_output": [{"sum_logits": -15.567111015319824, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.546363830566406, "logits_per_token": -7.783555507659912, "logits_per_char": -0.972944438457489, "num_chars": 16}, {"sum_logits": -6.644394397735596, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.019386291503906, "logits_per_token": -6.644394397735596, "logits_per_char": -0.9491991996765137, "num_chars": 7}, {"sum_logits": -10.879292488098145, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.519821166992188, "logits_per_token": -2.719823122024536, "logits_per_char": -0.5439646244049072, "num_chars": 20}, {"sum_logits": -11.479145050048828, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.23922348022461, "logits_per_token": -11.479145050048828, "logits_per_char": -1.0435586409135298, "num_chars": 11}, {"sum_logits": -10.578607559204102, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.416786193847656, "logits_per_token": -10.578607559204102, "logits_per_char": -1.3223259449005127, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 203, "native_id": "97da9aa4ea4b22744ec51cba49f35bfc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.793842315673828, "incorrect_loss_raw": 7.198664307594299, "correct_loss_per_char": 0.7587684631347656, "incorrect_loss_per_char": 1.3320094758556003, "correct_loss_per_token": 3.793842315673828, "incorrect_loss_per_token": 7.198664307594299, "correct_loss_uncond": -11.355438232421875, "incorrect_loss_uncond": -6.094541430473328}, "model_output": [{"sum_logits": -7.665930271148682, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -7.665930271148682, "logits_per_char": -1.9164825677871704, "num_chars": 4}, {"sum_logits": -6.444587707519531, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -6.444587707519531, "logits_per_char": -1.0740979512532551, "num_chars": 6}, {"sum_logits": -3.793842315673828, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.149280548095703, "logits_per_token": -3.793842315673828, "logits_per_char": -0.7587684631347656, "num_chars": 5}, {"sum_logits": -10.068374633789062, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.154549598693848, "logits_per_token": -10.068374633789062, "logits_per_char": -1.6780624389648438, "num_chars": 6}, {"sum_logits": -4.615764617919922, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.52496337890625, "logits_per_token": -4.615764617919922, "logits_per_char": -0.6593949454171317, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 204, "native_id": "46241bc83e8d81196ae5783b2b9854a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.243764877319336, "incorrect_loss_raw": 15.92297101020813, "correct_loss_per_char": 1.2039786252108486, "incorrect_loss_per_char": 1.4980163314403632, "correct_loss_per_token": 6.621882438659668, "incorrect_loss_per_token": 8.67245602607727, "correct_loss_uncond": -9.049829483032227, "incorrect_loss_uncond": -3.0989937782287598}, "model_output": [{"sum_logits": -16.889949798583984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.992469787597656, "logits_per_token": -8.444974899291992, "logits_per_char": -1.4074958165486653, "num_chars": 12}, {"sum_logits": -16.485126495361328, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -8.242563247680664, "logits_per_char": -1.268086653489333, "num_chars": 13}, {"sum_logits": -5.6877641677856445, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -5.6877641677856445, "logits_per_char": -1.4219410419464111, "num_chars": 4}, {"sum_logits": -24.629043579101562, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.687335968017578, "logits_per_token": -12.314521789550781, "logits_per_char": -1.8945418137770433, "num_chars": 13}, {"sum_logits": -13.243764877319336, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.293594360351562, "logits_per_token": -6.621882438659668, "logits_per_char": -1.2039786252108486, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 205, "native_id": "18844d3aa4e52b331b5382c8244cf4db", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.746105194091797, "incorrect_loss_raw": 16.99209451675415, "correct_loss_per_char": 0.44200809185321516, "incorrect_loss_per_char": 1.337577589149149, "correct_loss_per_token": 2.8730525970458984, "incorrect_loss_per_token": 7.520707845687866, "correct_loss_uncond": -13.347658157348633, "incorrect_loss_uncond": -3.897275924682617}, "model_output": [{"sum_logits": -5.746105194091797, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.09376335144043, "logits_per_token": -2.8730525970458984, "logits_per_char": -0.44200809185321516, "num_chars": 13}, {"sum_logits": -14.143957138061523, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.100549697875977, "logits_per_token": -7.071978569030762, "logits_per_char": -1.4143957138061523, "num_chars": 10}, {"sum_logits": -13.82672119140625, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.46237564086914, "logits_per_token": -6.913360595703125, "logits_per_char": -1.0635939378004808, "num_chars": 13}, {"sum_logits": -23.408145904541016, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.761734008789062, "logits_per_token": -7.802715301513672, "logits_per_char": -1.950678825378418, "num_chars": 12}, {"sum_logits": -16.589553833007812, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -22.23282241821289, "logits_per_token": -8.294776916503906, "logits_per_char": -0.9216418796115451, "num_chars": 18}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 206, "native_id": "056b33c7050c167b0d4348d40d169358", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.85237979888916, "incorrect_loss_raw": 10.321723937988281, "correct_loss_per_char": 0.9753966331481934, "incorrect_loss_per_char": 1.1886285156957688, "correct_loss_per_token": 5.85237979888916, "incorrect_loss_per_token": 6.666934529940288, "correct_loss_uncond": -6.72800350189209, "incorrect_loss_uncond": -6.632546663284302}, "model_output": [{"sum_logits": -8.160061836242676, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.509014129638672, "logits_per_token": -4.080030918121338, "logits_per_char": -0.9066735373602973, "num_chars": 9}, {"sum_logits": -5.85237979888916, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -5.85237979888916, "logits_per_char": -0.9753966331481934, "num_chars": 6}, {"sum_logits": -6.751094818115234, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -6.751094818115234, "logits_per_char": -1.3502189636230468, "num_chars": 5}, {"sum_logits": -10.567049026489258, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -10.567049026489258, "logits_per_char": -1.5095784323556083, "num_chars": 7}, {"sum_logits": -15.808690071105957, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.523784637451172, "logits_per_token": -5.269563357035319, "logits_per_char": -0.9880431294441223, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 207, "native_id": "31d7dd1d00aabe411568df3e72d5b5e0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.639673233032227, "incorrect_loss_raw": 10.208166599273682, "correct_loss_per_char": 0.9599636925591363, "incorrect_loss_per_char": 1.4730724817746643, "correct_loss_per_token": 8.639673233032227, "incorrect_loss_per_token": 6.347140789031982, "correct_loss_uncond": -5.3266096115112305, "incorrect_loss_uncond": -4.965498685836792}, "model_output": [{"sum_logits": -9.802157402038574, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.53412437438965, "logits_per_token": -4.901078701019287, "logits_per_char": -0.8911052183671431, "num_chars": 11}, {"sum_logits": -9.944459915161133, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.415973663330078, "logits_per_token": -9.944459915161133, "logits_per_char": -1.9888919830322265, "num_chars": 5}, {"sum_logits": -11.967645645141602, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -5.983822822570801, "logits_per_char": -1.7096636635916573, "num_chars": 7}, {"sum_logits": -8.639673233032227, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.966282844543457, "logits_per_token": -8.639673233032227, "logits_per_char": -0.9599636925591363, "num_chars": 9}, {"sum_logits": -9.118403434753418, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.20969009399414, "logits_per_token": -4.559201717376709, "logits_per_char": -1.302629062107631, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 208, "native_id": "cbf3dd48b4d591fc872a53cd4b9dd3af", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.526021480560303, "incorrect_loss_raw": 17.585684299468994, "correct_loss_per_char": 0.4350680987040202, "incorrect_loss_per_char": 1.5186042645038702, "correct_loss_per_token": 3.2630107402801514, "incorrect_loss_per_token": 7.972431182861328, "correct_loss_uncond": -14.161055088043213, "incorrect_loss_uncond": -6.713649749755859}, "model_output": [{"sum_logits": -15.15781021118164, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.55019760131836, "logits_per_token": -7.57890510559082, "logits_per_char": -1.1659854008601263, "num_chars": 13}, {"sum_logits": -27.760639190673828, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -38.4371223449707, "logits_per_token": -5.552127838134766, "logits_per_char": -1.1566932996114094, "num_chars": 24}, {"sum_logits": -6.526021480560303, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.687076568603516, "logits_per_token": -3.2630107402801514, "logits_per_char": -0.4350680987040202, "num_chars": 15}, {"sum_logits": -17.331192016601562, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.90479278564453, "logits_per_token": -8.665596008300781, "logits_per_char": -1.7331192016601562, "num_chars": 10}, {"sum_logits": -10.093095779418945, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -10.093095779418945, "logits_per_char": -2.018619155883789, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 209, "native_id": "60e8f1a86d4063895f340cd1e3c55f50", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.808110237121582, "incorrect_loss_raw": 13.682700634002686, "correct_loss_per_char": 1.0621623259324293, "incorrect_loss_per_char": 1.001027196172684, "correct_loss_per_token": 6.904055118560791, "incorrect_loss_per_token": 8.152939955393474, "correct_loss_uncond": -4.123831748962402, "incorrect_loss_uncond": -3.823969841003418}, "model_output": [{"sum_logits": -13.319511413574219, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.483266830444336, "logits_per_token": -6.659755706787109, "logits_per_char": -0.8879674275716146, "num_chars": 15}, {"sum_logits": -10.85457706451416, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.15999984741211, "logits_per_token": -10.85457706451416, "logits_per_char": -0.7753269331795829, "num_chars": 14}, {"sum_logits": -13.808110237121582, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.931941986083984, "logits_per_token": -6.904055118560791, "logits_per_char": -1.0621623259324293, "num_chars": 13}, {"sum_logits": -23.18893051147461, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.343524932861328, "logits_per_token": -7.72964350382487, "logits_per_char": -1.2882739173041449, "num_chars": 18}, {"sum_logits": -7.367783546447754, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.03989028930664, "logits_per_token": -7.367783546447754, "logits_per_char": -1.0525405066353934, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 210, "native_id": "eee8cb7a0d806a62d2de24831f82e3e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.148681640625, "incorrect_loss_raw": 10.424136638641357, "correct_loss_per_char": 0.28624378551136365, "incorrect_loss_per_char": 1.1285895906915568, "correct_loss_per_token": 3.148681640625, "incorrect_loss_per_token": 10.424136638641357, "correct_loss_uncond": -11.116531372070312, "incorrect_loss_uncond": -3.7180423736572266}, "model_output": [{"sum_logits": -3.148681640625, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.265213012695312, "logits_per_token": -3.148681640625, "logits_per_char": -0.28624378551136365, "num_chars": 11}, {"sum_logits": -9.8497953414917, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.990914344787598, "logits_per_token": -9.8497953414917, "logits_per_char": -1.0944217046101887, "num_chars": 9}, {"sum_logits": -7.646951675415039, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.127937316894531, "logits_per_token": -7.646951675415039, "logits_per_char": -0.8496612972683377, "num_chars": 9}, {"sum_logits": -10.861945152282715, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.989985466003418, "logits_per_token": -10.861945152282715, "logits_per_char": -1.3577431440353394, "num_chars": 8}, {"sum_logits": -13.337854385375977, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.459878921508789, "logits_per_token": -13.337854385375977, "logits_per_char": -1.2125322168523616, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 211, "native_id": "9a23a7f04e63bf9f4c7dfe50c58abfd2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.159179210662842, "incorrect_loss_raw": 8.280418157577515, "correct_loss_per_char": 0.7698974013328552, "incorrect_loss_per_char": 1.3196772480294818, "correct_loss_per_token": 6.159179210662842, "incorrect_loss_per_token": 8.280418157577515, "correct_loss_uncond": -7.206338405609131, "incorrect_loss_uncond": -5.372781276702881}, "model_output": [{"sum_logits": -10.330389022827148, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -10.330389022827148, "logits_per_char": -1.7217315038045247, "num_chars": 6}, {"sum_logits": -10.594733238220215, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.40595531463623, "logits_per_token": -10.594733238220215, "logits_per_char": -1.513533319745745, "num_chars": 7}, {"sum_logits": -6.159179210662842, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.365517616271973, "logits_per_token": -6.159179210662842, "logits_per_char": -0.7698974013328552, "num_chars": 8}, {"sum_logits": -6.918338298797607, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.748185157775879, "logits_per_token": -6.918338298797607, "logits_per_char": -1.3836676597595214, "num_chars": 5}, {"sum_logits": -5.278212070465088, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.38709831237793, "logits_per_token": -5.278212070465088, "logits_per_char": -0.659776508808136, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 212, "native_id": "e3426e4f60c142aa3d813479f79d6305", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.456316947937012, "incorrect_loss_raw": 9.736454010009766, "correct_loss_per_char": 0.4960288134488193, "incorrect_loss_per_char": 1.0565497858183726, "correct_loss_per_token": 5.456316947937012, "incorrect_loss_per_token": 4.348173201084137, "correct_loss_uncond": -9.506499290466309, "incorrect_loss_uncond": -6.987100601196289}, "model_output": [{"sum_logits": -14.287137985229492, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -3.571784496307373, "logits_per_char": -1.020509856087821, "num_chars": 14}, {"sum_logits": -13.24522876739502, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.698442459106445, "logits_per_token": -4.41507625579834, "logits_per_char": -1.3245228767395019, "num_chars": 10}, {"sum_logits": -5.456316947937012, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -5.456316947937012, "logits_per_char": -0.4960288134488193, "num_chars": 11}, {"sum_logits": -4.015234470367432, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.166976928710938, "logits_per_token": -2.007617235183716, "logits_per_char": -0.40152344703674314, "num_chars": 10}, {"sum_logits": -7.398214817047119, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -7.398214817047119, "logits_per_char": -1.479642963409424, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 213, "native_id": "3526550b02d9594abd4fc43553010fc6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.641212463378906, "incorrect_loss_raw": 9.777937054634094, "correct_loss_per_char": 1.3773160661969865, "incorrect_loss_per_char": 0.9211409697930018, "correct_loss_per_token": 9.641212463378906, "incorrect_loss_per_token": 4.888968527317047, "correct_loss_uncond": -7.0102081298828125, "incorrect_loss_uncond": -7.913328051567078}, "model_output": [{"sum_logits": -10.064393043518066, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.958351135253906, "logits_per_token": -5.032196521759033, "logits_per_char": -0.6709595362345377, "num_chars": 15}, {"sum_logits": -11.155145645141602, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.7442626953125, "logits_per_token": -5.577572822570801, "logits_per_char": -0.9295954704284668, "num_chars": 12}, {"sum_logits": -10.981246948242188, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.061481475830078, "logits_per_token": -5.490623474121094, "logits_per_char": -1.2201385498046875, "num_chars": 9}, {"sum_logits": -9.641212463378906, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -9.641212463378906, "logits_per_char": -1.3773160661969865, "num_chars": 7}, {"sum_logits": -6.9109625816345215, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.000965118408203, "logits_per_token": -3.4554812908172607, "logits_per_char": -0.8638703227043152, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 214, "native_id": "e567c94d88829fb07a30e3d46c02e664", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.119937896728516, "incorrect_loss_raw": 15.878446817398071, "correct_loss_per_char": 0.7314196995326451, "incorrect_loss_per_char": 1.4128742103270668, "correct_loss_per_token": 5.119937896728516, "incorrect_loss_per_token": 6.890669584274292, "correct_loss_uncond": -11.216180801391602, "incorrect_loss_uncond": -1.328996181488037}, "model_output": [{"sum_logits": -17.645179748535156, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.366657257080078, "logits_per_token": -4.411294937133789, "logits_per_char": -1.0379517499138327, "num_chars": 17}, {"sum_logits": -17.645179748535156, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.366657257080078, "logits_per_token": -4.411294937133789, "logits_per_char": -1.0379517499138327, "num_chars": 17}, {"sum_logits": -9.256749153137207, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.406512260437012, "logits_per_token": -9.256749153137207, "logits_per_char": -1.8513498306274414, "num_chars": 5}, {"sum_logits": -18.966678619384766, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.689945220947266, "logits_per_token": -9.483339309692383, "logits_per_char": -1.7242435108531604, "num_chars": 11}, {"sum_logits": -5.119937896728516, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.336118698120117, "logits_per_token": -5.119937896728516, "logits_per_char": -0.7314196995326451, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 215, "native_id": "cf5a710c931779fb3dde198e0ace3b6a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.493686676025391, "incorrect_loss_raw": 14.135082721710205, "correct_loss_per_char": 0.5903351523659446, "incorrect_loss_per_char": 1.2494857360016216, "correct_loss_per_token": 3.2468433380126953, "incorrect_loss_per_token": 6.132232069969177, "correct_loss_uncond": -9.817314147949219, "incorrect_loss_uncond": -8.851975917816162}, "model_output": [{"sum_logits": -10.439502716064453, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.338302612304688, "logits_per_token": -5.219751358032227, "logits_per_char": -0.9490457014604048, "num_chars": 11}, {"sum_logits": -20.19822120666504, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.77570152282715, "logits_per_token": -10.09911060333252, "logits_per_char": -2.019822120666504, "num_chars": 10}, {"sum_logits": -14.964948654174805, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.45539093017578, "logits_per_token": -3.741237163543701, "logits_per_char": -0.9353092908859253, "num_chars": 16}, {"sum_logits": -10.937658309936523, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.37883949279785, "logits_per_token": -5.468829154968262, "logits_per_char": -1.0937658309936524, "num_chars": 10}, {"sum_logits": -6.493686676025391, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.31100082397461, "logits_per_token": -3.2468433380126953, "logits_per_char": -0.5903351523659446, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 216, "native_id": "0f2377604e628c55ba588366139396b9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.404988408088684, "incorrect_loss_raw": 8.132763087749481, "correct_loss_per_char": 0.15610982312096489, "incorrect_loss_per_char": 0.841270277897517, "correct_loss_per_token": 0.702494204044342, "incorrect_loss_per_token": 5.804910719394684, "correct_loss_uncond": -14.976954340934753, "incorrect_loss_uncond": -6.631400883197784}, "model_output": [{"sum_logits": -8.04239273071289, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.329529762268066, "logits_per_token": -8.04239273071289, "logits_per_char": -1.0052990913391113, "num_chars": 8}, {"sum_logits": -6.680208206176758, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.702447891235352, "logits_per_token": -6.680208206176758, "logits_per_char": -0.8350260257720947, "num_chars": 8}, {"sum_logits": -15.365348815917969, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.12720489501953, "logits_per_token": -7.682674407958984, "logits_per_char": -1.2804457346598308, "num_chars": 12}, {"sum_logits": -2.4431025981903076, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.897473335266113, "logits_per_token": -0.8143675327301025, "logits_per_char": -0.24431025981903076, "num_chars": 10}, {"sum_logits": -1.404988408088684, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -16.381942749023438, "logits_per_token": -0.702494204044342, "logits_per_char": -0.15610982312096489, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 217, "native_id": "ada088b7c97de80336ad043757c2db16", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6617467403411865, "incorrect_loss_raw": 14.090937852859497, "correct_loss_per_char": 0.7323493480682373, "incorrect_loss_per_char": 1.948475209871928, "correct_loss_per_token": 3.6617467403411865, "incorrect_loss_per_token": 11.672953844070435, "correct_loss_uncond": -7.357328653335571, "incorrect_loss_uncond": -0.3776588439941406}, "model_output": [{"sum_logits": -3.6617467403411865, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -3.6617467403411865, "logits_per_char": -0.7323493480682373, "num_chars": 5}, {"sum_logits": -19.3438720703125, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.581262588500977, "logits_per_token": -9.67193603515625, "logits_per_char": -1.2089920043945312, "num_chars": 16}, {"sum_logits": -11.099143028259277, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.425796508789062, "logits_per_token": -11.099143028259277, "logits_per_char": -1.8498571713765461, "num_chars": 6}, {"sum_logits": -12.447868347167969, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -12.447868347167969, "logits_per_char": -2.489573669433594, "num_chars": 5}, {"sum_logits": -13.472867965698242, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.201011657714844, "logits_per_token": -13.472867965698242, "logits_per_char": -2.2454779942830405, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 218, "native_id": "beef0aa2058297904bb4acc1dc340c85", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.163324356079102, "incorrect_loss_raw": 14.530242443084717, "correct_loss_per_char": 0.9239385778253729, "incorrect_loss_per_char": 1.6616641283035278, "correct_loss_per_token": 5.081662178039551, "incorrect_loss_per_token": 7.764632940292358, "correct_loss_uncond": -7.97309684753418, "incorrect_loss_uncond": -1.1886754035949707}, "model_output": [{"sum_logits": -13.221603393554688, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.35986328125, "logits_per_token": -6.610801696777344, "logits_per_char": -1.3221603393554688, "num_chars": 10}, {"sum_logits": -10.163324356079102, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.13642120361328, "logits_per_token": -5.081662178039551, "logits_per_char": -0.9239385778253729, "num_chars": 11}, {"sum_logits": -13.16476058959961, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.11971378326416, "logits_per_token": -13.16476058959961, "logits_per_char": -3.2911901473999023, "num_chars": 4}, {"sum_logits": -13.397272109985352, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.918330192565918, "logits_per_token": -6.698636054992676, "logits_per_char": -1.1164393424987793, "num_chars": 12}, {"sum_logits": -18.33733367919922, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.477764129638672, "logits_per_token": -4.584333419799805, "logits_per_char": -0.916866683959961, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 219, "native_id": "ba9a05bd2086c0d37733e26479d6630f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.907262802124023, "incorrect_loss_raw": 12.789231181144714, "correct_loss_per_char": 0.7674736446804471, "incorrect_loss_per_char": 1.3319691771116011, "correct_loss_per_token": 3.4536314010620117, "incorrect_loss_per_token": 8.068651735782623, "correct_loss_uncond": -13.659467697143555, "incorrect_loss_uncond": -3.6250134706497192}, "model_output": [{"sum_logits": -12.7416353225708, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.541637420654297, "logits_per_token": -6.3708176612854, "logits_per_char": -1.0618029435475667, "num_chars": 12}, {"sum_logits": -17.031986236572266, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.29062843322754, "logits_per_token": -8.515993118286133, "logits_per_char": -1.4193321863810222, "num_chars": 12}, {"sum_logits": -13.392289161682129, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -13.392289161682129, "logits_per_char": -2.232048193613688, "num_chars": 6}, {"sum_logits": -7.991014003753662, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.868898391723633, "logits_per_token": -3.995507001876831, "logits_per_char": -0.6146933849041278, "num_chars": 13}, {"sum_logits": -6.907262802124023, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.566730499267578, "logits_per_token": -3.4536314010620117, "logits_per_char": -0.7674736446804471, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 220, "native_id": "6b0bf501aa68b06ddc5ad72ac5ff68fc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.124625205993652, "incorrect_loss_raw": 11.596477031707764, "correct_loss_per_char": 0.7320893151419503, "incorrect_loss_per_char": 1.4587077993193482, "correct_loss_per_token": 5.124625205993652, "incorrect_loss_per_token": 7.232119083404541, "correct_loss_uncond": -9.094568252563477, "incorrect_loss_uncond": -5.0831732749938965}, "model_output": [{"sum_logits": -11.471044540405273, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.510272979736328, "logits_per_token": -11.471044540405273, "logits_per_char": -2.8677611351013184, "num_chars": 4}, {"sum_logits": -12.329144477844238, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -6.164572238922119, "logits_per_char": -0.7252437928143669, "num_chars": 17}, {"sum_logits": -5.124625205993652, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -5.124625205993652, "logits_per_char": -0.7320893151419503, "num_chars": 7}, {"sum_logits": -13.251054763793945, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.96778106689453, "logits_per_token": -6.625527381896973, "logits_per_char": -1.204641342163086, "num_chars": 11}, {"sum_logits": -9.334664344787598, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -4.667332172393799, "logits_per_char": -1.037184927198622, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 221, "native_id": "926298bbdd03ce96acfeb4408b888b61", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.264294624328613, "incorrect_loss_raw": 7.15842080116272, "correct_loss_per_char": 1.0660736560821533, "incorrect_loss_per_char": 1.1883261855159486, "correct_loss_per_token": 4.264294624328613, "incorrect_loss_per_token": 7.15842080116272, "correct_loss_uncond": -8.503982543945312, "incorrect_loss_uncond": -7.147001266479492}, "model_output": [{"sum_logits": -5.41730260848999, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.252589225769043, "logits_per_token": -5.41730260848999, "logits_per_char": -0.6771628260612488, "num_chars": 8}, {"sum_logits": -4.264294624328613, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.768277168273926, "logits_per_token": -4.264294624328613, "logits_per_char": -1.0660736560821533, "num_chars": 4}, {"sum_logits": -8.882488250732422, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.24931812286377, "logits_per_token": -8.882488250732422, "logits_per_char": -1.7764976501464844, "num_chars": 5}, {"sum_logits": -10.581705093383789, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.797646522521973, "logits_per_token": -10.581705093383789, "logits_per_char": -1.7636175155639648, "num_chars": 6}, {"sum_logits": -3.7521872520446777, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.922134399414062, "logits_per_token": -3.7521872520446777, "logits_per_char": -0.5360267502920968, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 222, "native_id": "faa0aa438b94c19be8ff52ee80d9e298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.90231704711914, "incorrect_loss_raw": 10.301714897155762, "correct_loss_per_char": 0.9911197315562855, "incorrect_loss_per_char": 0.9711379898627891, "correct_loss_per_token": 5.45115852355957, "incorrect_loss_per_token": 5.150857448577881, "correct_loss_uncond": -9.025150299072266, "incorrect_loss_uncond": -7.852242469787598}, "model_output": [{"sum_logits": -10.90231704711914, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.927467346191406, "logits_per_token": -5.45115852355957, "logits_per_char": -0.9911197315562855, "num_chars": 11}, {"sum_logits": -11.823816299438477, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.947349548339844, "logits_per_token": -5.911908149719238, "logits_per_char": -1.3137573666042752, "num_chars": 9}, {"sum_logits": -9.381200790405273, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.665353775024414, "logits_per_token": -4.690600395202637, "logits_per_char": -0.6700857707432338, "num_chars": 14}, {"sum_logits": -7.531795501708984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.79379653930664, "logits_per_token": -3.765897750854492, "logits_per_char": -0.941474437713623, "num_chars": 8}, {"sum_logits": -12.470046997070312, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.20932960510254, "logits_per_token": -6.235023498535156, "logits_per_char": -0.9592343843900241, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 223, "native_id": "9310c39a0752f28640c3a05cba1d5ca7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.245193004608154, "incorrect_loss_raw": 12.440193176269531, "correct_loss_per_char": 0.7806491255760193, "incorrect_loss_per_char": 1.5430362478479163, "correct_loss_per_token": 3.122596502304077, "incorrect_loss_per_token": 10.04619026184082, "correct_loss_uncond": -9.46281385421753, "incorrect_loss_uncond": -3.849271297454834}, "model_output": [{"sum_logits": -7.4683732986450195, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.246232032775879, "logits_per_token": -7.4683732986450195, "logits_per_char": -1.4936746597290038, "num_chars": 5}, {"sum_logits": -14.54459285736084, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.30998420715332, "logits_per_token": -14.54459285736084, "logits_per_char": -2.077798979622977, "num_chars": 7}, {"sum_logits": -19.152023315429688, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -9.576011657714844, "logits_per_char": -1.741093028675426, "num_chars": 11}, {"sum_logits": -8.595783233642578, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -8.595783233642578, "logits_per_char": -0.8595783233642578, "num_chars": 10}, {"sum_logits": -6.245193004608154, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.708006858825684, "logits_per_token": -3.122596502304077, "logits_per_char": -0.7806491255760193, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 224, "native_id": "fee5f4e9d8e37f0183e36eb9b8dbcbb9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.789091110229492, "incorrect_loss_raw": 10.29936933517456, "correct_loss_per_char": 0.5563636507306781, "incorrect_loss_per_char": 0.9629827790790133, "correct_loss_per_token": 3.894545555114746, "incorrect_loss_per_token": 7.8632941246032715, "correct_loss_uncond": -7.228029251098633, "incorrect_loss_uncond": -5.533269882202148}, "model_output": [{"sum_logits": -7.789091110229492, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.017120361328125, "logits_per_token": -3.894545555114746, "logits_per_char": -0.5563636507306781, "num_chars": 14}, {"sum_logits": -11.028783798217773, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.511117935180664, "logits_per_token": -5.514391899108887, "logits_per_char": -1.1028783798217774, "num_chars": 10}, {"sum_logits": -12.369922637939453, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.179875373840332, "logits_per_token": -12.369922637939453, "logits_per_char": -1.0308268864949544, "num_chars": 12}, {"sum_logits": -9.338953018188477, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.668292045593262, "logits_per_token": -9.338953018188477, "logits_per_char": -0.7782460848490397, "num_chars": 12}, {"sum_logits": -8.459817886352539, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -4.2299089431762695, "logits_per_char": -0.9399797651502821, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 225, "native_id": "5392af3f1c4665e95ff3354e5115de42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.964794635772705, "incorrect_loss_raw": 9.29864490032196, "correct_loss_per_char": 0.4970662196477254, "incorrect_loss_per_char": 1.163474837371281, "correct_loss_per_token": 5.964794635772705, "incorrect_loss_per_token": 6.698670148849487, "correct_loss_uncond": -8.421775341033936, "incorrect_loss_uncond": -5.855006575584412}, "model_output": [{"sum_logits": -10.144128799438477, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.86549186706543, "logits_per_token": -10.144128799438477, "logits_per_char": -1.4491612570626395, "num_chars": 7}, {"sum_logits": -6.25065279006958, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -6.25065279006958, "logits_per_char": -1.250130558013916, "num_chars": 5}, {"sum_logits": -7.522336006164551, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.291243553161621, "logits_per_token": -3.7611680030822754, "logits_per_char": -0.6268613338470459, "num_chars": 12}, {"sum_logits": -5.964794635772705, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.38656997680664, "logits_per_token": -5.964794635772705, "logits_per_char": -0.4970662196477254, "num_chars": 12}, {"sum_logits": -13.277462005615234, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.62598419189453, "logits_per_token": -6.638731002807617, "logits_per_char": -1.3277462005615235, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 226, "native_id": "4c5c74b3287492d6ddb2da4c8c0fd51a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.706600189208984, "incorrect_loss_raw": 10.451747059822083, "correct_loss_per_char": 0.7474470699534697, "incorrect_loss_per_char": 1.2310222251074654, "correct_loss_per_token": 4.235533396402995, "incorrect_loss_per_token": 5.225873529911041, "correct_loss_uncond": -7.402336120605469, "incorrect_loss_uncond": -5.427773594856262}, "model_output": [{"sum_logits": -13.37812328338623, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.139748573303223, "logits_per_token": -6.689061641693115, "logits_per_char": -1.48645814259847, "num_chars": 9}, {"sum_logits": -14.321819305419922, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -7.160909652709961, "logits_per_char": -1.193484942118327, "num_chars": 12}, {"sum_logits": -12.706600189208984, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -4.235533396402995, "logits_per_char": -0.7474470699534697, "num_chars": 17}, {"sum_logits": -10.102108001708984, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -5.051054000854492, "logits_per_char": -1.4431582859584264, "num_chars": 7}, {"sum_logits": -4.004937648773193, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.136329650878906, "logits_per_token": -2.0024688243865967, "logits_per_char": -0.8009875297546387, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 227, "native_id": "52f3eb6c9a6b9671050fc769d465ed03", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.714823722839355, "incorrect_loss_raw": 12.420582294464111, "correct_loss_per_char": 0.7653445516313825, "incorrect_loss_per_char": 1.3820446587744213, "correct_loss_per_token": 5.357411861419678, "incorrect_loss_per_token": 10.509183645248413, "correct_loss_uncond": -7.990654945373535, "incorrect_loss_uncond": -2.384323835372925}, "model_output": [{"sum_logits": -10.714823722839355, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.70547866821289, "logits_per_token": -5.357411861419678, "logits_per_char": -0.7653445516313825, "num_chars": 14}, {"sum_logits": -8.189706802368164, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.872519493103027, "logits_per_token": -8.189706802368164, "logits_per_char": -1.3649511337280273, "num_chars": 6}, {"sum_logits": -15.550869941711426, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.022927284240723, "logits_per_token": -15.550869941711426, "logits_per_char": -1.2959058284759521, "num_chars": 12}, {"sum_logits": -15.291189193725586, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.01616668701172, "logits_per_token": -7.645594596862793, "logits_per_char": -1.0922277995518275, "num_chars": 14}, {"sum_logits": -10.65056324005127, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -10.65056324005127, "logits_per_char": -1.7750938733418782, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 228, "native_id": "03ee30b5801b61aee791a551a9d9a49f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.148212432861328, "incorrect_loss_raw": 14.43753969669342, "correct_loss_per_char": 0.46801931207830255, "incorrect_loss_per_char": 1.1702725736093726, "correct_loss_per_token": 5.148212432861328, "incorrect_loss_per_token": 7.731491406758627, "correct_loss_uncond": -9.645737648010254, "incorrect_loss_uncond": -4.6769779920578}, "model_output": [{"sum_logits": -5.148212432861328, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -5.148212432861328, "logits_per_char": -0.46801931207830255, "num_chars": 11}, {"sum_logits": -7.97153902053833, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.92566204071045, "logits_per_token": -7.97153902053833, "logits_per_char": -0.797153902053833, "num_chars": 10}, {"sum_logits": -11.445393562316895, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.557573318481445, "logits_per_token": -5.722696781158447, "logits_per_char": -1.0404903238469905, "num_chars": 11}, {"sum_logits": -6.680981636047363, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -8.69396686553955, "logits_per_token": -6.680981636047363, "logits_per_char": -1.3361963272094726, "num_chars": 5}, {"sum_logits": -31.652244567871094, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -38.28086853027344, "logits_per_token": -10.550748189290365, "logits_per_char": -1.507249741327195, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 229, "native_id": "6d1d483745bc0aae0f4dd04e851ceffb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.431893825531006, "incorrect_loss_raw": 9.901006042957306, "correct_loss_per_char": 0.5847176205028187, "incorrect_loss_per_char": 0.9309707008657002, "correct_loss_per_token": 6.431893825531006, "incorrect_loss_per_token": 5.995922297239304, "correct_loss_uncond": -8.806945323944092, "incorrect_loss_uncond": -5.9746429324150085}, "model_output": [{"sum_logits": -3.1156165599823, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.839492797851562, "logits_per_token": -1.55780827999115, "logits_per_char": -0.25963471333185834, "num_chars": 12}, {"sum_logits": -6.431893825531006, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.238839149475098, "logits_per_token": -6.431893825531006, "logits_per_char": -0.5847176205028187, "num_chars": 11}, {"sum_logits": -4.764732837677002, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -4.764732837677002, "logits_per_char": -0.5955916047096252, "num_chars": 8}, {"sum_logits": -21.09379005432129, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.061994552612305, "logits_per_token": -7.03126335144043, "logits_per_char": -2.109379005432129, "num_chars": 10}, {"sum_logits": -10.629884719848633, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.396806716918945, "logits_per_token": -10.629884719848633, "logits_per_char": -0.759277479989188, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 230, "native_id": "bf10bfda7328c8671e15adf8546b64d7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.30073881149292, "incorrect_loss_raw": 8.640409231185913, "correct_loss_per_char": 0.4818853464993564, "incorrect_loss_per_char": 1.0377897514237298, "correct_loss_per_token": 2.65036940574646, "incorrect_loss_per_token": 6.800188660621643, "correct_loss_uncond": -11.531296253204346, "incorrect_loss_uncond": -4.961824893951416}, "model_output": [{"sum_logits": -5.125904560089111, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.594215393066406, "logits_per_token": -5.125904560089111, "logits_per_char": -0.8543174266815186, "num_chars": 6}, {"sum_logits": -14.72176456451416, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.635356903076172, "logits_per_token": -7.36088228225708, "logits_per_char": -1.472176456451416, "num_chars": 10}, {"sum_logits": -5.30073881149292, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.832035064697266, "logits_per_token": -2.65036940574646, "logits_per_char": -0.4818853464993564, "num_chars": 11}, {"sum_logits": -8.7359037399292, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.244208335876465, "logits_per_token": -8.7359037399292, "logits_per_char": -0.9706559711032443, "num_chars": 9}, {"sum_logits": -5.978064060211182, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -10.935155868530273, "logits_per_token": -5.978064060211182, "logits_per_char": -0.8540091514587402, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 231, "native_id": "0b3a3ee40dd25be9735ac5e3342ca4dd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2140281200408936, "incorrect_loss_raw": 13.260751008987427, "correct_loss_per_char": 0.3571142355600993, "incorrect_loss_per_char": 1.4206393419650563, "correct_loss_per_token": 1.0713427066802979, "incorrect_loss_per_token": 7.030412872632344, "correct_loss_uncond": -11.533801317214966, "incorrect_loss_uncond": -3.3196651935577393}, "model_output": [{"sum_logits": -10.450278282165527, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -5.225139141082764, "logits_per_char": -1.1611420313517253, "num_chars": 9}, {"sum_logits": -21.796958923339844, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.2936954498291, "logits_per_token": -7.265652974446614, "logits_per_char": -1.1472083643863076, "num_chars": 19}, {"sum_logits": -10.465951919555664, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.808954238891602, "logits_per_token": -10.465951919555664, "logits_per_char": -1.308243989944458, "num_chars": 8}, {"sum_logits": -10.329814910888672, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.685240745544434, "logits_per_token": -5.164907455444336, "logits_per_char": -2.0659629821777346, "num_chars": 5}, {"sum_logits": -3.2140281200408936, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.74782943725586, "logits_per_token": -1.0713427066802979, "logits_per_char": -0.3571142355600993, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 232, "native_id": "77e2a0b469b56bea81921a4a945ffcb5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.002630233764648, "incorrect_loss_raw": 10.387565612792969, "correct_loss_per_char": 0.9002630233764648, "incorrect_loss_per_char": 0.8376286120641799, "correct_loss_per_token": 9.002630233764648, "incorrect_loss_per_token": 3.929573893547058, "correct_loss_uncond": -5.102828025817871, "incorrect_loss_uncond": -6.245017051696777}, "model_output": [{"sum_logits": -9.002630233764648, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -9.002630233764648, "logits_per_char": -0.9002630233764648, "num_chars": 10}, {"sum_logits": -4.909011363983154, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.825310707092285, "logits_per_token": -4.909011363983154, "logits_per_char": -0.5454457071092393, "num_chars": 9}, {"sum_logits": -6.595885753631592, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.47786808013916, "logits_per_token": -3.297942876815796, "logits_per_char": -0.7328761948479546, "num_chars": 9}, {"sum_logits": -15.520045280456543, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.084766387939453, "logits_per_token": -3.8800113201141357, "logits_per_char": -1.0346696853637696, "num_chars": 15}, {"sum_logits": -14.525320053100586, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.142385482788086, "logits_per_token": -3.6313300132751465, "logits_per_char": -1.037522860935756, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 233, "native_id": "dc964e4f6df6b70815e81e466d0ff717", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.050869941711426, "incorrect_loss_raw": 10.02813458442688, "correct_loss_per_char": 1.2627174854278564, "incorrect_loss_per_char": 1.5506610223225188, "correct_loss_per_token": 5.050869941711426, "incorrect_loss_per_token": 6.93165922164917, "correct_loss_uncond": -7.522516250610352, "incorrect_loss_uncond": -4.713846445083618}, "model_output": [{"sum_logits": -15.337389945983887, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.405479431152344, "logits_per_token": -7.668694972991943, "logits_per_char": -2.1910557065691267, "num_chars": 7}, {"sum_logits": -5.050869941711426, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.573386192321777, "logits_per_token": -5.050869941711426, "logits_per_char": -1.2627174854278564, "num_chars": 4}, {"sum_logits": -10.542197227478027, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.224937438964844, "logits_per_token": -10.542197227478027, "logits_per_char": -2.1084394454956055, "num_chars": 5}, {"sum_logits": -9.434412956237793, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.212604522705078, "logits_per_token": -4.7172064781188965, "logits_per_char": -0.9434412956237793, "num_chars": 10}, {"sum_logits": -4.7985382080078125, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.124902725219727, "logits_per_token": -4.7985382080078125, "logits_per_char": -0.9597076416015625, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 234, "native_id": "6b9221c1af583ffb43580857d6fde38a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.471747875213623, "incorrect_loss_raw": 6.047224670648575, "correct_loss_per_char": 0.4119579792022705, "incorrect_loss_per_char": 0.6289563874403635, "correct_loss_per_token": 2.471747875213623, "incorrect_loss_per_token": 4.671465069055557, "correct_loss_uncond": -10.478427410125732, "incorrect_loss_uncond": -8.527899831533432}, "model_output": [{"sum_logits": -8.25061321258545, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.517041206359863, "logits_per_token": -8.25061321258545, "logits_per_char": -0.8250613212585449, "num_chars": 10}, {"sum_logits": -2.471747875213623, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.950175285339355, "logits_per_token": -2.471747875213623, "logits_per_char": -0.4119579792022705, "num_chars": 6}, {"sum_logits": -1.207323670387268, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -10.933771133422852, "logits_per_token": -1.207323670387268, "logits_per_char": -0.2414647340774536, "num_chars": 5}, {"sum_logits": -3.7248849868774414, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.448589324951172, "logits_per_token": -3.7248849868774414, "logits_per_char": -0.5321264266967773, "num_chars": 7}, {"sum_logits": -11.00607681274414, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.40109634399414, "logits_per_token": -5.50303840637207, "logits_per_char": -0.9171730677286783, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 235, "native_id": "4dc2c4596b08e9bfd893174e67bff40a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.14584493637085, "incorrect_loss_raw": 12.709552526473999, "correct_loss_per_char": 0.5717605484856499, "incorrect_loss_per_char": 1.0611769556999207, "correct_loss_per_token": 2.572922468185425, "incorrect_loss_per_token": 7.26084582010905, "correct_loss_uncond": -10.97765588760376, "incorrect_loss_uncond": -4.945097923278809}, "model_output": [{"sum_logits": -11.987203598022461, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.828012466430664, "logits_per_token": -5.9936017990112305, "logits_per_char": -0.9989336331685384, "num_chars": 12}, {"sum_logits": -9.350593566894531, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.537456512451172, "logits_per_token": -4.675296783447266, "logits_per_char": -0.7792161305745443, "num_chars": 12}, {"sum_logits": -12.81152057647705, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.653477668762207, "logits_per_token": -12.81152057647705, "logits_per_char": -1.4235022862752278, "num_chars": 9}, {"sum_logits": -5.14584493637085, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.12350082397461, "logits_per_token": -2.572922468185425, "logits_per_char": -0.5717605484856499, "num_chars": 9}, {"sum_logits": -16.688892364501953, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.599655151367188, "logits_per_token": -5.562964121500651, "logits_per_char": -1.043055772781372, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 236, "native_id": "8ae24d3ff199077a59e0d970feb665b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.74431037902832, "incorrect_loss_raw": 14.970187425613403, "correct_loss_per_char": 0.9786925315856934, "incorrect_loss_per_char": 1.517547657814893, "correct_loss_per_token": 5.87215518951416, "incorrect_loss_per_token": 8.216002941131592, "correct_loss_uncond": -9.529630661010742, "incorrect_loss_uncond": -3.341407537460327}, "model_output": [{"sum_logits": -11.74431037902832, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.273941040039062, "logits_per_token": -5.87215518951416, "logits_per_char": -0.9786925315856934, "num_chars": 12}, {"sum_logits": -19.11281394958496, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.25178337097168, "logits_per_token": -9.55640697479248, "logits_per_char": -1.19455087184906, "num_chars": 16}, {"sum_logits": -15.592994689941406, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.79379653930664, "logits_per_token": -7.796497344970703, "logits_per_char": -1.9491243362426758, "num_chars": 8}, {"sum_logits": -19.327667236328125, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.345497131347656, "logits_per_token": -9.663833618164062, "logits_per_char": -1.7570606578480115, "num_chars": 11}, {"sum_logits": -5.847273826599121, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.855302810668945, "logits_per_token": -5.847273826599121, "logits_per_char": -1.1694547653198242, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 237, "native_id": "d64a676e9d22e7edd12e7f4ce267a9f0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.020054817199707, "incorrect_loss_raw": 5.154886424541473, "correct_loss_per_char": 0.6040109634399414, "incorrect_loss_per_char": 0.5815896987915039, "correct_loss_per_token": 3.020054817199707, "incorrect_loss_per_token": 4.096222221851349, "correct_loss_uncond": -8.209635734558105, "incorrect_loss_uncond": -12.29740697145462}, "model_output": [{"sum_logits": -1.807574987411499, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -1.807574987411499, "logits_per_char": -0.3012624979019165, "num_chars": 6}, {"sum_logits": -3.020054817199707, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -3.020054817199707, "logits_per_char": -0.6040109634399414, "num_chars": 5}, {"sum_logits": -8.469313621520996, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.17279052734375, "logits_per_token": -4.234656810760498, "logits_per_char": -0.6049509729657855, "num_chars": 14}, {"sum_logits": -7.129538536071777, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.290563583374023, "logits_per_token": -7.129538536071777, "logits_per_char": -1.0185055051531111, "num_chars": 7}, {"sum_logits": -3.213118553161621, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -3.213118553161621, "logits_per_char": -0.40163981914520264, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 238, "native_id": "54ecb521df1d0f5b130a393c42b4126d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0196257829666138, "incorrect_loss_raw": 12.686180472373962, "correct_loss_per_char": 0.10196257829666137, "incorrect_loss_per_char": 1.4565156897688225, "correct_loss_per_token": 1.0196257829666138, "incorrect_loss_per_token": 6.369571963946025, "correct_loss_uncond": -13.085832476615906, "incorrect_loss_uncond": -5.230192303657532}, "model_output": [{"sum_logits": -12.934762954711914, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.07953643798828, "logits_per_token": -6.467381477355957, "logits_per_char": -1.4371958838568792, "num_chars": 9}, {"sum_logits": -11.135242462158203, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.438270568847656, "logits_per_token": -5.567621231079102, "logits_per_char": -1.855873743693034, "num_chars": 6}, {"sum_logits": -1.0196257829666138, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -1.0196257829666138, "logits_per_char": -0.10196257829666137, "num_chars": 10}, {"sum_logits": -19.84714698791504, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -22.96535301208496, "logits_per_token": -6.615715662638347, "logits_per_char": -1.1674792345832377, "num_chars": 17}, {"sum_logits": -6.827569484710693, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.182331085205078, "logits_per_token": -6.827569484710693, "logits_per_char": -1.3655138969421388, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 239, "native_id": "b7276bb9139ec25c98c7e3822404eb6c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.193352699279785, "incorrect_loss_raw": 7.304443955421448, "correct_loss_per_char": 0.45619324275425505, "incorrect_loss_per_char": 0.88794178527499, "correct_loss_per_token": 3.193352699279785, "incorrect_loss_per_token": 7.304443955421448, "correct_loss_uncond": -11.626325607299805, "incorrect_loss_uncond": -7.27356493473053}, "model_output": [{"sum_logits": -4.719424724578857, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -4.719424724578857, "logits_per_char": -0.6742035320826939, "num_chars": 7}, {"sum_logits": -3.193352699279785, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.81967830657959, "logits_per_token": -3.193352699279785, "logits_per_char": -0.45619324275425505, "num_chars": 7}, {"sum_logits": -11.08466625213623, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.825310707092285, "logits_per_token": -11.08466625213623, "logits_per_char": -1.2316295835706923, "num_chars": 9}, {"sum_logits": -7.0761613845825195, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -7.0761613845825195, "logits_per_char": -0.5896801153818766, "num_chars": 12}, {"sum_logits": -6.337523460388184, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -6.337523460388184, "logits_per_char": -1.0562539100646973, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 240, "native_id": "ecb8758b0d088f9aedc182a516dd1190", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0873749256134033, "incorrect_loss_raw": 8.852732181549072, "correct_loss_per_char": 0.41747498512268066, "incorrect_loss_per_char": 0.9116887614496515, "correct_loss_per_token": 2.0873749256134033, "incorrect_loss_per_token": 7.550179481506348, "correct_loss_uncond": -12.319905996322632, "incorrect_loss_uncond": -5.803559064865112}, "model_output": [{"sum_logits": -6.430391311645508, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -6.430391311645508, "logits_per_char": -0.9186273302350726, "num_chars": 7}, {"sum_logits": -8.429052352905273, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -8.429052352905273, "logits_per_char": -0.9365613725450304, "num_chars": 9}, {"sum_logits": -10.420421600341797, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.36029624938965, "logits_per_token": -5.210210800170898, "logits_per_char": -0.947311054576527, "num_chars": 11}, {"sum_logits": -10.131063461303711, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -10.131063461303711, "logits_per_char": -0.844255288441976, "num_chars": 12}, {"sum_logits": -2.0873749256134033, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.407280921936035, "logits_per_token": -2.0873749256134033, "logits_per_char": -0.41747498512268066, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 241, "native_id": "f2645d0ee8662b6553954cee7e77979e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.810866832733154, "incorrect_loss_raw": 13.360628366470337, "correct_loss_per_char": 0.8678740925259061, "incorrect_loss_per_char": 1.907290136246454, "correct_loss_per_token": 3.905433416366577, "incorrect_loss_per_token": 9.550777077674866, "correct_loss_uncond": -6.722907543182373, "incorrect_loss_uncond": -1.2017936706542969}, "model_output": [{"sum_logits": -11.117715835571289, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.321873664855957, "logits_per_token": -11.117715835571289, "logits_per_char": -1.8529526392618816, "num_chars": 6}, {"sum_logits": -7.810866832733154, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -3.905433416366577, "logits_per_char": -0.8678740925259061, "num_chars": 9}, {"sum_logits": -11.845987319946289, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -11.845987319946289, "logits_per_char": -2.369197463989258, "num_chars": 5}, {"sum_logits": -14.5674409866333, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.979316711425781, "logits_per_token": -7.28372049331665, "logits_per_char": -2.0810629980904713, "num_chars": 7}, {"sum_logits": -15.911369323730469, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.473548889160156, "logits_per_token": -7.955684661865234, "logits_per_char": -1.3259474436442058, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 242, "native_id": "ea6d1a739ea841be282e13789270651e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.475393295288086, "incorrect_loss_raw": 17.988991260528564, "correct_loss_per_char": 0.8057994842529297, "incorrect_loss_per_char": 1.4471597425521365, "correct_loss_per_token": 3.491797765096029, "incorrect_loss_per_token": 10.133008400599163, "correct_loss_uncond": -11.906034469604492, "incorrect_loss_uncond": -1.3591065406799316}, "model_output": [{"sum_logits": -20.487577438354492, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.524456024169922, "logits_per_token": -10.243788719177246, "logits_per_char": -1.1381987465752497, "num_chars": 18}, {"sum_logits": -19.376686096191406, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.983226776123047, "logits_per_token": -9.688343048095703, "logits_per_char": -2.152965121799045, "num_chars": 9}, {"sum_logits": -10.475393295288086, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.381427764892578, "logits_per_token": -3.491797765096029, "logits_per_char": -0.8057994842529297, "num_chars": 13}, {"sum_logits": -17.237699508666992, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.423744201660156, "logits_per_token": -5.745899836222331, "logits_per_char": -1.4364749590555828, "num_chars": 12}, {"sum_logits": -14.854001998901367, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.46096420288086, "logits_per_token": -14.854001998901367, "logits_per_char": -1.061000142778669, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 243, "native_id": "c82ed0c2a2e115452b4d596c5faafbcf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.040714263916016, "incorrect_loss_raw": 9.994839549064636, "correct_loss_per_char": 1.008142852783203, "incorrect_loss_per_char": 1.149425046595316, "correct_loss_per_token": 5.040714263916016, "incorrect_loss_per_token": 6.416268388430278, "correct_loss_uncond": -6.946971893310547, "incorrect_loss_uncond": -7.4742738008499146}, "model_output": [{"sum_logits": -8.357311248779297, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.650419235229492, "logits_per_token": -8.357311248779297, "logits_per_char": -1.3928852081298828, "num_chars": 6}, {"sum_logits": -13.084259033203125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.866344451904297, "logits_per_token": -6.5421295166015625, "logits_per_char": -1.8691798618861608, "num_chars": 7}, {"sum_logits": -6.8795552253723145, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.757722854614258, "logits_per_token": -6.8795552253723145, "logits_per_char": -0.6879555225372315, "num_chars": 10}, {"sum_logits": -5.040714263916016, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.987686157226562, "logits_per_token": -5.040714263916016, "logits_per_char": -1.008142852783203, "num_chars": 5}, {"sum_logits": -11.658232688903809, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.601966857910156, "logits_per_token": -3.886077562967936, "logits_per_char": -0.6476795938279893, "num_chars": 18}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 244, "native_id": "163d83851ecd4a4144b31b8738e4c335", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.1579549312591553, "incorrect_loss_raw": 10.84403920173645, "correct_loss_per_char": 0.3596591552098592, "incorrect_loss_per_char": 1.4128382574084173, "correct_loss_per_token": 2.1579549312591553, "incorrect_loss_per_token": 8.253862261772156, "correct_loss_uncond": -13.996647119522095, "incorrect_loss_uncond": -7.275232791900635}, "model_output": [{"sum_logits": -13.455268859863281, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.154216766357422, "logits_per_token": -6.727634429931641, "logits_per_char": -1.4950298733181424, "num_chars": 9}, {"sum_logits": -2.1579549312591553, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.15460205078125, "logits_per_token": -2.1579549312591553, "logits_per_char": -0.3596591552098592, "num_chars": 6}, {"sum_logits": -10.893655776977539, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.451787948608398, "logits_per_token": -10.893655776977539, "logits_per_char": -1.815609296162923, "num_chars": 6}, {"sum_logits": -11.761085510253906, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.405667304992676, "logits_per_token": -11.761085510253906, "logits_per_char": -1.6801550728934151, "num_chars": 7}, {"sum_logits": -7.266146659851074, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.465415954589844, "logits_per_token": -3.633073329925537, "logits_per_char": -0.6605587872591886, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 245, "native_id": "095767956c500ca1af7cf7671556de5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.496767044067383, "incorrect_loss_raw": 12.112928867340088, "correct_loss_per_char": 1.0827945073445637, "incorrect_loss_per_char": 1.301929177143873, "correct_loss_per_token": 6.496767044067383, "incorrect_loss_per_token": 10.29915452003479, "correct_loss_uncond": -8.670104026794434, "incorrect_loss_uncond": -1.7971110343933105}, "model_output": [{"sum_logits": -6.496767044067383, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.166871070861816, "logits_per_token": -6.496767044067383, "logits_per_char": -1.0827945073445637, "num_chars": 6}, {"sum_logits": -13.90337085723877, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.272710800170898, "logits_per_token": -13.90337085723877, "logits_per_char": -1.9861958367483956, "num_chars": 7}, {"sum_logits": -8.372391700744629, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.322914123535156, "logits_per_token": -8.372391700744629, "logits_per_char": -0.930265744527181, "num_chars": 9}, {"sum_logits": -11.66575813293457, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.51531982421875, "logits_per_token": -11.66575813293457, "logits_per_char": -0.9721465110778809, "num_chars": 12}, {"sum_logits": -14.510194778442383, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.529214859008789, "logits_per_token": -7.255097389221191, "logits_per_char": -1.319108616222035, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 246, "native_id": "d31ee38f67d1173275e120b8ad36039c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.706398010253906, "incorrect_loss_raw": 11.79048466682434, "correct_loss_per_char": 0.9733089100230824, "incorrect_loss_per_char": 1.144034002835934, "correct_loss_per_token": 5.353199005126953, "incorrect_loss_per_token": 7.347072601318359, "correct_loss_uncond": -10.13034439086914, "incorrect_loss_uncond": -5.17314076423645}, "model_output": [{"sum_logits": -9.739068031311035, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -4.869534015655518, "logits_per_char": -0.8115890026092529, "num_chars": 12}, {"sum_logits": -14.19029712677002, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.44722843170166, "logits_per_token": -7.09514856338501, "logits_per_char": -1.419029712677002, "num_chars": 10}, {"sum_logits": -10.706398010253906, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.836742401123047, "logits_per_token": -5.353199005126953, "logits_per_char": -0.9733089100230824, "num_chars": 11}, {"sum_logits": -11.614642143249512, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.100279808044434, "logits_per_token": -11.614642143249512, "logits_per_char": -1.451830267906189, "num_chars": 8}, {"sum_logits": -11.617931365966797, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -5.808965682983398, "logits_per_char": -0.8936870281512921, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 247, "native_id": "c410a4626dfce4b4cfd3e5937602cd77", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5278563499450684, "incorrect_loss_raw": 9.69847571849823, "correct_loss_per_char": 0.44098204374313354, "incorrect_loss_per_char": 0.9860907524824142, "correct_loss_per_token": 3.5278563499450684, "incorrect_loss_per_token": 7.043681502342224, "correct_loss_uncond": -8.802655696868896, "incorrect_loss_uncond": -5.4774781465530396}, "model_output": [{"sum_logits": -3.5278563499450684, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.330512046813965, "logits_per_token": -3.5278563499450684, "logits_per_char": -0.44098204374313354, "num_chars": 8}, {"sum_logits": -21.238353729248047, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.10118293762207, "logits_per_token": -10.619176864624023, "logits_per_char": -1.4158902486165366, "num_chars": 15}, {"sum_logits": -5.949654579162598, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.733114242553711, "logits_per_token": -5.949654579162598, "logits_per_char": -1.1899309158325195, "num_chars": 5}, {"sum_logits": -8.078038215637207, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.539006233215332, "logits_per_token": -8.078038215637207, "logits_per_char": -0.8975598017374674, "num_chars": 9}, {"sum_logits": -3.5278563499450684, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.330512046813965, "logits_per_token": -3.5278563499450684, "logits_per_char": -0.44098204374313354, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 248, "native_id": "14d760e43728e9e4643c414627f2b596", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.78211784362793, "incorrect_loss_raw": 8.626307845115662, "correct_loss_per_char": 0.9757908715142144, "incorrect_loss_per_char": 1.1735947794384427, "correct_loss_per_token": 8.78211784362793, "incorrect_loss_per_token": 8.626307845115662, "correct_loss_uncond": -5.894843101501465, "incorrect_loss_uncond": -4.500725865364075}, "model_output": [{"sum_logits": -7.400599002838135, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.0285062789917, "logits_per_token": -7.400599002838135, "logits_per_char": -0.8222887780931261, "num_chars": 9}, {"sum_logits": -10.257136344909668, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -11.66850471496582, "logits_per_token": -10.257136344909668, "logits_per_char": -1.4653051921299525, "num_chars": 7}, {"sum_logits": -8.21151065826416, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.889263153076172, "logits_per_token": -8.21151065826416, "logits_per_char": -1.1730729511805944, "num_chars": 7}, {"sum_logits": -8.635985374450684, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.921860694885254, "logits_per_token": -8.635985374450684, "logits_per_char": -1.2337121963500977, "num_chars": 7}, {"sum_logits": -8.78211784362793, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.676960945129395, "logits_per_token": -8.78211784362793, "logits_per_char": -0.9757908715142144, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 249, "native_id": "abcf1b550b4d44f46d4f68b8e1d98ec8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.844167709350586, "incorrect_loss_raw": 9.47764778137207, "correct_loss_per_char": 0.3844167709350586, "incorrect_loss_per_char": 0.9824051963431495, "correct_loss_per_token": 1.922083854675293, "incorrect_loss_per_token": 6.163969337940216, "correct_loss_uncond": -14.964181900024414, "incorrect_loss_uncond": -8.157397508621216}, "model_output": [{"sum_logits": -10.149968147277832, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.931836128234863, "logits_per_token": -10.149968147277832, "logits_per_char": -1.268746018409729, "num_chars": 8}, {"sum_logits": -10.894207000732422, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -10.894207000732422, "logits_per_char": -1.5563152858189173, "num_chars": 7}, {"sum_logits": -12.09803581237793, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.831790924072266, "logits_per_token": -2.419607162475586, "logits_per_char": -0.8065357208251953, "num_chars": 15}, {"sum_logits": -3.844167709350586, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.808349609375, "logits_per_token": -1.922083854675293, "logits_per_char": -0.3844167709350586, "num_chars": 10}, {"sum_logits": -4.768380165100098, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.3708553314209, "logits_per_token": -1.1920950412750244, "logits_per_char": -0.2980237603187561, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 250, "native_id": "5b8af6f26335dbd501b0104c71e26d9e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.832159042358398, "incorrect_loss_raw": 12.74909257888794, "correct_loss_per_char": 1.6386931737263997, "incorrect_loss_per_char": 1.3997754848364627, "correct_loss_per_token": 4.916079521179199, "incorrect_loss_per_token": 11.144215822219849, "correct_loss_uncond": -7.66779899597168, "incorrect_loss_uncond": -1.7499189376831055}, "model_output": [{"sum_logits": -12.839014053344727, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.37188720703125, "logits_per_token": -6.419507026672363, "logits_per_char": -1.2839014053344726, "num_chars": 10}, {"sum_logits": -9.832159042358398, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.499958038330078, "logits_per_token": -4.916079521179199, "logits_per_char": -1.6386931737263997, "num_chars": 6}, {"sum_logits": -12.296039581298828, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.811025619506836, "logits_per_token": -12.296039581298828, "logits_per_char": -1.2296039581298828, "num_chars": 10}, {"sum_logits": -9.696294784545898, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.019183158874512, "logits_per_token": -9.696294784545898, "logits_per_char": -1.6160491307576497, "num_chars": 6}, {"sum_logits": -16.165021896362305, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -16.165021896362305, "logits_per_char": -1.469547445123846, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 251, "native_id": "4364b4b342fb7b44434bd6694bf8fd51", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.63015604019165, "incorrect_loss_raw": 9.941519141197205, "correct_loss_per_char": 0.28938475251197815, "incorrect_loss_per_char": 0.6717736973907009, "correct_loss_per_token": 1.54338534673055, "incorrect_loss_per_token": 3.833238462607066, "correct_loss_uncond": -11.678176403045654, "incorrect_loss_uncond": -10.288925766944885}, "model_output": [{"sum_logits": -7.628075122833252, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.420150756835938, "logits_per_token": -3.814037561416626, "logits_per_char": -0.693461374803023, "num_chars": 11}, {"sum_logits": -9.417235374450684, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.31100082397461, "logits_per_token": -4.708617687225342, "logits_per_char": -0.856112306768244, "num_chars": 11}, {"sum_logits": -13.561285018920898, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -23.745128631591797, "logits_per_token": -4.5204283396403, "logits_per_char": -0.5650535424550375, "num_chars": 24}, {"sum_logits": -4.63015604019165, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -1.54338534673055, "logits_per_char": -0.28938475251197815, "num_chars": 16}, {"sum_logits": -9.159481048583984, "num_tokens": 4, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -22.445499420166016, "logits_per_token": -2.289870262145996, "logits_per_char": -0.572467565536499, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 252, "native_id": "3ffe67fb009529d9b0c49ccd7141ee4a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.492568969726562, "incorrect_loss_raw": 12.87635588645935, "correct_loss_per_char": 0.8492568969726563, "incorrect_loss_per_char": 1.0345601723307656, "correct_loss_per_token": 4.246284484863281, "incorrect_loss_per_token": 7.541027069091797, "correct_loss_uncond": -9.018548965454102, "incorrect_loss_uncond": -4.355265140533447}, "model_output": [{"sum_logits": -12.92574691772461, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.017120361328125, "logits_per_token": -6.462873458862305, "logits_per_char": -0.9232676369803292, "num_chars": 14}, {"sum_logits": -9.285594940185547, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.427593231201172, "logits_per_token": -4.642797470092773, "logits_per_char": -0.7737995783487955, "num_chars": 12}, {"sum_logits": -20.471288681030273, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.813478469848633, "logits_per_token": -10.235644340515137, "logits_per_char": -1.7059407234191895, "num_chars": 12}, {"sum_logits": -8.492568969726562, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.511117935180664, "logits_per_token": -4.246284484863281, "logits_per_char": -0.8492568969726563, "num_chars": 10}, {"sum_logits": -8.822793006896973, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.668292045593262, "logits_per_token": -8.822793006896973, "logits_per_char": -0.7352327505747477, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 253, "native_id": "f372587fa4c99d5bebf0d0eb987c44e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9354328513145447, "incorrect_loss_raw": 9.019305229187012, "correct_loss_per_char": 0.10393698347939385, "incorrect_loss_per_char": 1.3322648865836006, "correct_loss_per_token": 0.9354328513145447, "incorrect_loss_per_token": 8.420093655586243, "correct_loss_uncond": -13.457041323184967, "incorrect_loss_uncond": -6.667252779006958}, "model_output": [{"sum_logits": -9.869138717651367, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.336503982543945, "logits_per_token": -9.869138717651367, "logits_per_char": -1.233642339706421, "num_chars": 8}, {"sum_logits": -4.793692588806152, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -2.396846294403076, "logits_per_char": -0.3994743824005127, "num_chars": 12}, {"sum_logits": -11.143025398254395, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.441018104553223, "logits_per_token": -11.143025398254395, "logits_per_char": -2.2286050796508787, "num_chars": 5}, {"sum_logits": -10.271364212036133, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.823331832885742, "logits_per_token": -10.271364212036133, "logits_per_char": -1.4673377445765905, "num_chars": 7}, {"sum_logits": -0.9354328513145447, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -0.9354328513145447, "logits_per_char": -0.10393698347939385, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 254, "native_id": "d35a8a3bd560fdd651ecf314878ed30f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.891036033630371, "incorrect_loss_raw": 9.009631276130676, "correct_loss_per_char": 0.6264578212391246, "incorrect_loss_per_char": 1.2625262090793024, "correct_loss_per_token": 3.4455180168151855, "incorrect_loss_per_token": 7.2019747495651245, "correct_loss_uncond": -10.367838859558105, "incorrect_loss_uncond": -6.8524781465530396}, "model_output": [{"sum_logits": -6.51932954788208, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.667871475219727, "logits_per_token": -6.51932954788208, "logits_per_char": -1.62983238697052, "num_chars": 4}, {"sum_logits": -9.382936477661133, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -9.382936477661133, "logits_per_char": -1.1728670597076416, "num_chars": 8}, {"sum_logits": -14.461252212524414, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.088661193847656, "logits_per_token": -7.230626106262207, "logits_per_char": -1.1124040163480318, "num_chars": 13}, {"sum_logits": -5.675006866455078, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -5.675006866455078, "logits_per_char": -1.1350013732910156, "num_chars": 5}, {"sum_logits": -6.891036033630371, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.258874893188477, "logits_per_token": -3.4455180168151855, "logits_per_char": -0.6264578212391246, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 255, "native_id": "0542414710025f56b0c26e1bae5c4d06", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.313555717468262, "incorrect_loss_raw": 10.068890690803528, "correct_loss_per_char": 0.9471965936514047, "incorrect_loss_per_char": 1.675322236617406, "correct_loss_per_token": 3.0783889293670654, "incorrect_loss_per_token": 8.021039843559265, "correct_loss_uncond": -3.7964162826538086, "incorrect_loss_uncond": -3.9601722955703735}, "model_output": [{"sum_logits": -12.313555717468262, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.10997200012207, "logits_per_token": -3.0783889293670654, "logits_per_char": -0.9471965936514047, "num_chars": 13}, {"sum_logits": -8.524855613708496, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.66987133026123, "logits_per_token": -8.524855613708496, "logits_per_char": -1.7049711227416993, "num_chars": 5}, {"sum_logits": -16.3828067779541, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -8.19140338897705, "logits_per_char": -1.8203118642171223, "num_chars": 9}, {"sum_logits": -7.991629600524902, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.224944114685059, "logits_per_token": -7.991629600524902, "logits_per_char": -1.3319382667541504, "num_chars": 6}, {"sum_logits": -7.376270771026611, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.485767364501953, "logits_per_token": -7.376270771026611, "logits_per_char": -1.8440676927566528, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 256, "native_id": "1875f70cf736c68c7a9df3ef870224a1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.491852760314941, "incorrect_loss_raw": 11.251482486724854, "correct_loss_per_char": 0.9153087933858236, "incorrect_loss_per_char": 0.8268280389217231, "correct_loss_per_token": 5.491852760314941, "incorrect_loss_per_token": 5.625741243362427, "correct_loss_uncond": -7.380666732788086, "incorrect_loss_uncond": -6.116748332977295}, "model_output": [{"sum_logits": -5.491852760314941, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.872519493103027, "logits_per_token": -5.491852760314941, "logits_per_char": -0.9153087933858236, "num_chars": 6}, {"sum_logits": -11.567914009094238, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.41663360595703, "logits_per_token": -5.783957004547119, "logits_per_char": -0.7229946255683899, "num_chars": 16}, {"sum_logits": -9.855685234069824, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.945926666259766, "logits_per_token": -4.927842617034912, "logits_per_char": -0.7581296333899865, "num_chars": 13}, {"sum_logits": -8.34037971496582, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.58175277709961, "logits_per_token": -4.17018985748291, "logits_per_char": -0.5560253143310547, "num_chars": 15}, {"sum_logits": -15.241950988769531, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.528610229492188, "logits_per_token": -7.620975494384766, "logits_per_char": -1.270162582397461, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 257, "native_id": "83250ae2dfeb2e3886ead4cde8e1290f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.622995376586914, "incorrect_loss_raw": 14.928292036056519, "correct_loss_per_char": 0.41061882745651973, "incorrect_loss_per_char": 1.298710109637334, "correct_loss_per_token": 2.874331792195638, "incorrect_loss_per_token": 7.258423884709676, "correct_loss_uncond": -10.767969131469727, "incorrect_loss_uncond": -3.3530893325805664}, "model_output": [{"sum_logits": -9.744402885437012, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.694806098937988, "logits_per_token": -9.744402885437012, "logits_per_char": -0.9744402885437011, "num_chars": 10}, {"sum_logits": -15.798225402832031, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.741010665893555, "logits_per_token": -7.899112701416016, "logits_per_char": -1.316518783569336, "num_chars": 12}, {"sum_logits": -8.622995376586914, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.39096450805664, "logits_per_token": -2.874331792195638, "logits_per_char": -0.41061882745651973, "num_chars": 21}, {"sum_logits": -19.68954849243164, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.474590301513672, "logits_per_token": -6.563182830810547, "logits_per_char": -1.7899589538574219, "num_chars": 11}, {"sum_logits": -14.48099136352539, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.215118408203125, "logits_per_token": -4.82699712117513, "logits_per_char": -1.1139224125788763, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 258, "native_id": "70c39372c0d50566554fd72c768b75f6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.15120267868042, "incorrect_loss_raw": 11.061781764030457, "correct_loss_per_char": 1.0216003826686315, "incorrect_loss_per_char": 1.1163999540637237, "correct_loss_per_token": 7.15120267868042, "incorrect_loss_per_token": 11.061781764030457, "correct_loss_uncond": -8.869608402252197, "incorrect_loss_uncond": -2.762635111808777}, "model_output": [{"sum_logits": -7.952151775360107, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -7.952151775360107, "logits_per_char": -0.8835724194844564, "num_chars": 9}, {"sum_logits": -12.148921966552734, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.963855743408203, "logits_per_token": -12.148921966552734, "logits_per_char": -1.104447451504794, "num_chars": 11}, {"sum_logits": -13.982967376708984, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -13.982967376708984, "logits_per_char": -1.5536630418565538, "num_chars": 9}, {"sum_logits": -10.1630859375, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.512303352355957, "logits_per_token": -10.1630859375, "logits_per_char": -0.9239169034090909, "num_chars": 11}, {"sum_logits": -7.15120267868042, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.020811080932617, "logits_per_token": -7.15120267868042, "logits_per_char": -1.0216003826686315, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 259, "native_id": "c21ec5b367f409a0288d616f626555ae", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.022344589233398, "incorrect_loss_raw": 13.244633197784424, "correct_loss_per_char": 0.7293040535666726, "incorrect_loss_per_char": 1.3684149563312529, "correct_loss_per_token": 4.011172294616699, "incorrect_loss_per_token": 10.184217691421509, "correct_loss_uncond": -10.181619644165039, "incorrect_loss_uncond": -3.4107086658477783}, "model_output": [{"sum_logits": -8.022344589233398, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.203964233398438, "logits_per_token": -4.011172294616699, "logits_per_char": -0.7293040535666726, "num_chars": 11}, {"sum_logits": -12.687199592590332, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -6.343599796295166, "logits_per_char": -1.2687199592590332, "num_chars": 10}, {"sum_logits": -13.100622177124023, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.074918746948242, "logits_per_token": -13.100622177124023, "logits_per_char": -1.1909656524658203, "num_chars": 11}, {"sum_logits": -11.796124458312988, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.916038513183594, "logits_per_token": -5.898062229156494, "logits_per_char": -1.4745155572891235, "num_chars": 8}, {"sum_logits": -15.394586563110352, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.490666389465332, "logits_per_token": -15.394586563110352, "logits_per_char": -1.539458656311035, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 260, "native_id": "a2cd03ed068f6d613e85f3a60f4db0a1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.623603820800781, "incorrect_loss_raw": 10.091211080551147, "correct_loss_per_char": 0.7029504776000977, "incorrect_loss_per_char": 1.3882757073098964, "correct_loss_per_token": 5.623603820800781, "incorrect_loss_per_token": 7.48156460126241, "correct_loss_uncond": -9.161089897155762, "incorrect_loss_uncond": -4.624578475952148}, "model_output": [{"sum_logits": -6.769842147827148, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -6.769842147827148, "logits_per_char": -1.692460536956787, "num_chars": 4}, {"sum_logits": -13.255722999572754, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -13.255722999572754, "logits_per_char": -1.6569653749465942, "num_chars": 8}, {"sum_logits": -5.623603820800781, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -5.623603820800781, "logits_per_char": -0.7029504776000977, "num_chars": 8}, {"sum_logits": -4.681400299072266, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.036821365356445, "logits_per_token": -4.681400299072266, "logits_per_char": -0.7802333831787109, "num_chars": 6}, {"sum_logits": -15.657878875732422, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.27172088623047, "logits_per_token": -5.219292958577474, "logits_per_char": -1.423443534157493, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 261, "native_id": "d2871dc28c82471e5d7f71f79e49c257", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.6403613090515137, "incorrect_loss_raw": 7.568719148635864, "correct_loss_per_char": 0.606726884841919, "incorrect_loss_per_char": 0.5634972268884831, "correct_loss_per_token": 3.6403613090515137, "incorrect_loss_per_token": 4.013514757156372, "correct_loss_uncond": -8.944003582000732, "incorrect_loss_uncond": -12.708508729934692}, "model_output": [{"sum_logits": -2.069063186645508, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -2.069063186645508, "logits_per_char": -0.22989590962727866, "num_chars": 9}, {"sum_logits": -10.389135360717773, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -22.123933792114258, "logits_per_token": -5.194567680358887, "logits_per_char": -0.944466850974343, "num_chars": 11}, {"sum_logits": -12.03499984741211, "num_tokens": 4, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -29.839584350585938, "logits_per_token": -3.0087499618530273, "logits_per_char": -0.5014583269755045, "num_chars": 24}, {"sum_logits": -3.6403613090515137, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -3.6403613090515137, "logits_per_char": -0.606726884841919, "num_chars": 6}, {"sum_logits": -5.781678199768066, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -5.781678199768066, "logits_per_char": -0.5781678199768067, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 262, "native_id": "94770e75c4e2000e717b4218ddff19e8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.159910678863525, "incorrect_loss_raw": 10.602041244506836, "correct_loss_per_char": 0.4773273785909017, "incorrect_loss_per_char": 1.0125069779790803, "correct_loss_per_token": 2.3866368929545083, "incorrect_loss_per_token": 5.416364808877309, "correct_loss_uncond": -11.92728567123413, "incorrect_loss_uncond": -7.276367425918579}, "model_output": [{"sum_logits": -9.929716110229492, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.224944114685059, "logits_per_token": -9.929716110229492, "logits_per_char": -1.6549526850382488, "num_chars": 6}, {"sum_logits": -8.89135456085205, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.48340606689453, "logits_per_token": -2.2228386402130127, "logits_per_char": -0.6350967543465751, "num_chars": 14}, {"sum_logits": -13.683856010437012, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.412267684936523, "logits_per_token": -4.561285336812337, "logits_per_char": -1.0526043084951549, "num_chars": 13}, {"sum_logits": -7.159910678863525, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.087196350097656, "logits_per_token": -2.3866368929545083, "logits_per_char": -0.4773273785909017, "num_chars": 15}, {"sum_logits": -9.903238296508789, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.393016815185547, "logits_per_token": -4.9516191482543945, "logits_per_char": -0.7073741640363421, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 263, "native_id": "08ad17d3ca1838b8724d21cf5921ec52", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.439393043518066, "incorrect_loss_raw": 12.329679727554321, "correct_loss_per_char": 0.49533792642446667, "incorrect_loss_per_char": 1.0116863913228866, "correct_loss_per_token": 3.219696521759033, "incorrect_loss_per_token": 6.6909873485565186, "correct_loss_uncond": -11.261073112487793, "incorrect_loss_uncond": -5.873492240905762}, "model_output": [{"sum_logits": -12.913631439208984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.492868423461914, "logits_per_token": -6.456815719604492, "logits_per_char": -0.8609087626139323, "num_chars": 15}, {"sum_logits": -19.33700180053711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.309406280517578, "logits_per_token": -9.668500900268555, "logits_per_char": -1.3812144143240792, "num_chars": 14}, {"sum_logits": -7.423906326293945, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.638421058654785, "logits_per_token": -7.423906326293945, "logits_per_char": -1.237317721048991, "num_chars": 6}, {"sum_logits": -6.439393043518066, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.70046615600586, "logits_per_token": -3.219696521759033, "logits_per_char": -0.49533792642446667, "num_chars": 13}, {"sum_logits": -9.644179344177246, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.371992111206055, "logits_per_token": -3.214726448059082, "logits_per_char": -0.5673046673045439, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 264, "native_id": "21fb76bd8349628b441c76f47c33e77b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.089774131774902, "incorrect_loss_raw": 13.236943244934082, "correct_loss_per_char": 0.6492695808410645, "incorrect_loss_per_char": 1.6833594964610206, "correct_loss_per_token": 2.2724435329437256, "incorrect_loss_per_token": 9.343621373176575, "correct_loss_uncond": -8.633801460266113, "incorrect_loss_uncond": -3.5878024101257324}, "model_output": [{"sum_logits": -9.089774131774902, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -2.2724435329437256, "logits_per_char": -0.6492695808410645, "num_chars": 14}, {"sum_logits": -10.44042682647705, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.07139778137207, "logits_per_token": -10.44042682647705, "logits_per_char": -0.8700355688730875, "num_chars": 12}, {"sum_logits": -11.360771179199219, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.026843070983887, "logits_per_token": -11.360771179199219, "logits_per_char": -2.2721542358398437, "num_chars": 5}, {"sum_logits": -21.749305725097656, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.446643829345703, "logits_per_token": -10.874652862548828, "logits_per_char": -2.416589525010851, "num_chars": 9}, {"sum_logits": -9.397269248962402, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.754097938537598, "logits_per_token": -4.698634624481201, "logits_per_char": -1.1746586561203003, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 265, "native_id": "e151b44e0a7bf08a1dd3c861eef09161", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.527690887451172, "incorrect_loss_raw": 9.126126289367676, "correct_loss_per_char": 0.8159613609313965, "incorrect_loss_per_char": 1.2752805715515498, "correct_loss_per_token": 6.527690887451172, "incorrect_loss_per_token": 6.884718577067058, "correct_loss_uncond": -8.035956382751465, "incorrect_loss_uncond": -6.024661540985107}, "model_output": [{"sum_logits": -6.351256370544434, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -6.351256370544434, "logits_per_char": -1.5878140926361084, "num_chars": 4}, {"sum_logits": -13.448446273803711, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -4.482815424601237, "logits_per_char": -0.8965630849202474, "num_chars": 15}, {"sum_logits": -6.527690887451172, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -6.527690887451172, "logits_per_char": -0.8159613609313965, "num_chars": 8}, {"sum_logits": -9.674479484558105, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.903817176818848, "logits_per_token": -9.674479484558105, "logits_per_char": -1.6124132474263508, "num_chars": 6}, {"sum_logits": -7.030323028564453, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -7.030323028564453, "logits_per_char": -1.0043318612234933, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 266, "native_id": "46351b3a6beb694c5f623583a3b1473d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 16.074962615966797, "incorrect_loss_raw": 16.64385461807251, "correct_loss_per_char": 3.2149925231933594, "incorrect_loss_per_char": 2.3858449278455796, "correct_loss_per_token": 8.037481307983398, "incorrect_loss_per_token": 13.589252948760986, "correct_loss_uncond": -3.007129669189453, "incorrect_loss_uncond": 0.7189459800720215}, "model_output": [{"sum_logits": -13.220746994018555, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.537405014038086, "logits_per_token": -13.220746994018555, "logits_per_char": -2.203457832336426, "num_chars": 6}, {"sum_logits": -16.074962615966797, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.08209228515625, "logits_per_token": -8.037481307983398, "logits_per_char": -3.2149925231933594, "num_chars": 5}, {"sum_logits": -16.811925888061523, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -16.811925888061523, "logits_per_char": -4.202981472015381, "num_chars": 4}, {"sum_logits": -24.436813354492188, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.014339447021484, "logits_per_token": -12.218406677246094, "logits_per_char": -2.036401112874349, "num_chars": 12}, {"sum_logits": -12.105932235717773, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -12.105932235717773, "logits_per_char": -1.1005392941561611, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 267, "native_id": "db75e16788cf56d5dfb9773eaf91fe7e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.750317096710205, "incorrect_loss_raw": 13.00587248802185, "correct_loss_per_char": 0.6389241218566895, "incorrect_loss_per_char": 1.0819349103503757, "correct_loss_per_token": 5.750317096710205, "incorrect_loss_per_token": 6.4187789758046465, "correct_loss_uncond": -7.363291263580322, "incorrect_loss_uncond": -5.5004048347473145}, "model_output": [{"sum_logits": -9.761329650878906, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -9.761329650878906, "logits_per_char": -1.2201662063598633, "num_chars": 8}, {"sum_logits": -14.12770938873291, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.176210403442383, "logits_per_token": -4.70923646291097, "logits_per_char": -0.784872743818495, "num_chars": 18}, {"sum_logits": -10.958396911621094, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.411941528320312, "logits_per_token": -5.479198455810547, "logits_per_char": -1.0958396911621093, "num_chars": 10}, {"sum_logits": -17.176054000854492, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.478607177734375, "logits_per_token": -5.725351333618164, "logits_per_char": -1.2268610000610352, "num_chars": 14}, {"sum_logits": -5.750317096710205, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.113608360290527, "logits_per_token": -5.750317096710205, "logits_per_char": -0.6389241218566895, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 268, "native_id": "ffd89796a9b09bef56c5803f188764c6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.448802947998047, "incorrect_loss_raw": 14.722888946533203, "correct_loss_per_char": 0.7448802947998047, "incorrect_loss_per_char": 1.1263353054975216, "correct_loss_per_token": 3.7244014739990234, "incorrect_loss_per_token": 7.361444473266602, "correct_loss_uncond": -12.999635696411133, "incorrect_loss_uncond": -6.940176963806152}, "model_output": [{"sum_logits": -7.448802947998047, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.44843864440918, "logits_per_token": -3.7244014739990234, "logits_per_char": -0.7448802947998047, "num_chars": 10}, {"sum_logits": -17.689414978027344, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -22.063255310058594, "logits_per_token": -8.844707489013672, "logits_per_char": -1.2635296412876673, "num_chars": 14}, {"sum_logits": -13.79800796508789, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.770599365234375, "logits_per_token": -6.899003982543945, "logits_per_char": -1.2543643604625354, "num_chars": 11}, {"sum_logits": -15.649240493774414, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.914505004882812, "logits_per_token": -7.824620246887207, "logits_per_char": -1.2037877302903395, "num_chars": 13}, {"sum_logits": -11.754892349243164, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -22.90390396118164, "logits_per_token": -5.877446174621582, "logits_per_char": -0.7836594899495443, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 269, "native_id": "5622e49306bb82ec1cec817ad0506c60", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.239900588989258, "incorrect_loss_raw": 11.765161156654358, "correct_loss_per_char": 0.6239900588989258, "incorrect_loss_per_char": 1.6522301711045302, "correct_loss_per_token": 6.239900588989258, "incorrect_loss_per_token": 11.765161156654358, "correct_loss_uncond": -6.789924621582031, "incorrect_loss_uncond": -1.5338917970657349}, "model_output": [{"sum_logits": -7.568920612335205, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.88928508758545, "logits_per_token": -7.568920612335205, "logits_per_char": -1.513784122467041, "num_chars": 5}, {"sum_logits": -13.673601150512695, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.575199127197266, "logits_per_token": -13.673601150512695, "logits_per_char": -1.2430546500466086, "num_chars": 11}, {"sum_logits": -6.239900588989258, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.029825210571289, "logits_per_token": -6.239900588989258, "logits_per_char": -0.6239900588989258, "num_chars": 10}, {"sum_logits": -14.573713302612305, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.169634819030762, "logits_per_token": -14.573713302612305, "logits_per_char": -1.0409795216151647, "num_chars": 14}, {"sum_logits": -11.244409561157227, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -11.244409561157227, "logits_per_char": -2.8111023902893066, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 270, "native_id": "6efaeb796307036719635242fa5ad0f3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.331472396850586, "incorrect_loss_raw": 10.588817477226257, "correct_loss_per_char": 0.7219120661417643, "incorrect_loss_per_char": 0.791966952076034, "correct_loss_per_token": 4.331472396850586, "incorrect_loss_per_token": 5.671668728192647, "correct_loss_uncond": -8.911236763000488, "incorrect_loss_uncond": -7.789140820503235}, "model_output": [{"sum_logits": -7.547160625457764, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.296594619750977, "logits_per_token": -7.547160625457764, "logits_per_char": -0.9433950781822205, "num_chars": 8}, {"sum_logits": -9.691558837890625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.932151794433594, "logits_per_token": -4.8457794189453125, "logits_per_char": -0.6461039225260417, "num_chars": 15}, {"sum_logits": -11.529308319091797, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.08415985107422, "logits_per_token": -5.764654159545898, "logits_per_char": -0.8235220227922712, "num_chars": 14}, {"sum_logits": -13.587242126464844, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.19892692565918, "logits_per_token": -4.529080708821614, "logits_per_char": -0.7548467848036025, "num_chars": 18}, {"sum_logits": -4.331472396850586, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -4.331472396850586, "logits_per_char": -0.7219120661417643, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 271, "native_id": "114d310d1198abffaf8b88dab5a55aa7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.181572914123535, "incorrect_loss_raw": 16.860633850097656, "correct_loss_per_char": 0.8346884467385032, "incorrect_loss_per_char": 1.305823839183176, "correct_loss_per_token": 4.590786457061768, "incorrect_loss_per_token": 8.144832770029705, "correct_loss_uncond": -4.203474044799805, "incorrect_loss_uncond": -1.8785700798034668}, "model_output": [{"sum_logits": -21.507736206054688, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.256065368652344, "logits_per_token": -7.1692454020182295, "logits_per_char": -0.9776243730024858, "num_chars": 22}, {"sum_logits": -17.307937622070312, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.13232421875, "logits_per_token": -8.653968811035156, "logits_per_char": -1.4423281351725261, "num_chars": 12}, {"sum_logits": -9.181572914123535, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.38504695892334, "logits_per_token": -4.590786457061768, "logits_per_char": -0.8346884467385032, "num_chars": 11}, {"sum_logits": -23.74148941040039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.36046600341797, "logits_per_token": -11.870744705200195, "logits_per_char": -1.8262684161846454, "num_chars": 13}, {"sum_logits": -4.885372161865234, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.20796012878418, "logits_per_token": -4.885372161865234, "logits_per_char": -0.9770744323730469, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 272, "native_id": "0f79faf5337706f2e0e39c15bbd2e99a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.8194451332092285, "incorrect_loss_raw": 11.357542514801025, "correct_loss_per_char": 0.6819445133209229, "incorrect_loss_per_char": 1.3988309171464708, "correct_loss_per_token": 3.4097225666046143, "incorrect_loss_per_token": 9.670589804649353, "correct_loss_uncond": -11.521047115325928, "incorrect_loss_uncond": -4.838382005691528}, "model_output": [{"sum_logits": -11.444587707519531, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.483644485473633, "logits_per_token": -11.444587707519531, "logits_per_char": -1.4305734634399414, "num_chars": 8}, {"sum_logits": -6.8194451332092285, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.340492248535156, "logits_per_token": -3.4097225666046143, "logits_per_char": -0.6819445133209229, "num_chars": 10}, {"sum_logits": -13.495621681213379, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.907474517822266, "logits_per_token": -6.7478108406066895, "logits_per_char": -0.74975676006741, "num_chars": 18}, {"sum_logits": -10.605133056640625, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.006134033203125, "logits_per_token": -10.605133056640625, "logits_per_char": -1.7675221761067708, "num_chars": 6}, {"sum_logits": -9.884827613830566, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.386445045471191, "logits_per_token": -9.884827613830566, "logits_per_char": -1.647471268971761, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 273, "native_id": "b62d7d1b5eec31be0b65146a9fc069e0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.0019612312316895, "incorrect_loss_raw": 9.839969158172607, "correct_loss_per_char": 0.5386124024024377, "incorrect_loss_per_char": 0.9010282067165403, "correct_loss_per_token": 3.5009806156158447, "incorrect_loss_per_token": 7.07068657875061, "correct_loss_uncond": -13.027774333953857, "incorrect_loss_uncond": -7.196559429168701}, "model_output": [{"sum_logits": -6.528048515319824, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.636194229125977, "logits_per_token": -6.528048515319824, "logits_per_char": -0.7253387239244249, "num_chars": 9}, {"sum_logits": -7.0019612312316895, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.029735565185547, "logits_per_token": -3.5009806156158447, "logits_per_char": -0.5386124024024377, "num_chars": 13}, {"sum_logits": -10.75689697265625, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.349876403808594, "logits_per_token": -5.378448486328125, "logits_per_char": -0.7683497837611607, "num_chars": 14}, {"sum_logits": -11.397363662719727, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.844615936279297, "logits_per_token": -5.698681831359863, "logits_per_char": -1.1397363662719726, "num_chars": 10}, {"sum_logits": -10.677567481994629, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.315427780151367, "logits_per_token": -10.677567481994629, "logits_per_char": -0.9706879529086027, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 274, "native_id": "1342c6aec9f5179d6ea6fa5fefbe5188", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.411712646484375, "incorrect_loss_raw": 9.719560146331787, "correct_loss_per_char": 0.6722651890345982, "incorrect_loss_per_char": 1.019186883706313, "correct_loss_per_token": 2.3529281616210938, "incorrect_loss_per_token": 4.4124832550684605, "correct_loss_uncond": -9.512924194335938, "incorrect_loss_uncond": -8.292382001876831}, "model_output": [{"sum_logits": -10.735123634338379, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.024173736572266, "logits_per_token": -3.5783745447794595, "logits_per_char": -1.3418904542922974, "num_chars": 8}, {"sum_logits": -15.291778564453125, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.353641510009766, "logits_per_token": -7.6458892822265625, "logits_per_char": -1.5291778564453125, "num_chars": 10}, {"sum_logits": -4.515986442565918, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -2.257993221282959, "logits_per_char": -0.5644983053207397, "num_chars": 8}, {"sum_logits": -9.411712646484375, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.924636840820312, "logits_per_token": -2.3529281616210938, "logits_per_char": -0.6722651890345982, "num_chars": 14}, {"sum_logits": -8.335351943969727, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.820775985717773, "logits_per_token": -4.167675971984863, "logits_per_char": -0.6411809187669021, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 275, "native_id": "c74ae684ba6c76e2a913493483678c9d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.428291320800781, "incorrect_loss_raw": 9.524103879928589, "correct_loss_per_char": 0.6190242767333984, "incorrect_loss_per_char": 1.1106775492624519, "correct_loss_per_token": 3.7141456604003906, "incorrect_loss_per_token": 7.532331466674805, "correct_loss_uncond": -9.074474334716797, "incorrect_loss_uncond": -5.380395889282227}, "model_output": [{"sum_logits": -7.428291320800781, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.502765655517578, "logits_per_token": -3.7141456604003906, "logits_per_char": -0.6190242767333984, "num_chars": 12}, {"sum_logits": -8.515909194946289, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.922826766967773, "logits_per_token": -8.515909194946289, "logits_per_char": -1.2165584564208984, "num_chars": 7}, {"sum_logits": -15.934179306030273, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.109270095825195, "logits_per_token": -7.967089653015137, "logits_per_char": -0.9373046650606043, "num_chars": 17}, {"sum_logits": -8.691620826721191, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.557395935058594, "logits_per_token": -8.691620826721191, "logits_per_char": -1.7383241653442383, "num_chars": 5}, {"sum_logits": -4.954706192016602, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.0285062789917, "logits_per_token": -4.954706192016602, "logits_per_char": -0.5505229102240669, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 276, "native_id": "411e50225637b76187cc36b24fe3127c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9100942611694336, "incorrect_loss_raw": 8.982343912124634, "correct_loss_per_char": 0.35546311465176667, "incorrect_loss_per_char": 1.4720663400796743, "correct_loss_per_token": 1.9550471305847168, "incorrect_loss_per_token": 8.982343912124634, "correct_loss_uncond": -13.44583797454834, "incorrect_loss_uncond": -4.409908294677734}, "model_output": [{"sum_logits": -10.346759796142578, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -10.346759796142578, "logits_per_char": -2.0693519592285154, "num_chars": 5}, {"sum_logits": -10.543079376220703, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -10.543079376220703, "logits_per_char": -0.811006105863131, "num_chars": 13}, {"sum_logits": -3.9100942611694336, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.355932235717773, "logits_per_token": -1.9550471305847168, "logits_per_char": -0.35546311465176667, "num_chars": 11}, {"sum_logits": -5.477540016174316, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.14702033996582, "logits_per_token": -5.477540016174316, "logits_per_char": -1.0955080032348632, "num_chars": 5}, {"sum_logits": -9.561996459960938, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.344260215759277, "logits_per_token": -9.561996459960938, "logits_per_char": -1.9123992919921875, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 277, "native_id": "2a0e82bbf1471290c93c8f2a11af197f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.694101333618164, "incorrect_loss_raw": 10.425036668777466, "correct_loss_per_char": 0.9694101333618164, "incorrect_loss_per_char": 1.1437669531984644, "correct_loss_per_token": 4.847050666809082, "incorrect_loss_per_token": 6.825492779413859, "correct_loss_uncond": -7.473108291625977, "incorrect_loss_uncond": -5.56721830368042}, "model_output": [{"sum_logits": -12.100959777832031, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.37051010131836, "logits_per_token": -6.050479888916016, "logits_per_char": -1.7287085396902901, "num_chars": 7}, {"sum_logits": -10.06167221069336, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.838521003723145, "logits_per_token": -10.06167221069336, "logits_per_char": -1.006167221069336, "num_chars": 10}, {"sum_logits": -9.694101333618164, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.16720962524414, "logits_per_token": -4.847050666809082, "logits_per_char": -0.9694101333618164, "num_chars": 10}, {"sum_logits": -7.0159711837768555, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.333724975585938, "logits_per_token": -7.0159711837768555, "logits_per_char": -0.8769963979721069, "num_chars": 8}, {"sum_logits": -12.521543502807617, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.4262638092041, "logits_per_token": -4.173847834269206, "logits_per_char": -0.9631956540621244, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 278, "native_id": "eaadd7a4b18cb48c00f85c3975750fe7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.696203231811523, "incorrect_loss_raw": 12.152964115142822, "correct_loss_per_char": 0.3354430879865374, "incorrect_loss_per_char": 1.6272512078285217, "correct_loss_per_token": 4.696203231811523, "incorrect_loss_per_token": 10.43212342262268, "correct_loss_uncond": -7.380123138427734, "incorrect_loss_uncond": -2.133988380432129}, "model_output": [{"sum_logits": -4.696203231811523, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.076326370239258, "logits_per_token": -4.696203231811523, "logits_per_char": -0.3354430879865374, "num_chars": 14}, {"sum_logits": -8.608973503112793, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.327005386352539, "logits_per_token": -8.608973503112793, "logits_per_char": -1.4348289171854656, "num_chars": 6}, {"sum_logits": -13.766725540161133, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -6.883362770080566, "logits_per_char": -1.7208406925201416, "num_chars": 8}, {"sum_logits": -15.290365219116211, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.20698356628418, "logits_per_token": -15.290365219116211, "logits_per_char": -1.529036521911621, "num_chars": 10}, {"sum_logits": -10.945792198181152, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.764643669128418, "logits_per_token": -10.945792198181152, "logits_per_char": -1.8242986996968586, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 279, "native_id": "403c9b067ef7363efffa822bb08c5426", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6552225351333618, "incorrect_loss_raw": 10.609090328216553, "correct_loss_per_char": 0.15047477592121472, "incorrect_loss_per_char": 1.0900874890779195, "correct_loss_per_token": 0.551740845044454, "incorrect_loss_per_token": 6.808489282925923, "correct_loss_uncond": -14.026423811912537, "incorrect_loss_uncond": -8.171788930892944}, "model_output": [{"sum_logits": -13.0181303024292, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.56818962097168, "logits_per_token": -6.5090651512146, "logits_per_char": -0.6851647527594316, "num_chars": 19}, {"sum_logits": -1.6552225351333618, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.681646347045898, "logits_per_token": -0.551740845044454, "logits_per_char": -0.15047477592121472, "num_chars": 11}, {"sum_logits": -13.040008544921875, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.65340805053711, "logits_per_token": -4.346669514973958, "logits_per_char": -1.185455322265625, "num_chars": 11}, {"sum_logits": -5.759372711181641, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -5.759372711181641, "logits_per_char": -0.7199215888977051, "num_chars": 8}, {"sum_logits": -10.618849754333496, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.903817176818848, "logits_per_token": -10.618849754333496, "logits_per_char": -1.769808292388916, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 280, "native_id": "adf228312401c9ff421a4da1b46bb70a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.436941146850586, "incorrect_loss_raw": 11.611020565032959, "correct_loss_per_char": 0.8883529390607562, "incorrect_loss_per_char": 1.1809689809878667, "correct_loss_per_token": 4.145647048950195, "incorrect_loss_per_token": 5.216658353805542, "correct_loss_uncond": -6.978921890258789, "incorrect_loss_uncond": -6.3773462772369385}, "model_output": [{"sum_logits": -12.3742094039917, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.408809661865234, "logits_per_token": -6.18710470199585, "logits_per_char": -1.2374209403991698, "num_chars": 10}, {"sum_logits": -5.967219352722168, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -2.983609676361084, "logits_per_char": -0.3729512095451355, "num_chars": 16}, {"sum_logits": -13.970207214355469, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.912304878234863, "logits_per_token": -6.985103607177734, "logits_per_char": -2.3283678690592446, "num_chars": 6}, {"sum_logits": -14.1324462890625, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.163169860839844, "logits_per_token": -4.7108154296875, "logits_per_char": -0.7851359049479166, "num_chars": 18}, {"sum_logits": -12.436941146850586, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.415863037109375, "logits_per_token": -4.145647048950195, "logits_per_char": -0.8883529390607562, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 281, "native_id": "57c85e4c7ea2501ef9d8f304b524e2e4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.754903554916382, "incorrect_loss_raw": 7.987505912780762, "correct_loss_per_char": 0.22957529624303183, "incorrect_loss_per_char": 0.8498384338158826, "correct_loss_per_token": 1.377451777458191, "incorrect_loss_per_token": 4.735867559909821, "correct_loss_uncond": -17.804256677627563, "incorrect_loss_uncond": -9.22965955734253}, "model_output": [{"sum_logits": -5.9245781898498535, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.105173110961914, "logits_per_token": -2.9622890949249268, "logits_per_char": -0.4231841564178467, "num_chars": 14}, {"sum_logits": -2.754903554916382, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -1.377451777458191, "logits_per_char": -0.22957529624303183, "num_chars": 12}, {"sum_logits": -10.55230712890625, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.884376525878906, "logits_per_token": -5.276153564453125, "logits_per_char": -1.055230712890625, "num_chars": 10}, {"sum_logits": -5.936916828155518, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -5.936916828155518, "logits_per_char": -1.1873833656311035, "num_chars": 5}, {"sum_logits": -9.536221504211426, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.047225952148438, "logits_per_token": -4.768110752105713, "logits_per_char": -0.7335555003239558, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 282, "native_id": "c22f30eee57f7191ee07e9a916460f68", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.820892572402954, "incorrect_loss_raw": 10.851745843887329, "correct_loss_per_char": 0.3134325080447727, "incorrect_loss_per_char": 1.589882837023054, "correct_loss_per_token": 2.820892572402954, "incorrect_loss_per_token": 9.543396949768066, "correct_loss_uncond": -11.198853254318237, "incorrect_loss_uncond": -2.622422933578491}, "model_output": [{"sum_logits": -2.820892572402954, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -2.820892572402954, "logits_per_char": -0.3134325080447727, "num_chars": 9}, {"sum_logits": -10.44454574584961, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.495441436767578, "logits_per_token": -10.44454574584961, "logits_per_char": -1.4920779636928014, "num_chars": 7}, {"sum_logits": -12.611786842346191, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.016385078430176, "logits_per_token": -12.611786842346191, "logits_per_char": -2.522357368469238, "num_chars": 5}, {"sum_logits": -10.466791152954102, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.58175277709961, "logits_per_token": -5.233395576477051, "logits_per_char": -0.6977860768636067, "num_chars": 15}, {"sum_logits": -9.883859634399414, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.803095817565918, "logits_per_token": -9.883859634399414, "logits_per_char": -1.647309939066569, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 283, "native_id": "026cb9c07a583ec933f2c4c67ae73836", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.214778900146484, "incorrect_loss_raw": 10.559455335140228, "correct_loss_per_char": 1.242955780029297, "incorrect_loss_per_char": 0.7715320112233494, "correct_loss_per_token": 6.214778900146484, "incorrect_loss_per_token": 5.436363905668259, "correct_loss_uncond": -5.968475341796875, "incorrect_loss_uncond": -8.476563036441803}, "model_output": [{"sum_logits": -3.9942071437835693, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.258224487304688, "logits_per_token": -1.9971035718917847, "logits_per_char": -0.3631097403439609, "num_chars": 11}, {"sum_logits": -6.214778900146484, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.18325424194336, "logits_per_token": -6.214778900146484, "logits_per_char": -1.242955780029297, "num_chars": 5}, {"sum_logits": -21.01016616821289, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -24.130605697631836, "logits_per_token": -7.00338872273763, "logits_per_char": -1.2358921275419348, "num_chars": 17}, {"sum_logits": -6.73272705078125, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.032142639160156, "logits_per_token": -2.2442423502604165, "logits_per_char": -0.6120660955255682, "num_chars": 11}, {"sum_logits": -10.500720977783203, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -10.500720977783203, "logits_per_char": -0.8750600814819336, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 284, "native_id": "c57ed32566a2db1ec3d6e4fd595b9d05", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.968687534332275, "incorrect_loss_raw": 16.307921648025513, "correct_loss_per_char": 0.4687463255489574, "incorrect_loss_per_char": 1.0327851574830333, "correct_loss_per_token": 2.6562291781107583, "incorrect_loss_per_token": 6.827415490150452, "correct_loss_uncond": -10.055932521820068, "incorrect_loss_uncond": -3.480003595352173}, "model_output": [{"sum_logits": -12.180594444274902, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.906938552856445, "logits_per_token": -6.090297222137451, "logits_per_char": -1.1073267676613547, "num_chars": 11}, {"sum_logits": -17.687271118164062, "num_tokens": 5, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.935707092285156, "logits_per_token": -3.5374542236328126, "logits_per_char": -0.8843635559082031, "num_chars": 20}, {"sum_logits": -7.968687534332275, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.024620056152344, "logits_per_token": -2.6562291781107583, "logits_per_char": -0.4687463255489574, "num_chars": 17}, {"sum_logits": -19.63239860534668, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.78234100341797, "logits_per_token": -9.81619930267334, "logits_per_char": -1.0906888114081488, "num_chars": 18}, {"sum_logits": -15.731422424316406, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.526714324951172, "logits_per_token": -7.865711212158203, "logits_per_char": -1.048761494954427, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 285, "native_id": "93b52e7ea1acf10db891e9355e234123", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7762069702148438, "incorrect_loss_raw": 9.334810137748718, "correct_loss_per_char": 0.17351293563842773, "incorrect_loss_per_char": 1.0012455985826605, "correct_loss_per_token": 0.9254023234049479, "incorrect_loss_per_token": 5.680077254772186, "correct_loss_uncond": -13.532125473022461, "incorrect_loss_uncond": -7.967016577720642}, "model_output": [{"sum_logits": -2.7762069702148438, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -0.9254023234049479, "logits_per_char": -0.17351293563842773, "num_chars": 16}, {"sum_logits": -6.5384345054626465, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.39954376220703, "logits_per_token": -3.2692172527313232, "logits_per_char": -0.3846137944389792, "num_chars": 17}, {"sum_logits": -14.236551284790039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.365501403808594, "logits_per_token": -7.1182756423950195, "logits_per_char": -0.8897844552993774, "num_chars": 16}, {"sum_logits": -8.101377487182617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -8.101377487182617, "logits_per_char": -2.0253443717956543, "num_chars": 4}, {"sum_logits": -8.46287727355957, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.97815704345703, "logits_per_token": -4.231438636779785, "logits_per_char": -0.7052397727966309, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 286, "native_id": "dbdad44029098d4b1d202d6d857d6092", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.407332420349121, "incorrect_loss_raw": 8.21568775177002, "correct_loss_per_char": 0.9012220700581869, "incorrect_loss_per_char": 1.130305039031165, "correct_loss_per_token": 5.407332420349121, "incorrect_loss_per_token": 8.21568775177002, "correct_loss_uncond": -7.967053413391113, "incorrect_loss_uncond": -6.704141139984131}, "model_output": [{"sum_logits": -5.407332420349121, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -5.407332420349121, "logits_per_char": -0.9012220700581869, "num_chars": 6}, {"sum_logits": -9.713679313659668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -9.713679313659668, "logits_per_char": -1.2142099142074585, "num_chars": 8}, {"sum_logits": -7.384901523590088, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -7.384901523590088, "logits_per_char": -1.0549859319414412, "num_chars": 7}, {"sum_logits": -6.7568888664245605, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.798263549804688, "logits_per_token": -6.7568888664245605, "logits_per_char": -0.9652698380606515, "num_chars": 7}, {"sum_logits": -9.007281303405762, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -9.007281303405762, "logits_per_char": -1.2867544719151087, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 287, "native_id": "69d0f70c173dda17934836d618ca7093", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.208014965057373, "incorrect_loss_raw": 6.848990559577942, "correct_loss_per_char": 0.5148582117898124, "incorrect_loss_per_char": 1.0855926578243573, "correct_loss_per_token": 2.4026716550191245, "incorrect_loss_per_token": 5.860333959261576, "correct_loss_uncond": -10.4053635597229, "incorrect_loss_uncond": -7.406663775444031}, "model_output": [{"sum_logits": -9.635403633117676, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.831130981445312, "logits_per_token": -9.635403633117676, "logits_per_char": -1.6059006055196126, "num_chars": 6}, {"sum_logits": -5.931939601898193, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.6328182220459, "logits_per_token": -1.9773132006327312, "logits_per_char": -0.3707462251186371, "num_chars": 16}, {"sum_logits": -7.208014965057373, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.613378524780273, "logits_per_token": -2.4026716550191245, "logits_per_char": -0.5148582117898124, "num_chars": 14}, {"sum_logits": -8.871478080749512, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.666776657104492, "logits_per_token": -8.871478080749512, "logits_per_char": -1.7742956161499024, "num_chars": 5}, {"sum_logits": -2.9571409225463867, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.891891479492188, "logits_per_token": -2.9571409225463867, "logits_per_char": -0.5914281845092774, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 288, "native_id": "e5697a25935c5249d2108f55e245f3e4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5357286930084229, "incorrect_loss_raw": 12.89865517616272, "correct_loss_per_char": 0.3839321732521057, "incorrect_loss_per_char": 1.3550466420381122, "correct_loss_per_token": 1.5357286930084229, "incorrect_loss_per_token": 7.765607436498006, "correct_loss_uncond": -10.002536535263062, "incorrect_loss_uncond": -4.113572597503662}, "model_output": [{"sum_logits": -7.961437702178955, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -7.961437702178955, "logits_per_char": -1.137348243168422, "num_chars": 7}, {"sum_logits": -7.738712787628174, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -7.738712787628174, "logits_per_char": -1.1055303982325964, "num_chars": 7}, {"sum_logits": -1.5357286930084229, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.538265228271484, "logits_per_token": -1.5357286930084229, "logits_per_char": -0.3839321732521057, "num_chars": 4}, {"sum_logits": -15.509735107421875, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.70873260498047, "logits_per_token": -5.169911702473958, "logits_per_char": -0.9123373592601103, "num_chars": 17}, {"sum_logits": -20.384735107421875, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.369308471679688, "logits_per_token": -10.192367553710938, "logits_per_char": -2.2649705674913196, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 289, "native_id": "99af85081085e6228c6d78c95be01968", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.38602066040039, "incorrect_loss_raw": 8.973639011383057, "correct_loss_per_char": 0.8386020660400391, "incorrect_loss_per_char": 1.1897078117250879, "correct_loss_per_token": 8.38602066040039, "incorrect_loss_per_token": 6.759369850158691, "correct_loss_uncond": -7.13455867767334, "incorrect_loss_uncond": -7.870481967926025}, "model_output": [{"sum_logits": -9.545923233032227, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.020811080932617, "logits_per_token": -9.545923233032227, "logits_per_char": -1.3637033190046037, "num_chars": 7}, {"sum_logits": -7.994189262390137, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.976659774780273, "logits_per_token": -3.9970946311950684, "logits_per_char": -0.6661824385325114, "num_chars": 12}, {"sum_logits": -8.634479522705078, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.313276290893555, "logits_per_token": -8.634479522705078, "logits_per_char": -0.7849526838822798, "num_chars": 11}, {"sum_logits": -9.719964027404785, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.065736770629883, "logits_per_token": -4.859982013702393, "logits_per_char": -1.943992805480957, "num_chars": 5}, {"sum_logits": -8.38602066040039, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.52057933807373, "logits_per_token": -8.38602066040039, "logits_per_char": -0.8386020660400391, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 290, "native_id": "235094c966bcbdc94701b41b969f9c75", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 20.499732971191406, "incorrect_loss_raw": 13.169392585754395, "correct_loss_per_char": 0.8541555404663086, "incorrect_loss_per_char": 1.228408111466302, "correct_loss_per_token": 6.833244323730469, "incorrect_loss_per_token": 10.392558097839355, "correct_loss_uncond": 1.7467994689941406, "incorrect_loss_uncond": -0.6377015113830566}, "model_output": [{"sum_logits": -16.661006927490234, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.223766326904297, "logits_per_token": -5.553668975830078, "logits_per_char": -0.9256114959716797, "num_chars": 18}, {"sum_logits": -20.499732971191406, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.752933502197266, "logits_per_token": -6.833244323730469, "logits_per_char": -0.8541555404663086, "num_chars": 24}, {"sum_logits": -11.184833526611328, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.598287582397461, "logits_per_token": -11.184833526611328, "logits_per_char": -1.242759280734592, "num_chars": 9}, {"sum_logits": -14.348182678222656, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.47290325164795, "logits_per_token": -14.348182678222656, "logits_per_char": -1.4348182678222656, "num_chars": 10}, {"sum_logits": -10.48354721069336, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.933419227600098, "logits_per_token": -10.48354721069336, "logits_per_char": -1.31044340133667, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 291, "native_id": "99789083502af9bf111876a00fae44ac", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.729092597961426, "incorrect_loss_raw": 6.436346411705017, "correct_loss_per_char": 0.6714686613816482, "incorrect_loss_per_char": 0.7347779228573754, "correct_loss_per_token": 8.729092597961426, "incorrect_loss_per_token": 4.1553241312503815, "correct_loss_uncond": -6.6639862060546875, "incorrect_loss_uncond": -8.401659846305847}, "model_output": [{"sum_logits": -6.10567569732666, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.683123588562012, "logits_per_token": -6.10567569732666, "logits_per_char": -0.87223938533238, "num_chars": 7}, {"sum_logits": -3.8937814235687256, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.47786808013916, "logits_per_token": -1.9468907117843628, "logits_per_char": -0.4326423803965251, "num_chars": 9}, {"sum_logits": -8.729092597961426, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -8.729092597961426, "logits_per_char": -0.6714686613816482, "num_chars": 13}, {"sum_logits": -14.35439682006836, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.268585205078125, "logits_per_token": -7.17719841003418, "logits_per_char": -1.435439682006836, "num_chars": 10}, {"sum_logits": -1.3915317058563232, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -1.3915317058563232, "logits_per_char": -0.19879024369376047, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 292, "native_id": "1d44fb5f4b7f1e23ff6c1c083db81ba1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.254226207733154, "incorrect_loss_raw": 14.99555778503418, "correct_loss_per_char": 0.3867478370666504, "incorrect_loss_per_char": 1.3321994770340053, "correct_loss_per_token": 2.127113103866577, "incorrect_loss_per_token": 6.166042248408, "correct_loss_uncond": -10.566845417022705, "incorrect_loss_uncond": -3.6506385803222656}, "model_output": [{"sum_logits": -16.256649017333984, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.279172897338867, "logits_per_token": -8.128324508666992, "logits_per_char": -1.8062943352593317, "num_chars": 9}, {"sum_logits": -16.932050704956055, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -8.466025352478027, "logits_per_char": -1.8813389672173395, "num_chars": 9}, {"sum_logits": -10.336296081542969, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -2.584074020385742, "logits_per_char": -0.5440155832391036, "num_chars": 19}, {"sum_logits": -16.45723533630371, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -5.485745112101237, "logits_per_char": -1.0971490224202474, "num_chars": 15}, {"sum_logits": -4.254226207733154, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.82107162475586, "logits_per_token": -2.127113103866577, "logits_per_char": -0.3867478370666504, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 293, "native_id": "194b66240f6fab75749c1e30ed09ea09", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.246323585510254, "incorrect_loss_raw": 16.824108600616455, "correct_loss_per_char": 0.40579044818878174, "incorrect_loss_per_char": 1.4466130529131207, "correct_loss_per_token": 3.246323585510254, "incorrect_loss_per_token": 7.61314328511556, "correct_loss_uncond": -9.476048469543457, "incorrect_loss_uncond": -5.178882122039795}, "model_output": [{"sum_logits": -17.525110244750977, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.06351661682129, "logits_per_token": -8.762555122375488, "logits_per_char": -1.251793588910784, "num_chars": 14}, {"sum_logits": -13.26020622253418, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.457244873046875, "logits_per_token": -6.63010311126709, "logits_per_char": -1.3260206222534179, "num_chars": 10}, {"sum_logits": -19.173864364624023, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.796125411987305, "logits_per_token": -6.391288121541341, "logits_per_char": -1.4749126434326172, "num_chars": 13}, {"sum_logits": -17.33725357055664, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.69507598876953, "logits_per_token": -8.66862678527832, "logits_per_char": -1.733725357055664, "num_chars": 10}, {"sum_logits": -3.246323585510254, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -3.246323585510254, "logits_per_char": -0.40579044818878174, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 294, "native_id": "83dad4fe630fddbdcd5b18ef890c66f2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.123910427093506, "incorrect_loss_raw": 9.065790057182312, "correct_loss_per_char": 0.4710700328533466, "incorrect_loss_per_char": 0.6023240153278624, "correct_loss_per_token": 2.0413034756978354, "incorrect_loss_per_token": 4.794058442115784, "correct_loss_uncond": -11.312254428863525, "incorrect_loss_uncond": -6.993867754936218}, "model_output": [{"sum_logits": -8.588228225708008, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.87518310546875, "logits_per_token": -2.147057056427002, "logits_per_char": -0.4089632488432385, "num_chars": 21}, {"sum_logits": -8.269776344299316, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.58175277709961, "logits_per_token": -4.134888172149658, "logits_per_char": -0.5513184229532878, "num_chars": 15}, {"sum_logits": -6.383421421051025, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.582568168640137, "logits_per_token": -6.383421421051025, "logits_per_char": -0.7979276776313782, "num_chars": 8}, {"sum_logits": -13.021734237670898, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.199127197265625, "logits_per_token": -6.510867118835449, "logits_per_char": -0.6510867118835449, "num_chars": 20}, {"sum_logits": -6.123910427093506, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.43616485595703, "logits_per_token": -2.0413034756978354, "logits_per_char": -0.4710700328533466, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 295, "native_id": "3ebc5ddd2e97fe37fcb52aa2a9e2e1a7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.801956653594971, "incorrect_loss_raw": 15.903002262115479, "correct_loss_per_char": 0.52745060487227, "incorrect_loss_per_char": 1.0407044771138096, "correct_loss_per_token": 2.9009783267974854, "incorrect_loss_per_token": 11.213285446166992, "correct_loss_uncond": -12.478394985198975, "incorrect_loss_uncond": -1.6401646137237549}, "model_output": [{"sum_logits": -11.300332069396973, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.76818561553955, "logits_per_token": -11.300332069396973, "logits_per_char": -1.0273029153997248, "num_chars": 11}, {"sum_logits": -5.801956653594971, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.280351638793945, "logits_per_token": -2.9009783267974854, "logits_per_char": -0.52745060487227, "num_chars": 11}, {"sum_logits": -14.79394245147705, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.578399658203125, "logits_per_token": -14.79394245147705, "logits_per_char": -0.98626283009847, "num_chars": 15}, {"sum_logits": -18.5345458984375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.795581817626953, "logits_per_token": -9.26727294921875, "logits_per_char": -1.3238961356026786, "num_chars": 14}, {"sum_logits": -18.98318862915039, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.030500411987305, "logits_per_token": -9.491594314575195, "logits_per_char": -0.8253560273543649, "num_chars": 23}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 296, "native_id": "9ed019338a48216de9eadf64faaf1ce0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.884470462799072, "incorrect_loss_raw": 8.911046981811523, "correct_loss_per_char": 0.7167700420726429, "incorrect_loss_per_char": 1.0256323210704021, "correct_loss_per_token": 2.628156820933024, "incorrect_loss_per_token": 6.708798289299011, "correct_loss_uncond": -7.797175884246826, "incorrect_loss_uncond": -6.628829002380371}, "model_output": [{"sum_logits": -10.395244598388672, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -10.395244598388672, "logits_per_char": -1.732540766398112, "num_chars": 6}, {"sum_logits": -8.119254112243652, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.3660945892334, "logits_per_token": -4.059627056121826, "logits_per_char": -0.6245580086341271, "num_chars": 13}, {"sum_logits": -7.630953788757324, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -7.630953788757324, "logits_per_char": -0.9538692235946655, "num_chars": 8}, {"sum_logits": -7.884470462799072, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.681646347045898, "logits_per_token": -2.628156820933024, "logits_per_char": -0.7167700420726429, "num_chars": 11}, {"sum_logits": -9.498735427856445, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.839492797851562, "logits_per_token": -4.749367713928223, "logits_per_char": -0.7915612856547037, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 297, "native_id": "d1d2585e0ba1160948b7c5822a99b7a1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8588008880615234, "incorrect_loss_raw": 12.155156970024109, "correct_loss_per_char": 0.6431334813435873, "incorrect_loss_per_char": 1.7930628857442312, "correct_loss_per_token": 3.8588008880615234, "incorrect_loss_per_token": 12.155156970024109, "correct_loss_uncond": -10.600247383117676, "incorrect_loss_uncond": -2.0184820890426636}, "model_output": [{"sum_logits": -10.326109886169434, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -10.326109886169434, "logits_per_char": -1.2907637357711792, "num_chars": 8}, {"sum_logits": -3.8588008880615234, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -3.8588008880615234, "logits_per_char": -0.6431334813435873, "num_chars": 6}, {"sum_logits": -9.671660423278809, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -9.671660423278809, "logits_per_char": -1.9343320846557617, "num_chars": 5}, {"sum_logits": -20.68071746826172, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -20.68071746826172, "logits_per_char": -2.954388209751674, "num_chars": 7}, {"sum_logits": -7.942140102386475, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -7.942140102386475, "logits_per_char": -0.9927675127983093, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 298, "native_id": "e34a0d1331c6bd4574ffe308e3fbd389", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 17.023845672607422, "incorrect_loss_raw": 16.33280897140503, "correct_loss_per_char": 1.0639903545379639, "incorrect_loss_per_char": 1.3975198723021007, "correct_loss_per_token": 5.674615224202474, "incorrect_loss_per_token": 8.838154395421345, "correct_loss_uncond": -4.844860076904297, "incorrect_loss_uncond": -0.8657162189483643}, "model_output": [{"sum_logits": -12.275344848632812, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.806297302246094, "logits_per_token": -12.275344848632812, "logits_per_char": -1.3639272054036458, "num_chars": 9}, {"sum_logits": -20.704036712646484, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.2177791595459, "logits_per_token": -6.901345570882161, "logits_per_char": -1.4788597651890345, "num_chars": 14}, {"sum_logits": -15.275602340698242, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.44722843170166, "logits_per_token": -7.637801170349121, "logits_per_char": -1.5275602340698242, "num_chars": 10}, {"sum_logits": -17.076251983642578, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.322795867919922, "logits_per_token": -8.538125991821289, "logits_per_char": -1.2197322845458984, "num_chars": 14}, {"sum_logits": -17.023845672607422, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.86870574951172, "logits_per_token": -5.674615224202474, "logits_per_char": -1.0639903545379639, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 299, "native_id": "4858669d0193e5d9384dc37d4bb5c00c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.111206531524658, "incorrect_loss_raw": 11.058563470840454, "correct_loss_per_char": 0.30160093307495117, "incorrect_loss_per_char": 0.9507753047678206, "correct_loss_per_token": 2.111206531524658, "incorrect_loss_per_token": 5.529281735420227, "correct_loss_uncond": -13.179357051849365, "incorrect_loss_uncond": -8.390137910842896}, "model_output": [{"sum_logits": -2.111206531524658, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": true, "sum_logits_uncond": -15.290563583374023, "logits_per_token": -2.111206531524658, "logits_per_char": -0.30160093307495117, "num_chars": 7}, {"sum_logits": -13.394063949584961, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.88372039794922, "logits_per_token": -6.6970319747924805, "logits_per_char": -0.8371289968490601, "num_chars": 16}, {"sum_logits": -10.089522361755371, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.06729507446289, "logits_per_token": -5.0447611808776855, "logits_per_char": -1.1210580401950412, "num_chars": 9}, {"sum_logits": -13.809154510498047, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.472023010253906, "logits_per_token": -6.904577255249023, "logits_per_char": -1.1507628758748372, "num_chars": 12}, {"sum_logits": -6.9415130615234375, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.371767044067383, "logits_per_token": -3.4707565307617188, "logits_per_char": -0.6941513061523438, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 300, "native_id": "8fd82cdc253835814153fe7222e9967c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.409605979919434, "incorrect_loss_raw": 17.16311502456665, "correct_loss_per_char": 0.4917823618108576, "incorrect_loss_per_char": 1.3049516448607812, "correct_loss_per_token": 2.704802989959717, "incorrect_loss_per_token": 7.323184172312419, "correct_loss_uncond": -12.194661140441895, "incorrect_loss_uncond": -4.332440614700317}, "model_output": [{"sum_logits": -12.53173828125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.374744415283203, "logits_per_token": -6.265869140625, "logits_per_char": -0.9639798677884616, "num_chars": 13}, {"sum_logits": -11.466737747192383, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.765427589416504, "logits_per_token": -11.466737747192383, "logits_per_char": -1.6381053924560547, "num_chars": 7}, {"sum_logits": -24.93398666381836, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -31.408700942993164, "logits_per_token": -4.986797332763672, "logits_per_char": -1.3852214813232422, "num_chars": 18}, {"sum_logits": -19.71999740600586, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.433349609375, "logits_per_token": -6.57333246866862, "logits_per_char": -1.2324998378753662, "num_chars": 16}, {"sum_logits": -5.409605979919434, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.604267120361328, "logits_per_token": -2.704802989959717, "logits_per_char": -0.4917823618108576, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 301, "native_id": "66458bf8599c3ef1e7b50fa527531882", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.172996520996094, "incorrect_loss_raw": 13.459174871444702, "correct_loss_per_char": 0.8781997680664062, "incorrect_loss_per_char": 1.1869020287183698, "correct_loss_per_token": 2.634599304199219, "incorrect_loss_per_token": 8.191766500473022, "correct_loss_uncond": -9.499746322631836, "incorrect_loss_uncond": -2.811638593673706}, "model_output": [{"sum_logits": -15.017602920532227, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -10.704026222229004, "logits_per_token": -15.017602920532227, "logits_per_char": -1.3652366291392932, "num_chars": 11}, {"sum_logits": -9.960511207580566, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.49505043029785, "logits_per_token": -3.3201704025268555, "logits_per_char": -0.5859124239753274, "num_chars": 17}, {"sum_logits": -18.566743850708008, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.34930419921875, "logits_per_token": -9.283371925354004, "logits_per_char": -1.3261959893362862, "num_chars": 14}, {"sum_logits": -10.291841506958008, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -5.145920753479004, "logits_per_char": -1.4702630724225725, "num_chars": 7}, {"sum_logits": -13.172996520996094, "num_tokens": 5, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -22.67274284362793, "logits_per_token": -2.634599304199219, "logits_per_char": -0.8781997680664062, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 302, "native_id": "879239b8a788f3c9e3dfdd0862f3d7c5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.441910743713379, "incorrect_loss_raw": 8.485419154167175, "correct_loss_per_char": 0.7441910743713379, "incorrect_loss_per_char": 0.9376150898603133, "correct_loss_per_token": 2.4806369145711265, "incorrect_loss_per_token": 4.93632443745931, "correct_loss_uncond": -13.722796440124512, "incorrect_loss_uncond": -7.782514929771423}, "model_output": [{"sum_logits": -4.025404930114746, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -2.012702465057373, "logits_per_char": -0.28752892357962473, "num_chars": 14}, {"sum_logits": -5.819340229034424, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -5.819340229034424, "logits_per_char": -0.9698900381724039, "num_chars": 6}, {"sum_logits": -7.441910743713379, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -2.4806369145711265, "logits_per_char": -0.7441910743713379, "num_chars": 10}, {"sum_logits": -5.821416854858398, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -5.821416854858398, "logits_per_char": -0.8316309792654855, "num_chars": 7}, {"sum_logits": -18.275514602661133, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -20.938875198364258, "logits_per_token": -6.091838200887044, "logits_per_char": -1.6614104184237393, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 303, "native_id": "8a69e6df5e8ad6c9e6828aa66c59d046", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.434804916381836, "incorrect_loss_raw": 17.743157863616943, "correct_loss_per_char": 0.7764007023402623, "incorrect_loss_per_char": 1.5905490041983248, "correct_loss_per_token": 5.434804916381836, "incorrect_loss_per_token": 11.31583086649577, "correct_loss_uncond": -7.8487958908081055, "incorrect_loss_uncond": -1.6133792400360107}, "model_output": [{"sum_logits": -11.936548233032227, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -11.936548233032227, "logits_per_char": -1.989424705505371, "num_chars": 6}, {"sum_logits": -30.85116958618164, "num_tokens": 6, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -33.81758499145508, "logits_per_token": -5.14186159769694, "logits_per_char": -1.4023258902809836, "num_chars": 22}, {"sum_logits": -13.675865173339844, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.906493186950684, "logits_per_token": -13.675865173339844, "logits_per_char": -1.5195405748155382, "num_chars": 9}, {"sum_logits": -5.434804916381836, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -5.434804916381836, "logits_per_char": -0.7764007023402623, "num_chars": 7}, {"sum_logits": -14.509048461914062, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.414422035217285, "logits_per_token": -14.509048461914062, "logits_per_char": -1.4509048461914062, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 304, "native_id": "8d275acea05fd16295c659c504576a9b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.242250442504883, "incorrect_loss_raw": 9.741291999816895, "correct_loss_per_char": 0.23158931732177734, "incorrect_loss_per_char": 1.5145192046960194, "correct_loss_per_token": 1.6211252212524414, "incorrect_loss_per_token": 8.195156574249268, "correct_loss_uncond": -13.458410263061523, "incorrect_loss_uncond": -5.629238843917847}, "model_output": [{"sum_logits": -12.78781795501709, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.603607177734375, "logits_per_token": -12.78781795501709, "logits_per_char": -3.1969544887542725, "num_chars": 4}, {"sum_logits": -3.242250442504883, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.700660705566406, "logits_per_token": -1.6211252212524414, "logits_per_char": -0.23158931732177734, "num_chars": 14}, {"sum_logits": -8.57627010345459, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -8.57627010345459, "logits_per_char": -1.0720337629318237, "num_chars": 8}, {"sum_logits": -8.246055603027344, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.477386474609375, "logits_per_token": -2.061513900756836, "logits_per_char": -0.7496414184570312, "num_chars": 11}, {"sum_logits": -9.355024337768555, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -9.355024337768555, "logits_per_char": -1.0394471486409504, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 305, "native_id": "91629c6f9e4af3e6acf385eb23fd8068", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.50568675994873, "incorrect_loss_raw": 7.824353218078613, "correct_loss_per_char": 0.85327569176169, "incorrect_loss_per_char": 1.006294842561086, "correct_loss_per_token": 7.252843379974365, "incorrect_loss_per_token": 5.481808423995972, "correct_loss_uncond": -6.534432411193848, "incorrect_loss_uncond": -6.8680503368377686}, "model_output": [{"sum_logits": -12.776402473449707, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.773482322692871, "logits_per_token": -6.3882012367248535, "logits_per_char": -1.4196002748277452, "num_chars": 9}, {"sum_logits": -5.963955879211426, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.93924617767334, "logits_per_token": -2.981977939605713, "logits_per_char": -0.49699632326761883, "num_chars": 12}, {"sum_logits": -4.531816482543945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.996745109558105, "logits_per_token": -4.531816482543945, "logits_per_char": -0.503535164727105, "num_chars": 9}, {"sum_logits": -14.50568675994873, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.040119171142578, "logits_per_token": -7.252843379974365, "logits_per_char": -0.85327569176169, "num_chars": 17}, {"sum_logits": -8.025238037109375, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.060140609741211, "logits_per_token": -8.025238037109375, "logits_per_char": -1.605047607421875, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 306, "native_id": "59eb56f366407ac7db72996be265883b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.958459854125977, "incorrect_loss_raw": 11.592373847961426, "correct_loss_per_char": 0.9948074817657471, "incorrect_loss_per_char": 1.795037539800008, "correct_loss_per_token": 7.958459854125977, "incorrect_loss_per_token": 11.592373847961426, "correct_loss_uncond": -4.227982521057129, "incorrect_loss_uncond": -1.3056354522705078}, "model_output": [{"sum_logits": -15.665153503417969, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.808587074279785, "logits_per_token": -15.665153503417969, "logits_per_char": -1.7405726114908855, "num_chars": 9}, {"sum_logits": -10.519362449645996, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.466108322143555, "logits_per_token": -10.519362449645996, "logits_per_char": -1.7532270749409993, "num_chars": 6}, {"sum_logits": -9.665616989135742, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.85123348236084, "logits_per_token": -9.665616989135742, "logits_per_char": -1.9331233978271485, "num_chars": 5}, {"sum_logits": -10.519362449645996, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.466108322143555, "logits_per_token": -10.519362449645996, "logits_per_char": -1.7532270749409993, "num_chars": 6}, {"sum_logits": -7.958459854125977, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.186442375183105, "logits_per_token": -7.958459854125977, "logits_per_char": -0.9948074817657471, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 307, "native_id": "4ab069f2e979d51f2c5929f590d09982", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.011569023132324, "incorrect_loss_raw": 8.60283625125885, "correct_loss_per_char": 0.35796921593802317, "incorrect_loss_per_char": 1.314106044624791, "correct_loss_per_token": 2.505784511566162, "incorrect_loss_per_token": 8.60283625125885, "correct_loss_uncond": -12.927823066711426, "incorrect_loss_uncond": -6.110050559043884}, "model_output": [{"sum_logits": -8.654502868652344, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.736997604370117, "logits_per_token": -8.654502868652344, "logits_per_char": -0.786772988059304, "num_chars": 11}, {"sum_logits": -8.435009002685547, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.464624404907227, "logits_per_token": -8.435009002685547, "logits_per_char": -1.4058348337809246, "num_chars": 6}, {"sum_logits": -5.011569023132324, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.93939208984375, "logits_per_token": -2.505784511566162, "logits_per_char": -0.35796921593802317, "num_chars": 14}, {"sum_logits": -12.016508102416992, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.79151439666748, "logits_per_token": -12.016508102416992, "logits_per_char": -2.002751350402832, "num_chars": 6}, {"sum_logits": -5.305325031280518, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -5.305325031280518, "logits_per_char": -1.0610650062561036, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 308, "native_id": "d6bb990e8c409d2b3af37a2da198e01f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.740655899047852, "incorrect_loss_raw": 14.357929468154907, "correct_loss_per_char": 1.0569735306959887, "incorrect_loss_per_char": 1.3301634977734278, "correct_loss_per_token": 6.870327949523926, "incorrect_loss_per_token": 10.014903783798218, "correct_loss_uncond": -4.954214096069336, "incorrect_loss_uncond": -3.5060999393463135}, "model_output": [{"sum_logits": -8.917610168457031, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.98984146118164, "logits_per_token": -8.917610168457031, "logits_per_char": -1.114701271057129, "num_chars": 8}, {"sum_logits": -13.740655899047852, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.694869995117188, "logits_per_token": -6.870327949523926, "logits_per_char": -1.0569735306959887, "num_chars": 13}, {"sum_logits": -16.31570816040039, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.07915496826172, "logits_per_token": -8.157854080200195, "logits_per_char": -1.3596423467000325, "num_chars": 12}, {"sum_logits": -13.769902229309082, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -13.769902229309082, "logits_per_char": -1.529989136589898, "num_chars": 9}, {"sum_logits": -18.428497314453125, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.417009353637695, "logits_per_token": -9.214248657226562, "logits_per_char": -1.3163212367466517, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 309, "native_id": "c5ad166ab5c5f5f067aa02b20f482523", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.571144104003906, "incorrect_loss_raw": 9.727505683898926, "correct_loss_per_char": 1.1963930130004883, "incorrect_loss_per_char": 1.3660472763909235, "correct_loss_per_token": 9.571144104003906, "incorrect_loss_per_token": 8.226306438446045, "correct_loss_uncond": -4.180805206298828, "incorrect_loss_uncond": -3.899592876434326}, "model_output": [{"sum_logits": -12.009593963623047, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.42584991455078, "logits_per_token": -6.004796981811523, "logits_per_char": -0.6671996646457248, "num_chars": 18}, {"sum_logits": -8.787093162536621, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -8.787093162536621, "logits_per_char": -2.1967732906341553, "num_chars": 4}, {"sum_logits": -9.349470138549805, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.88928508758545, "logits_per_token": -9.349470138549805, "logits_per_char": -1.869894027709961, "num_chars": 5}, {"sum_logits": -8.76386547088623, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.631166458129883, "logits_per_token": -8.76386547088623, "logits_per_char": -0.7303221225738525, "num_chars": 12}, {"sum_logits": -9.571144104003906, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.751949310302734, "logits_per_token": -9.571144104003906, "logits_per_char": -1.1963930130004883, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 310, "native_id": "ceafca2445b1b974d085a8cce38e8e44", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.943907737731934, "incorrect_loss_raw": 10.703294038772583, "correct_loss_per_char": 0.9929884672164917, "incorrect_loss_per_char": 0.7352082226011487, "correct_loss_per_token": 3.971953868865967, "incorrect_loss_per_token": 6.114504059155782, "correct_loss_uncond": -7.971156120300293, "incorrect_loss_uncond": -8.179117679595947}, "model_output": [{"sum_logits": -7.943907737731934, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.915063858032227, "logits_per_token": -3.971953868865967, "logits_per_char": -0.9929884672164917, "num_chars": 8}, {"sum_logits": -3.709418296813965, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.55059051513672, "logits_per_token": -1.8547091484069824, "logits_per_char": -0.412157588534885, "num_chars": 9}, {"sum_logits": -16.196054458618164, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -26.925085067749023, "logits_per_token": -5.398684819539388, "logits_per_char": -0.8098027229309082, "num_chars": 20}, {"sum_logits": -11.501541137695312, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.108128547668457, "logits_per_token": -11.501541137695312, "logits_per_char": -0.9584617614746094, "num_chars": 12}, {"sum_logits": -11.40616226196289, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.945842742919922, "logits_per_token": -5.703081130981445, "logits_per_char": -0.7604108174641927, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 311, "native_id": "2ef2ae21a2d3a9ecbd5c45ff378d10e3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4300713539123535, "incorrect_loss_raw": 13.254092693328857, "correct_loss_per_char": 0.4900101934160505, "incorrect_loss_per_char": 1.3480892570159824, "correct_loss_per_token": 3.4300713539123535, "incorrect_loss_per_token": 9.338632583618164, "correct_loss_uncond": -8.84693193435669, "incorrect_loss_uncond": -4.752318859100342}, "model_output": [{"sum_logits": -21.181095123291016, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.820106506347656, "logits_per_token": -10.590547561645508, "logits_per_char": -1.7650912602742512, "num_chars": 12}, {"sum_logits": -9.289779663085938, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.987024307250977, "logits_per_token": -9.289779663085938, "logits_per_char": -1.3271113804408483, "num_chars": 7}, {"sum_logits": -10.142585754394531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.990610122680664, "logits_per_token": -5.071292877197266, "logits_per_char": -0.9220532503995028, "num_chars": 11}, {"sum_logits": -3.4300713539123535, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.277003288269043, "logits_per_token": -3.4300713539123535, "logits_per_char": -0.4900101934160505, "num_chars": 7}, {"sum_logits": -12.402910232543945, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.2279052734375, "logits_per_token": -12.402910232543945, "logits_per_char": -1.3781011369493272, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 312, "native_id": "793672da43fbc609e8c5760630c7e239", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.839528560638428, "incorrect_loss_raw": 11.30324411392212, "correct_loss_per_char": 0.6839528560638428, "incorrect_loss_per_char": 1.2736577952062929, "correct_loss_per_token": 6.839528560638428, "incorrect_loss_per_token": 5.65162205696106, "correct_loss_uncond": -7.537667751312256, "incorrect_loss_uncond": -5.344127655029297}, "model_output": [{"sum_logits": -11.89625072479248, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.594419479370117, "logits_per_token": -5.94812536239624, "logits_per_char": -1.189625072479248, "num_chars": 10}, {"sum_logits": -8.541207313537598, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.295787811279297, "logits_per_token": -4.270603656768799, "logits_per_char": -1.2201724733625139, "num_chars": 7}, {"sum_logits": -12.088447570800781, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -6.044223785400391, "logits_per_char": -1.0989497791637073, "num_chars": 11}, {"sum_logits": -12.687070846557617, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -6.343535423278809, "logits_per_char": -1.5858838558197021, "num_chars": 8}, {"sum_logits": -6.839528560638428, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -6.839528560638428, "logits_per_char": -0.6839528560638428, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 313, "native_id": "558cb0bc25387ce38d71f64ef6f1fa57", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.547077178955078, "incorrect_loss_raw": 18.358185291290283, "correct_loss_per_char": 0.9588251980868253, "incorrect_loss_per_char": 1.8029132982443645, "correct_loss_per_token": 5.273538589477539, "incorrect_loss_per_token": 7.350288351376852, "correct_loss_uncond": -9.901893615722656, "incorrect_loss_uncond": -2.568375587463379}, "model_output": [{"sum_logits": -16.695802688598633, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.720102310180664, "logits_per_token": -8.347901344299316, "logits_per_char": -1.8550891876220703, "num_chars": 9}, {"sum_logits": -10.547077178955078, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.448970794677734, "logits_per_token": -5.273538589477539, "logits_per_char": -0.9588251980868253, "num_chars": 11}, {"sum_logits": -25.038881301879883, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -24.40111541748047, "logits_per_token": -8.346293767293295, "logits_per_char": -2.782097922431098, "num_chars": 9}, {"sum_logits": -12.568281173706055, "num_tokens": 4, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -3.1420702934265137, "logits_per_char": -0.6614884828266344, "num_chars": 19}, {"sum_logits": -19.129776000976562, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -21.502880096435547, "logits_per_token": -9.564888000488281, "logits_per_char": -1.9129776000976562, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 314, "native_id": "2c9f4a98ce774cd734b6e384d95051a7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6509933471679688, "incorrect_loss_raw": 8.987840175628662, "correct_loss_per_char": 0.28084564208984375, "incorrect_loss_per_char": 0.9173193263457108, "correct_loss_per_token": 3.6509933471679688, "incorrect_loss_per_token": 5.992798527081808, "correct_loss_uncond": -10.983148574829102, "incorrect_loss_uncond": -6.921604156494141}, "model_output": [{"sum_logits": -9.256292343139648, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.943117141723633, "logits_per_token": -9.256292343139648, "logits_per_char": -1.3223274775913783, "num_chars": 7}, {"sum_logits": -3.6509933471679688, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -3.6509933471679688, "logits_per_char": -0.28084564208984375, "num_chars": 13}, {"sum_logits": -10.600683212280273, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.818077087402344, "logits_per_token": -3.5335610707600913, "logits_per_char": -0.7571916580200195, "num_chars": 14}, {"sum_logits": -9.826088905334473, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.539337158203125, "logits_per_token": -4.913044452667236, "logits_per_char": -0.8932808095758612, "num_chars": 11}, {"sum_logits": -6.268296241760254, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.33724594116211, "logits_per_token": -6.268296241760254, "logits_per_char": -0.6964773601955838, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 315, "native_id": "33c84708785f88c19737ef5b0e31a64b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.600187301635742, "incorrect_loss_raw": 10.906859159469604, "correct_loss_per_char": 0.7384759462796725, "incorrect_loss_per_char": 1.3567799556823006, "correct_loss_per_token": 4.800093650817871, "incorrect_loss_per_token": 9.015012741088867, "correct_loss_uncond": -9.768709182739258, "incorrect_loss_uncond": -4.987776756286621}, "model_output": [{"sum_logits": -10.67848014831543, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -10.67848014831543, "logits_per_char": -1.5254971640450614, "num_chars": 7}, {"sum_logits": -15.134771347045898, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.29840087890625, "logits_per_token": -7.567385673522949, "logits_per_char": -1.1642131805419922, "num_chars": 13}, {"sum_logits": -8.086087226867676, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.789668083190918, "logits_per_token": -8.086087226867676, "logits_per_char": -1.347681204477946, "num_chars": 6}, {"sum_logits": -9.600187301635742, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.368896484375, "logits_per_token": -4.800093650817871, "logits_per_char": -0.7384759462796725, "num_chars": 13}, {"sum_logits": -9.728097915649414, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -9.728097915649414, "logits_per_char": -1.389728273664202, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 316, "native_id": "d867f76d000bdb59b9b4cb982bd7f0a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.422028541564941, "incorrect_loss_raw": 12.358738899230957, "correct_loss_per_char": 1.4277535676956177, "incorrect_loss_per_char": 1.1232713746089562, "correct_loss_per_token": 5.711014270782471, "incorrect_loss_per_token": 4.849331855773926, "correct_loss_uncond": -4.354203224182129, "incorrect_loss_uncond": -4.613859176635742}, "model_output": [{"sum_logits": -18.027551651000977, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -6.009183883666992, "logits_per_char": -1.0604442147647632, "num_chars": 17}, {"sum_logits": -16.748851776123047, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -17.909761428833008, "logits_per_token": -5.582950592041016, "logits_per_char": -1.6748851776123046, "num_chars": 10}, {"sum_logits": -10.280038833618164, "num_tokens": 3, "num_tokens_all": 172, "is_greedy": false, "sum_logits_uncond": -16.14378547668457, "logits_per_token": -3.4266796112060547, "logits_per_char": -1.0280038833618164, "num_chars": 10}, {"sum_logits": -11.422028541564941, "num_tokens": 2, "num_tokens_all": 171, "is_greedy": false, "sum_logits_uncond": -15.77623176574707, "logits_per_token": -5.711014270782471, "logits_per_char": -1.4277535676956177, "num_chars": 8}, {"sum_logits": -4.378513336181641, "num_tokens": 1, "num_tokens_all": 170, "is_greedy": false, "sum_logits_uncond": -13.727909088134766, "logits_per_token": -4.378513336181641, "logits_per_char": -0.7297522226969401, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 317, "native_id": "8c607d2e2e897d74048fcc794137b683", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.685689926147461, "incorrect_loss_raw": 12.34208071231842, "correct_loss_per_char": 0.6918349947248187, "incorrect_loss_per_char": 1.3003291817375153, "correct_loss_per_token": 3.2285633087158203, "incorrect_loss_per_token": 7.530990878740947, "correct_loss_uncond": -8.205217361450195, "incorrect_loss_uncond": -4.398289084434509}, "model_output": [{"sum_logits": -20.039634704589844, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.964811325073242, "logits_per_token": -10.019817352294922, "logits_per_char": -1.5415103618915265, "num_chars": 13}, {"sum_logits": -9.173974990844727, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -9.173974990844727, "logits_per_char": -1.8347949981689453, "num_chars": 5}, {"sum_logits": -13.836812973022461, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -4.612270991007487, "logits_per_char": -0.9224541982014974, "num_chars": 15}, {"sum_logits": -9.685689926147461, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.890907287597656, "logits_per_token": -3.2285633087158203, "logits_per_char": -0.6918349947248187, "num_chars": 14}, {"sum_logits": -6.31790018081665, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -6.31790018081665, "logits_per_char": -0.9025571686880929, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 318, "native_id": "5215e26c99b2a9b376fb1c70096a388a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.731084823608398, "incorrect_loss_raw": 12.46932053565979, "correct_loss_per_char": 1.9663856029510498, "incorrect_loss_per_char": 1.7175098371024082, "correct_loss_per_token": 7.865542411804199, "incorrect_loss_per_token": 7.049937168757121, "correct_loss_uncond": -1.1666946411132812, "incorrect_loss_uncond": -5.006922006607056}, "model_output": [{"sum_logits": -13.761030197143555, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.30510711669922, "logits_per_token": -4.5870100657145185, "logits_per_char": -1.5290033552381728, "num_chars": 9}, {"sum_logits": -15.731084823608398, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.89777946472168, "logits_per_token": -7.865542411804199, "logits_per_char": -1.9663856029510498, "num_chars": 8}, {"sum_logits": -11.164888381958008, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.354373931884766, "logits_per_token": -5.582444190979004, "logits_per_char": -1.8608147303263347, "num_chars": 6}, {"sum_logits": -11.109225273132324, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -11.109225273132324, "logits_per_char": -2.2218450546264648, "num_chars": 5}, {"sum_logits": -13.842138290405273, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.940265655517578, "logits_per_token": -6.921069145202637, "logits_per_char": -1.2583762082186611, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 319, "native_id": "668dc6bce771b10cbf6336f3ec76520a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.501948356628418, "incorrect_loss_raw": 12.489650249481201, "correct_loss_per_char": 0.9446609285142686, "incorrect_loss_per_char": 1.6709617669765766, "correct_loss_per_token": 4.250974178314209, "incorrect_loss_per_token": 12.489650249481201, "correct_loss_uncond": -6.031826019287109, "incorrect_loss_uncond": -1.2383074760437012}, "model_output": [{"sum_logits": -12.100479125976562, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -12.100479125976562, "logits_per_char": -0.9308060866135818, "num_chars": 13}, {"sum_logits": -8.501948356628418, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -4.250974178314209, "logits_per_char": -0.9446609285142686, "num_chars": 9}, {"sum_logits": -14.442529678344727, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.813013076782227, "logits_per_token": -14.442529678344727, "logits_per_char": -1.4442529678344727, "num_chars": 10}, {"sum_logits": -12.18567943572998, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.653560638427734, "logits_per_token": -12.18567943572998, "logits_per_char": -2.4371358871459963, "num_chars": 5}, {"sum_logits": -11.229912757873535, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.803095817565918, "logits_per_token": -11.229912757873535, "logits_per_char": -1.8716521263122559, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 320, "native_id": "a339fe08f1f50463ee180b797e99ebcc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.029446601867676, "incorrect_loss_raw": 9.272570371627808, "correct_loss_per_char": 0.502453883488973, "incorrect_loss_per_char": 1.5594613313674928, "correct_loss_per_token": 3.014723300933838, "incorrect_loss_per_token": 6.251101613044739, "correct_loss_uncond": -15.130328178405762, "incorrect_loss_uncond": -4.959275484085083}, "model_output": [{"sum_logits": -3.047679901123047, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -3.047679901123047, "logits_per_char": -0.6095359802246094, "num_chars": 5}, {"sum_logits": -15.184086799621582, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.709712982177734, "logits_per_token": -7.592043399810791, "logits_per_char": -2.5306811332702637, "num_chars": 6}, {"sum_logits": -9.870851516723633, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.883607864379883, "logits_per_token": -9.870851516723633, "logits_per_char": -1.9741703033447267, "num_chars": 5}, {"sum_logits": -6.029446601867676, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.159774780273438, "logits_per_token": -3.014723300933838, "logits_per_char": -0.502453883488973, "num_chars": 12}, {"sum_logits": -8.987663269042969, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.314987182617188, "logits_per_token": -4.493831634521484, "logits_per_char": -1.123457908630371, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 321, "native_id": "526cd34f5b2afefbbb7830434785f298", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.936970710754395, "incorrect_loss_raw": 10.077447414398193, "correct_loss_per_char": 1.787394142150879, "incorrect_loss_per_char": 1.9523865699768068, "correct_loss_per_token": 8.936970710754395, "incorrect_loss_per_token": 10.077447414398193, "correct_loss_uncond": -3.670987129211426, "incorrect_loss_uncond": -4.359739065170288}, "model_output": [{"sum_logits": -8.936970710754395, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.60795783996582, "logits_per_token": -8.936970710754395, "logits_per_char": -1.787394142150879, "num_chars": 5}, {"sum_logits": -14.077156066894531, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.90162467956543, "logits_per_token": -14.077156066894531, "logits_per_char": -2.3461926778157554, "num_chars": 6}, {"sum_logits": -8.496517181396484, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -8.496517181396484, "logits_per_char": -1.699303436279297, "num_chars": 5}, {"sum_logits": -8.03974723815918, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -8.03974723815918, "logits_per_char": -1.33995787302653, "num_chars": 6}, {"sum_logits": -9.696369171142578, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.551799774169922, "logits_per_token": -9.696369171142578, "logits_per_char": -2.4240922927856445, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 322, "native_id": "6c1c1c282cebe8917f607f0dbc1c102e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.987252235412598, "incorrect_loss_raw": 10.6784029006958, "correct_loss_per_char": 1.4968130588531494, "incorrect_loss_per_char": 1.276724178806629, "correct_loss_per_token": 5.987252235412598, "incorrect_loss_per_token": 7.527607321739197, "correct_loss_uncond": -7.282436370849609, "incorrect_loss_uncond": -4.386476755142212}, "model_output": [{"sum_logits": -9.197736740112305, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -10.18783950805664, "logits_per_token": -9.197736740112305, "logits_per_char": -1.5329561233520508, "num_chars": 6}, {"sum_logits": -13.507889747619629, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -6.7539448738098145, "logits_per_char": -1.5008766386244032, "num_chars": 9}, {"sum_logits": -8.309510231018066, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.797646522521973, "logits_per_token": -8.309510231018066, "logits_per_char": -1.3849183718363445, "num_chars": 6}, {"sum_logits": -11.698474884033203, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.79702377319336, "logits_per_token": -5.849237442016602, "logits_per_char": -0.6881455814137178, "num_chars": 17}, {"sum_logits": -5.987252235412598, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.269688606262207, "logits_per_token": -5.987252235412598, "logits_per_char": -1.4968130588531494, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 323, "native_id": "b5baf77d3855935c87f01f5fb2216667", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.394871711730957, "incorrect_loss_raw": 7.622180998325348, "correct_loss_per_char": 0.3596581141153971, "incorrect_loss_per_char": 1.036209966076745, "correct_loss_per_token": 2.6974358558654785, "incorrect_loss_per_token": 6.3591747879981995, "correct_loss_uncond": -12.131842613220215, "incorrect_loss_uncond": -7.7003085017204285}, "model_output": [{"sum_logits": -9.31634521484375, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.171759605407715, "logits_per_token": -9.31634521484375, "logits_per_char": -1.86326904296875, "num_chars": 5}, {"sum_logits": -8.70861530303955, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.496543884277344, "logits_per_token": -8.70861530303955, "logits_per_char": -0.9676239225599501, "num_chars": 9}, {"sum_logits": -2.3597137928009033, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.239700317382812, "logits_per_token": -2.3597137928009033, "logits_per_char": -0.47194275856018064, "num_chars": 5}, {"sum_logits": -5.394871711730957, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.526714324951172, "logits_per_token": -2.6974358558654785, "logits_per_char": -0.3596581141153971, "num_chars": 15}, {"sum_logits": -10.104049682617188, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.381954193115234, "logits_per_token": -5.052024841308594, "logits_per_char": -0.842004140218099, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 324, "native_id": "83808e92381b2e5f4cdf55d1391645ae", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.877527236938477, "incorrect_loss_raw": 11.481459140777588, "correct_loss_per_char": 0.9755054473876953, "incorrect_loss_per_char": 1.8437279122216361, "correct_loss_per_token": 4.877527236938477, "incorrect_loss_per_token": 11.481459140777588, "correct_loss_uncond": -7.820104598999023, "incorrect_loss_uncond": -2.516204833984375}, "model_output": [{"sum_logits": -11.695083618164062, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -11.695083618164062, "logits_per_char": -1.9491806030273438, "num_chars": 6}, {"sum_logits": -12.60413932800293, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.585806846618652, "logits_per_token": -12.60413932800293, "logits_per_char": -2.1006898880004883, "num_chars": 6}, {"sum_logits": -11.734566688537598, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -11.734566688537598, "logits_per_char": -1.6763666697910853, "num_chars": 7}, {"sum_logits": -9.892046928405762, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -9.892046928405762, "logits_per_char": -1.648674488067627, "num_chars": 6}, {"sum_logits": -4.877527236938477, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.6976318359375, "logits_per_token": -4.877527236938477, "logits_per_char": -0.9755054473876953, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 325, "native_id": "1a86310d7279097205a3403752c3b914", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.622802734375, "incorrect_loss_raw": 14.128315210342407, "correct_loss_per_char": 0.9580891927083334, "incorrect_loss_per_char": 1.4742569008043833, "correct_loss_per_token": 8.622802734375, "incorrect_loss_per_token": 6.469496846199036, "correct_loss_uncond": -5.947758674621582, "incorrect_loss_uncond": -2.2943005561828613}, "model_output": [{"sum_logits": -14.271858215332031, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.265045166015625, "logits_per_token": -4.757286071777344, "logits_per_char": -1.019418443952288, "num_chars": 14}, {"sum_logits": -8.622802734375, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.570561408996582, "logits_per_token": -8.622802734375, "logits_per_char": -0.9580891927083334, "num_chars": 9}, {"sum_logits": -18.49960708618164, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.680255889892578, "logits_per_token": -9.24980354309082, "logits_per_char": -2.6428010123116628, "num_chars": 7}, {"sum_logits": -11.72666072845459, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.867227554321289, "logits_per_token": -5.863330364227295, "logits_per_char": -0.7329162955284119, "num_chars": 16}, {"sum_logits": -12.015134811401367, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.877934455871582, "logits_per_token": -6.007567405700684, "logits_per_char": -1.501891851425171, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 326, "native_id": "b4130d1790948134f3aeab9d3d79c181", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2344825267791748, "incorrect_loss_raw": 10.580434918403625, "correct_loss_per_char": 0.20574708779652914, "incorrect_loss_per_char": 2.006864471236865, "correct_loss_per_token": 1.2344825267791748, "incorrect_loss_per_token": 7.753401398658752, "correct_loss_uncond": -10.087391138076782, "incorrect_loss_uncond": -5.601668477058411}, "model_output": [{"sum_logits": -1.2344825267791748, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.321873664855957, "logits_per_token": -1.2344825267791748, "logits_per_char": -0.20574708779652914, "num_chars": 6}, {"sum_logits": -7.721555709838867, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -7.721555709838867, "logits_per_char": -1.2869259516398113, "num_chars": 6}, {"sum_logits": -22.616268157958984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.973209381103516, "logits_per_token": -11.308134078979492, "logits_per_char": -4.523253631591797, "num_chars": 5}, {"sum_logits": -6.229605197906494, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -6.229605197906494, "logits_per_char": -0.7787006497383118, "num_chars": 8}, {"sum_logits": -5.754310607910156, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.966537475585938, "logits_per_token": -5.754310607910156, "logits_per_char": -1.438577651977539, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 327, "native_id": "a5097b7f56d20217679f28201801476f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6455793380737305, "incorrect_loss_raw": 9.909003019332886, "correct_loss_per_char": 0.30379827817281085, "incorrect_loss_per_char": 1.076366493058583, "correct_loss_per_token": 1.8227896690368652, "incorrect_loss_per_token": 4.875414848327637, "correct_loss_uncond": -12.490815162658691, "incorrect_loss_uncond": -8.213274478912354}, "model_output": [{"sum_logits": -6.509618759155273, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.483875274658203, "logits_per_token": -3.2548093795776367, "logits_per_char": -0.6509618759155273, "num_chars": 10}, {"sum_logits": -3.2233924865722656, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.326382637023926, "logits_per_token": -3.2233924865722656, "logits_per_char": -0.4604846409388951, "num_chars": 7}, {"sum_logits": -3.6455793380737305, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -1.8227896690368652, "logits_per_char": -0.30379827817281085, "num_chars": 12}, {"sum_logits": -11.568257331848145, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.20184326171875, "logits_per_token": -3.856085777282715, "logits_per_char": -1.1568257331848144, "num_chars": 10}, {"sum_logits": -18.33474349975586, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -9.16737174987793, "logits_per_char": -2.0371937221950955, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 328, "native_id": "bcc5dd6292a64d8fa17cd07c360b335d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.5943996906280518, "incorrect_loss_raw": 6.278727889060974, "correct_loss_per_char": 0.18531426361628942, "incorrect_loss_per_char": 0.6962149528896108, "correct_loss_per_token": 1.2971998453140259, "incorrect_loss_per_token": 4.756598035494487, "correct_loss_uncond": -15.131708860397339, "incorrect_loss_uncond": -10.113149046897888}, "model_output": [{"sum_logits": -13.070005416870117, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -13.070005416870117, "logits_per_char": -1.8671436309814453, "num_chars": 7}, {"sum_logits": -9.132779121398926, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.640356063842773, "logits_per_token": -3.044259707132975, "logits_per_char": -0.5372223012587604, "num_chars": 17}, {"sum_logits": -2.5943996906280518, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.72610855102539, "logits_per_token": -1.2971998453140259, "logits_per_char": -0.18531426361628942, "num_chars": 14}, {"sum_logits": -2.019315242767334, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -2.019315242767334, "logits_per_char": -0.2019315242767334, "num_chars": 10}, {"sum_logits": -0.8928117752075195, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -12.351519584655762, "logits_per_token": -0.8928117752075195, "logits_per_char": -0.17856235504150392, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 329, "native_id": "cfc7fccb8449a2a950c9d2a50991420e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.841880798339844, "incorrect_loss_raw": 11.539483308792114, "correct_loss_per_char": 0.9887057713099888, "incorrect_loss_per_char": 0.9419059038542484, "correct_loss_per_token": 6.920940399169922, "incorrect_loss_per_token": 7.637794852256775, "correct_loss_uncond": -5.174285888671875, "incorrect_loss_uncond": -6.303987979888916}, "model_output": [{"sum_logits": -9.472274780273438, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.100486755371094, "logits_per_token": -9.472274780273438, "logits_per_char": -0.8611158891157671, "num_chars": 11}, {"sum_logits": -13.841880798339844, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.01616668701172, "logits_per_token": -6.920940399169922, "logits_per_char": -0.9887057713099888, "num_chars": 14}, {"sum_logits": -18.70865249633789, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.70547866821289, "logits_per_token": -9.354326248168945, "logits_per_char": -1.3363323211669922, "num_chars": 14}, {"sum_logits": -5.472150802612305, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -5.472150802612305, "logits_per_char": -0.9120251337687174, "num_chars": 6}, {"sum_logits": -12.504855155944824, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -25.25990867614746, "logits_per_token": -6.252427577972412, "logits_per_char": -0.6581502713655171, "num_chars": 19}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 330, "native_id": "2e83c5989a018bec6d5f5ac7d3b72f49", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.203371047973633, "incorrect_loss_raw": 14.05936574935913, "correct_loss_per_char": 0.47718238830566406, "incorrect_loss_per_char": 1.3176349748316265, "correct_loss_per_token": 3.1016855239868164, "incorrect_loss_per_token": 6.765242576599121, "correct_loss_uncond": -10.517478942871094, "incorrect_loss_uncond": -4.032325267791748}, "model_output": [{"sum_logits": -11.98896312713623, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.777267456054688, "logits_per_token": -11.98896312713623, "logits_per_char": -1.4986203908920288, "num_chars": 8}, {"sum_logits": -6.203371047973633, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.720849990844727, "logits_per_token": -3.1016855239868164, "logits_per_char": -0.47718238830566406, "num_chars": 13}, {"sum_logits": -7.090280532836914, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -7.090280532836914, "logits_per_char": -1.1817134221394856, "num_chars": 6}, {"sum_logits": -26.42607879638672, "num_tokens": 6, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -33.694915771484375, "logits_per_token": -4.404346466064453, "logits_per_char": -1.0570431518554688, "num_chars": 25}, {"sum_logits": -10.73214054107666, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.036157608032227, "logits_per_token": -3.5773801803588867, "logits_per_char": -1.5331629344395228, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 331, "native_id": "34b2d6aecdb5af8efacf0b0aa7e3989f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.442124843597412, "incorrect_loss_raw": 9.222046732902527, "correct_loss_per_char": 0.286843736966451, "incorrect_loss_per_char": 0.7116329621701014, "correct_loss_per_token": 1.721062421798706, "incorrect_loss_per_token": 4.611023366451263, "correct_loss_uncond": -17.28045892715454, "incorrect_loss_uncond": -11.628254532814026}, "model_output": [{"sum_logits": -3.442124843597412, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.722583770751953, "logits_per_token": -1.721062421798706, "logits_per_char": -0.286843736966451, "num_chars": 12}, {"sum_logits": -7.1523027420043945, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.323654174804688, "logits_per_token": -3.5761513710021973, "logits_per_char": -0.476820182800293, "num_chars": 15}, {"sum_logits": -7.161327838897705, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.166078567504883, "logits_per_token": -3.5806639194488525, "logits_per_char": -0.5967773199081421, "num_chars": 12}, {"sum_logits": -9.095409393310547, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.17770004272461, "logits_per_token": -4.547704696655273, "logits_per_char": -0.649672099522182, "num_chars": 14}, {"sum_logits": -13.479146957397461, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.73377227783203, "logits_per_token": -6.7395734786987305, "logits_per_char": -1.1232622464497883, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 332, "native_id": "2ec7f8fe7948f9997e73f9bff7ba6e05", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.385854721069336, "incorrect_loss_raw": 11.685784339904785, "correct_loss_per_char": 0.4896231564608487, "incorrect_loss_per_char": 1.2987424662618927, "correct_loss_per_token": 1.795284907023112, "incorrect_loss_per_token": 5.837772627671559, "correct_loss_uncond": -12.171819686889648, "incorrect_loss_uncond": -5.72779393196106}, "model_output": [{"sum_logits": -5.385854721069336, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.557674407958984, "logits_per_token": -1.795284907023112, "logits_per_char": -0.4896231564608487, "num_chars": 11}, {"sum_logits": -12.081892013549805, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.805604934692383, "logits_per_token": -4.027297337849935, "logits_per_char": -1.0983538194136186, "num_chars": 11}, {"sum_logits": -9.807737350463867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.243385314941406, "logits_per_token": -4.903868675231934, "logits_per_char": -0.9807737350463868, "num_chars": 10}, {"sum_logits": -13.911444664001465, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.84844970703125, "logits_per_token": -3.477861166000366, "logits_per_char": -0.9274296442667643, "num_chars": 15}, {"sum_logits": -10.942063331604004, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.75687313079834, "logits_per_token": -10.942063331604004, "logits_per_char": -2.188412666320801, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 333, "native_id": "651785ed4f7b0bd2e7ca9f70a42acea5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.874726295471191, "incorrect_loss_raw": 9.372070670127869, "correct_loss_per_char": 0.7638584772745768, "incorrect_loss_per_char": 1.1659948651408736, "correct_loss_per_token": 6.874726295471191, "incorrect_loss_per_token": 6.987299859523773, "correct_loss_uncond": -6.870943069458008, "incorrect_loss_uncond": -5.827681183815002}, "model_output": [{"sum_logits": -6.874726295471191, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -6.874726295471191, "logits_per_char": -0.7638584772745768, "num_chars": 9}, {"sum_logits": -6.797855377197266, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -6.797855377197266, "logits_per_char": -1.3595710754394532, "num_chars": 5}, {"sum_logits": -7.3480143547058105, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.906938552856445, "logits_per_token": -3.6740071773529053, "logits_per_char": -0.6680013049732555, "num_chars": 11}, {"sum_logits": -11.612260818481445, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.943571090698242, "logits_per_token": -11.612260818481445, "logits_per_char": -1.6588944026402064, "num_chars": 7}, {"sum_logits": -11.730152130126953, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.473548889160156, "logits_per_token": -5.865076065063477, "logits_per_char": -0.9775126775105795, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 334, "native_id": "ee46995407eb6357bb5410d49d378629", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.158213138580322, "incorrect_loss_raw": 6.98087203502655, "correct_loss_per_char": 0.46202368206448025, "incorrect_loss_per_char": 0.9886442706698463, "correct_loss_per_token": 2.079106569290161, "incorrect_loss_per_token": 6.98087203502655, "correct_loss_uncond": -12.574775218963623, "incorrect_loss_uncond": -8.738726019859314}, "model_output": [{"sum_logits": -4.158213138580322, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -2.079106569290161, "logits_per_char": -0.46202368206448025, "num_chars": 9}, {"sum_logits": -9.07372760772705, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -9.07372760772705, "logits_per_char": -1.1342159509658813, "num_chars": 8}, {"sum_logits": -9.721497535705566, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.682865142822266, "logits_per_token": -9.721497535705566, "logits_per_char": -1.620249589284261, "num_chars": 6}, {"sum_logits": -5.819857597351074, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -5.819857597351074, "logits_per_char": -0.7274821996688843, "num_chars": 8}, {"sum_logits": -3.3084053993225098, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.30998420715332, "logits_per_token": -3.3084053993225098, "logits_per_char": -0.47262934276035856, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 335, "native_id": "303aedda3a5ab8d853cbe4edc4b914c6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9221309423446655, "incorrect_loss_raw": 11.700648307800293, "correct_loss_per_char": 0.10245899359385173, "incorrect_loss_per_char": 1.2632520039876303, "correct_loss_per_token": 0.9221309423446655, "incorrect_loss_per_token": 7.240985989570618, "correct_loss_uncond": -13.153000473976135, "incorrect_loss_uncond": -3.468538999557495}, "model_output": [{"sum_logits": -13.255210876464844, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -6.627605438232422, "logits_per_char": -1.3255210876464845, "num_chars": 10}, {"sum_logits": -9.227581024169922, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.930978775024414, "logits_per_token": -4.613790512084961, "logits_per_char": -1.0252867804633246, "num_chars": 9}, {"sum_logits": -0.9221309423446655, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -0.9221309423446655, "logits_per_char": -0.10245899359385173, "num_chars": 9}, {"sum_logits": -13.194506645202637, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -6.597253322601318, "logits_per_char": -1.4660562939114041, "num_chars": 9}, {"sum_logits": -11.12529468536377, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -11.12529468536377, "logits_per_char": -1.2361438539293077, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 336, "native_id": "720b98fbc365736597147c984f6bd301", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.235655784606934, "incorrect_loss_raw": 17.242165088653564, "correct_loss_per_char": 1.1123323440551758, "incorrect_loss_per_char": 1.5746015938845548, "correct_loss_per_token": 6.117827892303467, "incorrect_loss_per_token": 7.260608275731404, "correct_loss_uncond": -7.241555213928223, "incorrect_loss_uncond": -0.7548398971557617}, "model_output": [{"sum_logits": -16.543739318847656, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.91521453857422, "logits_per_token": -5.514579772949219, "logits_per_char": -1.3786449432373047, "num_chars": 12}, {"sum_logits": -12.235655784606934, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.477210998535156, "logits_per_token": -6.117827892303467, "logits_per_char": -1.1123323440551758, "num_chars": 11}, {"sum_logits": -19.392730712890625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.957216262817383, "logits_per_token": -9.696365356445312, "logits_per_char": -1.7629755193536931, "num_chars": 11}, {"sum_logits": -16.92454719543457, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.273942947387695, "logits_per_token": -8.462273597717285, "logits_per_char": -1.692454719543457, "num_chars": 10}, {"sum_logits": -16.107643127441406, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.841646194458008, "logits_per_token": -5.369214375813802, "logits_per_char": -1.4643311934037642, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 337, "native_id": "c611875b43b67b91030b889b267bbcb3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.032438278198242, "incorrect_loss_raw": 13.194948434829712, "correct_loss_per_char": 1.0860365231831868, "incorrect_loss_per_char": 1.1955109045963095, "correct_loss_per_token": 4.344146092732747, "incorrect_loss_per_token": 6.017938534418742, "correct_loss_uncond": -5.783285140991211, "incorrect_loss_uncond": -4.760006666183472}, "model_output": [{"sum_logits": -10.577458381652832, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.901641845703125, "logits_per_token": -5.288729190826416, "logits_per_char": -0.7555327415466309, "num_chars": 14}, {"sum_logits": -13.032438278198242, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.815723419189453, "logits_per_token": -4.344146092732747, "logits_per_char": -1.0860365231831868, "num_chars": 12}, {"sum_logits": -14.617600440979004, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.179946899414062, "logits_per_token": -7.308800220489502, "logits_per_char": -1.624177826775445, "num_chars": 9}, {"sum_logits": -13.675878524780273, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.940265655517578, "logits_per_token": -6.837939262390137, "logits_per_char": -1.2432616840709338, "num_chars": 11}, {"sum_logits": -13.908856391906738, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.79796600341797, "logits_per_token": -4.636285463968913, "logits_per_char": -1.1590713659922283, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 338, "native_id": "0547da29ffab9b441bae8870cd0f9dab", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.33731460571289, "incorrect_loss_raw": 9.443718552589417, "correct_loss_per_char": 0.6669510432652065, "incorrect_loss_per_char": 0.8899024094526584, "correct_loss_per_token": 4.668657302856445, "incorrect_loss_per_token": 6.168864369392395, "correct_loss_uncond": -8.694225311279297, "incorrect_loss_uncond": -4.853212952613831}, "model_output": [{"sum_logits": -9.33731460571289, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.031539916992188, "logits_per_token": -4.668657302856445, "logits_per_char": -0.6669510432652065, "num_chars": 14}, {"sum_logits": -5.511965274810791, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.153367042541504, "logits_per_token": -5.511965274810791, "logits_per_char": -0.6889956593513489, "num_chars": 8}, {"sum_logits": -10.12020492553711, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.972031593322754, "logits_per_token": -2.5300512313842773, "logits_per_char": -0.6325128078460693, "num_chars": 16}, {"sum_logits": -11.124177932739258, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.137699127197266, "logits_per_token": -11.124177932739258, "logits_per_char": -1.3905222415924072, "num_chars": 8}, {"sum_logits": -11.018526077270508, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.924628257751465, "logits_per_token": -5.509263038635254, "logits_per_char": -0.8475789290208083, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 339, "native_id": "21e312c7fd1a52341ce35b66457eab36", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.871620178222656, "incorrect_loss_raw": 14.678077459335327, "correct_loss_per_char": 1.233952522277832, "incorrect_loss_per_char": 1.171875396752969, "correct_loss_per_token": 4.935810089111328, "incorrect_loss_per_token": 8.008823990821838, "correct_loss_uncond": -9.954238891601562, "incorrect_loss_uncond": -3.8301968574523926}, "model_output": [{"sum_logits": -9.871620178222656, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -4.935810089111328, "logits_per_char": -1.233952522277832, "num_chars": 8}, {"sum_logits": -17.587970733642578, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.770668029785156, "logits_per_token": -8.793985366821289, "logits_per_char": -1.1725313822428385, "num_chars": 15}, {"sum_logits": -5.358282089233398, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.500212669372559, "logits_per_token": -5.358282089233398, "logits_per_char": -0.6697852611541748, "num_chars": 8}, {"sum_logits": -21.10988998413086, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.986698150634766, "logits_per_token": -10.55494499206543, "logits_per_char": -1.6238376910869892, "num_chars": 13}, {"sum_logits": -14.656167030334473, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.7755184173584, "logits_per_token": -7.328083515167236, "logits_per_char": -1.2213472525278728, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 340, "native_id": "82e26bc22af89c38d54aa2d00dcb8a2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7917921543121338, "incorrect_loss_raw": 14.809492111206055, "correct_loss_per_char": 0.17917921543121337, "incorrect_loss_per_char": 1.1181104717746613, "correct_loss_per_token": 1.7917921543121338, "incorrect_loss_per_token": 7.752715428670247, "correct_loss_uncond": -12.083131551742554, "incorrect_loss_uncond": -5.796492338180542}, "model_output": [{"sum_logits": -1.7917921543121338, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.874923706054688, "logits_per_token": -1.7917921543121338, "logits_per_char": -0.17917921543121337, "num_chars": 10}, {"sum_logits": -11.624714851379395, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.990306854248047, "logits_per_token": -11.624714851379395, "logits_per_char": -1.2916349834865994, "num_chars": 9}, {"sum_logits": -14.205205917358398, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.561355590820312, "logits_per_token": -4.735068639119466, "logits_per_char": -0.8878253698348999, "num_chars": 16}, {"sum_logits": -25.009292602539062, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -32.46266174316406, "logits_per_token": -6.252323150634766, "logits_per_char": -0.8931890215192523, "num_chars": 28}, {"sum_logits": -8.398755073547363, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.409613609313965, "logits_per_token": -8.398755073547363, "logits_per_char": -1.3997925122578938, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 341, "native_id": "f75357e48c3026cfa4da3dba9f91bb21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.828832626342773, "incorrect_loss_raw": 11.928008317947388, "correct_loss_per_char": 1.0753484205766157, "incorrect_loss_per_char": 0.8659367059322602, "correct_loss_per_token": 5.914416313171387, "incorrect_loss_per_token": 5.498499910036723, "correct_loss_uncond": -8.498235702514648, "incorrect_loss_uncond": -9.315279722213745}, "model_output": [{"sum_logits": -15.061943054199219, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.199525833129883, "logits_per_token": -7.530971527099609, "logits_per_char": -0.8859966502470129, "num_chars": 17}, {"sum_logits": -9.642677307128906, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.14021110534668, "logits_per_token": -4.821338653564453, "logits_per_char": -0.8035564422607422, "num_chars": 12}, {"sum_logits": -11.172101974487305, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.141218185424805, "logits_per_token": -3.724033991495768, "logits_per_char": -0.6982563734054565, "num_chars": 16}, {"sum_logits": -11.835310935974121, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.492197036743164, "logits_per_token": -5.9176554679870605, "logits_per_char": -1.075937357815829, "num_chars": 11}, {"sum_logits": -11.828832626342773, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.327068328857422, "logits_per_token": -5.914416313171387, "logits_per_char": -1.0753484205766157, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 342, "native_id": "64931f9097155672bfe3e16f03b2c195", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.145591735839844, "incorrect_loss_raw": 7.276408433914185, "correct_loss_per_char": 0.5586901578036222, "incorrect_loss_per_char": 1.1568848189853487, "correct_loss_per_token": 6.145591735839844, "incorrect_loss_per_token": 7.276408433914185, "correct_loss_uncond": -7.302791595458984, "incorrect_loss_uncond": -6.625723838806152}, "model_output": [{"sum_logits": -6.702014923095703, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -6.702014923095703, "logits_per_char": -0.8377518653869629, "num_chars": 8}, {"sum_logits": -6.145591735839844, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -6.145591735839844, "logits_per_char": -0.5586901578036222, "num_chars": 11}, {"sum_logits": -7.034819602966309, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -7.034819602966309, "logits_per_char": -1.0049742289951868, "num_chars": 7}, {"sum_logits": -8.668399810791016, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -8.668399810791016, "logits_per_char": -1.4447333017985027, "num_chars": 6}, {"sum_logits": -6.700399398803711, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -6.700399398803711, "logits_per_char": -1.3400798797607423, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 343, "native_id": "5de3248caa2e5ed83dd0ec45a15eae18", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.218585968017578, "incorrect_loss_raw": 12.946070671081543, "correct_loss_per_char": 1.4744169061834163, "incorrect_loss_per_char": 1.1594262613759532, "correct_loss_per_token": 5.406195322672526, "incorrect_loss_per_token": 7.083246310551962, "correct_loss_uncond": -3.207378387451172, "incorrect_loss_uncond": -6.704918622970581}, "model_output": [{"sum_logits": -25.198848724365234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.66211700439453, "logits_per_token": -12.599424362182617, "logits_per_char": -1.9383729787973256, "num_chars": 13}, {"sum_logits": -16.218585968017578, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.42596435546875, "logits_per_token": -5.406195322672526, "logits_per_char": -1.4744169061834163, "num_chars": 11}, {"sum_logits": -8.67274284362793, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.68111228942871, "logits_per_token": -2.89091428120931, "logits_per_char": -0.6671340648944561, "num_chars": 13}, {"sum_logits": -7.772602081298828, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -7.772602081298828, "logits_per_char": -1.1103717258998327, "num_chars": 7}, {"sum_logits": -10.14008903503418, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.041534423828125, "logits_per_token": -5.07004451751709, "logits_per_char": -0.9218262759121981, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 344, "native_id": "0611dfbf5114084723d75f59b4f67412", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.458729267120361, "incorrect_loss_raw": 12.235620379447937, "correct_loss_per_char": 0.8917458534240723, "incorrect_loss_per_char": 1.2074058498655047, "correct_loss_per_token": 4.458729267120361, "incorrect_loss_per_token": 10.109798789024353, "correct_loss_uncond": -8.008220195770264, "incorrect_loss_uncond": -3.5354083776474}, "model_output": [{"sum_logits": -17.006572723388672, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -8.503286361694336, "logits_per_char": -1.062910795211792, "num_chars": 16}, {"sum_logits": -7.709614276885986, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -7.709614276885986, "logits_per_char": -1.1013734681265694, "num_chars": 7}, {"sum_logits": -14.51790714263916, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.35022735595703, "logits_per_token": -14.51790714263916, "logits_per_char": -1.451790714263916, "num_chars": 10}, {"sum_logits": -9.70838737487793, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -9.70838737487793, "logits_per_char": -1.2135484218597412, "num_chars": 8}, {"sum_logits": -4.458729267120361, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.466949462890625, "logits_per_token": -4.458729267120361, "logits_per_char": -0.8917458534240723, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 345, "native_id": "5b8d76889510384b38b72945e8d28f53", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.881374359130859, "incorrect_loss_raw": 9.709760665893555, "correct_loss_per_char": 0.48813743591308595, "incorrect_loss_per_char": 0.8062065908575127, "correct_loss_per_token": 2.4406871795654297, "incorrect_loss_per_token": 6.272518316904704, "correct_loss_uncond": -13.47970962524414, "incorrect_loss_uncond": -6.182621717453003}, "model_output": [{"sum_logits": -11.393048286437988, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.924055099487305, "logits_per_token": -3.797682762145996, "logits_per_char": -0.8137891633169991, "num_chars": 14}, {"sum_logits": -13.475473403930664, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.161336898803711, "logits_per_token": -13.475473403930664, "logits_per_char": -1.2250430367209695, "num_chars": 11}, {"sum_logits": -9.230405807495117, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.682374954223633, "logits_per_token": -3.0768019358317056, "logits_per_char": -0.6593147005353656, "num_chars": 14}, {"sum_logits": -4.740115165710449, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -4.740115165710449, "logits_per_char": -0.5266794628567166, "num_chars": 9}, {"sum_logits": -4.881374359130859, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.361083984375, "logits_per_token": -2.4406871795654297, "logits_per_char": -0.48813743591308595, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 346, "native_id": "d81f5c49bc060dc799681bf4cacac73a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.516716003417969, "incorrect_loss_raw": 10.7354416847229, "correct_loss_per_char": 0.7097263336181641, "incorrect_loss_per_char": 0.98173748254776, "correct_loss_per_token": 4.258358001708984, "incorrect_loss_per_token": 5.155247052510579, "correct_loss_uncond": -10.250350952148438, "incorrect_loss_uncond": -5.207796335220337}, "model_output": [{"sum_logits": -5.099370956420898, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.669754028320312, "logits_per_token": -1.699790318806966, "logits_per_char": -0.3399580637613932, "num_chars": 15}, {"sum_logits": -12.00493049621582, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.581754684448242, "logits_per_token": -6.00246524810791, "logits_per_char": -1.200493049621582, "num_chars": 10}, {"sum_logits": -11.83486270904541, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -5.917431354522705, "logits_per_char": -0.9862385590871176, "num_chars": 12}, {"sum_logits": -8.516716003417969, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.767066955566406, "logits_per_token": -4.258358001708984, "logits_per_char": -0.7097263336181641, "num_chars": 12}, {"sum_logits": -14.002602577209473, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.44722843170166, "logits_per_token": -7.001301288604736, "logits_per_char": -1.4002602577209473, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 347, "native_id": "aaf4fa38433c84b3bd0a86551259ce62", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.55682373046875, "incorrect_loss_raw": 15.817894458770752, "correct_loss_per_char": 0.7223014831542969, "incorrect_loss_per_char": 1.2966639124430142, "correct_loss_per_token": 3.8522745768229165, "incorrect_loss_per_token": 7.3112266063690186, "correct_loss_uncond": -8.290327072143555, "incorrect_loss_uncond": -2.03165864944458}, "model_output": [{"sum_logits": -14.345294952392578, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.849449157714844, "logits_per_token": -4.781764984130859, "logits_per_char": -0.8965809345245361, "num_chars": 16}, {"sum_logits": -16.415863037109375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.568099975585938, "logits_per_token": -8.207931518554688, "logits_per_char": -1.094390869140625, "num_chars": 15}, {"sum_logits": -14.45355224609375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.159011840820312, "logits_per_token": -7.226776123046875, "logits_per_char": -1.8066940307617188, "num_chars": 8}, {"sum_logits": -11.55682373046875, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.847150802612305, "logits_per_token": -3.8522745768229165, "logits_per_char": -0.7223014831542969, "num_chars": 16}, {"sum_logits": -18.056867599487305, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.821651458740234, "logits_per_token": -9.028433799743652, "logits_per_char": -1.3889898153451772, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 348, "native_id": "33ea932a876ac0361c9eefeff1d24e92", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.110673904418945, "incorrect_loss_raw": 12.518444776535034, "correct_loss_per_char": 0.9011859893798828, "incorrect_loss_per_char": 1.2160439019812679, "correct_loss_per_token": 8.110673904418945, "incorrect_loss_per_token": 8.6524076461792, "correct_loss_uncond": -6.344174385070801, "incorrect_loss_uncond": -3.5623888969421387}, "model_output": [{"sum_logits": -8.110673904418945, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.454848289489746, "logits_per_token": -8.110673904418945, "logits_per_char": -0.9011859893798828, "num_chars": 9}, {"sum_logits": -9.562456130981445, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -9.562456130981445, "logits_per_char": -1.912491226196289, "num_chars": 5}, {"sum_logits": -13.781013488769531, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.065120697021484, "logits_per_token": -6.890506744384766, "logits_per_char": -0.7253164994089227, "num_chars": 19}, {"sum_logits": -17.14728355407715, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.357555389404297, "logits_per_token": -8.573641777038574, "logits_per_char": -0.8573641777038574, "num_chars": 20}, {"sum_logits": -9.583025932312012, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.216009140014648, "logits_per_token": -9.583025932312012, "logits_per_char": -1.3690037046160017, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 349, "native_id": "aead08289ca9abfcd169f935ea228ee5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.798182487487793, "incorrect_loss_raw": 10.147085189819336, "correct_loss_per_char": 1.163471135226163, "incorrect_loss_per_char": 0.8985776020275367, "correct_loss_per_token": 6.3990912437438965, "incorrect_loss_per_token": 6.149625142415365, "correct_loss_uncond": -4.8715925216674805, "incorrect_loss_uncond": -9.104639768600464}, "model_output": [{"sum_logits": -6.992252349853516, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.870928764343262, "logits_per_token": -3.496126174926758, "logits_per_char": -0.49944659641810824, "num_chars": 14}, {"sum_logits": -10.019206047058105, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.534561157226562, "logits_per_token": -3.3397353490193686, "logits_per_char": -0.7707081574660081, "num_chars": 13}, {"sum_logits": -11.948395729064941, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.574947357177734, "logits_per_token": -11.948395729064941, "logits_per_char": -1.4935494661331177, "num_chars": 8}, {"sum_logits": -12.798182487487793, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.669775009155273, "logits_per_token": -6.3990912437438965, "logits_per_char": -1.163471135226163, "num_chars": 11}, {"sum_logits": -11.628486633300781, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -24.02646255493164, "logits_per_token": -5.814243316650391, "logits_per_char": -0.830606188092913, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 350, "native_id": "adbddc80b10bf25f09c6c2bee4e3c59b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.206258296966553, "incorrect_loss_raw": 10.99448299407959, "correct_loss_per_char": 0.24742695864509134, "incorrect_loss_per_char": 1.2940865783464341, "correct_loss_per_token": 2.1031291484832764, "incorrect_loss_per_token": 7.81916081905365, "correct_loss_uncond": -13.06301736831665, "incorrect_loss_uncond": -5.227672815322876}, "model_output": [{"sum_logits": -7.239442825317383, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.98984146118164, "logits_per_token": -7.239442825317383, "logits_per_char": -0.9049303531646729, "num_chars": 8}, {"sum_logits": -9.462418556213379, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -4.7312092781066895, "logits_per_char": -0.675887039729527, "num_chars": 14}, {"sum_logits": -15.94015884399414, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.07915496826172, "logits_per_token": -7.97007942199707, "logits_per_char": -1.328346570332845, "num_chars": 12}, {"sum_logits": -11.335911750793457, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -11.335911750793457, "logits_per_char": -2.2671823501586914, "num_chars": 5}, {"sum_logits": -4.206258296966553, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -2.1031291484832764, "logits_per_char": -0.24742695864509134, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 351, "native_id": "1caf93d6a22dc8190e19c14bbe1fafda", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.730879783630371, "incorrect_loss_raw": 11.506989359855652, "correct_loss_per_char": 0.47757331530253094, "incorrect_loss_per_char": 0.7177205733630968, "correct_loss_per_token": 2.8654398918151855, "incorrect_loss_per_token": 6.402967790762584, "correct_loss_uncond": -9.21226978302002, "incorrect_loss_uncond": -6.320433020591736}, "model_output": [{"sum_logits": -11.235441207885742, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.32263469696045, "logits_per_token": -11.235441207885742, "logits_per_char": -0.9362867673238119, "num_chars": 12}, {"sum_logits": -5.730879783630371, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.94314956665039, "logits_per_token": -2.8654398918151855, "logits_per_char": -0.47757331530253094, "num_chars": 12}, {"sum_logits": -7.401334285736084, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.51513671875, "logits_per_token": -3.700667142868042, "logits_per_char": -0.5286667346954346, "num_chars": 14}, {"sum_logits": -18.118968963623047, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.41014862060547, "logits_per_token": -6.039656321207683, "logits_per_char": -0.7877812592879586, "num_chars": 23}, {"sum_logits": -9.272212982177734, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.061769485473633, "logits_per_token": -4.636106491088867, "logits_per_char": -0.6181475321451823, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 352, "native_id": "0bf4d64ad0eee7224acb3a4eb85accb2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.501313209533691, "incorrect_loss_raw": 14.674834966659546, "correct_loss_per_char": 0.7859018870762416, "incorrect_loss_per_char": 1.5334634613681148, "correct_loss_per_token": 5.501313209533691, "incorrect_loss_per_token": 8.365338404973347, "correct_loss_uncond": -10.553345680236816, "incorrect_loss_uncond": -1.2337298393249512}, "model_output": [{"sum_logits": -13.535348892211914, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.639222145080566, "logits_per_token": -13.535348892211914, "logits_per_char": -1.9336212703159876, "num_chars": 7}, {"sum_logits": -5.501313209533691, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.054658889770508, "logits_per_token": -5.501313209533691, "logits_per_char": -0.7859018870762416, "num_chars": 7}, {"sum_logits": -10.385673522949219, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.768199920654297, "logits_per_token": -5.192836761474609, "logits_per_char": -1.038567352294922, "num_chars": 10}, {"sum_logits": -15.935944557189941, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.437604904174805, "logits_per_token": -5.3119815190633135, "logits_per_char": -1.4487222324718128, "num_chars": 11}, {"sum_logits": -18.84237289428711, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.78923225402832, "logits_per_token": -9.421186447143555, "logits_per_char": -1.7129429903897373, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 353, "native_id": "b93532cae23e505628dd88568da3337e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.891429424285889, "incorrect_loss_raw": 4.927466094493866, "correct_loss_per_char": 1.3152382373809814, "incorrect_loss_per_char": 0.6029270878859929, "correct_loss_per_token": 7.891429424285889, "incorrect_loss_per_token": 4.698115140199661, "correct_loss_uncond": -8.139662265777588, "incorrect_loss_uncond": -11.438967764377594}, "model_output": [{"sum_logits": -6.174046039581299, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -6.174046039581299, "logits_per_char": -0.8820065770830426, "num_chars": 7}, {"sum_logits": -7.891429424285889, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -7.891429424285889, "logits_per_char": -1.3152382373809814, "num_chars": 6}, {"sum_logits": -1.8348076343536377, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -0.9174038171768188, "logits_per_char": -0.26211537633623394, "num_chars": 7}, {"sum_logits": -7.801597595214844, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -7.801597595214844, "logits_per_char": -0.7801597595214844, "num_chars": 10}, {"sum_logits": -3.8994131088256836, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -3.8994131088256836, "logits_per_char": -0.48742663860321045, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 354, "native_id": "2d3c9d3dff1a7a8253180cb3de1ceeea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.814879417419434, "incorrect_loss_raw": 9.69031023979187, "correct_loss_per_char": 0.6878399167742048, "incorrect_loss_per_char": 1.0649113466451456, "correct_loss_per_token": 4.814879417419434, "incorrect_loss_per_token": 5.7386549313863116, "correct_loss_uncond": -7.334414482116699, "incorrect_loss_uncond": -5.942212104797363}, "model_output": [{"sum_logits": -16.690216064453125, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.452665328979492, "logits_per_token": -5.563405354817708, "logits_per_char": -1.2838627741887019, "num_chars": 13}, {"sum_logits": -4.814879417419434, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -4.814879417419434, "logits_per_char": -0.6878399167742048, "num_chars": 7}, {"sum_logits": -9.359621047973633, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.663773536682129, "logits_per_token": -4.679810523986816, "logits_per_char": -0.9359621047973633, "num_chars": 10}, {"sum_logits": -8.793054580688477, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -8.793054580688477, "logits_per_char": -1.2561506543840681, "num_chars": 7}, {"sum_logits": -3.918349266052246, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -3.918349266052246, "logits_per_char": -0.7836698532104492, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 355, "native_id": "70701f5d1d62e58d5c74e2e303bb4065", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.8393887281417847, "incorrect_loss_raw": 8.798771262168884, "correct_loss_per_char": 0.22992359101772308, "incorrect_loss_per_char": 1.2744953513145447, "correct_loss_per_token": 1.8393887281417847, "incorrect_loss_per_token": 6.780674695968628, "correct_loss_uncond": -10.494336247444153, "incorrect_loss_uncond": -5.66810929775238}, "model_output": [{"sum_logits": -1.8393887281417847, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -12.333724975585938, "logits_per_token": -1.8393887281417847, "logits_per_char": -0.22992359101772308, "num_chars": 8}, {"sum_logits": -5.672238349914551, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.832003593444824, "logits_per_token": -2.8361191749572754, "logits_per_char": -0.6302487055460612, "num_chars": 9}, {"sum_logits": -10.4725341796875, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -5.23626708984375, "logits_per_char": -0.872711181640625, "num_chars": 12}, {"sum_logits": -12.599082946777344, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.102880477905273, "logits_per_token": -12.599082946777344, "logits_per_char": -2.519816589355469, "num_chars": 5}, {"sum_logits": -6.451229572296143, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -6.451229572296143, "logits_per_char": -1.0752049287160237, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 356, "native_id": "eacd87f297193033669a93160ae3776f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.20463752746582, "incorrect_loss_raw": 7.79425323009491, "correct_loss_per_char": 0.6377898454666138, "incorrect_loss_per_char": 0.8283329210111073, "correct_loss_per_token": 5.10231876373291, "incorrect_loss_per_token": 5.8378019730250035, "correct_loss_uncond": -9.264545440673828, "incorrect_loss_uncond": -9.326040863990784}, "model_output": [{"sum_logits": -9.091438293457031, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -4.545719146728516, "logits_per_char": -0.7576198577880859, "num_chars": 12}, {"sum_logits": -10.20463752746582, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -5.10231876373291, "logits_per_char": -0.6377898454666138, "num_chars": 16}, {"sum_logits": -9.185380935668945, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -9.185380935668945, "logits_per_char": -1.312197276524135, "num_chars": 7}, {"sum_logits": -4.92012882232666, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -1.6400429407755535, "logits_per_char": -0.24600644111633302, "num_chars": 20}, {"sum_logits": -7.980064868927002, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -7.980064868927002, "logits_per_char": -0.9975081086158752, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 357, "native_id": "8e1b0792e441a5d54ae47a4b24f48977", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.764705657958984, "incorrect_loss_raw": 10.89195704460144, "correct_loss_per_char": 1.8764705657958984, "incorrect_loss_per_char": 1.2789351620225824, "correct_loss_per_token": 6.254901885986328, "incorrect_loss_per_token": 7.7657575607299805, "correct_loss_uncond": -5.5328216552734375, "incorrect_loss_uncond": -4.272337198257446}, "model_output": [{"sum_logits": -10.735154151916504, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.84581756591797, "logits_per_token": -5.367577075958252, "logits_per_char": -1.0735154151916504, "num_chars": 10}, {"sum_logits": -18.764705657958984, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.297527313232422, "logits_per_token": -6.254901885986328, "logits_per_char": -1.8764705657958984, "num_chars": 10}, {"sum_logits": -14.274441719055176, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -7.137220859527588, "logits_per_char": -1.0980339783888597, "num_chars": 13}, {"sum_logits": -9.924361228942871, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -9.924361228942871, "logits_per_char": -1.9848722457885741, "num_chars": 5}, {"sum_logits": -8.633871078491211, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.558618545532227, "logits_per_token": -8.633871078491211, "logits_per_char": -0.9593190087212456, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 358, "native_id": "b4cde6a56fb19afc84876ebf2fb9e71a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.535564422607422, "incorrect_loss_raw": 12.826413869857788, "correct_loss_per_char": 1.041197263277494, "incorrect_loss_per_char": 1.6264223514439222, "correct_loss_per_token": 4.511854807535808, "incorrect_loss_per_token": 7.767885088920593, "correct_loss_uncond": -6.465673446655273, "incorrect_loss_uncond": -9.187577724456787}, "model_output": [{"sum_logits": -4.691201210021973, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.418373107910156, "logits_per_token": -2.3456006050109863, "logits_per_char": -0.5864001512527466, "num_chars": 8}, {"sum_logits": -10.837425231933594, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -10.837425231933594, "logits_per_char": -1.548203604561942, "num_chars": 7}, {"sum_logits": -13.535564422607422, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.001237869262695, "logits_per_token": -4.511854807535808, "logits_per_char": -1.041197263277494, "num_chars": 13}, {"sum_logits": -21.533597946166992, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -29.181190490722656, "logits_per_token": -10.766798973083496, "logits_per_char": -3.076228278023856, "num_chars": 7}, {"sum_logits": -14.243431091308594, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.994022369384766, "logits_per_token": -7.121715545654297, "logits_per_char": -1.2948573719371448, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 359, "native_id": "095c5bc5fbaf12b384e9f7df47fdec16", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.837364673614502, "incorrect_loss_raw": 16.002734661102295, "correct_loss_per_char": 0.7296705842018127, "incorrect_loss_per_char": 1.4979300744059372, "correct_loss_per_token": 5.837364673614502, "incorrect_loss_per_token": 8.945357203483582, "correct_loss_uncond": -8.947329044342041, "incorrect_loss_uncond": -3.2702786922454834}, "model_output": [{"sum_logits": -7.551918983459473, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -7.551918983459473, "logits_per_char": -1.0788455690656389, "num_chars": 7}, {"sum_logits": -23.95806121826172, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.865190505981445, "logits_per_token": -11.97903060913086, "logits_per_char": -1.4092977187212776, "num_chars": 17}, {"sum_logits": -5.837364673614502, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -5.837364673614502, "logits_per_char": -0.7296705842018127, "num_chars": 8}, {"sum_logits": -22.813304901123047, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.969623565673828, "logits_per_token": -11.406652450561523, "logits_per_char": -2.5348116556803384, "num_chars": 9}, {"sum_logits": -9.687653541564941, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.542523384094238, "logits_per_token": -4.843826770782471, "logits_per_char": -0.9687653541564941, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 360, "native_id": "494c501dbbfd36c602aae9e5b8e0cfff", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6703130602836609, "incorrect_loss_raw": 9.708123326301575, "correct_loss_per_char": 0.11171884338061015, "incorrect_loss_per_char": 1.144415154911223, "correct_loss_per_token": 0.6703130602836609, "incorrect_loss_per_token": 7.785937249660492, "correct_loss_uncond": -12.704072773456573, "incorrect_loss_uncond": -5.401923298835754}, "model_output": [{"sum_logits": -11.094889640808105, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -11.094889640808105, "logits_per_char": -1.584984234401158, "num_chars": 7}, {"sum_logits": -0.6703130602836609, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -0.6703130602836609, "logits_per_char": -0.11171884338061015, "num_chars": 6}, {"sum_logits": -12.360115051269531, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -12.360115051269531, "logits_per_char": -1.0300095876057942, "num_chars": 12}, {"sum_logits": -11.12830924987793, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.41086196899414, "logits_per_token": -5.564154624938965, "logits_per_char": -1.112830924987793, "num_chars": 10}, {"sum_logits": -4.249179363250732, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.38377571105957, "logits_per_token": -2.124589681625366, "logits_per_char": -0.8498358726501465, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 361, "native_id": "5a7f6fd97b2c9ad05f773bc8b2ecf441", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.108404636383057, "incorrect_loss_raw": 14.514246940612793, "correct_loss_per_char": 1.015486376626151, "incorrect_loss_per_char": 1.7614496571677072, "correct_loss_per_token": 7.108404636383057, "incorrect_loss_per_token": 10.874081373214722, "correct_loss_uncond": -6.370655536651611, "incorrect_loss_uncond": -0.2993960380554199}, "model_output": [{"sum_logits": -18.387073516845703, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.835200309753418, "logits_per_token": -9.193536758422852, "logits_per_char": -1.8387073516845702, "num_chars": 10}, {"sum_logits": -10.734251022338867, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.663773536682129, "logits_per_token": -5.367125511169434, "logits_per_char": -1.0734251022338868, "num_chars": 10}, {"sum_logits": -15.798210144042969, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -15.798210144042969, "logits_per_char": -2.2568871634347096, "num_chars": 7}, {"sum_logits": -7.108404636383057, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -7.108404636383057, "logits_per_char": -1.015486376626151, "num_chars": 7}, {"sum_logits": -13.137453079223633, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.81967830657959, "logits_per_token": -13.137453079223633, "logits_per_char": -1.876779011317662, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 362, "native_id": "5279a2ea333ba8a5bf3a7637a7279da1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.407415866851807, "incorrect_loss_raw": 9.91608339548111, "correct_loss_per_char": 1.1018539667129517, "incorrect_loss_per_char": 1.183896697102449, "correct_loss_per_token": 4.407415866851807, "incorrect_loss_per_token": 7.359843283891678, "correct_loss_uncond": -4.703647136688232, "incorrect_loss_uncond": -7.206047713756561}, "model_output": [{"sum_logits": -10.51687240600586, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -10.51687240600586, "logits_per_char": -1.7528120676676433, "num_chars": 6}, {"sum_logits": -16.792482376098633, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.92172622680664, "logits_per_token": -8.396241188049316, "logits_per_char": -1.3993735313415527, "num_chars": 12}, {"sum_logits": -3.6574385166168213, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.142627716064453, "logits_per_token": -1.8287192583084106, "logits_per_char": -0.9143596291542053, "num_chars": 4}, {"sum_logits": -4.407415866851807, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -9.111063003540039, "logits_per_token": -4.407415866851807, "logits_per_char": -1.1018539667129517, "num_chars": 4}, {"sum_logits": -8.697540283203125, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -8.697540283203125, "logits_per_char": -0.6690415602463943, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 363, "native_id": "42c46e28baf0fc617a07419286178c0a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.475496292114258, "incorrect_loss_raw": 16.44542694091797, "correct_loss_per_char": 1.1844370365142822, "incorrect_loss_per_char": 1.559545507811118, "correct_loss_per_token": 4.737748146057129, "incorrect_loss_per_token": 8.673209547996521, "correct_loss_uncond": -6.826608657836914, "incorrect_loss_uncond": -1.691197395324707}, "model_output": [{"sum_logits": -20.110950469970703, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.332340240478516, "logits_per_token": -5.027737617492676, "logits_per_char": -0.8743891508682914, "num_chars": 23}, {"sum_logits": -6.834746360778809, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.00241470336914, "logits_per_token": -3.4173731803894043, "logits_per_char": -0.5695621967315674, "num_chars": 12}, {"sum_logits": -25.17656707763672, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -12.58828353881836, "logits_per_char": -2.517656707763672, "num_chars": 10}, {"sum_logits": -13.659443855285645, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.538017272949219, "logits_per_token": -13.659443855285645, "logits_per_char": -2.276573975880941, "num_chars": 6}, {"sum_logits": -9.475496292114258, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.302104949951172, "logits_per_token": -4.737748146057129, "logits_per_char": -1.1844370365142822, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 364, "native_id": "c76304b4962f94ab9f20f09cf4a1a7c1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.2137064933776855, "incorrect_loss_raw": 10.71342134475708, "correct_loss_per_char": 1.035617748896281, "incorrect_loss_per_char": 1.449710766474406, "correct_loss_per_token": 6.2137064933776855, "incorrect_loss_per_token": 7.277263482411702, "correct_loss_uncond": -7.474890232086182, "incorrect_loss_uncond": -5.892549276351929}, "model_output": [{"sum_logits": -8.524805068969727, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.647007942199707, "logits_per_token": -8.524805068969727, "logits_per_char": -1.4208008448282878, "num_chars": 6}, {"sum_logits": -6.2137064933776855, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.688596725463867, "logits_per_token": -6.2137064933776855, "logits_per_char": -1.035617748896281, "num_chars": 6}, {"sum_logits": -20.616947174072266, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.085025787353516, "logits_per_token": -6.872315724690755, "logits_per_char": -2.9452781677246094, "num_chars": 7}, {"sum_logits": -5.5413665771484375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -5.5413665771484375, "logits_per_char": -0.6157073974609375, "num_chars": 9}, {"sum_logits": -8.17056655883789, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.977568626403809, "logits_per_token": -8.17056655883789, "logits_per_char": -0.8170566558837891, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 365, "native_id": "8b23cd355ffc8b6e7aa5459ffb21b4e0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.345006465911865, "incorrect_loss_raw": 9.207279622554779, "correct_loss_per_char": 0.8690012931823731, "incorrect_loss_per_char": 1.0732190442807747, "correct_loss_per_token": 4.345006465911865, "incorrect_loss_per_token": 5.867790758609772, "correct_loss_uncond": -7.49126672744751, "incorrect_loss_uncond": -8.075896084308624}, "model_output": [{"sum_logits": -8.906855583190918, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.494064331054688, "logits_per_token": -4.453427791595459, "logits_per_char": -0.7422379652659098, "num_chars": 12}, {"sum_logits": -4.345006465911865, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -4.345006465911865, "logits_per_char": -0.8690012931823731, "num_chars": 5}, {"sum_logits": -7.794876575469971, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -7.794876575469971, "logits_per_char": -1.5589753150939942, "num_chars": 5}, {"sum_logits": -2.318331003189087, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -2.318331003189087, "logits_per_char": -0.21075736392628064, "num_chars": 11}, {"sum_logits": -17.80905532836914, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.588336944580078, "logits_per_token": -8.90452766418457, "logits_per_char": -1.780905532836914, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 366, "native_id": "c35f7de9e9005fcf654cb0b23f17acd6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.519433975219727, "incorrect_loss_raw": 11.109978437423706, "correct_loss_per_char": 1.1298584938049316, "incorrect_loss_per_char": 1.0646193782488504, "correct_loss_per_token": 4.519433975219727, "incorrect_loss_per_token": 8.053494850794475, "correct_loss_uncond": -8.677152633666992, "incorrect_loss_uncond": -4.192123174667358}, "model_output": [{"sum_logits": -6.987680435180664, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.869611740112305, "logits_per_token": -6.987680435180664, "logits_per_char": -0.6987680435180664, "num_chars": 10}, {"sum_logits": -18.33890151977539, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.009559631347656, "logits_per_token": -6.112967173258464, "logits_per_char": -1.146181344985962, "num_chars": 16}, {"sum_logits": -4.519433975219727, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.196586608886719, "logits_per_token": -4.519433975219727, "logits_per_char": -1.1298584938049316, "num_chars": 4}, {"sum_logits": -9.129474639892578, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.582344055175781, "logits_per_token": -9.129474639892578, "logits_per_char": -1.3042106628417969, "num_chars": 7}, {"sum_logits": -9.983857154846191, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.746891021728516, "logits_per_token": -9.983857154846191, "logits_per_char": -1.109317461649577, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 367, "native_id": "d910859b9d1acae40456dbeaa8334bc0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.95933723449707, "incorrect_loss_raw": 10.847215414047241, "correct_loss_per_char": 0.3542383738926479, "incorrect_loss_per_char": 1.000053564707438, "correct_loss_per_token": 2.479668617248535, "incorrect_loss_per_token": 6.758849223454793, "correct_loss_uncond": -11.649040222167969, "incorrect_loss_uncond": -4.495249271392822}, "model_output": [{"sum_logits": -14.557366371154785, "num_tokens": 3, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.821550369262695, "logits_per_token": -4.852455457051595, "logits_per_char": -1.1197974131657527, "num_chars": 13}, {"sum_logits": -10.22330093383789, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.333358764648438, "logits_per_token": -10.22330093383789, "logits_per_char": -0.7864077641413763, "num_chars": 13}, {"sum_logits": -4.95933723449707, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.60837745666504, "logits_per_token": -2.479668617248535, "logits_per_char": -0.3542383738926479, "num_chars": 14}, {"sum_logits": -5.311086654663086, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -5.311086654663086, "logits_per_char": -0.8851811091105143, "num_chars": 6}, {"sum_logits": -13.297107696533203, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.906938552856445, "logits_per_token": -6.648553848266602, "logits_per_char": -1.2088279724121094, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 368, "native_id": "6ca8439d062de4d43d7d471c508b78db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.035123825073242, "incorrect_loss_raw": 9.626762628555298, "correct_loss_per_char": 0.7719326019287109, "incorrect_loss_per_char": 1.155159256193373, "correct_loss_per_token": 5.017561912536621, "incorrect_loss_per_token": 6.989559769630432, "correct_loss_uncond": -8.826925277709961, "incorrect_loss_uncond": -7.302156209945679}, "model_output": [{"sum_logits": -10.035123825073242, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.862049102783203, "logits_per_token": -5.017561912536621, "logits_per_char": -0.7719326019287109, "num_chars": 13}, {"sum_logits": -8.032709121704102, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.872519493103027, "logits_per_token": -8.032709121704102, "logits_per_char": -1.3387848536173503, "num_chars": 6}, {"sum_logits": -9.376718521118164, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.762389183044434, "logits_per_token": -9.376718521118164, "logits_per_char": -0.9376718521118164, "num_chars": 10}, {"sum_logits": -8.231523513793945, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.351154327392578, "logits_per_token": -4.115761756896973, "logits_per_char": -0.9146137237548828, "num_chars": 9}, {"sum_logits": -12.86609935760498, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.729612350463867, "logits_per_token": -6.43304967880249, "logits_per_char": -1.4295665952894423, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 369, "native_id": "ddd8c62ec94b4f94eeefdd05b9208a71", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.772616386413574, "incorrect_loss_raw": 9.663222789764404, "correct_loss_per_char": 0.30806848737928605, "incorrect_loss_per_char": 0.9469171305497488, "correct_loss_per_token": 1.386308193206787, "incorrect_loss_per_token": 6.6760735511779785, "correct_loss_uncond": -17.278210639953613, "incorrect_loss_uncond": -7.781377792358398}, "model_output": [{"sum_logits": -14.498571395874023, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.69621467590332, "logits_per_token": -7.249285697937012, "logits_per_char": -0.9665714263916015, "num_chars": 15}, {"sum_logits": -11.955216407775879, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.675543785095215, "logits_per_token": -11.955216407775879, "logits_per_char": -1.4944020509719849, "num_chars": 8}, {"sum_logits": -2.772616386413574, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.050827026367188, "logits_per_token": -1.386308193206787, "logits_per_char": -0.30806848737928605, "num_chars": 9}, {"sum_logits": -9.398622512817383, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.107200622558594, "logits_per_token": -4.699311256408691, "logits_per_char": -0.6265748341878256, "num_chars": 15}, {"sum_logits": -2.800480842590332, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -2.800480842590332, "logits_per_char": -0.700120210647583, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 370, "native_id": "72b638200414a526b598de0e01a044df", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.1856184005737305, "incorrect_loss_raw": 9.679497480392456, "correct_loss_per_char": 0.518561840057373, "incorrect_loss_per_char": 1.4263370273433207, "correct_loss_per_token": 5.1856184005737305, "incorrect_loss_per_token": 8.073926210403442, "correct_loss_uncond": -11.984314918518066, "incorrect_loss_uncond": -6.157543897628784}, "model_output": [{"sum_logits": -5.414027214050293, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.822380065917969, "logits_per_token": -5.414027214050293, "logits_per_char": -0.7734324591500419, "num_chars": 7}, {"sum_logits": -12.84457015991211, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -6.422285079956055, "logits_per_char": -1.1676881963556462, "num_chars": 11}, {"sum_logits": -5.1856184005737305, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -5.1856184005737305, "logits_per_char": -0.518561840057373, "num_chars": 10}, {"sum_logits": -10.629860877990723, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.402257919311523, "logits_per_token": -10.629860877990723, "logits_per_char": -2.1259721755981444, "num_chars": 5}, {"sum_logits": -9.8295316696167, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -9.8295316696167, "logits_per_char": -1.63825527826945, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 371, "native_id": "c770870c88f35f9d110217049c5a7334", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.8691234588623047, "incorrect_loss_raw": 5.981519728899002, "correct_loss_per_char": 0.318791495429145, "incorrect_loss_per_char": 0.7429627406864594, "correct_loss_per_token": 2.8691234588623047, "incorrect_loss_per_token": 5.981519728899002, "correct_loss_uncond": -10.657816886901855, "incorrect_loss_uncond": -8.177804440259933}, "model_output": [{"sum_logits": -6.669314384460449, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.190147399902344, "logits_per_token": -6.669314384460449, "logits_per_char": -0.5130241834200345, "num_chars": 13}, {"sum_logits": -2.8691234588623047, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.52694034576416, "logits_per_token": -2.8691234588623047, "logits_per_char": -0.318791495429145, "num_chars": 9}, {"sum_logits": -10.464162826538086, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.537578582763672, "logits_per_token": -10.464162826538086, "logits_per_char": -1.1626847585042317, "num_chars": 9}, {"sum_logits": -5.960890769958496, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.590518951416016, "logits_per_token": -5.960890769958496, "logits_per_char": -1.1921781539916991, "num_chars": 5}, {"sum_logits": -0.831710934638977, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.319051742553711, "logits_per_token": -0.831710934638977, "logits_per_char": -0.10396386682987213, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 372, "native_id": "1d8d9e3504c8c58a3b923ddc155c19b0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.661602020263672, "incorrect_loss_raw": 12.039271354675293, "correct_loss_per_char": 0.5661602020263672, "incorrect_loss_per_char": 1.1951848480436538, "correct_loss_per_token": 1.8872006734212239, "incorrect_loss_per_token": 6.986793677012126, "correct_loss_uncond": -13.403152465820312, "incorrect_loss_uncond": -4.7127203941345215}, "model_output": [{"sum_logits": -14.059375762939453, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.018943786621094, "logits_per_token": -7.029687881469727, "logits_per_char": -1.5621528625488281, "num_chars": 9}, {"sum_logits": -5.661602020263672, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.064754486083984, "logits_per_token": -1.8872006734212239, "logits_per_char": -0.5661602020263672, "num_chars": 10}, {"sum_logits": -19.770334243774414, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.279808044433594, "logits_per_token": -6.590111414591472, "logits_per_char": -1.3180222829182944, "num_chars": 15}, {"sum_logits": -5.555405616760254, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.903754234313965, "logits_per_token": -5.555405616760254, "logits_per_char": -0.925900936126709, "num_chars": 6}, {"sum_logits": -8.77196979522705, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -8.77196979522705, "logits_per_char": -0.9746633105807834, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 373, "native_id": "95acebea992a26c3a7c3bfb45845fa83", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5107271671295166, "incorrect_loss_raw": 9.589779138565063, "correct_loss_per_char": 0.5851211945215861, "incorrect_loss_per_char": 0.728657847309446, "correct_loss_per_token": 3.5107271671295166, "incorrect_loss_per_token": 4.794889569282532, "correct_loss_uncond": -11.571784734725952, "incorrect_loss_uncond": -9.918261289596558}, "model_output": [{"sum_logits": -7.8612470626831055, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.111448287963867, "logits_per_token": -3.9306235313415527, "logits_per_char": -0.604711312514085, "num_chars": 13}, {"sum_logits": -12.090272903442383, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.075716018676758, "logits_per_token": -6.045136451721191, "logits_per_char": -0.7556420564651489, "num_chars": 16}, {"sum_logits": -3.5107271671295166, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.082511901855469, "logits_per_token": -3.5107271671295166, "logits_per_char": -0.5851211945215861, "num_chars": 6}, {"sum_logits": -9.889097213745117, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.737808227539062, "logits_per_token": -4.944548606872559, "logits_per_char": -0.8990088376131925, "num_chars": 11}, {"sum_logits": -8.518499374389648, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.107189178466797, "logits_per_token": -4.259249687194824, "logits_per_char": -0.6552691826453576, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 374, "native_id": "c2c2a387fd9a6a26cff636008de21f71", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.638054847717285, "incorrect_loss_raw": 9.057466506958008, "correct_loss_per_char": 0.7092036565144857, "incorrect_loss_per_char": 1.1331144110110012, "correct_loss_per_token": 3.5460182825724282, "incorrect_loss_per_token": 6.011508385340373, "correct_loss_uncond": -10.573378562927246, "incorrect_loss_uncond": -8.308044195175171}, "model_output": [{"sum_logits": -7.661678314208984, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -7.661678314208984, "logits_per_char": -0.589359870323768, "num_chars": 13}, {"sum_logits": -10.638054847717285, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -3.5460182825724282, "logits_per_char": -0.7092036565144857, "num_chars": 15}, {"sum_logits": -9.056161880493164, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.142627716064453, "logits_per_token": -4.528080940246582, "logits_per_char": -2.264040470123291, "num_chars": 4}, {"sum_logits": -11.483627319335938, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -3.827875773111979, "logits_per_char": -0.6755074893727022, "num_chars": 17}, {"sum_logits": -8.028398513793945, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -8.028398513793945, "logits_per_char": -1.0035498142242432, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 375, "native_id": "57e96118fee6e2bbac5f59790fc833c0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.24865436553955, "incorrect_loss_raw": 10.767454862594604, "correct_loss_per_char": 0.8280408978462219, "incorrect_loss_per_char": 1.4402898252010345, "correct_loss_per_token": 4.416218121846517, "incorrect_loss_per_token": 9.54326057434082, "correct_loss_uncond": -4.556443214416504, "incorrect_loss_uncond": -3.3351588249206543}, "model_output": [{"sum_logits": -13.24865436553955, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.805097579956055, "logits_per_token": -4.416218121846517, "logits_per_char": -0.8280408978462219, "num_chars": 16}, {"sum_logits": -13.553571701049805, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.54640007019043, "logits_per_token": -13.553571701049805, "logits_per_char": -1.6941964626312256, "num_chars": 8}, {"sum_logits": -9.058364868164062, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -9.058364868164062, "logits_per_char": -1.5097274780273438, "num_chars": 6}, {"sum_logits": -9.793554306030273, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -4.896777153015137, "logits_per_char": -1.2241942882537842, "num_chars": 8}, {"sum_logits": -10.664328575134277, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.727229118347168, "logits_per_token": -10.664328575134277, "logits_per_char": -1.3330410718917847, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 376, "native_id": "b9b82aa4c236cd342ff95455b8516a42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.4772491455078125, "incorrect_loss_raw": 12.778801918029785, "correct_loss_per_char": 0.2252044677734375, "incorrect_loss_per_char": 1.3197922739717696, "correct_loss_per_token": 1.2386245727539062, "incorrect_loss_per_token": 8.979133288065594, "correct_loss_uncond": -14.39279556274414, "incorrect_loss_uncond": -1.8429338932037354}, "model_output": [{"sum_logits": -2.4772491455078125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.870044708251953, "logits_per_token": -1.2386245727539062, "logits_per_char": -0.2252044677734375, "num_chars": 11}, {"sum_logits": -13.495559692382812, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -13.495559692382812, "logits_per_char": -1.3495559692382812, "num_chars": 10}, {"sum_logits": -11.849120140075684, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.502664566040039, "logits_per_token": -3.949706713358561, "logits_per_char": -1.3165689044528537, "num_chars": 9}, {"sum_logits": -14.598522186279297, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -7.299261093139648, "logits_per_char": -1.2165435155232747, "num_chars": 12}, {"sum_logits": -11.172005653381348, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.419831275939941, "logits_per_token": -11.172005653381348, "logits_per_char": -1.3965007066726685, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 377, "native_id": "41fac392c6a5827c1b6682d5d3798e59", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.683487415313721, "incorrect_loss_raw": 12.087837934494019, "correct_loss_per_char": 0.8354359269142151, "incorrect_loss_per_char": 2.062330709184919, "correct_loss_per_token": 6.683487415313721, "incorrect_loss_per_token": 10.116963148117065, "correct_loss_uncond": -7.225884914398193, "incorrect_loss_uncond": -4.346911907196045}, "model_output": [{"sum_logits": -8.880335807800293, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.006080627441406, "logits_per_token": -8.880335807800293, "logits_per_char": -1.7760671615600585, "num_chars": 5}, {"sum_logits": -6.683487415313721, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.909372329711914, "logits_per_token": -6.683487415313721, "logits_per_char": -0.8354359269142151, "num_chars": 8}, {"sum_logits": -15.766998291015625, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.794763565063477, "logits_per_token": -7.8834991455078125, "logits_per_char": -1.1262141636439733, "num_chars": 14}, {"sum_logits": -11.57925796508789, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.010839462280273, "logits_per_token": -11.57925796508789, "logits_per_char": -2.315851593017578, "num_chars": 5}, {"sum_logits": -12.124759674072266, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.927315711975098, "logits_per_token": -12.124759674072266, "logits_per_char": -3.0311899185180664, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 378, "native_id": "5c224410a40c9269b1e542cfcb430d35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.097153186798096, "incorrect_loss_raw": 8.814631462097168, "correct_loss_per_char": 0.7281647409711566, "incorrect_loss_per_char": 1.4839316924413044, "correct_loss_per_token": 5.097153186798096, "incorrect_loss_per_token": 8.814631462097168, "correct_loss_uncond": -8.609031200408936, "incorrect_loss_uncond": -5.0295045375823975}, "model_output": [{"sum_logits": -9.036081314086914, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -9.036081314086914, "logits_per_char": -1.506013552347819, "num_chars": 6}, {"sum_logits": -5.097153186798096, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -5.097153186798096, "logits_per_char": -0.7281647409711566, "num_chars": 7}, {"sum_logits": -8.568166732788086, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -8.568166732788086, "logits_per_char": -1.7136333465576172, "num_chars": 5}, {"sum_logits": -5.431194305419922, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -5.431194305419922, "logits_per_char": -0.6788992881774902, "num_chars": 8}, {"sum_logits": -12.22308349609375, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -12.22308349609375, "logits_per_char": -2.0371805826822915, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 379, "native_id": "0b90c6710a65eb55fea4cc92895bf601", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.214325904846191, "incorrect_loss_raw": 13.202332973480225, "correct_loss_per_char": 1.2428651809692384, "incorrect_loss_per_char": 1.393126202592946, "correct_loss_per_token": 6.214325904846191, "incorrect_loss_per_token": 8.61637790997823, "correct_loss_uncond": -4.71944522857666, "incorrect_loss_uncond": -3.613697052001953}, "model_output": [{"sum_logits": -11.413450241088867, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.874828338623047, "logits_per_token": -5.706725120544434, "logits_per_char": -1.0375863855535334, "num_chars": 11}, {"sum_logits": -18.955642700195312, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.67431640625, "logits_per_token": -6.3185475667317705, "logits_per_char": -1.0530912611219618, "num_chars": 18}, {"sum_logits": -10.062206268310547, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.22351360321045, "logits_per_token": -10.062206268310547, "logits_per_char": -1.0062206268310547, "num_chars": 10}, {"sum_logits": -12.378032684326172, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.491461753845215, "logits_per_token": -12.378032684326172, "logits_per_char": -2.4756065368652345, "num_chars": 5}, {"sum_logits": -6.214325904846191, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -10.933771133422852, "logits_per_token": -6.214325904846191, "logits_per_char": -1.2428651809692384, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 380, "native_id": "70af2b5df22ec96901350dfa3c9ee74f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.200453281402588, "incorrect_loss_raw": 11.633037567138672, "correct_loss_per_char": 0.6545866619456898, "incorrect_loss_per_char": 2.1647650003433228, "correct_loss_per_token": 7.200453281402588, "incorrect_loss_per_token": 11.633037567138672, "correct_loss_uncond": -7.593496799468994, "incorrect_loss_uncond": -2.464897394180298}, "model_output": [{"sum_logits": -7.200453281402588, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -7.200453281402588, "logits_per_char": -0.6545866619456898, "num_chars": 11}, {"sum_logits": -12.074390411376953, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.973532676696777, "logits_per_token": -12.074390411376953, "logits_per_char": -2.0123984018961587, "num_chars": 6}, {"sum_logits": -12.58104133605957, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.717844009399414, "logits_per_token": -12.58104133605957, "logits_per_char": -2.096840222676595, "num_chars": 6}, {"sum_logits": -10.844419479370117, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.196586608886719, "logits_per_token": -10.844419479370117, "logits_per_char": -2.7111048698425293, "num_chars": 4}, {"sum_logits": -11.032299041748047, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.503776550292969, "logits_per_token": -11.032299041748047, "logits_per_char": -1.8387165069580078, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 381, "native_id": "f9243ef9f0037657c337d3c6a9832f05", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.706853151321411, "incorrect_loss_raw": 10.712579131126404, "correct_loss_per_char": 0.4633566439151764, "incorrect_loss_per_char": 1.4029655664686174, "correct_loss_per_token": 3.706853151321411, "incorrect_loss_per_token": 10.712579131126404, "correct_loss_uncond": -9.141717672348022, "incorrect_loss_uncond": -4.310136437416077}, "model_output": [{"sum_logits": -7.163045406341553, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.041784286499023, "logits_per_token": -7.163045406341553, "logits_per_char": -1.0232922009059362, "num_chars": 7}, {"sum_logits": -12.191184997558594, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.275694847106934, "logits_per_token": -12.191184997558594, "logits_per_char": -1.3545761108398438, "num_chars": 9}, {"sum_logits": -11.219718933105469, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.448232650756836, "logits_per_token": -11.219718933105469, "logits_per_char": -1.8699531555175781, "num_chars": 6}, {"sum_logits": -3.706853151321411, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.848570823669434, "logits_per_token": -3.706853151321411, "logits_per_char": -0.4633566439151764, "num_chars": 8}, {"sum_logits": -12.2763671875, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.325150489807129, "logits_per_token": -12.2763671875, "logits_per_char": -1.3640407986111112, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 382, "native_id": "27f2074270ea8a5e8f5ec2a017ec4a50", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.926891326904297, "incorrect_loss_raw": 14.609412908554077, "correct_loss_per_char": 1.2140990363226996, "incorrect_loss_per_char": 1.3638708030010436, "correct_loss_per_token": 5.463445663452148, "incorrect_loss_per_token": 5.927505175272623, "correct_loss_uncond": -4.808777809143066, "incorrect_loss_uncond": -5.3980772495269775}, "model_output": [{"sum_logits": -9.784823417663574, "num_tokens": 3, "num_tokens_all": 169, "is_greedy": false, "sum_logits_uncond": -15.183601379394531, "logits_per_token": -3.261607805887858, "logits_per_char": -1.2231029272079468, "num_chars": 8}, {"sum_logits": -14.830483436584473, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -20.737422943115234, "logits_per_token": -7.415241718292236, "logits_per_char": -1.4830483436584472, "num_chars": 10}, {"sum_logits": -10.926891326904297, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -5.463445663452148, "logits_per_char": -1.2140990363226996, "num_chars": 9}, {"sum_logits": -23.268007278442383, "num_tokens": 3, "num_tokens_all": 169, "is_greedy": false, "sum_logits_uncond": -24.478939056396484, "logits_per_token": -7.756002426147461, "logits_per_char": -1.7898467137263372, "num_chars": 13}, {"sum_logits": -10.554337501525879, "num_tokens": 2, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -19.62999725341797, "logits_per_token": -5.2771687507629395, "logits_per_char": -0.9594852274114435, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 383, "native_id": "63b3652d54c8c0e571f6bb50de318bf0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.150754928588867, "incorrect_loss_raw": 11.34265923500061, "correct_loss_per_char": 0.4292295773824056, "incorrect_loss_per_char": 1.4556837975032746, "correct_loss_per_token": 2.5753774642944336, "incorrect_loss_per_token": 9.768775701522827, "correct_loss_uncond": -11.289155960083008, "incorrect_loss_uncond": -3.845803737640381}, "model_output": [{"sum_logits": -15.266072273254395, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -15.266072273254395, "logits_per_char": -2.180867467607771, "num_chars": 7}, {"sum_logits": -5.150754928588867, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.439910888671875, "logits_per_token": -2.5753774642944336, "logits_per_char": -0.4292295773824056, "num_chars": 12}, {"sum_logits": -7.275014877319336, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.239700317382812, "logits_per_token": -7.275014877319336, "logits_per_char": -1.4550029754638671, "num_chars": 5}, {"sum_logits": -12.591068267822266, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.081687927246094, "logits_per_token": -6.295534133911133, "logits_per_char": -1.0492556889851887, "num_chars": 12}, {"sum_logits": -10.238481521606445, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.496543884277344, "logits_per_token": -10.238481521606445, "logits_per_char": -1.1376090579562717, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 384, "native_id": "0843c51212a3c2eee660fab5648c9e19", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9356999397277832, "incorrect_loss_raw": 17.651034355163574, "correct_loss_per_char": 0.4839249849319458, "incorrect_loss_per_char": 1.479892114540199, "correct_loss_per_token": 1.9356999397277832, "incorrect_loss_per_token": 7.2804187933603925, "correct_loss_uncond": -11.305830478668213, "incorrect_loss_uncond": -1.5875513553619385}, "model_output": [{"sum_logits": -17.471817016601562, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -20.811553955078125, "logits_per_token": -5.8239390055338545, "logits_per_char": -0.7941735007546165, "num_chars": 22}, {"sum_logits": -15.706083297729492, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.853041648864746, "logits_per_char": -1.745120366414388, "num_chars": 9}, {"sum_logits": -1.9356999397277832, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": true, "sum_logits_uncond": -13.241530418395996, "logits_per_token": -1.9356999397277832, "logits_per_char": -0.4839249849319458, "num_chars": 4}, {"sum_logits": -17.815692901611328, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.775218963623047, "logits_per_token": -8.907846450805664, "logits_per_char": -1.9795214335123699, "num_chars": 9}, {"sum_logits": -19.610544204711914, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -21.631900787353516, "logits_per_token": -6.536848068237305, "logits_per_char": -1.4007531574794225, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 385, "native_id": "1b3d286458a7e7f069222de0376d06da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.658809661865234, "incorrect_loss_raw": 13.993566155433655, "correct_loss_per_char": 1.2954232957628038, "incorrect_loss_per_char": 1.4164380740637732, "correct_loss_per_token": 5.829404830932617, "incorrect_loss_per_token": 6.273127218087514, "correct_loss_uncond": -7.335750579833984, "incorrect_loss_uncond": -6.1393269300460815}, "model_output": [{"sum_logits": -18.016422271728516, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.855682373046875, "logits_per_token": -9.008211135864258, "logits_per_char": -1.8016422271728516, "num_chars": 10}, {"sum_logits": -12.647674560546875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -6.3238372802734375, "logits_per_char": -1.4052971733940973, "num_chars": 9}, {"sum_logits": -11.658809661865234, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -5.829404830932617, "logits_per_char": -1.2954232957628038, "num_chars": 9}, {"sum_logits": -7.942427158355713, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.398208618164062, "logits_per_token": -3.9712135791778564, "logits_per_char": -0.7220388325777921, "num_chars": 11}, {"sum_logits": -17.367740631103516, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -5.789246877034505, "logits_per_char": -1.7367740631103517, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 386, "native_id": "86e2aabfb9d401567f04d87a648ff776", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3031020164489746, "incorrect_loss_raw": 9.329804301261902, "correct_loss_per_char": 0.4718717166355678, "incorrect_loss_per_char": 1.0253975899919632, "correct_loss_per_token": 3.3031020164489746, "incorrect_loss_per_token": 4.664902150630951, "correct_loss_uncond": -11.22047472000122, "incorrect_loss_uncond": -7.529874205589294}, "model_output": [{"sum_logits": -12.40598201751709, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.416587829589844, "logits_per_token": -6.202991008758545, "logits_per_char": -1.0338318347930908, "num_chars": 12}, {"sum_logits": -11.340656280517578, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.432003021240234, "logits_per_token": -5.670328140258789, "logits_per_char": -1.2600729200575087, "num_chars": 9}, {"sum_logits": -3.3031020164489746, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -3.3031020164489746, "logits_per_char": -0.4718717166355678, "num_chars": 7}, {"sum_logits": -6.222341537475586, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.131707191467285, "logits_per_token": -3.111170768737793, "logits_per_char": -0.8889059339250837, "num_chars": 7}, {"sum_logits": -7.3502373695373535, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.458415985107422, "logits_per_token": -3.6751186847686768, "logits_per_char": -0.9187796711921692, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 387, "native_id": "092c24369367b3c7457198f3ce160fe3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.7858328819274902, "incorrect_loss_raw": 8.858615398406982, "correct_loss_per_char": 0.46430548032124835, "incorrect_loss_per_char": 0.7506728370984396, "correct_loss_per_token": 2.7858328819274902, "incorrect_loss_per_token": 3.806667248408, "correct_loss_uncond": -13.535662174224854, "incorrect_loss_uncond": -10.037875175476074}, "model_output": [{"sum_logits": -5.46917200088501, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.817781448364258, "logits_per_token": -2.734586000442505, "logits_per_char": -0.6076857778761122, "num_chars": 9}, {"sum_logits": -8.209823608398438, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.856203079223633, "logits_per_token": -4.104911804199219, "logits_per_char": -0.9122026231553819, "num_chars": 9}, {"sum_logits": -2.7858328819274902, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.321495056152344, "logits_per_token": -2.7858328819274902, "logits_per_char": -0.46430548032124835, "num_chars": 6}, {"sum_logits": -14.943370819091797, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.185869216918945, "logits_per_token": -4.981123606363933, "logits_per_char": -0.9962247212727865, "num_chars": 15}, {"sum_logits": -6.8120951652526855, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.72610855102539, "logits_per_token": -3.4060475826263428, "logits_per_char": -0.48657822608947754, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 388, "native_id": "cab9eea2a91b1bd5c0a01b11f594f154", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.127513885498047, "incorrect_loss_raw": 11.180169224739075, "correct_loss_per_char": 1.0115921714089133, "incorrect_loss_per_char": 1.3255992862913344, "correct_loss_per_token": 5.563756942749023, "incorrect_loss_per_token": 5.608098427454631, "correct_loss_uncond": -4.254316329956055, "incorrect_loss_uncond": -7.758100628852844}, "model_output": [{"sum_logits": -11.127513885498047, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -5.563756942749023, "logits_per_char": -1.0115921714089133, "num_chars": 11}, {"sum_logits": -4.748712062835693, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.856059074401855, "logits_per_token": -4.748712062835693, "logits_per_char": -0.5276346736484103, "num_chars": 9}, {"sum_logits": -11.09765338897705, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.698768615722656, "logits_per_token": -5.548826694488525, "logits_per_char": -1.0088775808160955, "num_chars": 11}, {"sum_logits": -15.060506820678711, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.870458602905273, "logits_per_token": -7.5302534103393555, "logits_per_char": -2.5100844701131186, "num_chars": 6}, {"sum_logits": -13.813804626464844, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.32779312133789, "logits_per_token": -4.604601542154948, "logits_per_char": -1.2558004205877131, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 389, "native_id": "6e77de03bee86d6c20780e14f00944d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0222930908203125, "incorrect_loss_raw": 13.357427597045898, "correct_loss_per_char": 0.12778663635253906, "incorrect_loss_per_char": 1.6359388237907775, "correct_loss_per_token": 1.0222930908203125, "incorrect_loss_per_token": 10.123572945594788, "correct_loss_uncond": -13.609709739685059, "incorrect_loss_uncond": -3.8059816360473633}, "model_output": [{"sum_logits": -17.247224807739258, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -28.911453247070312, "logits_per_token": -4.3118062019348145, "logits_per_char": -0.8623612403869629, "num_chars": 20}, {"sum_logits": -10.187945365905762, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.709630012512207, "logits_per_token": -10.187945365905762, "logits_per_char": -1.697990894317627, "num_chars": 6}, {"sum_logits": -1.0222930908203125, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.632002830505371, "logits_per_token": -1.0222930908203125, "logits_per_char": -0.12778663635253906, "num_chars": 8}, {"sum_logits": -14.658848762512207, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -14.658848762512207, "logits_per_char": -2.0941212517874583, "num_chars": 7}, {"sum_logits": -11.335691452026367, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -11.335691452026367, "logits_per_char": -1.8892819086710613, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 390, "native_id": "7f25dbab26165b3c8800c2733ca759d6", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 16.112876892089844, "incorrect_loss_raw": 13.2838773727417, "correct_loss_per_char": 1.1509197780064173, "incorrect_loss_per_char": 1.4653024246986677, "correct_loss_per_token": 5.370958964029948, "incorrect_loss_per_token": 6.321831981341044, "correct_loss_uncond": -5.373096466064453, "incorrect_loss_uncond": -3.5412447452545166}, "model_output": [{"sum_logits": -11.01803970336914, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -5.50901985168457, "logits_per_char": -1.3772549629211426, "num_chars": 8}, {"sum_logits": -16.112876892089844, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.485973358154297, "logits_per_token": -5.370958964029948, "logits_per_char": -1.1509197780064173, "num_chars": 14}, {"sum_logits": -10.548917770385742, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -10.548917770385742, "logits_per_char": -1.506988252912249, "num_chars": 7}, {"sum_logits": -16.047027587890625, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.884632110595703, "logits_per_token": -5.349009195963542, "logits_per_char": -1.7830030653211806, "num_chars": 9}, {"sum_logits": -15.521524429321289, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.276830673217773, "logits_per_token": -3.8803811073303223, "logits_per_char": -1.193963417640099, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 391, "native_id": "9024493a3edbaf555fda5b477e835bf5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.903772354125977, "incorrect_loss_raw": 11.244872808456421, "correct_loss_per_char": 1.322641372680664, "incorrect_loss_per_char": 1.2604345215691461, "correct_loss_per_token": 11.903772354125977, "incorrect_loss_per_token": 6.624419053395589, "correct_loss_uncond": -2.0625104904174805, "incorrect_loss_uncond": -4.32624077796936}, "model_output": [{"sum_logits": -4.201305389404297, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -4.201305389404297, "logits_per_char": -0.7002175649007162, "num_chars": 6}, {"sum_logits": -9.49622631072998, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -9.49622631072998, "logits_per_char": -1.0551362567477756, "num_chars": 9}, {"sum_logits": -14.236948013305664, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.118474006652832, "logits_per_char": -1.5818831125895183, "num_chars": 9}, {"sum_logits": -17.045011520385742, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.575927734375, "logits_per_token": -5.681670506795247, "logits_per_char": -1.7045011520385742, "num_chars": 10}, {"sum_logits": -11.903772354125977, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.966282844543457, "logits_per_token": -11.903772354125977, "logits_per_char": -1.322641372680664, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 392, "native_id": "fc59ab1a9e6d2b51126dd828d30e9167", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.731962203979492, "incorrect_loss_raw": 11.552645444869995, "correct_loss_per_char": 0.40942587171282085, "incorrect_loss_per_char": 1.7152505702442593, "correct_loss_per_token": 2.865981101989746, "incorrect_loss_per_token": 9.425941467285156, "correct_loss_uncond": -10.900577545166016, "incorrect_loss_uncond": -4.355147123336792}, "model_output": [{"sum_logits": -12.810148239135742, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -12.810148239135742, "logits_per_char": -2.5620296478271483, "num_chars": 5}, {"sum_logits": -17.01363182067871, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.388938903808594, "logits_per_token": -8.506815910339355, "logits_per_char": -1.8904035356309679, "num_chars": 9}, {"sum_logits": -8.64525318145752, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -8.64525318145752, "logits_per_char": -1.44087553024292, "num_chars": 6}, {"sum_logits": -5.731962203979492, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.632539749145508, "logits_per_token": -2.865981101989746, "logits_per_char": -0.40942587171282085, "num_chars": 14}, {"sum_logits": -7.741548538208008, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -7.741548538208008, "logits_per_char": -0.967693567276001, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 393, "native_id": "5a50ea4bb2d13dc4f620ebd45025d445", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6253119707107544, "incorrect_loss_raw": 9.829255819320679, "correct_loss_per_char": 0.06253119707107543, "incorrect_loss_per_char": 1.1718849539756775, "correct_loss_per_token": 0.6253119707107544, "incorrect_loss_per_token": 7.625005483627319, "correct_loss_uncond": -14.843496203422546, "incorrect_loss_uncond": -7.005053281784058}, "model_output": [{"sum_logits": -9.948485374450684, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.166871070861816, "logits_per_token": -9.948485374450684, "logits_per_char": -1.6580808957417805, "num_chars": 6}, {"sum_logits": -5.215940475463867, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.95476245880127, "logits_per_token": -5.215940475463867, "logits_per_char": -0.7451343536376953, "num_chars": 7}, {"sum_logits": -17.634002685546875, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.533191680908203, "logits_per_token": -8.817001342773438, "logits_per_char": -1.469500223795573, "num_chars": 12}, {"sum_logits": -6.518594741821289, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.682411193847656, "logits_per_token": -6.518594741821289, "logits_per_char": -0.8148243427276611, "num_chars": 8}, {"sum_logits": -0.6253119707107544, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -15.4688081741333, "logits_per_token": -0.6253119707107544, "logits_per_char": -0.06253119707107543, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 394, "native_id": "8becd2ee4e86258566a9c2b0e6d9544e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.9925994873046875, "incorrect_loss_raw": 12.770409226417542, "correct_loss_per_char": 0.420663130910773, "incorrect_loss_per_char": 1.131353427930047, "correct_loss_per_token": 2.6641998291015625, "incorrect_loss_per_token": 6.385204613208771, "correct_loss_uncond": -10.676986694335938, "incorrect_loss_uncond": -6.696879267692566}, "model_output": [{"sum_logits": -11.38616943359375, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.86435317993164, "logits_per_token": -5.693084716796875, "logits_per_char": -0.8132978166852679, "num_chars": 14}, {"sum_logits": -7.9925994873046875, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.669586181640625, "logits_per_token": -2.6641998291015625, "logits_per_char": -0.420663130910773, "num_chars": 19}, {"sum_logits": -7.049992084503174, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.969526290893555, "logits_per_token": -3.524996042251587, "logits_per_char": -0.542307083423321, "num_chars": 13}, {"sum_logits": -16.46878433227539, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.550174713134766, "logits_per_token": -8.234392166137695, "logits_per_char": -1.3723986943562825, "num_chars": 12}, {"sum_logits": -16.17669105529785, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.48509979248047, "logits_per_token": -8.088345527648926, "logits_per_char": -1.7974101172553167, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 395, "native_id": "2a21820a135e1a49883525c055c74a0b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9544248580932617, "incorrect_loss_raw": 9.438773274421692, "correct_loss_per_char": 0.39544248580932617, "incorrect_loss_per_char": 0.8209336961990867, "correct_loss_per_token": 1.9772124290466309, "incorrect_loss_per_token": 6.0557105143864955, "correct_loss_uncond": -16.272000312805176, "incorrect_loss_uncond": -6.578323245048523}, "model_output": [{"sum_logits": -7.893243789672852, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -7.893243789672852, "logits_per_char": -0.8770270877414279, "num_chars": 9}, {"sum_logits": -11.980039596557617, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.92006492614746, "logits_per_token": -3.9933465321858725, "logits_per_char": -0.7047082115622127, "num_chars": 17}, {"sum_logits": -6.790693759918213, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.794161796569824, "logits_per_token": -6.790693759918213, "logits_per_char": -0.8488367199897766, "num_chars": 8}, {"sum_logits": -11.091115951538086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.241363525390625, "logits_per_token": -5.545557975769043, "logits_per_char": -0.8531627655029297, "num_chars": 13}, {"sum_logits": -3.9544248580932617, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.226425170898438, "logits_per_token": -1.9772124290466309, "logits_per_char": -0.39544248580932617, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 396, "native_id": "e5adfec0b5ba691ec752f9b5e0fb8084", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.163541793823242, "incorrect_loss_raw": 10.454111576080322, "correct_loss_per_char": 0.8605902989705404, "incorrect_loss_per_char": 1.1074789159827763, "correct_loss_per_token": 5.163541793823242, "incorrect_loss_per_token": 7.7509073416392, "correct_loss_uncond": -10.867549896240234, "incorrect_loss_uncond": -6.0865638256073}, "model_output": [{"sum_logits": -8.79552173614502, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.005489349365234, "logits_per_token": -2.9318405787150064, "logits_per_char": -0.439776086807251, "num_chars": 20}, {"sum_logits": -9.898271560668945, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -4.949135780334473, "logits_per_char": -1.0998079511854384, "num_chars": 9}, {"sum_logits": -8.940104484558105, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -8.940104484558105, "logits_per_char": -1.1175130605697632, "num_chars": 8}, {"sum_logits": -5.163541793823242, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -5.163541793823242, "logits_per_char": -0.8605902989705404, "num_chars": 6}, {"sum_logits": -14.182548522949219, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -14.182548522949219, "logits_per_char": -1.7728185653686523, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 397, "native_id": "406e15b76269d20b5448a91648094291", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.859992027282715, "incorrect_loss_raw": 9.701516449451447, "correct_loss_per_char": 0.8099986712137858, "incorrect_loss_per_char": 1.2064216700295178, "correct_loss_per_token": 4.859992027282715, "incorrect_loss_per_token": 9.33581891655922, "correct_loss_uncond": -8.708354949951172, "incorrect_loss_uncond": -4.339464604854584}, "model_output": [{"sum_logits": -16.358566284179688, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.05655288696289, "logits_per_token": -16.358566284179688, "logits_per_char": -2.044820785522461, "num_chars": 8}, {"sum_logits": -2.9255802631378174, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -15.418137550354004, "logits_per_token": -1.4627901315689087, "logits_per_char": -0.26596184210343793, "num_chars": 11}, {"sum_logits": -10.892760276794434, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -10.892760276794434, "logits_per_char": -1.5561086109706335, "num_chars": 7}, {"sum_logits": -8.629158973693848, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.983049392700195, "logits_per_token": -8.629158973693848, "logits_per_char": -0.9587954415215386, "num_chars": 9}, {"sum_logits": -4.859992027282715, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.568346977233887, "logits_per_token": -4.859992027282715, "logits_per_char": -0.8099986712137858, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 398, "native_id": "9c596382ea15768f95b5ef9ceec191dc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.076290130615234, "incorrect_loss_raw": 19.475735425949097, "correct_loss_per_char": 1.7251843043736048, "incorrect_loss_per_char": 1.308472529053688, "correct_loss_per_token": 12.076290130615234, "incorrect_loss_per_token": 6.641594409942627, "correct_loss_uncond": -4.600894927978516, "incorrect_loss_uncond": -0.45395874977111816}, "model_output": [{"sum_logits": -12.428813934326172, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.151918411254883, "logits_per_token": -6.214406967163086, "logits_per_char": -1.3809793260362413, "num_chars": 9}, {"sum_logits": -15.933754920959473, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.724233627319336, "logits_per_token": -7.966877460479736, "logits_per_char": -0.995859682559967, "num_chars": 16}, {"sum_logits": -12.076290130615234, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.67718505859375, "logits_per_token": -12.076290130615234, "logits_per_char": -1.7251843043736048, "num_chars": 7}, {"sum_logits": -22.080005645751953, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.54266357421875, "logits_per_token": -5.520001411437988, "logits_per_char": -1.8400004704793294, "num_chars": 12}, {"sum_logits": -27.46036720275879, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.29996109008789, "logits_per_token": -6.865091800689697, "logits_per_char": -1.0170506371392145, "num_chars": 27}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 399, "native_id": "7a3d0c94438a5c8a09364aaebb848a2c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.388859272003174, "incorrect_loss_raw": 8.162998914718628, "correct_loss_per_char": 0.7314765453338623, "incorrect_loss_per_char": 1.063453220701837, "correct_loss_per_token": 4.388859272003174, "incorrect_loss_per_token": 5.615379691123962, "correct_loss_uncond": -9.316308498382568, "incorrect_loss_uncond": -8.657409906387329}, "model_output": [{"sum_logits": -4.388859272003174, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.705167770385742, "logits_per_token": -4.388859272003174, "logits_per_char": -0.7314765453338623, "num_chars": 6}, {"sum_logits": -10.822622299194336, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.18752098083496, "logits_per_token": -5.411311149597168, "logits_per_char": -0.9838747544722124, "num_chars": 11}, {"sum_logits": -8.008148193359375, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.79429817199707, "logits_per_token": -8.008148193359375, "logits_per_char": -1.1440211704799108, "num_chars": 7}, {"sum_logits": -4.2628936767578125, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.666143417358398, "logits_per_token": -4.2628936767578125, "logits_per_char": -0.5328617095947266, "num_chars": 8}, {"sum_logits": -9.558331489562988, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.6336727142334, "logits_per_token": -4.779165744781494, "logits_per_char": -1.593055248260498, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 400, "native_id": "1ef68db97654f30cd3701b942fadc934", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.166193962097168, "incorrect_loss_raw": 9.006301999092102, "correct_loss_per_char": 0.4803643507115981, "incorrect_loss_per_char": 1.4578434983889261, "correct_loss_per_token": 2.722064654032389, "incorrect_loss_per_token": 9.006301999092102, "correct_loss_uncond": -14.555840492248535, "incorrect_loss_uncond": -5.403793931007385}, "model_output": [{"sum_logits": -12.342354774475098, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.306282997131348, "logits_per_token": -12.342354774475098, "logits_per_char": -2.057059129079183, "num_chars": 6}, {"sum_logits": -8.166193962097168, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -2.722064654032389, "logits_per_char": -0.4803643507115981, "num_chars": 17}, {"sum_logits": -6.635748386383057, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -6.635748386383057, "logits_per_char": -0.6635748386383057, "num_chars": 10}, {"sum_logits": -8.086676597595215, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.534889221191406, "logits_per_token": -8.086676597595215, "logits_per_char": -1.617335319519043, "num_chars": 5}, {"sum_logits": -8.960428237915039, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -8.960428237915039, "logits_per_char": -1.493404706319173, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 401, "native_id": "abb090bbc572be1016bcd5f261f28e76", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.247071266174316, "incorrect_loss_raw": 12.307583332061768, "correct_loss_per_char": 0.520589272181193, "incorrect_loss_per_char": 1.2804552248546057, "correct_loss_per_token": 6.247071266174316, "incorrect_loss_per_token": 8.754598617553711, "correct_loss_uncond": -11.361035346984863, "incorrect_loss_uncond": -4.536359071731567}, "model_output": [{"sum_logits": -7.837907791137695, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -7.837907791137695, "logits_per_char": -1.306317965189616, "num_chars": 6}, {"sum_logits": -12.968547821044922, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.331829071044922, "logits_per_token": -12.968547821044922, "logits_per_char": -1.8526496887207031, "num_chars": 7}, {"sum_logits": -6.247071266174316, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.60810661315918, "logits_per_token": -6.247071266174316, "logits_per_char": -0.520589272181193, "num_chars": 12}, {"sum_logits": -14.264893531799316, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.76356315612793, "logits_per_token": -7.132446765899658, "logits_per_char": -1.018920966557094, "num_chars": 14}, {"sum_logits": -14.158984184265137, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.972366333007812, "logits_per_token": -7.079492092132568, "logits_per_char": -0.9439322789510091, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 402, "native_id": "91f2532a832a35cba1b08a3c767be6da", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.44173896312713623, "incorrect_loss_raw": 11.717047691345215, "correct_loss_per_char": 0.06310556616101946, "incorrect_loss_per_char": 2.209114367053622, "correct_loss_per_token": 0.44173896312713623, "incorrect_loss_per_token": 11.717047691345215, "correct_loss_uncond": -16.72911560535431, "incorrect_loss_uncond": -2.215627670288086}, "model_output": [{"sum_logits": -14.510700225830078, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -14.510700225830078, "logits_per_char": -2.0729571751185825, "num_chars": 7}, {"sum_logits": -10.927809715270996, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -10.927809715270996, "logits_per_char": -2.731952428817749, "num_chars": 4}, {"sum_logits": -13.7980318069458, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.634849548339844, "logits_per_token": -13.7980318069458, "logits_per_char": -2.75960636138916, "num_chars": 5}, {"sum_logits": -7.631649017333984, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.698734283447266, "logits_per_token": -7.631649017333984, "logits_per_char": -1.2719415028889973, "num_chars": 6}, {"sum_logits": -0.44173896312713623, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -17.170854568481445, "logits_per_token": -0.44173896312713623, "logits_per_char": -0.06310556616101946, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 403, "native_id": "f8544c9679d27b747dfad3b8d7aac87a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.001643180847168, "incorrect_loss_raw": 12.00630009174347, "correct_loss_per_char": 0.5557381312052408, "incorrect_loss_per_char": 1.7038684566815696, "correct_loss_per_token": 5.001643180847168, "incorrect_loss_per_token": 6.898270964622498, "correct_loss_uncond": -8.133816719055176, "incorrect_loss_uncond": -3.0883644819259644}, "model_output": [{"sum_logits": -15.215435028076172, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.607717514038086, "logits_per_char": -1.6906038920084636, "num_chars": 9}, {"sum_logits": -13.970956802368164, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.94958209991455, "logits_per_token": -6.985478401184082, "logits_per_char": -1.7463696002960205, "num_chars": 8}, {"sum_logits": -7.1609673500061035, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.179679870605469, "logits_per_token": -7.1609673500061035, "logits_per_char": -1.4321934700012207, "num_chars": 5}, {"sum_logits": -11.677841186523438, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.51372718811035, "logits_per_token": -5.838920593261719, "logits_per_char": -1.946306864420573, "num_chars": 6}, {"sum_logits": -5.001643180847168, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -5.001643180847168, "logits_per_char": -0.5557381312052408, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 404, "native_id": "a7f423c1636ba9e36d18e381928c5dcc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.673349380493164, "incorrect_loss_raw": 14.71979546546936, "correct_loss_per_char": 0.9591686725616455, "incorrect_loss_per_char": 1.5542284775347934, "correct_loss_per_token": 7.673349380493164, "incorrect_loss_per_token": 8.639652967453003, "correct_loss_uncond": -6.772583961486816, "incorrect_loss_uncond": -2.5823917388916016}, "model_output": [{"sum_logits": -10.238041877746582, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.91370677947998, "logits_per_token": -10.238041877746582, "logits_per_char": -1.2797552347183228, "num_chars": 8}, {"sum_logits": -18.140506744384766, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.01707649230957, "logits_per_token": -9.070253372192383, "logits_per_char": -2.015611860487196, "num_chars": 9}, {"sum_logits": -7.673349380493164, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.44593334197998, "logits_per_token": -7.673349380493164, "logits_per_char": -0.9591686725616455, "num_chars": 8}, {"sum_logits": -18.721839904785156, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -9.360919952392578, "logits_per_char": -2.0802044338650174, "num_chars": 9}, {"sum_logits": -11.778793334960938, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.800956726074219, "logits_per_token": -5.889396667480469, "logits_per_char": -0.8413423810686383, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 405, "native_id": "e1d354cbfcd620e5dacf83c17746c4b3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.786247253417969, "incorrect_loss_raw": 11.3568434715271, "correct_loss_per_char": 1.08736080593533, "incorrect_loss_per_char": 1.2946613345827376, "correct_loss_per_token": 4.893123626708984, "incorrect_loss_per_token": 7.899119853973389, "correct_loss_uncond": -9.97816276550293, "incorrect_loss_uncond": -6.254766225814819}, "model_output": [{"sum_logits": -12.079999923706055, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -6.039999961853027, "logits_per_char": -1.3422222137451172, "num_chars": 9}, {"sum_logits": -9.495403289794922, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -9.495403289794922, "logits_per_char": -1.3564861842564173, "num_chars": 7}, {"sum_logits": -15.581789016723633, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.65969467163086, "logits_per_token": -7.790894508361816, "logits_per_char": -1.2984824180603027, "num_chars": 12}, {"sum_logits": -9.786247253417969, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.7644100189209, "logits_per_token": -4.893123626708984, "logits_per_char": -1.08736080593533, "num_chars": 9}, {"sum_logits": -8.270181655883789, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -8.270181655883789, "logits_per_char": -1.1814545222691126, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 406, "native_id": "53e1e50d204f6ad5a0f69429eadae82e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.41649055480957, "incorrect_loss_raw": 13.672814130783081, "correct_loss_per_char": 0.7129433949788412, "incorrect_loss_per_char": 1.3195879538853963, "correct_loss_per_token": 3.208245277404785, "incorrect_loss_per_token": 5.612583899497986, "correct_loss_uncond": -6.10011100769043, "incorrect_loss_uncond": -9.541794061660767}, "model_output": [{"sum_logits": -6.41649055480957, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.5166015625, "logits_per_token": -3.208245277404785, "logits_per_char": -0.7129433949788412, "num_chars": 9}, {"sum_logits": -11.843852043151855, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.96038818359375, "logits_per_token": -5.921926021575928, "logits_per_char": -1.9739753405253093, "num_chars": 6}, {"sum_logits": -16.317642211914062, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -26.253246307373047, "logits_per_token": -3.2635284423828126, "logits_per_char": -1.0878428141276042, "num_chars": 15}, {"sum_logits": -13.961297988891602, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.706356048583984, "logits_per_token": -6.980648994445801, "logits_per_char": -1.0739459991455078, "num_chars": 13}, {"sum_logits": -12.568464279174805, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.93844223022461, "logits_per_token": -6.284232139587402, "logits_per_char": -1.142587661743164, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 407, "native_id": "48205cc84aab5e455b22e17c3cc7277d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.330260276794434, "incorrect_loss_raw": 11.50186562538147, "correct_loss_per_char": 0.8807328769138881, "incorrect_loss_per_char": 1.2820277418409074, "correct_loss_per_token": 6.165130138397217, "incorrect_loss_per_token": 7.6565773487091064, "correct_loss_uncond": -11.351632118225098, "incorrect_loss_uncond": -5.862263917922974}, "model_output": [{"sum_logits": -5.47092342376709, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -5.47092342376709, "logits_per_char": -1.094184684753418, "num_chars": 5}, {"sum_logits": -12.330260276794434, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.68189239501953, "logits_per_token": -6.165130138397217, "logits_per_char": -0.8807328769138881, "num_chars": 14}, {"sum_logits": -9.774232864379883, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.17648696899414, "logits_per_token": -9.774232864379883, "logits_per_char": -1.6290388107299805, "num_chars": 6}, {"sum_logits": -13.32559585571289, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.02646255493164, "logits_per_token": -6.662797927856445, "logits_per_char": -0.9518282754080636, "num_chars": 14}, {"sum_logits": -17.436710357666016, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.234493255615234, "logits_per_token": -8.718355178833008, "logits_per_char": -1.453059196472168, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 408, "native_id": "0f7419d25337e0a75503a015ae777905", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.875919342041016, "incorrect_loss_raw": 9.280503511428833, "correct_loss_per_char": 0.6250835765491832, "incorrect_loss_per_char": 1.3964488880974908, "correct_loss_per_token": 3.437959671020508, "incorrect_loss_per_token": 9.280503511428833, "correct_loss_uncond": -9.958976745605469, "incorrect_loss_uncond": -3.713395595550537}, "model_output": [{"sum_logits": -6.875919342041016, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.834896087646484, "logits_per_token": -3.437959671020508, "logits_per_char": -0.6250835765491832, "num_chars": 11}, {"sum_logits": -8.299269676208496, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.544071197509766, "logits_per_token": -8.299269676208496, "logits_per_char": -0.9221410751342773, "num_chars": 9}, {"sum_logits": -5.8442792892456055, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.185674667358398, "logits_per_token": -5.8442792892456055, "logits_per_char": -0.5844279289245605, "num_chars": 10}, {"sum_logits": -13.940301895141602, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -13.940301895141602, "logits_per_char": -2.7880603790283205, "num_chars": 5}, {"sum_logits": -9.038163185119629, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.940627098083496, "logits_per_token": -9.038163185119629, "logits_per_char": -1.291166169302804, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 409, "native_id": "5cac4da628f0a58db980649079bd5784", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.376008033752441, "incorrect_loss_raw": 14.405718564987183, "correct_loss_per_char": 0.5470010042190552, "incorrect_loss_per_char": 1.1357829211250183, "correct_loss_per_token": 4.376008033752441, "incorrect_loss_per_token": 4.905691564083099, "correct_loss_uncond": -11.555828094482422, "incorrect_loss_uncond": -2.7049808502197266}, "model_output": [{"sum_logits": -14.736226081848145, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.368113040924072, "logits_per_char": -1.6373584535386827, "num_chars": 9}, {"sum_logits": -9.799304008483887, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.542440414428711, "logits_per_token": -4.899652004241943, "logits_per_char": -0.6999502863202777, "num_chars": 14}, {"sum_logits": -14.75064754486084, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -3.68766188621521, "logits_per_char": -0.9833765029907227, "num_chars": 15}, {"sum_logits": -4.376008033752441, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.931836128234863, "logits_per_token": -4.376008033752441, "logits_per_char": -0.5470010042190552, "num_chars": 8}, {"sum_logits": -18.33669662475586, "num_tokens": 5, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.831790924072266, "logits_per_token": -3.667339324951172, "logits_per_char": -1.2224464416503906, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 410, "native_id": "78d1218aeff70a70904767349e3c4c53", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.345271110534668, "incorrect_loss_raw": 9.746750712394714, "correct_loss_per_char": 1.0345271110534668, "incorrect_loss_per_char": 0.8758376905412385, "correct_loss_per_token": 5.172635555267334, "incorrect_loss_per_token": 4.873375356197357, "correct_loss_uncond": -8.438841819763184, "incorrect_loss_uncond": -10.639008641242981}, "model_output": [{"sum_logits": -9.606831550598145, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.336002349853516, "logits_per_token": -4.803415775299072, "logits_per_char": -0.8005692958831787, "num_chars": 12}, {"sum_logits": -10.34567642211914, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.156044006347656, "logits_per_token": -5.17283821105957, "logits_per_char": -0.9405160383744673, "num_chars": 11}, {"sum_logits": -10.345271110534668, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.78411293029785, "logits_per_token": -5.172635555267334, "logits_per_char": -1.0345271110534668, "num_chars": 10}, {"sum_logits": -7.935265064239502, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.582775115966797, "logits_per_token": -3.967632532119751, "logits_per_char": -0.5290176709493001, "num_chars": 15}, {"sum_logits": -11.09922981262207, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.468215942382812, "logits_per_token": -5.549614906311035, "logits_per_char": -1.2332477569580078, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 411, "native_id": "cce13a32fedb997c017d3fac87c34912", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.401455879211426, "incorrect_loss_raw": 8.924425601959229, "correct_loss_per_char": 0.7401455879211426, "incorrect_loss_per_char": 1.2739284069507153, "correct_loss_per_token": 7.401455879211426, "incorrect_loss_per_token": 8.924425601959229, "correct_loss_uncond": -5.050691604614258, "incorrect_loss_uncond": -6.151595115661621}, "model_output": [{"sum_logits": -12.512629508972168, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.923567771911621, "logits_per_token": -12.512629508972168, "logits_per_char": -1.7875185012817383, "num_chars": 7}, {"sum_logits": -7.401455879211426, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.452147483825684, "logits_per_token": -7.401455879211426, "logits_per_char": -0.7401455879211426, "num_chars": 10}, {"sum_logits": -8.609566688537598, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -8.609566688537598, "logits_per_char": -0.7826878807761453, "num_chars": 11}, {"sum_logits": -6.81789493560791, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.61539649963379, "logits_per_token": -6.81789493560791, "logits_per_char": -0.97398499080113, "num_chars": 7}, {"sum_logits": -7.757611274719238, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.171759605407715, "logits_per_token": -7.757611274719238, "logits_per_char": -1.5515222549438477, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 412, "native_id": "6714487b839f648e348ac972ed114af3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.712118148803711, "incorrect_loss_raw": 15.534083127975464, "correct_loss_per_char": 1.3390147686004639, "incorrect_loss_per_char": 1.138776054341569, "correct_loss_per_token": 10.712118148803711, "incorrect_loss_per_token": 6.757447044054667, "correct_loss_uncond": -4.8108015060424805, "incorrect_loss_uncond": -8.263817071914673}, "model_output": [{"sum_logits": -15.75600528717041, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.567787170410156, "logits_per_token": -7.878002643585205, "logits_per_char": -1.575600528717041, "num_chars": 10}, {"sum_logits": -10.712118148803711, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.522919654846191, "logits_per_token": -10.712118148803711, "logits_per_char": -1.3390147686004639, "num_chars": 8}, {"sum_logits": -9.323970794677734, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.374744415283203, "logits_per_token": -4.661985397338867, "logits_per_char": -0.717228522667518, "num_chars": 13}, {"sum_logits": -12.826087951660156, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.634252548217773, "logits_per_token": -6.413043975830078, "logits_per_char": -0.9161491394042969, "num_chars": 14}, {"sum_logits": -24.230268478393555, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -30.614816665649414, "logits_per_token": -8.076756159464518, "logits_per_char": -1.3461260265774198, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 413, "native_id": "3e536d9253bfac45de83e8ee291ca143", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.60837173461914, "incorrect_loss_raw": 6.934635758399963, "correct_loss_per_char": 2.121674346923828, "incorrect_loss_per_char": 1.2351138029779707, "correct_loss_per_token": 5.30418586730957, "incorrect_loss_per_token": 6.934635758399963, "correct_loss_uncond": -4.987615585327148, "incorrect_loss_uncond": -8.073577523231506}, "model_output": [{"sum_logits": -4.485097408294678, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -4.485097408294678, "logits_per_char": -0.44850974082946776, "num_chars": 10}, {"sum_logits": -10.60837173461914, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.595987319946289, "logits_per_token": -5.30418586730957, "logits_per_char": -2.121674346923828, "num_chars": 5}, {"sum_logits": -4.4170331954956055, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -4.4170331954956055, "logits_per_char": -0.7361721992492676, "num_chars": 6}, {"sum_logits": -9.938667297363281, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.892523765563965, "logits_per_token": -9.938667297363281, "logits_per_char": -2.4846668243408203, "num_chars": 4}, {"sum_logits": -8.897745132446289, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.928361892700195, "logits_per_token": -8.897745132446289, "logits_per_char": -1.271106447492327, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 414, "native_id": "9f830faa0f8e3d7fb3a658c15a5fbe63", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0910379886627197, "incorrect_loss_raw": 11.345081090927124, "correct_loss_per_char": 0.2575864990552266, "incorrect_loss_per_char": 1.2639030324088203, "correct_loss_per_token": 3.0910379886627197, "incorrect_loss_per_token": 9.940784692764282, "correct_loss_uncond": -10.564350843429565, "incorrect_loss_uncond": -3.85414719581604}, "model_output": [{"sum_logits": -12.841115951538086, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.537818908691406, "logits_per_token": -12.841115951538086, "logits_per_char": -1.2841115951538087, "num_chars": 10}, {"sum_logits": -10.15340805053711, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.206267356872559, "logits_per_token": -10.15340805053711, "logits_per_char": -1.1281564500596788, "num_chars": 9}, {"sum_logits": -11.151429176330566, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.203649520874023, "logits_per_token": -11.151429176330566, "logits_per_char": -1.239047686258952, "num_chars": 9}, {"sum_logits": -3.0910379886627197, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.655388832092285, "logits_per_token": -3.0910379886627197, "logits_per_char": -0.2575864990552266, "num_chars": 12}, {"sum_logits": -11.234371185302734, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -5.617185592651367, "logits_per_char": -1.4042963981628418, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 415, "native_id": "bbcef409e0acb71b515acc144d5b402c_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.936450719833374, "incorrect_loss_raw": 6.510318756103516, "correct_loss_per_char": 0.2624300479888916, "incorrect_loss_per_char": 0.7542462766944589, "correct_loss_per_token": 1.968225359916687, "incorrect_loss_per_token": 4.833497613668442, "correct_loss_uncond": -15.387203454971313, "incorrect_loss_uncond": -9.887781620025635}, "model_output": [{"sum_logits": -3.278665065765381, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -16.700660705566406, "logits_per_token": -1.6393325328826904, "logits_per_char": -0.23419036184038436, "num_chars": 14}, {"sum_logits": -9.398477554321289, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -9.398477554321289, "logits_per_char": -1.342639650617327, "num_chars": 7}, {"sum_logits": -6.757269382476807, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.477386474609375, "logits_per_token": -1.6893173456192017, "logits_per_char": -0.6142972165888007, "num_chars": 11}, {"sum_logits": -3.936450719833374, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.323654174804688, "logits_per_token": -1.968225359916687, "logits_per_char": -0.2624300479888916, "num_chars": 15}, {"sum_logits": -6.606863021850586, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -6.606863021850586, "logits_per_char": -0.8258578777313232, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 416, "native_id": "cbb0c9a69ca0922371a48177087ef407", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0268363952636719, "incorrect_loss_raw": 13.347532510757446, "correct_loss_per_char": 0.25670909881591797, "incorrect_loss_per_char": 1.3585815047606444, "correct_loss_per_token": 1.0268363952636719, "incorrect_loss_per_token": 7.006028135617574, "correct_loss_uncond": -10.511428833007812, "incorrect_loss_uncond": -3.5872409343719482}, "model_output": [{"sum_logits": -8.720661163330078, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -8.720661163330078, "logits_per_char": -2.1801652908325195, "num_chars": 4}, {"sum_logits": -18.187698364257812, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.957460403442383, "logits_per_token": -6.0625661214192705, "logits_per_char": -1.1367311477661133, "num_chars": 16}, {"sum_logits": -1.0268363952636719, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": true, "sum_logits_uncond": -11.538265228271484, "logits_per_token": -1.0268363952636719, "logits_per_char": -0.25670909881591797, "num_chars": 4}, {"sum_logits": -13.944002151489258, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.73629379272461, "logits_per_token": -6.972001075744629, "logits_per_char": -1.0726155501145582, "num_chars": 13}, {"sum_logits": -12.537768363952637, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -6.268884181976318, "logits_per_char": -1.0448140303293865, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 417, "native_id": "b92f786638796fc028947ac0e9a44fef", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.308974266052246, "incorrect_loss_raw": 7.792405843734741, "correct_loss_per_char": 0.4506410190037319, "incorrect_loss_per_char": 0.810186918038006, "correct_loss_per_token": 1.5772435665130615, "incorrect_loss_per_token": 4.77803248167038, "correct_loss_uncond": -11.41460132598877, "incorrect_loss_uncond": -8.071292400360107}, "model_output": [{"sum_logits": -5.23876428604126, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.920656204223633, "logits_per_token": -2.61938214302063, "logits_per_char": -0.523876428604126, "num_chars": 10}, {"sum_logits": -7.054636478424072, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -7.054636478424072, "logits_per_char": -1.0078052112034388, "num_chars": 7}, {"sum_logits": -7.518680572509766, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.449238777160645, "logits_per_token": -3.759340286254883, "logits_per_char": -0.835408952501085, "num_chars": 9}, {"sum_logits": -11.357542037963867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.378713607788086, "logits_per_token": -5.678771018981934, "logits_per_char": -0.8736570798433744, "num_chars": 13}, {"sum_logits": -6.308974266052246, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -1.5772435665130615, "logits_per_char": -0.4506410190037319, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 418, "native_id": "5abeb4a2126597d4ef7b5a32e9e22abf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.4140167236328125, "incorrect_loss_raw": 7.106924653053284, "correct_loss_per_char": 0.30175209045410156, "incorrect_loss_per_char": 0.9738644985925583, "correct_loss_per_token": 2.4140167236328125, "incorrect_loss_per_token": 6.712193071842194, "correct_loss_uncond": -11.790285110473633, "incorrect_loss_uncond": -7.440994620323181}, "model_output": [{"sum_logits": -3.1578526496887207, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.61488914489746, "logits_per_token": -1.5789263248443604, "logits_per_char": -0.26315438747406006, "num_chars": 12}, {"sum_logits": -5.141095161437988, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -5.141095161437988, "logits_per_char": -0.7344421659197126, "num_chars": 7}, {"sum_logits": -9.162422180175781, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -9.162422180175781, "logits_per_char": -1.5270703633626301, "num_chars": 6}, {"sum_logits": -10.966328620910645, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.496217727661133, "logits_per_token": -10.966328620910645, "logits_per_char": -1.3707910776138306, "num_chars": 8}, {"sum_logits": -2.4140167236328125, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -2.4140167236328125, "logits_per_char": -0.30175209045410156, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 419, "native_id": "8d4b0312f02be445e09a9462873d02bb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.811703681945801, "incorrect_loss_raw": 10.954872131347656, "correct_loss_per_char": 0.9764629602432251, "incorrect_loss_per_char": 1.4681387492588587, "correct_loss_per_token": 7.811703681945801, "incorrect_loss_per_token": 7.073902765909831, "correct_loss_uncond": -8.13585090637207, "incorrect_loss_uncond": -6.087841510772705}, "model_output": [{"sum_logits": -14.266551971435547, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.528358459472656, "logits_per_token": -7.133275985717773, "logits_per_char": -1.7833189964294434, "num_chars": 8}, {"sum_logits": -7.118715286254883, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -7.118715286254883, "logits_per_char": -1.4237430572509766, "num_chars": 5}, {"sum_logits": -9.848319053649902, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.987665176391602, "logits_per_token": -9.848319053649902, "logits_per_char": -1.4069027219499861, "num_chars": 7}, {"sum_logits": -7.811703681945801, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.947554588317871, "logits_per_token": -7.811703681945801, "logits_per_char": -0.9764629602432251, "num_chars": 8}, {"sum_logits": -12.585902214050293, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.818557739257812, "logits_per_token": -4.195300738016765, "logits_per_char": -1.2585902214050293, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 420, "native_id": "f7140f00ddd8d1c5d93b05ea32ad1fff", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.761535882949829, "incorrect_loss_raw": 12.730213403701782, "correct_loss_per_char": 0.7523071765899658, "incorrect_loss_per_char": 1.2576291668982733, "correct_loss_per_token": 3.761535882949829, "incorrect_loss_per_token": 6.8515132665634155, "correct_loss_uncond": -8.904780149459839, "incorrect_loss_uncond": -5.287829399108887}, "model_output": [{"sum_logits": -22.612390518188477, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -28.64661407470703, "logits_per_token": -5.653097629547119, "logits_per_char": -0.9421829382578532, "num_chars": 24}, {"sum_logits": -13.111015319824219, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -6.555507659912109, "logits_per_char": -1.6388769149780273, "num_chars": 8}, {"sum_logits": -10.325572967529297, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.249696731567383, "logits_per_token": -10.325572967529297, "logits_per_char": -1.4750818525041853, "num_chars": 7}, {"sum_logits": -3.761535882949829, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -3.761535882949829, "logits_per_char": -0.7523071765899658, "num_chars": 5}, {"sum_logits": -4.871874809265137, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -4.871874809265137, "logits_per_char": -0.9743749618530273, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 421, "native_id": "8b3b598a647dfd2d63fcedce5f461040", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1090056896209717, "incorrect_loss_raw": 10.635715246200562, "correct_loss_per_char": 0.14060037930806477, "incorrect_loss_per_char": 1.2736263163069375, "correct_loss_per_token": 1.0545028448104858, "incorrect_loss_per_token": 5.317857623100281, "correct_loss_uncond": -18.045201539993286, "incorrect_loss_uncond": -9.135986566543579}, "model_output": [{"sum_logits": -2.1090056896209717, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -1.0545028448104858, "logits_per_char": -0.14060037930806477, "num_chars": 15}, {"sum_logits": -8.087085723876953, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.726114273071289, "logits_per_token": -4.043542861938477, "logits_per_char": -1.0108857154846191, "num_chars": 8}, {"sum_logits": -12.003619194030762, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -26.074098587036133, "logits_per_token": -6.001809597015381, "logits_per_char": -0.9233553226177509, "num_chars": 13}, {"sum_logits": -10.471712112426758, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.180561065673828, "logits_per_token": -5.235856056213379, "logits_per_char": -1.1635235680474176, "num_chars": 9}, {"sum_logits": -11.980443954467773, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.106033325195312, "logits_per_token": -5.990221977233887, "logits_per_char": -1.9967406590779622, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 422, "native_id": "7a900bc3a373806b6c56f0e19534005f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.201345443725586, "incorrect_loss_raw": 12.504539012908936, "correct_loss_per_char": 1.0251681804656982, "incorrect_loss_per_char": 1.033636230580947, "correct_loss_per_token": 8.201345443725586, "incorrect_loss_per_token": 7.112993597984314, "correct_loss_uncond": -7.3215742111206055, "incorrect_loss_uncond": -5.936880588531494}, "model_output": [{"sum_logits": -17.3564453125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.420143127441406, "logits_per_token": -8.67822265625, "logits_per_char": -0.867822265625, "num_chars": 20}, {"sum_logits": -11.135030746459961, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.49556541442871, "logits_per_token": -5.5675153732299805, "logits_per_char": -0.6550018086152918, "num_chars": 17}, {"sum_logits": -6.8857927322387695, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -6.8857927322387695, "logits_per_char": -1.147632122039795, "num_chars": 6}, {"sum_logits": -14.640887260437012, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.991546630859375, "logits_per_token": -7.320443630218506, "logits_per_char": -1.4640887260437012, "num_chars": 10}, {"sum_logits": -8.201345443725586, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.522919654846191, "logits_per_token": -8.201345443725586, "logits_per_char": -1.0251681804656982, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 423, "native_id": "3d79c10ddf26a5ed7dc0bb168fb0b3ed", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.578218460083008, "incorrect_loss_raw": 10.709033250808716, "correct_loss_per_char": 0.32813049765194163, "incorrect_loss_per_char": 1.2498587086087183, "correct_loss_per_token": 1.8594061533610027, "incorrect_loss_per_token": 7.513238191604614, "correct_loss_uncond": -14.787246704101562, "incorrect_loss_uncond": -5.342402696609497}, "model_output": [{"sum_logits": -10.346004486083984, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.55059051513672, "logits_per_token": -5.173002243041992, "logits_per_char": -1.1495560540093317, "num_chars": 9}, {"sum_logits": -5.578218460083008, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.36546516418457, "logits_per_token": -1.8594061533610027, "logits_per_char": -0.32813049765194163, "num_chars": 17}, {"sum_logits": -7.555731773376465, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -7.555731773376465, "logits_per_char": -1.079390253339495, "num_chars": 7}, {"sum_logits": -9.714040756225586, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -9.714040756225586, "logits_per_char": -1.0793378618028429, "num_chars": 9}, {"sum_logits": -15.220355987548828, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.039682388305664, "logits_per_token": -7.610177993774414, "logits_per_char": -1.6911506652832031, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 424, "native_id": "b7091d2bfcea421d787ce9e7982f104a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.454058647155762, "incorrect_loss_raw": 10.7574303150177, "correct_loss_per_char": 0.4610041890825544, "incorrect_loss_per_char": 0.9564909597495934, "correct_loss_per_token": 2.151352882385254, "incorrect_loss_per_token": 6.007396896680197, "correct_loss_uncond": -11.228316307067871, "incorrect_loss_uncond": -8.838078260421753}, "model_output": [{"sum_logits": -8.274789810180664, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.161336898803711, "logits_per_token": -8.274789810180664, "logits_per_char": -0.7522536191073331, "num_chars": 11}, {"sum_logits": -9.736007690429688, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -24.95658302307129, "logits_per_token": -3.245335896809896, "logits_per_char": -0.6954291207449776, "num_chars": 14}, {"sum_logits": -6.454058647155762, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.682374954223633, "logits_per_token": -2.151352882385254, "logits_per_char": -0.4610041890825544, "num_chars": 14}, {"sum_logits": -13.597240447998047, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.903030395507812, "logits_per_token": -6.798620223999023, "logits_per_char": -1.2361127679998225, "num_chars": 11}, {"sum_logits": -11.421683311462402, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.361083984375, "logits_per_token": -5.710841655731201, "logits_per_char": -1.1421683311462403, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 425, "native_id": "d060ab71d0efff3cab5960089a6bb3a2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.377964973449707, "incorrect_loss_raw": 13.09925889968872, "correct_loss_per_char": 0.7616331794045188, "incorrect_loss_per_char": 1.3488904347495427, "correct_loss_per_token": 4.1889824867248535, "incorrect_loss_per_token": 9.096594333648682, "correct_loss_uncond": -7.6295881271362305, "incorrect_loss_uncond": -1.6162643432617188}, "model_output": [{"sum_logits": -10.07594108581543, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.55051040649414, "logits_per_token": -5.037970542907715, "logits_per_char": -1.1195490095350478, "num_chars": 9}, {"sum_logits": -8.377964973449707, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.007553100585938, "logits_per_token": -4.1889824867248535, "logits_per_char": -0.7616331794045188, "num_chars": 11}, {"sum_logits": -10.816996574401855, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.026094436645508, "logits_per_token": -10.816996574401855, "logits_per_char": -1.0816996574401856, "num_chars": 10}, {"sum_logits": -9.558722496032715, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.245857238769531, "logits_per_token": -9.558722496032715, "logits_per_char": -1.3655317851475306, "num_chars": 7}, {"sum_logits": -21.945375442504883, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.039630889892578, "logits_per_token": -10.972687721252441, "logits_per_char": -1.828781286875407, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 426, "native_id": "b399f6008d90dbd92bcce5abed4c1fd1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.386197328567505, "incorrect_loss_raw": 3.6186949610710144, "correct_loss_per_char": 0.677239465713501, "incorrect_loss_per_char": 0.3383414836156936, "correct_loss_per_token": 3.386197328567505, "incorrect_loss_per_token": 2.5173144936561584, "correct_loss_uncond": -11.701287984848022, "incorrect_loss_uncond": -12.202840387821198}, "model_output": [{"sum_logits": -3.386197328567505, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -3.386197328567505, "logits_per_char": -0.677239465713501, "num_chars": 5}, {"sum_logits": -4.405521869659424, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -2.202760934829712, "logits_per_char": -0.3146801335471017, "num_chars": 14}, {"sum_logits": -4.405521869659424, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -2.202760934829712, "logits_per_char": -0.3146801335471017, "num_chars": 14}, {"sum_logits": -2.1602370738983154, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -2.1602370738983154, "logits_per_char": -0.4320474147796631, "num_chars": 5}, {"sum_logits": -3.5034990310668945, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -3.5034990310668945, "logits_per_char": -0.2919582525889079, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 427, "native_id": "80c19c62338edae0e8a1f5c6fec0d29a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.118938446044922, "incorrect_loss_raw": 9.687658071517944, "correct_loss_per_char": 0.790993160671658, "incorrect_loss_per_char": 1.7046730931316103, "correct_loss_per_token": 3.559469223022461, "incorrect_loss_per_token": 7.157555341720581, "correct_loss_uncond": -9.852333068847656, "incorrect_loss_uncond": -5.694921255111694}, "model_output": [{"sum_logits": -20.240821838378906, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.954610824584961, "logits_per_token": -10.120410919189453, "logits_per_char": -4.048164367675781, "num_chars": 5}, {"sum_logits": -7.118938446044922, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -3.559469223022461, "logits_per_char": -0.790993160671658, "num_chars": 9}, {"sum_logits": -4.61142635345459, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -4.61142635345459, "logits_per_char": -0.9222852706909179, "num_chars": 5}, {"sum_logits": -7.685479640960693, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.729222297668457, "logits_per_token": -7.685479640960693, "logits_per_char": -0.9606849551200867, "num_chars": 8}, {"sum_logits": -6.212904453277588, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.87264060974121, "logits_per_token": -6.212904453277588, "logits_per_char": -0.8875577790396554, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 428, "native_id": "1a4e83b433620cb2d7d806882f8d57e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.226667404174805, "incorrect_loss_raw": 14.419182300567627, "correct_loss_per_char": 0.7783334255218506, "incorrect_loss_per_char": 1.3873383707470364, "correct_loss_per_token": 6.226667404174805, "incorrect_loss_per_token": 9.664101521174114, "correct_loss_uncond": -8.283447265625, "incorrect_loss_uncond": -3.499260425567627}, "model_output": [{"sum_logits": -6.226667404174805, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.510114669799805, "logits_per_token": -6.226667404174805, "logits_per_char": -0.7783334255218506, "num_chars": 8}, {"sum_logits": -12.818353652954102, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.38828468322754, "logits_per_token": -6.409176826477051, "logits_per_char": -0.6409176826477051, "num_chars": 20}, {"sum_logits": -13.768468856811523, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.503639221191406, "logits_per_token": -13.768468856811523, "logits_per_char": -2.2947448094685874, "num_chars": 6}, {"sum_logits": -18.916719436645508, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.676342010498047, "logits_per_token": -6.305573145548503, "logits_per_char": -1.2611146291097006, "num_chars": 15}, {"sum_logits": -12.173187255859375, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.105504989624023, "logits_per_token": -12.173187255859375, "logits_per_char": -1.3525763617621527, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 429, "native_id": "b9e04a53c0ee7325b901de4d12d56884", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.5856012105941772, "incorrect_loss_raw": 9.664616584777832, "correct_loss_per_char": 0.31712024211883544, "incorrect_loss_per_char": 1.4461415196910048, "correct_loss_per_token": 1.5856012105941772, "incorrect_loss_per_token": 8.958127975463867, "correct_loss_uncond": -10.816656708717346, "incorrect_loss_uncond": -6.616720199584961}, "model_output": [{"sum_logits": -10.701460838317871, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -10.701460838317871, "logits_per_char": -2.140292167663574, "num_chars": 5}, {"sum_logits": -8.805781364440918, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -8.805781364440918, "logits_per_char": -0.8805781364440918, "num_chars": 10}, {"sum_logits": -1.5856012105941772, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.402257919311523, "logits_per_token": -1.5856012105941772, "logits_per_char": -0.31712024211883544, "num_chars": 5}, {"sum_logits": -5.651908874511719, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -2.8259544372558594, "logits_per_char": -0.5138098976828835, "num_chars": 11}, {"sum_logits": -13.49931526184082, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -13.49931526184082, "logits_per_char": -2.24988587697347, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 430, "native_id": "7490aa460f66000555a8a94008179cbb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.823762893676758, "incorrect_loss_raw": 9.826698660850525, "correct_loss_per_char": 0.25670571760697797, "incorrect_loss_per_char": 0.9318879814375014, "correct_loss_per_token": 2.823762893676758, "incorrect_loss_per_token": 6.8029584884643555, "correct_loss_uncond": -11.970187187194824, "incorrect_loss_uncond": -6.590108513832092}, "model_output": [{"sum_logits": -4.276379108428955, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.451205253601074, "logits_per_token": -4.276379108428955, "logits_per_char": -0.3054556506020682, "num_chars": 14}, {"sum_logits": -8.542351722717285, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.526714324951172, "logits_per_token": -4.271175861358643, "logits_per_char": -0.569490114847819, "num_chars": 15}, {"sum_logits": -15.64756965637207, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.581924438476562, "logits_per_token": -7.823784828186035, "logits_per_char": -1.3039641380310059, "num_chars": 12}, {"sum_logits": -10.840494155883789, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.10738468170166, "logits_per_token": -10.840494155883789, "logits_per_char": -1.5486420222691126, "num_chars": 7}, {"sum_logits": -2.823762893676758, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -2.823762893676758, "logits_per_char": -0.25670571760697797, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 431, "native_id": "ad8ee2965a33ff4b0e3d2ac732676594", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.964591979980469, "incorrect_loss_raw": 11.746399641036987, "correct_loss_per_char": 0.7976394653320312, "incorrect_loss_per_char": 0.8691516030284713, "correct_loss_per_token": 5.982295989990234, "incorrect_loss_per_token": 5.873199820518494, "correct_loss_uncond": -9.827259063720703, "incorrect_loss_uncond": -6.696020126342773}, "model_output": [{"sum_logits": -13.846658706665039, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.15003204345703, "logits_per_token": -6.9233293533325195, "logits_per_char": -0.7287715108771073, "num_chars": 19}, {"sum_logits": -9.365511894226074, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -12.449238777160645, "logits_per_token": -4.682755947113037, "logits_per_char": -1.040612432691786, "num_chars": 9}, {"sum_logits": -11.912723541259766, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -22.084911346435547, "logits_per_token": -5.956361770629883, "logits_per_char": -1.0829748673872515, "num_chars": 11}, {"sum_logits": -11.86070442199707, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.08549690246582, "logits_per_token": -5.930352210998535, "logits_per_char": -0.6242476011577406, "num_chars": 19}, {"sum_logits": -11.964591979980469, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.791851043701172, "logits_per_token": -5.982295989990234, "logits_per_char": -0.7976394653320312, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 432, "native_id": "64d2310eff6b661baeb41b4ccc392e35", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.444161415100098, "incorrect_loss_raw": 13.44478154182434, "correct_loss_per_char": 1.4040146741000088, "incorrect_loss_per_char": 0.9726979846046084, "correct_loss_per_token": 7.722080707550049, "incorrect_loss_per_token": 6.329117437203726, "correct_loss_uncond": -4.458868980407715, "incorrect_loss_uncond": -5.462562799453735}, "model_output": [{"sum_logits": -12.63577651977539, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.913702011108398, "logits_per_token": -12.63577651977539, "logits_per_char": -1.263577651977539, "num_chars": 10}, {"sum_logits": -11.784481048583984, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.783321380615234, "logits_per_token": -3.9281603495279946, "logits_per_char": -0.5892240524291992, "num_chars": 20}, {"sum_logits": -16.95378875732422, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.924055099487305, "logits_per_token": -5.651262919108073, "logits_per_char": -1.2109849112374442, "num_chars": 14}, {"sum_logits": -15.444161415100098, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.903030395507812, "logits_per_token": -7.722080707550049, "logits_per_char": -1.4040146741000088, "num_chars": 11}, {"sum_logits": -12.40507984161377, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.008298873901367, "logits_per_token": -3.1012699604034424, "logits_per_char": -0.8270053227742513, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 433, "native_id": "6b1f5ebd9d0dbc7e34a598456a6091a8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.358506679534912, "incorrect_loss_raw": 12.466887593269348, "correct_loss_per_char": 0.8176118532816569, "incorrect_loss_per_char": 1.5729656749301486, "correct_loss_per_token": 2.4528355598449707, "incorrect_loss_per_token": 9.746954321861267, "correct_loss_uncond": -12.648359775543213, "incorrect_loss_uncond": -3.6122642755508423}, "model_output": [{"sum_logits": -11.426855087280273, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.576496124267578, "logits_per_token": -11.426855087280273, "logits_per_char": -1.4283568859100342, "num_chars": 8}, {"sum_logits": -9.138233184814453, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.63089656829834, "logits_per_token": -9.138233184814453, "logits_per_char": -1.3054618835449219, "num_chars": 7}, {"sum_logits": -7.358506679534912, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.006866455078125, "logits_per_token": -2.4528355598449707, "logits_per_char": -0.8176118532816569, "num_chars": 9}, {"sum_logits": -21.75946617126465, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.07717514038086, "logits_per_token": -10.879733085632324, "logits_per_char": -2.719933271408081, "num_chars": 8}, {"sum_logits": -7.542995929718018, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.032039642333984, "logits_per_token": -7.542995929718018, "logits_per_char": -0.8381106588575575, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 434, "native_id": "080ef6941410139d6869e78122bc741e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.729595184326172, "incorrect_loss_raw": 14.389708995819092, "correct_loss_per_char": 0.9774662653605143, "incorrect_loss_per_char": 1.6339021487396304, "correct_loss_per_token": 3.909865061442057, "incorrect_loss_per_token": 6.821299314498901, "correct_loss_uncond": -7.2171478271484375, "incorrect_loss_uncond": -4.251703977584839}, "model_output": [{"sum_logits": -17.71723747253418, "num_tokens": 6, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.869060516357422, "logits_per_token": -2.95287291208903, "logits_per_char": -1.0421904395608341, "num_chars": 17}, {"sum_logits": -8.670702934265137, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.72342300415039, "logits_per_token": -2.8902343114217124, "logits_per_char": -0.6193359238760812, "num_chars": 14}, {"sum_logits": -11.729595184326172, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.94674301147461, "logits_per_token": -3.909865061442057, "logits_per_char": -0.9774662653605143, "num_chars": 12}, {"sum_logits": -19.457611083984375, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -9.728805541992188, "logits_per_char": -1.9457611083984374, "num_chars": 10}, {"sum_logits": -11.713284492492676, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -11.713284492492676, "logits_per_char": -2.928321123123169, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 435, "native_id": "6c70d98cfb8e97fda8caefcee761a229", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.139155387878418, "incorrect_loss_raw": 9.638954401016235, "correct_loss_per_char": 0.7139155387878418, "incorrect_loss_per_char": 0.9902993685159927, "correct_loss_per_token": 7.139155387878418, "incorrect_loss_per_token": 9.638954401016235, "correct_loss_uncond": -5.333747863769531, "incorrect_loss_uncond": -4.016430616378784}, "model_output": [{"sum_logits": -8.204992294311523, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.973532676696777, "logits_per_token": -8.204992294311523, "logits_per_char": -1.3674987157185872, "num_chars": 6}, {"sum_logits": -11.453532218933105, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.961477279663086, "logits_per_token": -11.453532218933105, "logits_per_char": -1.0412302017211914, "num_chars": 11}, {"sum_logits": -4.282660484313965, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.044368743896484, "logits_per_token": -4.282660484313965, "logits_per_char": -0.4282660484313965, "num_chars": 10}, {"sum_logits": -7.139155387878418, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.47290325164795, "logits_per_token": -7.139155387878418, "logits_per_char": -0.7139155387878418, "num_chars": 10}, {"sum_logits": -14.614632606506348, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -14.614632606506348, "logits_per_char": -1.124202508192796, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 436, "native_id": "75ac594b4fdbfba006e61315d1b2c815", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.470667839050293, "incorrect_loss_raw": 11.075353026390076, "correct_loss_per_char": 0.7169167399406433, "incorrect_loss_per_char": 1.2874069403379391, "correct_loss_per_token": 5.7353339195251465, "incorrect_loss_per_token": 7.0021031498909, "correct_loss_uncond": -9.795084953308105, "incorrect_loss_uncond": -5.857370734214783}, "model_output": [{"sum_logits": -11.470667839050293, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.2657527923584, "logits_per_token": -5.7353339195251465, "logits_per_char": -0.7169167399406433, "num_chars": 16}, {"sum_logits": -10.660263061523438, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.325549125671387, "logits_per_token": -5.330131530761719, "logits_per_char": -1.1844736735026042, "num_chars": 9}, {"sum_logits": -11.715413093566895, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -11.715413093566895, "logits_per_char": -2.343082618713379, "num_chars": 5}, {"sum_logits": -15.634684562683105, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.070846557617188, "logits_per_token": -7.817342281341553, "logits_per_char": -1.202668043283316, "num_chars": 13}, {"sum_logits": -6.291051387786865, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.85955047607422, "logits_per_token": -3.1455256938934326, "logits_per_char": -0.41940342585245766, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 437, "native_id": "5a8e7d2f97f76adb23fbd59a009d16f0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.916545867919922, "incorrect_loss_raw": 9.742084264755249, "correct_loss_per_char": 1.9860909779866536, "incorrect_loss_per_char": 1.027735778918633, "correct_loss_per_token": 11.916545867919922, "incorrect_loss_per_token": 7.479860703150432, "correct_loss_uncond": -4.023632049560547, "incorrect_loss_uncond": -7.0769126415252686}, "model_output": [{"sum_logits": -11.916545867919922, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -11.916545867919922, "logits_per_char": -1.9860909779866536, "num_chars": 6}, {"sum_logits": -9.47316837310791, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.622055053710938, "logits_per_token": -9.47316837310791, "logits_per_char": -1.1841460466384888, "num_chars": 8}, {"sum_logits": -7.742486000061035, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -7.742486000061035, "logits_per_char": -0.8602762222290039, "num_chars": 9}, {"sum_logits": -8.179341316223145, "num_tokens": 1, "num_tokens_all": 168, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -8.179341316223145, "logits_per_char": -1.022417664527893, "num_chars": 8}, {"sum_logits": -13.573341369628906, "num_tokens": 3, "num_tokens_all": 170, "is_greedy": false, "sum_logits_uncond": -20.536924362182617, "logits_per_token": -4.524447123209636, "logits_per_char": -1.0441031822791467, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 438, "native_id": "178cb8153123716aa94f286b615149d4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.506407737731934, "incorrect_loss_raw": 8.419379472732544, "correct_loss_per_char": 0.9012815475463867, "incorrect_loss_per_char": 1.105176327278564, "correct_loss_per_token": 4.506407737731934, "incorrect_loss_per_token": 7.341818690299988, "correct_loss_uncond": -8.159908294677734, "incorrect_loss_uncond": -8.657840728759766}, "model_output": [{"sum_logits": -11.902018547058105, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.501020431518555, "logits_per_token": -11.902018547058105, "logits_per_char": -1.9836697578430176, "num_chars": 6}, {"sum_logits": -5.211626052856445, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -5.211626052856445, "logits_per_char": -1.042325210571289, "num_chars": 5}, {"sum_logits": -8.62048625946045, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.086233139038086, "logits_per_token": -4.310243129730225, "logits_per_char": -0.783680569041859, "num_chars": 11}, {"sum_logits": -4.506407737731934, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -4.506407737731934, "logits_per_char": -0.9012815475463867, "num_chars": 5}, {"sum_logits": -7.943387031555176, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -7.943387031555176, "logits_per_char": -0.6110297716580905, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 439, "native_id": "cc917ca0e03c91a5141920f5a902a36c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.440486907958984, "incorrect_loss_raw": 11.651260018348694, "correct_loss_per_char": 0.8440486907958984, "incorrect_loss_per_char": 1.4334877865655082, "correct_loss_per_token": 2.813495635986328, "incorrect_loss_per_token": 5.899176021416982, "correct_loss_uncond": -8.210952758789062, "incorrect_loss_uncond": -6.620376706123352}, "model_output": [{"sum_logits": -8.440486907958984, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.651439666748047, "logits_per_token": -2.813495635986328, "logits_per_char": -0.8440486907958984, "num_chars": 10}, {"sum_logits": -6.987520217895508, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.541668891906738, "logits_per_token": -6.987520217895508, "logits_per_char": -0.9982171739850726, "num_chars": 7}, {"sum_logits": -6.861063480377197, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.64430046081543, "logits_per_token": -3.4305317401885986, "logits_per_char": -0.980151925768171, "num_chars": 7}, {"sum_logits": -19.19745635986328, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -24.17406463623047, "logits_per_token": -6.399152119954427, "logits_per_char": -2.39968204498291, "num_chars": 8}, {"sum_logits": -13.559000015258789, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.726512908935547, "logits_per_token": -6.7795000076293945, "logits_per_char": -1.3559000015258789, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 440, "native_id": "a7d51b753c2113d8b2dbd0ebb5375855_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0448191165924072, "incorrect_loss_raw": 12.571085929870605, "correct_loss_per_char": 0.17040159304936728, "incorrect_loss_per_char": 0.7991170823574066, "correct_loss_per_token": 1.0224095582962036, "incorrect_loss_per_token": 5.04424516359965, "correct_loss_uncond": -13.48431944847107, "incorrect_loss_uncond": -9.160338401794434}, "model_output": [{"sum_logits": -17.09688949584961, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -25.636783599853516, "logits_per_token": -5.698963165283203, "logits_per_char": -0.8548444747924805, "num_chars": 20}, {"sum_logits": -9.572017669677734, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.488744735717773, "logits_per_token": -4.786008834838867, "logits_per_char": -0.638134511311849, "num_chars": 15}, {"sum_logits": -12.694257736206055, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.562074661254883, "logits_per_token": -4.2314192454020185, "logits_per_char": -0.7933911085128784, "num_chars": 16}, {"sum_logits": -2.0448191165924072, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": true, "sum_logits_uncond": -15.529138565063477, "logits_per_token": -1.0224095582962036, "logits_per_char": -0.17040159304936728, "num_chars": 12}, {"sum_logits": -10.921178817749023, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.238094329833984, "logits_per_token": -5.460589408874512, "logits_per_char": -0.9100982348124186, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 441, "native_id": "e71da9e95b321763c86e879a47bbd327", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.344314575195312, "incorrect_loss_raw": 10.687394857406616, "correct_loss_per_char": 0.778692881266276, "incorrect_loss_per_char": 1.20220580727163, "correct_loss_per_token": 9.344314575195312, "incorrect_loss_per_token": 7.900582194328308, "correct_loss_uncond": -3.349606513977051, "incorrect_loss_uncond": -2.3404643535614014}, "model_output": [{"sum_logits": -9.520639419555664, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.316405296325684, "logits_per_token": -9.520639419555664, "logits_per_char": -1.190079927444458, "num_chars": 8}, {"sum_logits": -10.934438705444336, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -12.3843994140625, "logits_per_token": -10.934438705444336, "logits_per_char": -1.2149376339382596, "num_chars": 9}, {"sum_logits": -11.234871864318848, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -12.33724594116211, "logits_per_token": -5.617435932159424, "logits_per_char": -1.0213519876653498, "num_chars": 11}, {"sum_logits": -11.059629440307617, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -13.073386192321777, "logits_per_token": -5.529814720153809, "logits_per_char": -1.3824536800384521, "num_chars": 8}, {"sum_logits": -9.344314575195312, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -12.693921089172363, "logits_per_token": -9.344314575195312, "logits_per_char": -0.778692881266276, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 442, "native_id": "ec86900559a0faf2aef066e511a4cfa6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.83542799949646, "incorrect_loss_raw": 7.805135726928711, "correct_loss_per_char": 0.2181098461151123, "incorrect_loss_per_char": 0.9645308931668599, "correct_loss_per_token": 1.41771399974823, "incorrect_loss_per_token": 5.483060657978058, "correct_loss_uncond": -13.366543054580688, "incorrect_loss_uncond": -7.312595367431641}, "model_output": [{"sum_logits": -10.791107177734375, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.119783401489258, "logits_per_token": -5.3955535888671875, "logits_per_char": -1.7985178629557292, "num_chars": 6}, {"sum_logits": -2.83542799949646, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.20197105407715, "logits_per_token": -1.41771399974823, "logits_per_char": -0.2181098461151123, "num_chars": 13}, {"sum_logits": -7.78549337387085, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.789175033569336, "logits_per_token": -3.892746686935425, "logits_per_char": -0.6487911144892374, "num_chars": 12}, {"sum_logits": -6.78712797164917, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -6.78712797164917, "logits_per_char": -0.678712797164917, "num_chars": 10}, {"sum_logits": -5.856814384460449, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.71326732635498, "logits_per_token": -5.856814384460449, "logits_per_char": -0.7321017980575562, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 443, "native_id": "d312741df1b14bcbe358f4f30aff3994", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.959271430969238, "incorrect_loss_raw": 12.017501831054688, "correct_loss_per_char": 0.5959271430969239, "incorrect_loss_per_char": 1.4524921771807549, "correct_loss_per_token": 5.959271430969238, "incorrect_loss_per_token": 8.852815628051758, "correct_loss_uncond": -8.44289779663086, "incorrect_loss_uncond": -4.032933473587036}, "model_output": [{"sum_logits": -9.387983322143555, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.288496017456055, "logits_per_token": -9.387983322143555, "logits_per_char": -1.0431092580159504, "num_chars": 9}, {"sum_logits": -11.896171569824219, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.973584175109863, "logits_per_token": -11.896171569824219, "logits_per_char": -1.4870214462280273, "num_chars": 8}, {"sum_logits": -9.907526016235352, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.711023330688477, "logits_per_token": -9.907526016235352, "logits_per_char": -1.9815052032470704, "num_chars": 5}, {"sum_logits": -16.878326416015625, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -23.2286376953125, "logits_per_token": -4.219581604003906, "logits_per_char": -1.2983328012319713, "num_chars": 13}, {"sum_logits": -5.959271430969238, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.402169227600098, "logits_per_token": -5.959271430969238, "logits_per_char": -0.5959271430969239, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 444, "native_id": "0df3f58645b4bc306093845fb297a50e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.5512871742248535, "incorrect_loss_raw": 6.83008885383606, "correct_loss_per_char": 0.6551287174224854, "incorrect_loss_per_char": 0.682156373292972, "correct_loss_per_token": 3.2756435871124268, "incorrect_loss_per_token": 4.934940854708353, "correct_loss_uncond": -10.088937282562256, "incorrect_loss_uncond": -9.033875226974487}, "model_output": [{"sum_logits": -7.199103355407715, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.917130470275879, "logits_per_token": -3.5995516777038574, "logits_per_char": -0.7999003728230795, "num_chars": 9}, {"sum_logits": -4.92718505859375, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.797646522521973, "logits_per_token": -4.92718505859375, "logits_per_char": -0.821197509765625, "num_chars": 6}, {"sum_logits": -5.971560478210449, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.65363121032715, "logits_per_token": -1.9905201594034831, "logits_per_char": -0.3981040318806966, "num_chars": 15}, {"sum_logits": -9.222506523132324, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.087448120117188, "logits_per_token": -9.222506523132324, "logits_per_char": -0.7094235787024865, "num_chars": 13}, {"sum_logits": -6.5512871742248535, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.64022445678711, "logits_per_token": -3.2756435871124268, "logits_per_char": -0.6551287174224854, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 445, "native_id": "27d9b4df2ca50112d282331df4923e96", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.6917078495025635, "incorrect_loss_raw": 8.959241032600403, "correct_loss_per_char": 0.05764232079188029, "incorrect_loss_per_char": 1.1410589471007837, "correct_loss_per_token": 0.34585392475128174, "incorrect_loss_per_token": 5.396390199661255, "correct_loss_uncond": -15.422719717025757, "incorrect_loss_uncond": -7.429623007774353}, "model_output": [{"sum_logits": -7.3636674880981445, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.388635635375977, "logits_per_token": -3.6818337440490723, "logits_per_char": -0.6694243170998313, "num_chars": 11}, {"sum_logits": -10.952098846435547, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.691211700439453, "logits_per_token": -5.476049423217773, "logits_per_char": -0.7301399230957031, "num_chars": 15}, {"sum_logits": -10.187040328979492, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.388123512268066, "logits_per_token": -5.093520164489746, "logits_per_char": -1.6978400548299153, "num_chars": 6}, {"sum_logits": -7.334157466888428, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -7.334157466888428, "logits_per_char": -1.4668314933776856, "num_chars": 5}, {"sum_logits": -0.6917078495025635, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": true, "sum_logits_uncond": -16.11442756652832, "logits_per_token": -0.34585392475128174, "logits_per_char": -0.05764232079188029, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 446, "native_id": "ab755203f41a2e241f0ee8a53c54f287", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.591747760772705, "incorrect_loss_raw": 12.322435855865479, "correct_loss_per_char": 0.5839805969825158, "incorrect_loss_per_char": 1.201807006025489, "correct_loss_per_token": 3.7958738803863525, "incorrect_loss_per_token": 6.664955457051595, "correct_loss_uncond": -11.23655652999878, "incorrect_loss_uncond": -4.592909574508667}, "model_output": [{"sum_logits": -8.850997924804688, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.507018089294434, "logits_per_token": -8.850997924804688, "logits_per_char": -1.2644282749720983, "num_chars": 7}, {"sum_logits": -10.631492614746094, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.176170349121094, "logits_per_token": -5.315746307373047, "logits_per_char": -0.817807124211238, "num_chars": 13}, {"sum_logits": -15.34395980834961, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.92172622680664, "logits_per_token": -7.671979904174805, "logits_per_char": -1.2786633173624675, "num_chars": 12}, {"sum_logits": -7.591747760772705, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.828304290771484, "logits_per_token": -3.7958738803863525, "logits_per_char": -0.5839805969825158, "num_chars": 13}, {"sum_logits": -14.463293075561523, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.056467056274414, "logits_per_token": -4.821097691853841, "logits_per_char": -1.4463293075561523, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 447, "native_id": "f13efb91090dd28fd2b3c1f4dde680fd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.780069351196289, "incorrect_loss_raw": 10.736595273017883, "correct_loss_per_char": 0.45765113830566406, "incorrect_loss_per_char": 1.0376102685928343, "correct_loss_per_token": 3.8900346755981445, "incorrect_loss_per_token": 9.238944172859192, "correct_loss_uncond": -12.389205932617188, "incorrect_loss_uncond": -2.1744338274002075}, "model_output": [{"sum_logits": -7.780069351196289, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.169275283813477, "logits_per_token": -3.8900346755981445, "logits_per_char": -0.45765113830566406, "num_chars": 17}, {"sum_logits": -13.134082794189453, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.47290325164795, "logits_per_token": -13.134082794189453, "logits_per_char": -1.3134082794189452, "num_chars": 10}, {"sum_logits": -11.981208801269531, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.58299446105957, "logits_per_token": -5.990604400634766, "logits_per_char": -0.8558006286621094, "num_chars": 14}, {"sum_logits": -11.642770767211914, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.989931106567383, "logits_per_token": -11.642770767211914, "logits_per_char": -1.2936411963568792, "num_chars": 9}, {"sum_logits": -6.188318729400635, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.598287582397461, "logits_per_token": -6.188318729400635, "logits_per_char": -0.6875909699334039, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 448, "native_id": "e98031901c815e55040d9fe28c4d9387", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.587379455566406, "incorrect_loss_raw": 9.078520596027374, "correct_loss_per_char": 0.6992988586425781, "incorrect_loss_per_char": 1.0600452532370885, "correct_loss_per_token": 6.293689727783203, "incorrect_loss_per_token": 5.713559289773306, "correct_loss_uncond": -8.740459442138672, "incorrect_loss_uncond": -7.256992042064667}, "model_output": [{"sum_logits": -9.696834564208984, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -9.696834564208984, "logits_per_char": -1.6161390940348308, "num_chars": 6}, {"sum_logits": -11.734586715698242, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.832082748413086, "logits_per_token": -3.9115289052327475, "logits_per_char": -0.7823057810465495, "num_chars": 15}, {"sum_logits": -3.609086275100708, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -3.609086275100708, "logits_per_char": -0.902271568775177, "num_chars": 4}, {"sum_logits": -12.587379455566406, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.327838897705078, "logits_per_token": -6.293689727783203, "logits_per_char": -0.6992988586425781, "num_chars": 18}, {"sum_logits": -11.273574829101562, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.52893829345703, "logits_per_token": -5.636787414550781, "logits_per_char": -0.9394645690917969, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 449, "native_id": "fb64149cf01c5b496d986f56852273e9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.141847610473633, "incorrect_loss_raw": 9.52653193473816, "correct_loss_per_char": 0.6492588736794211, "incorrect_loss_per_char": 0.8792178433006804, "correct_loss_per_token": 3.5709238052368164, "incorrect_loss_per_token": 6.750279903411865, "correct_loss_uncond": -9.865507125854492, "incorrect_loss_uncond": -7.1374006271362305}, "model_output": [{"sum_logits": -9.795530319213867, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.696640014648438, "logits_per_token": -4.897765159606934, "logits_per_char": -0.8162941932678223, "num_chars": 12}, {"sum_logits": -12.414485931396484, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.16667366027832, "logits_per_token": -6.207242965698242, "logits_per_char": -0.6896936628553603, "num_chars": 18}, {"sum_logits": -10.891312599182129, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.553471565246582, "logits_per_token": -10.891312599182129, "logits_per_char": -1.5559017998831612, "num_chars": 7}, {"sum_logits": -7.141847610473633, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.007354736328125, "logits_per_token": -3.5709238052368164, "logits_per_char": -0.6492588736794211, "num_chars": 11}, {"sum_logits": -5.004798889160156, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -5.004798889160156, "logits_per_char": -0.45498171719637787, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 450, "native_id": "2ac72eaf30a633c410b1bd658bbef0ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.416348457336426, "incorrect_loss_raw": 6.626543819904327, "correct_loss_per_char": 0.6742134961214933, "incorrect_loss_per_char": 0.9370611424247424, "correct_loss_per_token": 3.708174228668213, "incorrect_loss_per_token": 5.2454966604709625, "correct_loss_uncond": -12.841876029968262, "incorrect_loss_uncond": -8.846677005290985}, "model_output": [{"sum_logits": -2.802950143814087, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.153610229492188, "logits_per_token": -1.4014750719070435, "logits_per_char": -0.35036876797676086, "num_chars": 8}, {"sum_logits": -4.033716201782227, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.228081703186035, "logits_per_token": -4.033716201782227, "logits_per_char": -0.8067432403564453, "num_chars": 5}, {"sum_logits": -7.416348457336426, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.258224487304688, "logits_per_token": -3.708174228668213, "logits_per_char": -0.6742134961214933, "num_chars": 11}, {"sum_logits": -11.424081802368164, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.839284896850586, "logits_per_token": -11.424081802368164, "logits_per_char": -1.9040136337280273, "num_chars": 6}, {"sum_logits": -8.245427131652832, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.671906471252441, "logits_per_token": -4.122713565826416, "logits_per_char": -0.687118927637736, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 451, "native_id": "22fc45d9e6d0baea4a5b0526504225b8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.43617057800293, "incorrect_loss_raw": 11.089238166809082, "correct_loss_per_char": 0.9060284296671549, "incorrect_loss_per_char": 0.978072391099916, "correct_loss_per_token": 5.43617057800293, "incorrect_loss_per_token": 7.506747364997864, "correct_loss_uncond": -8.685277938842773, "incorrect_loss_uncond": -7.246613025665283}, "model_output": [{"sum_logits": -17.312488555908203, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.90144157409668, "logits_per_token": -8.656244277954102, "logits_per_char": -0.8656244277954102, "num_chars": 20}, {"sum_logits": -5.43617057800293, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -5.43617057800293, "logits_per_char": -0.9060284296671549, "num_chars": 6}, {"sum_logits": -8.53542423248291, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.30998420715332, "logits_per_token": -8.53542423248291, "logits_per_char": -1.21934631892613, "num_chars": 7}, {"sum_logits": -7.161602020263672, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.558618545532227, "logits_per_token": -7.161602020263672, "logits_per_char": -0.7957335578070747, "num_chars": 9}, {"sum_logits": -11.347437858581543, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.573360443115234, "logits_per_token": -5.6737189292907715, "logits_per_char": -1.0315852598710493, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 452, "native_id": "4ef3d70648ee3cea028bc5ed0fdfda28", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8351221084594727, "incorrect_loss_raw": 10.584187746047974, "correct_loss_per_char": 0.31959350903828937, "incorrect_loss_per_char": 1.5235669800213407, "correct_loss_per_token": 1.9175610542297363, "incorrect_loss_per_token": 7.296995639801025, "correct_loss_uncond": -14.579842567443848, "incorrect_loss_uncond": -4.842759847640991}, "model_output": [{"sum_logits": -10.636012077331543, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.778377532958984, "logits_per_token": -10.636012077331543, "logits_per_char": -2.1272024154663085, "num_chars": 5}, {"sum_logits": -8.548226356506348, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -8.548226356506348, "logits_per_char": -1.221175193786621, "num_chars": 7}, {"sum_logits": -3.8351221084594727, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -18.41496467590332, "logits_per_token": -1.9175610542297363, "logits_per_char": -0.31959350903828937, "num_chars": 12}, {"sum_logits": -9.435072898864746, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.143455505371094, "logits_per_token": -3.145024299621582, "logits_per_char": -0.7862560749053955, "num_chars": 12}, {"sum_logits": -13.717439651489258, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.262380599975586, "logits_per_token": -6.858719825744629, "logits_per_char": -1.959634235927037, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 453, "native_id": "059155c50d1b04da7373e309868e67d2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.665811538696289, "incorrect_loss_raw": 6.318937063217163, "correct_loss_per_char": 1.266581153869629, "incorrect_loss_per_char": 0.9345837055690704, "correct_loss_per_token": 6.3329057693481445, "incorrect_loss_per_token": 5.342689514160156, "correct_loss_uncond": -6.473932266235352, "incorrect_loss_uncond": -7.999159574508667}, "model_output": [{"sum_logits": -4.1746673583984375, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -4.1746673583984375, "logits_per_char": -0.5218334197998047, "num_chars": 8}, {"sum_logits": -7.020010948181152, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.869331359863281, "logits_per_token": -7.020010948181152, "logits_per_char": -1.4040021896362305, "num_chars": 5}, {"sum_logits": -12.665811538696289, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -6.3329057693481445, "logits_per_char": -1.266581153869629, "num_chars": 10}, {"sum_logits": -6.271089553833008, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -6.271089553833008, "logits_per_char": -0.6967877282036675, "num_chars": 9}, {"sum_logits": -7.809980392456055, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -3.9049901962280273, "logits_per_char": -1.1157114846365792, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 454, "native_id": "33d023a6806390eb8195380331e17404_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.5613691806793213, "incorrect_loss_raw": 8.203781068325043, "correct_loss_per_char": 0.3957076867421468, "incorrect_loss_per_char": 1.0578381688821883, "correct_loss_per_token": 3.5613691806793213, "incorrect_loss_per_token": 5.667001664638519, "correct_loss_uncond": -9.574090719223022, "incorrect_loss_uncond": -10.065907061100006}, "model_output": [{"sum_logits": -9.060090065002441, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.682865142822266, "logits_per_token": -9.060090065002441, "logits_per_char": -1.5100150108337402, "num_chars": 6}, {"sum_logits": -7.616306304931641, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.37739372253418, "logits_per_token": -3.8081531524658203, "logits_per_char": -1.0880437578473772, "num_chars": 7}, {"sum_logits": -3.5613691806793213, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -3.5613691806793213, "logits_per_char": -0.3957076867421468, "num_chars": 9}, {"sum_logits": -3.460798978805542, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.082511901855469, "logits_per_token": -3.460798978805542, "logits_per_char": -0.5767998298009237, "num_chars": 6}, {"sum_logits": -12.677928924560547, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.93598175048828, "logits_per_token": -6.338964462280273, "logits_per_char": -1.0564940770467122, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 455, "native_id": "63f7ad481a63fc8c6dffe00519d4a167", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 14.459332466125488, "incorrect_loss_raw": 15.51024603843689, "correct_loss_per_char": 0.6885396412440709, "incorrect_loss_per_char": 1.6982363674375747, "correct_loss_per_token": 4.819777488708496, "incorrect_loss_per_token": 10.852490425109863, "correct_loss_uncond": -8.592547416687012, "incorrect_loss_uncond": -0.5199770927429199}, "model_output": [{"sum_logits": -14.459332466125488, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.0518798828125, "logits_per_token": -4.819777488708496, "logits_per_char": -0.6885396412440709, "num_chars": 21}, {"sum_logits": -10.258408546447754, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.509931564331055, "logits_per_token": -10.258408546447754, "logits_per_char": -1.0258408546447755, "num_chars": 10}, {"sum_logits": -16.93700408935547, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.131860733032227, "logits_per_token": -8.468502044677734, "logits_per_char": -1.693700408935547, "num_chars": 10}, {"sum_logits": -14.520530700683594, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.002091407775879, "logits_per_token": -14.520530700683594, "logits_per_char": -1.8150663375854492, "num_chars": 8}, {"sum_logits": -20.325040817260742, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -10.162520408630371, "logits_per_char": -2.258337868584527, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 456, "native_id": "a2daf73d33541af0846673afd8e49abe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.144443988800049, "incorrect_loss_raw": 10.521122455596924, "correct_loss_per_char": 0.4287036657333374, "incorrect_loss_per_char": 1.2741483062892764, "correct_loss_per_token": 5.144443988800049, "incorrect_loss_per_token": 9.336771488189697, "correct_loss_uncond": -9.633604526519775, "incorrect_loss_uncond": -6.2544121742248535}, "model_output": [{"sum_logits": -5.144443988800049, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.778048515319824, "logits_per_token": -5.144443988800049, "logits_per_char": -0.4287036657333374, "num_chars": 12}, {"sum_logits": -9.876508712768555, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.440893173217773, "logits_per_token": -9.876508712768555, "logits_per_char": -0.9876508712768555, "num_chars": 10}, {"sum_logits": -9.474807739257812, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.096349716186523, "logits_per_token": -4.737403869628906, "logits_per_char": -0.8613461581143466, "num_chars": 11}, {"sum_logits": -12.798015594482422, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.942888259887695, "logits_per_token": -12.798015594482422, "logits_per_char": -1.8282879420689173, "num_chars": 7}, {"sum_logits": -9.935157775878906, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.622007369995117, "logits_per_token": -9.935157775878906, "logits_per_char": -1.4193082536969865, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 457, "native_id": "7d70208061ae3185bcfc9e912ee9e141", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1999378204345703, "incorrect_loss_raw": 15.543099164962769, "correct_loss_per_char": 0.22856698717389787, "incorrect_loss_per_char": 1.092476805400499, "correct_loss_per_token": 0.7999844551086426, "incorrect_loss_per_token": 8.041357199350994, "correct_loss_uncond": -15.10916519165039, "incorrect_loss_uncond": -2.557110548019409}, "model_output": [{"sum_logits": -3.1999378204345703, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.30910301208496, "logits_per_token": -0.7999844551086426, "logits_per_char": -0.22856698717389787, "num_chars": 14}, {"sum_logits": -19.95094871520996, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.246797561645508, "logits_per_token": -9.97547435760498, "logits_per_char": -0.9975474357604981, "num_chars": 20}, {"sum_logits": -12.17420768737793, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -12.17420768737793, "logits_per_char": -1.3526897430419922, "num_chars": 9}, {"sum_logits": -9.952765464782715, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.298606872558594, "logits_per_token": -3.317588488260905, "logits_per_char": -0.4739412126087007, "num_chars": 21}, {"sum_logits": -20.09447479248047, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.249256134033203, "logits_per_token": -6.698158264160156, "logits_per_char": -1.5457288301908052, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 458, "native_id": "9003c4748b08d5a734747e499599ff20", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.328855514526367, "incorrect_loss_raw": 10.08150839805603, "correct_loss_per_char": 0.9041222163609096, "incorrect_loss_per_char": 1.4524061058248792, "correct_loss_per_token": 6.328855514526367, "incorrect_loss_per_token": 8.732133507728577, "correct_loss_uncond": -9.577676773071289, "incorrect_loss_uncond": -4.113636255264282}, "model_output": [{"sum_logits": -8.318421363830566, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.211878776550293, "logits_per_token": -8.318421363830566, "logits_per_char": -1.6636842727661132, "num_chars": 5}, {"sum_logits": -10.794999122619629, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.87460708618164, "logits_per_token": -5.3974995613098145, "logits_per_char": -1.3493748903274536, "num_chars": 8}, {"sum_logits": -6.328855514526367, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.906532287597656, "logits_per_token": -6.328855514526367, "logits_per_char": -0.9041222163609096, "num_chars": 7}, {"sum_logits": -8.119362831115723, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.335291862487793, "logits_per_token": -8.119362831115723, "logits_per_char": -1.1599089758736747, "num_chars": 7}, {"sum_logits": -13.093250274658203, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.358800888061523, "logits_per_token": -13.093250274658203, "logits_per_char": -1.6366562843322754, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 459, "native_id": "28aac6d39cdd270d2a6a28e1985484cb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.169830799102783, "incorrect_loss_raw": 7.697033017873764, "correct_loss_per_char": 0.2712288498878479, "incorrect_loss_per_char": 1.3641681652038526, "correct_loss_per_token": 2.169830799102783, "incorrect_loss_per_token": 7.697033017873764, "correct_loss_uncond": -14.165021419525146, "incorrect_loss_uncond": -6.086531072854996}, "model_output": [{"sum_logits": -10.55594253540039, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -10.55594253540039, "logits_per_char": -1.7593237559000652, "num_chars": 6}, {"sum_logits": -11.464940071105957, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -11.464940071105957, "logits_per_char": -2.8662350177764893, "num_chars": 4}, {"sum_logits": -2.169830799102783, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.33485221862793, "logits_per_token": -2.169830799102783, "logits_per_char": -0.2712288498878479, "num_chars": 8}, {"sum_logits": -1.746198058128357, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -12.95104694366455, "logits_per_token": -1.746198058128357, "logits_per_char": -0.2910330096880595, "num_chars": 6}, {"sum_logits": -7.021051406860352, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -7.021051406860352, "logits_per_char": -0.5400808774507962, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 460, "native_id": "8bdbb8caefcc607a9ec7579aa0c87cba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.157641410827637, "incorrect_loss_raw": 12.972855567932129, "correct_loss_per_char": 0.4798612594604492, "incorrect_loss_per_char": 1.4120281640585366, "correct_loss_per_token": 2.7192138036092124, "incorrect_loss_per_token": 6.319926301638286, "correct_loss_uncond": -11.337409019470215, "incorrect_loss_uncond": -2.8520607948303223}, "model_output": [{"sum_logits": -8.157641410827637, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.49505043029785, "logits_per_token": -2.7192138036092124, "logits_per_char": -0.4798612594604492, "num_chars": 17}, {"sum_logits": -12.028450012207031, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -10.704026222229004, "logits_per_token": -12.028450012207031, "logits_per_char": -1.0934954556551846, "num_chars": 11}, {"sum_logits": -14.963722229003906, "num_tokens": 5, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.67274284362793, "logits_per_token": -2.9927444458007812, "logits_per_char": -0.9975814819335938, "num_chars": 15}, {"sum_logits": -13.146685600280762, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.388023376464844, "logits_per_token": -4.382228533426921, "logits_per_char": -1.8780979428972517, "num_chars": 7}, {"sum_logits": -11.752564430236816, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -5.876282215118408, "logits_per_char": -1.6789377757481165, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 461, "native_id": "95a85df48902d23eb3fda25a99fca1a0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.815366268157959, "incorrect_loss_raw": 13.648079633712769, "correct_loss_per_char": 0.7815366268157959, "incorrect_loss_per_char": 1.2192315975824992, "correct_loss_per_token": 3.9076831340789795, "incorrect_loss_per_token": 6.577035983403524, "correct_loss_uncond": -9.25829553604126, "incorrect_loss_uncond": -5.509649991989136}, "model_output": [{"sum_logits": -17.373226165771484, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.75394058227539, "logits_per_token": -8.686613082885742, "logits_per_char": -1.7373226165771485, "num_chars": 10}, {"sum_logits": -7.815366268157959, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.07366180419922, "logits_per_token": -3.9076831340789795, "logits_per_char": -0.7815366268157959, "num_chars": 10}, {"sum_logits": -5.928092002868652, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.03896141052246, "logits_per_token": -1.976030667622884, "logits_per_char": -0.5928092002868652, "num_chars": 10}, {"sum_logits": -17.46916389465332, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.159305572509766, "logits_per_token": -8.73458194732666, "logits_per_char": -1.1646109263102213, "num_chars": 15}, {"sum_logits": -13.821836471557617, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.6787109375, "logits_per_token": -6.910918235778809, "logits_per_char": -1.3821836471557618, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 462, "native_id": "79c3378b7660d328902d7c0ad442a37f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.052183628082275, "incorrect_loss_raw": 15.789429545402527, "correct_loss_per_char": 1.210436725616455, "incorrect_loss_per_char": 1.2010712133513557, "correct_loss_per_token": 6.052183628082275, "incorrect_loss_per_token": 6.679909646511078, "correct_loss_uncond": -5.401909351348877, "incorrect_loss_uncond": -5.7913419008255005}, "model_output": [{"sum_logits": -7.527462482452393, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.151918411254883, "logits_per_token": -3.7637312412261963, "logits_per_char": -0.8363847202724881, "num_chars": 9}, {"sum_logits": -15.552985191345215, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.123661041259766, "logits_per_token": -7.776492595672607, "logits_per_char": -1.2960820992787678, "num_chars": 12}, {"sum_logits": -29.155323028564453, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -27.59119415283203, "logits_per_token": -9.718441009521484, "logits_per_char": -1.943688201904297, "num_chars": 15}, {"sum_logits": -10.921947479248047, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.45631217956543, "logits_per_token": -5.460973739624023, "logits_per_char": -0.7281298319498698, "num_chars": 15}, {"sum_logits": -6.052183628082275, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.454092979431152, "logits_per_token": -6.052183628082275, "logits_per_char": -1.210436725616455, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 463, "native_id": "8c12e5864463cfcd03f4d0ab67949d01", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.113142967224121, "incorrect_loss_raw": 17.844938278198242, "correct_loss_per_char": 1.0102857242931018, "incorrect_loss_per_char": 1.1712322811250848, "correct_loss_per_token": 5.5565714836120605, "incorrect_loss_per_token": 5.748786727587382, "correct_loss_uncond": -7.3659467697143555, "incorrect_loss_uncond": -4.263345718383789}, "model_output": [{"sum_logits": -21.697036743164062, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -28.221542358398438, "logits_per_token": -5.424259185791016, "logits_per_char": -0.9433494236158289, "num_chars": 23}, {"sum_logits": -11.113142967224121, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.479089736938477, "logits_per_token": -5.5565714836120605, "logits_per_char": -1.0102857242931018, "num_chars": 11}, {"sum_logits": -23.533437728881836, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.028722763061523, "logits_per_token": -5.883359432220459, "logits_per_char": -1.120639891851516, "num_chars": 21}, {"sum_logits": -17.82661247253418, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.91330623626709, "logits_per_char": -1.9807347191704645, "num_chars": 9}, {"sum_logits": -8.32266616821289, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.705862045288086, "logits_per_token": -2.7742220560709634, "logits_per_char": -0.64020508986253, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 464, "native_id": "e145618c2062eb9ea8928fdb0d42185e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.651999473571777, "incorrect_loss_raw": 15.859279870986938, "correct_loss_per_char": 1.0651999473571778, "incorrect_loss_per_char": 1.3061040781214086, "correct_loss_per_token": 5.325999736785889, "incorrect_loss_per_token": 8.48402162392934, "correct_loss_uncond": -8.94242000579834, "incorrect_loss_uncond": -4.317082405090332}, "model_output": [{"sum_logits": -10.651999473571777, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.594419479370117, "logits_per_token": -5.325999736785889, "logits_per_char": -1.0651999473571778, "num_chars": 10}, {"sum_logits": -25.951204299926758, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -25.477632522583008, "logits_per_token": -12.975602149963379, "logits_per_char": -1.9962464846097505, "num_chars": 13}, {"sum_logits": -9.430850982666016, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -9.430850982666016, "logits_per_char": -0.9430850982666016, "num_chars": 10}, {"sum_logits": -14.98739242553711, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -25.533170700073242, "logits_per_token": -4.995797475179036, "logits_per_char": -0.6516257576320482, "num_chars": 23}, {"sum_logits": -13.067671775817871, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -6.5338358879089355, "logits_per_char": -1.6334589719772339, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 465, "native_id": "35872be88df5f6c4a6600020266a5458", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.34792947769165, "incorrect_loss_raw": 8.829763770103455, "correct_loss_per_char": 0.3105663912636893, "incorrect_loss_per_char": 1.1161358646656336, "correct_loss_per_token": 2.173964738845825, "incorrect_loss_per_token": 6.0244322419166565, "correct_loss_uncond": -13.844180583953857, "incorrect_loss_uncond": -8.161692023277283}, "model_output": [{"sum_logits": -15.246917724609375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.935124397277832, "logits_per_token": -7.6234588623046875, "logits_per_char": -1.9058647155761719, "num_chars": 8}, {"sum_logits": -3.76167631149292, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -1.88083815574646, "logits_per_char": -0.19798296376278526, "num_chars": 19}, {"sum_logits": -4.34792947769165, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.192110061645508, "logits_per_token": -2.173964738845825, "logits_per_char": -0.3105663912636893, "num_chars": 14}, {"sum_logits": -3.43405818939209, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -1.717029094696045, "logits_per_char": -0.21462863683700562, "num_chars": 16}, {"sum_logits": -12.876402854919434, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.888906478881836, "logits_per_token": -12.876402854919434, "logits_per_char": -2.1460671424865723, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 466, "native_id": "055817d8d703d3c2802545e3fccdcde3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.8616251945495605, "incorrect_loss_raw": 7.1368619203567505, "correct_loss_per_char": 0.5516607420785087, "incorrect_loss_per_char": 0.9071524964911597, "correct_loss_per_token": 3.8616251945495605, "incorrect_loss_per_token": 6.244529187679291, "correct_loss_uncond": -10.664056301116943, "incorrect_loss_uncond": -7.211739897727966}, "model_output": [{"sum_logits": -6.979364395141602, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.747095108032227, "logits_per_token": -6.979364395141602, "logits_per_char": -0.6979364395141602, "num_chars": 10}, {"sum_logits": -3.8616251945495605, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.525681495666504, "logits_per_token": -3.8616251945495605, "logits_per_char": -0.5516607420785087, "num_chars": 7}, {"sum_logits": -7.837416648864746, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.406905174255371, "logits_per_token": -7.837416648864746, "logits_per_char": -1.5674833297729491, "num_chars": 5}, {"sum_logits": -7.138661861419678, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.71843719482422, "logits_per_token": -3.569330930709839, "logits_per_char": -0.8923327326774597, "num_chars": 8}, {"sum_logits": -6.592004776000977, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.52196979522705, "logits_per_token": -6.592004776000977, "logits_per_char": -0.47085748400006977, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 467, "native_id": "5ef6cdb85468df482e3aa6fa339d6e41", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.996767997741699, "incorrect_loss_raw": 11.663321256637573, "correct_loss_per_char": 0.3843667690570538, "incorrect_loss_per_char": 1.5558893001741834, "correct_loss_per_token": 2.4983839988708496, "incorrect_loss_per_token": 8.27556312084198, "correct_loss_uncond": -14.438471794128418, "incorrect_loss_uncond": -5.2106359004974365}, "model_output": [{"sum_logits": -4.996767997741699, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.435239791870117, "logits_per_token": -2.4983839988708496, "logits_per_char": -0.3843667690570538, "num_chars": 13}, {"sum_logits": -9.775609970092773, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -9.775609970092773, "logits_per_char": -1.9551219940185547, "num_chars": 5}, {"sum_logits": -14.359419822692871, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.666032791137695, "logits_per_token": -7.1797099113464355, "logits_per_char": -0.8974637389183044, "num_chars": 16}, {"sum_logits": -9.775609970092773, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -9.775609970092773, "logits_per_char": -1.9551219940185547, "num_chars": 5}, {"sum_logits": -12.742645263671875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -6.3713226318359375, "logits_per_char": -1.4158494737413194, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 468, "native_id": "1e939cc6fef999953d692b57caab254b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.869056701660156, "incorrect_loss_raw": 8.350902318954468, "correct_loss_per_char": 0.8579371134440105, "incorrect_loss_per_char": 1.6144923925399781, "correct_loss_per_token": 6.434528350830078, "incorrect_loss_per_token": 8.350902318954468, "correct_loss_uncond": -5.157508850097656, "incorrect_loss_uncond": -7.099609851837158}, "model_output": [{"sum_logits": -9.654190063476562, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.68338394165039, "logits_per_token": -9.654190063476562, "logits_per_char": -1.6090316772460938, "num_chars": 6}, {"sum_logits": -12.869056701660156, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -6.434528350830078, "logits_per_char": -0.8579371134440105, "num_chars": 15}, {"sum_logits": -11.249210357666016, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -11.249210357666016, "logits_per_char": -2.249842071533203, "num_chars": 5}, {"sum_logits": -6.311476707458496, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.724822998046875, "logits_per_token": -6.311476707458496, "logits_per_char": -1.051912784576416, "num_chars": 6}, {"sum_logits": -6.188732147216797, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.551799774169922, "logits_per_token": -6.188732147216797, "logits_per_char": -1.5471830368041992, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 469, "native_id": "3a3b5d4a517ef70d25eb558f1a622937", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8913958072662354, "incorrect_loss_raw": 15.108322858810425, "correct_loss_per_char": 0.2628541642969305, "incorrect_loss_per_char": 2.0374423265457153, "correct_loss_per_token": 2.8913958072662354, "incorrect_loss_per_token": 10.158901770909626, "correct_loss_uncond": -11.42638087272644, "incorrect_loss_uncond": 0.42261767387390137}, "model_output": [{"sum_logits": -11.561615943908691, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -11.561615943908691, "logits_per_char": -2.3123231887817384, "num_chars": 5}, {"sum_logits": -12.863605499267578, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -6.431802749633789, "logits_per_char": -1.8376579284667969, "num_chars": 7}, {"sum_logits": -20.0488224029541, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.575927734375, "logits_per_token": -6.6829408009847, "logits_per_char": -2.00488224029541, "num_chars": 10}, {"sum_logits": -15.959247589111328, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.965703964233398, "logits_per_token": -15.959247589111328, "logits_per_char": -1.994905948638916, "num_chars": 8}, {"sum_logits": -2.8913958072662354, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.317776679992676, "logits_per_token": -2.8913958072662354, "logits_per_char": -0.2628541642969305, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 470, "native_id": "a943522f7d407cef369d5d3f1bf48589", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.584532260894775, "incorrect_loss_raw": 8.502949595451355, "correct_loss_per_char": 0.3873254271114574, "incorrect_loss_per_char": 0.8925133423927503, "correct_loss_per_token": 2.1948440869649253, "incorrect_loss_per_token": 5.5342225432395935, "correct_loss_uncond": -16.137502193450928, "incorrect_loss_uncond": -10.289003491401672}, "model_output": [{"sum_logits": -5.107798099517822, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.464292526245117, "logits_per_token": -2.553899049758911, "logits_per_char": -0.39290754611675555, "num_chars": 13}, {"sum_logits": -5.118997573852539, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -2.5594987869262695, "logits_per_char": -0.42658313115437824, "num_chars": 12}, {"sum_logits": -6.584532260894775, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -2.1948440869649253, "logits_per_char": -0.3873254271114574, "num_chars": 17}, {"sum_logits": -10.261981964111328, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.897102355957031, "logits_per_token": -10.261981964111328, "logits_per_char": -1.710330327351888, "num_chars": 6}, {"sum_logits": -13.52302074432373, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.53334617614746, "logits_per_token": -6.761510372161865, "logits_per_char": -1.0402323649479792, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 471, "native_id": "57a343d72031b668e5eb91868420e915", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.199209690093994, "incorrect_loss_raw": 8.303681015968323, "correct_loss_per_char": 0.30583586412317615, "incorrect_loss_per_char": 0.948452561190634, "correct_loss_per_token": 2.599604845046997, "incorrect_loss_per_token": 4.9957659641901655, "correct_loss_uncond": -12.070065975189209, "incorrect_loss_uncond": -8.874653220176697}, "model_output": [{"sum_logits": -5.199209690093994, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -2.599604845046997, "logits_per_char": -0.30583586412317615, "num_chars": 17}, {"sum_logits": -6.891477584838867, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -6.891477584838867, "logits_per_char": -0.6264979622580789, "num_chars": 11}, {"sum_logits": -6.4757561683654785, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.036821365356445, "logits_per_token": -6.4757561683654785, "logits_per_char": -1.0792926947275798, "num_chars": 6}, {"sum_logits": -11.525556564331055, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.86288070678711, "logits_per_token": -3.8418521881103516, "logits_per_char": -1.0477778694846414, "num_chars": 11}, {"sum_logits": -8.32193374633789, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.850818634033203, "logits_per_token": -2.7739779154459634, "logits_per_char": -1.0402417182922363, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 472, "native_id": "c4b1a57e7880b9cb367f9c67abf5605f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.5312721729278564, "incorrect_loss_raw": 11.858951568603516, "correct_loss_per_char": 0.31640902161598206, "incorrect_loss_per_char": 1.6939083258310954, "correct_loss_per_token": 2.5312721729278564, "incorrect_loss_per_token": 10.28725528717041, "correct_loss_uncond": -11.726155996322632, "incorrect_loss_uncond": -2.4511921405792236}, "model_output": [{"sum_logits": -12.573570251464844, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.835892677307129, "logits_per_token": -6.286785125732422, "logits_per_char": -2.5147140502929686, "num_chars": 5}, {"sum_logits": -2.5312721729278564, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.257428169250488, "logits_per_token": -2.5312721729278564, "logits_per_char": -0.31640902161598206, "num_chars": 8}, {"sum_logits": -10.416879653930664, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.416786193847656, "logits_per_token": -10.416879653930664, "logits_per_char": -1.302109956741333, "num_chars": 8}, {"sum_logits": -11.060355186462402, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.041325569152832, "logits_per_token": -11.060355186462402, "logits_per_char": -1.8433925310770671, "num_chars": 6}, {"sum_logits": -13.385001182556152, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.94657039642334, "logits_per_token": -13.385001182556152, "logits_per_char": -1.1154167652130127, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 473, "native_id": "e313d7967f72c2b880213daaaf4b7181", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.877249717712402, "incorrect_loss_raw": 10.635487794876099, "correct_loss_per_char": 0.7055178369794574, "incorrect_loss_per_char": 0.952179572798989, "correct_loss_per_token": 4.938624858856201, "incorrect_loss_per_token": 4.9553427298863735, "correct_loss_uncond": -14.149212837219238, "incorrect_loss_uncond": -7.788176774978638}, "model_output": [{"sum_logits": -8.697628021240234, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.534561157226562, "logits_per_token": -2.8992093404134116, "logits_per_char": -0.6690483093261719, "num_chars": 13}, {"sum_logits": -17.12836456298828, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.669775009155273, "logits_per_token": -8.56418228149414, "logits_per_char": -1.557124051180753, "num_chars": 11}, {"sum_logits": -9.877249717712402, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -24.02646255493164, "logits_per_token": -4.938624858856201, "logits_per_char": -0.7055178369794574, "num_chars": 14}, {"sum_logits": -9.795492172241211, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.072179794311523, "logits_per_token": -4.8977460861206055, "logits_per_char": -0.8904992883855646, "num_chars": 11}, {"sum_logits": -6.920466423034668, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.418142318725586, "logits_per_token": -3.460233211517334, "logits_per_char": -0.6920466423034668, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 474, "native_id": "3c7992df7fda23bcdeacb1f1f6b73448", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2227859497070312, "incorrect_loss_raw": 11.722137928009033, "correct_loss_per_char": 0.18523216247558594, "incorrect_loss_per_char": 0.9742297734413947, "correct_loss_per_token": 1.1113929748535156, "incorrect_loss_per_token": 6.328482270240784, "correct_loss_uncond": -15.837100982666016, "incorrect_loss_uncond": -5.7039642333984375}, "model_output": [{"sum_logits": -18.666624069213867, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.07311248779297, "logits_per_token": -6.222208023071289, "logits_per_char": -1.4358941591702974, "num_chars": 13}, {"sum_logits": -4.452572822570801, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.65154266357422, "logits_per_token": -2.2262864112854004, "logits_per_char": -0.40477934750643646, "num_chars": 11}, {"sum_logits": -2.2227859497070312, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.059886932373047, "logits_per_token": -1.1113929748535156, "logits_per_char": -0.18523216247558594, "num_chars": 12}, {"sum_logits": -9.961514472961426, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.459878921508789, "logits_per_token": -9.961514472961426, "logits_per_char": -0.905592224814675, "num_chars": 11}, {"sum_logits": -13.807840347290039, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.519874572753906, "logits_per_token": -6.9039201736450195, "logits_per_char": -1.15065336227417, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 475, "native_id": "d6644eacdb543a60545d2eb1ac7e6dbd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.184597969055176, "incorrect_loss_raw": 13.227826118469238, "correct_loss_per_char": 0.6974329948425293, "incorrect_loss_per_char": 1.7621492656887088, "correct_loss_per_token": 2.092298984527588, "incorrect_loss_per_token": 6.942066748936971, "correct_loss_uncond": -11.264328002929688, "incorrect_loss_uncond": -2.803863763809204}, "model_output": [{"sum_logits": -4.184597969055176, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.448925971984863, "logits_per_token": -2.092298984527588, "logits_per_char": -0.6974329948425293, "num_chars": 6}, {"sum_logits": -14.885635375976562, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.029136657714844, "logits_per_token": -7.442817687988281, "logits_per_char": -2.4809392293294272, "num_chars": 6}, {"sum_logits": -10.825862884521484, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.847999572753906, "logits_per_token": -2.706465721130371, "logits_per_char": -0.8327586834247296, "num_chars": 13}, {"sum_logits": -14.371233940124512, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.790411313374837, "logits_per_char": -1.5968037711249456, "num_chars": 9}, {"sum_logits": -12.828572273254395, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -12.828572273254395, "logits_per_char": -2.1380953788757324, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 476, "native_id": "d1ad9b79f54205b6b9ac19a27f9c2be5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.793448448181152, "incorrect_loss_raw": 6.749950110912323, "correct_loss_per_char": 0.4628130762200606, "incorrect_loss_per_char": 0.9609523718555768, "correct_loss_per_token": 4.396724224090576, "incorrect_loss_per_token": 5.82180243730545, "correct_loss_uncond": -8.87916088104248, "incorrect_loss_uncond": -9.08897453546524}, "model_output": [{"sum_logits": -7.4251813888549805, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.15056610107422, "logits_per_token": -3.7125906944274902, "logits_per_char": -0.742518138885498, "num_chars": 10}, {"sum_logits": -9.329734802246094, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.464624404907227, "logits_per_token": -9.329734802246094, "logits_per_char": -1.5549558003743489, "num_chars": 6}, {"sum_logits": -6.3774003982543945, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -6.3774003982543945, "logits_per_char": -1.0629000663757324, "num_chars": 6}, {"sum_logits": -3.8674838542938232, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -3.8674838542938232, "logits_per_char": -0.4834354817867279, "num_chars": 8}, {"sum_logits": -8.793448448181152, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -4.396724224090576, "logits_per_char": -0.4628130762200606, "num_chars": 19}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 477, "native_id": "f116ee6620c0f171e5db54bc03a5f2e2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.15281867980957, "incorrect_loss_raw": 15.954684972763062, "correct_loss_per_char": 0.8320744254372336, "incorrect_loss_per_char": 1.183487131703545, "correct_loss_per_token": 4.576409339904785, "incorrect_loss_per_token": 6.3521542151769, "correct_loss_uncond": -8.822443008422852, "incorrect_loss_uncond": -4.120662450790405}, "model_output": [{"sum_logits": -14.04720687866211, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -7.023603439331055, "logits_per_char": -1.0805543752817006, "num_chars": 13}, {"sum_logits": -18.754690170288086, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -23.147979736328125, "logits_per_token": -6.251563390096028, "logits_per_char": -0.9870889563309518, "num_chars": 19}, {"sum_logits": -9.15281867980957, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.975261688232422, "logits_per_token": -4.576409339904785, "logits_per_char": -0.8320744254372336, "num_chars": 11}, {"sum_logits": -10.767014503479004, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.995750427246094, "logits_per_token": -5.383507251739502, "logits_per_char": -0.9788195003162731, "num_chars": 11}, {"sum_logits": -20.249828338623047, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.7241268157959, "logits_per_token": -6.749942779541016, "logits_per_char": -1.687485694885254, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 478, "native_id": "ea82f9e938cbfce85fb498ce46264253", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.190452575683594, "incorrect_loss_raw": 14.909948110580444, "correct_loss_per_char": 0.4718593250621449, "incorrect_loss_per_char": 1.0507924484005025, "correct_loss_per_token": 2.595226287841797, "incorrect_loss_per_token": 6.4228211641311646, "correct_loss_uncond": -11.217704772949219, "incorrect_loss_uncond": -5.1599814891815186}, "model_output": [{"sum_logits": -13.594205856323242, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -6.797102928161621, "logits_per_char": -1.0457081427940955, "num_chars": 13}, {"sum_logits": -16.514446258544922, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.150218963623047, "logits_per_token": -4.1286115646362305, "logits_per_char": -0.569463664087756, "num_chars": 29}, {"sum_logits": -5.190452575683594, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.408157348632812, "logits_per_token": -2.595226287841797, "logits_per_char": -0.4718593250621449, "num_chars": 11}, {"sum_logits": -15.821695327758789, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -7.9108476638793945, "logits_per_char": -1.2170534867506762, "num_chars": 13}, {"sum_logits": -13.709444999694824, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.672916412353516, "logits_per_token": -6.854722499847412, "logits_per_char": -1.3709444999694824, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 479, "native_id": "edbb57ac2f476679ae547f75ec2bef3e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.950611114501953, "incorrect_loss_raw": 12.091604232788086, "correct_loss_per_char": 0.7967074076334636, "incorrect_loss_per_char": 1.243574481540256, "correct_loss_per_token": 2.9876527786254883, "incorrect_loss_per_token": 6.139486312866211, "correct_loss_uncond": -7.764026641845703, "incorrect_loss_uncond": -4.230946779251099}, "model_output": [{"sum_logits": -12.245197296142578, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.88811683654785, "logits_per_token": -4.081732432047526, "logits_per_char": -1.2245197296142578, "num_chars": 10}, {"sum_logits": -11.950611114501953, "num_tokens": 4, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.714637756347656, "logits_per_token": -2.9876527786254883, "logits_per_char": -0.7967074076334636, "num_chars": 15}, {"sum_logits": -14.175714492797852, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.903663635253906, "logits_per_token": -4.72523816426595, "logits_per_char": -1.417571449279785, "num_chars": 10}, {"sum_logits": -12.389060974121094, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.121227264404297, "logits_per_token": -6.194530487060547, "logits_per_char": -1.3765623304578993, "num_chars": 9}, {"sum_logits": -9.55644416809082, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -9.55644416809082, "logits_per_char": -0.9556444168090821, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 480, "native_id": "07a99d5f2ca7028febeb9f09604b36c8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.456734657287598, "incorrect_loss_raw": 12.309691667556763, "correct_loss_per_char": 0.742789109547933, "incorrect_loss_per_char": 1.5000274560668252, "correct_loss_per_token": 4.456734657287598, "incorrect_loss_per_token": 8.176666021347046, "correct_loss_uncond": -11.49856185913086, "incorrect_loss_uncond": -5.485422134399414}, "model_output": [{"sum_logits": -10.270252227783203, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.595987319946289, "logits_per_token": -5.135126113891602, "logits_per_char": -2.0540504455566406, "num_chars": 5}, {"sum_logits": -4.456734657287598, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.955296516418457, "logits_per_token": -4.456734657287598, "logits_per_char": -0.742789109547933, "num_chars": 6}, {"sum_logits": -22.79395294189453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -25.15966033935547, "logits_per_token": -11.396976470947266, "logits_per_char": -2.5326614379882812, "num_chars": 9}, {"sum_logits": -7.526209831237793, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -7.526209831237793, "logits_per_char": -0.6271841526031494, "num_chars": 12}, {"sum_logits": -8.648351669311523, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -8.648351669311523, "logits_per_char": -0.7862137881192294, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 481, "native_id": "b42ef8be1748c19fa5938de5396f8fad", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.431831359863281, "incorrect_loss_raw": 9.920202732086182, "correct_loss_per_char": 0.5548136094037224, "incorrect_loss_per_char": 0.988128417188471, "correct_loss_per_token": 3.1439437866210938, "incorrect_loss_per_token": 8.193155288696289, "correct_loss_uncond": -11.239395141601562, "incorrect_loss_uncond": -6.816717147827148}, "model_output": [{"sum_logits": -11.824885368347168, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -11.824885368347168, "logits_per_char": -1.0749895789406516, "num_chars": 11}, {"sum_logits": -9.431831359863281, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.671226501464844, "logits_per_token": -3.1439437866210938, "logits_per_char": -0.5548136094037224, "num_chars": 17}, {"sum_logits": -7.167302131652832, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.87862491607666, "logits_per_token": -7.167302131652832, "logits_per_char": -0.895912766456604, "num_chars": 8}, {"sum_logits": -10.362284660339355, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.023242950439453, "logits_per_token": -3.454094886779785, "logits_per_char": -0.690818977355957, "num_chars": 15}, {"sum_logits": -10.326338768005371, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -10.326338768005371, "logits_per_char": -1.2907923460006714, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 482, "native_id": "236691d38665d7bcdd0c9b9834252a51", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.8789021968841553, "incorrect_loss_raw": 5.961470544338226, "correct_loss_per_char": 0.554128885269165, "incorrect_loss_per_char": 0.9331611816845242, "correct_loss_per_token": 3.8789021968841553, "incorrect_loss_per_token": 5.961470544338226, "correct_loss_uncond": -8.21422028541565, "incorrect_loss_uncond": -6.198605597019196}, "model_output": [{"sum_logits": -3.757575750350952, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.493803977966309, "logits_per_token": -3.757575750350952, "logits_per_char": -0.41750841670566136, "num_chars": 9}, {"sum_logits": -7.794119358062744, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.415973663330078, "logits_per_token": -7.794119358062744, "logits_per_char": -1.5588238716125489, "num_chars": 5}, {"sum_logits": -5.052393913269043, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.306968688964844, "logits_per_token": -5.052393913269043, "logits_per_char": -0.7217705590384347, "num_chars": 7}, {"sum_logits": -3.8789021968841553, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.093122482299805, "logits_per_token": -3.8789021968841553, "logits_per_char": -0.554128885269165, "num_chars": 7}, {"sum_logits": -7.241793155670166, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -7.241793155670166, "logits_per_char": -1.0345418793814523, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 483, "native_id": "8ef78abb86fc282ccb02bbc495f13030", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.61148738861084, "incorrect_loss_raw": 13.470109701156616, "correct_loss_per_char": 0.18653481347220285, "incorrect_loss_per_char": 1.3990582701037912, "correct_loss_per_token": 2.61148738861084, "incorrect_loss_per_token": 10.368324518203735, "correct_loss_uncond": -11.910482406616211, "incorrect_loss_uncond": -1.5805180072784424}, "model_output": [{"sum_logits": -7.605419158935547, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.71843719482422, "logits_per_token": -3.8027095794677734, "logits_per_char": -0.9506773948669434, "num_chars": 8}, {"sum_logits": -17.2088623046875, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.962242126464844, "logits_per_token": -8.60443115234375, "logits_per_char": -1.0122860179227942, "num_chars": 17}, {"sum_logits": -14.805242538452148, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.3347749710083, "logits_per_token": -14.805242538452148, "logits_per_char": -1.8506553173065186, "num_chars": 8}, {"sum_logits": -2.61148738861084, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.52196979522705, "logits_per_token": -2.61148738861084, "logits_per_char": -0.18653481347220285, "num_chars": 14}, {"sum_logits": -14.26091480255127, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -14.26091480255127, "logits_per_char": -1.7826143503189087, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 484, "native_id": "313d033c33ec475e04e628f87c5686bd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.541599273681641, "incorrect_loss_raw": 17.26947331428528, "correct_loss_per_char": 0.5801230210524339, "incorrect_loss_per_char": 1.5627868623960586, "correct_loss_per_token": 1.8853998184204102, "incorrect_loss_per_token": 8.63473665714264, "correct_loss_uncond": -11.482671737670898, "incorrect_loss_uncond": -2.1556050777435303}, "model_output": [{"sum_logits": -8.5510892868042, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.13589859008789, "logits_per_token": -4.2755446434021, "logits_per_char": -0.8551089286804199, "num_chars": 10}, {"sum_logits": -22.576351165771484, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.799171447753906, "logits_per_token": -11.288175582885742, "logits_per_char": -1.8813625971476238, "num_chars": 12}, {"sum_logits": -7.541599273681641, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.02427101135254, "logits_per_token": -1.8853998184204102, "logits_per_char": -0.5801230210524339, "num_chars": 13}, {"sum_logits": -23.72928237915039, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.701950073242188, "logits_per_token": -11.864641189575195, "logits_per_char": -1.4830801486968994, "num_chars": 16}, {"sum_logits": -14.221170425415039, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -7.1105852127075195, "logits_per_char": -2.0315957750592912, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 485, "native_id": "d581e0ad6a4c89465dc1a527bd2d3f77", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.9552454948425293, "incorrect_loss_raw": 9.040547728538513, "correct_loss_per_char": 0.3955245494842529, "incorrect_loss_per_char": 1.114525436391734, "correct_loss_per_token": 3.9552454948425293, "incorrect_loss_per_token": 9.040547728538513, "correct_loss_uncond": -8.909011363983154, "incorrect_loss_uncond": -5.768709063529968}, "model_output": [{"sum_logits": -13.235816955566406, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.981985092163086, "logits_per_token": -13.235816955566406, "logits_per_char": -0.9454154968261719, "num_chars": 14}, {"sum_logits": -4.20134162902832, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.621487617492676, "logits_per_token": -4.20134162902832, "logits_per_char": -0.46681573655870223, "num_chars": 9}, {"sum_logits": -12.316286087036133, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.886387825012207, "logits_per_token": -12.316286087036133, "logits_per_char": -2.4632572174072265, "num_chars": 5}, {"sum_logits": -6.408746242523193, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.747166633605957, "logits_per_token": -6.408746242523193, "logits_per_char": -0.5826132947748358, "num_chars": 11}, {"sum_logits": -3.9552454948425293, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.864256858825684, "logits_per_token": -3.9552454948425293, "logits_per_char": -0.3955245494842529, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 486, "native_id": "f232bfea2a7611999688a252e476c040", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.4898672103881836, "incorrect_loss_raw": 9.500859141349792, "correct_loss_per_char": 0.27665191226535374, "incorrect_loss_per_char": 1.461590059598287, "correct_loss_per_token": 2.4898672103881836, "incorrect_loss_per_token": 8.657773435115814, "correct_loss_uncond": -9.622928619384766, "incorrect_loss_uncond": -4.384578347206116}, "model_output": [{"sum_logits": -8.341379165649414, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -8.341379165649414, "logits_per_char": -1.6682758331298828, "num_chars": 5}, {"sum_logits": -2.4898672103881836, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -2.4898672103881836, "logits_per_char": -0.27665191226535374, "num_chars": 9}, {"sum_logits": -11.369377136230469, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.274541854858398, "logits_per_token": -11.369377136230469, "logits_per_char": -2.273875427246094, "num_chars": 5}, {"sum_logits": -11.547994613647461, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.183613777160645, "logits_per_token": -11.547994613647461, "logits_per_char": -1.154799461364746, "num_chars": 10}, {"sum_logits": -6.744685649871826, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.77837085723877, "logits_per_token": -3.372342824935913, "logits_per_char": -0.7494095166524252, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 487, "native_id": "91756d8e475d8d59fa0a4e35f408e366", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.225545883178711, "incorrect_loss_raw": 8.38151216506958, "correct_loss_per_char": 0.7042576471964518, "incorrect_loss_per_char": 1.3332040111223857, "correct_loss_per_token": 4.225545883178711, "incorrect_loss_per_token": 6.41168999671936, "correct_loss_uncond": -6.701982498168945, "incorrect_loss_uncond": -5.366495847702026}, "model_output": [{"sum_logits": -15.758577346801758, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.176830291748047, "logits_per_token": -7.879288673400879, "logits_per_char": -1.9698221683502197, "num_chars": 8}, {"sum_logits": -4.080671787261963, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.682840347290039, "logits_per_token": -4.080671787261963, "logits_per_char": -0.8161343574523926, "num_chars": 5}, {"sum_logits": -7.971787929534912, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.950617790222168, "logits_per_token": -7.971787929534912, "logits_per_char": -1.5943575859069825, "num_chars": 5}, {"sum_logits": -4.225545883178711, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.927528381347656, "logits_per_token": -4.225545883178711, "logits_per_char": -0.7042576471964518, "num_chars": 6}, {"sum_logits": -5.7150115966796875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.181743621826172, "logits_per_token": -5.7150115966796875, "logits_per_char": -0.9525019327799479, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 488, "native_id": "866ea9c668c0b42df19fa20865e31f77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.4787410497665405, "incorrect_loss_raw": 17.394835472106934, "correct_loss_per_char": 0.05319344997406006, "incorrect_loss_per_char": 1.4837766324772554, "correct_loss_per_token": 0.4787410497665405, "incorrect_loss_per_token": 10.1728302637736, "correct_loss_uncond": -15.491370797157288, "incorrect_loss_uncond": -1.0698530673980713}, "model_output": [{"sum_logits": -11.684894561767578, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -11.684894561767578, "logits_per_char": -1.4606118202209473, "num_chars": 8}, {"sum_logits": -9.911979675292969, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -9.911979675292969, "logits_per_char": -1.6519966125488281, "num_chars": 6}, {"sum_logits": -18.60174560546875, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.571712493896484, "logits_per_token": -9.300872802734375, "logits_per_char": -1.0942203297334558, "num_chars": 17}, {"sum_logits": -0.4787410497665405, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -0.4787410497665405, "logits_per_char": -0.05319344997406006, "num_chars": 9}, {"sum_logits": -29.380722045898438, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -25.704574584960938, "logits_per_token": -9.793574015299479, "logits_per_char": -1.7282777674057905, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 489, "native_id": "22015315e7ff79386877828b4fa27799", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.134767055511475, "incorrect_loss_raw": 10.619799137115479, "correct_loss_per_char": 0.34456392129262287, "incorrect_loss_per_char": 1.7533988907223657, "correct_loss_per_token": 2.0673835277557373, "incorrect_loss_per_token": 7.9770402908325195, "correct_loss_uncond": -13.010611057281494, "incorrect_loss_uncond": -4.839939594268799}, "model_output": [{"sum_logits": -21.142070770263672, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.741252899169922, "logits_per_token": -10.571035385131836, "logits_per_char": -3.0202958243233815, "num_chars": 7}, {"sum_logits": -8.410622596740723, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -8.410622596740723, "logits_per_char": -1.6821245193481444, "num_chars": 5}, {"sum_logits": -8.223762512207031, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -8.223762512207031, "logits_per_char": -1.3706270853678386, "num_chars": 6}, {"sum_logits": -4.134767055511475, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -2.0673835277557373, "logits_per_char": -0.34456392129262287, "num_chars": 12}, {"sum_logits": -4.702740669250488, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.881274223327637, "logits_per_token": -4.702740669250488, "logits_per_char": -0.9405481338500976, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 490, "native_id": "484f6e4fb8e6431b010c299490b72e3c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.863004684448242, "incorrect_loss_raw": 13.196874380111694, "correct_loss_per_char": 0.2863004684448242, "incorrect_loss_per_char": 1.4716939628124237, "correct_loss_per_token": 1.431502342224121, "incorrect_loss_per_token": 11.12571668624878, "correct_loss_uncond": -14.099563598632812, "incorrect_loss_uncond": -0.8279166221618652}, "model_output": [{"sum_logits": -14.803290367126465, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -14.803290367126465, "logits_per_char": -1.850411295890808, "num_chars": 8}, {"sum_logits": -2.863004684448242, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -1.431502342224121, "logits_per_char": -0.2863004684448242, "num_chars": 10}, {"sum_logits": -16.56926155090332, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.3290958404541, "logits_per_token": -8.28463077545166, "logits_per_char": -1.656926155090332, "num_chars": 10}, {"sum_logits": -10.821112632751465, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.660011291503906, "logits_per_token": -10.821112632751465, "logits_per_char": -1.202345848083496, "num_chars": 9}, {"sum_logits": -10.593832969665527, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.92300033569336, "logits_per_token": -10.593832969665527, "logits_per_char": -1.1770925521850586, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 491, "native_id": "7322d0dcf2e27c7032626a3639f5696b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1904003620147705, "incorrect_loss_raw": 13.618021965026855, "correct_loss_per_char": 0.4380800724029541, "incorrect_loss_per_char": 1.5179579743059166, "correct_loss_per_token": 2.1904003620147705, "incorrect_loss_per_token": 7.596556742986043, "correct_loss_uncond": -12.816437482833862, "incorrect_loss_uncond": -2.4674630165100098}, "model_output": [{"sum_logits": -12.137250900268555, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -12.137250900268555, "logits_per_char": -2.0228751500447593, "num_chars": 6}, {"sum_logits": -17.51065444946289, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.27172088623047, "logits_per_token": -5.83688481648763, "logits_per_char": -1.5918776772238992, "num_chars": 11}, {"sum_logits": -15.249258041381836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.632539749145508, "logits_per_token": -7.624629020690918, "logits_per_char": -1.0892327172415597, "num_chars": 14}, {"sum_logits": -9.57492446899414, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -4.78746223449707, "logits_per_char": -1.3678463527134486, "num_chars": 7}, {"sum_logits": -2.1904003620147705, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -2.1904003620147705, "logits_per_char": -0.4380800724029541, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 492, "native_id": "0519b0b0869681c2884f53dbfa43e538", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.198054313659668, "incorrect_loss_raw": 9.199115753173828, "correct_loss_per_char": 0.46645047929551864, "incorrect_loss_per_char": 0.9210718837049272, "correct_loss_per_token": 2.099027156829834, "incorrect_loss_per_token": 4.1476051807403564, "correct_loss_uncond": -10.27470874786377, "incorrect_loss_uncond": -8.15804123878479}, "model_output": [{"sum_logits": -10.8767728805542, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.935124397277832, "logits_per_token": -5.4383864402771, "logits_per_char": -1.359596610069275, "num_chars": 8}, {"sum_logits": -6.587259292602539, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -3.2936296463012695, "logits_per_char": -0.6587259292602539, "num_chars": 10}, {"sum_logits": -4.198054313659668, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.472763061523438, "logits_per_token": -2.099027156829834, "logits_per_char": -0.46645047929551864, "num_chars": 9}, {"sum_logits": -10.846864700317383, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.88745880126953, "logits_per_token": -3.615621566772461, "logits_per_char": -0.7231243133544922, "num_chars": 15}, {"sum_logits": -8.485566139221191, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.643476486206055, "logits_per_token": -4.242783069610596, "logits_per_char": -0.942840682135688, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 493, "native_id": "1ab04c0501b815b2a48f2581f04215a8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.338665008544922, "incorrect_loss_raw": 7.721875786781311, "correct_loss_per_char": 0.8615554173787435, "incorrect_loss_per_char": 0.995740683976706, "correct_loss_per_token": 5.169332504272461, "incorrect_loss_per_token": 4.642934083938599, "correct_loss_uncond": -11.060617446899414, "incorrect_loss_uncond": -10.033186316490173}, "model_output": [{"sum_logits": -6.255969524383545, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -6.255969524383545, "logits_per_char": -1.251193904876709, "num_chars": 5}, {"sum_logits": -5.998897552490234, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.62999725341797, "logits_per_token": -2.999448776245117, "logits_per_char": -0.5453543229536577, "num_chars": 11}, {"sum_logits": -10.338665008544922, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.399282455444336, "logits_per_token": -5.169332504272461, "logits_per_char": -0.8615554173787435, "num_chars": 12}, {"sum_logits": -9.151269912719727, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.34482192993164, "logits_per_token": -4.575634956359863, "logits_per_char": -0.831933628429066, "num_chars": 11}, {"sum_logits": -9.481366157531738, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.209156036376953, "logits_per_token": -4.740683078765869, "logits_per_char": -1.3544808796473913, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 494, "native_id": "7776b10c7bb96f3fe5e026678673634d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.9679946899414062, "incorrect_loss_raw": 10.211776494979858, "correct_loss_per_char": 0.5668563842773438, "incorrect_loss_per_char": 1.192221839500196, "correct_loss_per_token": 3.9679946899414062, "incorrect_loss_per_token": 7.60723602771759, "correct_loss_uncond": -8.236010551452637, "incorrect_loss_uncond": -4.830169200897217}, "model_output": [{"sum_logits": -7.866310119628906, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -7.866310119628906, "logits_per_char": -1.5732620239257813, "num_chars": 5}, {"sum_logits": -3.9679946899414062, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.204005241394043, "logits_per_token": -3.9679946899414062, "logits_per_char": -0.5668563842773438, "num_chars": 7}, {"sum_logits": -12.144472122192383, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.512303352355957, "logits_per_token": -12.144472122192383, "logits_per_char": -1.1040429201993076, "num_chars": 11}, {"sum_logits": -11.066451072692871, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.612686157226562, "logits_per_token": -5.5332255363464355, "logits_per_char": -1.0060410066084429, "num_chars": 11}, {"sum_logits": -9.769872665405273, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.206520080566406, "logits_per_token": -4.884936332702637, "logits_per_char": -1.0855414072672527, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 495, "native_id": "f7c005244d406b9bde48dc8c22003af1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4328815937042236, "incorrect_loss_raw": 8.680414080619812, "correct_loss_per_char": 0.14328815937042236, "incorrect_loss_per_char": 1.1625521256373479, "correct_loss_per_token": 1.4328815937042236, "incorrect_loss_per_token": 6.787916839122772, "correct_loss_uncond": -13.301911115646362, "incorrect_loss_uncond": -6.304723620414734}, "model_output": [{"sum_logits": -1.4328815937042236, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.734792709350586, "logits_per_token": -1.4328815937042236, "logits_per_char": -0.14328815937042236, "num_chars": 10}, {"sum_logits": -9.16344165802002, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.701730728149414, "logits_per_token": -9.16344165802002, "logits_per_char": -1.1454302072525024, "num_chars": 8}, {"sum_logits": -7.638452529907227, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.024660110473633, "logits_per_token": -3.8192262649536133, "logits_per_char": -0.5875732715313251, "num_chars": 13}, {"sum_logits": -7.501525402069092, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.024431228637695, "logits_per_token": -3.750762701034546, "logits_per_char": -1.875381350517273, "num_chars": 4}, {"sum_logits": -10.41823673248291, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.189728736877441, "logits_per_token": -10.41823673248291, "logits_per_char": -1.041823673248291, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 496, "native_id": "88501d528c855e2b533b3fea2f86183d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.392114639282227, "incorrect_loss_raw": 11.073095679283142, "correct_loss_per_char": 0.5811013308438387, "incorrect_loss_per_char": 1.1349287257905591, "correct_loss_per_token": 3.1960573196411133, "incorrect_loss_per_token": 6.950717091560364, "correct_loss_uncond": -10.65434455871582, "incorrect_loss_uncond": -6.650918364524841}, "model_output": [{"sum_logits": -8.896855354309082, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -8.896855354309082, "logits_per_char": -1.4828092257181804, "num_chars": 6}, {"sum_logits": -21.986019134521484, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -28.054662704467773, "logits_per_token": -5.496504783630371, "logits_per_char": -1.1571589018169202, "num_chars": 19}, {"sum_logits": -8.044110298156738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -8.044110298156738, "logits_per_char": -1.0055137872695923, "num_chars": 8}, {"sum_logits": -6.392114639282227, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.046459197998047, "logits_per_token": -3.1960573196411133, "logits_per_char": -0.5811013308438387, "num_chars": 11}, {"sum_logits": -5.365397930145264, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.100885391235352, "logits_per_token": -5.365397930145264, "logits_per_char": -0.894232988357544, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 497, "native_id": "3d9c3253e24fb108cea9083e8a853cf2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.8026249408721924, "incorrect_loss_raw": 9.30660891532898, "correct_loss_per_char": 0.5605249881744385, "incorrect_loss_per_char": 1.3339044035427154, "correct_loss_per_token": 2.8026249408721924, "incorrect_loss_per_token": 7.653051376342773, "correct_loss_uncond": -11.502598524093628, "incorrect_loss_uncond": -6.092050552368164}, "model_output": [{"sum_logits": -6.218283653259277, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.197744369506836, "logits_per_token": -3.1091418266296387, "logits_per_char": -0.5181903044382731, "num_chars": 12}, {"sum_logits": -12.11611270904541, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.402300834655762, "logits_per_token": -12.11611270904541, "logits_per_char": -1.34623474544949, "num_chars": 9}, {"sum_logits": -7.010176658630371, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -3.5050883293151855, "logits_per_char": -0.5007269041878837, "num_chars": 14}, {"sum_logits": -2.8026249408721924, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -2.8026249408721924, "logits_per_char": -0.5605249881744385, "num_chars": 5}, {"sum_logits": -11.88186264038086, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.017007827758789, "logits_per_token": -11.88186264038086, "logits_per_char": -2.970465660095215, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 498, "native_id": "9808782b2e2e1bfbfa27c41e605bfffe", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.470335960388184, "incorrect_loss_raw": 9.50835394859314, "correct_loss_per_char": 1.9117226600646973, "incorrect_loss_per_char": 1.5587090713637215, "correct_loss_per_token": 5.735167980194092, "incorrect_loss_per_token": 8.580387234687805, "correct_loss_uncond": -1.5092411041259766, "incorrect_loss_uncond": -4.941993713378906}, "model_output": [{"sum_logits": -8.850530624389648, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -8.850530624389648, "logits_per_char": -1.2643615177699499, "num_chars": 7}, {"sum_logits": -12.225305557250977, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.826773643493652, "logits_per_token": -12.225305557250977, "logits_per_char": -2.4450611114501952, "num_chars": 5}, {"sum_logits": -11.470335960388184, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.97957706451416, "logits_per_token": -5.735167980194092, "logits_per_char": -1.9117226600646973, "num_chars": 6}, {"sum_logits": -9.533845901489258, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.407280921936035, "logits_per_token": -9.533845901489258, "logits_per_char": -1.9067691802978515, "num_chars": 5}, {"sum_logits": -7.423733711242676, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.00241470336914, "logits_per_token": -3.711866855621338, "logits_per_char": -0.6186444759368896, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 499, "native_id": "c432b860fcd7297751ff5254ec4a7956", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.425941467285156, "incorrect_loss_raw": 10.564079523086548, "correct_loss_per_char": 1.106485366821289, "incorrect_loss_per_char": 1.409386568599277, "correct_loss_per_token": 4.425941467285156, "incorrect_loss_per_token": 7.690351406733195, "correct_loss_uncond": -11.466582298278809, "incorrect_loss_uncond": -5.205596685409546}, "model_output": [{"sum_logits": -4.425941467285156, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.892523765563965, "logits_per_token": -4.425941467285156, "logits_per_char": -1.106485366821289, "num_chars": 4}, {"sum_logits": -8.561875343322754, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.534889221191406, "logits_per_token": -8.561875343322754, "logits_per_char": -1.7123750686645507, "num_chars": 5}, {"sum_logits": -6.823236465454102, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.196922302246094, "logits_per_token": -6.823236465454102, "logits_per_char": -1.7058091163635254, "num_chars": 4}, {"sum_logits": -17.242368698120117, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -5.747456232706706, "logits_per_char": -1.1494912465413412, "num_chars": 15}, {"sum_logits": -9.628837585449219, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -9.628837585449219, "logits_per_char": -1.069870842827691, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 500, "native_id": "732af155f677a51d05d0c9e080d598b6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.597168922424316, "incorrect_loss_raw": 9.903650760650635, "correct_loss_per_char": 0.9424527032034737, "incorrect_loss_per_char": 1.6384570019585745, "correct_loss_per_token": 6.597168922424316, "incorrect_loss_per_token": 9.903650760650635, "correct_loss_uncond": -6.808498382568359, "incorrect_loss_uncond": -2.4490888118743896}, "model_output": [{"sum_logits": -9.662704467773438, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -9.662704467773438, "logits_per_char": -1.9325408935546875, "num_chars": 5}, {"sum_logits": -10.777034759521484, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -10.777034759521484, "logits_per_char": -1.5395763942173548, "num_chars": 7}, {"sum_logits": -11.413368225097656, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -11.413368225097656, "logits_per_char": -1.1413368225097655, "num_chars": 10}, {"sum_logits": -6.597168922424316, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.405667304992676, "logits_per_token": -6.597168922424316, "logits_per_char": -0.9424527032034737, "num_chars": 7}, {"sum_logits": -7.761495590209961, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.125860214233398, "logits_per_token": -7.761495590209961, "logits_per_char": -1.9403738975524902, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 501, "native_id": "48abc2c113623fd72f758502529f93a5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.238500595092773, "incorrect_loss_raw": 11.68454384803772, "correct_loss_per_char": 0.7199117997113396, "incorrect_loss_per_char": 0.8160368157122034, "correct_loss_per_token": 4.079500198364258, "incorrect_loss_per_token": 7.049290657043457, "correct_loss_uncond": -9.094635009765625, "incorrect_loss_uncond": -5.015918731689453}, "model_output": [{"sum_logits": -9.656149864196777, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -9.656149864196777, "logits_per_char": -1.0729055404663086, "num_chars": 9}, {"sum_logits": -12.154151916503906, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.76909637451172, "logits_per_token": -6.077075958251953, "logits_per_char": -0.7149501127355239, "num_chars": 17}, {"sum_logits": -12.238500595092773, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.3331356048584, "logits_per_token": -4.079500198364258, "logits_per_char": -0.7199117997113396, "num_chars": 17}, {"sum_logits": -11.70625114440918, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.481639862060547, "logits_per_token": -5.85312557220459, "logits_per_char": -0.780416742960612, "num_chars": 15}, {"sum_logits": -13.221622467041016, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.531368255615234, "logits_per_token": -6.610811233520508, "logits_per_char": -0.6958748666863692, "num_chars": 19}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 502, "native_id": "03f06f77aaf80b5f5e296ffbd11e9d82", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.748045921325684, "incorrect_loss_raw": 12.459791660308838, "correct_loss_per_char": 0.5225496292114258, "incorrect_loss_per_char": 1.3155354812289728, "correct_loss_per_token": 5.748045921325684, "incorrect_loss_per_token": 7.961595098177592, "correct_loss_uncond": -7.7003374099731445, "incorrect_loss_uncond": -4.251634359359741}, "model_output": [{"sum_logits": -15.416426658630371, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -5.138808886210124, "logits_per_char": -1.0277617772420247, "num_chars": 15}, {"sum_logits": -5.748045921325684, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -5.748045921325684, "logits_per_char": -0.5225496292114258, "num_chars": 11}, {"sum_logits": -15.430336952209473, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.64935302734375, "logits_per_token": -7.715168476104736, "logits_per_char": -1.4027579047463157, "num_chars": 11}, {"sum_logits": -8.0106782913208, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.6105318069458, "logits_per_token": -8.0106782913208, "logits_per_char": -1.0013347864151, "num_chars": 8}, {"sum_logits": -10.981724739074707, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -10.981724739074707, "logits_per_char": -1.8302874565124512, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 503, "native_id": "e7084c166ec67d0f983a26e055e845c6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.8769621849060059, "incorrect_loss_raw": 5.19738033413887, "correct_loss_per_char": 0.31282703081766766, "incorrect_loss_per_char": 0.7608789737025896, "correct_loss_per_token": 1.8769621849060059, "incorrect_loss_per_token": 4.165730744600296, "correct_loss_uncond": -11.07885217666626, "incorrect_loss_uncond": -8.660640686750412}, "model_output": [{"sum_logits": -5.213849067687988, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -5.213849067687988, "logits_per_char": -0.8689748446146647, "num_chars": 6}, {"sum_logits": -1.5457054376602173, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": true, "sum_logits_uncond": -12.017007827758789, "logits_per_token": -1.5457054376602173, "logits_per_char": -0.3864263594150543, "num_chars": 4}, {"sum_logits": -1.8769621849060059, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -1.8769621849060059, "logits_per_char": -0.31282703081766766, "num_chars": 6}, {"sum_logits": -5.776770114898682, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -9.548184394836426, "logits_per_token": -5.776770114898682, "logits_per_char": -0.9627950191497803, "num_chars": 6}, {"sum_logits": -8.253196716308594, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.190675735473633, "logits_per_token": -4.126598358154297, "logits_per_char": -0.8253196716308594, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 504, "native_id": "c55c31b5a2aa996f3b75ad88c017a6b9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.896188259124756, "incorrect_loss_raw": 7.532217860221863, "correct_loss_per_char": 0.4870235323905945, "incorrect_loss_per_char": 1.1525786034988634, "correct_loss_per_token": 3.896188259124756, "incorrect_loss_per_token": 6.140927354494731, "correct_loss_uncond": -12.821873188018799, "incorrect_loss_uncond": -8.68303644657135}, "model_output": [{"sum_logits": -6.637218475341797, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -6.637218475341797, "logits_per_char": -1.3274436950683595, "num_chars": 5}, {"sum_logits": -3.896188259124756, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -3.896188259124756, "logits_per_char": -0.4870235323905945, "num_chars": 8}, {"sum_logits": -8.708429336547852, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.331038475036621, "logits_per_token": -8.708429336547852, "logits_per_char": -1.4514048894246419, "num_chars": 6}, {"sum_logits": -8.347743034362793, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.1722412109375, "logits_per_token": -2.782581011454264, "logits_per_char": -0.7588857303966176, "num_chars": 11}, {"sum_logits": -6.43548059463501, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.383893966674805, "logits_per_token": -6.43548059463501, "logits_per_char": -1.072580099105835, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 505, "native_id": "463521a93ae71e93bea8b97cdf7a6792", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.715368270874023, "incorrect_loss_raw": 13.124494314193726, "correct_loss_per_char": 1.7858947118123372, "incorrect_loss_per_char": 1.1509277891803098, "correct_loss_per_token": 10.715368270874023, "incorrect_loss_per_token": 8.78038223584493, "correct_loss_uncond": -5.239928245544434, "incorrect_loss_uncond": -3.3211357593536377}, "model_output": [{"sum_logits": -10.09149169921875, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -10.09149169921875, "logits_per_char": -1.4416416713169642, "num_chars": 7}, {"sum_logits": -11.635306358337402, "num_tokens": 3, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.224252700805664, "logits_per_token": -3.8784354527791343, "logits_per_char": -0.8310933113098145, "num_chars": 14}, {"sum_logits": -11.532024383544922, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -11.532024383544922, "logits_per_char": -1.0483658530495383, "num_chars": 11}, {"sum_logits": -10.715368270874023, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.955296516418457, "logits_per_token": -10.715368270874023, "logits_per_char": -1.7858947118123372, "num_chars": 6}, {"sum_logits": -19.239154815673828, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.775131225585938, "logits_per_token": -9.619577407836914, "logits_per_char": -1.2826103210449218, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 506, "native_id": "c036ce033bc429ac1aba0a6ac8d057e1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.583512306213379, "incorrect_loss_raw": 12.457395315170288, "correct_loss_per_char": 0.6547874723161969, "incorrect_loss_per_char": 1.425388479232788, "correct_loss_per_token": 4.583512306213379, "incorrect_loss_per_token": 9.30836820602417, "correct_loss_uncond": -8.00654411315918, "incorrect_loss_uncond": -3.2403109073638916}, "model_output": [{"sum_logits": -9.46020221710205, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.505199432373047, "logits_per_token": -9.46020221710205, "logits_per_char": -1.8920404434204101, "num_chars": 5}, {"sum_logits": -4.583512306213379, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.590056419372559, "logits_per_token": -4.583512306213379, "logits_per_char": -0.6547874723161969, "num_chars": 7}, {"sum_logits": -15.177162170410156, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.436307907104492, "logits_per_token": -15.177162170410156, "logits_per_char": -1.3797420154918323, "num_chars": 11}, {"sum_logits": -15.352691650390625, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.394420623779297, "logits_per_token": -7.6763458251953125, "logits_per_char": -1.5352691650390624, "num_chars": 10}, {"sum_logits": -9.83952522277832, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.454896926879883, "logits_per_token": -4.91976261138916, "logits_per_char": -0.8945022929798473, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 507, "native_id": "db7f2bfdabcf53d6778fd7af80b603d2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.675283670425415, "incorrect_loss_raw": 8.096646830439568, "correct_loss_per_char": 0.3675283670425415, "incorrect_loss_per_char": 0.8294974744379047, "correct_loss_per_token": 1.8376418352127075, "incorrect_loss_per_token": 5.508523270487785, "correct_loss_uncond": -13.048824071884155, "incorrect_loss_uncond": -8.835359528660774}, "model_output": [{"sum_logits": -3.675283670425415, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -1.8376418352127075, "logits_per_char": -0.3675283670425415, "num_chars": 10}, {"sum_logits": -10.918306350708008, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -10.918306350708008, "logits_per_char": -1.5597580501011439, "num_chars": 7}, {"sum_logits": -11.479211807250977, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.211345672607422, "logits_per_token": -5.739605903625488, "logits_per_char": -0.6377339892917209, "num_chars": 18}, {"sum_logits": -9.225776672363281, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.7644100189209, "logits_per_token": -4.612888336181641, "logits_per_char": -1.0250862969292536, "num_chars": 9}, {"sum_logits": -0.7632924914360046, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -0.7632924914360046, "logits_per_char": -0.09541156142950058, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 508, "native_id": "8605fd2affc796d79073d0f3ef0761c9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.844757080078125, "incorrect_loss_raw": 9.249088525772095, "correct_loss_per_char": 0.32298380533854165, "incorrect_loss_per_char": 1.105653820885764, "correct_loss_per_token": 2.4223785400390625, "incorrect_loss_per_token": 7.688981771469116, "correct_loss_uncond": -14.072269439697266, "incorrect_loss_uncond": -5.011359930038452}, "model_output": [{"sum_logits": -8.98864459991455, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -8.98864459991455, "logits_per_char": -1.2840920857020788, "num_chars": 7}, {"sum_logits": -4.844757080078125, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -2.4223785400390625, "logits_per_char": -0.32298380533854165, "num_chars": 15}, {"sum_logits": -12.480854034423828, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.09314727783203, "logits_per_token": -6.240427017211914, "logits_per_char": -0.9600656949556791, "num_chars": 13}, {"sum_logits": -7.030147552490234, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -7.030147552490234, "logits_per_char": -1.406029510498047, "num_chars": 5}, {"sum_logits": -8.496707916259766, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -8.496707916259766, "logits_per_char": -0.7724279923872515, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 509, "native_id": "ad37795fd9e3a65553683ff305b5113d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.646061897277832, "incorrect_loss_raw": 20.383277773857117, "correct_loss_per_char": 0.6950965361161665, "incorrect_loss_per_char": 1.3870004144189583, "correct_loss_per_token": 3.823030948638916, "incorrect_loss_per_token": 7.25906483332316, "correct_loss_uncond": -7.7357683181762695, "incorrect_loss_uncond": -2.2766534090042114}, "model_output": [{"sum_logits": -31.201631546020508, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -30.461307525634766, "logits_per_token": -7.800407886505127, "logits_per_char": -1.5600815773010255, "num_chars": 20}, {"sum_logits": -16.905719757080078, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.387767791748047, "logits_per_token": -5.635239919026692, "logits_per_char": -1.300439981313852, "num_chars": 13}, {"sum_logits": -6.688037395477295, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -6.688037395477295, "logits_per_char": -1.1146728992462158, "num_chars": 6}, {"sum_logits": -7.646061897277832, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -3.823030948638916, "logits_per_char": -0.6950965361161665, "num_chars": 11}, {"sum_logits": -26.737722396850586, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.11443328857422, "logits_per_token": -8.91257413228353, "logits_per_char": -1.5728071998147404, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 510, "native_id": "bcd51af35d691f5c3b6b548096ab1559", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.169157981872559, "incorrect_loss_raw": 14.186232089996338, "correct_loss_per_char": 1.1670225688389368, "incorrect_loss_per_char": 1.7732790112495422, "correct_loss_per_token": 8.169157981872559, "incorrect_loss_per_token": 7.288748939832052, "correct_loss_uncond": -2.671208381652832, "incorrect_loss_uncond": -1.7897281646728516}, "model_output": [{"sum_logits": -12.546025276184082, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.69063949584961, "logits_per_token": -6.273012638092041, "logits_per_char": -1.5682531595230103, "num_chars": 8}, {"sum_logits": -15.987689971923828, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.336135864257812, "logits_per_token": -5.329229990641276, "logits_per_char": -1.9984612464904785, "num_chars": 8}, {"sum_logits": -12.223523139953613, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.540929794311523, "logits_per_token": -12.223523139953613, "logits_per_char": -1.5279403924942017, "num_chars": 8}, {"sum_logits": -8.169157981872559, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -10.84036636352539, "logits_per_token": -8.169157981872559, "logits_per_char": -1.1670225688389368, "num_chars": 7}, {"sum_logits": -15.987689971923828, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.336135864257812, "logits_per_token": -5.329229990641276, "logits_per_char": -1.9984612464904785, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 511, "native_id": "b5345f15d5b451562ab9e0851e7f394f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.344169616699219, "incorrect_loss_raw": 12.07675051689148, "correct_loss_per_char": 2.0860424041748047, "incorrect_loss_per_char": 1.2516973657267434, "correct_loss_per_token": 8.344169616699219, "incorrect_loss_per_token": 8.502981305122375, "correct_loss_uncond": -5.6303510665893555, "incorrect_loss_uncond": -6.267547369003296}, "model_output": [{"sum_logits": -11.57393741607666, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -11.57393741607666, "logits_per_char": -1.6534196308680944, "num_chars": 7}, {"sum_logits": -11.003811836242676, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.879570007324219, "logits_per_token": -11.003811836242676, "logits_per_char": -1.3754764795303345, "num_chars": 8}, {"sum_logits": -8.344169616699219, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -8.344169616699219, "logits_per_char": -2.0860424041748047, "num_chars": 4}, {"sum_logits": -6.669150352478027, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.914971351623535, "logits_per_token": -6.669150352478027, "logits_per_char": -1.111525058746338, "num_chars": 6}, {"sum_logits": -19.060102462768555, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -28.15909194946289, "logits_per_token": -4.765025615692139, "logits_per_char": -0.866368293762207, "num_chars": 22}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 512, "native_id": "6a884d5d8febfdd86fcf68ff1a904d9b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.803110122680664, "incorrect_loss_raw": 8.990825653076172, "correct_loss_per_char": 0.5868740081787109, "incorrect_loss_per_char": 0.9314809431761375, "correct_loss_per_token": 4.401555061340332, "incorrect_loss_per_token": 5.243967294692993, "correct_loss_uncond": -13.314176559448242, "incorrect_loss_uncond": -9.909887790679932}, "model_output": [{"sum_logits": -8.803110122680664, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.117286682128906, "logits_per_token": -4.401555061340332, "logits_per_char": -0.5868740081787109, "num_chars": 15}, {"sum_logits": -9.131999015808105, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -9.131999015808105, "logits_per_char": -1.3045712879725866, "num_chars": 7}, {"sum_logits": -8.805107116699219, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -4.402553558349609, "logits_per_char": -0.8004642833362926, "num_chars": 11}, {"sum_logits": -9.430689811706543, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.34556770324707, "logits_per_token": -3.1435632705688477, "logits_per_char": -1.0478544235229492, "num_chars": 9}, {"sum_logits": -8.59550666809082, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.65581512451172, "logits_per_token": -4.29775333404541, "logits_per_char": -0.5730337778727214, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 513, "native_id": "a1303b5177df0a5b653c9abd7d5f5e08", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.904581069946289, "incorrect_loss_raw": 8.729002118110657, "correct_loss_per_char": 1.3174301783243816, "incorrect_loss_per_char": 1.0610167088416906, "correct_loss_per_token": 7.904581069946289, "incorrect_loss_per_token": 7.17611825466156, "correct_loss_uncond": -7.709749221801758, "incorrect_loss_uncond": -7.907543778419495}, "model_output": [{"sum_logits": -12.423070907592773, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.41895294189453, "logits_per_token": -6.211535453796387, "logits_per_char": -0.8282047271728515, "num_chars": 15}, {"sum_logits": -6.066171646118164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -6.066171646118164, "logits_per_char": -0.4666285881629357, "num_chars": 13}, {"sum_logits": -11.945170402526855, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.158236503601074, "logits_per_token": -11.945170402526855, "logits_per_char": -2.389034080505371, "num_chars": 5}, {"sum_logits": -7.904581069946289, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -7.904581069946289, "logits_per_char": -1.3174301783243816, "num_chars": 6}, {"sum_logits": -4.481595516204834, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.33485221862793, "logits_per_token": -4.481595516204834, "logits_per_char": -0.5601994395256042, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 514, "native_id": "315baf79f8dd3673f67a90de0758240e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.0477142333984375, "incorrect_loss_raw": 8.876718759536743, "correct_loss_per_char": 0.25238571166992185, "incorrect_loss_per_char": 1.1887716679345994, "correct_loss_per_token": 1.6825714111328125, "incorrect_loss_per_token": 8.876718759536743, "correct_loss_uncond": -11.19929313659668, "incorrect_loss_uncond": -4.52192497253418}, "model_output": [{"sum_logits": -5.995173454284668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -5.995173454284668, "logits_per_char": -0.6661303838094076, "num_chars": 9}, {"sum_logits": -7.112060546875, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.997017860412598, "logits_per_token": -7.112060546875, "logits_per_char": -0.889007568359375, "num_chars": 8}, {"sum_logits": -11.03770923614502, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.212400436401367, "logits_per_token": -11.03770923614502, "logits_per_char": -1.5768156051635742, "num_chars": 7}, {"sum_logits": -11.361931800842285, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.249696731567383, "logits_per_token": -11.361931800842285, "logits_per_char": -1.6231331144060408, "num_chars": 7}, {"sum_logits": -5.0477142333984375, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.247007369995117, "logits_per_token": -1.6825714111328125, "logits_per_char": -0.25238571166992185, "num_chars": 20}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 515, "native_id": "01f01cc3ad152773ef42b30e926912bf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.044544219970703, "incorrect_loss_raw": 11.935187578201294, "correct_loss_per_char": 0.8938382466634115, "incorrect_loss_per_char": 1.207229048865182, "correct_loss_per_token": 4.022272109985352, "incorrect_loss_per_token": 7.619409203529358, "correct_loss_uncond": -8.882896423339844, "incorrect_loss_uncond": -5.036294221878052}, "model_output": [{"sum_logits": -8.044544219970703, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.927440643310547, "logits_per_token": -4.022272109985352, "logits_per_char": -0.8938382466634115, "num_chars": 9}, {"sum_logits": -22.532377243041992, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.592634201049805, "logits_per_token": -11.266188621520996, "logits_per_char": -1.6094555173601424, "num_chars": 14}, {"sum_logits": -8.304386138916016, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.436759948730469, "logits_per_token": -8.304386138916016, "logits_per_char": -1.038048267364502, "num_chars": 8}, {"sum_logits": -4.910137176513672, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.7301025390625, "logits_per_token": -4.910137176513672, "logits_per_char": -0.9820274353027344, "num_chars": 5}, {"sum_logits": -11.993849754333496, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.12643051147461, "logits_per_token": -5.996924877166748, "logits_per_char": -1.1993849754333497, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 516, "native_id": "f192cfacbaa2f7e0e879f673c8e076a7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.860639572143555, "incorrect_loss_raw": 11.654154539108276, "correct_loss_per_char": 1.1434399286905925, "incorrect_loss_per_char": 1.2013061286037803, "correct_loss_per_token": 3.4303197860717773, "incorrect_loss_per_token": 7.016495227813721, "correct_loss_uncond": -9.653087615966797, "incorrect_loss_uncond": -5.564738035202026}, "model_output": [{"sum_logits": -6.860639572143555, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.51372718811035, "logits_per_token": -3.4303197860717773, "logits_per_char": -1.1434399286905925, "num_chars": 6}, {"sum_logits": -12.704906463623047, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -12.704906463623047, "logits_per_char": -1.4116562737358942, "num_chars": 9}, {"sum_logits": -9.56868839263916, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.731197357177734, "logits_per_token": -3.1895627975463867, "logits_per_char": -0.7973906993865967, "num_chars": 12}, {"sum_logits": -8.063386917114258, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.985862731933594, "logits_per_token": -4.031693458557129, "logits_per_char": -1.343897819519043, "num_chars": 6}, {"sum_logits": -16.27963638305664, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -8.13981819152832, "logits_per_char": -1.2522797217735877, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 517, "native_id": "ab8d5e21a2cf34b60a04768b01f1f8e9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5924978256225586, "incorrect_loss_raw": 9.446098327636719, "correct_loss_per_char": 0.3265907114202326, "incorrect_loss_per_char": 0.9852244823009937, "correct_loss_per_token": 3.5924978256225586, "incorrect_loss_per_token": 9.446098327636719, "correct_loss_uncond": -11.646341323852539, "incorrect_loss_uncond": -4.736314058303833}, "model_output": [{"sum_logits": -8.233222961425781, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -8.233222961425781, "logits_per_char": -0.9148025512695312, "num_chars": 9}, {"sum_logits": -3.5924978256225586, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.238839149475098, "logits_per_token": -3.5924978256225586, "logits_per_char": -0.3265907114202326, "num_chars": 11}, {"sum_logits": -8.741101264953613, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.926101684570312, "logits_per_token": -8.741101264953613, "logits_per_char": -0.7946455695412376, "num_chars": 11}, {"sum_logits": -10.379840850830078, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.396806716918945, "logits_per_token": -10.379840850830078, "logits_per_char": -0.7414172036307198, "num_chars": 14}, {"sum_logits": -10.430228233337402, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -10.430228233337402, "logits_per_char": -1.4900326047624861, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 518, "native_id": "5d1df1daa886efb78db2103ddc1398eb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.773320198059082, "incorrect_loss_raw": 11.415391683578491, "correct_loss_per_char": 0.7216650247573853, "incorrect_loss_per_char": 1.1998355090618134, "correct_loss_per_token": 2.886660099029541, "incorrect_loss_per_token": 11.415391683578491, "correct_loss_uncond": -9.075857162475586, "incorrect_loss_uncond": -2.73888897895813}, "model_output": [{"sum_logits": -5.773320198059082, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -2.886660099029541, "logits_per_char": -0.7216650247573853, "num_chars": 8}, {"sum_logits": -12.41242504119873, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.217647552490234, "logits_per_token": -12.41242504119873, "logits_per_char": -1.5515531301498413, "num_chars": 8}, {"sum_logits": -11.222599983215332, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.206267356872559, "logits_per_token": -11.222599983215332, "logits_per_char": -1.2469555536905925, "num_chars": 9}, {"sum_logits": -12.109249114990234, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.655388832092285, "logits_per_token": -12.109249114990234, "logits_per_char": -1.0091040929158528, "num_chars": 12}, {"sum_logits": -9.917292594909668, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.537818908691406, "logits_per_token": -9.917292594909668, "logits_per_char": -0.9917292594909668, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 519, "native_id": "2f8b35d352097cc9277599be49fab0b3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.146537780761719, "incorrect_loss_raw": 10.338752627372742, "correct_loss_per_char": 0.5122114817301432, "incorrect_loss_per_char": 0.9977664232254028, "correct_loss_per_token": 3.0732688903808594, "incorrect_loss_per_token": 6.564316948254903, "correct_loss_uncond": -16.12653350830078, "incorrect_loss_uncond": -7.12222945690155}, "model_output": [{"sum_logits": -11.08241081237793, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -11.08241081237793, "logits_per_char": -1.108241081237793, "num_chars": 10}, {"sum_logits": -7.625985622406006, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -7.625985622406006, "logits_per_char": -1.0894265174865723, "num_chars": 7}, {"sum_logits": -14.137899398803711, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.96915626525879, "logits_per_token": -4.71263313293457, "logits_per_char": -0.9425266265869141, "num_chars": 15}, {"sum_logits": -8.50871467590332, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.705467224121094, "logits_per_token": -2.836238225301107, "logits_per_char": -0.850871467590332, "num_chars": 10}, {"sum_logits": -6.146537780761719, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -3.0732688903808594, "logits_per_char": -0.5122114817301432, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 520, "native_id": "18eb6a3b54ccf4989e268cfb9ea90f9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.033620834350586, "incorrect_loss_raw": 10.176227927207947, "correct_loss_per_char": 0.5355747222900391, "incorrect_loss_per_char": 0.6486944919039597, "correct_loss_per_token": 4.016810417175293, "incorrect_loss_per_token": 3.9637520710627236, "correct_loss_uncond": -11.453079223632812, "incorrect_loss_uncond": -10.701184391975403}, "model_output": [{"sum_logits": -5.47228479385376, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.202003479003906, "logits_per_token": -1.8240949312845867, "logits_per_char": -0.34201779961586, "num_chars": 16}, {"sum_logits": -8.033620834350586, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.4867000579834, "logits_per_token": -4.016810417175293, "logits_per_char": -0.5355747222900391, "num_chars": 15}, {"sum_logits": -11.163517951965332, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.17514419555664, "logits_per_token": -3.721172650655111, "logits_per_char": -0.5074326341802423, "num_chars": 22}, {"sum_logits": -10.348882675170898, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.935253143310547, "logits_per_token": -3.4496275583902993, "logits_per_char": -0.6899255116780599, "num_chars": 15}, {"sum_logits": -13.720226287841797, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.197248458862305, "logits_per_token": -6.860113143920898, "logits_per_char": -1.0554020221416767, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 521, "native_id": "3e12400bc5a2038a747edf2605787fe8", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.265326499938965, "incorrect_loss_raw": 17.64743709564209, "correct_loss_per_char": 0.5189518928527832, "incorrect_loss_per_char": 1.654308270850377, "correct_loss_per_token": 2.421775499979655, "incorrect_loss_per_token": 8.083124796549479, "correct_loss_uncond": -13.184863090515137, "incorrect_loss_uncond": -0.7846674919128418}, "model_output": [{"sum_logits": -22.06886863708496, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -7.356289545694987, "logits_per_char": -1.4712579091389975, "num_chars": 15}, {"sum_logits": -13.203874588012695, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.922107696533203, "logits_per_token": -13.203874588012695, "logits_per_char": -2.2006457646687827, "num_chars": 6}, {"sum_logits": -7.265326499938965, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.4501895904541, "logits_per_token": -2.421775499979655, "logits_per_char": -0.5189518928527832, "num_chars": 14}, {"sum_logits": -16.7371768951416, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.787614822387695, "logits_per_token": -5.5790589650472, "logits_per_char": -0.8809040471127159, "num_chars": 19}, {"sum_logits": -18.5798282623291, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.7897891998291, "logits_per_token": -6.193276087443034, "logits_per_char": -2.0644253624810114, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 522, "native_id": "72baf6ca5c4daa01c2cc7fda22183db8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.084262847900391, "incorrect_loss_raw": 8.858304262161255, "correct_loss_per_char": 0.46220571344549005, "incorrect_loss_per_char": 1.5019559042020276, "correct_loss_per_token": 2.5421314239501953, "incorrect_loss_per_token": 5.881900727748871, "correct_loss_uncond": -13.52383041381836, "incorrect_loss_uncond": -5.292049407958984}, "model_output": [{"sum_logits": -5.084262847900391, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.60809326171875, "logits_per_token": -2.5421314239501953, "logits_per_char": -0.46220571344549005, "num_chars": 11}, {"sum_logits": -3.9053702354431152, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.77840518951416, "logits_per_token": -3.9053702354431152, "logits_per_char": -0.9763425588607788, "num_chars": 4}, {"sum_logits": -7.637104511260986, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.142416000366211, "logits_per_token": -3.818552255630493, "logits_per_char": -0.6942822282964533, "num_chars": 11}, {"sum_logits": -16.174123764038086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.681221961975098, "logits_per_token": -8.087061882019043, "logits_per_char": -3.2348247528076173, "num_chars": 5}, {"sum_logits": -7.716618537902832, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -7.716618537902832, "logits_per_char": -1.1023740768432617, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 523, "native_id": "9bac07574c966cae34c85e9f25538cba", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.236314296722412, "incorrect_loss_raw": 14.706563234329224, "correct_loss_per_char": 0.36684201745425954, "incorrect_loss_per_char": 1.1256831941150485, "correct_loss_per_token": 2.078771432240804, "incorrect_loss_per_token": 6.485918879508972, "correct_loss_uncond": -11.655733585357666, "incorrect_loss_uncond": -5.0782835483551025}, "model_output": [{"sum_logits": -13.877803802490234, "num_tokens": 4, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -19.33013153076172, "logits_per_token": -3.4694509506225586, "logits_per_char": -0.8163414001464844, "num_chars": 17}, {"sum_logits": -6.236314296722412, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -17.892047882080078, "logits_per_token": -2.078771432240804, "logits_per_char": -0.36684201745425954, "num_chars": 17}, {"sum_logits": -14.515202522277832, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -7.257601261138916, "logits_per_char": -1.6128002802530925, "num_chars": 9}, {"sum_logits": -21.04456901550293, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -19.891969680786133, "logits_per_token": -10.522284507751465, "logits_per_char": -1.4029712677001953, "num_chars": 15}, {"sum_logits": -9.388677597045898, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -19.440277099609375, "logits_per_token": -4.694338798522949, "logits_per_char": -0.6706198283604213, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 524, "native_id": "fe2a21ddb1bde76025a961126044a9a3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.791089057922363, "incorrect_loss_raw": 12.331544399261475, "correct_loss_per_char": 0.6762376198401818, "incorrect_loss_per_char": 1.886544334387579, "correct_loss_per_token": 8.791089057922363, "incorrect_loss_per_token": 9.910150369008383, "correct_loss_uncond": -6.943735122680664, "incorrect_loss_uncond": -1.5944840908050537}, "model_output": [{"sum_logits": -14.03417682647705, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.370438575744629, "logits_per_token": -14.03417682647705, "logits_per_char": -2.8068353652954103, "num_chars": 5}, {"sum_logits": -12.190983772277832, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.765427589416504, "logits_per_token": -12.190983772277832, "logits_per_char": -1.7415691103254045, "num_chars": 7}, {"sum_logits": -8.572652816772461, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -8.572652816772461, "logits_per_char": -2.1431632041931152, "num_chars": 4}, {"sum_logits": -8.791089057922363, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.734824180603027, "logits_per_token": -8.791089057922363, "logits_per_char": -0.6762376198401818, "num_chars": 13}, {"sum_logits": -14.528364181518555, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.23800277709961, "logits_per_token": -4.842788060506185, "logits_per_char": -0.8546096577363855, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 525, "native_id": "d03e09b22927542d6b0d5ebe233e467c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.6136481761932373, "incorrect_loss_raw": 5.671262979507446, "correct_loss_per_char": 0.4015164640214708, "incorrect_loss_per_char": 0.6063025366176258, "correct_loss_per_token": 1.8068240880966187, "incorrect_loss_per_token": 2.5646553834279375, "correct_loss_uncond": -10.8591148853302, "incorrect_loss_uncond": -12.849965333938599}, "model_output": [{"sum_logits": -6.214384078979492, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.504745483398438, "logits_per_token": -2.0714613596598306, "logits_per_char": -0.5649440071799539, "num_chars": 11}, {"sum_logits": -6.214384078979492, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -22.504745483398438, "logits_per_token": -2.0714613596598306, "logits_per_char": -0.5649440071799539, "num_chars": 11}, {"sum_logits": -6.210877418518066, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.310859680175781, "logits_per_token": -2.0702924728393555, "logits_per_char": -0.6210877418518066, "num_chars": 10}, {"sum_logits": -4.045406341552734, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.764562606811523, "logits_per_token": -4.045406341552734, "logits_per_char": -0.6742343902587891, "num_chars": 6}, {"sum_logits": -3.6136481761932373, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.472763061523438, "logits_per_token": -1.8068240880966187, "logits_per_char": -0.4015164640214708, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 526, "native_id": "e63a210053cf7f961ca0b5a7e6eb355d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.075211524963379, "incorrect_loss_raw": 14.426442623138428, "correct_loss_per_char": 0.6729342937469482, "incorrect_loss_per_char": 1.4352624857580505, "correct_loss_per_token": 2.0188028812408447, "incorrect_loss_per_token": 7.911817987759908, "correct_loss_uncond": -10.467452049255371, "incorrect_loss_uncond": -2.8541879653930664}, "model_output": [{"sum_logits": -12.893773078918457, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.945659637451172, "logits_per_token": -6.4468865394592285, "logits_per_char": -1.6117216348648071, "num_chars": 8}, {"sum_logits": -10.900146484375, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.67718505859375, "logits_per_token": -10.900146484375, "logits_per_char": -1.5571637834821428, "num_chars": 7}, {"sum_logits": -8.075211524963379, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.54266357421875, "logits_per_token": -2.0188028812408447, "logits_per_char": -0.6729342937469482, "num_chars": 12}, {"sum_logits": -17.977731704711914, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.724233627319336, "logits_per_token": -8.988865852355957, "logits_per_char": -1.1236082315444946, "num_chars": 16}, {"sum_logits": -15.93411922454834, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.77544403076172, "logits_per_token": -5.311373074849446, "logits_per_char": -1.448556293140758, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 527, "native_id": "a4b4242fab25e86a9d7ffedcaecdcdbe", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.113106727600098, "incorrect_loss_raw": 7.908031523227692, "correct_loss_per_char": 0.7304438182285854, "incorrect_loss_per_char": 0.6704870589905314, "correct_loss_per_token": 5.113106727600098, "incorrect_loss_per_token": 4.323310911655426, "correct_loss_uncond": -11.538313865661621, "incorrect_loss_uncond": -9.042146623134613}, "model_output": [{"sum_logits": -2.9543612003326416, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -2.9543612003326416, "logits_per_char": -0.3692951500415802, "num_chars": 8}, {"sum_logits": -14.3327054977417, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.211345672607422, "logits_per_token": -7.16635274887085, "logits_per_char": -0.7962614165412055, "num_chars": 18}, {"sum_logits": -7.369715690612793, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.7644100189209, "logits_per_token": -3.6848578453063965, "logits_per_char": -0.818857298956977, "num_chars": 9}, {"sum_logits": -5.113106727600098, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -5.113106727600098, "logits_per_char": -0.7304438182285854, "num_chars": 7}, {"sum_logits": -6.975343704223633, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -3.4876718521118164, "logits_per_char": -0.6975343704223633, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 528, "native_id": "ec8797b12e3c6666ebe70b2a7680b66f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.335794448852539, "incorrect_loss_raw": 19.808520078659058, "correct_loss_per_char": 0.9396176771684126, "incorrect_loss_per_char": 1.2474697403835528, "correct_loss_per_token": 5.1678972244262695, "incorrect_loss_per_token": 6.275794235865275, "correct_loss_uncond": -9.420694351196289, "incorrect_loss_uncond": -5.016578912734985}, "model_output": [{"sum_logits": -40.35840606689453, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -42.220977783203125, "logits_per_token": -8.071681213378906, "logits_per_char": -1.8344730030406604, "num_chars": 22}, {"sum_logits": -14.438048362731934, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.839237213134766, "logits_per_token": -4.8126827875773115, "logits_per_char": -0.6015853484471639, "num_chars": 24}, {"sum_logits": -12.41644287109375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -6.208221435546875, "logits_per_char": -1.5520553588867188, "num_chars": 8}, {"sum_logits": -10.335794448852539, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.756488800048828, "logits_per_token": -5.1678972244262695, "logits_per_char": -0.9396176771684126, "num_chars": 11}, {"sum_logits": -12.021183013916016, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.414321899414062, "logits_per_token": -6.010591506958008, "logits_per_char": -1.001765251159668, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 529, "native_id": "4536489e5d8e02aadc3fcc7a55effe20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.215561866760254, "incorrect_loss_raw": 12.428007274866104, "correct_loss_per_char": 1.1519452333450317, "incorrect_loss_per_char": 0.658813961203161, "correct_loss_per_token": 9.215561866760254, "incorrect_loss_per_token": 5.440165013074875, "correct_loss_uncond": -6.782540321350098, "incorrect_loss_uncond": -5.632110446691513}, "model_output": [{"sum_logits": -4.374013423919678, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -2.187006711959839, "logits_per_char": -0.43740134239196776, "num_chars": 10}, {"sum_logits": -1.341267704963684, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -1.341267704963684, "logits_per_char": -0.1676584631204605, "num_chars": 8}, {"sum_logits": -21.40081787109375, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -10.700408935546875, "logits_per_char": -1.1263588353207237, "num_chars": 19}, {"sum_logits": -9.215561866760254, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -9.215561866760254, "logits_per_char": -1.1519452333450317, "num_chars": 8}, {"sum_logits": -22.595930099487305, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.752199172973633, "logits_per_token": -7.531976699829102, "logits_per_char": -0.9038372039794922, "num_chars": 25}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 530, "native_id": "0854478d174c9127064f0d4b58df7e62", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.083023071289062, "incorrect_loss_raw": 8.00007975101471, "correct_loss_per_char": 1.513837178548177, "incorrect_loss_per_char": 0.9050485757135209, "correct_loss_per_token": 9.083023071289062, "incorrect_loss_per_token": 4.9975796937942505, "correct_loss_uncond": -4.933176040649414, "incorrect_loss_uncond": -9.02354109287262}, "model_output": [{"sum_logits": -12.453407287597656, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.09770393371582, "logits_per_token": -4.151135762532552, "logits_per_char": -1.2453407287597655, "num_chars": 10}, {"sum_logits": -5.561593055725098, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.93619728088379, "logits_per_token": -1.853864351908366, "logits_per_char": -0.26483776455833796, "num_chars": 21}, {"sum_logits": -9.083023071289062, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.016199111938477, "logits_per_token": -9.083023071289062, "logits_per_char": -1.513837178548177, "num_chars": 6}, {"sum_logits": -5.3008952140808105, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -5.3008952140808105, "logits_per_char": -0.6626119017601013, "num_chars": 8}, {"sum_logits": -8.684423446655273, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.856280326843262, "logits_per_token": -8.684423446655273, "logits_per_char": -1.447403907775879, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 531, "native_id": "4b7d1b70060cd1f1a7321795f62a7325", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.820089817047119, "incorrect_loss_raw": 10.722097635269165, "correct_loss_per_char": 0.2350074847539266, "incorrect_loss_per_char": 1.5946928130255804, "correct_loss_per_token": 1.4100449085235596, "incorrect_loss_per_token": 7.395176649093628, "correct_loss_uncond": -17.739070415496826, "incorrect_loss_uncond": -5.763201951980591}, "model_output": [{"sum_logits": -14.96446418762207, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.85588264465332, "logits_per_token": -7.482232093811035, "logits_per_char": -1.662718243069119, "num_chars": 9}, {"sum_logits": -2.820089817047119, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -1.4100449085235596, "logits_per_char": -0.2350074847539266, "num_chars": 12}, {"sum_logits": -11.650903701782227, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.261762619018555, "logits_per_token": -5.825451850891113, "logits_per_char": -2.3301807403564454, "num_chars": 5}, {"sum_logits": -8.687322616577148, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.356603622436523, "logits_per_token": -8.687322616577148, "logits_per_char": -0.8687322616577149, "num_chars": 10}, {"sum_logits": -7.585700035095215, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.466949462890625, "logits_per_token": -7.585700035095215, "logits_per_char": -1.517140007019043, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 532, "native_id": "0e6a005eec5e6746f3facf4d608bfd8b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.167017936706543, "incorrect_loss_raw": 16.508653163909912, "correct_loss_per_char": 1.8334035873413086, "incorrect_loss_per_char": 1.321653500975681, "correct_loss_per_token": 9.167017936706543, "incorrect_loss_per_token": 9.057024796803791, "correct_loss_uncond": -3.930497169494629, "incorrect_loss_uncond": -1.2881019115447998}, "model_output": [{"sum_logits": -25.225126266479492, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.2445125579834, "logits_per_token": -8.40837542215983, "logits_per_char": -1.5765703916549683, "num_chars": 16}, {"sum_logits": -19.484643936157227, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.452266693115234, "logits_per_token": -6.494881312052409, "logits_per_char": -1.1461555256563074, "num_chars": 17}, {"sum_logits": -11.258752822875977, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -11.258752822875977, "logits_per_char": -1.1258752822875977, "num_chars": 10}, {"sum_logits": -9.167017936706543, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.097515106201172, "logits_per_token": -9.167017936706543, "logits_per_char": -1.8334035873413086, "num_chars": 5}, {"sum_logits": -10.066089630126953, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.641542434692383, "logits_per_token": -10.066089630126953, "logits_per_char": -1.4380128043038505, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 533, "native_id": "2d2b69ad187b7c40273ab13caab7dc19", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.931888580322266, "incorrect_loss_raw": 11.843414306640625, "correct_loss_per_char": 0.29011109296013327, "incorrect_loss_per_char": 1.2741527202583494, "correct_loss_per_token": 1.6439628601074219, "incorrect_loss_per_token": 7.472045024236043, "correct_loss_uncond": -15.219306945800781, "incorrect_loss_uncond": -5.061392545700073}, "model_output": [{"sum_logits": -4.931888580322266, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.151195526123047, "logits_per_token": -1.6439628601074219, "logits_per_char": -0.29011109296013327, "num_chars": 17}, {"sum_logits": -12.737016677856445, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.59212875366211, "logits_per_token": -6.368508338928223, "logits_per_char": -1.2737016677856445, "num_chars": 10}, {"sum_logits": -10.535822868347168, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -10.535822868347168, "logits_per_char": -1.505117552621024, "num_chars": 7}, {"sum_logits": -16.675453186035156, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.461444854736328, "logits_per_token": -5.558484395345052, "logits_per_char": -1.389621098836263, "num_chars": 12}, {"sum_logits": -7.4253644943237305, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.654996871948242, "logits_per_token": -7.4253644943237305, "logits_per_char": -0.9281705617904663, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 534, "native_id": "fde1f9bfc33da302449c0b950d16c0ea", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.397845983505249, "incorrect_loss_raw": 12.376965403556824, "correct_loss_per_char": 0.5663076639175415, "incorrect_loss_per_char": 1.5881528919393366, "correct_loss_per_token": 3.397845983505249, "incorrect_loss_per_token": 7.502355992794037, "correct_loss_uncond": -7.4605772495269775, "incorrect_loss_uncond": -3.9211262464523315}, "model_output": [{"sum_logits": -19.766395568847656, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.44843864440918, "logits_per_token": -9.883197784423828, "logits_per_char": -1.9766395568847657, "num_chars": 10}, {"sum_logits": -3.397845983505249, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -3.397845983505249, "logits_per_char": -0.5663076639175415, "num_chars": 6}, {"sum_logits": -7.418851375579834, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.27117919921875, "logits_per_token": -3.709425687789917, "logits_per_char": -0.6744410341436212, "num_chars": 11}, {"sum_logits": -11.811628341674805, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.689945220947266, "logits_per_token": -5.905814170837402, "logits_per_char": -1.0737843946977095, "num_chars": 11}, {"sum_logits": -10.510986328125, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.782803535461426, "logits_per_token": -10.510986328125, "logits_per_char": -2.62774658203125, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 535, "native_id": "3c90a632f46aeab11fbb73aa59a33892", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.549479961395264, "incorrect_loss_raw": 8.537830710411072, "correct_loss_per_char": 0.284342497587204, "incorrect_loss_per_char": 0.8121836427061315, "correct_loss_per_token": 1.516493320465088, "incorrect_loss_per_token": 5.037577211856842, "correct_loss_uncond": -11.758852481842041, "incorrect_loss_uncond": -8.927661299705505}, "model_output": [{"sum_logits": -4.549479961395264, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -1.516493320465088, "logits_per_char": -0.284342497587204, "num_chars": 16}, {"sum_logits": -7.429283618927002, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.39954376220703, "logits_per_token": -3.714641809463501, "logits_per_char": -0.43701668346629424, "num_chars": 17}, {"sum_logits": -10.421396255493164, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.268550872802734, "logits_per_token": -5.210698127746582, "logits_per_char": -0.9473996595902876, "num_chars": 11}, {"sum_logits": -6.149294853210449, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.211878776550293, "logits_per_token": -6.149294853210449, "logits_per_char": -1.22985897064209, "num_chars": 5}, {"sum_logits": -10.151348114013672, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.98199462890625, "logits_per_token": -5.075674057006836, "logits_per_char": -0.6344592571258545, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 536, "native_id": "1f3ccb722600da7d862531416934949a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.297645568847656, "incorrect_loss_raw": 6.396199345588684, "correct_loss_per_char": 0.4816041426225142, "incorrect_loss_per_char": 0.7945303968020848, "correct_loss_per_token": 2.648822784423828, "incorrect_loss_per_token": 4.649807095527649, "correct_loss_uncond": -14.278966903686523, "incorrect_loss_uncond": -11.449390530586243}, "model_output": [{"sum_logits": -9.374736785888672, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -4.687368392944336, "logits_per_char": -0.7812280654907227, "num_chars": 12}, {"sum_logits": -9.072132110595703, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.21324634552002, "logits_per_token": -9.072132110595703, "logits_per_char": -1.8144264221191406, "num_chars": 5}, {"sum_logits": -5.297645568847656, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.57661247253418, "logits_per_token": -2.648822784423828, "logits_per_char": -0.4816041426225142, "num_chars": 11}, {"sum_logits": -2.541527271270752, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -2.541527271270752, "logits_per_char": -0.2541527271270752, "num_chars": 10}, {"sum_logits": -4.596401214599609, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.72610855102539, "logits_per_token": -2.2982006072998047, "logits_per_char": -0.32831437247140066, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 537, "native_id": "46ba5d2b8cfc6708e5e2618568d8730e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6128368377685547, "incorrect_loss_raw": 11.041278839111328, "correct_loss_per_char": 0.2779105259821965, "incorrect_loss_per_char": 1.1346940801257177, "correct_loss_per_token": 1.8064184188842773, "incorrect_loss_per_token": 7.695758700370789, "correct_loss_uncond": -16.480310440063477, "incorrect_loss_uncond": -5.822110652923584}, "model_output": [{"sum_logits": -3.6128368377685547, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.09314727783203, "logits_per_token": -1.8064184188842773, "logits_per_char": -0.2779105259821965, "num_chars": 13}, {"sum_logits": -8.449459075927734, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -8.449459075927734, "logits_per_char": -1.2070655822753906, "num_chars": 7}, {"sum_logits": -8.951495170593262, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -8.951495170593262, "logits_per_char": -1.278785024370466, "num_chars": 7}, {"sum_logits": -16.118898391723633, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -8.059449195861816, "logits_per_char": -1.343241532643636, "num_chars": 12}, {"sum_logits": -10.645262718200684, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -5.322631359100342, "logits_per_char": -0.709684181213379, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 538, "native_id": "f8a2cbc7189b92a809ce9cd857030621", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.806641101837158, "incorrect_loss_raw": 12.460404872894287, "correct_loss_per_char": 0.6451823446485732, "incorrect_loss_per_char": 1.0846252088508908, "correct_loss_per_token": 2.903320550918579, "incorrect_loss_per_token": 7.961042722066243, "correct_loss_uncond": -11.500766277313232, "incorrect_loss_uncond": -5.253343820571899}, "model_output": [{"sum_logits": -16.291996002197266, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -23.15835189819336, "logits_per_token": -5.430665334065755, "logits_per_char": -1.1637140001569475, "num_chars": 14}, {"sum_logits": -5.806641101837158, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.30740737915039, "logits_per_token": -2.903320550918579, "logits_per_char": -0.6451823446485732, "num_chars": 9}, {"sum_logits": -13.41856861114502, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.790875434875488, "logits_per_token": -13.41856861114502, "logits_per_char": -1.4909520679050021, "num_chars": 9}, {"sum_logits": -14.272235870361328, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.849214553833008, "logits_per_token": -7.136117935180664, "logits_per_char": -0.9514823913574219, "num_chars": 15}, {"sum_logits": -5.858819007873535, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.05655288696289, "logits_per_token": -5.858819007873535, "logits_per_char": -0.7323523759841919, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 539, "native_id": "225287e06c993feee34e0f06b25f6ba8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.038940906524658, "incorrect_loss_raw": 9.359745025634766, "correct_loss_per_char": 0.6731568177541097, "incorrect_loss_per_char": 1.3983370458924926, "correct_loss_per_token": 4.038940906524658, "incorrect_loss_per_token": 7.9522868394851685, "correct_loss_uncond": -6.471679210662842, "incorrect_loss_uncond": -2.839965581893921}, "model_output": [{"sum_logits": -11.259665489196777, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.329689979553223, "logits_per_token": -5.629832744598389, "logits_per_char": -1.0236059535633435, "num_chars": 11}, {"sum_logits": -4.038940906524658, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.5106201171875, "logits_per_token": -4.038940906524658, "logits_per_char": -0.6731568177541097, "num_chars": 6}, {"sum_logits": -9.16753101348877, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -9.16753101348877, "logits_per_char": -1.833506202697754, "num_chars": 5}, {"sum_logits": -5.354671478271484, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -5.354671478271484, "logits_per_char": -1.0709342956542969, "num_chars": 5}, {"sum_logits": -11.657112121582031, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.765427589416504, "logits_per_token": -11.657112121582031, "logits_per_char": -1.665301731654576, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 540, "native_id": "e211b1a3f3401d164c8b0bfc10160caa", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.662575721740723, "incorrect_loss_raw": 11.20576286315918, "correct_loss_per_char": 0.5095632777494543, "incorrect_loss_per_char": 1.1956107375167666, "correct_loss_per_token": 4.331287860870361, "incorrect_loss_per_token": 7.901653289794922, "correct_loss_uncond": -11.842738151550293, "incorrect_loss_uncond": -5.579457521438599}, "model_output": [{"sum_logits": -12.082414627075195, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.624314308166504, "logits_per_token": -12.082414627075195, "logits_per_char": -1.5103018283843994, "num_chars": 8}, {"sum_logits": -19.824657440185547, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.19081687927246, "logits_per_token": -6.608219146728516, "logits_per_char": -1.4160469600132533, "num_chars": 14}, {"sum_logits": -8.662575721740723, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.505313873291016, "logits_per_token": -4.331287860870361, "logits_per_char": -0.5095632777494543, "num_chars": 17}, {"sum_logits": -5.798321723937988, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -5.798321723937988, "logits_per_char": -0.9663869539896647, "num_chars": 6}, {"sum_logits": -7.117657661437988, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -7.117657661437988, "logits_per_char": -0.8897072076797485, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 541, "native_id": "fce1c5d069758aea57a787fc98dcf7a9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.845140218734741, "incorrect_loss_raw": 11.530033349990845, "correct_loss_per_char": 0.4064486026763916, "incorrect_loss_per_char": 1.0564260386927482, "correct_loss_per_token": 2.845140218734741, "incorrect_loss_per_token": 6.562247196833292, "correct_loss_uncond": -9.077307939529419, "incorrect_loss_uncond": -5.4235875606536865}, "model_output": [{"sum_logits": -16.430675506591797, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.102293014526367, "logits_per_token": -5.476891835530599, "logits_per_char": -1.1736196790422713, "num_chars": 14}, {"sum_logits": -8.104076385498047, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -8.104076385498047, "logits_per_char": -0.6233904911921575, "num_chars": 13}, {"sum_logits": -13.376041412353516, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.349462509155273, "logits_per_token": -4.458680470784505, "logits_per_char": -0.7868259654325598, "num_chars": 17}, {"sum_logits": -8.20934009552002, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -8.20934009552002, "logits_per_char": -1.6418680191040038, "num_chars": 5}, {"sum_logits": -2.845140218734741, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -2.845140218734741, "logits_per_char": -0.4064486026763916, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 542, "native_id": "c0d75f9fbf30aa3a612f16edb20d6b8d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.006834506988525, "incorrect_loss_raw": 7.061867117881775, "correct_loss_per_char": 0.6258543133735657, "incorrect_loss_per_char": 1.2660510301589967, "correct_loss_per_token": 5.006834506988525, "incorrect_loss_per_token": 7.061867117881775, "correct_loss_uncond": -8.951515674591064, "incorrect_loss_uncond": -6.17355477809906}, "model_output": [{"sum_logits": -8.063301086425781, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -8.063301086425781, "logits_per_char": -1.6126602172851563, "num_chars": 5}, {"sum_logits": -5.006834506988525, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -5.006834506988525, "logits_per_char": -0.6258543133735657, "num_chars": 8}, {"sum_logits": -4.203747749328613, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -4.203747749328613, "logits_per_char": -0.6005353927612305, "num_chars": 7}, {"sum_logits": -10.352262496948242, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -10.352262496948242, "logits_per_char": -1.725377082824707, "num_chars": 6}, {"sum_logits": -5.628157138824463, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -5.628157138824463, "logits_per_char": -1.1256314277648927, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 543, "native_id": "d07f149d8d953dcc45dda432194c375e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.430942535400391, "incorrect_loss_raw": 13.836796998977661, "correct_loss_per_char": 0.4644339084625244, "incorrect_loss_per_char": 1.0524874656306586, "correct_loss_per_token": 2.4769808451334634, "incorrect_loss_per_token": 6.69346825281779, "correct_loss_uncond": -11.2908935546875, "incorrect_loss_uncond": -7.937867879867554}, "model_output": [{"sum_logits": -11.331059455871582, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -27.70063018798828, "logits_per_token": -3.777019818623861, "logits_per_char": -0.5963715503090307, "num_chars": 19}, {"sum_logits": -7.430942535400391, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.72183609008789, "logits_per_token": -2.4769808451334634, "logits_per_char": -0.4644339084625244, "num_chars": 16}, {"sum_logits": -8.801641464233398, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.319238662719727, "logits_per_token": -8.801641464233398, "logits_per_char": -0.6770493434025691, "num_chars": 13}, {"sum_logits": -20.472190856933594, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.83533477783203, "logits_per_token": -6.824063618977864, "logits_per_char": -1.4622993469238281, "num_chars": 14}, {"sum_logits": -14.74229621887207, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.24345588684082, "logits_per_token": -7.371148109436035, "logits_per_char": -1.474229621887207, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 544, "native_id": "080a9cf2d6447a9a4d98b0af311e10da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.968812942504883, "incorrect_loss_raw": 17.303557872772217, "correct_loss_per_char": 0.5312541961669922, "incorrect_loss_per_char": 1.4668348524305557, "correct_loss_per_token": 3.9844064712524414, "incorrect_loss_per_token": 9.911630868911743, "correct_loss_uncond": -7.888925552368164, "incorrect_loss_uncond": -3.3816347122192383}, "model_output": [{"sum_logits": -32.61431121826172, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -37.43246841430664, "logits_per_token": -10.871437072753906, "logits_per_char": -2.3295936584472656, "num_chars": 14}, {"sum_logits": -12.472336769104004, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.255508422851562, "logits_per_token": -12.472336769104004, "logits_per_char": -1.3858151965671115, "num_chars": 9}, {"sum_logits": -15.649667739868164, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.56256103515625, "logits_per_token": -7.824833869934082, "logits_per_char": -1.304138978322347, "num_chars": 12}, {"sum_logits": -7.968812942504883, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.857738494873047, "logits_per_token": -3.9844064712524414, "logits_per_char": -0.5312541961669922, "num_chars": 15}, {"sum_logits": -8.47791576385498, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -8.47791576385498, "logits_per_char": -0.8477915763854981, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 545, "native_id": "111501a49dd41ceed9c2073eed5d2b72", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.3323286175727844, "incorrect_loss_raw": 10.829407930374146, "correct_loss_per_char": 0.04747551679611206, "incorrect_loss_per_char": 1.1020209794952756, "correct_loss_per_token": 0.3323286175727844, "incorrect_loss_per_token": 9.450650572776794, "correct_loss_uncond": -13.739120662212372, "incorrect_loss_uncond": -3.8726892471313477}, "model_output": [{"sum_logits": -0.3323286175727844, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -14.071449279785156, "logits_per_token": -0.3323286175727844, "logits_per_char": -0.04747551679611206, "num_chars": 7}, {"sum_logits": -7.033734321594238, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -7.033734321594238, "logits_per_char": -1.0048191887991769, "num_chars": 7}, {"sum_logits": -13.507500648498535, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.38656997680664, "logits_per_token": -13.507500648498535, "logits_per_char": -1.1256250540415447, "num_chars": 12}, {"sum_logits": -11.030058860778809, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -5.515029430389404, "logits_per_char": -1.1030058860778809, "num_chars": 10}, {"sum_logits": -11.746337890625, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.545125007629395, "logits_per_token": -11.746337890625, "logits_per_char": -1.1746337890625, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 546, "native_id": "7bb87c6d8eab57d4e983f60025b1f0dc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.112782001495361, "incorrect_loss_raw": 10.56641960144043, "correct_loss_per_char": 0.3427318334579468, "incorrect_loss_per_char": 1.1711311638355255, "correct_loss_per_token": 1.370927333831787, "incorrect_loss_per_token": 6.571418642997742, "correct_loss_uncond": -12.030673503875732, "incorrect_loss_uncond": -6.889188528060913}, "model_output": [{"sum_logits": -10.305670738220215, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.181550025939941, "logits_per_token": -10.305670738220215, "logits_per_char": -1.7176117897033691, "num_chars": 6}, {"sum_logits": -7.389936447143555, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.29041290283203, "logits_per_token": -3.6949682235717773, "logits_per_char": -0.46187102794647217, "num_chars": 16}, {"sum_logits": -16.47129249572754, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.23564624786377, "logits_per_char": -1.8301436106363933, "num_chars": 9}, {"sum_logits": -4.112782001495361, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.143455505371094, "logits_per_token": -1.370927333831787, "logits_per_char": -0.3427318334579468, "num_chars": 12}, {"sum_logits": -8.09877872467041, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -4.049389362335205, "logits_per_char": -0.6748982270558676, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 547, "native_id": "5c2bc4335c8860342ec2d568ceb6ac6b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.5779547691345215, "incorrect_loss_raw": 12.093478679656982, "correct_loss_per_char": 0.619772752126058, "incorrect_loss_per_char": 0.9976654487287896, "correct_loss_per_token": 2.7889773845672607, "incorrect_loss_per_token": 8.008210182189941, "correct_loss_uncond": -11.393316745758057, "incorrect_loss_uncond": -4.900861024856567}, "model_output": [{"sum_logits": -8.033428192138672, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -8.033428192138672, "logits_per_char": -0.6179560147798978, "num_chars": 13}, {"sum_logits": -12.777200698852539, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.72410774230957, "logits_per_token": -6.3886003494262695, "logits_per_char": -1.277720069885254, "num_chars": 10}, {"sum_logits": -5.5779547691345215, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -2.7889773845672607, "logits_per_char": -0.619772752126058, "num_chars": 9}, {"sum_logits": -19.90494728088379, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.581262588500977, "logits_per_token": -9.952473640441895, "logits_per_char": -1.2440592050552368, "num_chars": 16}, {"sum_logits": -7.65833854675293, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.27890968322754, "logits_per_token": -7.65833854675293, "logits_per_char": -0.85092650519477, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 548, "native_id": "083861fc5ebb9226fff70544f3f83d2b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.39186429977417, "incorrect_loss_raw": 9.820583581924438, "correct_loss_per_char": 0.1988377571105957, "incorrect_loss_per_char": 0.7941476354232201, "correct_loss_per_token": 1.39186429977417, "incorrect_loss_per_token": 5.596176505088806, "correct_loss_uncond": -13.879416942596436, "incorrect_loss_uncond": -9.414179801940918}, "model_output": [{"sum_logits": -1.39186429977417, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -1.39186429977417, "logits_per_char": -0.1988377571105957, "num_chars": 7}, {"sum_logits": -10.620606422424316, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -5.310303211212158, "logits_per_char": -0.708040428161621, "num_chars": 15}, {"sum_logits": -8.272526741027832, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.29840087890625, "logits_per_token": -4.136263370513916, "logits_per_char": -0.6363482108482947, "num_chars": 13}, {"sum_logits": -14.90212345123291, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.368896484375, "logits_per_token": -7.451061725616455, "logits_per_char": -1.1463171885563777, "num_chars": 13}, {"sum_logits": -5.487077713012695, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.117548942565918, "logits_per_token": -5.487077713012695, "logits_per_char": -0.6858847141265869, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 549, "native_id": "520b0eea9148e3cb4d45aa69a55491eb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.1699110269546509, "incorrect_loss_raw": 7.162878006696701, "correct_loss_per_char": 0.08999315591958854, "incorrect_loss_per_char": 0.7277548939666965, "correct_loss_per_token": 1.1699110269546509, "incorrect_loss_per_token": 6.048960894346237, "correct_loss_uncond": -14.223167777061462, "incorrect_loss_uncond": -9.263854295015335}, "model_output": [{"sum_logits": -8.911336898803711, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.930557250976562, "logits_per_token": -4.4556684494018555, "logits_per_char": -0.9901485443115234, "num_chars": 9}, {"sum_logits": -1.8223603963851929, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -1.8223603963851929, "logits_per_char": -0.2277950495481491, "num_chars": 8}, {"sum_logits": -10.857604026794434, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -10.857604026794434, "logits_per_char": -0.9870549115267667, "num_chars": 11}, {"sum_logits": -7.060210704803467, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.356603622436523, "logits_per_token": -7.060210704803467, "logits_per_char": -0.7060210704803467, "num_chars": 10}, {"sum_logits": -1.1699110269546509, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -1.1699110269546509, "logits_per_char": -0.08999315591958854, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 550, "native_id": "ef6ede0af827ddd1dc7bbeb36a6fdd22", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.780256748199463, "incorrect_loss_raw": 8.24364161491394, "correct_loss_per_char": 0.7533618609110514, "incorrect_loss_per_char": 1.0309499308541343, "correct_loss_per_token": 3.3901283740997314, "incorrect_loss_per_token": 4.9067676067352295, "correct_loss_uncond": -10.770333766937256, "incorrect_loss_uncond": -7.540222406387329}, "model_output": [{"sum_logits": -6.780256748199463, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.55059051513672, "logits_per_token": -3.3901283740997314, "logits_per_char": -0.7533618609110514, "num_chars": 9}, {"sum_logits": -6.279574394226074, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -6.279574394226074, "logits_per_char": -1.2559148788452148, "num_chars": 5}, {"sum_logits": -6.266027450561523, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.30693244934082, "logits_per_token": -3.1330137252807617, "logits_per_char": -0.5696388591419567, "num_chars": 11}, {"sum_logits": -11.02293872833252, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -5.51146936416626, "logits_per_char": -1.5747055326189314, "num_chars": 7}, {"sum_logits": -9.406025886535645, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.435239791870117, "logits_per_token": -4.703012943267822, "logits_per_char": -0.7235404528104342, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 551, "native_id": "d47986deb91d64b2b15d385da3d2f483", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.22160530090332, "incorrect_loss_raw": 10.910125970840454, "correct_loss_per_char": 0.4248003118178424, "incorrect_loss_per_char": 1.440979995046343, "correct_loss_per_token": 3.61080265045166, "incorrect_loss_per_token": 7.009271860122681, "correct_loss_uncond": -11.693222045898438, "incorrect_loss_uncond": -5.572429418563843}, "model_output": [{"sum_logits": -12.433670997619629, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.021522521972656, "logits_per_token": -12.433670997619629, "logits_per_char": -2.4867341995239256, "num_chars": 5}, {"sum_logits": -7.305335998535156, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.505313873291016, "logits_per_token": -3.652667999267578, "logits_per_char": -0.42972564697265625, "num_chars": 17}, {"sum_logits": -9.526262283325195, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.343435287475586, "logits_per_token": -4.763131141662598, "logits_per_char": -0.7938551902770996, "num_chars": 12}, {"sum_logits": -14.375234603881836, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.05994987487793, "logits_per_token": -7.187617301940918, "logits_per_char": -2.053604943411691, "num_chars": 7}, {"sum_logits": -7.22160530090332, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.914827346801758, "logits_per_token": -3.61080265045166, "logits_per_char": -0.4248003118178424, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 552, "native_id": "c3b7f4196b12714940ac1b9417194df4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.022890090942383, "incorrect_loss_raw": 8.780198574066162, "correct_loss_per_char": 1.146127155848912, "incorrect_loss_per_char": 1.1059826031709328, "correct_loss_per_token": 8.022890090942383, "incorrect_loss_per_token": 7.2675395011901855, "correct_loss_uncond": -7.382808685302734, "incorrect_loss_uncond": -6.374991416931152}, "model_output": [{"sum_logits": -6.631848335266113, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.927528381347656, "logits_per_token": -6.631848335266113, "logits_per_char": -1.1053080558776855, "num_chars": 6}, {"sum_logits": -10.20424747467041, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.464624404907227, "logits_per_token": -10.20424747467041, "logits_per_char": -1.7007079124450684, "num_chars": 6}, {"sum_logits": -12.101272583007812, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.09314727783203, "logits_per_token": -6.050636291503906, "logits_per_char": -0.9308671217698318, "num_chars": 13}, {"sum_logits": -8.022890090942383, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -8.022890090942383, "logits_per_char": -1.146127155848912, "num_chars": 7}, {"sum_logits": -6.1834259033203125, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -6.1834259033203125, "logits_per_char": -0.6870473225911459, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 553, "native_id": "5d03ad171fd661a28da5b6eb79967a6b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.53076457977295, "incorrect_loss_raw": 9.891688585281372, "correct_loss_per_char": 0.8100588138286884, "incorrect_loss_per_char": 0.8443356876249437, "correct_loss_per_token": 5.265382289886475, "incorrect_loss_per_token": 5.750021278858185, "correct_loss_uncond": -8.632271766662598, "incorrect_loss_uncond": -9.207892417907715}, "model_output": [{"sum_logits": -3.2107090950012207, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.36980438232422, "logits_per_token": -1.6053545475006104, "logits_per_char": -0.29188264500011096, "num_chars": 11}, {"sum_logits": -19.49510955810547, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.974931716918945, "logits_per_token": -9.747554779052734, "logits_per_char": -1.2184443473815918, "num_chars": 16}, {"sum_logits": -10.53076457977295, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.163036346435547, "logits_per_token": -5.265382289886475, "logits_per_char": -0.8100588138286884, "num_chars": 13}, {"sum_logits": -6.43341588973999, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.553471565246582, "logits_per_token": -6.43341588973999, "logits_per_char": -0.9190594128199986, "num_chars": 7}, {"sum_logits": -10.427519798278809, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.5001163482666, "logits_per_token": -5.213759899139404, "logits_per_char": -0.9479563452980735, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 554, "native_id": "7c95d753943c58757fe6e1ccff8aea14", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.667030334472656, "incorrect_loss_raw": 11.457147598266602, "correct_loss_per_char": 0.291689395904541, "incorrect_loss_per_char": 0.8018590704266564, "correct_loss_per_token": 1.555676778157552, "incorrect_loss_per_token": 5.894948561986287, "correct_loss_uncond": -11.641302108764648, "incorrect_loss_uncond": -8.354092121124268}, "model_output": [{"sum_logits": -17.231029510498047, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.745128631591797, "logits_per_token": -5.743676503499349, "logits_per_char": -0.7179595629374186, "num_chars": 24}, {"sum_logits": -12.485098838806152, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.67490005493164, "logits_per_token": -6.242549419403076, "logits_per_char": -0.960392218369704, "num_chars": 13}, {"sum_logits": -4.667030334472656, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -1.555676778157552, "logits_per_char": -0.291689395904541, "num_chars": 16}, {"sum_logits": -9.037787437438965, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.268550872802734, "logits_per_token": -4.518893718719482, "logits_per_char": -0.8216170397671786, "num_chars": 11}, {"sum_logits": -7.074674606323242, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.556379318237305, "logits_per_token": -7.074674606323242, "logits_per_char": -0.7074674606323242, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 555, "native_id": "88d8bfb9dc8e77ef642acbe1a129f3db", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.774003505706787, "incorrect_loss_raw": 9.478296756744385, "correct_loss_per_char": 0.6774003505706787, "incorrect_loss_per_char": 0.9468473805321588, "correct_loss_per_token": 3.3870017528533936, "incorrect_loss_per_token": 4.739148378372192, "correct_loss_uncond": -7.4881110191345215, "incorrect_loss_uncond": -7.59220814704895}, "model_output": [{"sum_logits": -12.923065185546875, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -6.4615325927734375, "logits_per_char": -1.4358961317274306, "num_chars": 9}, {"sum_logits": -11.356441497802734, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -5.678220748901367, "logits_per_char": -1.2618268330891926, "num_chars": 9}, {"sum_logits": -2.7884082794189453, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -15.397775650024414, "logits_per_token": -1.3942041397094727, "logits_per_char": -0.18589388529459636, "num_chars": 15}, {"sum_logits": -6.774003505706787, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.262114524841309, "logits_per_token": -3.3870017528533936, "logits_per_char": -0.6774003505706787, "num_chars": 10}, {"sum_logits": -10.845272064208984, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -5.422636032104492, "logits_per_char": -0.9037726720174154, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 556, "native_id": "b1a9b20793b46e46e1beedadbf852f84", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3873238563537598, "incorrect_loss_raw": 9.386384844779968, "correct_loss_per_char": 0.29841548204421997, "incorrect_loss_per_char": 0.9545264361396668, "correct_loss_per_token": 2.3873238563537598, "incorrect_loss_per_token": 6.182543317476908, "correct_loss_uncond": -11.033887386322021, "incorrect_loss_uncond": -6.556934952735901}, "model_output": [{"sum_logits": -2.3873238563537598, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.421211242675781, "logits_per_token": -2.3873238563537598, "logits_per_char": -0.29841548204421997, "num_chars": 8}, {"sum_logits": -12.176284790039062, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.07984161376953, "logits_per_token": -6.088142395019531, "logits_per_char": -0.5798230852399554, "num_chars": 21}, {"sum_logits": -10.090835571289062, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.021394729614258, "logits_per_token": -3.363611857096354, "logits_per_char": -0.5606019761827257, "num_chars": 18}, {"sum_logits": -6.615054607391357, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.463874816894531, "logits_per_token": -6.615054607391357, "logits_per_char": -0.9450078010559082, "num_chars": 7}, {"sum_logits": -8.66336441040039, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.208168029785156, "logits_per_token": -8.66336441040039, "logits_per_char": -1.732672882080078, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 557, "native_id": "81e016974d33fe383c848b6c819791cd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.589669704437256, "incorrect_loss_raw": 10.541845440864563, "correct_loss_per_char": 0.448708713054657, "incorrect_loss_per_char": 1.1143223772396693, "correct_loss_per_token": 3.589669704437256, "incorrect_loss_per_token": 7.176524043083191, "correct_loss_uncond": -9.359591007232666, "incorrect_loss_uncond": -4.759113430976868}, "model_output": [{"sum_logits": -3.589669704437256, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.949260711669922, "logits_per_token": -3.589669704437256, "logits_per_char": -0.448708713054657, "num_chars": 8}, {"sum_logits": -5.914496898651123, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.877777099609375, "logits_per_token": -5.914496898651123, "logits_per_char": -0.5914496898651123, "num_chars": 10}, {"sum_logits": -13.486875534057617, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -6.743437767028809, "logits_per_char": -1.926696504865374, "num_chars": 7}, {"sum_logits": -9.330313682556152, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.371931076049805, "logits_per_token": -9.330313682556152, "logits_per_char": -0.717716437119704, "num_chars": 13}, {"sum_logits": -13.43569564819336, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.419254302978516, "logits_per_token": -6.71784782409668, "logits_per_char": -1.2214268771084873, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 558, "native_id": "7cf54544d54818d53e7088c0749a3eca", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.437967300415039, "incorrect_loss_raw": 12.544702291488647, "correct_loss_per_char": 0.6291978200276692, "incorrect_loss_per_char": 0.7981805606215607, "correct_loss_per_token": 4.7189836502075195, "incorrect_loss_per_token": 4.966827531655629, "correct_loss_uncond": -10.697122573852539, "incorrect_loss_uncond": -6.814890623092651}, "model_output": [{"sum_logits": -10.772459030151367, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.135892868041992, "logits_per_token": -5.386229515075684, "logits_per_char": -0.6732786893844604, "num_chars": 16}, {"sum_logits": -14.93624496459961, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.652280807495117, "logits_per_token": -7.468122482299805, "logits_per_char": -0.9957496643066406, "num_chars": 15}, {"sum_logits": -9.437967300415039, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.135089874267578, "logits_per_token": -4.7189836502075195, "logits_per_char": -0.6291978200276692, "num_chars": 15}, {"sum_logits": -13.724923133850098, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -22.152891159057617, "logits_per_token": -3.4312307834625244, "logits_per_char": -0.807348419638241, "num_chars": 17}, {"sum_logits": -10.745182037353516, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.49730682373047, "logits_per_token": -3.5817273457845054, "logits_per_char": -0.716345469156901, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 559, "native_id": "6acd88b9b5dd15e23bbcc3fd679100a8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2726378440856934, "incorrect_loss_raw": 12.104841232299805, "correct_loss_per_char": 0.15150918960571289, "incorrect_loss_per_char": 1.2651049026476793, "correct_loss_per_token": 2.2726378440856934, "incorrect_loss_per_token": 7.964703559875488, "correct_loss_uncond": -11.02895212173462, "incorrect_loss_uncond": -3.7366185188293457}, "model_output": [{"sum_logits": -2.2726378440856934, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.301589965820312, "logits_per_token": -2.2726378440856934, "logits_per_char": -0.15150918960571289, "num_chars": 15}, {"sum_logits": -5.051117897033691, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.383709907531738, "logits_per_token": -5.051117897033691, "logits_per_char": -0.5612353218926324, "num_chars": 9}, {"sum_logits": -12.647598266601562, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.58812141418457, "logits_per_token": -6.323799133300781, "logits_per_char": -0.7439763686236214, "num_chars": 17}, {"sum_logits": -10.247145652770996, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.720282554626465, "logits_per_token": -10.247145652770996, "logits_per_char": -1.707857608795166, "num_chars": 6}, {"sum_logits": -20.47350311279297, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -10.236751556396484, "logits_per_char": -2.047350311279297, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 560, "native_id": "c96a86957a9ab1d8ca0aeeb7f040d87a_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.4356913566589355, "incorrect_loss_raw": 5.482113629579544, "correct_loss_per_char": 0.24356913566589355, "incorrect_loss_per_char": 0.8553426239107336, "correct_loss_per_token": 2.4356913566589355, "incorrect_loss_per_token": 5.482113629579544, "correct_loss_uncond": -11.669766902923584, "incorrect_loss_uncond": -9.073147028684616}, "model_output": [{"sum_logits": -6.6840500831604, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -6.6840500831604, "logits_per_char": -0.9548642975943429, "num_chars": 7}, {"sum_logits": -2.4356913566589355, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -2.4356913566589355, "logits_per_char": -0.24356913566589355, "num_chars": 10}, {"sum_logits": -7.02162504196167, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -7.02162504196167, "logits_per_char": -1.170270840326945, "num_chars": 6}, {"sum_logits": -1.7814689874649048, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -1.7814689874649048, "logits_per_char": -0.2226836234331131, "num_chars": 8}, {"sum_logits": -6.441310405731201, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -6.441310405731201, "logits_per_char": -1.0735517342885335, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 561, "native_id": "6a1bf527af9ed0685ac5e2bf0bd76647", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.218766689300537, "incorrect_loss_raw": 13.158872604370117, "correct_loss_per_char": 0.40234583616256714, "incorrect_loss_per_char": 1.2951993921300868, "correct_loss_per_token": 3.218766689300537, "incorrect_loss_per_token": 9.794960260391235, "correct_loss_uncond": -11.233685970306396, "incorrect_loss_uncond": -3.1753365993499756}, "model_output": [{"sum_logits": -13.164360046386719, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.414422035217285, "logits_per_token": -13.164360046386719, "logits_per_char": -1.3164360046386718, "num_chars": 10}, {"sum_logits": -3.218766689300537, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -3.218766689300537, "logits_per_char": -0.40234583616256714, "num_chars": 8}, {"sum_logits": -10.664115905761719, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -5.332057952880859, "logits_per_char": -0.8203166081355169, "num_chars": 13}, {"sum_logits": -16.247182846069336, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.77212142944336, "logits_per_token": -8.123591423034668, "logits_per_char": -1.2497832958514874, "num_chars": 13}, {"sum_logits": -12.559831619262695, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.127243041992188, "logits_per_token": -12.559831619262695, "logits_per_char": -1.7942616598946708, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 562, "native_id": "094fe91b20b03c647325fa2ee94470b3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.99692153930664, "incorrect_loss_raw": 8.331909894943237, "correct_loss_per_char": 1.12461519241333, "incorrect_loss_per_char": 0.9505781067742242, "correct_loss_per_token": 4.49846076965332, "incorrect_loss_per_token": 4.888848185539246, "correct_loss_uncond": -7.461494445800781, "incorrect_loss_uncond": -9.739007472991943}, "model_output": [{"sum_logits": -5.983678817749023, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.131707191467285, "logits_per_token": -2.9918394088745117, "logits_per_char": -0.854811259678432, "num_chars": 7}, {"sum_logits": -8.99692153930664, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.458415985107422, "logits_per_token": -4.49846076965332, "logits_per_char": -1.12461519241333, "num_chars": 8}, {"sum_logits": -9.875110626220703, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.7755184173584, "logits_per_token": -4.937555313110352, "logits_per_char": -0.822925885518392, "num_chars": 12}, {"sum_logits": -11.685704231262207, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.852867126464844, "logits_per_token": -5.8428521156311035, "logits_per_char": -1.2984115812513564, "num_chars": 9}, {"sum_logits": -5.783145904541016, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -5.783145904541016, "logits_per_char": -0.8261637006487165, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 563, "native_id": "bee2a6eadfaf7a4fa0a214e341ddbe5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9848841428756714, "incorrect_loss_raw": 9.982115983963013, "correct_loss_per_char": 0.1406977346965245, "incorrect_loss_per_char": 1.2419256540861996, "correct_loss_per_token": 0.9848841428756714, "incorrect_loss_per_token": 9.982115983963013, "correct_loss_uncond": -12.789809346199036, "incorrect_loss_uncond": -4.854554176330566}, "model_output": [{"sum_logits": -0.9848841428756714, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -13.774693489074707, "logits_per_token": -0.9848841428756714, "logits_per_char": -0.1406977346965245, "num_chars": 7}, {"sum_logits": -12.301251411437988, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -12.301251411437988, "logits_per_char": -2.050208568572998, "num_chars": 6}, {"sum_logits": -4.181708335876465, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.933419227600098, "logits_per_token": -4.181708335876465, "logits_per_char": -0.5227135419845581, "num_chars": 8}, {"sum_logits": -10.408638000488281, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -10.408638000488281, "logits_per_char": -0.9462398182262074, "num_chars": 11}, {"sum_logits": -13.036866188049316, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.23413848876953, "logits_per_token": -13.036866188049316, "logits_per_char": -1.4485406875610352, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 564, "native_id": "2f97a77d155cb99092e8a7c055737b03_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.073373794555664, "incorrect_loss_raw": 16.72470259666443, "correct_loss_per_char": 0.759171724319458, "incorrect_loss_per_char": 1.305028995540407, "correct_loss_per_token": 2.0244579315185547, "incorrect_loss_per_token": 6.14508871237437, "correct_loss_uncond": -10.802042007446289, "incorrect_loss_uncond": -3.4035544395446777}, "model_output": [{"sum_logits": -11.616157531738281, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -3.8720525105794272, "logits_per_char": -1.2906841701931424, "num_chars": 9}, {"sum_logits": -21.077768325805664, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.628948211669922, "logits_per_token": -7.025922775268555, "logits_per_char": -1.317360520362854, "num_chars": 16}, {"sum_logits": -20.520376205444336, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.242900848388672, "logits_per_token": -6.840125401814778, "logits_per_char": -1.3680250803629557, "num_chars": 15}, {"sum_logits": -6.073373794555664, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.875415802001953, "logits_per_token": -2.0244579315185547, "logits_per_char": -0.759171724319458, "num_chars": 8}, {"sum_logits": -13.684508323669434, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.00588607788086, "logits_per_token": -6.842254161834717, "logits_per_char": -1.2440462112426758, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 565, "native_id": "bc268cd19e2c95c78967fd6b9092fb90", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.336417198181152, "incorrect_loss_raw": 11.072226405143738, "correct_loss_per_char": 0.7578561089255593, "incorrect_loss_per_char": 1.2426867683728537, "correct_loss_per_token": 4.168208599090576, "incorrect_loss_per_token": 5.536113202571869, "correct_loss_uncond": -11.651116371154785, "incorrect_loss_uncond": -8.623715996742249}, "model_output": [{"sum_logits": -8.336417198181152, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.987533569335938, "logits_per_token": -4.168208599090576, "logits_per_char": -0.7578561089255593, "num_chars": 11}, {"sum_logits": -10.974039077758789, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.97695541381836, "logits_per_token": -5.4870195388793945, "logits_per_char": -0.914503256479899, "num_chars": 12}, {"sum_logits": -6.537808895111084, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.851922988891602, "logits_per_token": -3.268904447555542, "logits_per_char": -1.0896348158518474, "num_chars": 6}, {"sum_logits": -15.220928192138672, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.780696868896484, "logits_per_token": -7.610464096069336, "logits_per_char": -1.5220928192138672, "num_chars": 10}, {"sum_logits": -11.556129455566406, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.1741943359375, "logits_per_token": -5.778064727783203, "logits_per_char": -1.4445161819458008, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 566, "native_id": "060cad0d3c007ceb151db9907bfcb214", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.301124572753906, "incorrect_loss_raw": 12.455511569976807, "correct_loss_per_char": 0.5813202857971191, "incorrect_loss_per_char": 1.7813875241157335, "correct_loss_per_token": 4.650562286376953, "incorrect_loss_per_token": 10.222498178482056, "correct_loss_uncond": -10.350149154663086, "incorrect_loss_uncond": -4.621133804321289}, "model_output": [{"sum_logits": -12.751944541931152, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.386359214782715, "logits_per_token": -12.751944541931152, "logits_per_char": -2.5503889083862306, "num_chars": 5}, {"sum_logits": -10.475188255310059, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.464624404907227, "logits_per_token": -10.475188255310059, "logits_per_char": -1.745864709218343, "num_chars": 6}, {"sum_logits": -17.864107131958008, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.871232986450195, "logits_per_token": -8.932053565979004, "logits_per_char": -1.3741620870736928, "num_chars": 13}, {"sum_logits": -9.301124572753906, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.651273727416992, "logits_per_token": -4.650562286376953, "logits_per_char": -0.5813202857971191, "num_chars": 16}, {"sum_logits": -8.730806350708008, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -8.730806350708008, "logits_per_char": -1.455134391784668, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 567, "native_id": "29c2cc0ba85b4afb9c9d29801469a68f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.358193397521973, "incorrect_loss_raw": 7.389448881149292, "correct_loss_per_char": 1.0255852426801408, "incorrect_loss_per_char": 0.673673961134184, "correct_loss_per_token": 4.786064465840657, "incorrect_loss_per_token": 3.8217997550964355, "correct_loss_uncond": -5.539071083068848, "incorrect_loss_uncond": -11.305331468582153}, "model_output": [{"sum_logits": -8.953116416931152, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.712093353271484, "logits_per_token": -2.984372138977051, "logits_per_char": -0.559569776058197, "num_chars": 16}, {"sum_logits": -13.581111907958984, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.131093978881836, "logits_per_token": -6.790555953979492, "logits_per_char": -1.1317593256632488, "num_chars": 12}, {"sum_logits": -3.022592544555664, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -1.511296272277832, "logits_per_char": -0.43179893493652344, "num_chars": 7}, {"sum_logits": -14.358193397521973, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.89726448059082, "logits_per_token": -4.786064465840657, "logits_per_char": -1.0255852426801408, "num_chars": 14}, {"sum_logits": -4.000974655151367, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.87264060974121, "logits_per_token": -4.000974655151367, "logits_per_char": -0.5715678078787667, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 568, "native_id": "6cb895ce89995f6be422f7c4167c7638", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.318013191223145, "incorrect_loss_raw": 16.602285861968994, "correct_loss_per_char": 0.8598344326019287, "incorrect_loss_per_char": 1.1789797704973262, "correct_loss_per_token": 5.159006595611572, "incorrect_loss_per_token": 6.911120057106018, "correct_loss_uncond": -11.171786308288574, "incorrect_loss_uncond": -3.622142791748047}, "model_output": [{"sum_logits": -10.318013191223145, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.48979949951172, "logits_per_token": -5.159006595611572, "logits_per_char": -0.8598344326019287, "num_chars": 12}, {"sum_logits": -19.188648223876953, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.94456672668457, "logits_per_token": -9.594324111938477, "logits_per_char": -1.7444225658069958, "num_chars": 11}, {"sum_logits": -13.859946250915527, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.404834747314453, "logits_per_token": -6.929973125457764, "logits_per_char": -0.9899961607796806, "num_chars": 14}, {"sum_logits": -18.808374404907227, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -6.269458134969075, "logits_per_char": -1.2538916269938152, "num_chars": 15}, {"sum_logits": -14.55217456817627, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.319406509399414, "logits_per_token": -4.850724856058757, "logits_per_char": -0.7276087284088135, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 569, "native_id": "839f3c37622c1ed5eebc9cd0b9d658e8", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.692207336425781, "incorrect_loss_raw": 7.4732348918914795, "correct_loss_per_char": 0.6057629585266113, "incorrect_loss_per_char": 1.102642634179857, "correct_loss_per_token": 2.4230518341064453, "incorrect_loss_per_token": 7.4732348918914795, "correct_loss_uncond": -12.232336044311523, "incorrect_loss_uncond": -6.312700510025024}, "model_output": [{"sum_logits": -10.119905471801758, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -10.119905471801758, "logits_per_char": -1.1244339413113065, "num_chars": 9}, {"sum_logits": -6.379132270812988, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.234234809875488, "logits_per_token": -6.379132270812988, "logits_per_char": -1.0631887118021648, "num_chars": 6}, {"sum_logits": -7.316135406494141, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -7.316135406494141, "logits_per_char": -1.4632270812988282, "num_chars": 5}, {"sum_logits": -9.692207336425781, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.924543380737305, "logits_per_token": -2.4230518341064453, "logits_per_char": -0.6057629585266113, "num_chars": 16}, {"sum_logits": -6.077766418457031, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -6.077766418457031, "logits_per_char": -0.7597208023071289, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 570, "native_id": "3957ac6bab96fc9d4f173ada4692d16b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.453577518463135, "incorrect_loss_raw": 13.972225904464722, "correct_loss_per_char": 0.37267887592315674, "incorrect_loss_per_char": 1.5559301412466802, "correct_loss_per_token": 2.4845258394877114, "incorrect_loss_per_token": 8.30147635936737, "correct_loss_uncond": -13.86582899093628, "incorrect_loss_uncond": -4.618477821350098}, "model_output": [{"sum_logits": -7.453577518463135, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.319406509399414, "logits_per_token": -2.4845258394877114, "logits_per_char": -0.37267887592315674, "num_chars": 20}, {"sum_logits": -14.854034423828125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.94456672668457, "logits_per_token": -7.4270172119140625, "logits_per_char": -1.3503667658025569, "num_chars": 11}, {"sum_logits": -16.945430755615234, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.48979949951172, "logits_per_token": -8.472715377807617, "logits_per_char": -1.4121192296346028, "num_chars": 12}, {"sum_logits": -13.56653118133545, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.521936416625977, "logits_per_token": -6.783265590667725, "logits_per_char": -1.3566531181335448, "num_chars": 10}, {"sum_logits": -10.522907257080078, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.406512260437012, "logits_per_token": -10.522907257080078, "logits_per_char": -2.1045814514160157, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 571, "native_id": "a4f5e5412f0f8ac9190db1730db07a90", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.794259071350098, "incorrect_loss_raw": 15.184930324554443, "correct_loss_per_char": 0.3995215892791748, "incorrect_loss_per_char": 1.2313951249445387, "correct_loss_per_token": 2.397129535675049, "incorrect_loss_per_token": 7.467652916908264, "correct_loss_uncond": -12.730971336364746, "incorrect_loss_uncond": -3.3409605026245117}, "model_output": [{"sum_logits": -23.020736694335938, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -25.807647705078125, "logits_per_token": -5.755184173583984, "logits_per_char": -2.092794244939631, "num_chars": 11}, {"sum_logits": -10.511870384216309, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.702447891235352, "logits_per_token": -10.511870384216309, "logits_per_char": -1.3139837980270386, "num_chars": 8}, {"sum_logits": -13.180988311767578, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.6312255859375, "logits_per_token": -6.590494155883789, "logits_per_char": -0.6937362269351357, "num_chars": 19}, {"sum_logits": -4.794259071350098, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.525230407714844, "logits_per_token": -2.397129535675049, "logits_per_char": -0.3995215892791748, "num_chars": 12}, {"sum_logits": -14.02612590789795, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.962242126464844, "logits_per_token": -7.013062953948975, "logits_per_char": -0.8250662298763499, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 572, "native_id": "cb5b39878be0e05a3ffe783801adbc3b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.411652088165283, "incorrect_loss_raw": 11.579667806625366, "correct_loss_per_char": 1.2352753480275471, "incorrect_loss_per_char": 1.1627797541164218, "correct_loss_per_token": 7.411652088165283, "incorrect_loss_per_token": 11.579667806625366, "correct_loss_uncond": -6.35291051864624, "incorrect_loss_uncond": -2.0760233402252197}, "model_output": [{"sum_logits": -12.484189987182617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.009564399719238, "logits_per_token": -12.484189987182617, "logits_per_char": -1.0403491655985515, "num_chars": 12}, {"sum_logits": -7.411652088165283, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.764562606811523, "logits_per_token": -7.411652088165283, "logits_per_char": -1.2352753480275471, "num_chars": 6}, {"sum_logits": -6.686518669128418, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -6.686518669128418, "logits_per_char": -1.6716296672821045, "num_chars": 4}, {"sum_logits": -13.373950958251953, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.072769165039062, "logits_per_token": -13.373950958251953, "logits_per_char": -0.9552822113037109, "num_chars": 14}, {"sum_logits": -13.774011611938477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.076326370239258, "logits_per_token": -13.774011611938477, "logits_per_char": -0.9838579722813198, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 573, "native_id": "985a4f1a3f31f1ba6654f4fc48f504df", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2406022548675537, "incorrect_loss_raw": 10.987283825874329, "correct_loss_per_char": 0.4050752818584442, "incorrect_loss_per_char": 1.3889758765697477, "correct_loss_per_token": 1.6203011274337769, "incorrect_loss_per_token": 8.334237813949585, "correct_loss_uncond": -14.753389596939087, "incorrect_loss_uncond": -4.50421941280365}, "model_output": [{"sum_logits": -10.729923248291016, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.767433166503906, "logits_per_token": -5.364961624145508, "logits_per_char": -1.0729923248291016, "num_chars": 10}, {"sum_logits": -14.819823265075684, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.164570808410645, "logits_per_token": -14.819823265075684, "logits_per_char": -1.8524779081344604, "num_chars": 8}, {"sum_logits": -10.494444847106934, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.555559158325195, "logits_per_token": -5.247222423553467, "logits_per_char": -1.0494444847106934, "num_chars": 10}, {"sum_logits": -3.2406022548675537, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.99399185180664, "logits_per_token": -1.6203011274337769, "logits_per_char": -0.4050752818584442, "num_chars": 8}, {"sum_logits": -7.904943943023682, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.478449821472168, "logits_per_token": -7.904943943023682, "logits_per_char": -1.5809887886047362, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 574, "native_id": "5d687fe9c95436ce84230c996d34382d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.463851928710938, "incorrect_loss_raw": 7.849390745162964, "correct_loss_per_char": 0.8719876607259115, "incorrect_loss_per_char": 0.9668834552620396, "correct_loss_per_token": 5.231925964355469, "incorrect_loss_per_token": 5.964738845825195, "correct_loss_uncond": -9.225067138671875, "incorrect_loss_uncond": -8.551465034484863}, "model_output": [{"sum_logits": -6.835794448852539, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.886232376098633, "logits_per_token": -6.835794448852539, "logits_per_char": -1.3671588897705078, "num_chars": 5}, {"sum_logits": -7.37222146987915, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -3.686110734939575, "logits_per_char": -0.6143517891565958, "num_chars": 12}, {"sum_logits": -10.463851928710938, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.688919067382812, "logits_per_token": -5.231925964355469, "logits_per_char": -0.8719876607259115, "num_chars": 12}, {"sum_logits": -9.484553337097168, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.944735527038574, "logits_per_token": -9.484553337097168, "logits_per_char": -1.185569167137146, "num_chars": 8}, {"sum_logits": -7.704993724822998, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.89899444580078, "logits_per_token": -3.852496862411499, "logits_per_char": -0.7004539749839089, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 575, "native_id": "af11faa29097b71141fe192ad019d1dd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.518020629882812, "incorrect_loss_raw": 10.558116912841797, "correct_loss_per_char": 0.8652746027166193, "incorrect_loss_per_char": 1.2118200988996597, "correct_loss_per_token": 4.759010314941406, "incorrect_loss_per_token": 5.893245458602905, "correct_loss_uncond": -8.516155242919922, "incorrect_loss_uncond": -5.388932228088379}, "model_output": [{"sum_logits": -9.518020629882812, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.034175872802734, "logits_per_token": -4.759010314941406, "logits_per_char": -0.8652746027166193, "num_chars": 11}, {"sum_logits": -19.302452087402344, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.618534088134766, "logits_per_token": -6.434150695800781, "logits_per_char": -1.378746577671596, "num_chars": 14}, {"sum_logits": -7.463748931884766, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.156793594360352, "logits_per_token": -7.463748931884766, "logits_per_char": -1.2439581553141277, "num_chars": 6}, {"sum_logits": -11.582368850708008, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.14725399017334, "logits_per_token": -5.791184425354004, "logits_per_char": -1.447796106338501, "num_chars": 8}, {"sum_logits": -3.8838977813720703, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.865614891052246, "logits_per_token": -3.8838977813720703, "logits_per_char": -0.7767795562744141, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 576, "native_id": "07fd8b0aed06406fedb137d11b07a890", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1009793281555176, "incorrect_loss_raw": 11.175608515739441, "correct_loss_per_char": 0.31009793281555176, "incorrect_loss_per_char": 1.2255573215938749, "correct_loss_per_token": 3.1009793281555176, "incorrect_loss_per_token": 6.759684503078461, "correct_loss_uncond": -14.06895399093628, "incorrect_loss_uncond": -7.048780798912048}, "model_output": [{"sum_logits": -16.706605911254883, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -8.353302955627441, "logits_per_char": -1.856289545694987, "num_chars": 9}, {"sum_logits": -12.883633613586426, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -6.441816806793213, "logits_per_char": -1.0736361344655354, "num_chars": 12}, {"sum_logits": -5.737152576446533, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.72610855102539, "logits_per_token": -2.8685762882232666, "logits_per_char": -0.4097966126033238, "num_chars": 14}, {"sum_logits": -3.1009793281555176, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -3.1009793281555176, "logits_per_char": -0.31009793281555176, "num_chars": 10}, {"sum_logits": -9.375041961669922, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.903817176818848, "logits_per_token": -9.375041961669922, "logits_per_char": -1.5625069936116536, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 577, "native_id": "7044d82a456d0fa6f0210abb03cbf2c4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.412654876708984, "incorrect_loss_raw": 8.531355559825897, "correct_loss_per_char": 0.6738777160644531, "incorrect_loss_per_char": 1.1746597850606555, "correct_loss_per_token": 3.706327438354492, "incorrect_loss_per_token": 8.531355559825897, "correct_loss_uncond": -8.494283676147461, "incorrect_loss_uncond": -4.340485155582428}, "model_output": [{"sum_logits": -10.148162841796875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.312516212463379, "logits_per_token": -10.148162841796875, "logits_per_char": -1.449737548828125, "num_chars": 7}, {"sum_logits": -9.917230606079102, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.382490158081055, "logits_per_token": -9.917230606079102, "logits_per_char": -0.8264358838399252, "num_chars": 12}, {"sum_logits": -7.412654876708984, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.906938552856445, "logits_per_token": -3.706327438354492, "logits_per_char": -0.6738777160644531, "num_chars": 11}, {"sum_logits": -3.8629748821258545, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.508755683898926, "logits_per_token": -3.8629748821258545, "logits_per_char": -0.9657437205314636, "num_chars": 4}, {"sum_logits": -10.197053909301758, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -10.197053909301758, "logits_per_char": -1.4567219870431083, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 578, "native_id": "e53ba4c7d2a818bdb6001e6924bc8896", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.676340579986572, "incorrect_loss_raw": 9.554439187049866, "correct_loss_per_char": 1.3352681159973145, "incorrect_loss_per_char": 1.1388783640331692, "correct_loss_per_token": 6.676340579986572, "incorrect_loss_per_token": 7.093094706535339, "correct_loss_uncond": -5.506913661956787, "incorrect_loss_uncond": -4.603149056434631}, "model_output": [{"sum_logits": -11.205827713012695, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.624297142028809, "logits_per_token": -11.205827713012695, "logits_per_char": -1.8676379521687825, "num_chars": 6}, {"sum_logits": -6.676340579986572, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.18325424194336, "logits_per_token": -6.676340579986572, "logits_per_char": -1.3352681159973145, "num_chars": 5}, {"sum_logits": -7.052623748779297, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.203079223632812, "logits_per_token": -3.5263118743896484, "logits_per_char": -0.4701749165852865, "num_chars": 15}, {"sum_logits": -7.321173191070557, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -7.321173191070557, "logits_per_char": -0.8134636878967285, "num_chars": 9}, {"sum_logits": -12.638132095336914, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.690180778503418, "logits_per_token": -6.319066047668457, "logits_per_char": -1.4042368994818792, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 579, "native_id": "ecbc1ab06ad1ed6c53e5293d7a90ebd3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.933354377746582, "incorrect_loss_raw": 10.843045115470886, "correct_loss_per_char": 0.522808125144557, "incorrect_loss_per_char": 1.2842835874268503, "correct_loss_per_token": 4.966677188873291, "incorrect_loss_per_token": 5.878530820210775, "correct_loss_uncond": -9.210516929626465, "incorrect_loss_uncond": -6.321126580238342}, "model_output": [{"sum_logits": -11.617388725280762, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.69198226928711, "logits_per_token": -5.808694362640381, "logits_per_char": -2.3234777450561523, "num_chars": 5}, {"sum_logits": -11.532767295837402, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.695837020874023, "logits_per_token": -3.8442557652791343, "logits_per_char": -0.8237690925598145, "num_chars": 14}, {"sum_logits": -7.500321865081787, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.140993118286133, "logits_per_token": -7.500321865081787, "logits_per_char": -0.8333690961201986, "num_chars": 9}, {"sum_logits": -9.933354377746582, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.143871307373047, "logits_per_token": -4.966677188873291, "logits_per_char": -0.522808125144557, "num_chars": 19}, {"sum_logits": -12.721702575683594, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.12787437438965, "logits_per_token": -6.360851287841797, "logits_per_char": -1.1565184159712358, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 580, "native_id": "9a356ff463c042d04ba45bfd627bac20", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.6892826557159424, "incorrect_loss_raw": 8.28853040933609, "correct_loss_per_char": 0.3361603319644928, "incorrect_loss_per_char": 1.3641814563009476, "correct_loss_per_token": 2.6892826557159424, "incorrect_loss_per_token": 8.28853040933609, "correct_loss_uncond": -11.411566495895386, "incorrect_loss_uncond": -4.754807412624359}, "model_output": [{"sum_logits": -10.667797088623047, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -10.667797088623047, "logits_per_char": -2.1335594177246096, "num_chars": 5}, {"sum_logits": -1.8349583148956299, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.493803977966309, "logits_per_token": -1.8349583148956299, "logits_per_char": -0.20388425721062553, "num_chars": 9}, {"sum_logits": -13.479881286621094, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -13.479881286621094, "logits_per_char": -1.6849851608276367, "num_chars": 8}, {"sum_logits": -2.6892826557159424, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -2.6892826557159424, "logits_per_char": -0.3361603319644928, "num_chars": 8}, {"sum_logits": -7.17148494720459, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.415973663330078, "logits_per_token": -7.17148494720459, "logits_per_char": -1.4342969894409179, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 581, "native_id": "0a5c069836784c3d574828d85a20a074", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.21617317199707, "incorrect_loss_raw": 9.766994714736938, "correct_loss_per_char": 0.9346810976664225, "incorrect_loss_per_char": 0.7978832458699501, "correct_loss_per_token": 5.608086585998535, "incorrect_loss_per_token": 4.883497357368469, "correct_loss_uncond": -8.793012619018555, "incorrect_loss_uncond": -7.993833541870117}, "model_output": [{"sum_logits": -7.549654006958008, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.62997817993164, "logits_per_token": -3.774827003479004, "logits_per_char": -0.5807426159198468, "num_chars": 13}, {"sum_logits": -11.850075721740723, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -20.216522216796875, "logits_per_token": -5.925037860870361, "logits_per_char": -1.0772796110673384, "num_chars": 11}, {"sum_logits": -11.555044174194336, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.701950073242188, "logits_per_token": -5.777522087097168, "logits_per_char": -0.722190260887146, "num_chars": 16}, {"sum_logits": -11.21617317199707, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -20.009185791015625, "logits_per_token": -5.608086585998535, "logits_per_char": -0.9346810976664225, "num_chars": 12}, {"sum_logits": -8.113204956054688, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.49486255645752, "logits_per_token": -4.056602478027344, "logits_per_char": -0.8113204956054687, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 582, "native_id": "f996430ce208606452868fd2e739d409", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.0728178024292, "incorrect_loss_raw": 15.434775829315186, "correct_loss_per_char": 1.2793470729481091, "incorrect_loss_per_char": 1.2013082997840747, "correct_loss_per_token": 7.0364089012146, "incorrect_loss_per_token": 8.04883627096812, "correct_loss_uncond": -3.8998517990112305, "incorrect_loss_uncond": -4.526190996170044}, "model_output": [{"sum_logits": -9.760964393615723, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.86296558380127, "logits_per_token": -9.760964393615723, "logits_per_char": -1.394423484802246, "num_chars": 7}, {"sum_logits": -14.0728178024292, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.97266960144043, "logits_per_token": -7.0364089012146, "logits_per_char": -1.2793470729481091, "num_chars": 11}, {"sum_logits": -21.32813262939453, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.58591079711914, "logits_per_token": -7.109377543131511, "logits_per_char": -1.1225332962839227, "num_chars": 19}, {"sum_logits": -14.69655990600586, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.519742965698242, "logits_per_token": -7.34827995300293, "logits_per_char": -1.2247133255004883, "num_chars": 12}, {"sum_logits": -15.953446388244629, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.875247955322266, "logits_per_token": -7.9767231941223145, "logits_per_char": -1.063563092549642, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 583, "native_id": "26c854d933d2115e7636fdcde57eb463", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5526316165924072, "incorrect_loss_raw": 11.296152710914612, "correct_loss_per_char": 0.27327935512249285, "incorrect_loss_per_char": 1.8115128534180776, "correct_loss_per_token": 1.7763158082962036, "incorrect_loss_per_token": 11.296152710914612, "correct_loss_uncond": -16.144498586654663, "incorrect_loss_uncond": -2.5313321352005005}, "model_output": [{"sum_logits": -13.21246337890625, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.056729316711426, "logits_per_token": -13.21246337890625, "logits_per_char": -2.64249267578125, "num_chars": 5}, {"sum_logits": -14.52885627746582, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.292170524597168, "logits_per_token": -14.52885627746582, "logits_per_char": -2.0755508967808316, "num_chars": 7}, {"sum_logits": -4.634619235992432, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -4.634619235992432, "logits_per_char": -0.9269238471984863, "num_chars": 5}, {"sum_logits": -12.808671951293945, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.486090660095215, "logits_per_token": -12.808671951293945, "logits_per_char": -1.6010839939117432, "num_chars": 8}, {"sum_logits": -3.5526316165924072, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.69713020324707, "logits_per_token": -1.7763158082962036, "logits_per_char": -0.27327935512249285, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 584, "native_id": "83c25b9a5db5f9b3fd1ff6c7453d23d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.5367937088012695, "incorrect_loss_raw": 13.465536236763, "correct_loss_per_char": 0.4124357917092063, "incorrect_loss_per_char": 0.9305954569163808, "correct_loss_per_token": 2.2683968544006348, "incorrect_loss_per_token": 4.71865850687027, "correct_loss_uncond": -12.075892448425293, "incorrect_loss_uncond": -8.139671206474304}, "model_output": [{"sum_logits": -7.699706554412842, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.420150756835938, "logits_per_token": -3.849853277206421, "logits_per_char": -0.6999733231284402, "num_chars": 11}, {"sum_logits": -19.974506378173828, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -26.462875366210938, "logits_per_token": -6.658168792724609, "logits_per_char": -1.2484066486358643, "num_chars": 16}, {"sum_logits": -4.5367937088012695, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.612686157226562, "logits_per_token": -2.2683968544006348, "logits_per_char": -0.4124357917092063, "num_chars": 11}, {"sum_logits": -7.278515815734863, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.400543212890625, "logits_per_token": -3.6392579078674316, "logits_per_char": -0.6616832559758966, "num_chars": 11}, {"sum_logits": -18.90941619873047, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.13726043701172, "logits_per_token": -4.727354049682617, "logits_per_char": -1.1123185999253218, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 585, "native_id": "a0d02fc32878efdf0b0d420972943492", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.082419395446777, "incorrect_loss_raw": 9.252601385116577, "correct_loss_per_char": 0.4536021550496419, "incorrect_loss_per_char": 1.2405864397684732, "correct_loss_per_token": 2.0412096977233887, "incorrect_loss_per_token": 8.211770296096802, "correct_loss_uncond": -9.493485450744629, "incorrect_loss_uncond": -6.219338655471802}, "model_output": [{"sum_logits": -8.326648712158203, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.340968132019043, "logits_per_token": -4.163324356079102, "logits_per_char": -0.6938873926798502, "num_chars": 12}, {"sum_logits": -4.082419395446777, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.575904846191406, "logits_per_token": -2.0412096977233887, "logits_per_char": -0.4536021550496419, "num_chars": 9}, {"sum_logits": -7.658543586730957, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.931857109069824, "logits_per_token": -7.658543586730957, "logits_per_char": -1.2764239311218262, "num_chars": 6}, {"sum_logits": -12.29202651977539, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.380257606506348, "logits_per_token": -12.29202651977539, "logits_per_char": -1.5365033149719238, "num_chars": 8}, {"sum_logits": -8.733186721801758, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.2346773147583, "logits_per_token": -8.733186721801758, "logits_per_char": -1.455531120300293, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 586, "native_id": "73fbd2caac2c3786ca810adfe7030273", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.029644012451172, "incorrect_loss_raw": 14.391637802124023, "correct_loss_per_char": 0.7715110778808594, "incorrect_loss_per_char": 1.5451746908517983, "correct_loss_per_token": 2.507411003112793, "incorrect_loss_per_token": 10.076168656349182, "correct_loss_uncond": -9.526710510253906, "incorrect_loss_uncond": -3.033602714538574}, "model_output": [{"sum_logits": -18.618467330932617, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -9.309233665466309, "logits_per_char": -1.8618467330932618, "num_chars": 10}, {"sum_logits": -11.001748085021973, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.876677513122559, "logits_per_token": -11.001748085021973, "logits_per_char": -1.3752185106277466, "num_chars": 8}, {"sum_logits": -10.029644012451172, "num_tokens": 4, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.556354522705078, "logits_per_token": -2.507411003112793, "logits_per_char": -0.7715110778808594, "num_chars": 13}, {"sum_logits": -15.905285835266113, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.443462371826172, "logits_per_token": -7.952642917633057, "logits_per_char": -1.223483525789701, "num_chars": 13}, {"sum_logits": -12.04104995727539, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.707097053527832, "logits_per_token": -12.04104995727539, "logits_per_char": -1.7201499938964844, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 587, "native_id": "6c515b068b4d3aa88a5382224d9b866d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.6725513935089111, "incorrect_loss_raw": 7.379932522773743, "correct_loss_per_char": 0.16725513935089112, "incorrect_loss_per_char": 0.7994254646363196, "correct_loss_per_token": 1.6725513935089111, "incorrect_loss_per_token": 5.385435581207275, "correct_loss_uncond": -15.497381925582886, "incorrect_loss_uncond": -10.082679152488708}, "model_output": [{"sum_logits": -7.0116987228393555, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -7.0116987228393555, "logits_per_char": -1.001671246119908, "num_chars": 7}, {"sum_logits": -5.708580493927002, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.16405487060547, "logits_per_token": -1.902860164642334, "logits_per_char": -0.3805720329284668, "num_chars": 15}, {"sum_logits": -1.6725513935089111, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -1.6725513935089111, "logits_per_char": -0.16725513935089112, "num_chars": 10}, {"sum_logits": -8.454916000366211, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -8.454916000366211, "logits_per_char": -1.0568645000457764, "num_chars": 8}, {"sum_logits": -8.344534873962402, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -4.172267436981201, "logits_per_char": -0.7585940794511274, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 588, "native_id": "0af371b94fb414860b13eea6009ccc31", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.631023406982422, "incorrect_loss_raw": 8.673038125038147, "correct_loss_per_char": 0.3307873862130301, "incorrect_loss_per_char": 1.0081217402503606, "correct_loss_per_token": 1.5436744689941406, "incorrect_loss_per_token": 5.147755265235901, "correct_loss_uncond": -11.866401672363281, "incorrect_loss_uncond": -7.095480561256409}, "model_output": [{"sum_logits": -12.876452445983887, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.33782958984375, "logits_per_token": -6.438226222991943, "logits_per_char": -0.9197466032845634, "num_chars": 14}, {"sum_logits": -15.325810432434082, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.470478057861328, "logits_per_token": -7.662905216217041, "logits_per_char": -1.9157263040542603, "num_chars": 8}, {"sum_logits": -3.028916120529175, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -10.916240692138672, "logits_per_token": -3.028916120529175, "logits_per_char": -0.5048193534215292, "num_chars": 6}, {"sum_logits": -4.631023406982422, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.497425079345703, "logits_per_token": -1.5436744689941406, "logits_per_char": -0.3307873862130301, "num_chars": 14}, {"sum_logits": -3.4609735012054443, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.349526405334473, "logits_per_token": -3.4609735012054443, "logits_per_char": -0.6921947002410889, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 589, "native_id": "38e61d4be0da46b3cbbd76dc20bce677", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.022947311401367, "incorrect_loss_raw": 9.672796726226807, "correct_loss_per_char": 1.288992473057338, "incorrect_loss_per_char": 0.8845716487793694, "correct_loss_per_token": 9.022947311401367, "incorrect_loss_per_token": 5.478082140286764, "correct_loss_uncond": -6.114428520202637, "incorrect_loss_uncond": -9.360424995422363}, "model_output": [{"sum_logits": -9.202461242675781, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.71224594116211, "logits_per_token": -4.601230621337891, "logits_per_char": -0.6573186601911273, "num_chars": 14}, {"sum_logits": -11.725668907165527, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -23.86139488220215, "logits_per_token": -3.9085563023885093, "logits_per_char": -0.7817112604777018, "num_chars": 15}, {"sum_logits": -9.04202651977539, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -9.04202651977539, "logits_per_char": -1.1302533149719238, "num_chars": 8}, {"sum_logits": -9.022947311401367, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.137375831604004, "logits_per_token": -9.022947311401367, "logits_per_char": -1.288992473057338, "num_chars": 7}, {"sum_logits": -8.721030235290527, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.55059051513672, "logits_per_token": -4.360515117645264, "logits_per_char": -0.9690033594767252, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 590, "native_id": "cebc07bd5080cc72862cb333b10d782d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.9028496742248535, "incorrect_loss_raw": 10.198254585266113, "correct_loss_per_char": 0.5447610749138726, "incorrect_loss_per_char": 1.5535529176394145, "correct_loss_per_token": 2.4514248371124268, "incorrect_loss_per_token": 8.859302043914795, "correct_loss_uncond": -10.663681507110596, "incorrect_loss_uncond": -5.360116481781006}, "model_output": [{"sum_logits": -10.711620330810547, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.34975814819336, "logits_per_token": -5.355810165405273, "logits_per_char": -1.0711620330810547, "num_chars": 10}, {"sum_logits": -7.220224380493164, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -7.220224380493164, "logits_per_char": -0.9025280475616455, "num_chars": 8}, {"sum_logits": -4.9028496742248535, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.56653118133545, "logits_per_token": -2.4514248371124268, "logits_per_char": -0.5447610749138726, "num_chars": 9}, {"sum_logits": -9.951394081115723, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -9.951394081115723, "logits_per_char": -1.6585656801859539, "num_chars": 6}, {"sum_logits": -12.90977954864502, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.20553970336914, "logits_per_token": -12.90977954864502, "logits_per_char": -2.581955909729004, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 591, "native_id": "de0386024f32cdf277a785a851b97544", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.891762733459473, "incorrect_loss_raw": 9.971418738365173, "correct_loss_per_char": 0.8083420666781339, "incorrect_loss_per_char": 0.7087024704641965, "correct_loss_per_token": 4.445881366729736, "incorrect_loss_per_token": 4.196384370326996, "correct_loss_uncond": -10.047324180603027, "incorrect_loss_uncond": -8.535900712013245}, "model_output": [{"sum_logits": -12.629199981689453, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.43723487854004, "logits_per_token": -3.1572999954223633, "logits_per_char": -0.6646947358783922, "num_chars": 19}, {"sum_logits": -12.517400741577148, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -6.258700370788574, "logits_per_char": -1.1379455219615588, "num_chars": 11}, {"sum_logits": -8.891762733459473, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.9390869140625, "logits_per_token": -4.445881366729736, "logits_per_char": -0.8083420666781339, "num_chars": 11}, {"sum_logits": -7.694438934326172, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.556182861328125, "logits_per_token": -3.847219467163086, "logits_per_char": -0.5918799180250901, "num_chars": 13}, {"sum_logits": -7.04463529586792, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.654029846191406, "logits_per_token": -3.52231764793396, "logits_per_char": -0.440289705991745, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 592, "native_id": "9b62cd7f89716f393239e6c6ff3e11d5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.354459762573242, "incorrect_loss_raw": 16.391634702682495, "correct_loss_per_char": 0.39585997841574927, "incorrect_loss_per_char": 1.4436797185377641, "correct_loss_per_token": 2.177229881286621, "incorrect_loss_per_token": 8.195817351341248, "correct_loss_uncond": -14.253633499145508, "incorrect_loss_uncond": -5.101943254470825}, "model_output": [{"sum_logits": -4.354459762573242, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.60809326171875, "logits_per_token": -2.177229881286621, "logits_per_char": -0.39585997841574927, "num_chars": 11}, {"sum_logits": -12.949896812438965, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.52931785583496, "logits_per_token": -6.474948406219482, "logits_per_char": -0.8633264541625977, "num_chars": 15}, {"sum_logits": -16.984142303466797, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.650463104248047, "logits_per_token": -8.492071151733398, "logits_per_char": -1.5440129366787998, "num_chars": 11}, {"sum_logits": -14.086746215820312, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.91724967956543, "logits_per_token": -7.043373107910156, "logits_per_char": -1.4086746215820312, "num_chars": 10}, {"sum_logits": -21.545753479003906, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -26.877281188964844, "logits_per_token": -10.772876739501953, "logits_per_char": -1.958704861727628, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 593, "native_id": "8b25332de2894ab38784235838d38cec", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.992856979370117, "incorrect_loss_raw": 12.798706293106079, "correct_loss_per_char": 0.7494047482808431, "incorrect_loss_per_char": 1.0892117050173995, "correct_loss_per_token": 4.496428489685059, "incorrect_loss_per_token": 5.81053884824117, "correct_loss_uncond": -8.122537612915039, "incorrect_loss_uncond": -5.423563003540039}, "model_output": [{"sum_logits": -8.131892204284668, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -8.131892204284668, "logits_per_char": -1.161698886326381, "num_chars": 7}, {"sum_logits": -12.219136238098145, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.654029846191406, "logits_per_token": -6.109568119049072, "logits_per_char": -0.763696014881134, "num_chars": 16}, {"sum_logits": -15.366846084594727, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.386695861816406, "logits_per_token": -3.8417115211486816, "logits_per_char": -1.0244564056396483, "num_chars": 15}, {"sum_logits": -15.476950645446777, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.026775360107422, "logits_per_token": -5.158983548482259, "logits_per_char": -1.4069955132224343, "num_chars": 11}, {"sum_logits": -8.992856979370117, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.115394592285156, "logits_per_token": -4.496428489685059, "logits_per_char": -0.7494047482808431, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 594, "native_id": "dd4a811d18549f1ae1954cf938b28536", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.675881385803223, "incorrect_loss_raw": 11.644256353378296, "correct_loss_per_char": 0.6679830551147461, "incorrect_loss_per_char": 1.383501506386017, "correct_loss_per_token": 4.675881385803223, "incorrect_loss_per_token": 8.55401599407196, "correct_loss_uncond": -7.768119812011719, "incorrect_loss_uncond": -4.675846338272095}, "model_output": [{"sum_logits": -4.675881385803223, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -4.675881385803223, "logits_per_char": -0.6679830551147461, "num_chars": 7}, {"sum_logits": -14.618350982666016, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -14.618350982666016, "logits_per_char": -2.088335854666574, "num_chars": 7}, {"sum_logits": -12.274154663085938, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -4.0913848876953125, "logits_per_char": -0.7220090978285846, "num_chars": 17}, {"sum_logits": -11.328136444091797, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.856280326843262, "logits_per_token": -11.328136444091797, "logits_per_char": -1.888022740681966, "num_chars": 6}, {"sum_logits": -8.356383323669434, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.663773536682129, "logits_per_token": -4.178191661834717, "logits_per_char": -0.8356383323669434, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 595, "native_id": "e2ff952c17faf1c56a970502630d4c86", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9716343879699707, "incorrect_loss_raw": 11.269118070602417, "correct_loss_per_char": 0.05715496399823357, "incorrect_loss_per_char": 0.983765185851873, "correct_loss_per_token": 0.48581719398498535, "incorrect_loss_per_token": 5.888120611508688, "correct_loss_uncond": -17.564311504364014, "incorrect_loss_uncond": -7.229637622833252}, "model_output": [{"sum_logits": -5.774444580078125, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -5.774444580078125, "logits_per_char": -0.48120371500651044, "num_chars": 12}, {"sum_logits": -11.237855911254883, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.017868041992188, "logits_per_token": -3.7459519704182944, "logits_per_char": -1.021623264659535, "num_chars": 11}, {"sum_logits": -8.512402534484863, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.069997787475586, "logits_per_token": -4.256201267242432, "logits_per_char": -0.6548001949603741, "num_chars": 13}, {"sum_logits": -19.551769256591797, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.930374145507812, "logits_per_token": -9.775884628295898, "logits_per_char": -1.7774335687810725, "num_chars": 11}, {"sum_logits": -0.9716343879699707, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -18.535945892333984, "logits_per_token": -0.48581719398498535, "logits_per_char": -0.05715496399823357, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 596, "native_id": "3a6140e475cbbd3ee1da5ba9a6953597_1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7891135811805725, "incorrect_loss_raw": 11.275229692459106, "correct_loss_per_char": 0.09863919764757156, "incorrect_loss_per_char": 1.0843564240137735, "correct_loss_per_token": 0.7891135811805725, "incorrect_loss_per_token": 7.156873345375061, "correct_loss_uncond": -13.311735570430756, "incorrect_loss_uncond": -5.707542896270752}, "model_output": [{"sum_logits": -5.958233833312988, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -5.958233833312988, "logits_per_char": -0.5958233833312988, "num_chars": 10}, {"sum_logits": -6.491634368896484, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -6.491634368896484, "logits_per_char": -1.0819390614827473, "num_chars": 6}, {"sum_logits": -21.964567184448242, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.420154571533203, "logits_per_token": -5.4911417961120605, "logits_per_char": -0.8785826873779297, "num_chars": 25}, {"sum_logits": -10.686483383178711, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -10.686483383178711, "logits_per_char": -1.7810805638631184, "num_chars": 6}, {"sum_logits": -0.7891135811805725, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -0.7891135811805725, "logits_per_char": -0.09863919764757156, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 597, "native_id": "e75e0c11e2d5a7b634455a1b4b76856c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5084471702575684, "incorrect_loss_raw": 10.103945851325989, "correct_loss_per_char": 0.16760524113972983, "incorrect_loss_per_char": 0.976081132888794, "correct_loss_per_token": 1.5084471702575684, "incorrect_loss_per_token": 5.856486797332764, "correct_loss_uncond": -10.293315410614014, "incorrect_loss_uncond": -5.686446785926819}, "model_output": [{"sum_logits": -8.337944984436035, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.826593399047852, "logits_per_token": -4.168972492218018, "logits_per_char": -0.6948287487030029, "num_chars": 12}, {"sum_logits": -1.5084471702575684, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -1.5084471702575684, "logits_per_char": -0.16760524113972983, "num_chars": 9}, {"sum_logits": -11.716796875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.801639556884766, "logits_per_token": -5.8583984375, "logits_per_char": -0.9763997395833334, "num_chars": 12}, {"sum_logits": -6.436110973358154, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -6.436110973358154, "logits_per_char": -1.0726851622263591, "num_chars": 6}, {"sum_logits": -13.924930572509766, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.29062843322754, "logits_per_token": -6.962465286254883, "logits_per_char": -1.1604108810424805, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 598, "native_id": "3b9ccdcb1c932c46a38e040d3e6c7f5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.095396041870117, "incorrect_loss_raw": 12.996655464172363, "correct_loss_per_char": 0.27302640279134116, "incorrect_loss_per_char": 1.4610156052596084, "correct_loss_per_token": 2.0476980209350586, "incorrect_loss_per_token": 8.595846891403198, "correct_loss_uncond": -13.931169509887695, "incorrect_loss_uncond": -3.6188509464263916}, "model_output": [{"sum_logits": -4.095396041870117, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -2.0476980209350586, "logits_per_char": -0.27302640279134116, "num_chars": 15}, {"sum_logits": -17.87403678894043, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.64935302734375, "logits_per_token": -8.937018394470215, "logits_per_char": -1.624912435358221, "num_chars": 11}, {"sum_logits": -8.227531433105469, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -8.227531433105469, "logits_per_char": -1.1753616333007812, "num_chars": 7}, {"sum_logits": -8.552621841430664, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.758267402648926, "logits_per_token": -8.552621841430664, "logits_per_char": -1.7105243682861329, "num_chars": 5}, {"sum_logits": -17.33243179321289, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.648706436157227, "logits_per_token": -8.666215896606445, "logits_per_char": -1.3332639840932994, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 599, "native_id": "6a29b657b29e1506284d8328dffbbd21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6839005947113037, "incorrect_loss_raw": 10.418919801712036, "correct_loss_per_char": 0.5367801189422607, "incorrect_loss_per_char": 1.4674393635291558, "correct_loss_per_token": 2.6839005947113037, "incorrect_loss_per_token": 7.800496459007263, "correct_loss_uncond": -11.027122735977173, "incorrect_loss_uncond": -4.560867071151733}, "model_output": [{"sum_logits": -9.415184020996094, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -9.415184020996094, "logits_per_char": -1.8830368041992187, "num_chars": 5}, {"sum_logits": -2.6839005947113037, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.711023330688477, "logits_per_token": -2.6839005947113037, "logits_per_char": -0.5367801189422607, "num_chars": 5}, {"sum_logits": -11.696661949157715, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.967296600341797, "logits_per_token": -5.848330974578857, "logits_per_char": -1.063332904468883, "num_chars": 11}, {"sum_logits": -9.250724792480469, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.77729606628418, "logits_per_token": -4.625362396240234, "logits_per_char": -0.6607660566057477, "num_chars": 14}, {"sum_logits": -11.313108444213867, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.869331359863281, "logits_per_token": -11.313108444213867, "logits_per_char": -2.2626216888427733, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 600, "native_id": "96cb628fb7ed2f53245598f707ed2b80", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.069681167602539, "incorrect_loss_raw": 14.635550022125244, "correct_loss_per_char": 0.7336073788729581, "incorrect_loss_per_char": 1.074377209261844, "correct_loss_per_token": 4.0348405838012695, "incorrect_loss_per_token": 6.99395207564036, "correct_loss_uncond": -10.939695358276367, "incorrect_loss_uncond": -3.1884255409240723}, "model_output": [{"sum_logits": -25.565181732177734, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -26.34801483154297, "logits_per_token": -8.521727244059244, "logits_per_char": -1.2782590866088868, "num_chars": 20}, {"sum_logits": -8.069681167602539, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.009376525878906, "logits_per_token": -4.0348405838012695, "logits_per_char": -0.7336073788729581, "num_chars": 11}, {"sum_logits": -8.646138191223145, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.305801391601562, "logits_per_token": -4.323069095611572, "logits_per_char": -0.8646138191223145, "num_chars": 10}, {"sum_logits": -18.399736404418945, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -9.199868202209473, "logits_per_char": -0.9684071791799445, "num_chars": 19}, {"sum_logits": -5.931143760681152, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.978771209716797, "logits_per_token": -5.931143760681152, "logits_per_char": -1.1862287521362305, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 601, "native_id": "bd4e80fa6642a76c064d0bc924411fb0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.688743591308594, "incorrect_loss_raw": 8.824599981307983, "correct_loss_per_char": 0.7240619659423828, "incorrect_loss_per_char": 1.126625096750638, "correct_loss_per_token": 4.344371795654297, "incorrect_loss_per_token": 6.999008536338806, "correct_loss_uncond": -8.456634521484375, "incorrect_loss_uncond": -5.942867040634155}, "model_output": [{"sum_logits": -14.604731559753418, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.860878944396973, "logits_per_token": -7.302365779876709, "logits_per_char": -1.8255914449691772, "num_chars": 8}, {"sum_logits": -9.758481979370117, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -9.758481979370117, "logits_per_char": -1.3940688541957311, "num_chars": 7}, {"sum_logits": -8.688743591308594, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -4.344371795654297, "logits_per_char": -0.7240619659423828, "num_chars": 12}, {"sum_logits": -5.170995235443115, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.336503982543945, "logits_per_token": -5.170995235443115, "logits_per_char": -0.6463744044303894, "num_chars": 8}, {"sum_logits": -5.764191150665283, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -5.764191150665283, "logits_per_char": -0.6404656834072537, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 602, "native_id": "05490e6c191fbc3c2fe0033ed0bd8aa0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.1373281478881836, "incorrect_loss_raw": 8.022756099700928, "correct_loss_per_char": 0.3485920164320204, "incorrect_loss_per_char": 0.7422164968081884, "correct_loss_per_token": 3.1373281478881836, "incorrect_loss_per_token": 4.6137841145197545, "correct_loss_uncond": -13.91806697845459, "incorrect_loss_uncond": -8.425917863845825}, "model_output": [{"sum_logits": -10.063704490661621, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.232545852661133, "logits_per_token": -3.3545681635538735, "logits_per_char": -0.503185224533081, "num_chars": 20}, {"sum_logits": -5.422050476074219, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -5.422050476074219, "logits_per_char": -0.7745786394391742, "num_chars": 7}, {"sum_logits": -2.7517662048339844, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -2.7517662048339844, "logits_per_char": -0.3057518005371094, "num_chars": 9}, {"sum_logits": -3.1373281478881836, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.055395126342773, "logits_per_token": -3.1373281478881836, "logits_per_char": -0.3485920164320204, "num_chars": 9}, {"sum_logits": -13.853503227233887, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -6.926751613616943, "logits_per_char": -1.3853503227233888, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 603, "native_id": "6abd34442438509b4a00c69d6fd24764", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 12.018477439880371, "incorrect_loss_raw": 13.984635353088379, "correct_loss_per_char": 0.9244982646061823, "incorrect_loss_per_char": 1.3865849930486638, "correct_loss_per_token": 6.0092387199401855, "incorrect_loss_per_token": 7.551315863927205, "correct_loss_uncond": -4.975674629211426, "incorrect_loss_uncond": -3.9540555477142334}, "model_output": [{"sum_logits": -11.990293502807617, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.795001983642578, "logits_per_token": -5.995146751403809, "logits_per_char": -1.0900266820734197, "num_chars": 11}, {"sum_logits": -20.68844985961914, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.307945251464844, "logits_per_token": -6.89614995320638, "logits_per_char": -1.724037488301595, "num_chars": 12}, {"sum_logits": -11.368135452270508, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.342520713806152, "logits_per_token": -11.368135452270508, "logits_per_char": -1.0334668592973189, "num_chars": 11}, {"sum_logits": -11.89166259765625, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.309295654296875, "logits_per_token": -5.945831298828125, "logits_per_char": -1.6988089425223214, "num_chars": 7}, {"sum_logits": -12.018477439880371, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.994152069091797, "logits_per_token": -6.0092387199401855, "logits_per_char": -0.9244982646061823, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 604, "native_id": "e58eb0ec4197c29e961a7bdd4d67de4e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.440800428390503, "incorrect_loss_raw": 8.760145425796509, "correct_loss_per_char": 0.49154291834150043, "incorrect_loss_per_char": 1.0837919403808285, "correct_loss_per_token": 3.440800428390503, "incorrect_loss_per_token": 7.148138125737509, "correct_loss_uncond": -10.748159646987915, "incorrect_loss_uncond": -5.867660045623779}, "model_output": [{"sum_logits": -9.672043800354004, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.19892692565918, "logits_per_token": -3.2240146001180015, "logits_per_char": -0.5373357666863335, "num_chars": 18}, {"sum_logits": -8.949891090393066, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.457103729248047, "logits_per_token": -8.949891090393066, "logits_per_char": -0.813626462763006, "num_chars": 11}, {"sum_logits": -7.432931900024414, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.612482070922852, "logits_per_token": -7.432931900024414, "logits_per_char": -1.4865863800048829, "num_chars": 5}, {"sum_logits": -3.440800428390503, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.188960075378418, "logits_per_token": -3.440800428390503, "logits_per_char": -0.49154291834150043, "num_chars": 7}, {"sum_logits": -8.98571491241455, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -8.98571491241455, "logits_per_char": -1.4976191520690918, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 605, "native_id": "597d2a1c9df7962218d8b807df1f8212", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.7015228271484375, "incorrect_loss_raw": 12.299412488937378, "correct_loss_per_char": 1.5403045654296874, "incorrect_loss_per_char": 2.116713001046862, "correct_loss_per_token": 7.7015228271484375, "incorrect_loss_per_token": 12.299412488937378, "correct_loss_uncond": -5.933326721191406, "incorrect_loss_uncond": -1.0574743747711182}, "model_output": [{"sum_logits": -10.798029899597168, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -12.541668891906738, "logits_per_token": -10.798029899597168, "logits_per_char": -1.5425756999424525, "num_chars": 7}, {"sum_logits": -12.567845344543457, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -12.567845344543457, "logits_per_char": -2.5135690689086916, "num_chars": 5}, {"sum_logits": -16.377891540527344, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -16.377891540527344, "logits_per_char": -2.047236442565918, "num_chars": 8}, {"sum_logits": -9.453883171081543, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -12.017007827758789, "logits_per_token": -9.453883171081543, "logits_per_char": -2.3634707927703857, "num_chars": 4}, {"sum_logits": -7.7015228271484375, "num_tokens": 1, "num_tokens_all": 129, "is_greedy": false, "sum_logits_uncond": -13.634849548339844, "logits_per_token": -7.7015228271484375, "logits_per_char": -1.5403045654296874, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 606, "native_id": "68f6ac445cc008d93f931b999b44b0ba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.313854694366455, "incorrect_loss_raw": 11.660412549972534, "correct_loss_per_char": 0.0772855702568503, "incorrect_loss_per_char": 1.4605844285753038, "correct_loss_per_token": 0.6569273471832275, "incorrect_loss_per_token": 6.223403215408325, "correct_loss_uncond": -13.132749080657959, "incorrect_loss_uncond": -4.8093180656433105}, "model_output": [{"sum_logits": -13.128456115722656, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.91456413269043, "logits_per_token": -6.564228057861328, "logits_per_char": -1.4587173461914062, "num_chars": 9}, {"sum_logits": -3.145575523376465, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.478461265563965, "logits_per_token": -3.145575523376465, "logits_per_char": -0.7863938808441162, "num_chars": 4}, {"sum_logits": -16.059358596801758, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -8.029679298400879, "logits_per_char": -2.0074198246002197, "num_chars": 8}, {"sum_logits": -14.308259963989258, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.660037994384766, "logits_per_token": -7.154129981994629, "logits_per_char": -1.589806662665473, "num_chars": 9}, {"sum_logits": -1.313854694366455, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.446603775024414, "logits_per_token": -0.6569273471832275, "logits_per_char": -0.0772855702568503, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 607, "native_id": "aa4c5d2d348796b8d7fa324f27f4c34f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.6969809532165527, "incorrect_loss_raw": 11.376177787780762, "correct_loss_per_char": 0.5281401361737933, "incorrect_loss_per_char": 1.0821950502004944, "correct_loss_per_token": 3.6969809532165527, "incorrect_loss_per_token": 5.336877822875977, "correct_loss_uncond": -12.954439640045166, "incorrect_loss_uncond": -7.027970790863037}, "model_output": [{"sum_logits": -7.5658040046691895, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.117111206054688, "logits_per_token": -2.521934668223063, "logits_per_char": -0.4450472943923053, "num_chars": 17}, {"sum_logits": -16.027769088745117, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -24.173377990722656, "logits_per_token": -5.342589696248372, "logits_per_char": -1.1448406491960799, "num_chars": 14}, {"sum_logits": -16.85630226135254, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -8.42815113067627, "logits_per_char": -2.1070377826690674, "num_chars": 8}, {"sum_logits": -3.6969809532165527, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -3.6969809532165527, "logits_per_char": -0.5281401361737933, "num_chars": 7}, {"sum_logits": -5.054835796356201, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -5.054835796356201, "logits_per_char": -0.6318544745445251, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 608, "native_id": "7400e9c4a2c8e600a0f7e2d162a07837", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.819331169128418, "incorrect_loss_raw": 9.37969696521759, "correct_loss_per_char": 1.2819331169128418, "incorrect_loss_per_char": 1.1043403166713137, "correct_loss_per_token": 6.409665584564209, "incorrect_loss_per_token": 6.906675398349762, "correct_loss_uncond": -4.973990440368652, "incorrect_loss_uncond": -7.278514266014099}, "model_output": [{"sum_logits": -6.7657880783081055, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.399640083312988, "logits_per_token": -6.7657880783081055, "logits_per_char": -0.8457235097885132, "num_chars": 8}, {"sum_logits": -10.968827247619629, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -10.968827247619629, "logits_per_char": -1.8281378746032715, "num_chars": 6}, {"sum_logits": -12.51608657836914, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -21.041534423828125, "logits_per_token": -6.25804328918457, "logits_per_char": -1.1378260525790127, "num_chars": 11}, {"sum_logits": -12.819331169128418, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.79332160949707, "logits_per_token": -6.409665584564209, "logits_per_char": -1.2819331169128418, "num_chars": 10}, {"sum_logits": -7.268085956573486, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -3.634042978286743, "logits_per_char": -0.6056738297144572, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 609, "native_id": "fad197409a977126c9587eccd240ceea", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.803028583526611, "incorrect_loss_raw": 15.655629992485046, "correct_loss_per_char": 1.1338380972544353, "incorrect_loss_per_char": 1.479401906804433, "correct_loss_per_token": 3.4015142917633057, "incorrect_loss_per_token": 8.178960585594178, "correct_loss_uncond": -6.059598445892334, "incorrect_loss_uncond": -3.229723572731018}, "model_output": [{"sum_logits": -16.421344757080078, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.61535930633545, "logits_per_token": -8.210672378540039, "logits_per_char": -1.1729531969342912, "num_chars": 14}, {"sum_logits": -12.258438110351562, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -12.258438110351562, "logits_per_char": -1.5323047637939453, "num_chars": 8}, {"sum_logits": -6.803028583526611, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.862627029418945, "logits_per_token": -3.4015142917633057, "logits_per_char": -1.1338380972544353, "num_chars": 6}, {"sum_logits": -27.120006561279297, "num_tokens": 5, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -31.96542739868164, "logits_per_token": -5.424001312255859, "logits_per_char": -1.5066670311821833, "num_chars": 18}, {"sum_logits": -6.822730541229248, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -6.822730541229248, "logits_per_char": -1.705682635307312, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 610, "native_id": "f09038444aeb1a048f04dedd5b97b769", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.096892833709717, "incorrect_loss_raw": 7.88729202747345, "correct_loss_per_char": 0.5542629848827015, "incorrect_loss_per_char": 0.8990922914696978, "correct_loss_per_token": 3.0484464168548584, "incorrect_loss_per_token": 4.686108291149139, "correct_loss_uncond": -14.905186176300049, "incorrect_loss_uncond": -9.412693858146667}, "model_output": [{"sum_logits": -7.140928745269775, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -3.5704643726348877, "logits_per_char": -0.595077395439148, "num_chars": 12}, {"sum_logits": -9.120749473571777, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.770475387573242, "logits_per_token": -4.560374736785889, "logits_per_char": -1.302964210510254, "num_chars": 7}, {"sum_logits": -9.34779167175293, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.87839126586914, "logits_per_token": -4.673895835876465, "logits_per_char": -0.84979924288663, "num_chars": 11}, {"sum_logits": -6.096892833709717, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -3.0484464168548584, "logits_per_char": -0.5542629848827015, "num_chars": 11}, {"sum_logits": -5.939698219299316, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -5.939698219299316, "logits_per_char": -0.8485283170427594, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 611, "native_id": "0aa23ad1ba9f28bc3e0185237a7ce1cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.755908012390137, "incorrect_loss_raw": 13.224186420440674, "correct_loss_per_char": 0.9694885015487671, "incorrect_loss_per_char": 1.297955362711634, "correct_loss_per_token": 7.755908012390137, "incorrect_loss_per_token": 9.140859723091125, "correct_loss_uncond": -7.028785705566406, "incorrect_loss_uncond": -4.959424018859863}, "model_output": [{"sum_logits": -19.185270309448242, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.448081970214844, "logits_per_token": -9.592635154724121, "logits_per_char": -1.3703764506748743, "num_chars": 14}, {"sum_logits": -10.671653747558594, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.35022735595703, "logits_per_token": -10.671653747558594, "logits_per_char": -1.0671653747558594, "num_chars": 10}, {"sum_logits": -7.755908012390137, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -7.755908012390137, "logits_per_char": -0.9694885015487671, "num_chars": 8}, {"sum_logits": -13.481343269348145, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -6.740671634674072, "logits_per_char": -0.842583954334259, "num_chars": 16}, {"sum_logits": -9.558478355407715, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.466949462890625, "logits_per_token": -9.558478355407715, "logits_per_char": -1.911695671081543, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 612, "native_id": "06be29539ad3e1fbd7b53b05243f4bd7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3520691394805908, "incorrect_loss_raw": 10.469482898712158, "correct_loss_per_char": 0.22534485658009848, "incorrect_loss_per_char": 1.3246000905831656, "correct_loss_per_token": 1.3520691394805908, "incorrect_loss_per_token": 10.469482898712158, "correct_loss_uncond": -10.689256429672241, "incorrect_loss_uncond": -3.7813634872436523}, "model_output": [{"sum_logits": -8.73413372039795, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.257428169250488, "logits_per_token": -8.73413372039795, "logits_per_char": -1.0917667150497437, "num_chars": 8}, {"sum_logits": -1.3520691394805908, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": true, "sum_logits_uncond": -12.041325569152832, "logits_per_token": -1.3520691394805908, "logits_per_char": -0.22534485658009848, "num_chars": 6}, {"sum_logits": -8.873669624328613, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.94657039642334, "logits_per_token": -8.873669624328613, "logits_per_char": -0.7394724686940511, "num_chars": 12}, {"sum_logits": -11.045235633850098, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.780000686645508, "logits_per_token": -11.045235633850098, "logits_per_char": -1.5778908048357283, "num_chars": 7}, {"sum_logits": -13.224892616271973, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.019386291503906, "logits_per_token": -13.224892616271973, "logits_per_char": -1.8892703737531389, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 613, "native_id": "bbe0a1ad733e5699f991ff91b3712a6f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.255945205688477, "incorrect_loss_raw": 9.225290536880493, "correct_loss_per_char": 0.6569931507110596, "incorrect_loss_per_char": 1.093495134796415, "correct_loss_per_token": 5.255945205688477, "incorrect_loss_per_token": 6.790514707565308, "correct_loss_uncond": -11.114479064941406, "incorrect_loss_uncond": -6.3113508224487305}, "model_output": [{"sum_logits": -5.255945205688477, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.370424270629883, "logits_per_token": -5.255945205688477, "logits_per_char": -0.6569931507110596, "num_chars": 8}, {"sum_logits": -11.950693130493164, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.331829071044922, "logits_per_token": -11.950693130493164, "logits_per_char": -1.7072418757847376, "num_chars": 7}, {"sum_logits": -10.262726783752441, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.114463806152344, "logits_per_token": -5.131363391876221, "logits_per_char": -0.7330519131251744, "num_chars": 14}, {"sum_logits": -5.472262382507324, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.985722541809082, "logits_per_token": -5.472262382507324, "logits_per_char": -0.7817517689296177, "num_chars": 7}, {"sum_logits": -9.215479850769043, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.714550018310547, "logits_per_token": -4.6077399253845215, "logits_per_char": -1.1519349813461304, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 614, "native_id": "9e5ce2b7d9eb404cdf8c7317dd0b5a59", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.5519609451293945, "incorrect_loss_raw": 11.269826889038086, "correct_loss_per_char": 0.5394257817949567, "incorrect_loss_per_char": 1.0626315871874492, "correct_loss_per_token": 3.7759804725646973, "incorrect_loss_per_token": 6.355451822280884, "correct_loss_uncond": -11.40503215789795, "incorrect_loss_uncond": -5.753168344497681}, "model_output": [{"sum_logits": -10.336118698120117, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.981910705566406, "logits_per_token": -2.5840296745300293, "logits_per_char": -0.6460074186325073, "num_chars": 16}, {"sum_logits": -9.134565353393555, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -4.567282676696777, "logits_per_char": -1.0149517059326172, "num_chars": 9}, {"sum_logits": -7.5519609451293945, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.956993103027344, "logits_per_token": -3.7759804725646973, "logits_per_char": -0.5394257817949567, "num_chars": 14}, {"sum_logits": -14.676257133483887, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.519742965698242, "logits_per_token": -7.338128566741943, "logits_per_char": -1.223021427790324, "num_chars": 12}, {"sum_logits": -10.932366371154785, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.05655288696289, "logits_per_token": -10.932366371154785, "logits_per_char": -1.3665457963943481, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 615, "native_id": "ffde211723f55e9744f94cbc14488a23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.570154190063477, "incorrect_loss_raw": 13.677273035049438, "correct_loss_per_char": 0.9385934557233538, "incorrect_loss_per_char": 1.7961178942332192, "correct_loss_per_token": 6.570154190063477, "incorrect_loss_per_token": 8.085646748542786, "correct_loss_uncond": -8.300809860229492, "incorrect_loss_uncond": -3.520148754119873}, "model_output": [{"sum_logits": -14.201685905456543, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.071331024169922, "logits_per_token": -7.1008429527282715, "logits_per_char": -2.366947650909424, "num_chars": 6}, {"sum_logits": -16.37002182006836, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.18501091003418, "logits_per_char": -1.8188913133409288, "num_chars": 9}, {"sum_logits": -9.976081848144531, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -9.976081848144531, "logits_per_char": -1.425154549734933, "num_chars": 7}, {"sum_logits": -6.570154190063477, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.870964050292969, "logits_per_token": -6.570154190063477, "logits_per_char": -0.9385934557233538, "num_chars": 7}, {"sum_logits": -14.16130256652832, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.164608001708984, "logits_per_token": -7.08065128326416, "logits_per_char": -1.573478062947591, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 616, "native_id": "5ff8b0deed53b9ff91d58bd5b6f85bdf", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.5151567459106445, "incorrect_loss_raw": 9.049916505813599, "correct_loss_per_char": 0.6515156745910644, "incorrect_loss_per_char": 0.8582007895895849, "correct_loss_per_token": 3.2575783729553223, "incorrect_loss_per_token": 5.557904005050659, "correct_loss_uncond": -13.301466941833496, "incorrect_loss_uncond": -8.15577745437622}, "model_output": [{"sum_logits": -11.330634117126465, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.7796630859375, "logits_per_token": -5.665317058563232, "logits_per_char": -0.9442195097605387, "num_chars": 12}, {"sum_logits": -4.793987274169922, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.125431060791016, "logits_per_token": -2.396993637084961, "logits_per_char": -0.39949893951416016, "num_chars": 12}, {"sum_logits": -8.263566017150879, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -8.263566017150879, "logits_per_char": -1.180509431021554, "num_chars": 7}, {"sum_logits": -6.5151567459106445, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.81662368774414, "logits_per_token": -3.2575783729553223, "logits_per_char": -0.6515156745910644, "num_chars": 10}, {"sum_logits": -11.811478614807129, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.698488235473633, "logits_per_token": -5.9057393074035645, "logits_per_char": -0.9085752780620868, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 617, "native_id": "36f1ceeecde7abf99dab635239e12442", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.268776893615723, "incorrect_loss_raw": 14.720717668533325, "correct_loss_per_char": 0.6965307659573026, "incorrect_loss_per_char": 1.5762970474031235, "correct_loss_per_token": 3.1343884468078613, "incorrect_loss_per_token": 7.761018514633179, "correct_loss_uncond": -11.00086498260498, "incorrect_loss_uncond": -5.156982660293579}, "model_output": [{"sum_logits": -7.103174209594727, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.28336524963379, "logits_per_token": -3.5515871047973633, "logits_per_char": -0.7892415788438585, "num_chars": 9}, {"sum_logits": -22.37252426147461, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -27.001239776611328, "logits_per_token": -7.457508087158203, "logits_per_char": -1.8643770217895508, "num_chars": 12}, {"sum_logits": -6.268776893615723, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.269641876220703, "logits_per_token": -3.1343884468078613, "logits_per_char": -0.6965307659573026, "num_chars": 9}, {"sum_logits": -10.662785530090332, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.12999153137207, "logits_per_token": -10.662785530090332, "logits_per_char": -1.777130921681722, "num_chars": 6}, {"sum_logits": -18.744386672973633, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.09620475769043, "logits_per_token": -9.372193336486816, "logits_per_char": -1.8744386672973632, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 618, "native_id": "e3c9e83c0c62d842de2dfe229f5e6d41", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.355902671813965, "incorrect_loss_raw": 10.154792070388794, "correct_loss_per_char": 0.6427617439856896, "incorrect_loss_per_char": 1.4099407454331716, "correct_loss_per_token": 4.177951335906982, "incorrect_loss_per_token": 7.486716270446777, "correct_loss_uncond": -8.512995719909668, "incorrect_loss_uncond": -6.528583288192749}, "model_output": [{"sum_logits": -9.323826789855957, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -9.323826789855957, "logits_per_char": -1.5539711316426594, "num_chars": 6}, {"sum_logits": -11.715014457702637, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.02178192138672, "logits_per_token": -5.857507228851318, "logits_per_char": -1.4643768072128296, "num_chars": 8}, {"sum_logits": -9.950735092163086, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.019948959350586, "logits_per_token": -9.950735092163086, "logits_per_char": -1.6584558486938477, "num_chars": 6}, {"sum_logits": -9.629591941833496, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.83334732055664, "logits_per_token": -4.814795970916748, "logits_per_char": -0.9629591941833496, "num_chars": 10}, {"sum_logits": -8.355902671813965, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.868898391723633, "logits_per_token": -4.177951335906982, "logits_per_char": -0.6427617439856896, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 619, "native_id": "c0e4d0118c9cdfe2edc49ef954572b31", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.653301239013672, "incorrect_loss_raw": 13.453498601913452, "correct_loss_per_char": 1.163325309753418, "incorrect_loss_per_char": 1.4529541443694722, "correct_loss_per_token": 4.653301239013672, "incorrect_loss_per_token": 8.630555152893066, "correct_loss_uncond": -8.944131851196289, "incorrect_loss_uncond": -2.6930885314941406}, "model_output": [{"sum_logits": -21.20303726196289, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.831436157226562, "logits_per_token": -10.601518630981445, "logits_per_char": -1.9275488419966265, "num_chars": 11}, {"sum_logits": -17.380510330200195, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -17.654029846191406, "logits_per_token": -8.690255165100098, "logits_per_char": -1.0862818956375122, "num_chars": 16}, {"sum_logits": -4.653301239013672, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.597433090209961, "logits_per_token": -4.653301239013672, "logits_per_char": -1.163325309753418, "num_chars": 4}, {"sum_logits": -7.787341117858887, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.516927719116211, "logits_per_token": -7.787341117858887, "logits_per_char": -1.5574682235717774, "num_chars": 5}, {"sum_logits": -7.443105697631836, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.583954811096191, "logits_per_token": -7.443105697631836, "logits_per_char": -1.2405176162719727, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 620, "native_id": "4423c006f2a43f222d4c4e97360c25d3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.478477478027344, "incorrect_loss_raw": 21.581472873687744, "correct_loss_per_char": 0.7291136521559495, "incorrect_loss_per_char": 1.1752325367963512, "correct_loss_per_token": 4.739238739013672, "incorrect_loss_per_token": 7.720598967870076, "correct_loss_uncond": -6.586574554443359, "incorrect_loss_uncond": 0.034758567810058594}, "model_output": [{"sum_logits": -9.478477478027344, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.065052032470703, "logits_per_token": -4.739238739013672, "logits_per_char": -0.7291136521559495, "num_chars": 13}, {"sum_logits": -16.215200424194336, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -5.405066808064778, "logits_per_char": -1.0810133616129558, "num_chars": 15}, {"sum_logits": -31.926721572875977, "num_tokens": 5, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -30.770191192626953, "logits_per_token": -6.385344314575195, "logits_per_char": -1.2279508297259991, "num_chars": 26}, {"sum_logits": -19.58808135986328, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.82876205444336, "logits_per_token": -9.79404067993164, "logits_per_char": -1.1522400799919577, "num_chars": 17}, {"sum_logits": -18.595888137817383, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.358997344970703, "logits_per_token": -9.297944068908691, "logits_per_char": -1.2397258758544922, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 621, "native_id": "9382bc51ba092f55a494eff8615899de", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.470704078674316, "incorrect_loss_raw": 12.248067378997803, "correct_loss_per_char": 1.1838380098342896, "incorrect_loss_per_char": 1.5224518885020337, "correct_loss_per_token": 4.735352039337158, "incorrect_loss_per_token": 6.633480032285054, "correct_loss_uncond": -7.427075386047363, "incorrect_loss_uncond": -5.013119697570801}, "model_output": [{"sum_logits": -8.58005142211914, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -8.58005142211914, "logits_per_char": -1.4300085703531902, "num_chars": 6}, {"sum_logits": -11.128296852111816, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.43558692932129, "logits_per_token": -5.564148426055908, "logits_per_char": -1.236477428012424, "num_chars": 9}, {"sum_logits": -9.470704078674316, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.89777946472168, "logits_per_token": -4.735352039337158, "logits_per_char": -1.1838380098342896, "num_chars": 8}, {"sum_logits": -13.513442039489746, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.956085205078125, "logits_per_token": -4.504480679829915, "logits_per_char": -0.7949083552641028, "num_chars": 17}, {"sum_logits": -15.770479202270508, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.038745880126953, "logits_per_token": -7.885239601135254, "logits_per_char": -2.628413200378418, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 622, "native_id": "dec1c42628a7448aa364cdada6e82f98", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.482169151306152, "incorrect_loss_raw": 12.126863479614258, "correct_loss_per_char": 1.1482169151306152, "incorrect_loss_per_char": 1.1721302529176076, "correct_loss_per_token": 5.741084575653076, "incorrect_loss_per_token": 9.207005739212036, "correct_loss_uncond": -4.395153045654297, "incorrect_loss_uncond": -2.9437828063964844}, "model_output": [{"sum_logits": -11.482169151306152, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -5.741084575653076, "logits_per_char": -1.1482169151306152, "num_chars": 10}, {"sum_logits": -12.448612213134766, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.838809967041016, "logits_per_token": -6.224306106567383, "logits_per_char": -1.0373843510945637, "num_chars": 12}, {"sum_logits": -10.869635581970215, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -10.869635581970215, "logits_per_char": -1.0869635581970214, "num_chars": 10}, {"sum_logits": -14.278956413269043, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -14.278956413269043, "logits_per_char": -1.7848695516586304, "num_chars": 8}, {"sum_logits": -10.910249710083008, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -5.455124855041504, "logits_per_char": -0.7793035507202148, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 623, "native_id": "07ea8ff6ee916f2bf9aceab1e19ff99a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.763690948486328, "incorrect_loss_raw": 9.708773016929626, "correct_loss_per_char": 0.6763690948486328, "incorrect_loss_per_char": 1.0376051796807184, "correct_loss_per_token": 3.381845474243164, "incorrect_loss_per_token": 7.340040802955627, "correct_loss_uncond": -11.07186508178711, "incorrect_loss_uncond": -5.966178297996521}, "model_output": [{"sum_logits": -5.925222396850586, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.976659774780273, "logits_per_token": -2.962611198425293, "logits_per_char": -0.49376853307088214, "num_chars": 12}, {"sum_logits": -13.024635314941406, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -6.512317657470703, "logits_per_char": -1.4471817016601562, "num_chars": 9}, {"sum_logits": -5.250457286834717, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -5.250457286834717, "logits_per_char": -0.5833841429816352, "num_chars": 9}, {"sum_logits": -6.763690948486328, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.835556030273438, "logits_per_token": -3.381845474243164, "logits_per_char": -0.6763690948486328, "num_chars": 10}, {"sum_logits": -14.634777069091797, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.583192825317383, "logits_per_token": -14.634777069091797, "logits_per_char": -1.6260863410101996, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 624, "native_id": "a328285c6212c899e335c45db3c49ffd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.725650787353516, "incorrect_loss_raw": 10.946064233779907, "correct_loss_per_char": 0.7157063484191895, "incorrect_loss_per_char": 1.3867209694602272, "correct_loss_per_token": 5.725650787353516, "incorrect_loss_per_token": 6.742168466250102, "correct_loss_uncond": -10.272451400756836, "incorrect_loss_uncond": -5.307549715042114}, "model_output": [{"sum_logits": -14.49307918548584, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.429981231689453, "logits_per_token": -7.24653959274292, "logits_per_char": -1.449307918548584, "num_chars": 10}, {"sum_logits": -14.353565216064453, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.27172088623047, "logits_per_token": -4.784521738688151, "logits_per_char": -1.3048695650967685, "num_chars": 11}, {"sum_logits": -9.09312915802002, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.191305160522461, "logits_per_token": -9.09312915802002, "logits_per_char": -1.8186258316040038, "num_chars": 5}, {"sum_logits": -5.725650787353516, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -5.725650787353516, "logits_per_char": -0.7157063484191895, "num_chars": 8}, {"sum_logits": -5.844483375549316, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -5.844483375549316, "logits_per_char": -0.9740805625915527, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 625, "native_id": "e248968fec422e1fab0f0561fedff76e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.788641929626465, "incorrect_loss_raw": 8.271750569343567, "correct_loss_per_char": 0.799290120601654, "incorrect_loss_per_char": 1.1360891406412248, "correct_loss_per_token": 6.394320964813232, "incorrect_loss_per_token": 6.572782814502716, "correct_loss_uncond": -6.86069393157959, "incorrect_loss_uncond": -7.955877661705017}, "model_output": [{"sum_logits": -6.192631244659424, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.7442626953125, "logits_per_token": -3.096315622329712, "logits_per_char": -0.5160526037216187, "num_chars": 12}, {"sum_logits": -9.985856056213379, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.952707290649414, "logits_per_token": -9.985856056213379, "logits_per_char": -1.9971712112426758, "num_chars": 5}, {"sum_logits": -7.399110794067383, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.118465423583984, "logits_per_token": -3.6995553970336914, "logits_per_char": -0.6726464358243075, "num_chars": 11}, {"sum_logits": -12.788641929626465, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.649335861206055, "logits_per_token": -6.394320964813232, "logits_per_char": -0.799290120601654, "num_chars": 16}, {"sum_logits": -9.509404182434082, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.095077514648438, "logits_per_token": -9.509404182434082, "logits_per_char": -1.3584863117762975, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 626, "native_id": "2067720531fc03c017af941cec2f6f40", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.146087408065796, "incorrect_loss_raw": 9.075000047683716, "correct_loss_per_char": 0.26217395067214966, "incorrect_loss_per_char": 1.0603157058594719, "correct_loss_per_token": 1.573043704032898, "incorrect_loss_per_token": 6.684125185012817, "correct_loss_uncond": -12.990307092666626, "incorrect_loss_uncond": -6.448190212249756}, "model_output": [{"sum_logits": -19.126998901367188, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -9.563499450683594, "logits_per_char": -1.9126998901367187, "num_chars": 10}, {"sum_logits": -6.536280632019043, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.326382637023926, "logits_per_token": -6.536280632019043, "logits_per_char": -0.9337543760027204, "num_chars": 7}, {"sum_logits": -3.146087408065796, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -1.573043704032898, "logits_per_char": -0.26217395067214966, "num_chars": 12}, {"sum_logits": -3.8331127166748047, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -3.8331127166748047, "logits_per_char": -0.6388521194458008, "num_chars": 6}, {"sum_logits": -6.803607940673828, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -6.803607940673828, "logits_per_char": -0.7559564378526475, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 627, "native_id": "70d3ebc00b165d9d08f9491a1dd85034", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.414873123168945, "incorrect_loss_raw": 7.26517915725708, "correct_loss_per_char": 0.49226119301535864, "incorrect_loss_per_char": 0.6616233939216251, "correct_loss_per_token": 2.7074365615844727, "incorrect_loss_per_token": 4.25034761428833, "correct_loss_uncond": -10.983335494995117, "incorrect_loss_uncond": -10.492624759674072}, "model_output": [{"sum_logits": -8.385414123535156, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -4.192707061767578, "logits_per_char": -0.44133758544921875, "num_chars": 19}, {"sum_logits": -5.414873123168945, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.398208618164062, "logits_per_token": -2.7074365615844727, "logits_per_char": -0.49226119301535864, "num_chars": 11}, {"sum_logits": -9.018655776977539, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.01470184326172, "logits_per_token": -4.5093278884887695, "logits_per_char": -0.9018655776977539, "num_chars": 10}, {"sum_logits": -6.714582443237305, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.88495445251465, "logits_per_token": -3.3572912216186523, "logits_per_char": -0.47961303165980745, "num_chars": 14}, {"sum_logits": -4.94206428527832, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.45895004272461, "logits_per_token": -4.94206428527832, "logits_per_char": -0.8236773808797201, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 628, "native_id": "41bab71fea3fa04e5a4e10a2f86996df", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.413618087768555, "incorrect_loss_raw": 10.01409101486206, "correct_loss_per_char": 0.7733740125383649, "incorrect_loss_per_char": 1.256593474320003, "correct_loss_per_token": 5.413618087768555, "incorrect_loss_per_token": 8.933542013168335, "correct_loss_uncond": -7.06639289855957, "incorrect_loss_uncond": -7.0672242641448975}, "model_output": [{"sum_logits": -14.660292625427246, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -15.229870796203613, "logits_per_token": -14.660292625427246, "logits_per_char": -2.094327517918178, "num_chars": 7}, {"sum_logits": -7.056219100952148, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -7.056219100952148, "logits_per_char": -0.8820273876190186, "num_chars": 8}, {"sum_logits": -8.644392013549805, "num_tokens": 2, "num_tokens_all": 177, "is_greedy": false, "sum_logits_uncond": -20.09314727783203, "logits_per_token": -4.322196006774902, "logits_per_char": -0.6649532318115234, "num_chars": 13}, {"sum_logits": -9.695460319519043, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -16.823331832885742, "logits_per_token": -9.695460319519043, "logits_per_char": -1.3850657599312919, "num_chars": 7}, {"sum_logits": -5.413618087768555, "num_tokens": 1, "num_tokens_all": 176, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -5.413618087768555, "logits_per_char": -0.7733740125383649, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 629, "native_id": "e18dd9ffc7b7934c39f2b5e9dee5a8c2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.710843563079834, "incorrect_loss_raw": 15.723341703414917, "correct_loss_per_char": 0.7710843563079834, "incorrect_loss_per_char": 1.5723341703414917, "correct_loss_per_token": 3.855421781539917, "incorrect_loss_per_token": 9.465824365615845, "correct_loss_uncond": -14.339266300201416, "incorrect_loss_uncond": -1.996047019958496}, "model_output": [{"sum_logits": -14.932563781738281, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.941198348999023, "logits_per_token": -7.466281890869141, "logits_per_char": -1.4932563781738282, "num_chars": 10}, {"sum_logits": -16.32168960571289, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -8.160844802856445, "logits_per_char": -1.632168960571289, "num_chars": 10}, {"sum_logits": -12.83322811126709, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -12.83322811126709, "logits_per_char": -1.283322811126709, "num_chars": 10}, {"sum_logits": -18.805885314941406, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.04369354248047, "logits_per_token": -9.402942657470703, "logits_per_char": -1.8805885314941406, "num_chars": 10}, {"sum_logits": -7.710843563079834, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.05010986328125, "logits_per_token": -3.855421781539917, "logits_per_char": -0.7710843563079834, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 630, "native_id": "449de58e919975867255218484a9fc89", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2338578701019287, "incorrect_loss_raw": 11.617502689361572, "correct_loss_per_char": 0.15956127643585205, "incorrect_loss_per_char": 1.3807212034861247, "correct_loss_per_token": 2.2338578701019287, "incorrect_loss_per_token": 8.684346556663513, "correct_loss_uncond": -12.838911294937134, "incorrect_loss_uncond": -3.924489974975586}, "model_output": [{"sum_logits": -2.2338578701019287, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.072769165039062, "logits_per_token": -2.2338578701019287, "logits_per_char": -0.15956127643585205, "num_chars": 14}, {"sum_logits": -12.019935607910156, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.51049041748047, "logits_per_token": -6.009967803955078, "logits_per_char": -1.2019935607910157, "num_chars": 10}, {"sum_logits": -9.86650562286377, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.47290325164795, "logits_per_token": -9.86650562286377, "logits_per_char": -0.9866505622863769, "num_chars": 10}, {"sum_logits": -11.445313453674316, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.896928787231445, "logits_per_token": -5.722656726837158, "logits_per_char": -1.1445313453674317, "num_chars": 10}, {"sum_logits": -13.138256072998047, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -13.138256072998047, "logits_per_char": -2.1897093454996743, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 631, "native_id": "9698232e3599157431c9dc8f2fe179cd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.868015289306641, "incorrect_loss_raw": 10.558450937271118, "correct_loss_per_char": 0.38155640496148, "incorrect_loss_per_char": 1.8668203512827557, "correct_loss_per_token": 3.4340076446533203, "incorrect_loss_per_token": 8.40325403213501, "correct_loss_uncond": -13.229881286621094, "incorrect_loss_uncond": -3.8827505111694336}, "model_output": [{"sum_logits": -10.157652854919434, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.702693939208984, "logits_per_token": -10.157652854919434, "logits_per_char": -2.5394132137298584, "num_chars": 4}, {"sum_logits": -6.868015289306641, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.097896575927734, "logits_per_token": -3.4340076446533203, "logits_per_char": -0.38155640496148, "num_chars": 18}, {"sum_logits": -17.241575241088867, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.628829956054688, "logits_per_token": -8.620787620544434, "logits_per_char": -2.1551969051361084, "num_chars": 8}, {"sum_logits": -9.0072603225708, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.925810813903809, "logits_per_token": -9.0072603225708, "logits_per_char": -1.80145206451416, "num_chars": 5}, {"sum_logits": -5.827315330505371, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.507471084594727, "logits_per_token": -5.827315330505371, "logits_per_char": -0.9712192217508951, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 632, "native_id": "0b5d0c3bafbe06dd5334c20cd8ea7fe2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.203767776489258, "incorrect_loss_raw": 11.598936319351196, "correct_loss_per_char": 0.5668759875827365, "incorrect_loss_per_char": 1.330163950679683, "correct_loss_per_token": 5.101883888244629, "incorrect_loss_per_token": 10.475713014602661, "correct_loss_uncond": -7.001445770263672, "incorrect_loss_uncond": -2.6236958503723145}, "model_output": [{"sum_logits": -17.865070343017578, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.197978019714355, "logits_per_token": -17.865070343017578, "logits_per_char": -2.5521529061453685, "num_chars": 7}, {"sum_logits": -6.660228729248047, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.051198959350586, "logits_per_token": -6.660228729248047, "logits_per_char": -0.9514612470354352, "num_chars": 7}, {"sum_logits": -8.985786437988281, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.151119232177734, "logits_per_token": -4.492893218994141, "logits_per_char": -0.5285756728228401, "num_chars": 17}, {"sum_logits": -10.203767776489258, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.20521354675293, "logits_per_token": -5.101883888244629, "logits_per_char": -0.5668759875827365, "num_chars": 18}, {"sum_logits": -12.884659767150879, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -12.884659767150879, "logits_per_char": -1.288465976715088, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 633, "native_id": "7fe53bf68ec57a52a508611acf5b279e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.63001537322998, "incorrect_loss_raw": 12.341163396835327, "correct_loss_per_char": 1.116429669516427, "incorrect_loss_per_char": 1.0341339983782925, "correct_loss_per_token": 7.81500768661499, "incorrect_loss_per_token": 5.321960846583048, "correct_loss_uncond": -4.839163780212402, "incorrect_loss_uncond": -6.190734148025513}, "model_output": [{"sum_logits": -15.162677764892578, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -18.18124771118164, "logits_per_token": -7.581338882446289, "logits_per_char": -1.1663598280686598, "num_chars": 13}, {"sum_logits": -9.288138389587402, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -18.924055099487305, "logits_per_token": -3.0960461298624673, "logits_per_char": -0.6634384563991002, "num_chars": 14}, {"sum_logits": -11.07876205444336, "num_tokens": 3, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -16.534706115722656, "logits_per_token": -3.692920684814453, "logits_per_char": -0.9232301712036133, "num_chars": 12}, {"sum_logits": -13.835075378417969, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -20.487581253051758, "logits_per_token": -6.917537689208984, "logits_per_char": -1.383507537841797, "num_chars": 10}, {"sum_logits": -15.63001537322998, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -20.469179153442383, "logits_per_token": -7.81500768661499, "logits_per_char": -1.116429669516427, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 634, "native_id": "68c41ec8415eab50620eb9ecf6f35a6a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.619991302490234, "incorrect_loss_raw": 11.575342059135437, "correct_loss_per_char": 1.4366652170817058, "incorrect_loss_per_char": 1.0359005702959074, "correct_loss_per_token": 8.619991302490234, "incorrect_loss_per_token": 4.6942209005355835, "correct_loss_uncond": -5.236289024353027, "incorrect_loss_uncond": -8.34556519985199}, "model_output": [{"sum_logits": -20.93773651123047, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -25.513315200805664, "logits_per_token": -5.234434127807617, "logits_per_char": -1.9034305919300427, "num_chars": 11}, {"sum_logits": -8.619991302490234, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.856280326843262, "logits_per_token": -8.619991302490234, "logits_per_char": -1.4366652170817058, "num_chars": 6}, {"sum_logits": -4.38503360748291, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.87264060974121, "logits_per_token": -4.38503360748291, "logits_per_char": -0.6264333724975586, "num_chars": 7}, {"sum_logits": -3.2468247413635254, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -3.2468247413635254, "logits_per_char": -0.2497557493356558, "num_chars": 13}, {"sum_logits": -17.731773376464844, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.90459442138672, "logits_per_token": -5.910591125488281, "logits_per_char": -1.3639825674203725, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 635, "native_id": "6c4b2c93a4bdafb6cbf2b2ef2439b06f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.241201400756836, "incorrect_loss_raw": 7.309814751148224, "correct_loss_per_char": 0.436766783396403, "incorrect_loss_per_char": 0.7507449302150401, "correct_loss_per_token": 2.620600700378418, "incorrect_loss_per_token": 6.530637145042419, "correct_loss_uncond": -11.930219650268555, "incorrect_loss_uncond": -6.86569207906723}, "model_output": [{"sum_logits": -12.403436660766602, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.078682899475098, "logits_per_token": -12.403436660766602, "logits_per_char": -1.127585150978782, "num_chars": 11}, {"sum_logits": -8.271153450012207, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.073770523071289, "logits_per_token": -8.271153450012207, "logits_per_char": -0.9190170500013564, "num_chars": 9}, {"sum_logits": -5.241201400756836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.17142105102539, "logits_per_token": -2.620600700378418, "logits_per_char": -0.436766783396403, "num_chars": 12}, {"sum_logits": -6.2334208488464355, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -3.1167104244232178, "logits_per_char": -0.6233420848846436, "num_chars": 10}, {"sum_logits": -2.3312480449676514, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -11.577110290527344, "logits_per_token": -2.3312480449676514, "logits_per_char": -0.33303543499537874, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 636, "native_id": "51e2da7396ab7045533e885dbb98a424", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.514923095703125, "incorrect_loss_raw": 8.936422169208527, "correct_loss_per_char": 0.4514923095703125, "incorrect_loss_per_char": 0.7955448163278175, "correct_loss_per_token": 2.2574615478515625, "incorrect_loss_per_token": 6.138085782527924, "correct_loss_uncond": -11.254810333251953, "incorrect_loss_uncond": -6.427030265331268}, "model_output": [{"sum_logits": -7.652199745178223, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.291111946105957, "logits_per_token": -3.8260998725891113, "logits_per_char": -0.6956545222889293, "num_chars": 11}, {"sum_logits": -4.514923095703125, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.769733428955078, "logits_per_token": -2.2574615478515625, "logits_per_char": -0.4514923095703125, "num_chars": 10}, {"sum_logits": -11.332453727722168, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.38656997680664, "logits_per_token": -11.332453727722168, "logits_per_char": -0.9443711439768473, "num_chars": 12}, {"sum_logits": -2.0265438556671143, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.104832649230957, "logits_per_token": -2.0265438556671143, "logits_per_char": -0.20265438556671142, "num_chars": 10}, {"sum_logits": -14.734491348266602, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.671295166015625, "logits_per_token": -7.367245674133301, "logits_per_char": -1.339499213478782, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 637, "native_id": "3f6157968fcf50d257ec3d8c729b7443", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.392024993896484, "incorrect_loss_raw": 9.363486051559448, "correct_loss_per_char": 1.043558332655165, "incorrect_loss_per_char": 1.1827236144573658, "correct_loss_per_token": 9.392024993896484, "incorrect_loss_per_token": 7.348790407180786, "correct_loss_uncond": -2.547100067138672, "incorrect_loss_uncond": -5.312031984329224}, "model_output": [{"sum_logits": -9.392024993896484, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.939125061035156, "logits_per_token": -9.392024993896484, "logits_per_char": -1.043558332655165, "num_chars": 9}, {"sum_logits": -11.316192626953125, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.238255500793457, "logits_per_token": -11.316192626953125, "logits_per_char": -1.257354736328125, "num_chars": 9}, {"sum_logits": -6.431815147399902, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -6.431815147399902, "logits_per_char": -1.2863630294799804, "num_chars": 5}, {"sum_logits": -12.088173866271973, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.935651779174805, "logits_per_token": -4.029391288757324, "logits_per_char": -1.0989248969338157, "num_chars": 11}, {"sum_logits": -7.617762565612793, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.016900062561035, "logits_per_token": -7.617762565612793, "logits_per_char": -1.0882517950875419, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 638, "native_id": "4768aa28fa14569d830f8947565296c1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.759229898452759, "incorrect_loss_raw": 8.244644522666931, "correct_loss_per_char": 0.34490373730659485, "incorrect_loss_per_char": 1.051814993862995, "correct_loss_per_token": 2.759229898452759, "incorrect_loss_per_token": 8.244644522666931, "correct_loss_uncond": -10.190030813217163, "incorrect_loss_uncond": -5.857045531272888}, "model_output": [{"sum_logits": -4.939934253692627, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.940896987915039, "logits_per_token": -4.939934253692627, "logits_per_char": -0.9879868507385254, "num_chars": 5}, {"sum_logits": -9.647184371948242, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.416156768798828, "logits_per_token": -9.647184371948242, "logits_per_char": -1.3781691959926061, "num_chars": 7}, {"sum_logits": -12.471506118774414, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.410898208618164, "logits_per_token": -12.471506118774414, "logits_per_char": -1.385722902086046, "num_chars": 9}, {"sum_logits": -2.759229898452759, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.949260711669922, "logits_per_token": -2.759229898452759, "logits_per_char": -0.34490373730659485, "num_chars": 8}, {"sum_logits": -5.919953346252441, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.638808250427246, "logits_per_token": -5.919953346252441, "logits_per_char": -0.4553810266348032, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 639, "native_id": "5516b1c93f94aaa0bf9a4c7b124788d4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.364931106567383, "incorrect_loss_raw": 17.345325469970703, "correct_loss_per_char": 1.3364931106567384, "incorrect_loss_per_char": 1.4702929558215683, "correct_loss_per_token": 6.682465553283691, "incorrect_loss_per_token": 7.78132422765096, "correct_loss_uncond": -6.401308059692383, "incorrect_loss_uncond": -1.6933786869049072}, "model_output": [{"sum_logits": -13.364931106567383, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.766239166259766, "logits_per_token": -6.682465553283691, "logits_per_char": -1.3364931106567384, "num_chars": 10}, {"sum_logits": -14.36115837097168, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.475631713867188, "logits_per_token": -7.18057918548584, "logits_per_char": -1.1047044900747447, "num_chars": 13}, {"sum_logits": -12.577781677246094, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.721759796142578, "logits_per_token": -6.288890838623047, "logits_per_char": -1.1434346979314631, "num_chars": 11}, {"sum_logits": -21.05023765563965, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.44722843170166, "logits_per_token": -10.525118827819824, "logits_per_char": -2.1050237655639648, "num_chars": 10}, {"sum_logits": -21.39212417602539, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.510196685791016, "logits_per_token": -7.13070805867513, "logits_per_char": -1.5280088697160994, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 640, "native_id": "96ea2c3174229c4a6a0e2ffaed2df378", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.460637092590332, "incorrect_loss_raw": 10.22303819656372, "correct_loss_per_char": 2.6921274185180666, "incorrect_loss_per_char": 1.3855793121315183, "correct_loss_per_token": 6.730318546295166, "incorrect_loss_per_token": 6.525732159614563, "correct_loss_uncond": -2.4058971405029297, "incorrect_loss_uncond": -4.343173027038574}, "model_output": [{"sum_logits": -13.001002311706543, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -6.5005011558532715, "logits_per_char": -1.6251252889633179, "num_chars": 8}, {"sum_logits": -5.730252265930176, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -5.730252265930176, "logits_per_char": -1.1460504531860352, "num_chars": 5}, {"sum_logits": -9.727813720703125, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -9.727813720703125, "logits_per_char": -1.3896876743861608, "num_chars": 7}, {"sum_logits": -12.433084487915039, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.524063110351562, "logits_per_token": -4.14436149597168, "logits_per_char": -1.3814538319905598, "num_chars": 9}, {"sum_logits": -13.460637092590332, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.866534233093262, "logits_per_token": -6.730318546295166, "logits_per_char": -2.6921274185180666, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 641, "native_id": "7905b9f4ba503b0ce13b576808e99c42", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.9890718460083, "incorrect_loss_raw": 9.397156119346619, "correct_loss_per_char": 0.6914670650775616, "incorrect_loss_per_char": 1.0045989746139163, "correct_loss_per_token": 2.996357282002767, "incorrect_loss_per_token": 5.054679075876872, "correct_loss_uncond": -13.293776512145996, "incorrect_loss_uncond": -9.869325995445251}, "model_output": [{"sum_logits": -8.9890718460083, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.282848358154297, "logits_per_token": -2.996357282002767, "logits_per_char": -0.6914670650775616, "num_chars": 13}, {"sum_logits": -12.848761558532715, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.115236282348633, "logits_per_token": -4.282920519510905, "logits_per_char": -0.9177686827523368, "num_chars": 14}, {"sum_logits": -13.607311248779297, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -6.803655624389648, "logits_per_char": -1.5119234720865886, "num_chars": 9}, {"sum_logits": -4.000823020935059, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.371767044067383, "logits_per_token": -2.0004115104675293, "logits_per_char": -0.40008230209350587, "num_chars": 10}, {"sum_logits": -7.131728649139404, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -7.131728649139404, "logits_per_char": -1.1886214415232341, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 642, "native_id": "e0a7d1df3ce14b27888e785e6636d5f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.388781547546387, "incorrect_loss_raw": 8.226144671440125, "correct_loss_per_char": 0.49144473442664516, "incorrect_loss_per_char": 0.8488658757436842, "correct_loss_per_token": 3.1943907737731934, "incorrect_loss_per_token": 5.4152934948603315, "correct_loss_uncond": -15.43199634552002, "incorrect_loss_uncond": -8.389326930046082}, "model_output": [{"sum_logits": -9.017229080200195, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -4.508614540100098, "logits_per_char": -0.6011486053466797, "num_chars": 15}, {"sum_logits": -7.76605224609375, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.534867286682129, "logits_per_token": -7.76605224609375, "logits_per_char": -1.10943603515625, "num_chars": 7}, {"sum_logits": -6.388781547546387, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.820777893066406, "logits_per_token": -3.1943907737731934, "logits_per_char": -0.49144473442664516, "num_chars": 13}, {"sum_logits": -6.0191121101379395, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.557395935058594, "logits_per_token": -6.0191121101379395, "logits_per_char": -1.203822422027588, "num_chars": 5}, {"sum_logits": -10.102185249328613, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.215415954589844, "logits_per_token": -3.3673950831095376, "logits_per_char": -0.4810564404442197, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 643, "native_id": "3eb397b96b6c3a245c81ab30205943f1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.054731845855713, "incorrect_loss_raw": 10.770840167999268, "correct_loss_per_char": 0.3378943204879761, "incorrect_loss_per_char": 1.7129455261760287, "correct_loss_per_token": 2.0273659229278564, "incorrect_loss_per_token": 9.444321155548096, "correct_loss_uncond": -12.838508129119873, "incorrect_loss_uncond": -4.197504758834839}, "model_output": [{"sum_logits": -13.768780708312988, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -14.348709106445312, "logits_per_token": -13.768780708312988, "logits_per_char": -1.5298645231458876, "num_chars": 9}, {"sum_logits": -7.213312149047852, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -14.630324363708496, "logits_per_token": -7.213312149047852, "logits_per_char": -0.9016640186309814, "num_chars": 8}, {"sum_logits": -4.054731845855713, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -16.893239974975586, "logits_per_token": -2.0273659229278564, "logits_per_char": -0.3378943204879761, "num_chars": 12}, {"sum_logits": -10.612152099609375, "num_tokens": 2, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -17.195316314697266, "logits_per_token": -5.3060760498046875, "logits_per_char": -2.122430419921875, "num_chars": 5}, {"sum_logits": -11.489115715026855, "num_tokens": 1, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -13.699029922485352, "logits_per_token": -11.489115715026855, "logits_per_char": -2.2978231430053713, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 644, "native_id": "536c9af0fae0aa75b32874dfcac66353", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.2347307205200195, "incorrect_loss_raw": 12.29501461982727, "correct_loss_per_char": 0.4795946708092323, "incorrect_loss_per_char": 1.1848528271629697, "correct_loss_per_token": 3.1173653602600098, "incorrect_loss_per_token": 7.3456807136535645, "correct_loss_uncond": -10.131363868713379, "incorrect_loss_uncond": -3.5614938735961914}, "model_output": [{"sum_logits": -6.2347307205200195, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.3660945892334, "logits_per_token": -3.1173653602600098, "logits_per_char": -0.4795946708092323, "num_chars": 13}, {"sum_logits": -14.115859031677246, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.61535930633545, "logits_per_token": -7.057929515838623, "logits_per_char": -1.0082756451198034, "num_chars": 14}, {"sum_logits": -16.890642166137695, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.238094329833984, "logits_per_token": -8.445321083068848, "logits_per_char": -1.4075535138448079, "num_chars": 12}, {"sum_logits": -8.588170051574707, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -4.2940850257873535, "logits_per_char": -0.9542411168416342, "num_chars": 9}, {"sum_logits": -9.585387229919434, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.839591979980469, "logits_per_token": -9.585387229919434, "logits_per_char": -1.3693410328456335, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 645, "native_id": "dc36293f603cf230f8059fc6f2e5660d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.0740742683410645, "incorrect_loss_raw": 7.1404112577438354, "correct_loss_per_char": 0.6342592835426331, "incorrect_loss_per_char": 1.232843930380685, "correct_loss_per_token": 5.0740742683410645, "incorrect_loss_per_token": 7.1404112577438354, "correct_loss_uncond": -10.924027919769287, "incorrect_loss_uncond": -7.467568516731262}, "model_output": [{"sum_logits": -8.487366676330566, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -8.487366676330566, "logits_per_char": -1.2124809537615096, "num_chars": 7}, {"sum_logits": -6.068609237670898, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -6.068609237670898, "logits_per_char": -0.6068609237670899, "num_chars": 10}, {"sum_logits": -5.0740742683410645, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -5.0740742683410645, "logits_per_char": -0.6342592835426331, "num_chars": 8}, {"sum_logits": -9.333067893981934, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.551799774169922, "logits_per_token": -9.333067893981934, "logits_per_char": -2.3332669734954834, "num_chars": 4}, {"sum_logits": -4.672601222991943, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -4.672601222991943, "logits_per_char": -0.7787668704986572, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 646, "native_id": "1510f5183095466e4fe41b82501a9dd0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8103349208831787, "incorrect_loss_raw": 16.276965856552124, "correct_loss_per_char": 0.42337054676479763, "incorrect_loss_per_char": 1.9470596631368, "correct_loss_per_token": 3.8103349208831787, "incorrect_loss_per_token": 11.77162516117096, "correct_loss_uncond": -11.59436821937561, "incorrect_loss_uncond": -0.1697249412536621}, "model_output": [{"sum_logits": -11.393444061279297, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.171759605407715, "logits_per_token": -11.393444061279297, "logits_per_char": -2.2786888122558593, "num_chars": 5}, {"sum_logits": -23.613014221191406, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -11.806507110595703, "logits_per_char": -2.3613014221191406, "num_chars": 10}, {"sum_logits": -3.8103349208831787, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.404703140258789, "logits_per_token": -3.8103349208831787, "logits_per_char": -0.42337054676479763, "num_chars": 9}, {"sum_logits": -12.42971134185791, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.042043685913086, "logits_per_token": -6.214855670928955, "logits_per_char": -1.3810790379842122, "num_chars": 9}, {"sum_logits": -17.671693801879883, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.433216094970703, "logits_per_token": -17.671693801879883, "logits_per_char": -1.7671693801879882, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 647, "native_id": "1fcc547e4e6813afc1a66717248d6c62", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.792675971984863, "incorrect_loss_raw": 6.8686500787734985, "correct_loss_per_char": 0.6846679959978376, "incorrect_loss_per_char": 0.770981334504627, "correct_loss_per_token": 4.792675971984863, "incorrect_loss_per_token": 6.20743715763092, "correct_loss_uncond": -10.895732879638672, "incorrect_loss_uncond": -7.950250744819641}, "model_output": [{"sum_logits": -10.892162322998047, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.623971939086914, "logits_per_token": -10.892162322998047, "logits_per_char": -0.6807601451873779, "num_chars": 16}, {"sum_logits": -5.631099700927734, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.91370677947998, "logits_per_token": -5.631099700927734, "logits_per_char": -0.7038874626159668, "num_chars": 8}, {"sum_logits": -4.792675971984863, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.688408851623535, "logits_per_token": -4.792675971984863, "logits_per_char": -0.6846679959978376, "num_chars": 7}, {"sum_logits": -5.289703369140625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.520305633544922, "logits_per_token": -2.6448516845703125, "logits_per_char": -0.7556719098772321, "num_chars": 7}, {"sum_logits": -5.661634922027588, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.217618942260742, "logits_per_token": -5.661634922027588, "logits_per_char": -0.9436058203379313, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 648, "native_id": "68a911b64dc943b5f81c0f8dec7faed7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.2957100868225098, "incorrect_loss_raw": 8.278684258460999, "correct_loss_per_char": 0.4119637608528137, "incorrect_loss_per_char": 0.8000141696018331, "correct_loss_per_token": 3.2957100868225098, "incorrect_loss_per_token": 5.075755397478739, "correct_loss_uncond": -10.805139064788818, "incorrect_loss_uncond": -8.811943888664246}, "model_output": [{"sum_logits": -3.196727752685547, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -3.196727752685547, "logits_per_char": -0.6393455505371094, "num_chars": 5}, {"sum_logits": -3.2957100868225098, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -3.2957100868225098, "logits_per_char": -0.4119637608528137, "num_chars": 8}, {"sum_logits": -10.488463401794434, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.968976974487305, "logits_per_token": -3.496154467264811, "logits_per_char": -0.6169684353996726, "num_chars": 17}, {"sum_logits": -7.7907328605651855, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -7.7907328605651855, "logits_per_char": -0.9738416075706482, "num_chars": 8}, {"sum_logits": -11.638813018798828, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -5.819406509399414, "logits_per_char": -0.9699010848999023, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 649, "native_id": "92f423de9a556a66c3eb73e9ddf9399a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.513731002807617, "incorrect_loss_raw": 11.36221170425415, "correct_loss_per_char": 0.9557937275279652, "incorrect_loss_per_char": 1.1094095756610234, "correct_loss_per_token": 5.256865501403809, "incorrect_loss_per_token": 7.8980549573898315, "correct_loss_uncond": -8.471212387084961, "incorrect_loss_uncond": -6.289060592651367}, "model_output": [{"sum_logits": -13.239127159118652, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.393962860107422, "logits_per_token": -6.619563579559326, "logits_per_char": -0.8274454474449158, "num_chars": 16}, {"sum_logits": -10.513731002807617, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.984943389892578, "logits_per_token": -5.256865501403809, "logits_per_char": -0.9557937275279652, "num_chars": 11}, {"sum_logits": -11.726659774780273, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.641094207763672, "logits_per_token": -11.726659774780273, "logits_per_char": -1.9544432957967122, "num_chars": 6}, {"sum_logits": -14.474126815795898, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -7.237063407897949, "logits_per_char": -0.9046329259872437, "num_chars": 16}, {"sum_logits": -6.008933067321777, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -6.008933067321777, "logits_per_char": -0.7511166334152222, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 650, "native_id": "1cd94405124031e8681cd12bd25e2d61", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.274270057678223, "incorrect_loss_raw": 11.896779537200928, "correct_loss_per_char": 2.8185675144195557, "incorrect_loss_per_char": 2.025692687715803, "correct_loss_per_token": 11.274270057678223, "incorrect_loss_per_token": 10.10422682762146, "correct_loss_uncond": -1.3662710189819336, "incorrect_loss_uncond": -3.7618865966796875}, "model_output": [{"sum_logits": -14.340421676635742, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.184417724609375, "logits_per_token": -7.170210838317871, "logits_per_char": -1.7925527095794678, "num_chars": 8}, {"sum_logits": -7.658501625061035, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.269688606262207, "logits_per_token": -7.658501625061035, "logits_per_char": -1.9146254062652588, "num_chars": 4}, {"sum_logits": -12.952383995056152, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.017155647277832, "logits_per_token": -12.952383995056152, "logits_per_char": -2.5904767990112303, "num_chars": 5}, {"sum_logits": -11.274270057678223, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.640541076660156, "logits_per_token": -11.274270057678223, "logits_per_char": -2.8185675144195557, "num_chars": 4}, {"sum_logits": -12.635810852050781, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.163402557373047, "logits_per_token": -12.635810852050781, "logits_per_char": -1.8051158360072546, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 651, "native_id": "64ab884bd870f6f68146636b4cce921c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.086862564086914, "incorrect_loss_raw": 11.387802600860596, "correct_loss_per_char": 0.6442602330988104, "incorrect_loss_per_char": 1.3701293771229093, "correct_loss_per_token": 7.086862564086914, "incorrect_loss_per_token": 9.680053234100342, "correct_loss_uncond": -9.152360916137695, "incorrect_loss_uncond": -4.0581300258636475}, "model_output": [{"sum_logits": -7.086862564086914, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.23922348022461, "logits_per_token": -7.086862564086914, "logits_per_char": -0.6442602330988104, "num_chars": 11}, {"sum_logits": -13.661994934082031, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.031539916992188, "logits_per_token": -6.830997467041016, "logits_per_char": -0.9758567810058594, "num_chars": 14}, {"sum_logits": -8.895108222961426, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.443731307983398, "logits_per_token": -8.895108222961426, "logits_per_char": -0.9883453581068251, "num_chars": 9}, {"sum_logits": -9.720602035522461, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -9.720602035522461, "logits_per_char": -1.6201003392537434, "num_chars": 6}, {"sum_logits": -13.273505210876465, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.020811080932617, "logits_per_token": -13.273505210876465, "logits_per_char": -1.8962150301252092, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 652, "native_id": "66275550d64d16339c944e6a6d63eb5b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.1370201110839844, "incorrect_loss_raw": 12.930654048919678, "correct_loss_per_char": 0.20913467407226563, "incorrect_loss_per_char": 1.297800634605716, "correct_loss_per_token": 1.5685100555419922, "incorrect_loss_per_token": 8.585597455501556, "correct_loss_uncond": -14.554191589355469, "incorrect_loss_uncond": -3.3626201152801514}, "model_output": [{"sum_logits": -3.1370201110839844, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.691211700439453, "logits_per_token": -1.5685100555419922, "logits_per_char": -0.20913467407226563, "num_chars": 15}, {"sum_logits": -12.35770320892334, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.388635635375977, "logits_per_token": -6.17885160446167, "logits_per_char": -1.1234275644475764, "num_chars": 11}, {"sum_logits": -14.935166358947754, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.708890914916992, "logits_per_token": -3.7337915897369385, "logits_per_char": -1.4935166358947753, "num_chars": 10}, {"sum_logits": -11.815530776977539, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -11.815530776977539, "logits_per_char": -1.3128367529975042, "num_chars": 9}, {"sum_logits": -12.614215850830078, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -12.614215850830078, "logits_per_char": -1.2614215850830077, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 653, "native_id": "9b26329d74a6159ab9af4f899303de39", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.343053817749023, "incorrect_loss_raw": 9.03654658794403, "correct_loss_per_char": 0.7343053817749023, "incorrect_loss_per_char": 1.3088530499826778, "correct_loss_per_token": 7.343053817749023, "incorrect_loss_per_token": 8.412432134151459, "correct_loss_uncond": -6.669040679931641, "incorrect_loss_uncond": -5.801937937736511}, "model_output": [{"sum_logits": -4.992915630340576, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -2.496457815170288, "logits_per_char": -0.624114453792572, "num_chars": 8}, {"sum_logits": -7.343053817749023, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.012094497680664, "logits_per_token": -7.343053817749023, "logits_per_char": -0.7343053817749023, "num_chars": 10}, {"sum_logits": -9.177437782287598, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -9.177437782287598, "logits_per_char": -1.0197153091430664, "num_chars": 9}, {"sum_logits": -7.366188049316406, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -7.366188049316406, "logits_per_char": -0.6696534590287642, "num_chars": 11}, {"sum_logits": -14.609644889831543, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -14.609644889831543, "logits_per_char": -2.9219289779663087, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 654, "native_id": "f74b7f268d3c190a13f99ede6d2359e1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.250351428985596, "incorrect_loss_raw": 12.959017872810364, "correct_loss_per_char": 0.5833723809983995, "incorrect_loss_per_char": 1.9419824282328286, "correct_loss_per_token": 2.625175714492798, "incorrect_loss_per_token": 9.37807285785675, "correct_loss_uncond": -11.025996685028076, "incorrect_loss_uncond": -1.8829814195632935}, "model_output": [{"sum_logits": -5.250351428985596, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.276348114013672, "logits_per_token": -2.625175714492798, "logits_per_char": -0.5833723809983995, "num_chars": 9}, {"sum_logits": -9.179377555847168, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -9.179377555847168, "logits_per_char": -1.5298962593078613, "num_chars": 6}, {"sum_logits": -28.647560119628906, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.044944763183594, "logits_per_token": -14.323780059814453, "logits_per_char": -3.1830622355143228, "num_chars": 9}, {"sum_logits": -8.641386985778809, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.586869239807129, "logits_per_token": -8.641386985778809, "logits_per_char": -2.160346746444702, "num_chars": 4}, {"sum_logits": -5.367746829986572, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.277134895324707, "logits_per_token": -5.367746829986572, "logits_per_char": -0.8946244716644287, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 655, "native_id": "22458fdcead20e2def0df0d92d5806f6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.54405403137207, "incorrect_loss_raw": 15.059072494506836, "correct_loss_per_char": 0.9143561194924748, "incorrect_loss_per_char": 1.4022347774380952, "correct_loss_per_token": 5.18135134379069, "incorrect_loss_per_token": 7.4153679211934405, "correct_loss_uncond": -4.564882278442383, "incorrect_loss_uncond": -3.7141401767730713}, "model_output": [{"sum_logits": -4.886651992797852, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -4.886651992797852, "logits_per_char": -0.48866519927978513, "num_chars": 10}, {"sum_logits": -21.95439910888672, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -10.97719955444336, "logits_per_char": -2.439377678765191, "num_chars": 9}, {"sum_logits": -17.399995803833008, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -5.799998601277669, "logits_per_char": -1.7399995803833008, "num_chars": 10}, {"sum_logits": -15.995243072509766, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.69821548461914, "logits_per_token": -7.997621536254883, "logits_per_char": -0.9408966513241038, "num_chars": 17}, {"sum_logits": -15.54405403137207, "num_tokens": 3, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -5.18135134379069, "logits_per_char": -0.9143561194924748, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 656, "native_id": "f7b96f195a7adfe0c74924a165cfd055", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.732560157775879, "incorrect_loss_raw": 8.751927614212036, "correct_loss_per_char": 0.5915700197219849, "incorrect_loss_per_char": 1.2580311763854253, "correct_loss_per_token": 4.732560157775879, "incorrect_loss_per_token": 8.751927614212036, "correct_loss_uncond": -7.970641136169434, "incorrect_loss_uncond": -3.247206926345825}, "model_output": [{"sum_logits": -12.251920700073242, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -12.251920700073242, "logits_per_char": -2.0419867833455405, "num_chars": 6}, {"sum_logits": -4.732560157775879, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.703201293945312, "logits_per_token": -4.732560157775879, "logits_per_char": -0.5915700197219849, "num_chars": 8}, {"sum_logits": -4.953062057495117, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -10.492364883422852, "logits_per_token": -4.953062057495117, "logits_per_char": -0.8255103429158529, "num_chars": 6}, {"sum_logits": -8.968278884887695, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.104936599731445, "logits_per_token": -8.968278884887695, "logits_per_char": -1.2811826978410994, "num_chars": 7}, {"sum_logits": -8.83444881439209, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.327677726745605, "logits_per_token": -8.83444881439209, "logits_per_char": -0.883444881439209, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 657, "native_id": "9b631734e72a0e559da153492c1e7894", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.040139675140381, "incorrect_loss_raw": 8.958206176757812, "correct_loss_per_char": 0.5050174593925476, "incorrect_loss_per_char": 0.642075262995033, "correct_loss_per_token": 4.040139675140381, "incorrect_loss_per_token": 3.819750467936198, "correct_loss_uncond": -13.248226642608643, "incorrect_loss_uncond": -7.384647846221924}, "model_output": [{"sum_logits": -5.6760478019714355, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.308551788330078, "logits_per_token": -2.8380239009857178, "logits_per_char": -0.5160043456337668, "num_chars": 11}, {"sum_logits": -4.040139675140381, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.288366317749023, "logits_per_token": -4.040139675140381, "logits_per_char": -0.5050174593925476, "num_chars": 8}, {"sum_logits": -21.651124954223633, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.805097579956055, "logits_per_token": -7.217041651407878, "logits_per_char": -1.353195309638977, "num_chars": 16}, {"sum_logits": -6.563431262969971, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.382843017578125, "logits_per_token": -3.2817156314849854, "logits_per_char": -0.504879327920767, "num_chars": 13}, {"sum_logits": -1.942220687866211, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": true, "sum_logits_uncond": -13.874923706054688, "logits_per_token": -1.942220687866211, "logits_per_char": -0.1942220687866211, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 658, "native_id": "caccaa51ee960a92d44e5b949fc35a66", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9373621940612793, "incorrect_loss_raw": 9.061234772205353, "correct_loss_per_char": 0.24478018283843994, "incorrect_loss_per_char": 1.1069313350177947, "correct_loss_per_token": 1.4686810970306396, "incorrect_loss_per_token": 4.98901841044426, "correct_loss_uncond": -16.00053834915161, "incorrect_loss_uncond": -5.595611035823822}, "model_output": [{"sum_logits": -11.334368705749512, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -2.833592176437378, "logits_per_char": -0.7556245803833008, "num_chars": 15}, {"sum_logits": -9.334392547607422, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.537405014038086, "logits_per_token": -9.334392547607422, "logits_per_char": -1.5557320912679036, "num_chars": 6}, {"sum_logits": -12.148990631103516, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.279212951660156, "logits_per_token": -6.074495315551758, "logits_per_char": -1.7355700901576452, "num_chars": 7}, {"sum_logits": -3.427187204360962, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.47786808013916, "logits_per_token": -1.713593602180481, "logits_per_char": -0.3807985782623291, "num_chars": 9}, {"sum_logits": -2.9373621940612793, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -1.4686810970306396, "logits_per_char": -0.24478018283843994, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 659, "native_id": "def936fda9f6ccee01f57c0f804fabd0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.10544490814209, "incorrect_loss_raw": 14.777489423751831, "correct_loss_per_char": 0.7631806135177612, "incorrect_loss_per_char": 2.0295541286468506, "correct_loss_per_token": 3.052722454071045, "incorrect_loss_per_token": 10.147770643234253, "correct_loss_uncond": -15.533177375793457, "incorrect_loss_uncond": -1.673722267150879}, "model_output": [{"sum_logits": -10.52038860321045, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.006954193115234, "logits_per_token": -10.52038860321045, "logits_per_char": -2.1040777206420898, "num_chars": 5}, {"sum_logits": -13.270263671875, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.58919906616211, "logits_per_token": -6.6351318359375, "logits_per_char": -1.3270263671875, "num_chars": 10}, {"sum_logits": -23.767486572265625, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.524044036865234, "logits_per_token": -11.883743286132812, "logits_per_char": -2.3767486572265626, "num_chars": 10}, {"sum_logits": -6.10544490814209, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.638622283935547, "logits_per_token": -3.052722454071045, "logits_per_char": -0.7631806135177612, "num_chars": 8}, {"sum_logits": -11.55181884765625, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -11.55181884765625, "logits_per_char": -2.31036376953125, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 660, "native_id": "761b0f6c68b1540949b70f76a9e67c78", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.841482162475586, "incorrect_loss_raw": 17.320783138275146, "correct_loss_per_char": 0.8037711056795988, "incorrect_loss_per_char": 1.712986877986363, "correct_loss_per_token": 8.841482162475586, "incorrect_loss_per_token": 10.509259104728699, "correct_loss_uncond": -1.862544059753418, "incorrect_loss_uncond": -0.21833109855651855}, "model_output": [{"sum_logits": -14.790940284729004, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -14.790940284729004, "logits_per_char": -1.4790940284729004, "num_chars": 10}, {"sum_logits": -19.60175323486328, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.061038970947266, "logits_per_token": -9.80087661743164, "logits_per_char": -1.400125231061663, "num_chars": 14}, {"sum_logits": -8.841482162475586, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -10.704026222229004, "logits_per_token": -8.841482162475586, "logits_per_char": -0.8037711056795988, "num_chars": 11}, {"sum_logits": -15.543065071105957, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.714157104492188, "logits_per_token": -7.7715325355529785, "logits_per_char": -1.5543065071105957, "num_chars": 10}, {"sum_logits": -19.347373962402344, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.275802612304688, "logits_per_token": -9.673686981201172, "logits_per_char": -2.418421745300293, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 661, "native_id": "8c11546468a2595b29a1297e73334fc4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.696039199829102, "incorrect_loss_raw": 12.203216910362244, "correct_loss_per_char": 1.7826731999715169, "incorrect_loss_per_char": 1.6986073176066079, "correct_loss_per_token": 10.696039199829102, "incorrect_loss_per_token": 9.450350642204285, "correct_loss_uncond": -4.348942756652832, "incorrect_loss_uncond": -2.898110032081604}, "model_output": [{"sum_logits": -7.373687267303467, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -10.933771133422852, "logits_per_token": -7.373687267303467, "logits_per_char": -1.4747374534606934, "num_chars": 5}, {"sum_logits": -10.696039199829102, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.044981956481934, "logits_per_token": -10.696039199829102, "logits_per_char": -1.7826731999715169, "num_chars": 6}, {"sum_logits": -8.720211029052734, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.143332481384277, "logits_per_token": -8.720211029052734, "logits_per_char": -1.0900263786315918, "num_chars": 8}, {"sum_logits": -22.022930145263672, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.283222198486328, "logits_per_token": -11.011465072631836, "logits_per_char": -2.4469922383626304, "num_chars": 9}, {"sum_logits": -10.696039199829102, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.044981956481934, "logits_per_token": -10.696039199829102, "logits_per_char": -1.7826731999715169, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 662, "native_id": "a5dcac512870e79f5aa2b22dbd662404", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 2.8649392127990723, "incorrect_loss_raw": 9.095767140388489, "correct_loss_per_char": 0.5729878425598145, "incorrect_loss_per_char": 1.2403140805849509, "correct_loss_per_token": 2.8649392127990723, "incorrect_loss_per_token": 8.500693321228027, "correct_loss_uncond": -12.222546100616455, "incorrect_loss_uncond": -6.661328673362732}, "model_output": [{"sum_logits": -6.315680980682373, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -6.315680980682373, "logits_per_char": -1.2631361961364747, "num_chars": 5}, {"sum_logits": -2.8649392127990723, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -2.8649392127990723, "logits_per_char": -0.5729878425598145, "num_chars": 5}, {"sum_logits": -4.760590553283691, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -2.3802952766418457, "logits_per_char": -0.28003473842845245, "num_chars": 17}, {"sum_logits": -14.265202522277832, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -14.265202522277832, "logits_per_char": -2.037886074611119, "num_chars": 7}, {"sum_logits": -11.041594505310059, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -11.041594505310059, "logits_per_char": -1.3801993131637573, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 663, "native_id": "870b07a1c5af2e956673a9680da99852", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.748175621032715, "incorrect_loss_raw": 14.190513134002686, "correct_loss_per_char": 0.8391554015023368, "incorrect_loss_per_char": 1.0580834934585972, "correct_loss_per_token": 5.874087810516357, "incorrect_loss_per_token": 4.928784807523091, "correct_loss_uncond": -7.090023994445801, "incorrect_loss_uncond": -5.833677768707275}, "model_output": [{"sum_logits": -11.318893432617188, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.336471557617188, "logits_per_token": -3.7729644775390625, "logits_per_char": -0.7545928955078125, "num_chars": 15}, {"sum_logits": -20.459470748901367, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.30321502685547, "logits_per_token": -6.819823582967122, "logits_per_char": -1.0768142499421771, "num_chars": 19}, {"sum_logits": -13.477972030639648, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.663280487060547, "logits_per_token": -3.369493007659912, "logits_per_char": -0.962712287902832, "num_chars": 14}, {"sum_logits": -11.505716323852539, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.79379653930664, "logits_per_token": -5.7528581619262695, "logits_per_char": -1.4382145404815674, "num_chars": 8}, {"sum_logits": -11.748175621032715, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.838199615478516, "logits_per_token": -5.874087810516357, "logits_per_char": -0.8391554015023368, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 664, "native_id": "f48528156632b9c5b18af9ce2095509b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.728578805923462, "incorrect_loss_raw": 18.47076416015625, "correct_loss_per_char": 0.1728578805923462, "incorrect_loss_per_char": 1.9632612011649393, "correct_loss_per_token": 1.728578805923462, "incorrect_loss_per_token": 13.748130083084106, "correct_loss_uncond": -13.76208758354187, "incorrect_loss_uncond": 2.132127523422241}, "model_output": [{"sum_logits": -17.081775665283203, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.074918746948242, "logits_per_token": -17.081775665283203, "logits_per_char": -1.5528886968439275, "num_chars": 11}, {"sum_logits": -19.02020835876465, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -19.02020835876465, "logits_per_char": -2.717172622680664, "num_chars": 7}, {"sum_logits": -1.728578805923462, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -15.490666389465332, "logits_per_token": -1.728578805923462, "logits_per_char": -0.1728578805923462, "num_chars": 10}, {"sum_logits": -16.31745719909668, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -8.15872859954834, "logits_per_char": -1.6317457199096679, "num_chars": 10}, {"sum_logits": -21.46361541748047, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.203964233398438, "logits_per_token": -10.731807708740234, "logits_per_char": -1.951237765225497, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 665, "native_id": "5496c7293f653120e5a5213db2d7b103", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.924479484558105, "incorrect_loss_raw": 7.653338968753815, "correct_loss_per_char": 0.9937066237131754, "incorrect_loss_per_char": 1.0098308355926158, "correct_loss_per_token": 5.962239742279053, "incorrect_loss_per_token": 7.653338968753815, "correct_loss_uncond": -8.72634220123291, "incorrect_loss_uncond": -6.550755679607391}, "model_output": [{"sum_logits": -8.106019020080566, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.351019859313965, "logits_per_token": -8.106019020080566, "logits_per_char": -1.1580027171543665, "num_chars": 7}, {"sum_logits": -10.361412048339844, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -10.361412048339844, "logits_per_char": -0.7970316960261419, "num_chars": 13}, {"sum_logits": -11.924479484558105, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -5.962239742279053, "logits_per_char": -0.9937066237131754, "num_chars": 12}, {"sum_logits": -8.887127876281738, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.290563583374023, "logits_per_token": -8.887127876281738, "logits_per_char": -1.2695896966116769, "num_chars": 7}, {"sum_logits": -3.2587969303131104, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -3.2587969303131104, "logits_per_char": -0.8146992325782776, "num_chars": 4}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 666, "native_id": "9d97e2bb458d93a8bafe4380b08727e3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.643304824829102, "incorrect_loss_raw": 7.3097487688064575, "correct_loss_per_char": 1.1825894249810114, "incorrect_loss_per_char": 1.0531568129857383, "correct_loss_per_token": 5.321652412414551, "incorrect_loss_per_token": 5.289655804634094, "correct_loss_uncond": -9.469669342041016, "incorrect_loss_uncond": -7.943194031715393}, "model_output": [{"sum_logits": -10.643304824829102, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -5.321652412414551, "logits_per_char": -1.1825894249810114, "num_chars": 9}, {"sum_logits": -4.040191173553467, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -4.040191173553467, "logits_per_char": -0.6733651955922445, "num_chars": 6}, {"sum_logits": -3.391756057739258, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -3.391756057739258, "logits_per_char": -0.4239695072174072, "num_chars": 8}, {"sum_logits": -16.160743713378906, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.782365798950195, "logits_per_token": -8.080371856689453, "logits_per_char": -2.3086776733398438, "num_chars": 7}, {"sum_logits": -5.646304130554199, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -5.646304130554199, "logits_per_char": -0.806614875793457, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 667, "native_id": "26d7d59ef7b9f2e0c2d47419fa5bca91", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.8763346672058105, "incorrect_loss_raw": 7.101764559745789, "correct_loss_per_char": 0.4063612222671509, "incorrect_loss_per_char": 0.9193908791521411, "correct_loss_per_token": 4.8763346672058105, "incorrect_loss_per_token": 3.5508822798728943, "correct_loss_uncond": -10.846765995025635, "incorrect_loss_uncond": -9.494371056556702}, "model_output": [{"sum_logits": -12.99205493927002, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.989322662353516, "logits_per_token": -6.49602746963501, "logits_per_char": -1.8560078484671456, "num_chars": 7}, {"sum_logits": -6.738008499145508, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -3.369004249572754, "logits_per_char": -0.9625726427350726, "num_chars": 7}, {"sum_logits": -3.4731831550598145, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.75814437866211, "logits_per_token": -1.7365915775299072, "logits_per_char": -0.3859092394510905, "num_chars": 9}, {"sum_logits": -4.8763346672058105, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -4.8763346672058105, "logits_per_char": -0.4063612222671509, "num_chars": 12}, {"sum_logits": -5.2038116455078125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.53412437438965, "logits_per_token": -2.6019058227539062, "logits_per_char": -0.4730737859552557, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 668, "native_id": "c6f10fd07348bf2cf5488b0d9f38d806", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.70634937286377, "incorrect_loss_raw": 17.205202341079712, "correct_loss_per_char": 0.8470899581909179, "incorrect_loss_per_char": 1.2189843067756065, "correct_loss_per_token": 6.353174686431885, "incorrect_loss_per_token": 7.164263844490051, "correct_loss_uncond": -6.142783164978027, "incorrect_loss_uncond": -1.046567678451538}, "model_output": [{"sum_logits": -21.12527084350586, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.72524642944336, "logits_per_token": -10.56263542175293, "logits_per_char": -1.7604392369588215, "num_chars": 12}, {"sum_logits": -12.70634937286377, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.849132537841797, "logits_per_token": -6.353174686431885, "logits_per_char": -0.8470899581909179, "num_chars": 15}, {"sum_logits": -13.175442695617676, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.13226890563965, "logits_per_token": -6.587721347808838, "logits_per_char": -1.0134955919705904, "num_chars": 13}, {"sum_logits": -17.940359115600586, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.920658111572266, "logits_per_token": -5.980119705200195, "logits_per_char": -0.9966866175333658, "num_chars": 18}, {"sum_logits": -16.579736709594727, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -5.526578903198242, "logits_per_char": -1.1053157806396485, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 669, "native_id": "8ebf9d24719649a0b041aea02a6e46af", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.3352694511413574, "incorrect_loss_raw": 7.873365759849548, "correct_loss_per_char": 0.3336099215916225, "incorrect_loss_per_char": 0.8681866327921549, "correct_loss_per_token": 2.3352694511413574, "incorrect_loss_per_token": 7.0629947781562805, "correct_loss_uncond": -9.814024448394775, "incorrect_loss_uncond": -6.900059342384338}, "model_output": [{"sum_logits": -8.948816299438477, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -8.948816299438477, "logits_per_char": -1.278402328491211, "num_chars": 7}, {"sum_logits": -9.594368934631348, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -9.594368934631348, "logits_per_char": -1.0660409927368164, "num_chars": 9}, {"sum_logits": -2.3352694511413574, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -2.3352694511413574, "logits_per_char": -0.3336099215916225, "num_chars": 7}, {"sum_logits": -6.467309951782227, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -6.467309951782227, "logits_per_char": -0.5389424959818522, "num_chars": 12}, {"sum_logits": -6.482967853546143, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.53412437438965, "logits_per_token": -3.2414839267730713, "logits_per_char": -0.5893607139587402, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 670, "native_id": "c961578f4c5768b67b843e5d2ce18452", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.210321426391602, "incorrect_loss_raw": 12.248035430908203, "correct_loss_per_char": 0.6210321426391602, "incorrect_loss_per_char": 1.3191336336589994, "correct_loss_per_token": 3.105160713195801, "incorrect_loss_per_token": 8.451851447423298, "correct_loss_uncond": -9.936193466186523, "incorrect_loss_uncond": -4.128939628601074}, "model_output": [{"sum_logits": -6.210321426391602, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.146514892578125, "logits_per_token": -3.105160713195801, "logits_per_char": -0.6210321426391602, "num_chars": 10}, {"sum_logits": -9.656474113464355, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -9.656474113464355, "logits_per_char": -1.3794963019234794, "num_chars": 7}, {"sum_logits": -16.028352737426758, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -8.014176368713379, "logits_per_char": -1.3356960614522297, "num_chars": 12}, {"sum_logits": -10.755839347839355, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.72342300415039, "logits_per_token": -3.5852797826131186, "logits_per_char": -0.7682742391313825, "num_chars": 14}, {"sum_logits": -12.551475524902344, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -12.551475524902344, "logits_per_char": -1.7930679321289062, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 671, "native_id": "cce1b59f7c4f540a84a1a7d6d88548c4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.934162139892578, "incorrect_loss_raw": 6.698679566383362, "correct_loss_per_char": 0.8223603566487631, "incorrect_loss_per_char": 0.664076832078752, "correct_loss_per_token": 4.934162139892578, "incorrect_loss_per_token": 4.8523589968681335, "correct_loss_uncond": -7.373848915100098, "incorrect_loss_uncond": -8.967171549797058}, "model_output": [{"sum_logits": -4.641271591186523, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.502076148986816, "logits_per_token": -4.641271591186523, "logits_per_char": -0.663038798740932, "num_chars": 7}, {"sum_logits": -4.934162139892578, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -4.934162139892578, "logits_per_char": -0.8223603566487631, "num_chars": 6}, {"sum_logits": -7.382882118225098, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.997454643249512, "logits_per_token": -7.382882118225098, "logits_per_char": -0.9228602647781372, "num_chars": 8}, {"sum_logits": -7.702661514282227, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.29041290283203, "logits_per_token": -3.8513307571411133, "logits_per_char": -0.48141634464263916, "num_chars": 16}, {"sum_logits": -7.0679030418396, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -3.5339515209198, "logits_per_char": -0.5889919201532999, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 672, "native_id": "60848ce50295fc745756fbe960e78b88", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.767143249511719, "incorrect_loss_raw": 8.649527430534363, "correct_loss_per_char": 0.7979464530944824, "incorrect_loss_per_char": 0.9074110783410795, "correct_loss_per_token": 4.255714416503906, "incorrect_loss_per_token": 5.609514236450195, "correct_loss_uncond": -7.573034286499023, "incorrect_loss_uncond": -6.58558714389801}, "model_output": [{"sum_logits": -12.767143249511719, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.340177536010742, "logits_per_token": -4.255714416503906, "logits_per_char": -0.7979464530944824, "num_chars": 16}, {"sum_logits": -13.342181205749512, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.270978927612305, "logits_per_token": -6.671090602874756, "logits_per_char": -0.8894787470499674, "num_chars": 15}, {"sum_logits": -4.1790452003479, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.794161796569824, "logits_per_token": -4.1790452003479, "logits_per_char": -0.5223806500434875, "num_chars": 8}, {"sum_logits": -6.098958969116211, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.211878776550293, "logits_per_token": -6.098958969116211, "logits_per_char": -1.219791793823242, "num_chars": 5}, {"sum_logits": -10.977924346923828, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.66343879699707, "logits_per_token": -5.488962173461914, "logits_per_char": -0.9979931224476207, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 673, "native_id": "3fdc0c422c524c994b9911a17f1f1834", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.483285903930664, "incorrect_loss_raw": 9.720525741577148, "correct_loss_per_char": 0.4052053689956665, "incorrect_loss_per_char": 1.0866664257314471, "correct_loss_per_token": 3.241642951965332, "incorrect_loss_per_token": 5.568098187446594, "correct_loss_uncond": -15.638404846191406, "incorrect_loss_uncond": -8.050982475280762}, "model_output": [{"sum_logits": -6.483285903930664, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.12169075012207, "logits_per_token": -3.241642951965332, "logits_per_char": -0.4052053689956665, "num_chars": 16}, {"sum_logits": -15.255732536315918, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.888107299804688, "logits_per_token": -7.627866268157959, "logits_per_char": -2.5426220893859863, "num_chars": 6}, {"sum_logits": -9.123380661010742, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.684141159057617, "logits_per_token": -4.561690330505371, "logits_per_char": -0.5068544811672635, "num_chars": 18}, {"sum_logits": -8.840307235717773, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.30948257446289, "logits_per_token": -4.420153617858887, "logits_per_char": -0.5893538157145183, "num_chars": 15}, {"sum_logits": -5.66268253326416, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -5.66268253326416, "logits_per_char": -0.70783531665802, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 674, "native_id": "cc8eac9956f645533b8d7b99702e3507", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 8.459808349609375, "incorrect_loss_raw": 11.46548318862915, "correct_loss_per_char": 1.2085440499441964, "incorrect_loss_per_char": 1.706757834979466, "correct_loss_per_token": 4.2299041748046875, "incorrect_loss_per_token": 5.908917427062988, "correct_loss_uncond": -9.206197738647461, "incorrect_loss_uncond": -5.457468271255493}, "model_output": [{"sum_logits": -9.867182731628418, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -9.867182731628418, "logits_per_char": -1.9734365463256835, "num_chars": 5}, {"sum_logits": -12.740583419799805, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.95319366455078, "logits_per_token": -4.2468611399332685, "logits_per_char": -1.8200833456856864, "num_chars": 7}, {"sum_logits": -12.632744789123535, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.04315948486328, "logits_per_token": -4.210914929707845, "logits_per_char": -1.2632744789123536, "num_chars": 10}, {"sum_logits": -10.621421813964844, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.029136657714844, "logits_per_token": -5.310710906982422, "logits_per_char": -1.7702369689941406, "num_chars": 6}, {"sum_logits": -8.459808349609375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.666006088256836, "logits_per_token": -4.2299041748046875, "logits_per_char": -1.2085440499441964, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 675, "native_id": "c0e7fa3e39a2d9af2c323416015729dc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.489195346832275, "incorrect_loss_raw": 14.459812641143799, "correct_loss_per_char": 0.6861494183540344, "incorrect_loss_per_char": 1.6353942371077008, "correct_loss_per_token": 1.829731782277425, "incorrect_loss_per_token": 7.302413622538248, "correct_loss_uncond": -11.225827693939209, "incorrect_loss_uncond": -6.552386283874512}, "model_output": [{"sum_logits": -24.651994705200195, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -26.2424373626709, "logits_per_token": -8.217331568400065, "logits_per_char": -1.643466313680013, "num_chars": 15}, {"sum_logits": -5.489195346832275, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.715023040771484, "logits_per_token": -1.829731782277425, "logits_per_char": -0.6861494183540344, "num_chars": 8}, {"sum_logits": -4.847886085510254, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.712093353271484, "logits_per_token": -1.615962028503418, "logits_per_char": -0.30299288034439087, "num_chars": 16}, {"sum_logits": -10.413352012634277, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.551799774169922, "logits_per_token": -10.413352012634277, "logits_per_char": -2.6033380031585693, "num_chars": 4}, {"sum_logits": -17.92601776123047, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -24.542465209960938, "logits_per_token": -8.963008880615234, "logits_per_char": -1.99177975124783, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 676, "native_id": "335b51bd3a8ada014bbe6754dcbd425f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.137816905975342, "incorrect_loss_raw": 8.598946452140808, "correct_loss_per_char": 0.8275633811950683, "incorrect_loss_per_char": 0.891218858416057, "correct_loss_per_token": 4.137816905975342, "incorrect_loss_per_token": 4.799056708812714, "correct_loss_uncond": -8.528499126434326, "incorrect_loss_uncond": -7.467307686805725}, "model_output": [{"sum_logits": -9.256058692932129, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.710946083068848, "logits_per_token": -4.6280293464660645, "logits_per_char": -1.322294098990304, "num_chars": 7}, {"sum_logits": -7.539321422576904, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -3.769660711288452, "logits_per_char": -0.39680639066194234, "num_chars": 19}, {"sum_logits": -4.137816905975342, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -4.137816905975342, "logits_per_char": -0.8275633811950683, "num_chars": 5}, {"sum_logits": -13.603737831115723, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -6.801868915557861, "logits_per_char": -1.0464413716242864, "num_chars": 13}, {"sum_logits": -3.9966678619384766, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -3.9966678619384766, "logits_per_char": -0.7993335723876953, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 677, "native_id": "c7327a1a7d12b6cc0740fc9446270e02", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.275230884552002, "incorrect_loss_raw": 13.514515042304993, "correct_loss_per_char": 0.5196593488965716, "incorrect_loss_per_char": 1.2289792660828476, "correct_loss_per_token": 2.425076961517334, "incorrect_loss_per_token": 5.421589811642965, "correct_loss_uncond": -15.289790630340576, "incorrect_loss_uncond": -5.254713177680969}, "model_output": [{"sum_logits": -5.500508785247803, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -5.500508785247803, "logits_per_char": -1.1001017570495606, "num_chars": 5}, {"sum_logits": -14.425642967224121, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.2967472076416, "logits_per_token": -4.808547655741374, "logits_per_char": -1.0304030690874373, "num_chars": 14}, {"sum_logits": -9.190500259399414, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.252992630004883, "logits_per_token": -3.063500086466471, "logits_per_char": -0.7069615584153396, "num_chars": 13}, {"sum_logits": -24.941408157348633, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.557523727416992, "logits_per_token": -8.313802719116211, "logits_per_char": -2.0784506797790527, "num_chars": 12}, {"sum_logits": -7.275230884552002, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -22.565021514892578, "logits_per_token": -2.425076961517334, "logits_per_char": -0.5196593488965716, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 678, "native_id": "2729d8502208c25d8e9293cd4e8ecbb5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.496364116668701, "incorrect_loss_raw": 11.827453374862671, "correct_loss_per_char": 0.49966946515170013, "incorrect_loss_per_char": 1.5517169064945646, "correct_loss_per_token": 2.7481820583343506, "incorrect_loss_per_token": 9.172645330429077, "correct_loss_uncond": -8.958532810211182, "incorrect_loss_uncond": -3.900195837020874}, "model_output": [{"sum_logits": -13.850289344787598, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -15.892523765563965, "logits_per_token": -13.850289344787598, "logits_per_char": -3.4625723361968994, "num_chars": 4}, {"sum_logits": -15.928848266601562, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.80748176574707, "logits_per_token": -5.3096160888671875, "logits_per_char": -0.7964424133300781, "num_chars": 20}, {"sum_logits": -5.496364116668701, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.454896926879883, "logits_per_token": -2.7481820583343506, "logits_per_char": -0.49966946515170013, "num_chars": 11}, {"sum_logits": -10.218213081359863, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -10.218213081359863, "logits_per_char": -1.1353570090399847, "num_chars": 9}, {"sum_logits": -7.31246280670166, "num_tokens": 1, "num_tokens_all": 130, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -7.31246280670166, "logits_per_char": -0.8124958674112955, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 679, "native_id": "7ea57ee4580042b0a6a40479c8ace3e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 15.288414001464844, "incorrect_loss_raw": 15.25387954711914, "correct_loss_per_char": 1.0920295715332031, "incorrect_loss_per_char": 1.627491934177203, "correct_loss_per_token": 7.644207000732422, "incorrect_loss_per_token": 11.13056230545044, "correct_loss_uncond": -8.710163116455078, "incorrect_loss_uncond": 0.11838626861572266}, "model_output": [{"sum_logits": -12.642135620117188, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -12.642135620117188, "logits_per_char": -2.5284271240234375, "num_chars": 5}, {"sum_logits": -18.410411834716797, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.908443450927734, "logits_per_token": -9.205205917358398, "logits_per_char": -1.1506507396697998, "num_chars": 16}, {"sum_logits": -15.288414001464844, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.998577117919922, "logits_per_token": -7.644207000732422, "logits_per_char": -1.0920295715332031, "num_chars": 14}, {"sum_logits": -14.576126098632812, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.55240249633789, "logits_per_token": -7.288063049316406, "logits_per_char": -1.121240469125601, "num_chars": 13}, {"sum_logits": -15.386844635009766, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -15.386844635009766, "logits_per_char": -1.7096494038899739, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 680, "native_id": "65432eb6e617514d863a465f38865fde", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.722055912017822, "incorrect_loss_raw": 17.572399854660034, "correct_loss_per_char": 0.3576284945011139, "incorrect_loss_per_char": 1.2765667818730173, "correct_loss_per_token": 2.861027956008911, "incorrect_loss_per_token": 6.955257773399353, "correct_loss_uncond": -16.399634838104248, "incorrect_loss_uncond": -4.175223112106323}, "model_output": [{"sum_logits": -5.722055912017822, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.12169075012207, "logits_per_token": -2.861027956008911, "logits_per_char": -0.3576284945011139, "num_chars": 16}, {"sum_logits": -15.732107162475586, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.69129180908203, "logits_per_token": -7.866053581237793, "logits_per_char": -1.048807144165039, "num_chars": 15}, {"sum_logits": -29.295074462890625, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -27.587892532348633, "logits_per_token": -7.323768615722656, "logits_per_char": -1.9530049641927083, "num_chars": 15}, {"sum_logits": -11.827482223510742, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.013092041015625, "logits_per_token": -5.913741111755371, "logits_per_char": -1.3141646915011935, "num_chars": 9}, {"sum_logits": -13.434935569763184, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.69821548461914, "logits_per_token": -6.717467784881592, "logits_per_char": -0.7902903276331285, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 681, "native_id": "316a8dee8a4dde7d95cf503a715104be", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.201988220214844, "incorrect_loss_raw": 13.834023237228394, "correct_loss_per_char": 1.2752485275268555, "incorrect_loss_per_char": 1.3366006622966538, "correct_loss_per_token": 10.201988220214844, "incorrect_loss_per_token": 7.357591430346171, "correct_loss_uncond": -4.806667327880859, "incorrect_loss_uncond": -2.5271103382110596}, "model_output": [{"sum_logits": -16.50004768371582, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -15.371232986450195, "logits_per_token": -5.50001589457194, "logits_per_char": -1.8333386315239801, "num_chars": 9}, {"sum_logits": -9.024654388427734, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -15.622055053710938, "logits_per_token": -9.024654388427734, "logits_per_char": -1.1280817985534668, "num_chars": 8}, {"sum_logits": -10.201988220214844, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -10.201988220214844, "logits_per_char": -1.2752485275268555, "num_chars": 8}, {"sum_logits": -15.49085521697998, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.81740951538086, "logits_per_token": -7.74542760848999, "logits_per_char": -1.1916042474599986, "num_chars": 13}, {"sum_logits": -14.320535659790039, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.63383674621582, "logits_per_token": -7.1602678298950195, "logits_per_char": -1.19337797164917, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 682, "native_id": "520972425aed0e532fa28a91c9b55b30", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.628848552703857, "incorrect_loss_raw": 11.812193989753723, "correct_loss_per_char": 0.6254276169670953, "incorrect_loss_per_char": 1.1192679854030283, "correct_loss_per_token": 2.8144242763519287, "incorrect_loss_per_token": 5.906096994876862, "correct_loss_uncond": -8.90492582321167, "incorrect_loss_uncond": -5.399162173271179}, "model_output": [{"sum_logits": -11.517268180847168, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.329479217529297, "logits_per_token": -5.758634090423584, "logits_per_char": -0.885943706219013, "num_chars": 13}, {"sum_logits": -5.628848552703857, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -2.8144242763519287, "logits_per_char": -0.6254276169670953, "num_chars": 9}, {"sum_logits": -12.325597763061523, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.843425750732422, "logits_per_token": -6.162798881530762, "logits_per_char": -1.3695108625623915, "num_chars": 9}, {"sum_logits": -7.138417720794678, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.14129638671875, "logits_per_token": -3.569208860397339, "logits_per_char": -0.5948681433995565, "num_chars": 12}, {"sum_logits": -16.267492294311523, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.53122329711914, "logits_per_token": -8.133746147155762, "logits_per_char": -1.6267492294311523, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 683, "native_id": "4d67cdb4ba1b0058e383c212303a9f4e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.481712341308594, "incorrect_loss_raw": 11.408105611801147, "correct_loss_per_char": 0.6753948436063879, "incorrect_loss_per_char": 1.2282652673267185, "correct_loss_per_token": 3.8272374471028647, "incorrect_loss_per_token": 6.182834267616272, "correct_loss_uncond": -10.7420654296875, "incorrect_loss_uncond": -4.306077241897583}, "model_output": [{"sum_logits": -10.440277099609375, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.616445541381836, "logits_per_token": -2.6100692749023438, "logits_per_char": -0.7457340785435268, "num_chars": 14}, {"sum_logits": -13.794240951538086, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.80850601196289, "logits_per_token": -6.897120475769043, "logits_per_char": -0.9853029251098633, "num_chars": 14}, {"sum_logits": -11.481712341308594, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.223777770996094, "logits_per_token": -3.8272374471028647, "logits_per_char": -0.6753948436063879, "num_chars": 17}, {"sum_logits": -12.347514152526855, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.02449893951416, "logits_per_token": -6.173757076263428, "logits_per_char": -1.3719460169474285, "num_chars": 9}, {"sum_logits": -9.050390243530273, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.407280921936035, "logits_per_token": -9.050390243530273, "logits_per_char": -1.8100780487060546, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 684, "native_id": "95d1d968ee66b6054cbb16b58a7c6455", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.147479057312012, "incorrect_loss_raw": 11.585392236709595, "correct_loss_per_char": 1.3934348821640015, "incorrect_loss_per_char": 1.1547899751436144, "correct_loss_per_token": 11.147479057312012, "incorrect_loss_per_token": 8.335306525230408, "correct_loss_uncond": -4.7972564697265625, "incorrect_loss_uncond": -4.139883518218994}, "model_output": [{"sum_logits": -10.579435348510742, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.946989059448242, "logits_per_token": -10.579435348510742, "logits_per_char": -1.0579435348510742, "num_chars": 10}, {"sum_logits": -17.410865783691406, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.688919067382812, "logits_per_token": -8.705432891845703, "logits_per_char": -1.450905481974284, "num_chars": 12}, {"sum_logits": -11.147479057312012, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.944735527038574, "logits_per_token": -11.147479057312012, "logits_per_char": -1.3934348821640015, "num_chars": 8}, {"sum_logits": -8.58981990814209, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -4.294909954071045, "logits_per_char": -0.7158183256785074, "num_chars": 12}, {"sum_logits": -9.76144790649414, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.39173412322998, "logits_per_token": -9.76144790649414, "logits_per_char": -1.3944925580705916, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 685, "native_id": "c43b60be106662de1863097ee3ddb4d2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.940022468566895, "incorrect_loss_raw": 10.53940749168396, "correct_loss_per_char": 1.7057174955095564, "incorrect_loss_per_char": 1.2373188790820895, "correct_loss_per_token": 11.940022468566895, "incorrect_loss_per_token": 7.803500771522522, "correct_loss_uncond": -1.7160301208496094, "incorrect_loss_uncond": -5.109806537628174}, "model_output": [{"sum_logits": -11.940022468566895, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.656052589416504, "logits_per_token": -11.940022468566895, "logits_per_char": -1.7057174955095564, "num_chars": 7}, {"sum_logits": -13.02923583984375, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -6.514617919921875, "logits_per_char": -0.9306597028459821, "num_chars": 14}, {"sum_logits": -8.858017921447754, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.322437286376953, "logits_per_token": -4.429008960723877, "logits_per_char": -0.8858017921447754, "num_chars": 10}, {"sum_logits": -10.314444541931152, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -10.314444541931152, "logits_per_char": -1.4734920774187361, "num_chars": 7}, {"sum_logits": -9.955931663513184, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -9.955931663513184, "logits_per_char": -1.659321943918864, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 686, "native_id": "456f2fb41cac8c028dcfe2f48637e473", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.6241960525512695, "incorrect_loss_raw": 7.894631385803223, "correct_loss_per_char": 0.762419605255127, "incorrect_loss_per_char": 0.8610640327135722, "correct_loss_per_token": 3.8120980262756348, "incorrect_loss_per_token": 4.922287702560425, "correct_loss_uncond": -9.290343284606934, "incorrect_loss_uncond": -9.911363124847412}, "model_output": [{"sum_logits": -7.6241960525512695, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.914539337158203, "logits_per_token": -3.8120980262756348, "logits_per_char": -0.762419605255127, "num_chars": 10}, {"sum_logits": -7.799776077270508, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.614330291748047, "logits_per_token": -7.799776077270508, "logits_per_char": -1.2999626795450847, "num_chars": 6}, {"sum_logits": -9.219913482666016, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.594419479370117, "logits_per_token": -4.609956741333008, "logits_per_char": -0.9219913482666016, "num_chars": 10}, {"sum_logits": -6.228839874267578, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.74160385131836, "logits_per_token": -3.114419937133789, "logits_per_char": -0.38930249214172363, "num_chars": 16}, {"sum_logits": -8.329996109008789, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.273624420166016, "logits_per_token": -4.1649980545043945, "logits_per_char": -0.8329996109008789, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 687, "native_id": "a5d853d1c2fb3ef160218fb91110fbe5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.767158031463623, "incorrect_loss_raw": 10.376879215240479, "correct_loss_per_char": 0.6767158031463623, "incorrect_loss_per_char": 1.3469957717741377, "correct_loss_per_token": 3.3835790157318115, "incorrect_loss_per_token": 6.32464063167572, "correct_loss_uncond": -9.277763843536377, "incorrect_loss_uncond": -6.6500608921051025}, "model_output": [{"sum_logits": -9.089608192443848, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.783340454101562, "logits_per_token": -9.089608192443848, "logits_per_char": -0.8263280174948953, "num_chars": 11}, {"sum_logits": -6.767158031463623, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.044921875, "logits_per_token": -3.3835790157318115, "logits_per_char": -0.6767158031463623, "num_chars": 10}, {"sum_logits": -12.442681312561035, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.465423583984375, "logits_per_token": -6.221340656280518, "logits_per_char": -2.488536262512207, "num_chars": 5}, {"sum_logits": -12.730857849121094, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.24494171142578, "logits_per_token": -6.365428924560547, "logits_per_char": -1.414539761013455, "num_chars": 9}, {"sum_logits": -7.2443695068359375, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.614054679870605, "logits_per_token": -3.6221847534179688, "logits_per_char": -0.6585790460759943, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 688, "native_id": "3df1b88da6a90c9526be2c8a6cc736dc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.331019401550293, "incorrect_loss_raw": 7.526448845863342, "correct_loss_per_char": 1.8885032335917156, "incorrect_loss_per_char": 1.1854945985097733, "correct_loss_per_token": 11.331019401550293, "incorrect_loss_per_token": 6.608335196971893, "correct_loss_uncond": -2.0433664321899414, "incorrect_loss_uncond": -8.260910391807556}, "model_output": [{"sum_logits": -7.344909191131592, "num_tokens": 2, "num_tokens_all": 167, "is_greedy": false, "sum_logits_uncond": -16.84996795654297, "logits_per_token": -3.672454595565796, "logits_per_char": -1.0492727415902274, "num_chars": 7}, {"sum_logits": -11.331019401550293, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -11.331019401550293, "logits_per_char": -1.8885032335917156, "num_chars": 6}, {"sum_logits": -8.502923011779785, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -16.45895004272461, "logits_per_token": -8.502923011779785, "logits_per_char": -1.4171538352966309, "num_chars": 6}, {"sum_logits": -6.480459213256836, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -15.535295486450195, "logits_per_token": -6.480459213256836, "logits_per_char": -0.720051023695204, "num_chars": 9}, {"sum_logits": -7.777503967285156, "num_tokens": 1, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -7.777503967285156, "logits_per_char": -1.5555007934570313, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 689, "native_id": "f912bcd7479b76db9b1c57a612b90f00", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.809823036193848, "incorrect_loss_raw": 11.778048396110535, "correct_loss_per_char": 0.5206548690795898, "incorrect_loss_per_char": 1.2089292890885295, "correct_loss_per_token": 3.904911518096924, "incorrect_loss_per_token": 7.072767913341522, "correct_loss_uncond": -14.771533012390137, "incorrect_loss_uncond": -7.727479338645935}, "model_output": [{"sum_logits": -7.367135524749756, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -22.367595672607422, "logits_per_token": -3.683567762374878, "logits_per_char": -0.43336091322057385, "num_chars": 17}, {"sum_logits": -7.809823036193848, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -22.581356048583984, "logits_per_token": -3.904911518096924, "logits_per_char": -0.5206548690795898, "num_chars": 15}, {"sum_logits": -19.355640411376953, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.798471450805664, "logits_per_token": -9.677820205688477, "logits_per_char": -1.4888954162597656, "num_chars": 13}, {"sum_logits": -9.469949722290039, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -9.469949722290039, "logits_per_char": -2.3674874305725098, "num_chars": 4}, {"sum_logits": -10.91946792602539, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -21.88152313232422, "logits_per_token": -5.459733963012695, "logits_per_char": -0.5459733963012695, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 690, "native_id": "94f34cc1e6aa9eefe06563cce8225658", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.707082748413086, "incorrect_loss_raw": 10.38011109828949, "correct_loss_per_char": 0.8566926717758179, "incorrect_loss_per_char": 1.239947088077815, "correct_loss_per_token": 4.569027582804362, "incorrect_loss_per_token": 8.172442972660065, "correct_loss_uncond": -9.303079605102539, "incorrect_loss_uncond": -7.6072715520858765}, "model_output": [{"sum_logits": -13.707082748413086, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.010162353515625, "logits_per_token": -4.569027582804362, "logits_per_char": -0.8566926717758179, "num_chars": 16}, {"sum_logits": -7.707397937774658, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.88583755493164, "logits_per_token": -3.853698968887329, "logits_per_char": -0.7006725397976962, "num_chars": 11}, {"sum_logits": -11.869961738586426, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.617376327514648, "logits_per_token": -11.869961738586426, "logits_per_char": -2.373992347717285, "num_chars": 5}, {"sum_logits": -9.953947067260742, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -24.472808837890625, "logits_per_token": -4.976973533630371, "logits_per_char": -0.5529970592922635, "num_chars": 18}, {"sum_logits": -11.989137649536133, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.97350788116455, "logits_per_token": -11.989137649536133, "logits_per_char": -1.3321264055040147, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 691, "native_id": "bb503ece4eac41dfe608a1dcb654e6bf", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.866752624511719, "incorrect_loss_raw": 12.842786312103271, "correct_loss_per_char": 1.0963058471679688, "incorrect_loss_per_char": 1.417228091846813, "correct_loss_per_token": 4.933376312255859, "incorrect_loss_per_token": 10.909400701522827, "correct_loss_uncond": -6.529233932495117, "incorrect_loss_uncond": -1.6944820880889893}, "model_output": [{"sum_logits": -15.561370849609375, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.858414649963379, "logits_per_token": -15.561370849609375, "logits_per_char": -2.223052978515625, "num_chars": 7}, {"sum_logits": -14.862985610961914, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.334514617919922, "logits_per_token": -14.862985610961914, "logits_per_char": -1.3511805100874468, "num_chars": 11}, {"sum_logits": -9.866752624511719, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.395986557006836, "logits_per_token": -4.933376312255859, "logits_per_char": -1.0963058471679688, "num_chars": 9}, {"sum_logits": -5.479703903198242, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.185674667358398, "logits_per_token": -5.479703903198242, "logits_per_char": -0.5479703903198242, "num_chars": 10}, {"sum_logits": -15.467084884643555, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.770469665527344, "logits_per_token": -7.733542442321777, "logits_per_char": -1.5467084884643554, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 692, "native_id": "5502dc807d4921679ae1abd0dc9570d6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.465842247009277, "incorrect_loss_raw": 12.598076343536377, "correct_loss_per_char": 0.5258801248338487, "incorrect_loss_per_char": 1.4032328526178997, "correct_loss_per_token": 4.732921123504639, "incorrect_loss_per_token": 7.483624339103699, "correct_loss_uncond": -10.346087455749512, "incorrect_loss_uncond": -5.497839689254761}, "model_output": [{"sum_logits": -11.107784271240234, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.917130470275879, "logits_per_token": -5.553892135620117, "logits_per_char": -1.2341982523600261, "num_chars": 9}, {"sum_logits": -11.350762367248535, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.351154327392578, "logits_per_token": -5.675381183624268, "logits_per_char": -1.2611958185831706, "num_chars": 9}, {"sum_logits": -18.457069396972656, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.19327163696289, "logits_per_token": -9.228534698486328, "logits_per_char": -1.538089116414388, "num_chars": 12}, {"sum_logits": -9.465842247009277, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.81192970275879, "logits_per_token": -4.732921123504639, "logits_per_char": -0.5258801248338487, "num_chars": 18}, {"sum_logits": -9.476689338684082, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.922107696533203, "logits_per_token": -9.476689338684082, "logits_per_char": -1.5794482231140137, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 693, "native_id": "a7e3de0719fe30e7048f67426e29fdd1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.054776191711426, "incorrect_loss_raw": 9.173620223999023, "correct_loss_per_char": 1.3818470239639282, "incorrect_loss_per_char": 1.3177303380264347, "correct_loss_per_token": 5.527388095855713, "incorrect_loss_per_token": 7.698210120201111, "correct_loss_uncond": -4.262673377990723, "incorrect_loss_uncond": -4.6162519454956055}, "model_output": [{"sum_logits": -8.254267692565918, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.803590774536133, "logits_per_token": -8.254267692565918, "logits_per_char": -1.3757112820943196, "num_chars": 6}, {"sum_logits": -8.637629508972168, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -8.637629508972168, "logits_per_char": -0.7852390462701971, "num_chars": 11}, {"sum_logits": -11.8032808303833, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.97957706451416, "logits_per_token": -5.90164041519165, "logits_per_char": -1.96721347173055, "num_chars": 6}, {"sum_logits": -11.054776191711426, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -5.527388095855713, "logits_per_char": -1.3818470239639282, "num_chars": 8}, {"sum_logits": -7.999302864074707, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.137375831604004, "logits_per_token": -7.999302864074707, "logits_per_char": -1.1427575520106725, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 694, "native_id": "d6107d454181b701ddcaa449a1e422a3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.206701278686523, "incorrect_loss_raw": 9.36093783378601, "correct_loss_per_char": 0.9389770214374249, "incorrect_loss_per_char": 0.9405991644616635, "correct_loss_per_token": 6.103350639343262, "incorrect_loss_per_token": 5.545154690742493, "correct_loss_uncond": -11.49858283996582, "incorrect_loss_uncond": -9.556183338165283}, "model_output": [{"sum_logits": -12.206701278686523, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.705284118652344, "logits_per_token": -6.103350639343262, "logits_per_char": -0.9389770214374249, "num_chars": 13}, {"sum_logits": -8.008649826049805, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.938169479370117, "logits_per_token": -4.004324913024902, "logits_per_char": -0.7280590750954368, "num_chars": 11}, {"sum_logits": -11.418657302856445, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.973392486572266, "logits_per_token": -5.709328651428223, "logits_per_char": -0.8783582540658804, "num_chars": 13}, {"sum_logits": -11.098958015441895, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.644126892089844, "logits_per_token": -5.549479007720947, "logits_per_char": -1.3873697519302368, "num_chars": 8}, {"sum_logits": -6.917486190795898, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -6.917486190795898, "logits_per_char": -0.7686095767550998, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 695, "native_id": "ab2eb930b29bb6d5e94a6cd3b04ba01e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.227308750152588, "incorrect_loss_raw": 11.043919563293457, "correct_loss_per_char": 0.31818696430751253, "incorrect_loss_per_char": 1.3804956001980824, "correct_loss_per_token": 2.227308750152588, "incorrect_loss_per_token": 7.830782413482666, "correct_loss_uncond": -10.849430561065674, "incorrect_loss_uncond": -4.9498350620269775}, "model_output": [{"sum_logits": -13.953231811523438, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.98363208770752, "logits_per_token": -6.976615905761719, "logits_per_char": -1.2684756192294033, "num_chars": 11}, {"sum_logits": -5.488821029663086, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.870964050292969, "logits_per_token": -5.488821029663086, "logits_per_char": -0.7841172899518695, "num_chars": 7}, {"sum_logits": -11.75186538696289, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.164608001708984, "logits_per_token": -5.875932693481445, "logits_per_char": -1.3057628207736545, "num_chars": 9}, {"sum_logits": -12.981760025024414, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -12.981760025024414, "logits_per_char": -2.1636266708374023, "num_chars": 6}, {"sum_logits": -2.227308750152588, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -2.227308750152588, "logits_per_char": -0.31818696430751253, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 696, "native_id": "92869fc0be5dc45f407700692ffd80a0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8554309606552124, "incorrect_loss_raw": 12.415157318115234, "correct_loss_per_char": 0.17108619213104248, "incorrect_loss_per_char": 0.9847018678983052, "correct_loss_per_token": 0.8554309606552124, "incorrect_loss_per_token": 8.02007571856181, "correct_loss_uncond": -12.401904702186584, "incorrect_loss_uncond": -5.537058591842651}, "model_output": [{"sum_logits": -13.358704566955566, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.58977508544922, "logits_per_token": -4.452901522318522, "logits_per_char": -0.8905803044637044, "num_chars": 15}, {"sum_logits": -15.206267356872559, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -15.206267356872559, "logits_per_char": -1.2671889464060466, "num_chars": 12}, {"sum_logits": -17.34904670715332, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.1832218170166, "logits_per_token": -8.67452335357666, "logits_per_char": -1.1566031138102213, "num_chars": 15}, {"sum_logits": -3.746610641479492, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.312766075134277, "logits_per_token": -3.746610641479492, "logits_per_char": -0.6244351069132487, "num_chars": 6}, {"sum_logits": -0.8554309606552124, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.257335662841797, "logits_per_token": -0.8554309606552124, "logits_per_char": -0.17108619213104248, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 697, "native_id": "6a0177586d506cb7b741f4207b428e42", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.55416202545166, "incorrect_loss_raw": 10.013353824615479, "correct_loss_per_char": 0.327708101272583, "incorrect_loss_per_char": 1.043980594476064, "correct_loss_per_token": 3.27708101272583, "incorrect_loss_per_token": 6.3371234734853115, "correct_loss_uncond": -16.336153984069824, "incorrect_loss_uncond": -7.047594308853149}, "model_output": [{"sum_logits": -5.722200393676758, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -5.722200393676758, "logits_per_char": -0.7152750492095947, "num_chars": 8}, {"sum_logits": -6.55416202545166, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.890316009521484, "logits_per_token": -3.27708101272583, "logits_per_char": -0.327708101272583, "num_chars": 20}, {"sum_logits": -11.603626251220703, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.600555419921875, "logits_per_token": -3.867875417073568, "logits_per_char": -0.7735750834147136, "num_chars": 15}, {"sum_logits": -8.789247512817383, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.534889221191406, "logits_per_token": -8.789247512817383, "logits_per_char": -1.7578495025634766, "num_chars": 5}, {"sum_logits": -13.93834114074707, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.323654174804688, "logits_per_token": -6.969170570373535, "logits_per_char": -0.9292227427164713, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 698, "native_id": "584188da9a429f1bc319abda5e5c7a76", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.9366559982299805, "incorrect_loss_raw": 8.249402284622192, "correct_loss_per_char": 0.7052365711757115, "incorrect_loss_per_char": 1.443696826034122, "correct_loss_per_token": 4.9366559982299805, "incorrect_loss_per_token": 7.326258420944214, "correct_loss_uncond": -11.714764595031738, "incorrect_loss_uncond": -6.131286859512329}, "model_output": [{"sum_logits": -7.385150909423828, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.875513076782227, "logits_per_token": -3.692575454711914, "logits_per_char": -0.7385150909423828, "num_chars": 10}, {"sum_logits": -11.315732955932617, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.823081016540527, "logits_per_token": -11.315732955932617, "logits_per_char": -2.2631465911865236, "num_chars": 5}, {"sum_logits": -8.52912425994873, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -8.52912425994873, "logits_per_char": -2.1322810649871826, "num_chars": 4}, {"sum_logits": -5.767601013183594, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.493917465209961, "logits_per_token": -5.767601013183594, "logits_per_char": -0.6408445570203993, "num_chars": 9}, {"sum_logits": -4.9366559982299805, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -4.9366559982299805, "logits_per_char": -0.7052365711757115, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 699, "native_id": "e480d4a672af0194e0a6ccdb8c37499b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.083809852600098, "incorrect_loss_raw": 11.135702133178711, "correct_loss_per_char": 0.7083809852600098, "incorrect_loss_per_char": 0.9642678631676568, "correct_loss_per_token": 3.541904926300049, "incorrect_loss_per_token": 9.169092774391174, "correct_loss_uncond": -8.888653755187988, "incorrect_loss_uncond": -4.649116039276123}, "model_output": [{"sum_logits": -10.84575366973877, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -10.84575366973877, "logits_per_char": -1.2050837410820856, "num_chars": 9}, {"sum_logits": -10.11326789855957, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -10.11326789855957, "logits_per_char": -1.1236964331732855, "num_chars": 9}, {"sum_logits": -7.850912094116211, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.14395523071289, "logits_per_token": -7.850912094116211, "logits_per_char": -0.6542426745096842, "num_chars": 12}, {"sum_logits": -15.732874870300293, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.535367965698242, "logits_per_token": -7.8664374351501465, "logits_per_char": -0.8740486039055718, "num_chars": 18}, {"sum_logits": -7.083809852600098, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -3.541904926300049, "logits_per_char": -0.7083809852600098, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 700, "native_id": "275c859994f7d3acd3c8863be591ab2c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.508148193359375, "incorrect_loss_raw": 7.934965252876282, "correct_loss_per_char": 0.8852421687199519, "incorrect_loss_per_char": 0.9976882374475873, "correct_loss_per_token": 5.7540740966796875, "incorrect_loss_per_token": 6.336522579193115, "correct_loss_uncond": -7.072355270385742, "incorrect_loss_uncond": -6.157629370689392}, "model_output": [{"sum_logits": -9.456218719482422, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.405667304992676, "logits_per_token": -9.456218719482422, "logits_per_char": -1.350888388497489, "num_chars": 7}, {"sum_logits": -5.507785797119141, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -5.507785797119141, "logits_per_char": -0.9179642995198568, "num_chars": 6}, {"sum_logits": -3.9883151054382324, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.322914123535156, "logits_per_token": -3.9883151054382324, "logits_per_char": -0.44314612282647026, "num_chars": 9}, {"sum_logits": -12.787541389465332, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.83218002319336, "logits_per_token": -6.393770694732666, "logits_per_char": -1.2787541389465331, "num_chars": 10}, {"sum_logits": -11.508148193359375, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.580503463745117, "logits_per_token": -5.7540740966796875, "logits_per_char": -0.8852421687199519, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 701, "native_id": "32758ab86d888be680845b0dfe7de35e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.2220587730407715, "incorrect_loss_raw": 14.864171743392944, "correct_loss_per_char": 0.5555429825415978, "incorrect_loss_per_char": 1.5955465829733646, "correct_loss_per_token": 3.6110293865203857, "incorrect_loss_per_token": 7.432085871696472, "correct_loss_uncond": -12.633223056793213, "incorrect_loss_uncond": -2.761326551437378}, "model_output": [{"sum_logits": -13.960939407348633, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.13278579711914, "logits_per_token": -6.980469703674316, "logits_per_char": -1.5512154897054036, "num_chars": 9}, {"sum_logits": -14.112781524658203, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.420150756835938, "logits_per_token": -7.056390762329102, "logits_per_char": -1.2829801386052913, "num_chars": 11}, {"sum_logits": -14.99520206451416, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.941198348999023, "logits_per_token": -7.49760103225708, "logits_per_char": -1.499520206451416, "num_chars": 10}, {"sum_logits": -16.38776397705078, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.007858276367188, "logits_per_token": -8.19388198852539, "logits_per_char": -2.0484704971313477, "num_chars": 8}, {"sum_logits": -7.2220587730407715, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.855281829833984, "logits_per_token": -3.6110293865203857, "logits_per_char": -0.5555429825415978, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 702, "native_id": "69335eb9bc5b7b5df840c38a086bf8b2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8803812265396118, "incorrect_loss_raw": 8.97990596294403, "correct_loss_per_char": 0.17607624530792237, "incorrect_loss_per_char": 1.1992040988471773, "correct_loss_per_token": 0.8803812265396118, "incorrect_loss_per_token": 8.97990596294403, "correct_loss_uncond": -9.444947600364685, "incorrect_loss_uncond": -4.581216216087341}, "model_output": [{"sum_logits": -10.72421932220459, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.009564399719238, "logits_per_token": -10.72421932220459, "logits_per_char": -0.8936849435170492, "num_chars": 12}, {"sum_logits": -5.171823024749756, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -13.071375846862793, "logits_per_token": -5.171823024749756, "logits_per_char": -0.6464778780937195, "num_chars": 8}, {"sum_logits": -11.60787582397461, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.137758255004883, "logits_per_token": -11.60787582397461, "logits_per_char": -2.321575164794922, "num_chars": 5}, {"sum_logits": -0.8803812265396118, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -10.325328826904297, "logits_per_token": -0.8803812265396118, "logits_per_char": -0.17607624530792237, "num_chars": 5}, {"sum_logits": -8.415705680847168, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.025790214538574, "logits_per_token": -8.415705680847168, "logits_per_char": -0.9350784089830186, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 703, "native_id": "4396cb65629672723c7b184424e139bb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.464888572692871, "incorrect_loss_raw": 9.633159160614014, "correct_loss_per_char": 0.35547088441394625, "incorrect_loss_per_char": 0.8061455188653408, "correct_loss_per_token": 2.4882961908976235, "incorrect_loss_per_token": 4.816579580307007, "correct_loss_uncond": -9.702115058898926, "incorrect_loss_uncond": -9.326043128967285}, "model_output": [{"sum_logits": -7.343796253204346, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.044885635375977, "logits_per_token": -3.671898126602173, "logits_per_char": -0.4895864168802897, "num_chars": 15}, {"sum_logits": -7.464888572692871, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.167003631591797, "logits_per_token": -2.4882961908976235, "logits_per_char": -0.35547088441394625, "num_chars": 21}, {"sum_logits": -10.178135871887207, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -5.0890679359436035, "logits_per_char": -0.7829335286067083, "num_chars": 13}, {"sum_logits": -13.76858139038086, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.580202102661133, "logits_per_token": -6.88429069519043, "logits_per_char": -1.1473817825317383, "num_chars": 12}, {"sum_logits": -7.242123126983643, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.188671112060547, "logits_per_token": -3.6210615634918213, "logits_per_char": -0.804680347442627, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 704, "native_id": "2a58e81a9c4ce095d099e0d785fc2da4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.314610481262207, "incorrect_loss_raw": 9.121353507041931, "correct_loss_per_char": 2.2629220962524412, "incorrect_loss_per_char": 1.0503210590945349, "correct_loss_per_token": 11.314610481262207, "incorrect_loss_per_token": 6.892830550670624, "correct_loss_uncond": -2.8460464477539062, "incorrect_loss_uncond": -7.564275145530701}, "model_output": [{"sum_logits": -10.593198776245117, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.301739692687988, "logits_per_token": -10.593198776245117, "logits_per_char": -1.1770220862494574, "num_chars": 9}, {"sum_logits": -8.064031600952148, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.743524551391602, "logits_per_token": -8.064031600952148, "logits_per_char": -0.8960035112169054, "num_chars": 9}, {"sum_logits": -7.219030857086182, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.87139129638672, "logits_per_token": -3.609515428543091, "logits_per_char": -0.8021145396762424, "num_chars": 9}, {"sum_logits": -10.609152793884277, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -5.304576396942139, "logits_per_char": -1.3261440992355347, "num_chars": 8}, {"sum_logits": -11.314610481262207, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.160656929016113, "logits_per_token": -11.314610481262207, "logits_per_char": -2.2629220962524412, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 705, "native_id": "07f108d5321a66f460685f5c7499ecb2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.973661422729492, "incorrect_loss_raw": 8.360541880130768, "correct_loss_per_char": 0.5540923012627496, "incorrect_loss_per_char": 1.3347621349351746, "correct_loss_per_token": 4.986830711364746, "incorrect_loss_per_token": 8.360541880130768, "correct_loss_uncond": -9.657127380371094, "incorrect_loss_uncond": -6.128338038921356}, "model_output": [{"sum_logits": -14.150184631347656, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.876028060913086, "logits_per_token": -14.150184631347656, "logits_per_char": -2.0214549473353793, "num_chars": 7}, {"sum_logits": -8.414093971252441, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.595356941223145, "logits_per_token": -8.414093971252441, "logits_per_char": -1.4023489952087402, "num_chars": 6}, {"sum_logits": -9.973661422729492, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.630788803100586, "logits_per_token": -4.986830711364746, "logits_per_char": -0.5540923012627496, "num_chars": 18}, {"sum_logits": -3.47110915184021, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -3.47110915184021, "logits_per_char": -0.43388864398002625, "num_chars": 8}, {"sum_logits": -7.406779766082764, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -7.406779766082764, "logits_per_char": -1.4813559532165528, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 706, "native_id": "69bef3eb55463d040bdf98e2c97bfe1f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.212308883666992, "incorrect_loss_raw": 10.495994091033936, "correct_loss_per_char": 0.3474872589111328, "incorrect_loss_per_char": 1.0197553315547982, "correct_loss_per_token": 2.606154441833496, "incorrect_loss_per_token": 5.519557873408, "correct_loss_uncond": -9.894571304321289, "incorrect_loss_uncond": -7.069000959396362}, "model_output": [{"sum_logits": -5.212308883666992, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.106880187988281, "logits_per_token": -2.606154441833496, "logits_per_char": -0.3474872589111328, "num_chars": 15}, {"sum_logits": -9.179059982299805, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.303110122680664, "logits_per_token": -4.589529991149902, "logits_per_char": -0.8344599983908914, "num_chars": 11}, {"sum_logits": -11.411819458007812, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.771648406982422, "logits_per_token": -5.705909729003906, "logits_per_char": -1.2679799397786458, "num_chars": 9}, {"sum_logits": -14.415457725524902, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -25.383459091186523, "logits_per_token": -4.805152575174968, "logits_per_char": -1.201288143793742, "num_chars": 12}, {"sum_logits": -6.977639198303223, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.801762580871582, "logits_per_token": -6.977639198303223, "logits_per_char": -0.7752932442559136, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 707, "native_id": "912676495cceefadccbbf8c604486f97", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.286529064178467, "incorrect_loss_raw": 16.174309968948364, "correct_loss_per_char": 0.4490377902984619, "incorrect_loss_per_char": 1.1521965155713276, "correct_loss_per_token": 3.1432645320892334, "incorrect_loss_per_token": 6.447469453016917, "correct_loss_uncond": -9.521976947784424, "incorrect_loss_uncond": -2.299171209335327}, "model_output": [{"sum_logits": -6.286529064178467, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.80850601196289, "logits_per_token": -3.1432645320892334, "logits_per_char": -0.4490377902984619, "num_chars": 14}, {"sum_logits": -16.210542678833008, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.506126403808594, "logits_per_token": -8.105271339416504, "logits_per_char": -1.157895905630929, "num_chars": 14}, {"sum_logits": -16.86594581604004, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.918004989624023, "logits_per_token": -8.43297290802002, "logits_per_char": -1.686594581604004, "num_chars": 10}, {"sum_logits": -16.1573486328125, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.33769416809082, "logits_per_token": -5.385782877604167, "logits_per_char": -0.9504322725183824, "num_chars": 17}, {"sum_logits": -15.46340274810791, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.132099151611328, "logits_per_token": -3.8658506870269775, "logits_per_char": -0.8138633025319952, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 708, "native_id": "bdf92566f14599f1606109677206001f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.680619239807129, "incorrect_loss_raw": 10.872333526611328, "correct_loss_per_char": 0.8900516033172607, "incorrect_loss_per_char": 1.186127403804234, "correct_loss_per_token": 5.3403096199035645, "incorrect_loss_per_token": 9.131836652755737, "correct_loss_uncond": -9.878540992736816, "incorrect_loss_uncond": -4.911088228225708}, "model_output": [{"sum_logits": -10.680619239807129, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -5.3403096199035645, "logits_per_char": -0.8900516033172607, "num_chars": 12}, {"sum_logits": -13.923974990844727, "num_tokens": 2, "num_tokens_all": 162, "is_greedy": false, "sum_logits_uncond": -20.607328414916992, "logits_per_token": -6.961987495422363, "logits_per_char": -0.9282649993896485, "num_chars": 15}, {"sum_logits": -9.505489349365234, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -9.505489349365234, "logits_per_char": -0.9505489349365235, "num_chars": 10}, {"sum_logits": -8.020686149597168, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -8.020686149597168, "logits_per_char": -1.1458123070853097, "num_chars": 7}, {"sum_logits": -12.039183616638184, "num_tokens": 1, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -12.039183616638184, "logits_per_char": -1.7198833738054549, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 709, "native_id": "0df042743128b57e874bd5d79b7aae7a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.470493793487549, "incorrect_loss_raw": 11.519357919692993, "correct_loss_per_char": 0.4967215326097276, "incorrect_loss_per_char": 1.2042929958868651, "correct_loss_per_token": 2.2352468967437744, "incorrect_loss_per_token": 5.577342132727305, "correct_loss_uncond": -11.44663667678833, "incorrect_loss_uncond": -5.476428508758545}, "model_output": [{"sum_logits": -10.129067420959473, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.17085838317871, "logits_per_token": -5.064533710479736, "logits_per_char": -0.5958274953505572, "num_chars": 17}, {"sum_logits": -4.470493793487549, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.917130470275879, "logits_per_token": -2.2352468967437744, "logits_per_char": -0.4967215326097276, "num_chars": 9}, {"sum_logits": -9.474818229675293, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.017155647277832, "logits_per_token": -9.474818229675293, "logits_per_char": -1.8949636459350585, "num_chars": 5}, {"sum_logits": -13.819561004638672, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.101287841796875, "logits_per_token": -4.606520334879558, "logits_per_char": -1.5355067782931857, "num_chars": 9}, {"sum_logits": -12.653985023498535, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.693843841552734, "logits_per_token": -3.163496255874634, "logits_per_char": -0.7908740639686584, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 710, "native_id": "866ef7266d34c11e5a1b3df49fab96a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.84478759765625, "incorrect_loss_raw": 8.879354357719421, "correct_loss_per_char": 0.7605319552951388, "incorrect_loss_per_char": 0.7255592213991361, "correct_loss_per_token": 3.422393798828125, "incorrect_loss_per_token": 4.627895951271057, "correct_loss_uncond": -10.940547943115234, "incorrect_loss_uncond": -9.862583756446838}, "model_output": [{"sum_logits": -7.086243629455566, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.159774780273438, "logits_per_token": -3.543121814727783, "logits_per_char": -0.5905203024546305, "num_chars": 12}, {"sum_logits": -14.83568286895752, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.68887710571289, "logits_per_token": -7.41784143447876, "logits_per_char": -1.141206374535194, "num_chars": 13}, {"sum_logits": -4.5281853675842285, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.333724975585938, "logits_per_token": -4.5281853675842285, "logits_per_char": -0.5660231709480286, "num_chars": 8}, {"sum_logits": -6.84478759765625, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.785335540771484, "logits_per_token": -3.422393798828125, "logits_per_char": -0.7605319552951388, "num_chars": 9}, {"sum_logits": -9.067305564880371, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.785375595092773, "logits_per_token": -3.022435188293457, "logits_per_char": -0.6044870376586914, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 711, "native_id": "67ffcb4c3f2c6a1155e356f8a15ed250", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.8263282775878906, "incorrect_loss_raw": 6.851524353027344, "correct_loss_per_char": 0.7065820693969727, "incorrect_loss_per_char": 0.9513624628384908, "correct_loss_per_token": 2.8263282775878906, "incorrect_loss_per_token": 5.274494290351868, "correct_loss_uncond": -11.725471496582031, "incorrect_loss_uncond": -7.857354640960693}, "model_output": [{"sum_logits": -2.8263282775878906, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.551799774169922, "logits_per_token": -2.8263282775878906, "logits_per_char": -0.7065820693969727, "num_chars": 4}, {"sum_logits": -12.616240501403809, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.561912536621094, "logits_per_token": -6.308120250701904, "logits_per_char": -0.8410827000935872, "num_chars": 15}, {"sum_logits": -5.686482906341553, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.456185340881348, "logits_per_token": -5.686482906341553, "logits_per_char": -1.4216207265853882, "num_chars": 4}, {"sum_logits": -4.8873138427734375, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -4.8873138427734375, "logits_per_char": -0.48873138427734375, "num_chars": 10}, {"sum_logits": -4.216060161590576, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -9.111063003540039, "logits_per_token": -4.216060161590576, "logits_per_char": -1.054015040397644, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 712, "native_id": "87a133afae5d9d29d634f3384f28ef24", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.413406372070312, "incorrect_loss_raw": 9.177183985710144, "correct_loss_per_char": 0.5883378982543945, "incorrect_loss_per_char": 0.9623994917064519, "correct_loss_per_token": 4.706703186035156, "incorrect_loss_per_token": 4.935782313346863, "correct_loss_uncond": -8.288543701171875, "incorrect_loss_uncond": -7.488511681556702}, "model_output": [{"sum_logits": -10.180057525634766, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.681646347045898, "logits_per_token": -3.393352508544922, "logits_per_char": -0.9254597750577059, "num_chars": 11}, {"sum_logits": -4.936354637145996, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -2.468177318572998, "logits_per_char": -0.3290903091430664, "num_chars": 15}, {"sum_logits": -15.421448707580566, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.156307220458984, "logits_per_token": -7.710724353790283, "logits_per_char": -1.7134943008422852, "num_chars": 9}, {"sum_logits": -6.170875072479248, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.798263549804688, "logits_per_token": -6.170875072479248, "logits_per_char": -0.8815535817827497, "num_chars": 7}, {"sum_logits": -9.413406372070312, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.701950073242188, "logits_per_token": -4.706703186035156, "logits_per_char": -0.5883378982543945, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 713, "native_id": "4779be55f47a301debfc472e4fc2c7b6", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1225225925445557, "incorrect_loss_raw": 14.328664779663086, "correct_loss_per_char": 0.10204750841314142, "incorrect_loss_per_char": 1.3507680448618802, "correct_loss_per_token": 1.1225225925445557, "incorrect_loss_per_token": 9.983246922492981, "correct_loss_uncond": -14.639424085617065, "incorrect_loss_uncond": -2.35660982131958}, "model_output": [{"sum_logits": -23.175561904907227, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.316556930541992, "logits_per_token": -5.793890476226807, "logits_per_char": -1.5450374603271484, "num_chars": 15}, {"sum_logits": -1.1225225925445557, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -15.761946678161621, "logits_per_token": -1.1225225925445557, "logits_per_char": -0.10204750841314142, "num_chars": 11}, {"sum_logits": -11.035704612731934, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -11.035704612731934, "logits_per_char": -1.3794630765914917, "num_chars": 8}, {"sum_logits": -12.007671356201172, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.238839149475098, "logits_per_token": -12.007671356201172, "logits_per_char": -1.0916064869273792, "num_chars": 11}, {"sum_logits": -11.095721244812012, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.622055053710938, "logits_per_token": -11.095721244812012, "logits_per_char": -1.3869651556015015, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 714, "native_id": "7a28d31e66d870370642de3be47b9ef9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.521251678466797, "incorrect_loss_raw": 13.807608366012573, "correct_loss_per_char": 1.089485392850988, "incorrect_loss_per_char": 1.2880528706770678, "correct_loss_per_token": 9.260625839233398, "incorrect_loss_per_token": 6.903804183006287, "correct_loss_uncond": -4.266014099121094, "incorrect_loss_uncond": -4.89690089225769}, "model_output": [{"sum_logits": -13.224294662475586, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.44894027709961, "logits_per_token": -6.612147331237793, "logits_per_char": -1.4693660736083984, "num_chars": 9}, {"sum_logits": -10.675335884094238, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.70046615600586, "logits_per_token": -5.337667942047119, "logits_per_char": -0.8211796833918645, "num_chars": 13}, {"sum_logits": -11.643356323242188, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.359224319458008, "logits_per_token": -5.821678161621094, "logits_per_char": -1.4554195404052734, "num_chars": 8}, {"sum_logits": -18.521251678466797, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.78726577758789, "logits_per_token": -9.260625839233398, "logits_per_char": -1.089485392850988, "num_chars": 17}, {"sum_logits": -19.68744659423828, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.309406280517578, "logits_per_token": -9.84372329711914, "logits_per_char": -1.4062461853027344, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 715, "native_id": "042898e0c71adac5d123aaa6221c9754", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.674983978271484, "incorrect_loss_raw": 10.622596621513367, "correct_loss_per_char": 0.6910702841622489, "incorrect_loss_per_char": 0.9586400747299195, "correct_loss_per_token": 2.418745994567871, "incorrect_loss_per_token": 5.602811932563782, "correct_loss_uncond": -8.048591613769531, "incorrect_loss_uncond": -9.690970778465271}, "model_output": [{"sum_logits": -11.701334953308105, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.128650665283203, "logits_per_token": -5.850667476654053, "logits_per_char": -1.1701334953308105, "num_chars": 10}, {"sum_logits": -13.503679275512695, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -28.810569763183594, "logits_per_token": -4.501226425170898, "logits_per_char": -0.6430323464529855, "num_chars": 21}, {"sum_logits": -10.45203685760498, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.717529296875, "logits_per_token": -5.22601842880249, "logits_per_char": -1.045203685760498, "num_chars": 10}, {"sum_logits": -9.674983978271484, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -2.418745994567871, "logits_per_char": -0.6910702841622489, "num_chars": 14}, {"sum_logits": -6.8333353996276855, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.597519874572754, "logits_per_token": -6.8333353996276855, "logits_per_char": -0.9761907713753837, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 716, "native_id": "93bbaccb1c46d22124a846b8514de5bc", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.115260124206543, "incorrect_loss_raw": 11.167190074920654, "correct_loss_per_char": 0.653838830835679, "incorrect_loss_per_char": 1.127749743846932, "correct_loss_per_token": 3.7050867080688477, "incorrect_loss_per_token": 4.588119705518087, "correct_loss_uncond": -8.840825080871582, "incorrect_loss_uncond": -6.858340501785278}, "model_output": [{"sum_logits": -11.115260124206543, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.956085205078125, "logits_per_token": -3.7050867080688477, "logits_per_char": -0.653838830835679, "num_chars": 17}, {"sum_logits": -12.558904647827148, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.943510055541992, "logits_per_token": -6.279452323913574, "logits_per_char": -2.51178092956543, "num_chars": 5}, {"sum_logits": -10.352887153625488, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.011009216308594, "logits_per_token": -3.4509623845418296, "logits_per_char": -0.5751603974236382, "num_chars": 18}, {"sum_logits": -13.538520812988281, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.104209899902344, "logits_per_token": -4.512840270996094, "logits_per_char": -0.6769260406494141, "num_chars": 20}, {"sum_logits": -8.2184476852417, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.0433931350708, "logits_per_token": -4.10922384262085, "logits_per_char": -0.7471316077492454, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 717, "native_id": "ef889edd1b57d8d0c81e43f73c98c8e9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.1971254348754883, "incorrect_loss_raw": 9.889872193336487, "correct_loss_per_char": 0.39964067935943604, "incorrect_loss_per_char": 0.8361786497963799, "correct_loss_per_token": 3.1971254348754883, "incorrect_loss_per_token": 5.246581673622131, "correct_loss_uncond": -11.007176399230957, "incorrect_loss_uncond": -8.252896189689636}, "model_output": [{"sum_logits": -8.163451194763184, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.215415954589844, "logits_per_token": -2.7211503982543945, "logits_per_char": -0.3887357711791992, "num_chars": 21}, {"sum_logits": -5.134315013885498, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -5.134315013885498, "logits_per_char": -0.5704794459872775, "num_chars": 9}, {"sum_logits": -3.1971254348754883, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -3.1971254348754883, "logits_per_char": -0.39964067935943604, "num_chars": 8}, {"sum_logits": -14.440372467041016, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.902433395385742, "logits_per_token": -7.220186233520508, "logits_per_char": -1.203364372253418, "num_chars": 12}, {"sum_logits": -11.82135009765625, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.483112335205078, "logits_per_token": -5.910675048828125, "logits_per_char": -1.182135009765625, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 718, "native_id": "f4bb8ecacb9ce89e040f5f76bc79afb3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.488409996032715, "incorrect_loss_raw": 16.239202737808228, "correct_loss_per_char": 0.6555256247520447, "incorrect_loss_per_char": 1.0357018652071677, "correct_loss_per_token": 5.244204998016357, "incorrect_loss_per_token": 5.492903272310893, "correct_loss_uncond": -12.465306282043457, "incorrect_loss_uncond": -3.0043818950653076}, "model_output": [{"sum_logits": -15.951635360717773, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.200817108154297, "logits_per_token": -3.9879088401794434, "logits_per_char": -0.6646514733632406, "num_chars": 24}, {"sum_logits": -10.488409996032715, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.953716278076172, "logits_per_token": -5.244204998016357, "logits_per_char": -0.6555256247520447, "num_chars": 16}, {"sum_logits": -18.30684471130371, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.559406280517578, "logits_per_token": -9.153422355651855, "logits_per_char": -1.5255703926086426, "num_chars": 12}, {"sum_logits": -16.829940795898438, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -4.207485198974609, "logits_per_char": -0.8857863576788652, "num_chars": 19}, {"sum_logits": -13.868390083312988, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.131969451904297, "logits_per_token": -4.622796694437663, "logits_per_char": -1.0667992371779222, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 719, "native_id": "ec2e18fd8c18a4ebe5a091e0c8b94462", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.511837005615234, "incorrect_loss_raw": 16.259023666381836, "correct_loss_per_char": 1.1511837005615235, "incorrect_loss_per_char": 1.3827243691140954, "correct_loss_per_token": 5.755918502807617, "incorrect_loss_per_token": 7.570015509923299, "correct_loss_uncond": -7.272275924682617, "incorrect_loss_uncond": -4.405213832855225}, "model_output": [{"sum_logits": -14.243942260742188, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.482776641845703, "logits_per_token": -7.121971130371094, "logits_per_char": -1.2949038418856533, "num_chars": 11}, {"sum_logits": -11.511837005615234, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.78411293029785, "logits_per_token": -5.755918502807617, "logits_per_char": -1.1511837005615235, "num_chars": 10}, {"sum_logits": -13.427911758422852, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.61450958251953, "logits_per_token": -4.47597058614095, "logits_per_char": -0.8392444849014282, "num_chars": 16}, {"sum_logits": -17.386934280395508, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.89220428466797, "logits_per_token": -8.693467140197754, "logits_per_char": -1.5806303891268643, "num_chars": 11}, {"sum_logits": -19.977306365966797, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.66745948791504, "logits_per_token": -9.988653182983398, "logits_per_char": -1.816118760542436, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 720, "native_id": "07b51b231a9d6a143d8a73e69121e1b1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.631694793701172, "incorrect_loss_raw": 10.531362771987915, "correct_loss_per_char": 0.802641232808431, "incorrect_loss_per_char": 0.7629179159800211, "correct_loss_per_token": 4.815847396850586, "incorrect_loss_per_token": 5.325120131174724, "correct_loss_uncond": -12.466997146606445, "incorrect_loss_uncond": -5.31492280960083}, "model_output": [{"sum_logits": -12.988265991210938, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.10668182373047, "logits_per_token": -6.494132995605469, "logits_per_char": -0.6184888567243304, "num_chars": 21}, {"sum_logits": -9.631694793701172, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.098691940307617, "logits_per_token": -4.815847396850586, "logits_per_char": -0.802641232808431, "num_chars": 12}, {"sum_logits": -13.440086364746094, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.035812377929688, "logits_per_token": -4.480028788248698, "logits_per_char": -0.8400053977966309, "num_chars": 16}, {"sum_logits": -4.955538749694824, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -4.955538749694824, "logits_per_char": -0.8259231249491373, "num_chars": 6}, {"sum_logits": -10.741559982299805, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.43303108215332, "logits_per_token": -5.370779991149902, "logits_per_char": -0.767254284449986, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 721, "native_id": "e1744fc698cffb574e5cf4b29a81ce76", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8197216987609863, "incorrect_loss_raw": 7.798727989196777, "correct_loss_per_char": 0.23873260617256165, "incorrect_loss_per_char": 0.842685994647798, "correct_loss_per_token": 1.9098608493804932, "incorrect_loss_per_token": 5.649033308029175, "correct_loss_uncond": -15.649461269378662, "incorrect_loss_uncond": -7.5055787563323975}, "model_output": [{"sum_logits": -3.8197216987609863, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -1.9098608493804932, "logits_per_char": -0.23873260617256165, "num_chars": 16}, {"sum_logits": -8.59877872467041, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.076425552368164, "logits_per_token": -4.299389362335205, "logits_per_char": -0.6141984803336007, "num_chars": 14}, {"sum_logits": -5.994447708129883, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -5.994447708129883, "logits_per_char": -0.9990746180216471, "num_chars": 6}, {"sum_logits": -8.002906799316406, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -8.002906799316406, "logits_per_char": -1.1432723999023438, "num_chars": 7}, {"sum_logits": -8.59877872467041, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.076425552368164, "logits_per_token": -4.299389362335205, "logits_per_char": -0.6141984803336007, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 722, "native_id": "27604394ccee83e089f9ffae1883cf07", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.437188148498535, "incorrect_loss_raw": 11.544289827346802, "correct_loss_per_char": 0.7152431276109483, "incorrect_loss_per_char": 1.3037781047098562, "correct_loss_per_token": 3.2185940742492676, "incorrect_loss_per_token": 8.181340456008911, "correct_loss_uncond": -11.252774238586426, "incorrect_loss_uncond": -6.212406396865845}, "model_output": [{"sum_logits": -6.437188148498535, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.68996238708496, "logits_per_token": -3.2185940742492676, "logits_per_char": -0.7152431276109483, "num_chars": 9}, {"sum_logits": -13.424947738647461, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.201961517333984, "logits_per_token": -6.7124738693237305, "logits_per_char": -1.2204497944224963, "num_chars": 11}, {"sum_logits": -10.630311965942383, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -10.630311965942383, "logits_per_char": -1.3287889957427979, "num_chars": 8}, {"sum_logits": -8.6432523727417, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -8.6432523727417, "logits_per_char": -1.4405420621236165, "num_chars": 6}, {"sum_logits": -13.478647232055664, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.660274505615234, "logits_per_token": -6.739323616027832, "logits_per_char": -1.225331566550515, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 723, "native_id": "1272e693cf9152e7ac71095c643676b5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.880512237548828, "incorrect_loss_raw": 8.561220645904541, "correct_loss_per_char": 0.7350640296936035, "incorrect_loss_per_char": 0.9933343578170944, "correct_loss_per_token": 5.880512237548828, "incorrect_loss_per_token": 7.603969931602478, "correct_loss_uncond": -10.298398971557617, "incorrect_loss_uncond": -6.915384769439697}, "model_output": [{"sum_logits": -5.880512237548828, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -5.880512237548828, "logits_per_char": -0.7350640296936035, "num_chars": 8}, {"sum_logits": -8.598065376281738, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -16.823331832885742, "logits_per_token": -8.598065376281738, "logits_per_char": -1.228295053754534, "num_chars": 7}, {"sum_logits": -7.658005714416504, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -20.09314727783203, "logits_per_token": -3.829002857208252, "logits_per_char": -0.5890773626474234, "num_chars": 13}, {"sum_logits": -9.656854629516602, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.509931564331055, "logits_per_token": -9.656854629516602, "logits_per_char": -0.9656854629516601, "num_chars": 10}, {"sum_logits": -8.33195686340332, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -8.33195686340332, "logits_per_char": -1.19027955191476, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 724, "native_id": "7bff23f6c12e9136f0961514bebb8cd3", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.122583866119385, "incorrect_loss_raw": 9.495613694190979, "correct_loss_per_char": 0.34354865550994873, "incorrect_loss_per_char": 1.0219923312013799, "correct_loss_per_token": 1.374194622039795, "incorrect_loss_per_token": 5.79558801651001, "correct_loss_uncond": -13.322091579437256, "incorrect_loss_uncond": -6.916680932044983}, "model_output": [{"sum_logits": -2.3254494667053223, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -2.3254494667053223, "logits_per_char": -0.3875749111175537, "num_chars": 6}, {"sum_logits": -6.05679988861084, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.18783950805664, "logits_per_token": -6.05679988861084, "logits_per_char": -1.0094666481018066, "num_chars": 6}, {"sum_logits": -13.617496490478516, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.79824447631836, "logits_per_token": -6.808748245239258, "logits_per_char": -1.2379542264071377, "num_chars": 11}, {"sum_logits": -4.122583866119385, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.44467544555664, "logits_per_token": -1.374194622039795, "logits_per_char": -0.34354865550994873, "num_chars": 12}, {"sum_logits": -15.982708930969238, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.853477478027344, "logits_per_token": -7.991354465484619, "logits_per_char": -1.4529735391790217, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 725, "native_id": "20ae70b9b157b298569cd761787833e7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.819968223571777, "incorrect_loss_raw": 8.200860500335693, "correct_loss_per_char": 0.4819968223571777, "incorrect_loss_per_char": 1.1871649384498597, "correct_loss_per_token": 4.819968223571777, "incorrect_loss_per_token": 5.5999376972516375, "correct_loss_uncond": -9.932950973510742, "incorrect_loss_uncond": -7.77305269241333}, "model_output": [{"sum_logits": -9.168334007263184, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.208596229553223, "logits_per_token": -9.168334007263184, "logits_per_char": -1.8336668014526367, "num_chars": 5}, {"sum_logits": -6.891551494598389, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -6.891551494598389, "logits_per_char": -1.7228878736495972, "num_chars": 4}, {"sum_logits": -4.552078723907471, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -2.2760393619537354, "logits_per_char": -0.3793398936589559, "num_chars": 12}, {"sum_logits": -12.19147777557373, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -4.063825925191243, "logits_per_char": -0.8127651850382487, "num_chars": 15}, {"sum_logits": -4.819968223571777, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -4.819968223571777, "logits_per_char": -0.4819968223571777, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 726, "native_id": "bdd29d7c12e3d795b78ffc048631e7e7", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.769653081893921, "incorrect_loss_raw": 10.374749422073364, "correct_loss_per_char": 0.7539306163787842, "incorrect_loss_per_char": 1.0144705943811954, "correct_loss_per_token": 3.769653081893921, "incorrect_loss_per_token": 5.3234468301137285, "correct_loss_uncond": -11.317832231521606, "incorrect_loss_uncond": -7.798047065734863}, "model_output": [{"sum_logits": -12.880352020263672, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.293450673421224, "logits_per_char": -1.4311502244737413, "num_chars": 9}, {"sum_logits": -10.136537551879883, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.867748260498047, "logits_per_token": -5.068268775939941, "logits_per_char": -0.7797336578369141, "num_chars": 13}, {"sum_logits": -5.382027626037598, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -5.382027626037598, "logits_per_char": -1.0764055252075195, "num_chars": 5}, {"sum_logits": -3.769653081893921, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -3.769653081893921, "logits_per_char": -0.7539306163787842, "num_chars": 5}, {"sum_logits": -13.100080490112305, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.356258392333984, "logits_per_token": -6.550040245056152, "logits_per_char": -0.7705929700066062, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 727, "native_id": "cc1a547bdfdcc95e4d632453af14bc96", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.511316299438477, "incorrect_loss_raw": 10.416127443313599, "correct_loss_per_char": 1.4185527165730794, "incorrect_loss_per_char": 1.5990990910265181, "correct_loss_per_token": 8.511316299438477, "incorrect_loss_per_token": 10.416127443313599, "correct_loss_uncond": -4.863069534301758, "incorrect_loss_uncond": -5.081989765167236}, "model_output": [{"sum_logits": -10.5571870803833, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -10.5571870803833, "logits_per_char": -1.3196483850479126, "num_chars": 8}, {"sum_logits": -10.034261703491211, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -10.034261703491211, "logits_per_char": -1.114917967054579, "num_chars": 9}, {"sum_logits": -8.511316299438477, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -8.511316299438477, "logits_per_char": -1.4185527165730794, "num_chars": 6}, {"sum_logits": -7.583465576171875, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -7.583465576171875, "logits_per_char": -1.2639109293619792, "num_chars": 6}, {"sum_logits": -13.489595413208008, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -13.993163108825684, "logits_per_token": -13.489595413208008, "logits_per_char": -2.6979190826416017, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 728, "native_id": "896b25dc41f84357add1c798d4a96cd8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.925623416900635, "incorrect_loss_raw": 7.101622939109802, "correct_loss_per_char": 0.9876039028167725, "incorrect_loss_per_char": 0.6939158082008362, "correct_loss_per_token": 5.925623416900635, "incorrect_loss_per_token": 3.6560529470443726, "correct_loss_uncond": -3.622560977935791, "incorrect_loss_uncond": -10.314052700996399}, "model_output": [{"sum_logits": -3.297513961791992, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -3.297513961791992, "logits_per_char": -0.5495856602986654, "num_chars": 6}, {"sum_logits": -10.085901260375977, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.739906311035156, "logits_per_token": -3.361967086791992, "logits_per_char": -0.6723934173583984, "num_chars": 15}, {"sum_logits": -5.925623416900635, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -9.548184394836426, "logits_per_token": -5.925623416900635, "logits_per_char": -0.9876039028167725, "num_chars": 6}, {"sum_logits": -10.587518692016602, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.2907657623291, "logits_per_token": -3.529172897338867, "logits_per_char": -0.8144245147705078, "num_chars": 13}, {"sum_logits": -4.435557842254639, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -4.435557842254639, "logits_per_char": -0.7392596403757731, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 729, "native_id": "1ca3cd9475d7e9da2ddb74911ee2bb68", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.142597198486328, "incorrect_loss_raw": 16.68377709388733, "correct_loss_per_char": 1.1142597198486328, "incorrect_loss_per_char": 1.5654710428023235, "correct_loss_per_token": 5.571298599243164, "incorrect_loss_per_token": 12.74326765537262, "correct_loss_uncond": -5.222698211669922, "incorrect_loss_uncond": 0.835015058517456}, "model_output": [{"sum_logits": -16.191259384155273, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.955645561218262, "logits_per_token": -16.191259384155273, "logits_per_char": -1.3492716153462727, "num_chars": 12}, {"sum_logits": -21.016050338745117, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.20255470275879, "logits_per_token": -5.254012584686279, "logits_per_char": -1.9105500307950107, "num_chars": 11}, {"sum_logits": -12.501076698303223, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -12.501076698303223, "logits_per_char": -1.7858680997576033, "num_chars": 7}, {"sum_logits": -17.026721954345703, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.017654418945312, "logits_per_token": -17.026721954345703, "logits_per_char": -1.2161944253104073, "num_chars": 14}, {"sum_logits": -11.142597198486328, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.36529541015625, "logits_per_token": -5.571298599243164, "logits_per_char": -1.1142597198486328, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 730, "native_id": "129ec46cc2541b73198d774ee632c8d7", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.6681742668151855, "incorrect_loss_raw": 9.512095093727112, "correct_loss_per_char": 0.7085217833518982, "incorrect_loss_per_char": 1.3897523376676773, "correct_loss_per_token": 2.8340871334075928, "incorrect_loss_per_token": 6.437587141990662, "correct_loss_uncond": -10.519615650177002, "incorrect_loss_uncond": -6.129030108451843}, "model_output": [{"sum_logits": -12.229127883911133, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.75861358642578, "logits_per_token": -6.114563941955566, "logits_per_char": -1.747018269130162, "num_chars": 7}, {"sum_logits": -12.366935729980469, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.867130279541016, "logits_per_token": -6.183467864990234, "logits_per_char": -1.766705104282924, "num_chars": 7}, {"sum_logits": -5.6681742668151855, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.187789916992188, "logits_per_token": -2.8340871334075928, "logits_per_char": -0.7085217833518982, "num_chars": 8}, {"sum_logits": -6.194071292877197, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.617376327514648, "logits_per_token": -6.194071292877197, "logits_per_char": -1.2388142585754394, "num_chars": 5}, {"sum_logits": -7.258245468139648, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.321380615234375, "logits_per_token": -7.258245468139648, "logits_per_char": -0.8064717186821831, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 731, "native_id": "0e5c7c0cec5b693e52f74f5f879d84fb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.399300575256348, "incorrect_loss_raw": 16.514143466949463, "correct_loss_per_char": 0.46245628595352173, "incorrect_loss_per_char": 1.0250972982265651, "correct_loss_per_token": 3.699650287628174, "incorrect_loss_per_token": 7.225921869277954, "correct_loss_uncond": -14.915587425231934, "incorrect_loss_uncond": -4.533449649810791}, "model_output": [{"sum_logits": -16.818553924560547, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.391342163085938, "logits_per_token": -8.409276962280273, "logits_per_char": -1.2937349172738881, "num_chars": 13}, {"sum_logits": -7.399300575256348, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.31488800048828, "logits_per_token": -3.699650287628174, "logits_per_char": -0.46245628595352173, "num_chars": 16}, {"sum_logits": -17.85957908630371, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.579242706298828, "logits_per_token": -8.929789543151855, "logits_per_char": -1.0505634756649243, "num_chars": 17}, {"sum_logits": -17.629776000976562, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -24.49643898010254, "logits_per_token": -8.814888000488281, "logits_per_char": -0.8395131429036459, "num_chars": 21}, {"sum_logits": -13.748664855957031, "num_tokens": 5, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.72334861755371, "logits_per_token": -2.7497329711914062, "logits_per_char": -0.9165776570638021, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 732, "native_id": "af035b75b6f7a1927b1648963f281c5e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3522188663482666, "incorrect_loss_raw": 10.216400861740112, "correct_loss_per_char": 0.3920364777247111, "incorrect_loss_per_char": 1.0975527105459508, "correct_loss_per_token": 2.3522188663482666, "incorrect_loss_per_token": 7.947626829147339, "correct_loss_uncond": -11.022166967391968, "incorrect_loss_uncond": -4.953953981399536}, "model_output": [{"sum_logits": -7.930259704589844, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -7.930259704589844, "logits_per_char": -0.9912824630737305, "num_chars": 8}, {"sum_logits": -2.3522188663482666, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -2.3522188663482666, "logits_per_char": -0.3920364777247111, "num_chars": 6}, {"sum_logits": -13.61264419555664, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.831119537353516, "logits_per_token": -4.537548065185547, "logits_per_char": -1.04712647658128, "num_chars": 13}, {"sum_logits": -12.87038803100586, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -12.87038803100586, "logits_per_char": -1.4300431145562067, "num_chars": 9}, {"sum_logits": -6.4523115158081055, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -6.4523115158081055, "logits_per_char": -0.9217587879725865, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 733, "native_id": "32d5b7fcae24f0d4871cfb219c5a4b47", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.388385772705078, "incorrect_loss_raw": 12.320704936981201, "correct_loss_per_char": 0.44903214772542316, "incorrect_loss_per_char": 1.503586865565627, "correct_loss_per_token": 5.388385772705078, "incorrect_loss_per_token": 7.612386862436931, "correct_loss_uncond": -10.08261489868164, "incorrect_loss_uncond": -6.07124662399292}, "model_output": [{"sum_logits": -14.129440307617188, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.883464813232422, "logits_per_token": -7.064720153808594, "logits_per_char": -1.5699378119574652, "num_chars": 9}, {"sum_logits": -9.121642112731934, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.485767364501953, "logits_per_token": -9.121642112731934, "logits_per_char": -2.2804105281829834, "num_chars": 4}, {"sum_logits": -5.388385772705078, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.471000671386719, "logits_per_token": -5.388385772705078, "logits_per_char": -0.44903214772542316, "num_chars": 12}, {"sum_logits": -17.652828216552734, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -24.94717788696289, "logits_per_token": -5.884276072184245, "logits_per_char": -0.7675142702849015, "num_chars": 23}, {"sum_logits": -8.37890911102295, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -17.25139617919922, "logits_per_token": -8.37890911102295, "logits_per_char": -1.3964848518371582, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 734, "native_id": "87505da761eaa5c3c4703d02a12d46bc", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.697227478027344, "incorrect_loss_raw": 10.708149790763855, "correct_loss_per_char": 0.7735382883172286, "incorrect_loss_per_char": 0.9623914175864423, "correct_loss_per_token": 4.899075826009114, "incorrect_loss_per_token": 3.3206328749656677, "correct_loss_uncond": -11.388187408447266, "incorrect_loss_uncond": -7.686151623725891}, "model_output": [{"sum_logits": -6.78644323348999, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.317449569702148, "logits_per_token": -3.393221616744995, "logits_per_char": -0.8483054041862488, "num_chars": 8}, {"sum_logits": -17.193649291992188, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.37485122680664, "logits_per_token": -4.298412322998047, "logits_per_char": -1.0746030807495117, "num_chars": 16}, {"sum_logits": -8.319255828857422, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.403579711914062, "logits_per_token": -2.0798139572143555, "logits_per_char": -0.7562959844415839, "num_chars": 11}, {"sum_logits": -14.697227478027344, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -26.08541488647461, "logits_per_token": -4.899075826009114, "logits_per_char": -0.7735382883172286, "num_chars": 19}, {"sum_logits": -10.53325080871582, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.481325149536133, "logits_per_token": -3.5110836029052734, "logits_per_char": -1.1703612009684246, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 735, "native_id": "ef3d5d35128678937c36438466e0fc93", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.070024490356445, "incorrect_loss_raw": 7.52958607673645, "correct_loss_per_char": 0.3380016326904297, "incorrect_loss_per_char": 0.9383406079176699, "correct_loss_per_token": 2.5350122451782227, "incorrect_loss_per_token": 5.549702763557434, "correct_loss_uncond": -15.743690490722656, "incorrect_loss_uncond": -8.019019842147827}, "model_output": [{"sum_logits": -8.082544326782227, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.359224319458008, "logits_per_token": -4.041272163391113, "logits_per_char": -1.0103180408477783, "num_chars": 8}, {"sum_logits": -8.207366943359375, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.052994728088379, "logits_per_token": -8.207366943359375, "logits_per_char": -1.0259208679199219, "num_chars": 8}, {"sum_logits": -6.071910858154297, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.472149848937988, "logits_per_token": -6.071910858154297, "logits_per_char": -1.011985143025716, "num_chars": 6}, {"sum_logits": -7.756522178649902, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.310054779052734, "logits_per_token": -3.878261089324951, "logits_per_char": -0.7051383798772638, "num_chars": 11}, {"sum_logits": -5.070024490356445, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.8137149810791, "logits_per_token": -2.5350122451782227, "logits_per_char": -0.3380016326904297, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 736, "native_id": "4f1d8007b446b0e10f07fd63cbd31b6f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.285081386566162, "incorrect_loss_raw": 10.047891974449158, "correct_loss_per_char": 0.4570162773132324, "incorrect_loss_per_char": 1.2193010014200967, "correct_loss_per_token": 2.285081386566162, "incorrect_loss_per_token": 6.524466832478841, "correct_loss_uncond": -10.06444501876831, "incorrect_loss_uncond": -4.154764771461487}, "model_output": [{"sum_logits": -10.721428871154785, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -10.721428871154785, "logits_per_char": -1.7869048118591309, "num_chars": 6}, {"sum_logits": -2.285081386566162, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -12.349526405334473, "logits_per_token": -2.285081386566162, "logits_per_char": -0.4570162773132324, "num_chars": 5}, {"sum_logits": -12.068779945373535, "num_tokens": 3, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -16.497425079345703, "logits_per_token": -4.022926648457845, "logits_per_char": -0.8620557103838239, "num_chars": 14}, {"sum_logits": -12.095694541931152, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -16.441146850585938, "logits_per_token": -6.047847270965576, "logits_per_char": -1.3439660602145724, "num_chars": 9}, {"sum_logits": -5.305664539337158, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -10.916240692138672, "logits_per_token": -5.305664539337158, "logits_per_char": -0.8842774232228597, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 737, "native_id": "4c30d5eed4137cba89747510973f37a3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4794175624847412, "incorrect_loss_raw": 12.995086908340454, "correct_loss_per_char": 0.1479417562484741, "incorrect_loss_per_char": 1.3315141464982714, "correct_loss_per_token": 1.4794175624847412, "incorrect_loss_per_token": 8.779602766036987, "correct_loss_uncond": -14.87080979347229, "incorrect_loss_uncond": -3.7510383129119873}, "model_output": [{"sum_logits": -10.215240478515625, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -10.215240478515625, "logits_per_char": -2.043048095703125, "num_chars": 5}, {"sum_logits": -1.4794175624847412, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.35022735595703, "logits_per_token": -1.4794175624847412, "logits_per_char": -0.1479417562484741, "num_chars": 10}, {"sum_logits": -14.671310424804688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -7.335655212402344, "logits_per_char": -0.916956901550293, "num_chars": 16}, {"sum_logits": -19.052562713623047, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.27187156677246, "logits_per_token": -9.526281356811523, "logits_per_char": -1.3608973366873605, "num_chars": 14}, {"sum_logits": -8.041234016418457, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -8.041234016418457, "logits_per_char": -1.0051542520523071, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 738, "native_id": "515834727e23e30ab7c8fe5ba7e9a765", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8005077838897705, "incorrect_loss_raw": 11.706281542778015, "correct_loss_per_char": 0.4000725405556815, "incorrect_loss_per_char": 0.9881456884599867, "correct_loss_per_token": 2.8005077838897705, "incorrect_loss_per_token": 6.636253237724304, "correct_loss_uncond": -12.470773458480835, "incorrect_loss_uncond": -6.584381699562073}, "model_output": [{"sum_logits": -16.39169692993164, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.51051139831543, "logits_per_token": -8.19584846496582, "logits_per_char": -1.4901542663574219, "num_chars": 11}, {"sum_logits": -2.8005077838897705, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -2.8005077838897705, "logits_per_char": -0.4000725405556815, "num_chars": 7}, {"sum_logits": -6.264899730682373, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -6.264899730682373, "logits_per_char": -0.7831124663352966, "num_chars": 8}, {"sum_logits": -14.296951293945312, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.29363250732422, "logits_per_token": -7.148475646972656, "logits_per_char": -1.0212108067103796, "num_chars": 14}, {"sum_logits": -9.871578216552734, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -4.935789108276367, "logits_per_char": -0.658105214436849, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 739, "native_id": "34ec6393db5a01f689c11fac153e31c1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.939569473266602, "incorrect_loss_raw": 24.940985202789307, "correct_loss_per_char": 1.8232615788777669, "incorrect_loss_per_char": 1.7668743252911272, "correct_loss_per_token": 10.939569473266602, "incorrect_loss_per_token": 9.3267662525177, "correct_loss_uncond": -3.71047306060791, "incorrect_loss_uncond": 2.9152445793151855}, "model_output": [{"sum_logits": -10.939569473266602, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.650042533874512, "logits_per_token": -10.939569473266602, "logits_per_char": -1.8232615788777669, "num_chars": 6}, {"sum_logits": -22.883285522460938, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.679433822631836, "logits_per_token": -7.6277618408203125, "logits_per_char": -1.2712936401367188, "num_chars": 18}, {"sum_logits": -23.226579666137695, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.15237045288086, "logits_per_token": -11.613289833068848, "logits_per_char": -2.5807310740152993, "num_chars": 9}, {"sum_logits": -35.044097900390625, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -32.408485412597656, "logits_per_token": -8.761024475097656, "logits_per_char": -1.5236564304517664, "num_chars": 23}, {"sum_logits": -18.60997772216797, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.862672805786133, "logits_per_token": -9.304988861083984, "logits_per_char": -1.6918161565607244, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 740, "native_id": "0f0e339412f719a019bf373e6daf2530", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.660932540893555, "incorrect_loss_raw": 12.298124313354492, "correct_loss_per_char": 0.4354563492995042, "incorrect_loss_per_char": 1.3985820770263673, "correct_loss_per_token": 1.8869775136311848, "incorrect_loss_per_token": 7.089942574501038, "correct_loss_uncond": -14.426809310913086, "incorrect_loss_uncond": -5.707392930984497}, "model_output": [{"sum_logits": -5.660932540893555, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.08774185180664, "logits_per_token": -1.8869775136311848, "logits_per_char": -0.4354563492995042, "num_chars": 13}, {"sum_logits": -12.162942886352539, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -22.716899871826172, "logits_per_token": -6.0814714431762695, "logits_per_char": -0.8108628590901693, "num_chars": 15}, {"sum_logits": -7.527043342590332, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.539950370788574, "logits_per_token": -7.527043342590332, "logits_per_char": -1.5054086685180663, "num_chars": 5}, {"sum_logits": -14.500863075256348, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.573307037353516, "logits_per_token": -7.250431537628174, "logits_per_char": -1.6112070083618164, "num_chars": 9}, {"sum_logits": -15.00164794921875, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.191911697387695, "logits_per_token": -7.500823974609375, "logits_per_char": -1.6668497721354167, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 741, "native_id": "489a082aab43dd1a53f3f1f89c2365ed", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.722369909286499, "incorrect_loss_raw": 15.94058084487915, "correct_loss_per_char": 0.3402962386608124, "incorrect_loss_per_char": 1.4980563875400659, "correct_loss_per_token": 2.722369909286499, "incorrect_loss_per_token": 7.970290422439575, "correct_loss_uncond": -9.782615900039673, "incorrect_loss_uncond": -2.1525864601135254}, "model_output": [{"sum_logits": -2.722369909286499, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.504985809326172, "logits_per_token": -2.722369909286499, "logits_per_char": -0.3402962386608124, "num_chars": 8}, {"sum_logits": -15.35207748413086, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.225109100341797, "logits_per_token": -7.67603874206543, "logits_per_char": -1.39564340764826, "num_chars": 11}, {"sum_logits": -19.54971694946289, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.18634796142578, "logits_per_token": -9.774858474731445, "logits_per_char": -1.954971694946289, "num_chars": 10}, {"sum_logits": -14.666546821594238, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.59581756591797, "logits_per_token": -7.333273410797119, "logits_per_char": -1.2222122351328533, "num_chars": 12}, {"sum_logits": -14.193982124328613, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.365394592285156, "logits_per_token": -7.096991062164307, "logits_per_char": -1.4193982124328612, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 742, "native_id": "7c45033e9fd9f1a759923971b14390ed", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5386537313461304, "incorrect_loss_raw": 22.395065784454346, "correct_loss_per_char": 0.3846634328365326, "incorrect_loss_per_char": 1.4402729709704956, "correct_loss_per_token": 1.5386537313461304, "incorrect_loss_per_token": 9.486918846766153, "correct_loss_uncond": -12.467135071754456, "incorrect_loss_uncond": 1.5401504039764404}, "model_output": [{"sum_logits": -17.740264892578125, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.752081871032715, "logits_per_token": -17.740264892578125, "logits_per_char": -1.6127513538707385, "num_chars": 11}, {"sum_logits": -15.346057891845703, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -5.115352630615234, "logits_per_char": -0.9591286182403564, "num_chars": 16}, {"sum_logits": -17.029203414916992, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.462663650512695, "logits_per_token": -8.514601707458496, "logits_per_char": -1.309938724224384, "num_chars": 13}, {"sum_logits": -39.46473693847656, "num_tokens": 6, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -32.896583557128906, "logits_per_token": -6.577456156412761, "logits_per_char": -1.879273187546503, "num_chars": 21}, {"sum_logits": -1.5386537313461304, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -14.005788803100586, "logits_per_token": -1.5386537313461304, "logits_per_char": -0.3846634328365326, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 743, "native_id": "061f326d2a87a10da6316b55bd5522bd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.231417655944824, "incorrect_loss_raw": 8.819496631622314, "correct_loss_per_char": 0.4616310937064035, "incorrect_loss_per_char": 0.8078172683113753, "correct_loss_per_token": 3.231417655944824, "incorrect_loss_per_token": 5.046763261159261, "correct_loss_uncond": -12.039863586425781, "incorrect_loss_uncond": -9.272577047348022}, "model_output": [{"sum_logits": -5.742751598358154, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.478294372558594, "logits_per_token": -2.871375799179077, "logits_per_char": -0.4785626331965129, "num_chars": 12}, {"sum_logits": -7.68149995803833, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -3.840749979019165, "logits_per_char": -0.5120999972025554, "num_chars": 15}, {"sum_logits": -9.285523414611816, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.449410438537598, "logits_per_token": -9.285523414611816, "logits_per_char": -0.8441384922374379, "num_chars": 11}, {"sum_logits": -3.231417655944824, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -3.231417655944824, "logits_per_char": -0.4616310937064035, "num_chars": 7}, {"sum_logits": -12.568211555480957, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.2863826751709, "logits_per_token": -4.189403851826985, "logits_per_char": -1.3964679506089952, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 744, "native_id": "d747c4e463b80bfcc49b874063f9fae1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.88240385055542, "incorrect_loss_raw": 10.843024730682373, "correct_loss_per_char": 0.4201717036111014, "incorrect_loss_per_char": 1.4060839172170945, "correct_loss_per_token": 2.94120192527771, "incorrect_loss_per_token": 7.865421295166016, "correct_loss_uncond": -11.807987689971924, "incorrect_loss_uncond": -5.993370056152344}, "model_output": [{"sum_logits": -12.1715087890625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.04582977294922, "logits_per_token": -6.08575439453125, "logits_per_char": -1.5214385986328125, "num_chars": 8}, {"sum_logits": -8.620697021484375, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.249696731567383, "logits_per_token": -8.620697021484375, "logits_per_char": -1.2315281459263392, "num_chars": 7}, {"sum_logits": -10.930574417114258, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.257335662841797, "logits_per_token": -10.930574417114258, "logits_per_char": -2.1861148834228517, "num_chars": 5}, {"sum_logits": -5.88240385055542, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.690391540527344, "logits_per_token": -2.94120192527771, "logits_per_char": -0.4201717036111014, "num_chars": 14}, {"sum_logits": -11.64931869506836, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.79271697998047, "logits_per_token": -5.82465934753418, "logits_per_char": -0.6852540408863741, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 745, "native_id": "df3d27338bcf86b341b8b02d4309daf5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.33723783493042, "incorrect_loss_raw": 8.308863520622253, "correct_loss_per_char": 1.0562063058217366, "incorrect_loss_per_char": 1.1482756620103665, "correct_loss_per_token": 6.33723783493042, "incorrect_loss_per_token": 8.308863520622253, "correct_loss_uncond": -7.993800640106201, "incorrect_loss_uncond": -5.510756611824036}, "model_output": [{"sum_logits": -8.294869422912598, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -8.294869422912598, "logits_per_char": -1.3824782371520996, "num_chars": 6}, {"sum_logits": -6.33723783493042, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.331038475036621, "logits_per_token": -6.33723783493042, "logits_per_char": -1.0562063058217366, "num_chars": 6}, {"sum_logits": -6.996549606323242, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -6.996549606323242, "logits_per_char": -0.6360499642112039, "num_chars": 11}, {"sum_logits": -4.420934200286865, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -4.420934200286865, "logits_per_char": -0.884186840057373, "num_chars": 5}, {"sum_logits": -13.523100852966309, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.967434883117676, "logits_per_token": -13.523100852966309, "logits_per_char": -1.6903876066207886, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 746, "native_id": "db63bf66a8bfd16e5103cbdd350f5202", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.91218376159668, "incorrect_loss_raw": 11.19888186454773, "correct_loss_per_char": 0.614022970199585, "incorrect_loss_per_char": 1.120067820851765, "correct_loss_per_token": 4.91218376159668, "incorrect_loss_per_token": 7.801421085993449, "correct_loss_uncond": -11.266727447509766, "incorrect_loss_uncond": -5.364821195602417}, "model_output": [{"sum_logits": -4.91218376159668, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -4.91218376159668, "logits_per_char": -0.614022970199585, "num_chars": 8}, {"sum_logits": -7.40242862701416, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -7.40242862701416, "logits_per_char": -1.23373810450236, "num_chars": 6}, {"sum_logits": -10.57345962524414, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.27483367919922, "logits_per_token": -5.28672981262207, "logits_per_char": -1.0573459625244142, "num_chars": 10}, {"sum_logits": -14.364969253540039, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.663701057434082, "logits_per_token": -14.364969253540039, "logits_per_char": -1.5961076948377821, "num_chars": 9}, {"sum_logits": -12.454669952392578, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.244718551635742, "logits_per_token": -4.151556650797526, "logits_per_char": -0.5930795215425038, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 747, "native_id": "f8a9208665a4f2d64986940456b4b964", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.214118957519531, "incorrect_loss_raw": 18.05991244316101, "correct_loss_per_char": 1.0214118957519531, "incorrect_loss_per_char": 1.1783037900827646, "correct_loss_per_token": 5.107059478759766, "incorrect_loss_per_token": 7.276597499847412, "correct_loss_uncond": -7.173099517822266, "incorrect_loss_uncond": -8.314489603042603}, "model_output": [{"sum_logits": -11.040328025817871, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -5.5201640129089355, "logits_per_char": -1.2267031139797635, "num_chars": 9}, {"sum_logits": -28.053739547729492, "num_tokens": 4, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -44.22098159790039, "logits_per_token": -7.013434886932373, "logits_per_char": -1.0390273906566478, "num_chars": 27}, {"sum_logits": -18.595943450927734, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.676189422607422, "logits_per_token": -9.297971725463867, "logits_per_char": -1.3282816750662667, "num_chars": 14}, {"sum_logits": -10.214118957519531, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.387218475341797, "logits_per_token": -5.107059478759766, "logits_per_char": -1.0214118957519531, "num_chars": 10}, {"sum_logits": -14.549638748168945, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.605876922607422, "logits_per_token": -7.274819374084473, "logits_per_char": -1.1192029806283803, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 748, "native_id": "1bf4c6b5bd870b1a079106e1e97e5d09", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.489652156829834, "incorrect_loss_raw": 10.105087876319885, "correct_loss_per_char": 0.31120651960372925, "incorrect_loss_per_char": 1.0837382985399917, "correct_loss_per_token": 2.489652156829834, "incorrect_loss_per_token": 5.1912859082221985, "correct_loss_uncond": -10.459608554840088, "incorrect_loss_uncond": -5.629361510276794}, "model_output": [{"sum_logits": -16.240707397460938, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.689495086669922, "logits_per_token": -8.120353698730469, "logits_per_char": -1.4764279452237217, "num_chars": 11}, {"sum_logits": -4.300826549530029, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -4.300826549530029, "logits_per_char": -0.8601653099060058, "num_chars": 5}, {"sum_logits": -2.489652156829834, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.949260711669922, "logits_per_token": -2.489652156829834, "logits_per_char": -0.31120651960372925, "num_chars": 8}, {"sum_logits": -4.499011993408203, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -4.499011993408203, "logits_per_char": -0.8998023986816406, "num_chars": 5}, {"sum_logits": -15.379805564880371, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -3.8449513912200928, "logits_per_char": -1.0985575403485979, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 749, "native_id": "c1c73ef0ff662a76cd42c3500240974a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.16574239730835, "incorrect_loss_raw": 11.054476737976074, "correct_loss_per_char": 0.32285889983177185, "incorrect_loss_per_char": 1.7466667124203274, "correct_loss_per_token": 2.582871198654175, "incorrect_loss_per_token": 8.367191672325134, "correct_loss_uncond": -15.228220462799072, "incorrect_loss_uncond": -4.225335359573364}, "model_output": [{"sum_logits": -5.16574239730835, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.393962860107422, "logits_per_token": -2.582871198654175, "logits_per_char": -0.32285889983177185, "num_chars": 16}, {"sum_logits": -11.466827392578125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.770475387573242, "logits_per_token": -5.7334136962890625, "logits_per_char": -1.6381181989397322, "num_chars": 7}, {"sum_logits": -10.894915580749512, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.236857414245605, "logits_per_token": -10.894915580749512, "logits_per_char": -1.5564165115356445, "num_chars": 7}, {"sum_logits": -10.031453132629395, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -5.015726566314697, "logits_per_char": -0.8359544277191162, "num_chars": 12}, {"sum_logits": -11.824710845947266, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.966537475585938, "logits_per_token": -11.824710845947266, "logits_per_char": -2.9561777114868164, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 750, "native_id": "aefa60233f3c5c4966f8ac22e901a1c7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.300048828125, "incorrect_loss_raw": 9.866089463233948, "correct_loss_per_char": 0.33077298677884615, "incorrect_loss_per_char": 1.6341296911239624, "correct_loss_per_token": 4.300048828125, "incorrect_loss_per_token": 9.866089463233948, "correct_loss_uncond": -10.33409309387207, "incorrect_loss_uncond": -4.691646218299866}, "model_output": [{"sum_logits": -4.300048828125, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.63414192199707, "logits_per_token": -4.300048828125, "logits_per_char": -0.33077298677884615, "num_chars": 13}, {"sum_logits": -5.06597375869751, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -5.06597375869751, "logits_per_char": -1.013194751739502, "num_chars": 5}, {"sum_logits": -10.386768341064453, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.70215892791748, "logits_per_token": -10.386768341064453, "logits_per_char": -2.077353668212891, "num_chars": 5}, {"sum_logits": -11.62588119506836, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -11.62588119506836, "logits_per_char": -0.9688234329223633, "num_chars": 12}, {"sum_logits": -12.385734558105469, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.13936710357666, "logits_per_token": -12.385734558105469, "logits_per_char": -2.4771469116210936, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 751, "native_id": "9221962ed3a6094e5c8f33785ce048cd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.668247222900391, "incorrect_loss_raw": 14.36036992073059, "correct_loss_per_char": 1.3336494445800782, "incorrect_loss_per_char": 1.355563860061841, "correct_loss_per_token": 6.668247222900391, "incorrect_loss_per_token": 5.447587728500366, "correct_loss_uncond": -5.798702239990234, "incorrect_loss_uncond": -2.5924367904663086}, "model_output": [{"sum_logits": -19.818078994750977, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.750091552734375, "logits_per_token": -4.954519748687744, "logits_per_char": -1.5244676149808443, "num_chars": 13}, {"sum_logits": -13.552617073059082, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.554622650146484, "logits_per_token": -4.517539024353027, "logits_per_char": -0.7529231707255045, "num_chars": 18}, {"sum_logits": -6.668247222900391, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.466949462890625, "logits_per_token": -6.668247222900391, "logits_per_char": -1.3336494445800782, "num_chars": 5}, {"sum_logits": -8.400794982910156, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.173615455627441, "logits_per_token": -8.400794982910156, "logits_per_char": -2.100198745727539, "num_chars": 4}, {"sum_logits": -15.669988632202148, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -3.917497158050537, "logits_per_char": -1.0446659088134767, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 752, "native_id": "8c8052980e357545398d27d1c3c832d8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.4358086585998535, "incorrect_loss_raw": 10.171829342842102, "correct_loss_per_char": 0.37857697991763845, "incorrect_loss_per_char": 0.9421449974634126, "correct_loss_per_token": 1.6089521646499634, "incorrect_loss_per_token": 5.860045373439789, "correct_loss_uncond": -13.353726863861084, "incorrect_loss_uncond": -8.566530108451843}, "model_output": [{"sum_logits": -6.4358086585998535, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.789535522460938, "logits_per_token": -1.6089521646499634, "logits_per_char": -0.37857697991763845, "num_chars": 17}, {"sum_logits": -6.193045616149902, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.68338394165039, "logits_per_token": -6.193045616149902, "logits_per_char": -1.0321742693583171, "num_chars": 6}, {"sum_logits": -23.04773712158203, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.729854583740234, "logits_per_token": -11.523868560791016, "logits_per_char": -1.77290285550631, "num_chars": 13}, {"sum_logits": -3.6765098571777344, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.388938903808594, "logits_per_token": -1.8382549285888672, "logits_per_char": -0.4085010952419705, "num_chars": 9}, {"sum_logits": -7.77002477645874, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -3.88501238822937, "logits_per_char": -0.5550017697470528, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 753, "native_id": "418913999c665ae9527fd14a6132da39", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.279626846313477, "incorrect_loss_raw": 11.258928060531616, "correct_loss_per_char": 0.5519751230875651, "incorrect_loss_per_char": 1.4176087992531914, "correct_loss_per_token": 4.139813423156738, "incorrect_loss_per_token": 5.815158526102702, "correct_loss_uncond": -10.569587707519531, "incorrect_loss_uncond": -6.8835484981536865}, "model_output": [{"sum_logits": -9.457735061645508, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.30740737915039, "logits_per_token": -4.728867530822754, "logits_per_char": -1.0508594512939453, "num_chars": 9}, {"sum_logits": -8.279626846313477, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.849214553833008, "logits_per_token": -4.139813423156738, "logits_per_char": -0.5519751230875651, "num_chars": 15}, {"sum_logits": -14.488822937011719, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.15835189819336, "logits_per_token": -4.829607645670573, "logits_per_char": -1.0349159240722656, "num_chars": 14}, {"sum_logits": -10.008661270141602, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.778377532958984, "logits_per_token": -10.008661270141602, "logits_per_char": -2.0017322540283202, "num_chars": 5}, {"sum_logits": -11.080492973327637, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.325769424438477, "logits_per_token": -3.693497657775879, "logits_per_char": -1.5829275676182337, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 754, "native_id": "2634468d21fa33a88cefe28a5d613f59", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.80259370803833, "incorrect_loss_raw": 13.85692811012268, "correct_loss_per_char": 0.82894195829119, "incorrect_loss_per_char": 2.260150516600836, "correct_loss_per_token": 5.80259370803833, "incorrect_loss_per_token": 6.290785610675812, "correct_loss_uncond": -9.108063220977783, "incorrect_loss_uncond": -2.8019254207611084}, "model_output": [{"sum_logits": -16.06214141845703, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.185791015625, "logits_per_token": -8.031070709228516, "logits_per_char": -3.2124282836914064, "num_chars": 5}, {"sum_logits": -17.182920455932617, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.837512969970703, "logits_per_token": -8.591460227966309, "logits_per_char": -3.4365840911865235, "num_chars": 5}, {"sum_logits": -10.202855110168457, "num_tokens": 4, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -2.5507137775421143, "logits_per_char": -0.6801903406778972, "num_chars": 15}, {"sum_logits": -11.979795455932617, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.279212951660156, "logits_per_token": -5.989897727966309, "logits_per_char": -1.7113993508475167, "num_chars": 7}, {"sum_logits": -5.80259370803833, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -5.80259370803833, "logits_per_char": -0.82894195829119, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 755, "native_id": "66bfb6e209c94e6be5b0d04b0c7e2064", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.666954040527344, "incorrect_loss_raw": 7.747838973999023, "correct_loss_per_char": 0.729184627532959, "incorrect_loss_per_char": 1.091953409561003, "correct_loss_per_token": 5.833477020263672, "incorrect_loss_per_token": 4.416383286317189, "correct_loss_uncond": -6.914308547973633, "incorrect_loss_uncond": -6.34752082824707}, "model_output": [{"sum_logits": -5.904929161071777, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.060754776000977, "logits_per_token": -1.4762322902679443, "logits_per_char": -0.5368117419156161, "num_chars": 11}, {"sum_logits": -13.345688819885254, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.448562939961751, "logits_per_char": -1.4828543133205838, "num_chars": 9}, {"sum_logits": -11.666954040527344, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.581262588500977, "logits_per_token": -5.833477020263672, "logits_per_char": -0.729184627532959, "num_chars": 16}, {"sum_logits": -6.736109733581543, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -6.736109733581543, "logits_per_char": -1.3472219467163087, "num_chars": 5}, {"sum_logits": -5.0046281814575195, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -5.0046281814575195, "logits_per_char": -1.000925636291504, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 756, "native_id": "3163910d665c139a1f6f07d85803baba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.1647672653198242, "incorrect_loss_raw": 8.71544086933136, "correct_loss_per_char": 0.16639532361711776, "incorrect_loss_per_char": 1.3993701132861052, "correct_loss_per_token": 1.1647672653198242, "incorrect_loss_per_token": 8.71544086933136, "correct_loss_uncond": -11.834604263305664, "incorrect_loss_uncond": -5.646705508232117}, "model_output": [{"sum_logits": -9.304293632507324, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.12514591217041, "logits_per_token": -9.304293632507324, "logits_per_char": -1.8608587265014649, "num_chars": 5}, {"sum_logits": -9.406119346618652, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -9.406119346618652, "logits_per_char": -0.8551017587835138, "num_chars": 11}, {"sum_logits": -7.268282413482666, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -7.268282413482666, "logits_per_char": -0.6607529466802423, "num_chars": 11}, {"sum_logits": -8.883068084716797, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.912240028381348, "logits_per_token": -8.883068084716797, "logits_per_char": -2.220767021179199, "num_chars": 4}, {"sum_logits": -1.1647672653198242, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -1.1647672653198242, "logits_per_char": -0.16639532361711776, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 757, "native_id": "0e52659484f2f6d763cf0d38d4c5999d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9708949327468872, "incorrect_loss_raw": 8.61463463306427, "correct_loss_per_char": 0.1791722666133534, "incorrect_loss_per_char": 0.9121506917786288, "correct_loss_per_token": 1.9708949327468872, "incorrect_loss_per_token": 6.393028140068054, "correct_loss_uncond": -13.9046391248703, "incorrect_loss_uncond": -6.726684927940369}, "model_output": [{"sum_logits": -1.9708949327468872, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -15.875534057617188, "logits_per_token": -1.9708949327468872, "logits_per_char": -0.1791722666133534, "num_chars": 11}, {"sum_logits": -17.772851943969727, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.608802795410156, "logits_per_token": -8.886425971984863, "logits_per_char": -1.615713813088157, "num_chars": 11}, {"sum_logits": -7.122723579406738, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.184625625610352, "logits_per_token": -7.122723579406738, "logits_per_char": -0.8903404474258423, "num_chars": 8}, {"sum_logits": -4.345884799957275, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.376161575317383, "logits_per_token": -4.345884799957275, "logits_per_char": -0.6208406857081822, "num_chars": 7}, {"sum_logits": -5.21707820892334, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.195688247680664, "logits_per_token": -5.21707820892334, "logits_per_char": -0.521707820892334, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 758, "native_id": "167d2cfa04bfaea0e0b5bac3598d5769", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1621835231781006, "incorrect_loss_raw": 11.600612878799438, "correct_loss_per_char": 0.21621835231781006, "incorrect_loss_per_char": 2.0558581829071048, "correct_loss_per_token": 1.0810917615890503, "incorrect_loss_per_token": 8.83908232053121, "correct_loss_uncond": -16.34895348548889, "incorrect_loss_uncond": -3.7894132137298584}, "model_output": [{"sum_logits": -8.546612739562988, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -8.546612739562988, "logits_per_char": -1.7093225479125977, "num_chars": 5}, {"sum_logits": -9.289440155029297, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.62877368927002, "logits_per_token": -9.289440155029297, "logits_per_char": -1.8578880310058594, "num_chars": 5}, {"sum_logits": -11.997215270996094, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -11.997215270996094, "logits_per_char": -2.9993038177490234, "num_chars": 4}, {"sum_logits": -2.1621835231781006, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -18.511137008666992, "logits_per_token": -1.0810917615890503, "logits_per_char": -0.21621835231781006, "num_chars": 10}, {"sum_logits": -16.569183349609375, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -5.523061116536458, "logits_per_char": -1.6569183349609375, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 759, "native_id": "39572e0ba1db51fa74f7fc2d90c5ec7f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.834165096282959, "incorrect_loss_raw": 11.212425827980042, "correct_loss_per_char": 0.5303786451166327, "incorrect_loss_per_char": 1.639856029124487, "correct_loss_per_token": 2.9170825481414795, "incorrect_loss_per_token": 9.467704892158508, "correct_loss_uncond": -14.354235172271729, "incorrect_loss_uncond": -3.6021522283554077}, "model_output": [{"sum_logits": -13.167986869812012, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.6295747756958, "logits_per_token": -13.167986869812012, "logits_per_char": -1.881140981401716, "num_chars": 7}, {"sum_logits": -9.855446815490723, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.947824478149414, "logits_per_token": -9.855446815490723, "logits_per_char": -1.9710893630981445, "num_chars": 5}, {"sum_logits": -7.868502140045166, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.803590774536133, "logits_per_token": -7.868502140045166, "logits_per_char": -1.311417023340861, "num_chars": 6}, {"sum_logits": -5.834165096282959, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.188400268554688, "logits_per_token": -2.9170825481414795, "logits_per_char": -0.5303786451166327, "num_chars": 11}, {"sum_logits": -13.957767486572266, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.87732219696045, "logits_per_token": -6.978883743286133, "logits_per_char": -1.3957767486572266, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 760, "native_id": "2a32b1e541b1daae04690d0d3a4b3310", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.461445808410645, "incorrect_loss_raw": 14.23128628730774, "correct_loss_per_char": 2.292289161682129, "incorrect_loss_per_char": 1.177595315794317, "correct_loss_per_token": 11.461445808410645, "incorrect_loss_per_token": 8.992718696594238, "correct_loss_uncond": -1.5600767135620117, "incorrect_loss_uncond": -3.495695114135742}, "model_output": [{"sum_logits": -15.01660442352295, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -15.01660442352295, "logits_per_char": -2.1452292033604214, "num_chars": 7}, {"sum_logits": -12.74283218383789, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.922863006591797, "logits_per_token": -6.371416091918945, "logits_per_char": -0.8495221455891927, "num_chars": 15}, {"sum_logits": -11.461445808410645, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.021522521972656, "logits_per_token": -11.461445808410645, "logits_per_char": -2.292289161682129, "num_chars": 5}, {"sum_logits": -16.209331512451172, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.914827346801758, "logits_per_token": -8.104665756225586, "logits_per_char": -0.953490088967716, "num_chars": 17}, {"sum_logits": -12.956377029418945, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.505313873291016, "logits_per_token": -6.478188514709473, "logits_per_char": -0.762139825259938, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 761, "native_id": "71cbfeb995b06b21e890c91040722252", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.5039567947387695, "incorrect_loss_raw": 9.293609738349915, "correct_loss_per_char": 0.9379945993423462, "incorrect_loss_per_char": 1.0052801688512167, "correct_loss_per_token": 7.5039567947387695, "incorrect_loss_per_token": 6.55001978079478, "correct_loss_uncond": -5.792637825012207, "incorrect_loss_uncond": -6.945061802864075}, "model_output": [{"sum_logits": -9.758844375610352, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.308147430419922, "logits_per_token": -9.758844375610352, "logits_per_char": -1.219855546951294, "num_chars": 8}, {"sum_logits": -13.411293029785156, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.932151794433594, "logits_per_token": -6.705646514892578, "logits_per_char": -0.8940862019856771, "num_chars": 15}, {"sum_logits": -7.601231575012207, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -7.601231575012207, "logits_per_char": -1.2668719291687012, "num_chars": 6}, {"sum_logits": -7.5039567947387695, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.296594619750977, "logits_per_token": -7.5039567947387695, "logits_per_char": -0.9379945993423462, "num_chars": 8}, {"sum_logits": -6.403069972991943, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.471677780151367, "logits_per_token": -2.134356657663981, "logits_per_char": -0.6403069972991944, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 762, "native_id": "a15d564d0be6996251b5d523ac62db2a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.801736831665039, "incorrect_loss_raw": 9.90308928489685, "correct_loss_per_char": 0.7092488028786399, "incorrect_loss_per_char": 1.3772719105084739, "correct_loss_per_token": 3.9008684158325195, "incorrect_loss_per_token": 8.810187458992004, "correct_loss_uncond": -8.662786483764648, "incorrect_loss_uncond": -4.189903497695923}, "model_output": [{"sum_logits": -11.016975402832031, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.297904014587402, "logits_per_token": -11.016975402832031, "logits_per_char": -1.1016975402832032, "num_chars": 10}, {"sum_logits": -8.749336242675781, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.505871772766113, "logits_per_token": -8.749336242675781, "logits_per_char": -1.4582227071126301, "num_chars": 6}, {"sum_logits": -8.74321460723877, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.470680236816406, "logits_per_token": -4.371607303619385, "logits_per_char": -0.7286012172698975, "num_chars": 12}, {"sum_logits": -11.10283088684082, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.097515106201172, "logits_per_token": -11.10283088684082, "logits_per_char": -2.220566177368164, "num_chars": 5}, {"sum_logits": -7.801736831665039, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.464523315429688, "logits_per_token": -3.9008684158325195, "logits_per_char": -0.7092488028786399, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 763, "native_id": "6bd170c8d3d99d3c47b3e96427bacaeb", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2025375366210938, "incorrect_loss_raw": 11.103305101394653, "correct_loss_per_char": 0.15732410975864955, "incorrect_loss_per_char": 1.290253336742671, "correct_loss_per_token": 0.7341791788736979, "incorrect_loss_per_token": 6.309574643770854, "correct_loss_uncond": -17.61739730834961, "incorrect_loss_uncond": -8.451478719711304}, "model_output": [{"sum_logits": -9.76295280456543, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.102163314819336, "logits_per_token": -9.76295280456543, "logits_per_char": -1.952590560913086, "num_chars": 5}, {"sum_logits": -7.052746772766113, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.44894027709961, "logits_per_token": -3.5263733863830566, "logits_per_char": -0.7836385303073459, "num_chars": 9}, {"sum_logits": -16.49879264831543, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.179349899291992, "logits_per_token": -8.249396324157715, "logits_per_char": -1.4998902407559482, "num_chars": 11}, {"sum_logits": -11.09872817993164, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -22.48868179321289, "logits_per_token": -3.6995760599772134, "logits_per_char": -0.9248940149943033, "num_chars": 12}, {"sum_logits": -2.2025375366210938, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.819934844970703, "logits_per_token": -0.7341791788736979, "logits_per_char": -0.15732410975864955, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 764, "native_id": "7bc1198664b376f79d584725ad7f874b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.833456993103027, "incorrect_loss_raw": 11.098480939865112, "correct_loss_per_char": 0.6481618881225586, "incorrect_loss_per_char": 0.8494141346276409, "correct_loss_per_token": 1.9444856643676758, "incorrect_loss_per_token": 4.948923408985138, "correct_loss_uncond": -10.94824504852295, "incorrect_loss_uncond": -7.525782585144043}, "model_output": [{"sum_logits": -10.20541763305664, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.169651985168457, "logits_per_token": -5.10270881652832, "logits_per_char": -1.0205417633056642, "num_chars": 10}, {"sum_logits": -6.275208950042725, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.471572875976562, "logits_per_token": -6.275208950042725, "logits_per_char": -0.570473540912975, "num_chars": 11}, {"sum_logits": -22.15549087524414, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -28.181848526000977, "logits_per_token": -5.538872718811035, "logits_per_char": -1.2308606041802301, "num_chars": 18}, {"sum_logits": -5.757806301116943, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.673980712890625, "logits_per_token": -2.8789031505584717, "logits_per_char": -0.5757806301116943, "num_chars": 10}, {"sum_logits": -5.833456993103027, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.781702041625977, "logits_per_token": -1.9444856643676758, "logits_per_char": -0.6481618881225586, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 765, "native_id": "d6c002d46d9bfa466637cec4a134f332", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.495359897613525, "incorrect_loss_raw": 14.657327890396118, "correct_loss_per_char": 0.5412799914677938, "incorrect_loss_per_char": 2.5908644596735635, "correct_loss_per_token": 3.2476799488067627, "incorrect_loss_per_token": 14.657327890396118, "correct_loss_uncond": -10.114461421966553, "incorrect_loss_uncond": 1.794715166091919}, "model_output": [{"sum_logits": -12.92233657836914, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.75931167602539, "logits_per_token": -12.92233657836914, "logits_per_char": -2.584467315673828, "num_chars": 5}, {"sum_logits": -16.604225158691406, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.834463119506836, "logits_per_token": -16.604225158691406, "logits_per_char": -2.3720321655273438, "num_chars": 7}, {"sum_logits": -12.407748222351074, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.674345016479492, "logits_per_token": -12.407748222351074, "logits_per_char": -2.067958037058512, "num_chars": 6}, {"sum_logits": -6.495359897613525, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.609821319580078, "logits_per_token": -3.2476799488067627, "logits_per_char": -0.5412799914677938, "num_chars": 12}, {"sum_logits": -16.69500160217285, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.182331085205078, "logits_per_token": -16.69500160217285, "logits_per_char": -3.3390003204345704, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 766, "native_id": "8cb45b421375243e788cfc64bd77b051", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.246175765991211, "incorrect_loss_raw": 14.25803518295288, "correct_loss_per_char": 0.7246175765991211, "incorrect_loss_per_char": 1.2174523459540474, "correct_loss_per_token": 7.246175765991211, "incorrect_loss_per_token": 6.889649073282877, "correct_loss_uncond": -5.306164741516113, "incorrect_loss_uncond": -2.4729092121124268}, "model_output": [{"sum_logits": -15.049118041992188, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.94290828704834, "logits_per_token": -3.762279510498047, "logits_per_char": -0.5573747422960069, "num_chars": 27}, {"sum_logits": -12.1718111038208, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.960880279541016, "logits_per_token": -4.057270367940267, "logits_per_char": -1.1065282821655273, "num_chars": 11}, {"sum_logits": -15.108247756958008, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.06417465209961, "logits_per_token": -5.036082585652669, "logits_per_char": -0.7554123878479004, "num_chars": 20}, {"sum_logits": -14.702963829040527, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -14.702963829040527, "logits_per_char": -2.4504939715067544, "num_chars": 6}, {"sum_logits": -7.246175765991211, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.552340507507324, "logits_per_token": -7.246175765991211, "logits_per_char": -0.7246175765991211, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 767, "native_id": "d6ff2d749494d89e9c7a53f587c519f4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.088792324066162, "incorrect_loss_raw": 11.61591362953186, "correct_loss_per_char": 0.1555417605808803, "incorrect_loss_per_char": 1.7472989559173584, "correct_loss_per_token": 1.088792324066162, "incorrect_loss_per_token": 11.61591362953186, "correct_loss_uncond": -12.317162990570068, "incorrect_loss_uncond": -1.1482579708099365}, "model_output": [{"sum_logits": -8.378743171691895, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.792819023132324, "logits_per_token": -8.378743171691895, "logits_per_char": -0.9309714635213217, "num_chars": 9}, {"sum_logits": -1.088792324066162, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -13.40595531463623, "logits_per_token": -1.088792324066162, "logits_per_char": -0.1555417605808803, "num_chars": 7}, {"sum_logits": -15.525840759277344, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.507471084594727, "logits_per_token": -15.525840759277344, "logits_per_char": -2.587640126546224, "num_chars": 6}, {"sum_logits": -10.410114288330078, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.812825202941895, "logits_per_token": -10.410114288330078, "logits_per_char": -1.735019048055013, "num_chars": 6}, {"sum_logits": -12.148956298828125, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.943571090698242, "logits_per_token": -12.148956298828125, "logits_per_char": -1.735565185546875, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 768, "native_id": "6974d215428a974641c1df18678522f5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 14.807209968566895, "incorrect_loss_raw": 15.503782510757446, "correct_loss_per_char": 1.6452455520629883, "incorrect_loss_per_char": 1.20138775541649, "correct_loss_per_token": 7.403604984283447, "incorrect_loss_per_token": 7.294232805569967, "correct_loss_uncond": -9.415785789489746, "incorrect_loss_uncond": -4.3994433879852295}, "model_output": [{"sum_logits": -17.769725799560547, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -8.884862899780273, "logits_per_char": -1.3669019845815806, "num_chars": 13}, {"sum_logits": -10.983802795410156, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.533153533935547, "logits_per_token": -3.6612675984700522, "logits_per_char": -0.8449079073392428, "num_chars": 13}, {"sum_logits": -15.137436866760254, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.770896911621094, "logits_per_token": -7.568718433380127, "logits_per_char": -0.9460898041725159, "num_chars": 16}, {"sum_logits": -18.124164581298828, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.875320434570312, "logits_per_token": -9.062082290649414, "logits_per_char": -1.6476513255726208, "num_chars": 11}, {"sum_logits": -14.807209968566895, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -24.22299575805664, "logits_per_token": -7.403604984283447, "logits_per_char": -1.6452455520629883, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 769, "native_id": "b94a9764acff078b52a9cbae04661dc9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.127649307250977, "incorrect_loss_raw": 11.109213590621948, "correct_loss_per_char": 1.5127649307250977, "incorrect_loss_per_char": 0.9594006470605438, "correct_loss_per_token": 7.563824653625488, "incorrect_loss_per_token": 5.554606795310974, "correct_loss_uncond": -4.529668807983398, "incorrect_loss_uncond": -6.026517629623413}, "model_output": [{"sum_logits": -15.127649307250977, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.657318115234375, "logits_per_token": -7.563824653625488, "logits_per_char": -1.5127649307250977, "num_chars": 10}, {"sum_logits": -12.70850944519043, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.672916412353516, "logits_per_token": -6.354254722595215, "logits_per_char": -1.270850944519043, "num_chars": 10}, {"sum_logits": -8.476982116699219, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.642452239990234, "logits_per_token": -4.238491058349609, "logits_per_char": -0.8476982116699219, "num_chars": 10}, {"sum_logits": -8.917253494262695, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.39954376220703, "logits_per_token": -4.458626747131348, "logits_per_char": -0.5245443231919232, "num_chars": 17}, {"sum_logits": -14.33410930633545, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.828012466430664, "logits_per_token": -7.167054653167725, "logits_per_char": -1.1945091088612874, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 770, "native_id": "80930e9df9ac4ad752749a54e7fc124f_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.685135841369629, "incorrect_loss_raw": 8.077727675437927, "correct_loss_per_char": 0.6404279867808024, "incorrect_loss_per_char": 0.9623151695027071, "correct_loss_per_token": 7.685135841369629, "incorrect_loss_per_token": 7.136440634727478, "correct_loss_uncond": -8.386261940002441, "incorrect_loss_uncond": -6.685279726982117}, "model_output": [{"sum_logits": -6.457942962646484, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -6.457942962646484, "logits_per_char": -1.291588592529297, "num_chars": 5}, {"sum_logits": -7.685135841369629, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.07139778137207, "logits_per_token": -7.685135841369629, "logits_per_char": -0.6404279867808024, "num_chars": 12}, {"sum_logits": -11.732256889343262, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -11.732256889343262, "logits_per_char": -1.1732256889343262, "num_chars": 10}, {"sum_logits": -7.530296325683594, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.715438842773438, "logits_per_token": -3.765148162841797, "logits_per_char": -0.44295860739315257, "num_chars": 17}, {"sum_logits": -6.590414524078369, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -6.590414524078369, "logits_per_char": -0.9414877891540527, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 771, "native_id": "3310b5b24f03d67179fababf9ae95144", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.904974937438965, "incorrect_loss_raw": 11.972329139709473, "correct_loss_per_char": 1.180994987487793, "incorrect_loss_per_char": 1.1708083511886325, "correct_loss_per_token": 5.904974937438965, "incorrect_loss_per_token": 8.043618083000183, "correct_loss_uncond": -6.155165672302246, "incorrect_loss_uncond": -3.46235728263855}, "model_output": [{"sum_logits": -13.689499855041504, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -14.773482322692871, "logits_per_token": -6.844749927520752, "logits_per_char": -1.521055539449056, "num_chars": 9}, {"sum_logits": -17.740188598632812, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -21.040119171142578, "logits_per_token": -8.870094299316406, "logits_per_char": -1.0435405058019303, "num_chars": 17}, {"sum_logits": -5.904974937438965, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.060140609741211, "logits_per_token": -5.904974937438965, "logits_per_char": -1.180994987487793, "num_chars": 5}, {"sum_logits": -8.214459419250488, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.277134895324707, "logits_per_token": -8.214459419250488, "logits_per_char": -1.3690765698750813, "num_chars": 6}, {"sum_logits": -8.245168685913086, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.648009300231934, "logits_per_token": -8.245168685913086, "logits_per_char": -0.7495607896284624, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 772, "native_id": "846bc47ced7119ad2ee19a8780d7fe18", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.695570945739746, "incorrect_loss_raw": 13.23813009262085, "correct_loss_per_char": 2.139114189147949, "incorrect_loss_per_char": 0.938592281836968, "correct_loss_per_token": 10.695570945739746, "incorrect_loss_per_token": 5.030231952667236, "correct_loss_uncond": -4.396627426147461, "incorrect_loss_uncond": -7.9573235511779785}, "model_output": [{"sum_logits": -25.421329498291016, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -26.966171264648438, "logits_per_token": -6.355332374572754, "logits_per_char": -1.0168531799316407, "num_chars": 25}, {"sum_logits": -8.540898323059082, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.568483352661133, "logits_per_token": -4.270449161529541, "logits_per_char": -1.2201283318655831, "num_chars": 7}, {"sum_logits": -8.621357917785645, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.131711959838867, "logits_per_token": -4.310678958892822, "logits_per_char": -0.574757194519043, "num_chars": 15}, {"sum_logits": -10.695570945739746, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.092198371887207, "logits_per_token": -10.695570945739746, "logits_per_char": -2.139114189147949, "num_chars": 5}, {"sum_logits": -10.368934631347656, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.115447998046875, "logits_per_token": -5.184467315673828, "logits_per_char": -0.9426304210316051, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 773, "native_id": "fd5a34e94303d7fd343de2a8f36943d5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.333641052246094, "incorrect_loss_raw": 10.857574939727783, "correct_loss_per_char": 0.7778034210205078, "incorrect_loss_per_char": 1.392102651179783, "correct_loss_per_token": 4.666820526123047, "incorrect_loss_per_token": 5.775466283162435, "correct_loss_uncond": -9.693836212158203, "incorrect_loss_uncond": -4.61864447593689}, "model_output": [{"sum_logits": -15.960691452026367, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.798885345458984, "logits_per_token": -5.320230484008789, "logits_per_char": -1.1400493894304549, "num_chars": 14}, {"sum_logits": -14.531960487365723, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.524063110351562, "logits_per_token": -4.843986829121907, "logits_per_char": -1.614662276373969, "num_chars": 9}, {"sum_logits": -3.926654815673828, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -3.926654815673828, "logits_per_char": -0.560950687953404, "num_chars": 7}, {"sum_logits": -9.010993003845215, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.017007827758789, "logits_per_token": -9.010993003845215, "logits_per_char": -2.2527482509613037, "num_chars": 4}, {"sum_logits": -9.333641052246094, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.027477264404297, "logits_per_token": -4.666820526123047, "logits_per_char": -0.7778034210205078, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 774, "native_id": "4e87db4771f2d6423034935446e3fff1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.478731155395508, "incorrect_loss_raw": 12.606941938400269, "correct_loss_per_char": 0.6056236539568219, "incorrect_loss_per_char": 0.9994707544644673, "correct_loss_per_token": 4.239365577697754, "incorrect_loss_per_token": 8.714243173599243, "correct_loss_uncond": -8.969717025756836, "incorrect_loss_uncond": -3.4818918704986572}, "model_output": [{"sum_logits": -11.06242847442627, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -11.06242847442627, "logits_per_char": -1.2291587193806965, "num_chars": 9}, {"sum_logits": -8.478731155395508, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.448448181152344, "logits_per_token": -4.239365577697754, "logits_per_char": -0.6056236539568219, "num_chars": 14}, {"sum_logits": -17.653076171875, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.52347183227539, "logits_per_token": -8.8265380859375, "logits_per_char": -1.1033172607421875, "num_chars": 16}, {"sum_logits": -8.223749160766602, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -8.223749160766602, "logits_per_char": -0.8223749160766601, "num_chars": 10}, {"sum_logits": -13.488513946533203, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.651273727416992, "logits_per_token": -6.744256973266602, "logits_per_char": -0.8430321216583252, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 775, "native_id": "a585df0818180ce3c06f963a4c3c810a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.948994636535645, "incorrect_loss_raw": 7.9316099882125854, "correct_loss_per_char": 1.3686243295669556, "incorrect_loss_per_char": 0.7908721697199476, "correct_loss_per_token": 5.474497318267822, "incorrect_loss_per_token": 7.360728323459625, "correct_loss_uncond": -10.288981437683105, "incorrect_loss_uncond": -6.6116756200790405}, "model_output": [{"sum_logits": -10.948994636535645, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -21.23797607421875, "logits_per_token": -5.474497318267822, "logits_per_char": -1.3686243295669556, "num_chars": 8}, {"sum_logits": -6.644174575805664, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -6.644174575805664, "logits_per_char": -0.9491677965436663, "num_chars": 7}, {"sum_logits": -4.567053318023682, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -2.283526659011841, "logits_per_char": -0.32621809414454866, "num_chars": 14}, {"sum_logits": -7.081464767456055, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -7.081464767456055, "logits_per_char": -0.5447280590350811, "num_chars": 13}, {"sum_logits": -13.433747291564941, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -13.433747291564941, "logits_per_char": -1.3433747291564941, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 776, "native_id": "c9f7d07e6d363a99f5fadd68a4dfa35a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.947171926498413, "incorrect_loss_raw": 8.385370254516602, "correct_loss_per_char": 0.21051228046417236, "incorrect_loss_per_char": 1.579617840051651, "correct_loss_per_token": 1.4735859632492065, "incorrect_loss_per_token": 8.385370254516602, "correct_loss_uncond": -13.20408844947815, "incorrect_loss_uncond": -5.547569036483765}, "model_output": [{"sum_logits": -8.48295783996582, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.830101013183594, "logits_per_token": -8.48295783996582, "logits_per_char": -2.120739459991455, "num_chars": 4}, {"sum_logits": -2.947171926498413, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -1.4735859632492065, "logits_per_char": -0.21051228046417236, "num_chars": 14}, {"sum_logits": -10.458334922790527, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.505199432373047, "logits_per_token": -10.458334922790527, "logits_per_char": -2.0916669845581053, "num_chars": 5}, {"sum_logits": -7.855195045471191, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.380257606506348, "logits_per_token": -7.855195045471191, "logits_per_char": -0.9818993806838989, "num_chars": 8}, {"sum_logits": -6.744993209838867, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.016199111938477, "logits_per_token": -6.744993209838867, "logits_per_char": -1.1241655349731445, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 777, "native_id": "c7cb327fa4c0008efaa7741081a365d4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.907394409179688, "incorrect_loss_raw": 12.078507900238037, "correct_loss_per_char": 1.2422828674316406, "incorrect_loss_per_char": 1.195045034090678, "correct_loss_per_token": 4.9691314697265625, "incorrect_loss_per_token": 6.506860534350078, "correct_loss_uncond": -13.155862808227539, "incorrect_loss_uncond": -7.271345615386963}, "model_output": [{"sum_logits": -19.62500762939453, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.18246078491211, "logits_per_token": -9.812503814697266, "logits_per_char": -1.3083338419596353, "num_chars": 15}, {"sum_logits": -7.176684379577637, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.440656661987305, "logits_per_token": -7.176684379577637, "logits_per_char": -1.1961140632629395, "num_chars": 6}, {"sum_logits": -11.20484447479248, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.597005844116211, "logits_per_token": -5.60242223739624, "logits_per_char": -1.2449827194213867, "num_chars": 9}, {"sum_logits": -10.3074951171875, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -27.179290771484375, "logits_per_token": -3.4358317057291665, "logits_per_char": -1.03074951171875, "num_chars": 10}, {"sum_logits": -14.907394409179688, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -28.063257217407227, "logits_per_token": -4.9691314697265625, "logits_per_char": -1.2422828674316406, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 778, "native_id": "c54ddc0f9d170ba65d9f4f2e0bb41d1c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.328435182571411, "incorrect_loss_raw": 13.269476532936096, "correct_loss_per_char": 0.38807253042856854, "incorrect_loss_per_char": 1.270686731151506, "correct_loss_per_token": 2.328435182571411, "incorrect_loss_per_token": 8.520919919013977, "correct_loss_uncond": -13.976187467575073, "incorrect_loss_uncond": -3.501975655555725}, "model_output": [{"sum_logits": -2.328435182571411, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.304622650146484, "logits_per_token": -2.328435182571411, "logits_per_char": -0.38807253042856854, "num_chars": 6}, {"sum_logits": -7.252988338470459, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.89332103729248, "logits_per_token": -7.252988338470459, "logits_per_char": -1.4505976676940917, "num_chars": 5}, {"sum_logits": -12.405221939086914, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -12.405221939086914, "logits_per_char": -1.0337684949239094, "num_chars": 12}, {"sum_logits": -19.713424682617188, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.13169288635254, "logits_per_token": -9.856712341308594, "logits_per_char": -1.7921295166015625, "num_chars": 11}, {"sum_logits": -13.706271171569824, "num_tokens": 3, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.33769416809082, "logits_per_token": -4.568757057189941, "logits_per_char": -0.8062512453864602, "num_chars": 17}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 779, "native_id": "1729c737ff92cf558efecde2c6cafc5e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.619258880615234, "incorrect_loss_raw": 15.162697076797485, "correct_loss_per_char": 1.124558375431941, "incorrect_loss_per_char": 1.1251465327209897, "correct_loss_per_token": 3.6548147201538086, "incorrect_loss_per_token": 8.267548004786175, "correct_loss_uncond": -2.891805648803711, "incorrect_loss_uncond": -3.7367446422576904}, "model_output": [{"sum_logits": -28.903100967407227, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -28.269590377807617, "logits_per_token": -9.634366989135742, "logits_per_char": -1.6057278315226238, "num_chars": 18}, {"sum_logits": -12.467793464660645, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.21961784362793, "logits_per_token": -4.155931154886882, "logits_per_char": -0.6926551924811469, "num_chars": 18}, {"sum_logits": -14.619258880615234, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.511064529418945, "logits_per_token": -3.6548147201538086, "logits_per_char": -1.124558375431941, "num_chars": 13}, {"sum_logits": -8.311345100402832, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.656105995178223, "logits_per_token": -8.311345100402832, "logits_per_char": -0.8311345100402832, "num_chars": 10}, {"sum_logits": -10.968548774719238, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -10.968548774719238, "logits_per_char": -1.3710685968399048, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 780, "native_id": "19dfd55e967dacd6f5700a62c1e14eee", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7538648843765259, "incorrect_loss_raw": 7.855117321014404, "correct_loss_per_char": 0.08351737544650123, "incorrect_loss_per_char": 0.8144045205343338, "correct_loss_per_token": 0.5846216281255087, "incorrect_loss_per_token": 5.133008162180582, "correct_loss_uncond": -17.461551070213318, "incorrect_loss_uncond": -8.74971604347229}, "model_output": [{"sum_logits": -6.082874774932861, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -6.082874774932861, "logits_per_char": -1.2165749549865723, "num_chars": 5}, {"sum_logits": -9.229742050170898, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -3.0765806833902993, "logits_per_char": -0.4614871025085449, "num_chars": 20}, {"sum_logits": -6.637301921844482, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -6.637301921844482, "logits_per_char": -0.9481859888349261, "num_chars": 7}, {"sum_logits": -1.7538648843765259, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.215415954589844, "logits_per_token": -0.5846216281255087, "logits_per_char": -0.08351737544650123, "num_chars": 21}, {"sum_logits": -9.470550537109375, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -4.7352752685546875, "logits_per_char": -0.6313700358072917, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 781, "native_id": "b9bed83138901f4a45041b02c5b242c1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.597966194152832, "incorrect_loss_raw": 10.769742548465729, "correct_loss_per_char": 0.3998547281537737, "incorrect_loss_per_char": 1.536856347373408, "correct_loss_per_token": 2.798983097076416, "incorrect_loss_per_token": 6.364565034707387, "correct_loss_uncond": -14.695666313171387, "incorrect_loss_uncond": -6.264262139797211}, "model_output": [{"sum_logits": -9.726990699768066, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.402257919311523, "logits_per_token": -9.726990699768066, "logits_per_char": -1.9453981399536133, "num_chars": 5}, {"sum_logits": -5.597966194152832, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.29363250732422, "logits_per_token": -2.798983097076416, "logits_per_char": -0.3998547281537737, "num_chars": 14}, {"sum_logits": -3.865210771560669, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.667924880981445, "logits_per_token": -3.865210771560669, "logits_per_char": -0.429467863506741, "num_chars": 9}, {"sum_logits": -17.263954162597656, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.02482795715332, "logits_per_token": -5.754651387532552, "logits_per_char": -1.3279964740459735, "num_chars": 13}, {"sum_logits": -12.222814559936523, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.04100799560547, "logits_per_token": -6.111407279968262, "logits_per_char": -2.4445629119873047, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 782, "native_id": "b9d22425a3d5810be9528a55245c8f09", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.334874153137207, "incorrect_loss_raw": 9.712622046470642, "correct_loss_per_char": 1.6669748306274415, "incorrect_loss_per_char": 0.910829780593751, "correct_loss_per_token": 8.334874153137207, "incorrect_loss_per_token": 7.064041435718536, "correct_loss_uncond": -6.383862495422363, "incorrect_loss_uncond": -6.083450436592102}, "model_output": [{"sum_logits": -5.99179744720459, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.13819694519043, "logits_per_token": -5.99179744720459, "logits_per_char": -0.85597106388637, "num_chars": 7}, {"sum_logits": -8.334874153137207, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.71873664855957, "logits_per_token": -8.334874153137207, "logits_per_char": -1.6669748306274415, "num_chars": 5}, {"sum_logits": -13.21110725402832, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.097896575927734, "logits_per_token": -6.60555362701416, "logits_per_char": -0.7339504030015733, "num_chars": 18}, {"sum_logits": -7.977537631988525, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -3.9887688159942627, "logits_per_char": -0.8863930702209473, "num_chars": 9}, {"sum_logits": -11.670045852661133, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.414422035217285, "logits_per_token": -11.670045852661133, "logits_per_char": -1.1670045852661133, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 783, "native_id": "2af70107e04e61e3c7884bc743901c02", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.273664951324463, "incorrect_loss_raw": 7.716807723045349, "correct_loss_per_char": 0.47942408648404206, "incorrect_loss_per_char": 0.8329294443130493, "correct_loss_per_token": 5.273664951324463, "incorrect_loss_per_token": 5.872686326503754, "correct_loss_uncond": -8.238638401031494, "incorrect_loss_uncond": -6.719424843788147}, "model_output": [{"sum_logits": -7.116332054138184, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.127046585083008, "logits_per_token": -3.558166027069092, "logits_per_char": -0.6469392776489258, "num_chars": 11}, {"sum_logits": -5.273664951324463, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -13.512303352355957, "logits_per_token": -5.273664951324463, "logits_per_char": -0.47942408648404206, "num_chars": 11}, {"sum_logits": -11.780805587768555, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -11.780805587768555, "logits_per_char": -1.3089783986409504, "num_chars": 9}, {"sum_logits": -4.333454132080078, "num_tokens": 1, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -12.016385078430176, "logits_per_token": -4.333454132080078, "logits_per_char": -0.8666908264160156, "num_chars": 5}, {"sum_logits": -7.63663911819458, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.58175277709961, "logits_per_token": -3.81831955909729, "logits_per_char": -0.5091092745463054, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 784, "native_id": "be2cb9c96069ac355a7ccef262743d14", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3062963485717773, "incorrect_loss_raw": 10.052384495735168, "correct_loss_per_char": 0.0870864232381185, "incorrect_loss_per_char": 0.9992202083269754, "correct_loss_per_token": 0.6531481742858887, "incorrect_loss_per_token": 7.635783493518829, "correct_loss_uncond": -18.84791088104248, "incorrect_loss_uncond": -6.308350682258606}, "model_output": [{"sum_logits": -9.458879470825195, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -9.458879470825195, "logits_per_char": -1.050986607869466, "num_chars": 9}, {"sum_logits": -1.3062963485717773, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -0.6531481742858887, "logits_per_char": -0.0870864232381185, "num_chars": 15}, {"sum_logits": -13.889847755432129, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -6.9449238777160645, "logits_per_char": -0.9259898503621419, "num_chars": 15}, {"sum_logits": -5.442960262298584, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -2.721480131149292, "logits_per_char": -0.38878287587847027, "num_chars": 14}, {"sum_logits": -11.417850494384766, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.87264060974121, "logits_per_token": -11.417850494384766, "logits_per_char": -1.6311214991978236, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 785, "native_id": "799e48ec7fb16415c8f82828c5761ed1", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.184025764465332, "incorrect_loss_raw": 16.259511470794678, "correct_loss_per_char": 0.6530932513150302, "incorrect_loss_per_char": 1.1292805735735958, "correct_loss_per_token": 7.184025764465332, "incorrect_loss_per_token": 7.704137404759725, "correct_loss_uncond": -6.232291221618652, "incorrect_loss_uncond": -3.5170211791992188}, "model_output": [{"sum_logits": -14.221977233886719, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.841983795166016, "logits_per_token": -7.110988616943359, "logits_per_char": -1.0158555167061942, "num_chars": 14}, {"sum_logits": -7.184025764465332, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.416316986083984, "logits_per_token": -7.184025764465332, "logits_per_char": -0.6530932513150302, "num_chars": 11}, {"sum_logits": -22.71568489074707, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.381427764892578, "logits_per_token": -7.5718949635823565, "logits_per_char": -1.747360376211313, "num_chars": 13}, {"sum_logits": -15.955623626708984, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.421754837036133, "logits_per_token": -3.988905906677246, "logits_per_char": -0.8864235348171658, "num_chars": 18}, {"sum_logits": -12.144760131835938, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.46096420288086, "logits_per_token": -12.144760131835938, "logits_per_char": -0.8674828665597099, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 786, "native_id": "a5db1e9677af118deb8e4add8bc18db2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.67691421508789, "incorrect_loss_raw": 18.49608838558197, "correct_loss_per_char": 0.7230761845906576, "incorrect_loss_per_char": 1.0523701647726713, "correct_loss_per_token": 2.8923047383626304, "incorrect_loss_per_token": 4.367415828364236, "correct_loss_uncond": -11.752208709716797, "incorrect_loss_uncond": -7.586034893989563}, "model_output": [{"sum_logits": -7.864199161529541, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.32390785217285, "logits_per_token": -3.9320995807647705, "logits_per_char": -0.7149271965026855, "num_chars": 11}, {"sum_logits": -50.36520767211914, "num_tokens": 7, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -45.965110778808594, "logits_per_token": -7.195029667445591, "logits_per_char": -2.1897916379182236, "num_chars": 23}, {"sum_logits": -6.545310974121094, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.80850601196289, "logits_per_token": -3.272655487060547, "logits_per_char": -0.467522212437221, "num_chars": 14}, {"sum_logits": -8.67691421508789, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.429122924804688, "logits_per_token": -2.8923047383626304, "logits_per_char": -0.7230761845906576, "num_chars": 12}, {"sum_logits": -9.209635734558105, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.230968475341797, "logits_per_token": -3.069878578186035, "logits_per_char": -0.837239612232555, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 787, "native_id": "28357ebf85f8bb82b6a3210c4397e0aa", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.1908440589904785, "incorrect_loss_raw": 8.334691941738129, "correct_loss_per_char": 0.47189491445367987, "incorrect_loss_per_char": 0.899100743672427, "correct_loss_per_token": 1.7302813529968262, "incorrect_loss_per_token": 4.8306153019269304, "correct_loss_uncond": -10.49080228805542, "incorrect_loss_uncond": -7.937564194202423}, "model_output": [{"sum_logits": -8.487215995788574, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.117111206054688, "logits_per_token": -2.8290719985961914, "logits_per_char": -0.4992479997522691, "num_chars": 17}, {"sum_logits": -8.497838020324707, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -8.497838020324707, "logits_per_char": -1.4163063367207844, "num_chars": 6}, {"sum_logits": -3.81646990776062, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -3.81646990776062, "logits_per_char": -0.6360783179601034, "num_chars": 6}, {"sum_logits": -5.1908440589904785, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.681646347045898, "logits_per_token": -1.7302813529968262, "logits_per_char": -0.47189491445367987, "num_chars": 11}, {"sum_logits": -12.537243843078613, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.55123519897461, "logits_per_token": -4.179081281026204, "logits_per_char": -1.044770320256551, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 788, "native_id": "7b95825a19d6930d6aed35c7c57a2d82", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.793382167816162, "incorrect_loss_raw": 8.396711826324463, "correct_loss_per_char": 0.9483455419540405, "incorrect_loss_per_char": 1.339393207005092, "correct_loss_per_token": 3.793382167816162, "incorrect_loss_per_token": 8.396711826324463, "correct_loss_uncond": -8.141355037689209, "incorrect_loss_uncond": -5.705150127410889}, "model_output": [{"sum_logits": -6.696789264678955, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -6.696789264678955, "logits_per_char": -0.9566841806684222, "num_chars": 7}, {"sum_logits": -9.437414169311523, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.30998420715332, "logits_per_token": -9.437414169311523, "logits_per_char": -1.3482020241873605, "num_chars": 7}, {"sum_logits": -9.790406227111816, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -9.790406227111816, "logits_per_char": -1.9580812454223633, "num_chars": 5}, {"sum_logits": -3.793382167816162, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -3.793382167816162, "logits_per_char": -0.9483455419540405, "num_chars": 4}, {"sum_logits": -7.662237644195557, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -7.662237644195557, "logits_per_char": -1.0946053777422224, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 789, "native_id": "6b270159bd402ddd498a38153f9d1efe", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.7516865730285645, "incorrect_loss_raw": 9.366795778274536, "correct_loss_per_char": 0.821669510432652, "incorrect_loss_per_char": 1.1472226001139498, "correct_loss_per_token": 5.7516865730285645, "incorrect_loss_per_token": 8.429186284542084, "correct_loss_uncond": -11.41916799545288, "incorrect_loss_uncond": -6.638696908950806}, "model_output": [{"sum_logits": -7.500875949859619, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.29444694519043, "logits_per_token": -3.7504379749298096, "logits_per_char": -1.0715537071228027, "num_chars": 7}, {"sum_logits": -11.90855598449707, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -11.90855598449707, "logits_per_char": -1.0825959985906428, "num_chars": 11}, {"sum_logits": -4.565548419952393, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -4.565548419952393, "logits_per_char": -0.5072831577724881, "num_chars": 9}, {"sum_logits": -13.492202758789062, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.917464256286621, "logits_per_token": -13.492202758789062, "logits_per_char": -1.927457536969866, "num_chars": 7}, {"sum_logits": -5.7516865730285645, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.170854568481445, "logits_per_token": -5.7516865730285645, "logits_per_char": -0.821669510432652, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 790, "native_id": "eae0e03773365064ce915603c7addc91", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.125810384750366, "incorrect_loss_raw": 15.098604917526245, "correct_loss_per_char": 0.22327217033931188, "incorrect_loss_per_char": 1.1357030430732415, "correct_loss_per_token": 1.562905192375183, "incorrect_loss_per_token": 6.3316874504089355, "correct_loss_uncond": -11.745118379592896, "incorrect_loss_uncond": -4.447227239608765}, "model_output": [{"sum_logits": -3.125810384750366, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.870928764343262, "logits_per_token": -1.562905192375183, "logits_per_char": -0.22327217033931188, "num_chars": 14}, {"sum_logits": -21.326404571533203, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.028850555419922, "logits_per_token": -10.663202285766602, "logits_per_char": -1.4217603047688803, "num_chars": 15}, {"sum_logits": -14.757874488830566, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -4.9192914962768555, "logits_per_char": -0.983858299255371, "num_chars": 15}, {"sum_logits": -9.643257141113281, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -2.4108142852783203, "logits_per_char": -0.5075398495322779, "num_chars": 19}, {"sum_logits": -14.66688346862793, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.843425750732422, "logits_per_token": -7.333441734313965, "logits_per_char": -1.6296537187364366, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 791, "native_id": "a5ca7c89196e54938b5827814d0071d4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.647480010986328, "incorrect_loss_raw": 11.38816213607788, "correct_loss_per_char": 0.9728830777681791, "incorrect_loss_per_char": 1.0834452060737996, "correct_loss_per_token": 4.215826670328776, "incorrect_loss_per_token": 5.215947071711222, "correct_loss_uncond": -7.353757858276367, "incorrect_loss_uncond": -7.458719730377197}, "model_output": [{"sum_logits": -11.475215911865234, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.883235931396484, "logits_per_token": -3.8250719706217446, "logits_per_char": -0.9562679926554362, "num_chars": 12}, {"sum_logits": -14.359296798706055, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -22.13387680053711, "logits_per_token": -7.179648399353027, "logits_per_char": -1.3053906180641868, "num_chars": 11}, {"sum_logits": -9.027776718139648, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.998647689819336, "logits_per_token": -4.513888359069824, "logits_per_char": -1.0030863020155165, "num_chars": 9}, {"sum_logits": -10.690359115600586, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -21.371767044067383, "logits_per_token": -5.345179557800293, "logits_per_char": -1.0690359115600585, "num_chars": 10}, {"sum_logits": -12.647480010986328, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -20.001237869262695, "logits_per_token": -4.215826670328776, "logits_per_char": -0.9728830777681791, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 792, "native_id": "ffc3461d437a1c6c22d1c4f6439ebd9c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.317996621131897, "incorrect_loss_raw": 15.523004055023193, "correct_loss_per_char": 0.16474957764148712, "incorrect_loss_per_char": 1.874798354939518, "correct_loss_per_token": 1.317996621131897, "incorrect_loss_per_token": 10.706347227096558, "correct_loss_uncond": -12.55092465877533, "incorrect_loss_uncond": -1.1672062873840332}, "model_output": [{"sum_logits": -12.538004875183105, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.604683876037598, "logits_per_token": -12.538004875183105, "logits_per_char": -2.089667479197184, "num_chars": 6}, {"sum_logits": -11.020756721496582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.764643669128418, "logits_per_token": -11.020756721496582, "logits_per_char": -1.836792786916097, "num_chars": 6}, {"sum_logits": -17.802621841430664, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.901310920715332, "logits_per_char": -1.978069093492296, "num_chars": 9}, {"sum_logits": -1.317996621131897, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -13.868921279907227, "logits_per_token": -1.317996621131897, "logits_per_char": -0.16474957764148712, "num_chars": 8}, {"sum_logits": -20.730632781982422, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.914505004882812, "logits_per_token": -10.365316390991211, "logits_per_char": -1.594664060152494, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 793, "native_id": "aa2dcd9bcce5e4445bd3bacbf0bb11d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.116062641143799, "incorrect_loss_raw": 11.371103644371033, "correct_loss_per_char": 1.016580377306257, "incorrect_loss_per_char": 1.0990098164195106, "correct_loss_per_token": 7.116062641143799, "incorrect_loss_per_token": 6.596421420574188, "correct_loss_uncond": -8.174500942230225, "incorrect_loss_uncond": -5.49972140789032}, "model_output": [{"sum_logits": -7.286956787109375, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.351019859313965, "logits_per_token": -7.286956787109375, "logits_per_char": -1.0409938267299108, "num_chars": 7}, {"sum_logits": -7.691529750823975, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -3.8457648754119873, "logits_per_char": -0.5493949822017125, "num_chars": 14}, {"sum_logits": -7.116062641143799, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.290563583374023, "logits_per_token": -7.116062641143799, "logits_per_char": -1.016580377306257, "num_chars": 7}, {"sum_logits": -15.80938720703125, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.330198287963867, "logits_per_token": -7.904693603515625, "logits_per_char": -1.580938720703125, "num_chars": 10}, {"sum_logits": -14.696540832519531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -7.348270416259766, "logits_per_char": -1.2247117360432942, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 794, "native_id": "6cc797ec148c1fc74592957a55bd0951", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.278131484985352, "incorrect_loss_raw": 8.773629546165466, "correct_loss_per_char": 0.6065109570821127, "incorrect_loss_per_char": 0.8170466474124364, "correct_loss_per_token": 3.639065742492676, "incorrect_loss_per_token": 6.866744220256805, "correct_loss_uncond": -9.796083450317383, "incorrect_loss_uncond": -6.442685961723328}, "model_output": [{"sum_logits": -6.5825886726379395, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.016836166381836, "logits_per_token": -3.2912943363189697, "logits_per_char": -0.7313987414042155, "num_chars": 9}, {"sum_logits": -13.894211769104004, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.910987854003906, "logits_per_token": -13.894211769104004, "logits_per_char": -1.3894211769104003, "num_chars": 10}, {"sum_logits": -8.672493934631348, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.48623275756836, "logits_per_token": -4.336246967315674, "logits_per_char": -0.7227078278859457, "num_chars": 12}, {"sum_logits": -5.945223808288574, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.451205253601074, "logits_per_token": -5.945223808288574, "logits_per_char": -0.42465884344918386, "num_chars": 14}, {"sum_logits": -7.278131484985352, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.074214935302734, "logits_per_token": -3.639065742492676, "logits_per_char": -0.6065109570821127, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 795, "native_id": "64dbe5cb840ef4f1d25f8b68db8d5fed", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.157338619232178, "incorrect_loss_raw": 10.968923449516296, "correct_loss_per_char": 0.41573386192321776, "incorrect_loss_per_char": 1.23048641510082, "correct_loss_per_token": 2.078669309616089, "incorrect_loss_per_token": 8.452628493309021, "correct_loss_uncond": -15.117495059967041, "incorrect_loss_uncond": -5.102611184120178}, "model_output": [{"sum_logits": -20.130359649658203, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.859161376953125, "logits_per_token": -10.065179824829102, "logits_per_char": -1.830032695423473, "num_chars": 11}, {"sum_logits": -6.43571138381958, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -6.43571138381958, "logits_per_char": -0.8044639229774475, "num_chars": 8}, {"sum_logits": -6.554837226867676, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -6.554837226867676, "logits_per_char": -1.0924728711446126, "num_chars": 6}, {"sum_logits": -10.754785537719727, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.663701057434082, "logits_per_token": -10.754785537719727, "logits_per_char": -1.1949761708577473, "num_chars": 9}, {"sum_logits": -4.157338619232178, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.27483367919922, "logits_per_token": -2.078669309616089, "logits_per_char": -0.41573386192321776, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 796, "native_id": "a74753bf249c1cbcff632c5c16b0397b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.304469108581543, "incorrect_loss_raw": 9.664742469787598, "correct_loss_per_char": 0.5380586385726929, "incorrect_loss_per_char": 1.365311689087839, "correct_loss_per_token": 4.304469108581543, "incorrect_loss_per_token": 7.445886015892029, "correct_loss_uncond": -8.417902946472168, "incorrect_loss_uncond": -7.120437145233154}, "model_output": [{"sum_logits": -10.613569259643555, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.31651496887207, "logits_per_token": -5.306784629821777, "logits_per_char": -0.9648699326948686, "num_chars": 11}, {"sum_logits": -10.022720336914062, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -10.022720336914062, "logits_per_char": -1.670453389485677, "num_chars": 6}, {"sum_logits": -7.137282371520996, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.36029624938965, "logits_per_token": -3.568641185760498, "logits_per_char": -0.6488438519564542, "num_chars": 11}, {"sum_logits": -4.304469108581543, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -4.304469108581543, "logits_per_char": -0.5380586385726929, "num_chars": 8}, {"sum_logits": -10.885397911071777, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.432815551757812, "logits_per_token": -10.885397911071777, "logits_per_char": -2.1770795822143554, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 797, "native_id": "9190efbd77fe10b989fcaae35e208a0f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.8510103225708, "incorrect_loss_raw": 11.305580854415894, "correct_loss_per_char": 1.60637629032135, "incorrect_loss_per_char": 1.0341359338203033, "correct_loss_per_token": 4.283670107523601, "incorrect_loss_per_token": 4.610184987386068, "correct_loss_uncond": -2.5569114685058594, "incorrect_loss_uncond": -5.409683465957642}, "model_output": [{"sum_logits": -12.8510103225708, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.40792179107666, "logits_per_token": -4.283670107523601, "logits_per_char": -1.60637629032135, "num_chars": 8}, {"sum_logits": -9.765607833862305, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.915063858032227, "logits_per_token": -4.882803916931152, "logits_per_char": -1.220700979232788, "num_chars": 8}, {"sum_logits": -13.295524597167969, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.437604904174805, "logits_per_token": -4.431841532389323, "logits_per_char": -1.208684054287997, "num_chars": 11}, {"sum_logits": -11.727005958557129, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.102293014526367, "logits_per_token": -3.9090019861857095, "logits_per_char": -0.8376432827540806, "num_chars": 14}, {"sum_logits": -10.434185028076172, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.406095504760742, "logits_per_token": -5.217092514038086, "logits_per_char": -0.8695154190063477, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 798, "native_id": "ff0303db294a823d4138fb81a6ee6438", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.67254638671875, "incorrect_loss_raw": 9.123280763626099, "correct_loss_per_char": 0.6975042169744318, "incorrect_loss_per_char": 0.7018411954243978, "correct_loss_per_token": 3.836273193359375, "incorrect_loss_per_token": 4.979771534601848, "correct_loss_uncond": -10.407676696777344, "incorrect_loss_uncond": -10.275703191757202}, "model_output": [{"sum_logits": -7.67254638671875, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.080223083496094, "logits_per_token": -3.836273193359375, "logits_per_char": -0.6975042169744318, "num_chars": 11}, {"sum_logits": -5.1090803146362305, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.036821365356445, "logits_per_token": -5.1090803146362305, "logits_per_char": -0.8515133857727051, "num_chars": 6}, {"sum_logits": -5.292093276977539, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.270851135253906, "logits_per_token": -1.7640310923258464, "logits_per_char": -0.4410077730814616, "num_chars": 12}, {"sum_logits": -16.6900634765625, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.819080352783203, "logits_per_token": -8.34503173828125, "logits_per_char": -0.9272257486979166, "num_chars": 18}, {"sum_logits": -9.401885986328125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -4.7009429931640625, "logits_per_char": -0.5876178741455078, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 799, "native_id": "63963c9c15835d451aac2e1e0b116388", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.368009090423584, "incorrect_loss_raw": 11.806777238845825, "correct_loss_per_char": 0.7668584414890834, "incorrect_loss_per_char": 1.2359586964437972, "correct_loss_per_token": 5.368009090423584, "incorrect_loss_per_token": 5.876318693161011, "correct_loss_uncond": -10.160373210906982, "incorrect_loss_uncond": -4.344872713088989}, "model_output": [{"sum_logits": -14.608855247497559, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.878366470336914, "logits_per_token": -4.8696184158325195, "logits_per_char": -2.08697932107108, "num_chars": 7}, {"sum_logits": -5.368009090423584, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.528382301330566, "logits_per_token": -5.368009090423584, "logits_per_char": -0.7668584414890834, "num_chars": 7}, {"sum_logits": -6.301836013793945, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.217859268188477, "logits_per_token": -6.301836013793945, "logits_per_char": -1.050306002298991, "num_chars": 6}, {"sum_logits": -7.672954559326172, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.623815536499023, "logits_per_token": -7.672954559326172, "logits_per_char": -0.9591193199157715, "num_chars": 8}, {"sum_logits": -18.643463134765625, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.886558532714844, "logits_per_token": -4.660865783691406, "logits_per_char": -0.8474301424893466, "num_chars": 22}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 800, "native_id": "cc8324b73ed9625e723ef041dfc77a37", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.694665908813477, "incorrect_loss_raw": 14.761127948760986, "correct_loss_per_char": 1.2824443181355794, "incorrect_loss_per_char": 1.3970426094996462, "correct_loss_per_token": 7.694665908813477, "incorrect_loss_per_token": 9.922782182693481, "correct_loss_uncond": -4.613345146179199, "incorrect_loss_uncond": -2.5213382244110107}, "model_output": [{"sum_logits": -14.017223358154297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.935380935668945, "logits_per_token": -7.008611679077148, "logits_per_char": -1.2742930325594815, "num_chars": 11}, {"sum_logits": -12.05316162109375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.522719383239746, "logits_per_token": -12.05316162109375, "logits_per_char": -1.7218802315848214, "num_chars": 7}, {"sum_logits": -24.689542770385742, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.37803840637207, "logits_per_token": -12.344771385192871, "logits_per_char": -1.7635387693132674, "num_chars": 14}, {"sum_logits": -7.694665908813477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -7.694665908813477, "logits_per_char": -1.2824443181355794, "num_chars": 6}, {"sum_logits": -8.284584045410156, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.293725967407227, "logits_per_token": -8.284584045410156, "logits_per_char": -0.8284584045410156, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 801, "native_id": "684dbde19719e8224113433981d6e01e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.507938861846924, "incorrect_loss_raw": 16.08208394050598, "correct_loss_per_char": 0.5007217147133567, "incorrect_loss_per_char": 1.317036280306903, "correct_loss_per_token": 2.753969430923462, "incorrect_loss_per_token": 8.050815065701803, "correct_loss_uncond": -10.798993587493896, "incorrect_loss_uncond": -2.941281795501709}, "model_output": [{"sum_logits": -5.507938861846924, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.30693244934082, "logits_per_token": -2.753969430923462, "logits_per_char": -0.5007217147133567, "num_chars": 11}, {"sum_logits": -10.093757629394531, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.56877326965332, "logits_per_token": -10.093757629394531, "logits_per_char": -2.018751525878906, "num_chars": 5}, {"sum_logits": -11.918754577636719, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.523645401000977, "logits_per_token": -5.959377288818359, "logits_per_char": -1.0835231434215198, "num_chars": 11}, {"sum_logits": -30.04671859741211, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -31.56140899658203, "logits_per_token": -10.015572865804037, "logits_per_char": -0.9389599561691284, "num_chars": 32}, {"sum_logits": -12.269104957580566, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.439635276794434, "logits_per_token": -6.134552478790283, "logits_per_char": -1.2269104957580566, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 802, "native_id": "21450618657881d8c5af73691f3423a7_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.578701496124268, "incorrect_loss_raw": 9.005838990211487, "correct_loss_per_char": 1.2631169160207112, "incorrect_loss_per_char": 1.0492355005168692, "correct_loss_per_token": 7.578701496124268, "incorrect_loss_per_token": 7.817311882972717, "correct_loss_uncond": -3.6540427207946777, "incorrect_loss_uncond": -5.344379305839539}, "model_output": [{"sum_logits": -9.401838302612305, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.999987602233887, "logits_per_token": -9.401838302612305, "logits_per_char": -1.566973050435384, "num_chars": 6}, {"sum_logits": -7.578701496124268, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.232744216918945, "logits_per_token": -7.578701496124268, "logits_per_char": -1.2631169160207112, "num_chars": 6}, {"sum_logits": -10.055849075317383, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -10.055849075317383, "logits_per_char": -1.2569811344146729, "num_chars": 8}, {"sum_logits": -7.0574517250061035, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -7.0574517250061035, "logits_per_char": -0.6415865204551003, "num_chars": 11}, {"sum_logits": -9.508216857910156, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.994152069091797, "logits_per_token": -4.754108428955078, "logits_per_char": -0.7314012967623197, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 803, "native_id": "8b94b61b604ec0d7508804033eec6d23", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.980118751525879, "incorrect_loss_raw": 10.605335712432861, "correct_loss_per_char": 0.49751484394073486, "incorrect_loss_per_char": 1.0271512465917179, "correct_loss_per_token": 1.9900593757629395, "incorrect_loss_per_token": 4.575538794199626, "correct_loss_uncond": -10.590481758117676, "incorrect_loss_uncond": -6.5548388957977295}, "model_output": [{"sum_logits": -7.294554710388184, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.469799995422363, "logits_per_token": -3.647277355194092, "logits_per_char": -0.8105060789320204, "num_chars": 9}, {"sum_logits": -9.528890609741211, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.62204933166504, "logits_per_token": -4.7644453048706055, "logits_per_char": -1.3612700871058874, "num_chars": 7}, {"sum_logits": -8.14680004119873, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.63633918762207, "logits_per_token": -4.073400020599365, "logits_per_char": -1.0183500051498413, "num_chars": 8}, {"sum_logits": -17.45109748840332, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.91250991821289, "logits_per_token": -5.81703249613444, "logits_per_char": -0.9184788151791221, "num_chars": 19}, {"sum_logits": -3.980118751525879, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.570600509643555, "logits_per_token": -1.9900593757629395, "logits_per_char": -0.49751484394073486, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 804, "native_id": "52ecf169febc95a7f5ccb048fc85857d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.631988525390625, "incorrect_loss_raw": 12.24593734741211, "correct_loss_per_char": 0.5539042154947916, "incorrect_loss_per_char": 1.3456392326052227, "correct_loss_per_token": 5.8159942626953125, "incorrect_loss_per_token": 7.895926396052043, "correct_loss_uncond": -6.785747528076172, "incorrect_loss_uncond": -4.308308362960815}, "model_output": [{"sum_logits": -11.631988525390625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.417736053466797, "logits_per_token": -5.8159942626953125, "logits_per_char": -0.5539042154947916, "num_chars": 21}, {"sum_logits": -17.799314498901367, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.35330581665039, "logits_per_token": -8.899657249450684, "logits_per_char": -1.9777016109890408, "num_chars": 9}, {"sum_logits": -8.67314338684082, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.811025619506836, "logits_per_token": -8.67314338684082, "logits_per_char": -0.867314338684082, "num_chars": 10}, {"sum_logits": -9.760711669921875, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.019183158874512, "logits_per_token": -9.760711669921875, "logits_per_char": -1.6267852783203125, "num_chars": 6}, {"sum_logits": -12.750579833984375, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.03346824645996, "logits_per_token": -4.250193277994792, "logits_per_char": -0.9107557024274554, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 805, "native_id": "e408a5a031caec33782cb3b3a005eecc", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.377878665924072, "incorrect_loss_raw": 8.690796256065369, "correct_loss_per_char": 0.672234833240509, "incorrect_loss_per_char": 1.1942552335205532, "correct_loss_per_token": 5.377878665924072, "incorrect_loss_per_token": 7.34680700302124, "correct_loss_uncond": -10.62022352218628, "incorrect_loss_uncond": -7.120381236076355}, "model_output": [{"sum_logits": -6.20192813873291, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -6.20192813873291, "logits_per_char": -0.5168273448944092, "num_chars": 12}, {"sum_logits": -5.856430530548096, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.997017860412598, "logits_per_token": -5.856430530548096, "logits_per_char": -0.732053816318512, "num_chars": 8}, {"sum_logits": -11.952912330627441, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.893515586853027, "logits_per_token": -11.952912330627441, "logits_per_char": -1.9921520551045735, "num_chars": 6}, {"sum_logits": -10.751914024353027, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.37739372253418, "logits_per_token": -5.375957012176514, "logits_per_char": -1.535987717764718, "num_chars": 7}, {"sum_logits": -5.377878665924072, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -5.377878665924072, "logits_per_char": -0.672234833240509, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 806, "native_id": "31bd05ba62a16ee35217224b98c6baea", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.148273468017578, "incorrect_loss_raw": 6.5754348039627075, "correct_loss_per_char": 0.6148273468017578, "incorrect_loss_per_char": 0.957182464713142, "correct_loss_per_token": 6.148273468017578, "incorrect_loss_per_token": 6.5754348039627075, "correct_loss_uncond": -7.341958999633789, "incorrect_loss_uncond": -7.718540549278259}, "model_output": [{"sum_logits": -5.014520168304443, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.07518482208252, "logits_per_token": -5.014520168304443, "logits_per_char": -0.5014520168304444, "num_chars": 10}, {"sum_logits": -5.21568489074707, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.51465129852295, "logits_per_token": -5.21568489074707, "logits_per_char": -0.8692808151245117, "num_chars": 6}, {"sum_logits": -6.148273468017578, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -6.148273468017578, "logits_per_char": -0.6148273468017578, "num_chars": 10}, {"sum_logits": -9.264863967895508, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.404559135437012, "logits_per_token": -9.264863967895508, "logits_per_char": -1.3235519954136439, "num_chars": 7}, {"sum_logits": -6.806670188903809, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.181506156921387, "logits_per_token": -6.806670188903809, "logits_per_char": -1.134445031483968, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 807, "native_id": "b4043bd1f65a8ad088e62042eca259c2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.395631790161133, "incorrect_loss_raw": 7.783102989196777, "correct_loss_per_char": 0.9328479766845703, "incorrect_loss_per_char": 0.9645692441198561, "correct_loss_per_token": 8.395631790161133, "incorrect_loss_per_token": 5.477222681045532, "correct_loss_uncond": -6.376025199890137, "incorrect_loss_uncond": -6.528754234313965}, "model_output": [{"sum_logits": -11.002415657043457, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.400142669677734, "logits_per_token": -5.5012078285217285, "logits_per_char": -0.9168679714202881, "num_chars": 12}, {"sum_logits": -4.302507400512695, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.023618698120117, "logits_per_token": -4.302507400512695, "logits_per_char": -0.7170845667521158, "num_chars": 6}, {"sum_logits": -8.395631790161133, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.77165699005127, "logits_per_token": -8.395631790161133, "logits_per_char": -0.9328479766845703, "num_chars": 9}, {"sum_logits": -7.444626808166504, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.85013484954834, "logits_per_token": -3.722313404083252, "logits_per_char": -0.8271807564629449, "num_chars": 9}, {"sum_logits": -8.382862091064453, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.973532676696777, "logits_per_token": -8.382862091064453, "logits_per_char": -1.3971436818440754, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 808, "native_id": "4302e727e47f464511d4d04f22bed0d2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.729289054870605, "incorrect_loss_raw": 10.728249073028564, "correct_loss_per_char": 1.7882148424784343, "incorrect_loss_per_char": 1.1915652562701513, "correct_loss_per_token": 10.729289054870605, "incorrect_loss_per_token": 7.03241753578186, "correct_loss_uncond": -6.95357608795166, "incorrect_loss_uncond": -6.299840450286865}, "model_output": [{"sum_logits": -9.854597091674805, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -4.927298545837402, "logits_per_char": -1.0949552324083116, "num_chars": 9}, {"sum_logits": -8.221757888793945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.889263153076172, "logits_per_token": -8.221757888793945, "logits_per_char": -1.174536841256278, "num_chars": 7}, {"sum_logits": -19.712055206298828, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -9.856027603149414, "logits_per_char": -1.642671267191569, "num_chars": 12}, {"sum_logits": -10.729289054870605, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.682865142822266, "logits_per_token": -10.729289054870605, "logits_per_char": -1.7882148424784343, "num_chars": 6}, {"sum_logits": -5.12458610534668, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.839284896850586, "logits_per_token": -5.12458610534668, "logits_per_char": -0.8540976842244467, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 809, "native_id": "f0d473701d52125dd055d23042de1b0d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.637205123901367, "incorrect_loss_raw": 9.320003032684326, "correct_loss_per_char": 0.37674358912876676, "incorrect_loss_per_char": 1.0537943247076753, "correct_loss_per_token": 2.637205123901367, "incorrect_loss_per_token": 7.3129284381866455, "correct_loss_uncond": -12.401803016662598, "incorrect_loss_uncond": -6.674980878829956}, "model_output": [{"sum_logits": -2.637205123901367, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -15.039008140563965, "logits_per_token": -2.637205123901367, "logits_per_char": -0.37674358912876676, "num_chars": 7}, {"sum_logits": -6.98288631439209, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.866344451904297, "logits_per_token": -3.491443157196045, "logits_per_char": -0.9975551877702985, "num_chars": 7}, {"sum_logits": -10.407503128051758, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.757722854614258, "logits_per_token": -10.407503128051758, "logits_per_char": -1.0407503128051758, "num_chars": 10}, {"sum_logits": -9.073710441589355, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.465423583984375, "logits_per_token": -4.536855220794678, "logits_per_char": -0.8248827674172141, "num_chars": 11}, {"sum_logits": -10.815912246704102, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.8904447555542, "logits_per_token": -10.815912246704102, "logits_per_char": -1.3519890308380127, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 810, "native_id": "d35112a99ab3983fb51c3adae80bc2da", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0231504440307617, "incorrect_loss_raw": 13.994112730026245, "correct_loss_per_char": 0.5038584073384603, "incorrect_loss_per_char": 1.5711329560431224, "correct_loss_per_token": 3.0231504440307617, "incorrect_loss_per_token": 9.780762195587158, "correct_loss_uncond": -10.65306568145752, "incorrect_loss_uncond": -4.448025703430176}, "model_output": [{"sum_logits": -10.853777885437012, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.424901008605957, "logits_per_token": -10.853777885437012, "logits_per_char": -1.8089629809061687, "num_chars": 6}, {"sum_logits": -14.924188613891602, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.793689727783203, "logits_per_token": -7.462094306945801, "logits_per_char": -1.8655235767364502, "num_chars": 8}, {"sum_logits": -11.415868759155273, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.055395126342773, "logits_per_token": -11.415868759155273, "logits_per_char": -1.2684298621283636, "num_chars": 9}, {"sum_logits": -3.0231504440307617, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -3.0231504440307617, "logits_per_char": -0.5038584073384603, "num_chars": 6}, {"sum_logits": -18.782615661621094, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -26.49456787109375, "logits_per_token": -9.391307830810547, "logits_per_char": -1.3416154044015067, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 811, "native_id": "661474a1a0c29dd7a243b284535ac934", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.848479270935059, "incorrect_loss_raw": 11.503820180892944, "correct_loss_per_char": 0.7034628050667899, "incorrect_loss_per_char": 1.444850766964448, "correct_loss_per_token": 4.924239635467529, "incorrect_loss_per_token": 9.186142444610596, "correct_loss_uncond": -8.758545875549316, "incorrect_loss_uncond": -2.6410040855407715}, "model_output": [{"sum_logits": -18.54142189025879, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.71283531188965, "logits_per_token": -9.270710945129395, "logits_per_char": -1.426263222327599, "num_chars": 13}, {"sum_logits": -11.883506774902344, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.252589225769043, "logits_per_token": -11.883506774902344, "logits_per_char": -1.485438346862793, "num_chars": 8}, {"sum_logits": -8.07928466796875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.6976318359375, "logits_per_token": -8.07928466796875, "logits_per_char": -1.61585693359375, "num_chars": 5}, {"sum_logits": -9.848479270935059, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.607025146484375, "logits_per_token": -4.924239635467529, "logits_per_char": -0.7034628050667899, "num_chars": 14}, {"sum_logits": -7.5110673904418945, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -10.916240692138672, "logits_per_token": -7.5110673904418945, "logits_per_char": -1.2518445650736492, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 812, "native_id": "6416dcdf9b8d7d2787f07e7426f86fe4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.979485511779785, "incorrect_loss_raw": 18.04866862297058, "correct_loss_per_char": 0.6649571259816488, "incorrect_loss_per_char": 1.5129141493158027, "correct_loss_per_token": 2.659828503926595, "incorrect_loss_per_token": 7.53285797437032, "correct_loss_uncond": -14.578038215637207, "incorrect_loss_uncond": -1.588163137435913}, "model_output": [{"sum_logits": -18.521669387817383, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.565021514892578, "logits_per_token": -6.173889795939128, "logits_per_char": -1.3229763848440987, "num_chars": 14}, {"sum_logits": -24.890113830566406, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.428247451782227, "logits_per_token": -8.296704610188803, "logits_per_char": -1.9146241408128004, "num_chars": 13}, {"sum_logits": -19.683080673217773, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.252992630004883, "logits_per_token": -6.561026891072591, "logits_per_char": -1.5140831287090595, "num_chars": 13}, {"sum_logits": -9.099810600280762, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.301065444946289, "logits_per_token": -9.099810600280762, "logits_per_char": -1.2999729428972517, "num_chars": 7}, {"sum_logits": -7.979485511779785, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.557523727416992, "logits_per_token": -2.659828503926595, "logits_per_char": -0.6649571259816488, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 813, "native_id": "0f54a1ee30a0034a3d2db1bfdef9ca85", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9846132397651672, "incorrect_loss_raw": 13.522205352783203, "correct_loss_per_char": 0.08951029452410611, "incorrect_loss_per_char": 1.4246298955037042, "correct_loss_per_token": 0.9846132397651672, "incorrect_loss_per_token": 10.02175259590149, "correct_loss_uncond": -11.758846461772919, "incorrect_loss_uncond": -3.2097980976104736}, "model_output": [{"sum_logits": -16.780345916748047, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.275532722473145, "logits_per_token": -16.780345916748047, "logits_per_char": -2.097543239593506, "num_chars": 8}, {"sum_logits": -9.304853439331055, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.946989059448242, "logits_per_token": -9.304853439331055, "logits_per_char": -0.9304853439331054, "num_chars": 10}, {"sum_logits": -0.9846132397651672, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -12.743459701538086, "logits_per_token": -0.9846132397651672, "logits_per_char": -0.08951029452410611, "num_chars": 11}, {"sum_logits": -15.10371208190918, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.92182159423828, "logits_per_token": -7.55185604095459, "logits_per_char": -1.6781902313232422, "num_chars": 9}, {"sum_logits": -12.899909973144531, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.78367042541504, "logits_per_token": -6.449954986572266, "logits_per_char": -0.992300767164964, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 814, "native_id": "7850beb1209c41fabe385cbedc96a61a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.683269500732422, "incorrect_loss_raw": 13.550715446472168, "correct_loss_per_char": 0.46040868759155273, "incorrect_loss_per_char": 0.912726023039975, "correct_loss_per_token": 1.841634750366211, "incorrect_loss_per_token": 5.593703468640646, "correct_loss_uncond": -12.22763442993164, "incorrect_loss_uncond": -8.965063571929932}, "model_output": [{"sum_logits": -3.683269500732422, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.910903930664062, "logits_per_token": -1.841634750366211, "logits_per_char": -0.46040868759155273, "num_chars": 8}, {"sum_logits": -14.165092468261719, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.11187744140625, "logits_per_token": -4.721697489420573, "logits_per_char": -0.8853182792663574, "num_chars": 16}, {"sum_logits": -14.194609642028809, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -24.38803482055664, "logits_per_token": -4.7315365473429365, "logits_per_char": -1.0139006887163435, "num_chars": 14}, {"sum_logits": -9.463143348693848, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.961870193481445, "logits_per_token": -4.731571674346924, "logits_per_char": -0.7279341037456806, "num_chars": 13}, {"sum_logits": -16.380016326904297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.601333618164062, "logits_per_token": -8.190008163452148, "logits_per_char": -1.0237510204315186, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 815, "native_id": "cdb06b28b9c4e7ef7e880d1f096fd409", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.437782287597656, "incorrect_loss_raw": 15.81688141822815, "correct_loss_per_char": 0.5218891143798828, "incorrect_loss_per_char": 1.2386853573498904, "correct_loss_per_token": 5.218891143798828, "incorrect_loss_per_token": 9.11164657274882, "correct_loss_uncond": -8.950502395629883, "incorrect_loss_uncond": -3.9586641788482666}, "model_output": [{"sum_logits": -24.71063995361328, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -26.356292724609375, "logits_per_token": -8.23687998453776, "logits_per_char": -0.9152088871708622, "num_chars": 27}, {"sum_logits": -12.319662094116211, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.503639221191406, "logits_per_token": -12.319662094116211, "logits_per_char": -2.053277015686035, "num_chars": 6}, {"sum_logits": -10.437782287597656, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.38828468322754, "logits_per_token": -5.218891143798828, "logits_per_char": -0.5218891143798828, "num_chars": 20}, {"sum_logits": -5.542864799499512, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.510114669799805, "logits_per_token": -5.542864799499512, "logits_per_char": -0.692858099937439, "num_chars": 8}, {"sum_logits": -20.694358825683594, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.732135772705078, "logits_per_token": -10.347179412841797, "logits_per_char": -1.2933974266052246, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 816, "native_id": "14309d9bd3c13d1c0efb625198f6304a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.032315254211426, "incorrect_loss_raw": 8.65459132194519, "correct_loss_per_char": 0.2540394067764282, "incorrect_loss_per_char": 0.837778440827415, "correct_loss_per_token": 2.032315254211426, "incorrect_loss_per_token": 5.49443507194519, "correct_loss_uncond": -10.694913864135742, "incorrect_loss_uncond": -6.6869871616363525}, "model_output": [{"sum_logits": -10.609434127807617, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.405667304992676, "logits_per_token": -10.609434127807617, "logits_per_char": -1.5156334468296595, "num_chars": 7}, {"sum_logits": -7.994214057922363, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.543107032775879, "logits_per_token": -3.9971070289611816, "logits_per_char": -0.6661845048268636, "num_chars": 12}, {"sum_logits": -4.490026473999023, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -4.490026473999023, "logits_per_char": -0.44900264739990237, "num_chars": 10}, {"sum_logits": -11.524690628051758, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.92730712890625, "logits_per_token": -2.8811726570129395, "logits_per_char": -0.7202931642532349, "num_chars": 16}, {"sum_logits": -2.032315254211426, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.727229118347168, "logits_per_token": -2.032315254211426, "logits_per_char": -0.2540394067764282, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 817, "native_id": "a00276c6db928900772c0320aeff77c0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.960400104522705, "incorrect_loss_raw": 14.218181371688843, "correct_loss_per_char": 0.592080020904541, "incorrect_loss_per_char": 1.816653874013331, "correct_loss_per_token": 2.960400104522705, "incorrect_loss_per_token": 10.320163249969482, "correct_loss_uncond": -10.031126499176025, "incorrect_loss_uncond": -2.843057155609131}, "model_output": [{"sum_logits": -14.520831108093262, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.599336624145508, "logits_per_token": -14.520831108093262, "logits_per_char": -2.074404444013323, "num_chars": 7}, {"sum_logits": -2.960400104522705, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.99152660369873, "logits_per_token": -2.960400104522705, "logits_per_char": -0.592080020904541, "num_chars": 5}, {"sum_logits": -13.851940155029297, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.238255500793457, "logits_per_token": -13.851940155029297, "logits_per_char": -1.5391044616699219, "num_chars": 9}, {"sum_logits": -20.4473819732666, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.471710205078125, "logits_per_token": -10.2236909866333, "logits_per_char": -2.9210545676095143, "num_chars": 7}, {"sum_logits": -8.052572250366211, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.935651779174805, "logits_per_token": -2.6841907501220703, "logits_per_char": -0.7320520227605646, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 818, "native_id": "4706be6e24f1fafd9ff9fe63583acffd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.786956310272217, "incorrect_loss_raw": 11.506082773208618, "correct_loss_per_char": 0.598996639251709, "incorrect_loss_per_char": 0.6924134174982707, "correct_loss_per_token": 3.8934781551361084, "incorrect_loss_per_token": 5.000074505805969, "correct_loss_uncond": -10.96973466873169, "incorrect_loss_uncond": -7.115591287612915}, "model_output": [{"sum_logits": -8.959052085876465, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.88188362121582, "logits_per_token": -4.479526042938232, "logits_per_char": -0.4479526042938232, "num_chars": 20}, {"sum_logits": -18.071205139160156, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -6.023735046386719, "logits_per_char": -1.2047470092773438, "num_chars": 15}, {"sum_logits": -10.035021781921387, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.494022369384766, "logits_per_token": -5.017510890960693, "logits_per_char": -0.6690014521280925, "num_chars": 15}, {"sum_logits": -8.959052085876465, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.88188362121582, "logits_per_token": -4.479526042938232, "logits_per_char": -0.4479526042938232, "num_chars": 20}, {"sum_logits": -7.786956310272217, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.756690979003906, "logits_per_token": -3.8934781551361084, "logits_per_char": -0.598996639251709, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 819, "native_id": "ee8819b2da5453848c1cbb9d9c93403b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.83547306060791, "incorrect_loss_raw": 10.517763614654541, "correct_loss_per_char": 0.41681950432913645, "incorrect_loss_per_char": 1.082573516929851, "correct_loss_per_token": 1.4588682651519775, "incorrect_loss_per_token": 5.558163682619731, "correct_loss_uncond": -12.593680381774902, "incorrect_loss_uncond": -4.821002960205078}, "model_output": [{"sum_logits": -7.447073936462402, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.521529197692871, "logits_per_token": -7.447073936462402, "logits_per_char": -1.2411789894104004, "num_chars": 6}, {"sum_logits": -5.83547306060791, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.429153442382812, "logits_per_token": -1.4588682651519775, "logits_per_char": -0.41681950432913645, "num_chars": 14}, {"sum_logits": -15.158456802368164, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.69169807434082, "logits_per_token": -5.052818934122722, "logits_per_char": -0.8916739295510685, "num_chars": 17}, {"sum_logits": -9.429972648620605, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.569493293762207, "logits_per_token": -4.714986324310303, "logits_per_char": -0.9429972648620606, "num_chars": 10}, {"sum_logits": -10.035551071166992, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.572345733642578, "logits_per_token": -5.017775535583496, "logits_per_char": -1.254443883895874, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 820, "native_id": "84ea43b967259814d939c62131f74df0", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.885926246643066, "incorrect_loss_raw": 13.084096193313599, "correct_loss_per_char": 0.6107407808303833, "incorrect_loss_per_char": 1.6862384398778283, "correct_loss_per_token": 4.885926246643066, "incorrect_loss_per_token": 10.95457649230957, "correct_loss_uncond": -7.841302871704102, "incorrect_loss_uncond": -0.5335366725921631}, "model_output": [{"sum_logits": -9.680803298950195, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.680188179016113, "logits_per_token": -9.680803298950195, "logits_per_char": -1.6134672164916992, "num_chars": 6}, {"sum_logits": -4.885926246643066, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.727229118347168, "logits_per_token": -4.885926246643066, "logits_per_char": -0.6107407808303833, "num_chars": 8}, {"sum_logits": -12.630497932434082, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.747095108032227, "logits_per_token": -12.630497932434082, "logits_per_char": -1.2630497932434082, "num_chars": 10}, {"sum_logits": -17.036157608032227, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.087433815002441, "logits_per_token": -8.518078804016113, "logits_per_char": -1.7036157608032227, "num_chars": 10}, {"sum_logits": -12.98892593383789, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -12.98892593383789, "logits_per_char": -2.164820988972982, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 821, "native_id": "60e7338e9e6bfc746a15a161eb12706c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.884792804718018, "incorrect_loss_raw": 8.122625946998596, "correct_loss_per_char": 0.4070660670598348, "incorrect_loss_per_char": 1.2361317078272502, "correct_loss_per_token": 4.884792804718018, "incorrect_loss_per_token": 7.088679909706116, "correct_loss_uncond": -10.838307857513428, "incorrect_loss_uncond": -5.8620346784591675}, "model_output": [{"sum_logits": -8.271568298339844, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -4.135784149169922, "logits_per_char": -1.181652614048549, "num_chars": 7}, {"sum_logits": -12.727104187011719, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.866719245910645, "logits_per_token": -12.727104187011719, "logits_per_char": -2.1211840311686196, "num_chars": 6}, {"sum_logits": -5.802771091461182, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.81967830657959, "logits_per_token": -5.802771091461182, "logits_per_char": -0.8289672987801688, "num_chars": 7}, {"sum_logits": -5.689060211181641, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -5.689060211181641, "logits_per_char": -0.812722887311663, "num_chars": 7}, {"sum_logits": -4.884792804718018, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -4.884792804718018, "logits_per_char": -0.4070660670598348, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 822, "native_id": "a0f5414bf98e094f4d807abee28861a4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.048415184020996, "incorrect_loss_raw": 8.810925126075745, "correct_loss_per_char": 1.1575703987708459, "incorrect_loss_per_char": 0.873821806666827, "correct_loss_per_token": 5.016138394673665, "incorrect_loss_per_token": 4.405462563037872, "correct_loss_uncond": -7.4600114822387695, "incorrect_loss_uncond": -9.357824921607971}, "model_output": [{"sum_logits": -11.498310089111328, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.94892120361328, "logits_per_token": -5.749155044555664, "logits_per_char": -1.045300917191939, "num_chars": 11}, {"sum_logits": -5.529499053955078, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.808349609375, "logits_per_token": -2.764749526977539, "logits_per_char": -0.5529499053955078, "num_chars": 10}, {"sum_logits": -15.048415184020996, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.508426666259766, "logits_per_token": -5.016138394673665, "logits_per_char": -1.1575703987708459, "num_chars": 13}, {"sum_logits": -11.425637245178223, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.439861297607422, "logits_per_token": -5.712818622589111, "logits_per_char": -1.1425637245178222, "num_chars": 10}, {"sum_logits": -6.79025411605835, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.47786808013916, "logits_per_token": -3.395127058029175, "logits_per_char": -0.7544726795620389, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 823, "native_id": "44120a9443c619d98ce5bfe4bb219c43", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.100533962249756, "incorrect_loss_raw": 6.282956212759018, "correct_loss_per_char": 0.2333926624721951, "incorrect_loss_per_char": 0.9110761639617738, "correct_loss_per_token": 2.100533962249756, "incorrect_loss_per_token": 5.49023512005806, "correct_loss_uncond": -14.954861164093018, "incorrect_loss_uncond": -9.285776764154434}, "model_output": [{"sum_logits": -2.100533962249756, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -17.055395126342773, "logits_per_token": -2.100533962249756, "logits_per_char": -0.2333926624721951, "num_chars": 9}, {"sum_logits": -5.586799144744873, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -5.586799144744873, "logits_per_char": -0.6983498930931091, "num_chars": 8}, {"sum_logits": -1.9869846105575562, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": true, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -1.9869846105575562, "logits_per_char": -0.2838549443653652, "num_chars": 7}, {"sum_logits": -11.216272354125977, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.97684383392334, "logits_per_token": -11.216272354125977, "logits_per_char": -1.8693787256876628, "num_chars": 6}, {"sum_logits": -6.341768741607666, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.745241165161133, "logits_per_token": -3.170884370803833, "logits_per_char": -0.7927210927009583, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 824, "native_id": "38ab26e29a0984b212006d39185c43f3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.797379016876221, "incorrect_loss_raw": 9.43661904335022, "correct_loss_per_char": 0.32207661204867893, "incorrect_loss_per_char": 1.2907969219344002, "correct_loss_per_token": 2.8986895084381104, "incorrect_loss_per_token": 8.11840546131134, "correct_loss_uncond": -10.49055814743042, "incorrect_loss_uncond": -6.429814338684082}, "model_output": [{"sum_logits": -10.471108436584473, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -10.471108436584473, "logits_per_char": -1.495872633797782, "num_chars": 7}, {"sum_logits": -5.797379016876221, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.28793716430664, "logits_per_token": -2.8986895084381104, "logits_per_char": -0.32207661204867893, "num_chars": 18}, {"sum_logits": -10.545708656311035, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -19.71932601928711, "logits_per_token": -5.272854328155518, "logits_per_char": -1.0545708656311035, "num_chars": 10}, {"sum_logits": -6.953824043273926, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -6.953824043273926, "logits_per_char": -1.3907648086547852, "num_chars": 5}, {"sum_logits": -9.775835037231445, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -9.775835037231445, "logits_per_char": -1.2219793796539307, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 825, "native_id": "a5e207803684eea8a43ca6670c50b354", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.601113319396973, "incorrect_loss_raw": 7.998541712760925, "correct_loss_per_char": 1.2001391649246216, "incorrect_loss_per_char": 1.4421306610107423, "correct_loss_per_token": 4.800556659698486, "incorrect_loss_per_token": 6.5897113879521685, "correct_loss_uncond": -5.334011077880859, "incorrect_loss_uncond": -5.6922324895858765}, "model_output": [{"sum_logits": -7.957097053527832, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.359113693237305, "logits_per_token": -7.957097053527832, "logits_per_char": -1.989274263381958, "num_chars": 4}, {"sum_logits": -9.601113319396973, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.935124397277832, "logits_per_token": -4.800556659698486, "logits_per_char": -1.2001391649246216, "num_chars": 8}, {"sum_logits": -3.7583298683166504, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.950617790222168, "logits_per_token": -3.7583298683166504, "logits_per_char": -0.7516659736633301, "num_chars": 5}, {"sum_logits": -11.82575798034668, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.927528381347656, "logits_per_token": -11.82575798034668, "logits_per_char": -1.9709596633911133, "num_chars": 6}, {"sum_logits": -8.452981948852539, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.525836944580078, "logits_per_token": -2.817660649617513, "logits_per_char": -1.0566227436065674, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 826, "native_id": "af3b9a8b1962cd3bcd19e644d873e7bc", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.042677879333496, "incorrect_loss_raw": 12.23839020729065, "correct_loss_per_char": 0.44918643103705513, "incorrect_loss_per_char": 0.9260948415423782, "correct_loss_per_token": 2.021338939666748, "incorrect_loss_per_token": 5.698258280754089, "correct_loss_uncond": -14.346261024475098, "incorrect_loss_uncond": -7.983402967453003}, "model_output": [{"sum_logits": -6.734989166259766, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.789535522460938, "logits_per_token": -1.6837472915649414, "logits_per_char": -0.396175833309398, "num_chars": 17}, {"sum_logits": -11.101706504821777, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -5.550853252410889, "logits_per_char": -0.7929790360586983, "num_chars": 14}, {"sum_logits": -22.421072006225586, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.729854583740234, "logits_per_token": -11.210536003112793, "logits_per_char": -1.7246978466327374, "num_chars": 13}, {"sum_logits": -8.695793151855469, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.216522216796875, "logits_per_token": -4.347896575927734, "logits_per_char": -0.790526650168679, "num_chars": 11}, {"sum_logits": -4.042677879333496, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.388938903808594, "logits_per_token": -2.021338939666748, "logits_per_char": -0.44918643103705513, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 827, "native_id": "43a91955fd0717997a16897c3324e095", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.023145079612732, "incorrect_loss_raw": 11.998380422592163, "correct_loss_per_char": 0.11368278662363689, "incorrect_loss_per_char": 1.7462554189893935, "correct_loss_per_token": 1.023145079612732, "incorrect_loss_per_token": 10.546550273895264, "correct_loss_uncond": -13.691135048866272, "incorrect_loss_uncond": -3.0690345764160156}, "model_output": [{"sum_logits": -13.65945816040039, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -13.65945816040039, "logits_per_char": -2.731891632080078, "num_chars": 5}, {"sum_logits": -12.48016357421875, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.35527515411377, "logits_per_token": -12.48016357421875, "logits_per_char": -1.5600204467773438, "num_chars": 8}, {"sum_logits": -11.614641189575195, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.097896575927734, "logits_per_token": -5.807320594787598, "logits_per_char": -0.6452578438652886, "num_chars": 18}, {"sum_logits": -1.023145079612732, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": true, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -1.023145079612732, "logits_per_char": -0.11368278662363689, "num_chars": 9}, {"sum_logits": -10.239258766174316, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -10.239258766174316, "logits_per_char": -2.0478517532348635, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 828, "native_id": "7f7a6f2b3087bf37dadbe8aa8d358047", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.531369209289551, "incorrect_loss_raw": 7.362179398536682, "correct_loss_per_char": 0.4531369209289551, "incorrect_loss_per_char": 0.8742161122235386, "correct_loss_per_token": 2.2656846046447754, "incorrect_loss_per_token": 4.240442395210266, "correct_loss_uncond": -9.730745315551758, "incorrect_loss_uncond": -8.91391670703888}, "model_output": [{"sum_logits": -4.4748215675354, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -4.4748215675354, "logits_per_char": -0.49720239639282227, "num_chars": 9}, {"sum_logits": -6.388471603393555, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.98363208770752, "logits_per_token": -3.1942358016967773, "logits_per_char": -0.5807701457630504, "num_chars": 11}, {"sum_logits": -4.531369209289551, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.262114524841309, "logits_per_token": -2.2656846046447754, "logits_per_char": -0.4531369209289551, "num_chars": 10}, {"sum_logits": -8.144145965576172, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.87346076965332, "logits_per_token": -4.072072982788086, "logits_per_char": -0.678678830464681, "num_chars": 12}, {"sum_logits": -10.441278457641602, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.64111328125, "logits_per_token": -5.220639228820801, "logits_per_char": -1.7402130762736003, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 829, "native_id": "37d88a9bb24913c1973cc26d4ce3394f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.685847282409668, "incorrect_loss_raw": 18.123592615127563, "correct_loss_per_char": 1.5857309103012085, "incorrect_loss_per_char": 1.8874801399836585, "correct_loss_per_token": 4.228615760803223, "incorrect_loss_per_token": 7.659307990755353, "correct_loss_uncond": -6.2476911544799805, "incorrect_loss_uncond": -1.097139596939087}, "model_output": [{"sum_logits": -32.05195617675781, "num_tokens": 7, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -30.600067138671875, "logits_per_token": -4.578850882393973, "logits_per_char": -1.3935633120329485, "num_chars": 23}, {"sum_logits": -14.624076843261719, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.630889892578125, "logits_per_token": -7.312038421630859, "logits_per_char": -2.924815368652344, "num_chars": 5}, {"sum_logits": -14.143989562988281, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.696157455444336, "logits_per_token": -7.071994781494141, "logits_per_char": -1.2858172329989346, "num_chars": 11}, {"sum_logits": -11.674347877502441, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -11.674347877502441, "logits_per_char": -1.945724646250407, "num_chars": 6}, {"sum_logits": -12.685847282409668, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.93353843688965, "logits_per_token": -4.228615760803223, "logits_per_char": -1.5857309103012085, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 830, "native_id": "001b0f5a841fd81d13fbe67c7c7179d6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.879135608673096, "incorrect_loss_raw": 10.603878021240234, "correct_loss_per_char": 0.534466873515736, "incorrect_loss_per_char": 1.0101199513608283, "correct_loss_per_token": 1.9597118695576985, "incorrect_loss_per_token": 7.518142342567444, "correct_loss_uncond": -11.90871000289917, "incorrect_loss_uncond": -6.7928876876831055}, "model_output": [{"sum_logits": -9.657102584838867, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.549368858337402, "logits_per_token": -9.657102584838867, "logits_per_char": -1.2071378231048584, "num_chars": 8}, {"sum_logits": -11.101807594299316, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.863563537597656, "logits_per_token": -5.550903797149658, "logits_per_char": -0.8539851995614859, "num_chars": 13}, {"sum_logits": -13.584077835083008, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.793872833251953, "logits_per_token": -6.792038917541504, "logits_per_char": -0.9702912739345005, "num_chars": 14}, {"sum_logits": -8.072524070739746, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.380257606506348, "logits_per_token": -8.072524070739746, "logits_per_char": -1.0090655088424683, "num_chars": 8}, {"sum_logits": -5.879135608673096, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.787845611572266, "logits_per_token": -1.9597118695576985, "logits_per_char": -0.534466873515736, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 831, "native_id": "9f9ca9bb06d6afc31b19c365fb29a1c9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.583826541900635, "incorrect_loss_raw": 11.254119396209717, "correct_loss_per_char": 0.4167115038091486, "incorrect_loss_per_char": 1.4726793414070494, "correct_loss_per_token": 4.583826541900635, "incorrect_loss_per_token": 8.647906462351482, "correct_loss_uncond": -10.378989696502686, "incorrect_loss_uncond": -4.748059034347534}, "model_output": [{"sum_logits": -10.528446197509766, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.967434883117676, "logits_per_token": -10.528446197509766, "logits_per_char": -1.3160557746887207, "num_chars": 8}, {"sum_logits": -15.637277603149414, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.73639678955078, "logits_per_token": -5.212425867716472, "logits_per_char": -1.1169484002249581, "num_chars": 14}, {"sum_logits": -4.583826541900635, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -4.583826541900635, "logits_per_char": -0.4167115038091486, "num_chars": 11}, {"sum_logits": -9.47762680053711, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -9.47762680053711, "logits_per_char": -1.8955253601074218, "num_chars": 5}, {"sum_logits": -9.373126983642578, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.331038475036621, "logits_per_token": -9.373126983642578, "logits_per_char": -1.5621878306070964, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 832, "native_id": "d60c5a494539c66982c0f692afde9499", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.674641132354736, "incorrect_loss_raw": 11.200671195983887, "correct_loss_per_char": 0.5158764665777033, "incorrect_loss_per_char": 1.662836780245342, "correct_loss_per_token": 2.837320566177368, "incorrect_loss_per_token": 7.298665523529053, "correct_loss_uncond": -11.421708583831787, "incorrect_loss_uncond": -4.919086933135986}, "model_output": [{"sum_logits": -14.665447235107422, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.666006088256836, "logits_per_token": -7.332723617553711, "logits_per_char": -2.0950638907296315, "num_chars": 7}, {"sum_logits": -5.674641132354736, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.096349716186523, "logits_per_token": -2.837320566177368, "logits_per_char": -0.5158764665777033, "num_chars": 11}, {"sum_logits": -8.045706748962402, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -8.045706748962402, "logits_per_char": -1.6091413497924805, "num_chars": 5}, {"sum_logits": -5.540932655334473, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -5.540932655334473, "logits_per_char": -1.1081865310668946, "num_chars": 5}, {"sum_logits": -16.55059814453125, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.288299560546875, "logits_per_token": -8.275299072265625, "logits_per_char": -1.8389553493923612, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 833, "native_id": "a6d3a2cb250a6310b8cabd31dbe2138c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.792210578918457, "incorrect_loss_raw": 12.258098363876343, "correct_loss_per_char": 0.34071826934814453, "incorrect_loss_per_char": 1.426414020751652, "correct_loss_per_token": 2.8961052894592285, "incorrect_loss_per_token": 10.664947509765625, "correct_loss_uncond": -9.48158073425293, "incorrect_loss_uncond": -2.4194695949554443}, "model_output": [{"sum_logits": -5.792210578918457, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.273791313171387, "logits_per_token": -2.8961052894592285, "logits_per_char": -0.34071826934814453, "num_chars": 17}, {"sum_logits": -12.745206832885742, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -6.372603416442871, "logits_per_char": -0.6708003596255654, "num_chars": 19}, {"sum_logits": -18.15996551513672, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.21950626373291, "logits_per_token": -18.15996551513672, "logits_per_char": -2.26999568939209, "num_chars": 8}, {"sum_logits": -11.474455833435059, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.765140533447266, "logits_per_token": -11.474455833435059, "logits_per_char": -1.4343069791793823, "num_chars": 8}, {"sum_logits": -6.652765274047852, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.062310218811035, "logits_per_token": -6.652765274047852, "logits_per_char": -1.3305530548095703, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 834, "native_id": "27c523eb9099d2eec66296558eb4448e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.289125442504883, "incorrect_loss_raw": 11.962580919265747, "correct_loss_per_char": 1.3815209070841472, "incorrect_loss_per_char": 1.4774884893343998, "correct_loss_per_token": 8.289125442504883, "incorrect_loss_per_token": 9.58577847480774, "correct_loss_uncond": -5.315558433532715, "incorrect_loss_uncond": -2.879929780960083}, "model_output": [{"sum_logits": -5.24033260345459, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.821972846984863, "logits_per_token": -5.24033260345459, "logits_per_char": -1.0480665206909179, "num_chars": 5}, {"sum_logits": -12.800531387329102, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.868921279907227, "logits_per_token": -12.800531387329102, "logits_per_char": -1.6000664234161377, "num_chars": 8}, {"sum_logits": -8.289125442504883, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.604683876037598, "logits_per_token": -8.289125442504883, "logits_per_char": -1.3815209070841472, "num_chars": 6}, {"sum_logits": -19.014419555664062, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.914505004882812, "logits_per_token": -9.507209777832031, "logits_per_char": -1.4626476581280048, "num_chars": 13}, {"sum_logits": -10.795040130615234, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.764643669128418, "logits_per_token": -10.795040130615234, "logits_per_char": -1.799173355102539, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 835, "native_id": "2509fdd7d94afe9d0c021654ce0ba93f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.962929725646973, "incorrect_loss_raw": 14.03742265701294, "correct_loss_per_char": 0.45868690197284406, "incorrect_loss_per_char": 1.464904557334052, "correct_loss_per_token": 1.9876432418823242, "incorrect_loss_per_token": 7.01871132850647, "correct_loss_uncond": -14.463334083557129, "incorrect_loss_uncond": -3.91459321975708}, "model_output": [{"sum_logits": -14.771272659301758, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.39065933227539, "logits_per_token": -7.385636329650879, "logits_per_char": -1.4771272659301757, "num_chars": 10}, {"sum_logits": -5.962929725646973, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.4262638092041, "logits_per_token": -1.9876432418823242, "logits_per_char": -0.45868690197284406, "num_chars": 13}, {"sum_logits": -19.35999298095703, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.648202896118164, "logits_per_token": -9.679996490478516, "logits_per_char": -1.935999298095703, "num_chars": 10}, {"sum_logits": -15.821259498596191, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.10430335998535, "logits_per_token": -7.910629749298096, "logits_per_char": -1.7579177220662434, "num_chars": 9}, {"sum_logits": -6.197165489196777, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.664897918701172, "logits_per_token": -3.0985827445983887, "logits_per_char": -0.6885739432440864, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 836, "native_id": "75b8195e23c6bada574f1e41471b8f23", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.512741565704346, "incorrect_loss_raw": 8.821213960647583, "correct_loss_per_char": 0.6125268406338162, "incorrect_loss_per_char": 0.8661350223753188, "correct_loss_per_token": 2.756370782852173, "incorrect_loss_per_token": 6.058613061904907, "correct_loss_uncond": -10.534316539764404, "incorrect_loss_uncond": -7.9195237159729}, "model_output": [{"sum_logits": -5.512741565704346, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.04705810546875, "logits_per_token": -2.756370782852173, "logits_per_char": -0.6125268406338162, "num_chars": 9}, {"sum_logits": -7.982242584228516, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -7.982242584228516, "logits_per_char": -0.8869158426920573, "num_chars": 9}, {"sum_logits": -10.863763809204102, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.39474105834961, "logits_per_token": -5.431881904602051, "logits_per_char": -1.0863763809204101, "num_chars": 10}, {"sum_logits": -5.20180606842041, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -5.20180606842041, "logits_per_char": -0.8669676780700684, "num_chars": 6}, {"sum_logits": -11.237043380737305, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.152414321899414, "logits_per_token": -5.618521690368652, "logits_per_char": -0.6242801878187392, "num_chars": 18}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 837, "native_id": "df1bf6f3f87975aa0c1b6d6153d9ecef", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.286618232727051, "incorrect_loss_raw": 10.682415843009949, "correct_loss_per_char": 0.6608272790908813, "incorrect_loss_per_char": 1.3004222439395057, "correct_loss_per_token": 5.286618232727051, "incorrect_loss_per_token": 6.187772035598755, "correct_loss_uncond": -10.494403839111328, "incorrect_loss_uncond": -3.224127173423767}, "model_output": [{"sum_logits": -14.17630386352539, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.260398864746094, "logits_per_token": -7.088151931762695, "logits_per_char": -1.5751448737250433, "num_chars": 9}, {"sum_logits": -5.286618232727051, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.781022071838379, "logits_per_token": -5.286618232727051, "logits_per_char": -0.6608272790908813, "num_chars": 8}, {"sum_logits": -10.813543319702148, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.932318687438965, "logits_per_token": -5.406771659851074, "logits_per_char": -0.9011286099751791, "num_chars": 12}, {"sum_logits": -10.967303276062012, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.718420028686523, "logits_per_token": -5.483651638031006, "logits_per_char": -1.3709129095077515, "num_chars": 8}, {"sum_logits": -6.772512912750244, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.715034484863281, "logits_per_token": -6.772512912750244, "logits_per_char": -1.3545025825500487, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 838, "native_id": "e99d4cb2e69d3e020ee9e4e9a84ac45b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.016079902648926, "incorrect_loss_raw": 14.132162809371948, "correct_loss_per_char": 0.6016079902648925, "incorrect_loss_per_char": 1.228623946507772, "correct_loss_per_token": 3.008039951324463, "incorrect_loss_per_token": 6.437414248784383, "correct_loss_uncond": -14.597071647644043, "incorrect_loss_uncond": -5.094822645187378}, "model_output": [{"sum_logits": -11.100883483886719, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -20.33024787902832, "logits_per_token": -5.550441741943359, "logits_per_char": -1.1100883483886719, "num_chars": 10}, {"sum_logits": -6.016079902648926, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -20.61315155029297, "logits_per_token": -3.008039951324463, "logits_per_char": -0.6016079902648925, "num_chars": 10}, {"sum_logits": -14.126136779785156, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.903358459472656, "logits_per_token": -7.063068389892578, "logits_per_char": -1.1771780649820964, "num_chars": 12}, {"sum_logits": -15.088011741638184, "num_tokens": 3, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -5.029337247212728, "logits_per_char": -1.0058674494425455, "num_chars": 15}, {"sum_logits": -16.213619232177734, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -22.4454288482666, "logits_per_token": -8.106809616088867, "logits_per_char": -1.6213619232177734, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 839, "native_id": "b1274d6f5969dea4d46f43fbdc28fd97", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.252818584442139, "incorrect_loss_raw": 6.17909049987793, "correct_loss_per_char": 0.4725353982713487, "incorrect_loss_per_char": 0.7486662353788103, "correct_loss_per_token": 4.252818584442139, "incorrect_loss_per_token": 6.17909049987793, "correct_loss_uncond": -11.501285076141357, "incorrect_loss_uncond": -7.649032831192017}, "model_output": [{"sum_logits": -4.252818584442139, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.754103660583496, "logits_per_token": -4.252818584442139, "logits_per_char": -0.4725353982713487, "num_chars": 9}, {"sum_logits": -6.518229961395264, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.655372619628906, "logits_per_token": -6.518229961395264, "logits_per_char": -0.543185830116272, "num_chars": 12}, {"sum_logits": -8.302226066589355, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.002788543701172, "logits_per_token": -8.302226066589355, "logits_per_char": -1.0377782583236694, "num_chars": 8}, {"sum_logits": -4.936822891235352, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.35391902923584, "logits_per_token": -4.936822891235352, "logits_per_char": -0.7052604130336216, "num_chars": 7}, {"sum_logits": -4.959083080291748, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.300413131713867, "logits_per_token": -4.959083080291748, "logits_per_char": -0.7084404400416783, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 840, "native_id": "001cb999a61a5c8b4031ff53cf261714", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1978588104248047, "incorrect_loss_raw": 8.878412246704102, "correct_loss_per_char": 0.43957176208496096, "incorrect_loss_per_char": 0.8725642025470733, "correct_loss_per_token": 2.1978588104248047, "incorrect_loss_per_token": 5.458297610282898, "correct_loss_uncond": -12.028355598449707, "incorrect_loss_uncond": -8.150254011154175}, "model_output": [{"sum_logits": -2.1978588104248047, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.226214408874512, "logits_per_token": -2.1978588104248047, "logits_per_char": -0.43957176208496096, "num_chars": 5}, {"sum_logits": -2.1978588104248047, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.226214408874512, "logits_per_token": -2.1978588104248047, "logits_per_char": -0.43957176208496096, "num_chars": 5}, {"sum_logits": -4.585236549377441, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.101953506469727, "logits_per_token": -2.2926182746887207, "logits_per_char": -0.5731545686721802, "num_chars": 8}, {"sum_logits": -22.775680541992188, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.31139373779297, "logits_per_token": -11.387840270996094, "logits_per_char": -1.6268343244280135, "num_chars": 14}, {"sum_logits": -5.954873085021973, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.475103378295898, "logits_per_token": -5.954873085021973, "logits_per_char": -0.850696155003139, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 841, "native_id": "18ee7a93410a6b4c9cec5d4894775991_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6959553956985474, "incorrect_loss_raw": 10.68174421787262, "correct_loss_per_char": 0.42398884892463684, "incorrect_loss_per_char": 1.555863751305474, "correct_loss_per_token": 1.6959553956985474, "incorrect_loss_per_token": 9.129270672798157, "correct_loss_uncond": -12.789811968803406, "incorrect_loss_uncond": -3.800991415977478}, "model_output": [{"sum_logits": -5.7152485847473145, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.246232032775879, "logits_per_token": -5.7152485847473145, "logits_per_char": -1.143049716949463, "num_chars": 5}, {"sum_logits": -14.68533706665039, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.471000671386719, "logits_per_token": -14.68533706665039, "logits_per_char": -1.2237780888875325, "num_chars": 12}, {"sum_logits": -1.6959553956985474, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": true, "sum_logits_uncond": -14.485767364501953, "logits_per_token": -1.6959553956985474, "logits_per_char": -0.42398884892463684, "num_chars": 4}, {"sum_logits": -9.90660285949707, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -9.90660285949707, "logits_per_char": -2.4766507148742676, "num_chars": 4}, {"sum_logits": -12.419788360595703, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.883464813232422, "logits_per_token": -6.209894180297852, "logits_per_char": -1.3799764845106337, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 842, "native_id": "3b8be90fdd8c67571d8d692eaa6dd87b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9894633293151855, "incorrect_loss_raw": 5.245791137218475, "correct_loss_per_char": 0.33245527744293213, "incorrect_loss_per_char": 0.6473195667777744, "correct_loss_per_token": 1.9947316646575928, "incorrect_loss_per_token": 4.135453879833221, "correct_loss_uncond": -15.488831043243408, "incorrect_loss_uncond": -9.721993029117584}, "model_output": [{"sum_logits": -8.882698059082031, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.86941909790039, "logits_per_token": -4.441349029541016, "logits_per_char": -0.5921798706054687, "num_chars": 15}, {"sum_logits": -3.7377915382385254, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -3.7377915382385254, "logits_per_char": -0.4672239422798157, "num_chars": 8}, {"sum_logits": -5.866115570068359, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -9.008064270019531, "logits_per_token": -5.866115570068359, "logits_per_char": -1.173223114013672, "num_chars": 5}, {"sum_logits": -3.9894633293151855, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.478294372558594, "logits_per_token": -1.9947316646575928, "logits_per_char": -0.33245527744293213, "num_chars": 12}, {"sum_logits": -2.4965593814849854, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -2.4965593814849854, "logits_per_char": -0.3566513402121408, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 843, "native_id": "300bd7704ae8c5fcef618902f18fd01d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 3.7483949661254883, "incorrect_loss_raw": 10.299691319465637, "correct_loss_per_char": 0.37483949661254884, "incorrect_loss_per_char": 0.8815388971915492, "correct_loss_per_token": 1.249464988708496, "incorrect_loss_per_token": 4.930915594100952, "correct_loss_uncond": -11.562464714050293, "incorrect_loss_uncond": -6.560442805290222}, "model_output": [{"sum_logits": -13.24770736694336, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.632976531982422, "logits_per_token": -6.62385368347168, "logits_per_char": -1.324770736694336, "num_chars": 10}, {"sum_logits": -12.925440788269043, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.07874298095703, "logits_per_token": -6.4627203941345215, "logits_per_char": -1.175040071660822, "num_chars": 11}, {"sum_logits": -5.254321575164795, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.308332443237305, "logits_per_token": -1.7514405250549316, "logits_per_char": -0.3283950984477997, "num_chars": 16}, {"sum_logits": -3.7483949661254883, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.310859680175781, "logits_per_token": -1.249464988708496, "logits_per_char": -0.37483949661254884, "num_chars": 10}, {"sum_logits": -9.771295547485352, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.42048454284668, "logits_per_token": -4.885647773742676, "logits_per_char": -0.6979496819632394, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 844, "native_id": "f18833ace65a54709377134168b457a9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.187593460083008, "incorrect_loss_raw": 12.726101279258728, "correct_loss_per_char": 0.598966121673584, "incorrect_loss_per_char": 1.0069805761178334, "correct_loss_per_token": 3.593796730041504, "incorrect_loss_per_token": 5.843327482541403, "correct_loss_uncond": -13.371566772460938, "incorrect_loss_uncond": -7.714281916618347}, "model_output": [{"sum_logits": -11.457324981689453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.46918296813965, "logits_per_token": -5.728662490844727, "logits_per_char": -0.7160828113555908, "num_chars": 16}, {"sum_logits": -7.6918044090271, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -2.563934803009033, "logits_per_char": -0.384590220451355, "num_chars": 20}, {"sum_logits": -7.187593460083008, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -3.593796730041504, "logits_per_char": -0.598966121673584, "num_chars": 12}, {"sum_logits": -25.011844635009766, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.79389190673828, "logits_per_token": -8.337281545003256, "logits_per_char": -2.084320386250814, "num_chars": 12}, {"sum_logits": -6.743431091308594, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -6.743431091308594, "logits_per_char": -0.8429288864135742, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 845, "native_id": "5bba03b425f5abc6e017f194cf074b06", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.817742347717285, "incorrect_loss_raw": 13.16007113456726, "correct_loss_per_char": 2.1362903912862143, "incorrect_loss_per_char": 1.5150774995485943, "correct_loss_per_token": 6.408871173858643, "incorrect_loss_per_token": 11.601672172546387, "correct_loss_uncond": -3.2113943099975586, "incorrect_loss_uncond": -3.5706191062927246}, "model_output": [{"sum_logits": -15.211450576782227, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.612943649291992, "logits_per_token": -15.211450576782227, "logits_per_char": -1.5211450576782226, "num_chars": 10}, {"sum_logits": -12.817742347717285, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.029136657714844, "logits_per_token": -6.408871173858643, "logits_per_char": -2.1362903912862143, "num_chars": 6}, {"sum_logits": -7.8375139236450195, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.821721076965332, "logits_per_token": -7.8375139236450195, "logits_per_char": -1.567502784729004, "num_chars": 5}, {"sum_logits": -17.124128341674805, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.96087646484375, "logits_per_token": -17.124128341674805, "logits_per_char": -2.1405160427093506, "num_chars": 8}, {"sum_logits": -12.467191696166992, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.527219772338867, "logits_per_token": -6.233595848083496, "logits_per_char": -0.8311461130777995, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 846, "native_id": "78276a4eab6e8d6b9ae3749211816977", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.0880866050720215, "incorrect_loss_raw": 6.251740872859955, "correct_loss_per_char": 0.5088086605072022, "incorrect_loss_per_char": 0.8438974738121033, "correct_loss_per_token": 5.0880866050720215, "incorrect_loss_per_token": 4.910619556903839, "correct_loss_uncond": -8.75664758682251, "incorrect_loss_uncond": -6.72791987657547}, "model_output": [{"sum_logits": -10.728970527648926, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.16194725036621, "logits_per_token": -5.364485263824463, "logits_per_char": -1.1921078364054363, "num_chars": 9}, {"sum_logits": -7.963242530822754, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.459546089172363, "logits_per_token": -7.963242530822754, "logits_per_char": -0.9954053163528442, "num_chars": 8}, {"sum_logits": -5.0880866050720215, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -13.844734191894531, "logits_per_token": -5.0880866050720215, "logits_per_char": -0.5088086605072022, "num_chars": 10}, {"sum_logits": -4.068550109863281, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -10.616961479187012, "logits_per_token": -4.068550109863281, "logits_per_char": -0.8137100219726563, "num_chars": 5}, {"sum_logits": -2.2462003231048584, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": true, "sum_logits_uncond": -12.680188179016113, "logits_per_token": -2.2462003231048584, "logits_per_char": -0.3743667205174764, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 847, "native_id": "cf33e0f5891ce53a716432be06a46ee1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.412981986999512, "incorrect_loss_raw": 8.55912446975708, "correct_loss_per_char": 0.8412981986999511, "incorrect_loss_per_char": 0.9759511255750469, "correct_loss_per_token": 8.412981986999512, "incorrect_loss_per_token": 8.55912446975708, "correct_loss_uncond": -5.639514923095703, "incorrect_loss_uncond": -5.2948384284973145}, "model_output": [{"sum_logits": -7.554360389709473, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.87220573425293, "logits_per_token": -7.554360389709473, "logits_per_char": -0.8393733766343858, "num_chars": 9}, {"sum_logits": -9.291138648986816, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.39179801940918, "logits_per_token": -9.291138648986816, "logits_per_char": -0.5465375675874597, "num_chars": 17}, {"sum_logits": -8.412981986999512, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.052496910095215, "logits_per_token": -8.412981986999512, "logits_per_char": -0.8412981986999511, "num_chars": 10}, {"sum_logits": -6.587553977966309, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.887779235839844, "logits_per_token": -6.587553977966309, "logits_per_char": -1.3175107955932617, "num_chars": 5}, {"sum_logits": -10.803444862365723, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.264068603515625, "logits_per_token": -10.803444862365723, "logits_per_char": -1.2003827624850802, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 848, "native_id": "3938d6e50d38b1f8774b4f00a89bdb39", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.360926628112793, "incorrect_loss_raw": 9.50008761882782, "correct_loss_per_char": 0.31534862518310547, "incorrect_loss_per_char": 1.4505194365978242, "correct_loss_per_token": 1.7869755427042644, "incorrect_loss_per_token": 6.835426052411397, "correct_loss_uncond": -14.608050346374512, "incorrect_loss_uncond": -7.348907828330994}, "model_output": [{"sum_logits": -15.222879409790039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.630889892578125, "logits_per_token": -7.6114397048950195, "logits_per_char": -3.044575881958008, "num_chars": 5}, {"sum_logits": -7.582232475280762, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -7.582232475280762, "logits_per_char": -0.7582232475280761, "num_chars": 10}, {"sum_logits": -5.360926628112793, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.968976974487305, "logits_per_token": -1.7869755427042644, "logits_per_char": -0.31534862518310547, "num_chars": 17}, {"sum_logits": -10.624428749084473, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.724822998046875, "logits_per_token": -10.624428749084473, "logits_per_char": -1.770738124847412, "num_chars": 6}, {"sum_logits": -4.570809841156006, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -1.5236032803853352, "logits_per_char": -0.2285404920578003, "num_chars": 20}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 849, "native_id": "cabefb7063a728e77abd44d97397a2a4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.502677917480469, "incorrect_loss_raw": 13.969561100006104, "correct_loss_per_char": 0.8752231597900391, "incorrect_loss_per_char": 2.2386207841691514, "correct_loss_per_token": 5.251338958740234, "incorrect_loss_per_token": 13.969561100006104, "correct_loss_uncond": -7.427726745605469, "incorrect_loss_uncond": 0.7468161582946777}, "model_output": [{"sum_logits": -12.56956672668457, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -12.56956672668457, "logits_per_char": -3.1423916816711426, "num_chars": 4}, {"sum_logits": -16.228910446166992, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.197978019714355, "logits_per_token": -16.228910446166992, "logits_per_char": -2.318415778023856, "num_chars": 7}, {"sum_logits": -10.502677917480469, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.930404663085938, "logits_per_token": -5.251338958740234, "logits_per_char": -0.8752231597900391, "num_chars": 12}, {"sum_logits": -15.294282913208008, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -15.294282913208008, "logits_per_char": -1.5294282913208008, "num_chars": 10}, {"sum_logits": -11.785484313964844, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.640676498413086, "logits_per_token": -11.785484313964844, "logits_per_char": -1.9642473856608074, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 850, "native_id": "60b909ad1d7956218a5d99954fdebecd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6778674125671387, "incorrect_loss_raw": 7.266306042671204, "correct_loss_per_char": 0.525409630366734, "incorrect_loss_per_char": 0.8705623802684602, "correct_loss_per_token": 3.6778674125671387, "incorrect_loss_per_token": 4.38505345582962, "correct_loss_uncond": -11.593413829803467, "incorrect_loss_uncond": -9.735190749168396}, "model_output": [{"sum_logits": -7.103353977203369, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -3.5516769886016846, "logits_per_char": -0.7892615530225966, "num_chars": 9}, {"sum_logits": -4.451135635375977, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.726114273071289, "logits_per_token": -2.2255678176879883, "logits_per_char": -0.5563919544219971, "num_chars": 8}, {"sum_logits": -6.015203475952148, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -6.015203475952148, "logits_per_char": -0.8593147822788784, "num_chars": 7}, {"sum_logits": -3.6778674125671387, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -3.6778674125671387, "logits_per_char": -0.525409630366734, "num_chars": 7}, {"sum_logits": -11.49553108215332, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.7644100189209, "logits_per_token": -5.74776554107666, "logits_per_char": -1.277281231350369, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 851, "native_id": "9fdebd1c2cf498f1d726a025b780a39a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.628740310668945, "incorrect_loss_raw": 10.606057405471802, "correct_loss_per_char": 0.6935218464244496, "incorrect_loss_per_char": 0.9925323244598176, "correct_loss_per_token": 2.5429134368896484, "incorrect_loss_per_token": 4.866131981213888, "correct_loss_uncond": -9.642509460449219, "incorrect_loss_uncond": -6.883115768432617}, "model_output": [{"sum_logits": -7.628740310668945, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -17.271249771118164, "logits_per_token": -2.5429134368896484, "logits_per_char": -0.6935218464244496, "num_chars": 11}, {"sum_logits": -11.826108932495117, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -19.392860412597656, "logits_per_token": -5.913054466247559, "logits_per_char": -0.9855090777079264, "num_chars": 12}, {"sum_logits": -8.54065990447998, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -16.74160385131836, "logits_per_token": -4.27032995223999, "logits_per_char": -0.5337912440299988, "num_chars": 16}, {"sum_logits": -10.48552131652832, "num_tokens": 3, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -3.495173772176107, "logits_per_char": -1.1650579240587022, "num_chars": 9}, {"sum_logits": -11.571939468383789, "num_tokens": 2, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -18.186935424804688, "logits_per_token": -5.7859697341918945, "logits_per_char": -1.2857710520426433, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 852, "native_id": "f36027954e43cfd926451bdf7cb0c3ac", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.710415840148926, "incorrect_loss_raw": 16.847634077072144, "correct_loss_per_char": 0.5931089107806866, "incorrect_loss_per_char": 1.4080904465455275, "correct_loss_per_token": 2.570138613382975, "incorrect_loss_per_token": 7.1274497509002686, "correct_loss_uncond": -11.122828483581543, "incorrect_loss_uncond": -4.747615575790405}, "model_output": [{"sum_logits": -20.74187660217285, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -26.03689956665039, "logits_per_token": -5.185469150543213, "logits_per_char": -1.0370938301086425, "num_chars": 20}, {"sum_logits": -7.710415840148926, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.83324432373047, "logits_per_token": -2.570138613382975, "logits_per_char": -0.5931089107806866, "num_chars": 13}, {"sum_logits": -19.634288787841797, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.618450164794922, "logits_per_token": -9.817144393920898, "logits_per_char": -1.5103299067570612, "num_chars": 13}, {"sum_logits": -8.561177253723145, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.086233139038086, "logits_per_token": -4.280588626861572, "logits_per_char": -0.7782888412475586, "num_chars": 11}, {"sum_logits": -18.45319366455078, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.639415740966797, "logits_per_token": -9.22659683227539, "logits_per_char": -2.3066492080688477, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 853, "native_id": "7ec14907622c6d5a6087cd59a22d8c9d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.4777302742004395, "incorrect_loss_raw": 9.688413619995117, "correct_loss_per_char": 0.6797936612909491, "incorrect_loss_per_char": 1.0121529091408836, "correct_loss_per_token": 3.7388651371002197, "incorrect_loss_per_token": 6.613101363182068, "correct_loss_uncond": -12.970317363739014, "incorrect_loss_uncond": -6.155239105224609}, "model_output": [{"sum_logits": -10.17430305480957, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -5.087151527404785, "logits_per_char": -0.7267359324863979, "num_chars": 14}, {"sum_logits": -14.428194999694824, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.81740951538086, "logits_per_token": -7.214097499847412, "logits_per_char": -1.1098611538226788, "num_chars": 13}, {"sum_logits": -7.9976725578308105, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -7.9976725578308105, "logits_per_char": -1.332945426305135, "num_chars": 6}, {"sum_logits": -7.4777302742004395, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.448047637939453, "logits_per_token": -3.7388651371002197, "logits_per_char": -0.6797936612909491, "num_chars": 11}, {"sum_logits": -6.153483867645264, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -6.153483867645264, "logits_per_char": -0.8790691239493233, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 854, "native_id": "efe488f67b53a4b6e69782c01c84f06c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.39470911026001, "incorrect_loss_raw": 5.764216184616089, "correct_loss_per_char": 1.078941822052002, "incorrect_loss_per_char": 0.7510940892355782, "correct_loss_per_token": 5.39470911026001, "incorrect_loss_per_token": 4.418514013290405, "correct_loss_uncond": -8.190709590911865, "incorrect_loss_uncond": -9.54814076423645}, "model_output": [{"sum_logits": -10.765617370605469, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.45631217956543, "logits_per_token": -5.382808685302734, "logits_per_char": -0.7177078247070312, "num_chars": 15}, {"sum_logits": -3.217164993286133, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -3.217164993286133, "logits_per_char": -0.6434329986572266, "num_chars": 5}, {"sum_logits": -5.39470911026001, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.585418701171875, "logits_per_token": -5.39470911026001, "logits_per_char": -1.078941822052002, "num_chars": 5}, {"sum_logits": -3.002666473388672, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -3.002666473388672, "logits_per_char": -0.4289523533412388, "num_chars": 7}, {"sum_logits": -6.071415901184082, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -6.071415901184082, "logits_per_char": -1.2142831802368164, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 855, "native_id": "7c62637437ad7515452886074010a438", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.497589111328125, "incorrect_loss_raw": 14.625731468200684, "correct_loss_per_char": 0.9581324259440104, "incorrect_loss_per_char": 0.9884960030064438, "correct_loss_per_token": 5.7487945556640625, "incorrect_loss_per_token": 4.626889092581613, "correct_loss_uncond": -7.851297378540039, "incorrect_loss_uncond": -6.684245586395264}, "model_output": [{"sum_logits": -11.497589111328125, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.348886489868164, "logits_per_token": -5.7487945556640625, "logits_per_char": -0.9581324259440104, "num_chars": 12}, {"sum_logits": -10.373147964477539, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.428735733032227, "logits_per_token": -3.4577159881591797, "logits_per_char": -0.9430134513161399, "num_chars": 11}, {"sum_logits": -25.242136001586914, "num_tokens": 7, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -31.68354034423828, "logits_per_token": -3.6060194287981306, "logits_per_char": -0.8414045333862304, "num_chars": 30}, {"sum_logits": -7.151889801025391, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.418819427490234, "logits_per_token": -3.5759449005126953, "logits_per_char": -0.5959908167521158, "num_chars": 12}, {"sum_logits": -15.73575210571289, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.708812713623047, "logits_per_token": -7.867876052856445, "logits_per_char": -1.573575210571289, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 856, "native_id": "4f7be1c68654e2924c161c8eca652928", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.467085838317871, "incorrect_loss_raw": 9.070210218429565, "correct_loss_per_char": 0.8606441671198065, "incorrect_loss_per_char": 1.2462548149956598, "correct_loss_per_token": 4.7335429191589355, "incorrect_loss_per_token": 5.100934863090515, "correct_loss_uncond": -10.254673957824707, "incorrect_loss_uncond": -7.017366409301758}, "model_output": [{"sum_logits": -10.98480224609375, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.20847511291504, "logits_per_token": -5.492401123046875, "logits_per_char": -1.2205335828993056, "num_chars": 9}, {"sum_logits": -9.467085838317871, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.721759796142578, "logits_per_token": -4.7335429191589355, "logits_per_char": -0.8606441671198065, "num_chars": 11}, {"sum_logits": -8.796262741088867, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.008298873901367, "logits_per_token": -2.199065685272217, "logits_per_char": -0.5864175160725912, "num_chars": 15}, {"sum_logits": -8.924769401550293, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.974520683288574, "logits_per_token": -8.924769401550293, "logits_per_char": -2.2311923503875732, "num_chars": 4}, {"sum_logits": -7.575006484985352, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.159011840820312, "logits_per_token": -3.787503242492676, "logits_per_char": -0.946875810623169, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 857, "native_id": "e4976ee741cf4b28b8a42780ffb15774", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.491786003112793, "incorrect_loss_raw": 12.141433238983154, "correct_loss_per_char": 0.8324206670125326, "incorrect_loss_per_char": 1.1073678786938008, "correct_loss_per_token": 7.491786003112793, "incorrect_loss_per_token": 9.827965259552002, "correct_loss_uncond": -6.366789817810059, "incorrect_loss_uncond": -3.265364408493042}, "model_output": [{"sum_logits": -10.645872116088867, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.745762825012207, "logits_per_token": -10.645872116088867, "logits_per_char": -1.0645872116088868, "num_chars": 10}, {"sum_logits": -7.491786003112793, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.858575820922852, "logits_per_token": -7.491786003112793, "logits_per_char": -0.8324206670125326, "num_chars": 9}, {"sum_logits": -10.226018905639648, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -10.226018905639648, "logits_per_char": -1.0226018905639649, "num_chars": 10}, {"sum_logits": -9.186098098754883, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -9.186098098754883, "logits_per_char": -0.9186098098754882, "num_chars": 10}, {"sum_logits": -18.50774383544922, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -9.25387191772461, "logits_per_char": -1.423672602726863, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 858, "native_id": "14e75a42a416d32a24e2826cae34d2bf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.607282638549805, "incorrect_loss_raw": 15.949007987976074, "correct_loss_per_char": 0.8607282638549805, "incorrect_loss_per_char": 1.7797986160625112, "correct_loss_per_token": 4.303641319274902, "incorrect_loss_per_token": 9.874907732009888, "correct_loss_uncond": -7.666660308837891, "incorrect_loss_uncond": -1.8439993858337402}, "model_output": [{"sum_logits": -15.203229904174805, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -15.203229904174805, "logits_per_char": -2.533871650695801, "num_chars": 6}, {"sum_logits": -13.578886032104492, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.477210998535156, "logits_per_token": -6.789443016052246, "logits_per_char": -1.234444184736772, "num_chars": 11}, {"sum_logits": -16.556427001953125, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.957216262817383, "logits_per_token": -8.278213500976562, "logits_per_char": -1.5051297274502842, "num_chars": 11}, {"sum_logits": -18.457489013671875, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.781787872314453, "logits_per_token": -9.228744506835938, "logits_per_char": -1.8457489013671875, "num_chars": 10}, {"sum_logits": -8.607282638549805, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.273942947387695, "logits_per_token": -4.303641319274902, "logits_per_char": -0.8607282638549805, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 859, "native_id": "004607228ad49b69eac932c1005d6106", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.007713317871094, "incorrect_loss_raw": 9.618543863296509, "correct_loss_per_char": 0.667180887858073, "incorrect_loss_per_char": 1.0083102284915864, "correct_loss_per_token": 3.3359044392903647, "incorrect_loss_per_token": 6.58143424987793, "correct_loss_uncond": -11.203720092773438, "incorrect_loss_uncond": -7.01092529296875}, "model_output": [{"sum_logits": -13.568474769592285, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.542523384094238, "logits_per_token": -6.784237384796143, "logits_per_char": -1.3568474769592285, "num_chars": 10}, {"sum_logits": -10.728402137756348, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -5.364201068878174, "logits_per_char": -0.8940335114796957, "num_chars": 12}, {"sum_logits": -10.007713317871094, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -3.3359044392903647, "logits_per_char": -0.667180887858073, "num_chars": 15}, {"sum_logits": -6.523792743682861, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -6.523792743682861, "logits_per_char": -0.9319703919546944, "num_chars": 7}, {"sum_logits": -7.653505802154541, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.70147705078125, "logits_per_token": -7.653505802154541, "logits_per_char": -0.8503895335727267, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 860, "native_id": "a7f54ee1866d5db34eacf40efa53c93e", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.303350448608398, "incorrect_loss_raw": 12.74636960029602, "correct_loss_per_char": 1.0606700897216796, "incorrect_loss_per_char": 1.908369273420364, "correct_loss_per_token": 5.303350448608398, "incorrect_loss_per_token": 10.775353074073792, "correct_loss_uncond": -7.046175956726074, "incorrect_loss_uncond": -2.397843599319458}, "model_output": [{"sum_logits": -8.419210433959961, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -8.419210433959961, "logits_per_char": -1.0524013042449951, "num_chars": 8}, {"sum_logits": -17.457746505737305, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.137375831604004, "logits_per_token": -17.457746505737305, "logits_per_char": -2.4939637865339006, "num_chars": 7}, {"sum_logits": -5.303350448608398, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.349526405334473, "logits_per_token": -5.303350448608398, "logits_per_char": -1.0606700897216796, "num_chars": 5}, {"sum_logits": -9.340389251708984, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.573386192321777, "logits_per_token": -9.340389251708984, "logits_per_char": -2.335097312927246, "num_chars": 4}, {"sum_logits": -15.768132209777832, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.143718719482422, "logits_per_token": -7.884066104888916, "logits_per_char": -1.7520146899753146, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 861, "native_id": "e56c56c3cfe50ba0c787c2bd67255be8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.177189826965332, "incorrect_loss_raw": 8.42149007320404, "correct_loss_per_char": 0.5221487283706665, "incorrect_loss_per_char": 1.4963658392429353, "correct_loss_per_token": 4.177189826965332, "incorrect_loss_per_token": 8.42149007320404, "correct_loss_uncond": -2.7669715881347656, "incorrect_loss_uncond": -2.0524710416793823}, "model_output": [{"sum_logits": -10.359752655029297, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.402257919311523, "logits_per_token": -10.359752655029297, "logits_per_char": -2.0719505310058595, "num_chars": 5}, {"sum_logits": -10.015417098999023, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.426210403442383, "logits_per_token": -10.015417098999023, "logits_per_char": -1.4307738712855749, "num_chars": 7}, {"sum_logits": -5.424509525299072, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -8.91393756866455, "logits_per_token": -5.424509525299072, "logits_per_char": -1.356127381324768, "num_chars": 4}, {"sum_logits": -4.177189826965332, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -6.944161415100098, "logits_per_token": -4.177189826965332, "logits_per_char": -0.5221487283706665, "num_chars": 8}, {"sum_logits": -7.8862810134887695, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -9.153438568115234, "logits_per_token": -7.8862810134887695, "logits_per_char": -1.1266115733555384, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 862, "native_id": "6f48ee564a48293eb501cc0d8197bdd9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.0342698097229, "incorrect_loss_raw": 8.233534008264542, "correct_loss_per_char": 1.0057116349538167, "incorrect_loss_per_char": 0.9323245323366589, "correct_loss_per_token": 6.0342698097229, "incorrect_loss_per_token": 6.651305586099625, "correct_loss_uncond": -9.996821880340576, "incorrect_loss_uncond": -7.652862399816513}, "model_output": [{"sum_logits": -13.243573188781738, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.097356796264648, "logits_per_token": -13.243573188781738, "logits_per_char": -1.3243573188781739, "num_chars": 10}, {"sum_logits": -5.440420627593994, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.563647270202637, "logits_per_token": -5.440420627593994, "logits_per_char": -0.6800525784492493, "num_chars": 8}, {"sum_logits": -6.0342698097229, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -6.0342698097229, "logits_per_char": -1.0057116349538167, "num_chars": 6}, {"sum_logits": -12.657827377319336, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.8222713470459, "logits_per_token": -6.328913688659668, "logits_per_char": -1.4064252641465929, "num_chars": 9}, {"sum_logits": -1.5923148393630981, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -14.062310218811035, "logits_per_token": -1.5923148393630981, "logits_per_char": -0.31846296787261963, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 863, "native_id": "13d2a103abbed930cabc9567a1ba12f2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.5628132820129395, "incorrect_loss_raw": 7.39558732509613, "correct_loss_per_char": 0.42790871400099534, "incorrect_loss_per_char": 1.0747876248662434, "correct_loss_per_token": 2.7814066410064697, "incorrect_loss_per_token": 6.6240957379341125, "correct_loss_uncond": -12.678550243377686, "incorrect_loss_uncond": -7.9516841173172}, "model_output": [{"sum_logits": -9.401412963867188, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.301679611206055, "logits_per_token": -9.401412963867188, "logits_per_char": -1.3430589948381697, "num_chars": 7}, {"sum_logits": -8.801350593566895, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.748185157775879, "logits_per_token": -8.801350593566895, "logits_per_char": -1.760270118713379, "num_chars": 5}, {"sum_logits": -6.171932697296143, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.226425170898438, "logits_per_token": -3.0859663486480713, "logits_per_char": -0.6171932697296143, "num_chars": 10}, {"sum_logits": -5.207653045654297, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.11279582977295, "logits_per_token": -5.207653045654297, "logits_per_char": -0.5786281161838107, "num_chars": 9}, {"sum_logits": -5.5628132820129395, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.241363525390625, "logits_per_token": -2.7814066410064697, "logits_per_char": -0.42790871400099534, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 864, "native_id": "0c1efb38e023ee9725486fbec4f2d797", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.918023109436035, "incorrect_loss_raw": 11.667494297027588, "correct_loss_per_char": 0.5597175870622907, "incorrect_loss_per_char": 1.2741696488160592, "correct_loss_per_token": 3.918023109436035, "incorrect_loss_per_token": 10.296423316001892, "correct_loss_uncond": -8.525978088378906, "incorrect_loss_uncond": -3.6381402015686035}, "model_output": [{"sum_logits": -12.422388076782227, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -12.422388076782227, "logits_per_char": -1.7746268681117467, "num_chars": 7}, {"sum_logits": -10.014650344848633, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -10.014650344848633, "logits_per_char": -1.430664334978376, "num_chars": 7}, {"sum_logits": -13.264370918273926, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -13.264370918273926, "logits_per_char": -1.2058519016612659, "num_chars": 11}, {"sum_logits": -3.918023109436035, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -3.918023109436035, "logits_per_char": -0.5597175870622907, "num_chars": 7}, {"sum_logits": -10.968567848205566, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.01458740234375, "logits_per_token": -5.484283924102783, "logits_per_char": -0.6855354905128479, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 865, "native_id": "b7ab4a5e0c19a98f41cd1ba3176f2dff", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.316340923309326, "incorrect_loss_raw": 10.997690439224243, "correct_loss_per_char": 0.5316340923309326, "incorrect_loss_per_char": 1.4177614569664, "correct_loss_per_token": 5.316340923309326, "incorrect_loss_per_token": 8.156361818313599, "correct_loss_uncond": -8.682563304901123, "incorrect_loss_uncond": -2.4595587253570557}, "model_output": [{"sum_logits": -10.755949020385742, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.183961868286133, "logits_per_token": -10.755949020385742, "logits_per_char": -1.0755949020385742, "num_chars": 10}, {"sum_logits": -13.86857795715332, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.45909595489502, "logits_per_token": -6.93428897857666, "logits_per_char": -1.386857795715332, "num_chars": 10}, {"sum_logits": -5.316340923309326, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.99890422821045, "logits_per_token": -5.316340923309326, "logits_per_char": -0.5316340923309326, "num_chars": 10}, {"sum_logits": -8.862051010131836, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.136773109436035, "logits_per_token": -4.431025505065918, "logits_per_char": -1.1077563762664795, "num_chars": 8}, {"sum_logits": -10.504183769226074, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.049165725708008, "logits_per_token": -10.504183769226074, "logits_per_char": -2.100836753845215, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 866, "native_id": "8bcbb5098876940b2382db3a9a0b1beb", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.56553840637207, "incorrect_loss_raw": 10.63183307647705, "correct_loss_per_char": 0.7137948671976725, "incorrect_loss_per_char": 0.8207896454509718, "correct_loss_per_token": 2.85517946879069, "incorrect_loss_per_token": 5.00081213315328, "correct_loss_uncond": -8.317697525024414, "incorrect_loss_uncond": -7.398058891296387}, "model_output": [{"sum_logits": -7.562505722045898, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.918935775756836, "logits_per_token": -2.5208352406819663, "logits_per_char": -0.6875005201859907, "num_chars": 11}, {"sum_logits": -13.659713745117188, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.345237731933594, "logits_per_token": -6.829856872558594, "logits_per_char": -0.9106475830078125, "num_chars": 15}, {"sum_logits": -7.59490966796875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -3.797454833984375, "logits_per_char": -0.5424935477120536, "num_chars": 14}, {"sum_logits": -8.56553840637207, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.883235931396484, "logits_per_token": -2.85517946879069, "logits_per_char": -0.7137948671976725, "num_chars": 12}, {"sum_logits": -13.710203170776367, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.877809524536133, "logits_per_token": -6.855101585388184, "logits_per_char": -1.1425169308980305, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 867, "native_id": "c7ce02d9365fe9275f88338ad51cbde6", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9014978408813477, "incorrect_loss_raw": 11.514010190963745, "correct_loss_per_char": 0.48768723011016846, "incorrect_loss_per_char": 1.200796640108502, "correct_loss_per_token": 3.9014978408813477, "incorrect_loss_per_token": 8.044536590576172, "correct_loss_uncond": -9.907456398010254, "incorrect_loss_uncond": -5.184173107147217}, "model_output": [{"sum_logits": -3.9014978408813477, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.808954238891602, "logits_per_token": -3.9014978408813477, "logits_per_char": -0.48768723011016846, "num_chars": 8}, {"sum_logits": -14.45294189453125, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.09600830078125, "logits_per_token": -7.226470947265625, "logits_per_char": -0.8029412163628472, "num_chars": 18}, {"sum_logits": -8.842952728271484, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.863496780395508, "logits_per_token": -8.842952728271484, "logits_per_char": -1.473825454711914, "num_chars": 6}, {"sum_logits": -9.45729923248291, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.060309410095215, "logits_per_token": -9.45729923248291, "logits_per_char": -1.5762165387471516, "num_chars": 6}, {"sum_logits": -13.302846908569336, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.772918701171875, "logits_per_token": -6.651423454284668, "logits_per_char": -0.9502033506120954, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 868, "native_id": "fb54a118d46b2776e435d411ae3dd9c8", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.6302595138549805, "incorrect_loss_raw": 11.938560962677002, "correct_loss_per_char": 0.8287824392318726, "incorrect_loss_per_char": 1.4077790431487256, "correct_loss_per_token": 3.3151297569274902, "incorrect_loss_per_token": 8.826054573059082, "correct_loss_uncond": -6.490605354309082, "incorrect_loss_uncond": -4.7254979610443115}, "model_output": [{"sum_logits": -10.488916397094727, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.097559928894043, "logits_per_token": -10.488916397094727, "logits_per_char": -1.1654351552327473, "num_chars": 9}, {"sum_logits": -12.85104751586914, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.844573974609375, "logits_per_token": -6.42552375793457, "logits_per_char": -0.9885421166053185, "num_chars": 13}, {"sum_logits": -12.049003601074219, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.309917449951172, "logits_per_token": -6.024501800537109, "logits_per_char": -1.0040836334228516, "num_chars": 12}, {"sum_logits": -6.6302595138549805, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.120864868164062, "logits_per_token": -3.3151297569274902, "logits_per_char": -0.8287824392318726, "num_chars": 8}, {"sum_logits": -12.365276336669922, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.404184341430664, "logits_per_token": -12.365276336669922, "logits_per_char": -2.4730552673339843, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 869, "native_id": "2c13e6d61e3733db90a9fd22d72b3337", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9710512161254883, "incorrect_loss_raw": 7.693596363067627, "correct_loss_per_char": 0.24758760134379068, "incorrect_loss_per_char": 0.7520992155112918, "correct_loss_per_token": 1.4855256080627441, "incorrect_loss_per_token": 4.425103783607483, "correct_loss_uncond": -19.30202007293701, "incorrect_loss_uncond": -10.578893661499023}, "model_output": [{"sum_logits": -7.182927131652832, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.64804458618164, "logits_per_token": -3.591463565826416, "logits_per_char": -0.513066223689488, "num_chars": 14}, {"sum_logits": -4.6264448165893555, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.622055053710938, "logits_per_token": -4.6264448165893555, "logits_per_char": -0.5783056020736694, "num_chars": 8}, {"sum_logits": -2.9710512161254883, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -1.4855256080627441, "logits_per_char": -0.24758760134379068, "num_chars": 12}, {"sum_logits": -9.550178527832031, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.817781448364258, "logits_per_token": -4.775089263916016, "logits_per_char": -1.0611309475368924, "num_chars": 9}, {"sum_logits": -9.414834976196289, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -4.7074174880981445, "logits_per_char": -0.8558940887451172, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 870, "native_id": "350292ae429060a00ff2cf64d71558e4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.591440677642822, "incorrect_loss_raw": 8.245319545269012, "correct_loss_per_char": 0.39938861983163015, "incorrect_loss_per_char": 1.1320384393135707, "correct_loss_per_token": 2.795720338821411, "incorrect_loss_per_token": 8.245319545269012, "correct_loss_uncond": -11.857007503509521, "incorrect_loss_uncond": -5.967334091663361}, "model_output": [{"sum_logits": -9.758755683898926, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -9.758755683898926, "logits_per_char": -0.8132296403249105, "num_chars": 12}, {"sum_logits": -3.3295228481292725, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -3.3295228481292725, "logits_per_char": -0.8323807120323181, "num_chars": 4}, {"sum_logits": -7.348760604858398, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.912240028381348, "logits_per_token": -7.348760604858398, "logits_per_char": -1.8371901512145996, "num_chars": 4}, {"sum_logits": -12.544239044189453, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.179875373840332, "logits_per_token": -12.544239044189453, "logits_per_char": -1.0453532536824544, "num_chars": 12}, {"sum_logits": -5.591440677642822, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.448448181152344, "logits_per_token": -2.795720338821411, "logits_per_char": -0.39938861983163015, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 871, "native_id": "179fff4b5928e5ac3d3ae3e1db782547", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.344544887542725, "incorrect_loss_raw": 15.00322437286377, "correct_loss_per_char": 0.3103246348244803, "incorrect_loss_per_char": 1.2705499192078906, "correct_loss_per_token": 2.1722724437713623, "incorrect_loss_per_token": 6.865217049916585, "correct_loss_uncond": -13.847565174102783, "incorrect_loss_uncond": -6.5838234424591064}, "model_output": [{"sum_logits": -15.997798919677734, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.310131072998047, "logits_per_token": -7.998899459838867, "logits_per_char": -0.9998624324798584, "num_chars": 16}, {"sum_logits": -16.483226776123047, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -27.247451782226562, "logits_per_token": -5.494408925374349, "logits_per_char": -1.3736022313435872, "num_chars": 12}, {"sum_logits": -7.185403823852539, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -7.185403823852539, "logits_per_char": -1.4370807647705077, "num_chars": 5}, {"sum_logits": -4.344544887542725, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.192110061645508, "logits_per_token": -2.1722724437713623, "logits_per_char": -0.3103246348244803, "num_chars": 14}, {"sum_logits": -20.346467971801758, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.703123092651367, "logits_per_token": -6.782155990600586, "logits_per_char": -1.2716542482376099, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 872, "native_id": "81cc0d320488c7bacafb285cf7db5fbd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.8026857376098633, "incorrect_loss_raw": 7.432648301124573, "correct_loss_per_char": 0.23355714480082193, "incorrect_loss_per_char": 0.8058282799827747, "correct_loss_per_token": 2.8026857376098633, "incorrect_loss_per_token": 6.5753213961919155, "correct_loss_uncond": -13.174097061157227, "incorrect_loss_uncond": -8.307325005531311}, "model_output": [{"sum_logits": -10.652727127075195, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -10.652727127075195, "logits_per_char": -1.3315908908843994, "num_chars": 8}, {"sum_logits": -2.8026857376098633, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -2.8026857376098633, "logits_per_char": -0.23355714480082193, "num_chars": 12}, {"sum_logits": -5.143961429595947, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.712093353271484, "logits_per_token": -1.7146538098653157, "logits_per_char": -0.3214975893497467, "num_chars": 16}, {"sum_logits": -5.553442001342773, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.650419235229492, "logits_per_token": -5.553442001342773, "logits_per_char": -0.9255736668904623, "num_chars": 6}, {"sum_logits": -8.380462646484375, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -8.380462646484375, "logits_per_char": -0.6446509728064904, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 873, "native_id": "26c8a7165d0ed7250b9328f90d83ba83", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.429043769836426, "incorrect_loss_raw": 16.330541372299194, "correct_loss_per_char": 0.6286029179890951, "incorrect_loss_per_char": 1.187206340002847, "correct_loss_per_token": 3.143014589945475, "incorrect_loss_per_token": 5.759698510169983, "correct_loss_uncond": -9.260910987854004, "incorrect_loss_uncond": -1.3555305004119873}, "model_output": [{"sum_logits": -15.740375518798828, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.712406158447266, "logits_per_token": -3.935093879699707, "logits_per_char": -1.2107981168306792, "num_chars": 13}, {"sum_logits": -15.45862102508545, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.320322036743164, "logits_per_token": -7.729310512542725, "logits_per_char": -1.4053291840986772, "num_chars": 11}, {"sum_logits": -13.057985305786133, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.862110137939453, "logits_per_token": -4.352661768595378, "logits_per_char": -0.8161240816116333, "num_chars": 16}, {"sum_logits": -9.429043769836426, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.68995475769043, "logits_per_token": -3.143014589945475, "logits_per_char": -0.6286029179890951, "num_chars": 15}, {"sum_logits": -21.065183639526367, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.849449157714844, "logits_per_token": -7.021727879842122, "logits_per_char": -1.316573977470398, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 874, "native_id": "636fc69dee35cd357b4191b47e64d0e5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.270875930786133, "incorrect_loss_raw": 12.774352312088013, "correct_loss_per_char": 1.2838594913482666, "incorrect_loss_per_char": 1.3892379032240973, "correct_loss_per_token": 10.270875930786133, "incorrect_loss_per_token": 9.488753080368042, "correct_loss_uncond": -6.508331298828125, "incorrect_loss_uncond": -3.2537496089935303}, "model_output": [{"sum_logits": -12.349077224731445, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -12.349077224731445, "logits_per_char": -1.5436346530914307, "num_chars": 8}, {"sum_logits": -12.46353816986084, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -12.46353816986084, "logits_per_char": -1.3848375744289823, "num_chars": 9}, {"sum_logits": -10.648609161376953, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.941822052001953, "logits_per_token": -5.324304580688477, "logits_per_char": -1.0648609161376954, "num_chars": 10}, {"sum_logits": -15.636184692382812, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -7.818092346191406, "logits_per_char": -1.5636184692382813, "num_chars": 10}, {"sum_logits": -10.270875930786133, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.779207229614258, "logits_per_token": -10.270875930786133, "logits_per_char": -1.2838594913482666, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 875, "native_id": "f0c4622a082eb9ad0690dd36dcf61297", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.664796829223633, "incorrect_loss_raw": 12.191213607788086, "correct_loss_per_char": 0.5776531219482421, "incorrect_loss_per_char": 1.0624860260221693, "correct_loss_per_token": 4.332398414611816, "incorrect_loss_per_token": 5.329493856430053, "correct_loss_uncond": -10.821903228759766, "incorrect_loss_uncond": -8.32783055305481}, "model_output": [{"sum_logits": -18.912919998168945, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.872493743896484, "logits_per_token": -9.456459999084473, "logits_per_char": -1.5760766665140789, "num_chars": 12}, {"sum_logits": -2.0872554779052734, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.34796142578125, "logits_per_token": -2.0872554779052734, "logits_per_char": -0.5218138694763184, "num_chars": 4}, {"sum_logits": -5.276655197143555, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -5.276655197143555, "logits_per_char": -1.3191637992858887, "num_chars": 4}, {"sum_logits": -22.48802375793457, "num_tokens": 5, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -35.39161682128906, "logits_per_token": -4.497604751586914, "logits_per_char": -0.8328897688123915, "num_chars": 27}, {"sum_logits": -8.664796829223633, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.4867000579834, "logits_per_token": -4.332398414611816, "logits_per_char": -0.5776531219482421, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 876, "native_id": "4499ebd5e8188b0d5fdef6afd893017a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.978415012359619, "incorrect_loss_raw": 6.904777526855469, "correct_loss_per_char": 0.7956830024719238, "incorrect_loss_per_char": 0.7881263691668559, "correct_loss_per_token": 3.978415012359619, "incorrect_loss_per_token": 5.811421751976013, "correct_loss_uncond": -7.251275539398193, "incorrect_loss_uncond": -8.37078332901001}, "model_output": [{"sum_logits": -10.203055381774902, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.558618545532227, "logits_per_token": -10.203055381774902, "logits_per_char": -1.1336728201972113, "num_chars": 9}, {"sum_logits": -3.360330104827881, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.121448516845703, "logits_per_token": -3.360330104827881, "logits_per_char": -0.5600550174713135, "num_chars": 6}, {"sum_logits": -8.746846199035645, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.197805404663086, "logits_per_token": -4.373423099517822, "logits_per_char": -0.7951678362759677, "num_chars": 11}, {"sum_logits": -5.308878421783447, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -5.308878421783447, "logits_per_char": -0.6636098027229309, "num_chars": 8}, {"sum_logits": -3.978415012359619, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -3.978415012359619, "logits_per_char": -0.7956830024719238, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 877, "native_id": "230cc491829307e8edb5423c8d09f945", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.401900291442871, "incorrect_loss_raw": 15.95678186416626, "correct_loss_per_char": 0.826793352762858, "incorrect_loss_per_char": 1.3723511977629228, "correct_loss_per_token": 4.13396676381429, "incorrect_loss_per_token": 6.637823104858398, "correct_loss_uncond": -7.747679710388184, "incorrect_loss_uncond": -3.616300106048584}, "model_output": [{"sum_logits": -15.939598083496094, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.657976150512695, "logits_per_token": -7.969799041748047, "logits_per_char": -1.5939598083496094, "num_chars": 10}, {"sum_logits": -21.449085235595703, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -26.13020133972168, "logits_per_token": -5.362271308898926, "logits_per_char": -1.0724542617797852, "num_chars": 20}, {"sum_logits": -12.401900291442871, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.149580001831055, "logits_per_token": -4.13396676381429, "logits_per_char": -0.826793352762858, "num_chars": 15}, {"sum_logits": -14.133234024047852, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.084571838378906, "logits_per_token": -7.066617012023926, "logits_per_char": -1.2848394567316228, "num_chars": 11}, {"sum_logits": -12.30521011352539, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.419578552246094, "logits_per_token": -6.152605056762695, "logits_per_char": -1.5381512641906738, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 878, "native_id": "6163a897cd7eac1deddd4c002a1930ae", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.988812446594238, "incorrect_loss_raw": 9.629606127738953, "correct_loss_per_char": 0.39925416310628253, "incorrect_loss_per_char": 0.8012665123002142, "correct_loss_per_token": 1.9962708155314128, "incorrect_loss_per_token": 5.711422681808472, "correct_loss_uncond": -17.254088401794434, "incorrect_loss_uncond": -7.632177710533142}, "model_output": [{"sum_logits": -7.172956943511963, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -7.172956943511963, "logits_per_char": -0.7969952159457736, "num_chars": 9}, {"sum_logits": -8.477800369262695, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.44867515563965, "logits_per_token": -4.238900184631348, "logits_per_char": -0.4709889094034831, "num_chars": 18}, {"sum_logits": -10.138479232788086, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.442933082580566, "logits_per_token": -5.069239616394043, "logits_per_char": -0.7798830179067758, "num_chars": 13}, {"sum_logits": -12.729187965393066, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.02006721496582, "logits_per_token": -6.364593982696533, "logits_per_char": -1.1571989059448242, "num_chars": 11}, {"sum_logits": -5.988812446594238, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.242900848388672, "logits_per_token": -1.9962708155314128, "logits_per_char": -0.39925416310628253, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 879, "native_id": "55478486079423907508a06be13ca536", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 2.8215863704681396, "incorrect_loss_raw": 8.11470079421997, "correct_loss_per_char": 0.4030837672097342, "incorrect_loss_per_char": 1.3148883819580077, "correct_loss_per_token": 2.8215863704681396, "incorrect_loss_per_token": 6.794478893280029, "correct_loss_uncond": -9.327707529067993, "incorrect_loss_uncond": -7.897722244262695}, "model_output": [{"sum_logits": -9.61112117767334, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.091991424560547, "logits_per_token": -9.61112117767334, "logits_per_char": -1.922224235534668, "num_chars": 5}, {"sum_logits": -10.561775207519531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.00414276123047, "logits_per_token": -5.280887603759766, "logits_per_char": -0.8801479339599609, "num_chars": 12}, {"sum_logits": -2.8215863704681396, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -2.8215863704681396, "logits_per_char": -0.4030837672097342, "num_chars": 7}, {"sum_logits": -7.168905258178711, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.648334503173828, "logits_per_token": -7.168905258178711, "logits_per_char": -1.433781051635742, "num_chars": 5}, {"sum_logits": -5.117001533508301, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -5.117001533508301, "logits_per_char": -1.0234003067016602, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 880, "native_id": "4fa0d61ec82eb1e238d8938d5f43f392", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.320100784301758, "incorrect_loss_raw": 14.115853071212769, "correct_loss_per_char": 0.9477000603309045, "incorrect_loss_per_char": 1.4801425184522357, "correct_loss_per_token": 4.106700261433919, "incorrect_loss_per_token": 7.434809327125549, "correct_loss_uncond": -6.903860092163086, "incorrect_loss_uncond": -4.2377095222473145}, "model_output": [{"sum_logits": -12.776126861572266, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.613496780395508, "logits_per_token": -6.388063430786133, "logits_per_char": -1.2776126861572266, "num_chars": 10}, {"sum_logits": -20.284767150878906, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -23.017406463623047, "logits_per_token": -6.761589050292969, "logits_per_char": -1.4489119393484933, "num_chars": 14}, {"sum_logits": -13.625866889953613, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.813697814941406, "logits_per_token": -6.812933444976807, "logits_per_char": -1.2387151718139648, "num_chars": 11}, {"sum_logits": -12.320100784301758, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.223960876464844, "logits_per_token": -4.106700261433919, "logits_per_char": -0.9477000603309045, "num_chars": 13}, {"sum_logits": -9.776651382446289, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -9.776651382446289, "logits_per_char": -1.9553302764892577, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 881, "native_id": "b4f79ca5f3595248ee25292ab60ad105", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.463090896606445, "incorrect_loss_raw": 11.680076360702515, "correct_loss_per_char": 0.7885909080505371, "incorrect_loss_per_char": 1.1160731808103697, "correct_loss_per_token": 4.731545448303223, "incorrect_loss_per_token": 5.234954595565796, "correct_loss_uncond": -10.109533309936523, "incorrect_loss_uncond": -8.041624784469604}, "model_output": [{"sum_logits": -9.463090896606445, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -19.57262420654297, "logits_per_token": -4.731545448303223, "logits_per_char": -0.7885909080505371, "num_chars": 12}, {"sum_logits": -14.522006034851074, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -25.582637786865234, "logits_per_token": -4.840668678283691, "logits_per_char": -1.320182366804643, "num_chars": 11}, {"sum_logits": -10.039389610290527, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.857383728027344, "logits_per_token": -5.019694805145264, "logits_per_char": -1.0039389610290528, "num_chars": 10}, {"sum_logits": -11.589468955993652, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.348520278930664, "logits_per_token": -5.794734477996826, "logits_per_char": -0.9657890796661377, "num_chars": 12}, {"sum_logits": -10.569440841674805, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.098262786865234, "logits_per_token": -5.284720420837402, "logits_per_char": -1.1743823157416449, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 882, "native_id": "c39131d979c9205c11d0e109e18188e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.79008960723877, "incorrect_loss_raw": 13.28610634803772, "correct_loss_per_char": 0.7523582121905159, "incorrect_loss_per_char": 1.818979161977768, "correct_loss_per_token": 4.263363202412923, "incorrect_loss_per_token": 11.721680283546448, "correct_loss_uncond": -7.318846702575684, "incorrect_loss_uncond": -2.10183048248291}, "model_output": [{"sum_logits": -12.485815048217773, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.648334503173828, "logits_per_token": -12.485815048217773, "logits_per_char": -2.497163009643555, "num_chars": 5}, {"sum_logits": -12.515408515930176, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.774613380432129, "logits_per_token": -6.257704257965088, "logits_per_char": -1.564426064491272, "num_chars": 8}, {"sum_logits": -14.600220680236816, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -14.600220680236816, "logits_per_char": -2.0857458114624023, "num_chars": 7}, {"sum_logits": -13.542981147766113, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -13.542981147766113, "logits_per_char": -1.1285817623138428, "num_chars": 12}, {"sum_logits": -12.79008960723877, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -4.263363202412923, "logits_per_char": -0.7523582121905159, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 883, "native_id": "bd773d64f4e22db2358c6e00cbdf2d83", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.33509635925293, "incorrect_loss_raw": 8.539996862411499, "correct_loss_per_char": 1.190728051321847, "incorrect_loss_per_char": 1.3316005144800458, "correct_loss_per_token": 8.33509635925293, "incorrect_loss_per_token": 8.539996862411499, "correct_loss_uncond": -7.209095001220703, "incorrect_loss_uncond": -5.8518171310424805}, "model_output": [{"sum_logits": -8.33509635925293, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -8.33509635925293, "logits_per_char": -1.190728051321847, "num_chars": 7}, {"sum_logits": -6.959120750427246, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.546051979064941, "logits_per_token": -6.959120750427246, "logits_per_char": -1.3918241500854491, "num_chars": 5}, {"sum_logits": -9.81671142578125, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.889263153076172, "logits_per_token": -9.81671142578125, "logits_per_char": -1.4023873465401786, "num_chars": 7}, {"sum_logits": -8.620107650756836, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -8.620107650756836, "logits_per_char": -1.4366846084594727, "num_chars": 6}, {"sum_logits": -8.764047622680664, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -8.764047622680664, "logits_per_char": -1.095505952835083, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 884, "native_id": "2b416120e2fbd84b44b5dcd4eb42ed5c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4690723419189453, "incorrect_loss_raw": 7.915924072265625, "correct_loss_per_char": 0.20406307893640854, "incorrect_loss_per_char": 0.9802082684304979, "correct_loss_per_token": 1.7345361709594727, "incorrect_loss_per_token": 6.757613182067871, "correct_loss_uncond": -14.786090850830078, "incorrect_loss_uncond": -7.49726128578186}, "model_output": [{"sum_logits": -8.294181823730469, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.630324363708496, "logits_per_token": -8.294181823730469, "logits_per_char": -1.0367727279663086, "num_chars": 8}, {"sum_logits": -7.027354717254639, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.797646522521973, "logits_per_token": -7.027354717254639, "logits_per_char": -1.1712257862091064, "num_chars": 6}, {"sum_logits": -9.266487121582031, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.51049041748047, "logits_per_token": -4.633243560791016, "logits_per_char": -0.9266487121582031, "num_chars": 10}, {"sum_logits": -3.4690723419189453, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.255163192749023, "logits_per_token": -1.7345361709594727, "logits_per_char": -0.20406307893640854, "num_chars": 17}, {"sum_logits": -7.075672626495361, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -7.075672626495361, "logits_per_char": -0.7861858473883735, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 885, "native_id": "cef855ec07c66a731741026c2839b0d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.301647186279297, "incorrect_loss_raw": 10.586243152618408, "correct_loss_per_char": 0.7301647186279296, "incorrect_loss_per_char": 1.1397407249944405, "correct_loss_per_token": 3.6508235931396484, "incorrect_loss_per_token": 6.355920195579529, "correct_loss_uncond": -8.938627243041992, "incorrect_loss_uncond": -6.593201160430908}, "model_output": [{"sum_logits": -12.400761604309082, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.040197372436523, "logits_per_token": -6.200380802154541, "logits_per_char": -1.377862400478787, "num_chars": 9}, {"sum_logits": -8.502388954162598, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.656972885131836, "logits_per_token": -8.502388954162598, "logits_per_char": -1.2146269934517997, "num_chars": 7}, {"sum_logits": -7.301647186279297, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.24027442932129, "logits_per_token": -3.6508235931396484, "logits_per_char": -0.7301647186279296, "num_chars": 10}, {"sum_logits": -12.166571617126465, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.155887603759766, "logits_per_token": -6.083285808563232, "logits_per_char": -0.9358901243943435, "num_chars": 13}, {"sum_logits": -9.275250434875488, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -15.86471939086914, "logits_per_token": -4.637625217437744, "logits_per_char": -1.030583381652832, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 886, "native_id": "0bbb82c1dc4bfd3b0e0c409a0afd248b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.924507141113281, "incorrect_loss_raw": 11.58826744556427, "correct_loss_per_char": 0.9022279219193892, "incorrect_loss_per_char": 1.4579415063665373, "correct_loss_per_token": 9.924507141113281, "incorrect_loss_per_token": 7.730399250984192, "correct_loss_uncond": -5.1664276123046875, "incorrect_loss_uncond": -5.394717335700989}, "model_output": [{"sum_logits": -9.924507141113281, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.090934753417969, "logits_per_token": -9.924507141113281, "logits_per_char": -0.9022279219193892, "num_chars": 11}, {"sum_logits": -19.185312271118164, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -9.592656135559082, "logits_per_char": -1.7441192973743787, "num_chars": 11}, {"sum_logits": -6.253336429595947, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.406512260437012, "logits_per_token": -6.253336429595947, "logits_per_char": -1.2506672859191894, "num_chars": 5}, {"sum_logits": -11.677633285522461, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.68996238708496, "logits_per_token": -5.8388166427612305, "logits_per_char": -1.2975148095024958, "num_chars": 9}, {"sum_logits": -9.236787796020508, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -9.236787796020508, "logits_per_char": -1.5394646326700847, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 887, "native_id": "67beae081a9b5ef56988f205f80cf129", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.204806327819824, "incorrect_loss_raw": 9.626125574111938, "correct_loss_per_char": 0.35608959197998047, "incorrect_loss_per_char": 0.984735818342729, "correct_loss_per_token": 3.204806327819824, "incorrect_loss_per_token": 7.997167587280273, "correct_loss_uncond": -9.148329734802246, "incorrect_loss_uncond": -4.3052756786346436}, "model_output": [{"sum_logits": -8.577544212341309, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.961477279663086, "logits_per_token": -8.577544212341309, "logits_per_char": -0.7797767465764825, "num_chars": 11}, {"sum_logits": -13.03166389465332, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.40397071838379, "logits_per_token": -6.51583194732666, "logits_per_char": -1.303166389465332, "num_chars": 10}, {"sum_logits": -3.204806327819824, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.35313606262207, "logits_per_token": -3.204806327819824, "logits_per_char": -0.35608959197998047, "num_chars": 9}, {"sum_logits": -10.236465454101562, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.47290325164795, "logits_per_token": -10.236465454101562, "logits_per_char": -1.0236465454101562, "num_chars": 10}, {"sum_logits": -6.6588287353515625, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -8.887253761291504, "logits_per_token": -6.6588287353515625, "logits_per_char": -0.8323535919189453, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 888, "native_id": "3b4dcfcab4726496bdbe9535cc669082", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.785244941711426, "incorrect_loss_raw": 9.299772143363953, "correct_loss_per_char": 0.39877041180928546, "incorrect_loss_per_char": 0.8967234218662435, "correct_loss_per_token": 1.5950816472371419, "incorrect_loss_per_token": 6.255581736564636, "correct_loss_uncond": -11.358210563659668, "incorrect_loss_uncond": -6.251149535179138}, "model_output": [{"sum_logits": -5.837157249450684, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.908309936523438, "logits_per_token": -5.837157249450684, "logits_per_char": -0.5837157249450684, "num_chars": 10}, {"sum_logits": -11.46152114868164, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.29062843322754, "logits_per_token": -5.73076057434082, "logits_per_char": -0.9551267623901367, "num_chars": 12}, {"sum_logits": -12.89200210571289, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.007293701171875, "logits_per_token": -6.446001052856445, "logits_per_char": -1.1720001914284446, "num_chars": 11}, {"sum_logits": -4.785244941711426, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.143455505371094, "logits_per_token": -1.5950816472371419, "logits_per_char": -0.39877041180928546, "num_chars": 12}, {"sum_logits": -7.008408069610596, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.997454643249512, "logits_per_token": -7.008408069610596, "logits_per_char": -0.8760510087013245, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 889, "native_id": "eebddf5f35d85e9fe2ecbd9b56c1db60", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.09979772567749, "incorrect_loss_raw": 10.284947156906128, "correct_loss_per_char": 0.6454361568797718, "incorrect_loss_per_char": 1.0982916752497356, "correct_loss_per_token": 3.549898862838745, "incorrect_loss_per_token": 7.456866025924683, "correct_loss_uncond": -13.902281284332275, "incorrect_loss_uncond": -6.313874959945679}, "model_output": [{"sum_logits": -7.09979772567749, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.002079010009766, "logits_per_token": -3.549898862838745, "logits_per_char": -0.6454361568797718, "num_chars": 11}, {"sum_logits": -10.405391693115234, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -10.405391693115234, "logits_per_char": -1.1561546325683594, "num_chars": 9}, {"sum_logits": -8.109747886657715, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -8.109747886657715, "logits_per_char": -1.3516246477762859, "num_chars": 6}, {"sum_logits": -9.475350379943848, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -4.737675189971924, "logits_per_char": -0.7896125316619873, "num_chars": 12}, {"sum_logits": -13.149298667907715, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -6.574649333953857, "logits_per_char": -1.0957748889923096, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 890, "native_id": "5393ba1ce298bd1ac4744c07d7373a9c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.772877216339111, "incorrect_loss_raw": 7.4938119649887085, "correct_loss_per_char": 0.7216096520423889, "incorrect_loss_per_char": 0.8396042368628761, "correct_loss_per_token": 5.772877216339111, "incorrect_loss_per_token": 7.4938119649887085, "correct_loss_uncond": -7.419527530670166, "incorrect_loss_uncond": -6.976843476295471}, "model_output": [{"sum_logits": -6.096421241760254, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.286706924438477, "logits_per_token": -6.096421241760254, "logits_per_char": -0.8709173202514648, "num_chars": 7}, {"sum_logits": -10.857994079589844, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.720630645751953, "logits_per_token": -10.857994079589844, "logits_per_char": -0.987090370871804, "num_chars": 11}, {"sum_logits": -6.096421241760254, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.286706924438477, "logits_per_token": -6.096421241760254, "logits_per_char": -0.8709173202514648, "num_chars": 7}, {"sum_logits": -5.772877216339111, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.192404747009277, "logits_per_token": -5.772877216339111, "logits_per_char": -0.7216096520423889, "num_chars": 8}, {"sum_logits": -6.924411296844482, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.588577270507812, "logits_per_token": -6.924411296844482, "logits_per_char": -0.6294919360767711, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 891, "native_id": "fde48d43e27cefed6ed9c52514e0bb6d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.829459190368652, "incorrect_loss_raw": 8.68584668636322, "correct_loss_per_char": 0.7829459190368653, "incorrect_loss_per_char": 0.92256970902284, "correct_loss_per_token": 2.6098197301228843, "incorrect_loss_per_token": 4.404995401700338, "correct_loss_uncond": -10.461865425109863, "incorrect_loss_uncond": -8.02891981601715}, "model_output": [{"sum_logits": -7.829459190368652, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.291324615478516, "logits_per_token": -2.6098197301228843, "logits_per_char": -0.7829459190368653, "num_chars": 10}, {"sum_logits": -11.067737579345703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.686290740966797, "logits_per_token": -5.533868789672852, "logits_per_char": -1.383467197418213, "num_chars": 8}, {"sum_logits": -8.780122756958008, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.903614044189453, "logits_per_token": -2.9267075856526694, "logits_per_char": -0.7316768964131674, "num_chars": 12}, {"sum_logits": -11.47224235534668, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.555559158325195, "logits_per_token": -5.73612117767334, "logits_per_char": -1.147224235534668, "num_chars": 10}, {"sum_logits": -3.4232840538024902, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.713602066040039, "logits_per_token": -3.4232840538024902, "logits_per_char": -0.4279105067253113, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 892, "native_id": "da83d85e28778c082d9a63f5b890b26d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.525413990020752, "incorrect_loss_raw": 11.136490285396576, "correct_loss_per_char": 0.4350275993347168, "incorrect_loss_per_char": 1.0144109591347141, "correct_loss_per_token": 3.262706995010376, "incorrect_loss_per_token": 8.057787835597992, "correct_loss_uncond": -12.391612529754639, "incorrect_loss_uncond": -5.482093870639801}, "model_output": [{"sum_logits": -6.525413990020752, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -3.262706995010376, "logits_per_char": -0.4350275993347168, "num_chars": 15}, {"sum_logits": -5.879647254943848, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.606099128723145, "logits_per_token": -5.879647254943848, "logits_per_char": -0.6532941394382052, "num_chars": 9}, {"sum_logits": -3.211871862411499, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.145611763000488, "logits_per_token": -3.211871862411499, "logits_per_char": -0.2919883511283181, "num_chars": 11}, {"sum_logits": -10.824822425842285, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.790460586547852, "logits_per_token": -10.824822425842285, "logits_per_char": -1.3531028032302856, "num_chars": 8}, {"sum_logits": -24.629619598388672, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -23.932165145874023, "logits_per_token": -12.314809799194336, "logits_per_char": -1.759258542742048, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 893, "native_id": "cfa980561efe82e7ae7080d4f081b463", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.767515182495117, "incorrect_loss_raw": 12.956223011016846, "correct_loss_per_char": 0.2691082273210798, "incorrect_loss_per_char": 1.2074820267386128, "correct_loss_per_token": 1.8837575912475586, "incorrect_loss_per_token": 9.343159914016724, "correct_loss_uncond": -13.533699035644531, "incorrect_loss_uncond": -4.098935604095459}, "model_output": [{"sum_logits": -12.953296661376953, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.858963012695312, "logits_per_token": -12.953296661376953, "logits_per_char": -1.850470951625279, "num_chars": 7}, {"sum_logits": -9.967090606689453, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.409046173095703, "logits_per_token": -9.967090606689453, "logits_per_char": -1.4238700866699219, "num_chars": 7}, {"sum_logits": -12.912504196166992, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.417736053466797, "logits_per_token": -6.456252098083496, "logits_per_char": -0.6148811521984282, "num_chars": 21}, {"sum_logits": -3.767515182495117, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.30121421813965, "logits_per_token": -1.8837575912475586, "logits_per_char": -0.2691082273210798, "num_chars": 14}, {"sum_logits": -15.992000579833984, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.534889221191406, "logits_per_token": -7.996000289916992, "logits_per_char": -0.9407059164608226, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 894, "native_id": "384b89e789e0f4b4796120394fb6303b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.030477523803711, "incorrect_loss_raw": 12.856734275817871, "correct_loss_per_char": 0.472381030811983, "incorrect_loss_per_char": 1.2282114356171852, "correct_loss_per_token": 4.0152387619018555, "incorrect_loss_per_token": 8.30587323506673, "correct_loss_uncond": -9.238798141479492, "incorrect_loss_uncond": -6.237020969390869}, "model_output": [{"sum_logits": -12.15997314453125, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.698734283447266, "logits_per_token": -12.15997314453125, "logits_per_char": -2.0266621907552085, "num_chars": 6}, {"sum_logits": -11.961797714233398, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.055395126342773, "logits_per_token": -11.961797714233398, "logits_per_char": -1.329088634914822, "num_chars": 9}, {"sum_logits": -15.021947860717773, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.810890197753906, "logits_per_token": -5.007315953572591, "logits_per_char": -0.8345526589287652, "num_chars": 18}, {"sum_logits": -8.030477523803711, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.269275665283203, "logits_per_token": -4.0152387619018555, "logits_per_char": -0.472381030811983, "num_chars": 17}, {"sum_logits": -12.283218383789062, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.810001373291016, "logits_per_token": -4.0944061279296875, "logits_per_char": -0.7225422578699449, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 895, "native_id": "0d66d33a17e41eaa3278ca7b3930c5ea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.9670798778533936, "incorrect_loss_raw": 8.275079488754272, "correct_loss_per_char": 0.5667256968361991, "incorrect_loss_per_char": 1.2054931169206446, "correct_loss_per_token": 3.9670798778533936, "incorrect_loss_per_token": 6.276096224784851, "correct_loss_uncond": -9.54323410987854, "incorrect_loss_uncond": -6.12747597694397}, "model_output": [{"sum_logits": -8.110671997070312, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.903755187988281, "logits_per_token": -8.110671997070312, "logits_per_char": -2.027667999267578, "num_chars": 4}, {"sum_logits": -6.329428672790527, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -3.1647143363952637, "logits_per_char": -0.7911785840988159, "num_chars": 8}, {"sum_logits": -8.997779846191406, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.727229118347168, "logits_per_token": -8.997779846191406, "logits_per_char": -1.1247224807739258, "num_chars": 8}, {"sum_logits": -9.662437438964844, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.13006019592285, "logits_per_token": -4.831218719482422, "logits_per_char": -0.8784034035422585, "num_chars": 11}, {"sum_logits": -3.9670798778533936, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.510313987731934, "logits_per_token": -3.9670798778533936, "logits_per_char": -0.5667256968361991, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 896, "native_id": "732183ead4206e51ed4df18b9c9f14fe", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.2371490001678467, "incorrect_loss_raw": 8.842162370681763, "correct_loss_per_char": 0.18642908334732056, "incorrect_loss_per_char": 1.668806154387338, "correct_loss_per_token": 1.1185745000839233, "incorrect_loss_per_token": 8.842162370681763, "correct_loss_uncond": -18.94225239753723, "incorrect_loss_uncond": -5.006034851074219}, "model_output": [{"sum_logits": -4.665578842163086, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.322771072387695, "logits_per_token": -4.665578842163086, "logits_per_char": -1.1663947105407715, "num_chars": 4}, {"sum_logits": -2.2371490001678467, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.179401397705078, "logits_per_token": -1.1185745000839233, "logits_per_char": -0.18642908334732056, "num_chars": 12}, {"sum_logits": -11.05622386932373, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.541668891906738, "logits_per_token": -11.05622386932373, "logits_per_char": -1.579460552760533, "num_chars": 7}, {"sum_logits": -9.984111785888672, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.738322257995605, "logits_per_token": -9.984111785888672, "logits_per_char": -1.9968223571777344, "num_chars": 5}, {"sum_logits": -9.662734985351562, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.790026664733887, "logits_per_token": -9.662734985351562, "logits_per_char": -1.9325469970703124, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 897, "native_id": "2632ff6c9b781d3aa74e8dd36b990871", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7312591075897217, "incorrect_loss_raw": 9.994820833206177, "correct_loss_per_char": 0.2164073884487152, "incorrect_loss_per_char": 0.9115556721503918, "correct_loss_per_token": 0.8656295537948608, "incorrect_loss_per_token": 8.096785306930542, "correct_loss_uncond": -14.9098961353302, "incorrect_loss_uncond": -4.408734321594238}, "model_output": [{"sum_logits": -7.5139594078063965, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -11.582568168640137, "logits_per_token": -7.5139594078063965, "logits_per_char": -0.9392449259757996, "num_chars": 8}, {"sum_logits": -1.7312591075897217, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": true, "sum_logits_uncond": -16.641155242919922, "logits_per_token": -0.8656295537948608, "logits_per_char": -0.2164073884487152, "num_chars": 8}, {"sum_logits": -7.59993314743042, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -11.762351036071777, "logits_per_token": -7.59993314743042, "logits_per_char": -0.9499916434288025, "num_chars": 8}, {"sum_logits": -9.681106567382812, "num_tokens": 1, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -9.681106567382812, "logits_per_char": -0.7447005051832932, "num_chars": 13}, {"sum_logits": -15.184284210205078, "num_tokens": 2, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -19.627140045166016, "logits_per_token": -7.592142105102539, "logits_per_char": -1.0122856140136718, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 898, "native_id": "63db79b940f36f0333377f85c19eacb2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.032130241394043, "incorrect_loss_raw": 10.460663914680481, "correct_loss_per_char": 0.3360108534495036, "incorrect_loss_per_char": 0.7349853389538251, "correct_loss_per_token": 4.032130241394043, "incorrect_loss_per_token": 5.2303319573402405, "correct_loss_uncond": -9.483189582824707, "incorrect_loss_uncond": -6.639097571372986}, "model_output": [{"sum_logits": -13.156582832336426, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.079998016357422, "logits_per_token": -6.578291416168213, "logits_per_char": -0.8222864270210266, "num_chars": 16}, {"sum_logits": -4.032130241394043, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.51531982421875, "logits_per_token": -4.032130241394043, "logits_per_char": -0.3360108534495036, "num_chars": 12}, {"sum_logits": -11.76162052154541, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.364669799804688, "logits_per_token": -5.880810260772705, "logits_per_char": -0.8401157515389579, "num_chars": 14}, {"sum_logits": -4.430202007293701, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.755987167358398, "logits_per_token": -2.2151010036468506, "logits_per_char": -0.3164430005209787, "num_chars": 14}, {"sum_logits": -12.494250297546387, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.19839096069336, "logits_per_token": -6.247125148773193, "logits_per_char": -0.9610961767343374, "num_chars": 13}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 899, "native_id": "1520a8fd3116e7b856947c5e308d7ce5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.48714828491211, "incorrect_loss_raw": 9.952301621437073, "correct_loss_per_char": 0.8487148284912109, "incorrect_loss_per_char": 1.074808795300741, "correct_loss_per_token": 8.48714828491211, "incorrect_loss_per_token": 7.6465455293655396, "correct_loss_uncond": -5.003084182739258, "incorrect_loss_uncond": -5.646099925041199}, "model_output": [{"sum_logits": -18.446048736572266, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -23.25450325012207, "logits_per_token": -9.223024368286133, "logits_per_char": -1.1528780460357666, "num_chars": 16}, {"sum_logits": -4.835113048553467, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.577110290527344, "logits_per_token": -4.835113048553467, "logits_per_char": -0.6907304355076381, "num_chars": 7}, {"sum_logits": -8.48714828491211, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -8.48714828491211, "logits_per_char": -0.8487148284912109, "num_chars": 10}, {"sum_logits": -11.145191192626953, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -11.145191192626953, "logits_per_char": -1.8575318654378254, "num_chars": 6}, {"sum_logits": -5.3828535079956055, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -5.3828535079956055, "logits_per_char": -0.5980948342217339, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 900, "native_id": "bd780fea2d4dd262583446e64c0f314d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.898829460144043, "incorrect_loss_raw": 8.055671095848083, "correct_loss_per_char": 0.2721571922302246, "incorrect_loss_per_char": 1.1806376863803183, "correct_loss_per_token": 2.4494147300720215, "incorrect_loss_per_token": 6.522323191165924, "correct_loss_uncond": -16.467751502990723, "incorrect_loss_uncond": -7.8951274156570435}, "model_output": [{"sum_logits": -9.89514446258545, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.04128646850586, "logits_per_token": -9.89514446258545, "logits_per_char": -1.4135920660836356, "num_chars": 7}, {"sum_logits": -10.06075668334961, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.830101013183594, "logits_per_token": -10.06075668334961, "logits_per_char": -2.5151891708374023, "num_chars": 4}, {"sum_logits": -4.898829460144043, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -21.366580963134766, "logits_per_token": -2.4494147300720215, "logits_per_char": -0.2721571922302246, "num_chars": 18}, {"sum_logits": -5.76384973526001, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.651273727416992, "logits_per_token": -2.881924867630005, "logits_per_char": -0.3602406084537506, "num_chars": 16}, {"sum_logits": -6.502933502197266, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -19.280532836914062, "logits_per_token": -3.251466751098633, "logits_per_char": -0.4335289001464844, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 901, "native_id": "99e0b2ddf88ebed98b977043b7c2331b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.478778839111328, "incorrect_loss_raw": 11.106783628463745, "correct_loss_per_char": 1.0531976487901475, "incorrect_loss_per_char": 1.091514940695329, "correct_loss_per_token": 3.159592946370443, "incorrect_loss_per_token": 6.478773832321167, "correct_loss_uncond": -6.1565141677856445, "incorrect_loss_uncond": -6.552556991577148}, "model_output": [{"sum_logits": -7.4030561447143555, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -7.4030561447143555, "logits_per_char": -0.7403056144714355, "num_chars": 10}, {"sum_logits": -11.224992752075195, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.69115447998047, "logits_per_token": -5.612496376037598, "logits_per_char": -1.1224992752075196, "num_chars": 10}, {"sum_logits": -17.36717987060547, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -19.673725128173828, "logits_per_token": -8.683589935302734, "logits_per_char": -1.736717987060547, "num_chars": 10}, {"sum_logits": -8.431905746459961, "num_tokens": 2, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -4.2159528732299805, "logits_per_char": -0.7665368860418146, "num_chars": 11}, {"sum_logits": -9.478778839111328, "num_tokens": 3, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -3.159592946370443, "logits_per_char": -1.0531976487901475, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 902, "native_id": "eb0e0c4eaf19c1e9b4df3b4d3a11be3d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.814272880554199, "incorrect_loss_raw": 11.213325023651123, "correct_loss_per_char": 0.9734675543648856, "incorrect_loss_per_char": 1.6822007939929053, "correct_loss_per_token": 6.814272880554199, "incorrect_loss_per_token": 11.213325023651123, "correct_loss_uncond": -7.074990272521973, "incorrect_loss_uncond": -3.7290260791778564}, "model_output": [{"sum_logits": -12.597613334655762, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -12.597613334655762, "logits_per_char": -1.3997348149617512, "num_chars": 9}, {"sum_logits": -6.776315689086914, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -6.776315689086914, "logits_per_char": -1.3552631378173827, "num_chars": 5}, {"sum_logits": -6.814272880554199, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.889263153076172, "logits_per_token": -6.814272880554199, "logits_per_char": -0.9734675543648856, "num_chars": 7}, {"sum_logits": -14.023592948913574, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.682865142822266, "logits_per_token": -14.023592948913574, "logits_per_char": -2.3372654914855957, "num_chars": 6}, {"sum_logits": -11.455778121948242, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -11.455778121948242, "logits_per_char": -1.6365397317068917, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 903, "native_id": "467a3b464b08b3ffc9922e2a726554f6", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.682666778564453, "incorrect_loss_raw": 9.725268840789795, "correct_loss_per_char": 0.7460392222684973, "incorrect_loss_per_char": 1.0321454216490733, "correct_loss_per_token": 6.341333389282227, "incorrect_loss_per_token": 6.767512202262878, "correct_loss_uncond": -7.626224517822266, "incorrect_loss_uncond": -6.5609657764434814}, "model_output": [{"sum_logits": -7.882482528686523, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.445460319519043, "logits_per_token": -7.882482528686523, "logits_per_char": -1.1260689326695033, "num_chars": 7}, {"sum_logits": -12.682666778564453, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.30889129638672, "logits_per_token": -6.341333389282227, "logits_per_char": -0.7460392222684973, "num_chars": 17}, {"sum_logits": -11.387452125549316, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.601245880126953, "logits_per_token": -5.693726062774658, "logits_per_char": -1.1387452125549316, "num_chars": 10}, {"sum_logits": -12.274600982666016, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.12040138244629, "logits_per_token": -6.137300491333008, "logits_per_char": -0.9442000755896935, "num_chars": 13}, {"sum_logits": -7.356539726257324, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.97783088684082, "logits_per_token": -7.356539726257324, "logits_per_char": -0.9195674657821655, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 904, "native_id": "dea70fe40fac9ad03bf319bf8a480efa", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.8626983165740967, "incorrect_loss_raw": 10.57300329208374, "correct_loss_per_char": 0.4771163860956828, "incorrect_loss_per_char": 1.268044278356764, "correct_loss_per_token": 2.8626983165740967, "incorrect_loss_per_token": 6.974130153656006, "correct_loss_uncond": -11.713440179824829, "incorrect_loss_uncond": -6.821961164474487}, "model_output": [{"sum_logits": -5.186047554016113, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.082191467285156, "logits_per_token": -5.186047554016113, "logits_per_char": -0.8643412590026855, "num_chars": 6}, {"sum_logits": -16.48479652404785, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -23.74078369140625, "logits_per_token": -8.242398262023926, "logits_per_char": -1.177485466003418, "num_chars": 14}, {"sum_logits": -8.314980506896973, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.80953311920166, "logits_per_token": -8.314980506896973, "logits_per_char": -1.6629961013793946, "num_chars": 5}, {"sum_logits": -2.8626983165740967, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.576138496398926, "logits_per_token": -2.8626983165740967, "logits_per_char": -0.4771163860956828, "num_chars": 6}, {"sum_logits": -12.306188583374023, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.947349548339844, "logits_per_token": -6.153094291687012, "logits_per_char": -1.3673542870415583, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 905, "native_id": "2f1680da0d388a8453150ff3637e4689", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.936952590942383, "incorrect_loss_raw": 10.142255425453186, "correct_loss_per_char": 0.48949209849039715, "incorrect_loss_per_char": 0.9139664383270801, "correct_loss_per_token": 2.936952590942383, "incorrect_loss_per_token": 5.518487453460693, "correct_loss_uncond": -10.955524444580078, "incorrect_loss_uncond": -8.34020483493805}, "model_output": [{"sum_logits": -14.20380687713623, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.272645950317383, "logits_per_token": -7.101903438568115, "logits_per_char": -1.0926005290104792, "num_chars": 13}, {"sum_logits": -5.209057807922363, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.815526962280273, "logits_per_token": -5.209057807922363, "logits_per_char": -0.8681763013203939, "num_chars": 6}, {"sum_logits": -5.96526575088501, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.605565071105957, "logits_per_token": -5.96526575088501, "logits_per_char": -0.7456582188606262, "num_chars": 8}, {"sum_logits": -15.19089126586914, "num_tokens": 4, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.236103057861328, "logits_per_token": -3.797722816467285, "logits_per_char": -0.9494307041168213, "num_chars": 16}, {"sum_logits": -2.936952590942383, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.892477035522461, "logits_per_token": -2.936952590942383, "logits_per_char": -0.48949209849039715, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 906, "native_id": "8369adc4b4710d00f917d80a75d844d7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.376569747924805, "incorrect_loss_raw": 15.400382995605469, "correct_loss_per_char": 0.6485356092453003, "incorrect_loss_per_char": 2.1826009455480073, "correct_loss_per_token": 5.188284873962402, "incorrect_loss_per_token": 12.598071336746216, "correct_loss_uncond": -9.265035629272461, "incorrect_loss_uncond": 0.47033238410949707}, "model_output": [{"sum_logits": -22.418493270874023, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.609106063842773, "logits_per_token": -11.209246635437012, "logits_per_char": -1.1799206984670538, "num_chars": 19}, {"sum_logits": -17.1739559173584, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.000298500061035, "logits_per_token": -17.1739559173584, "logits_per_char": -3.4347911834716798, "num_chars": 5}, {"sum_logits": -10.376569747924805, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.641605377197266, "logits_per_token": -5.188284873962402, "logits_per_char": -0.6485356092453003, "num_chars": 16}, {"sum_logits": -13.293444633483887, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.768277168273926, "logits_per_token": -13.293444633483887, "logits_per_char": -3.3233611583709717, "num_chars": 4}, {"sum_logits": -8.715638160705566, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.342520713806152, "logits_per_token": -8.715638160705566, "logits_per_char": -0.7923307418823242, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 907, "native_id": "20a3bb788cf408d9a3e25e610fe60905", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.312665939331055, "incorrect_loss_raw": 10.597759485244751, "correct_loss_per_char": 0.7312665939331054, "incorrect_loss_per_char": 1.0168252635569799, "correct_loss_per_token": 3.6563329696655273, "incorrect_loss_per_token": 6.373091419537862, "correct_loss_uncond": -8.83384895324707, "incorrect_loss_uncond": -5.942086935043335}, "model_output": [{"sum_logits": -12.317362785339355, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.931836128234863, "logits_per_token": -12.317362785339355, "logits_per_char": -1.5396703481674194, "num_chars": 8}, {"sum_logits": -14.01788330078125, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.329730987548828, "logits_per_token": -4.672627766927083, "logits_per_char": -1.0012773786272322, "num_chars": 14}, {"sum_logits": -5.984569549560547, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -5.984569549560547, "logits_per_char": -0.8549385070800781, "num_chars": 7}, {"sum_logits": -7.312665939331055, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.146514892578125, "logits_per_token": -3.6563329696655273, "logits_per_char": -0.7312665939331054, "num_chars": 10}, {"sum_logits": -10.071222305297852, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -2.517805576324463, "logits_per_char": -0.6714148203531901, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 908, "native_id": "36c1f50eec01c287b8ef6ffe69fe0528", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.720634460449219, "incorrect_loss_raw": 12.732166290283203, "correct_loss_per_char": 0.8933862050374349, "incorrect_loss_per_char": 1.3408509738861567, "correct_loss_per_token": 5.360317230224609, "incorrect_loss_per_token": 5.196849346160889, "correct_loss_uncond": -11.768770217895508, "incorrect_loss_uncond": -4.596620798110962}, "model_output": [{"sum_logits": -14.474441528320312, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -18.566150665283203, "logits_per_token": -4.8248138427734375, "logits_per_char": -2.067777361188616, "num_chars": 7}, {"sum_logits": -10.720634460449219, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -22.489404678344727, "logits_per_token": -5.360317230224609, "logits_per_char": -0.8933862050374349, "num_chars": 12}, {"sum_logits": -9.300183296203613, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.55059051513672, "logits_per_token": -4.650091648101807, "logits_per_char": -1.0333536995781794, "num_chars": 9}, {"sum_logits": -13.56687068939209, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -6.783435344696045, "logits_per_char": -1.507430076599121, "num_chars": 9}, {"sum_logits": -13.587169647216797, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.462738037109375, "logits_per_token": -4.529056549072266, "logits_per_char": -0.7548427581787109, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 909, "native_id": "5f4825137a27f369fe859e85dfe1793f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.804073333740234, "incorrect_loss_raw": 12.445793390274048, "correct_loss_per_char": 1.1005091667175293, "incorrect_loss_per_char": 1.0078286340186646, "correct_loss_per_token": 4.402036666870117, "incorrect_loss_per_token": 5.4729960560798645, "correct_loss_uncond": -8.203784942626953, "incorrect_loss_uncond": -8.994017839431763}, "model_output": [{"sum_logits": -8.804073333740234, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.007858276367188, "logits_per_token": -4.402036666870117, "logits_per_char": -1.1005091667175293, "num_chars": 8}, {"sum_logits": -17.591575622558594, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -21.60751724243164, "logits_per_token": -8.795787811279297, "logits_per_char": -1.7591575622558593, "num_chars": 10}, {"sum_logits": -8.502374649047852, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.338302612304688, "logits_per_token": -4.251187324523926, "logits_per_char": -0.772943149913441, "num_chars": 11}, {"sum_logits": -11.690813064575195, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.855281829833984, "logits_per_token": -5.845406532287598, "logits_per_char": -0.8992933126596304, "num_chars": 13}, {"sum_logits": -11.99841022491455, "num_tokens": 4, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -23.95814323425293, "logits_per_token": -2.9996025562286377, "logits_per_char": -0.5999205112457275, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 910, "native_id": "b3dc6d6a5e2f9d7da8eb72816c80b3f8_1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.119912624359131, "incorrect_loss_raw": 14.22861909866333, "correct_loss_per_char": 1.0171303749084473, "incorrect_loss_per_char": 1.7030002834159377, "correct_loss_per_token": 7.119912624359131, "incorrect_loss_per_token": 7.959172526995341, "correct_loss_uncond": -5.9251322746276855, "incorrect_loss_uncond": -2.3764214515686035}, "model_output": [{"sum_logits": -7.119912624359131, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -13.045044898986816, "logits_per_token": -7.119912624359131, "logits_per_char": -1.0171303749084473, "num_chars": 7}, {"sum_logits": -12.068659782409668, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -12.068659782409668, "logits_per_char": -2.011443297068278, "num_chars": 6}, {"sum_logits": -15.929267883300781, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -18.574459075927734, "logits_per_token": -5.309755961100261, "logits_per_char": -1.5929267883300782, "num_chars": 10}, {"sum_logits": -14.913097381591797, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.70133399963379, "logits_per_token": -7.456548690795898, "logits_per_char": -2.1304424830845425, "num_chars": 7}, {"sum_logits": -14.003451347351074, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.188554763793945, "logits_per_token": -7.001725673675537, "logits_per_char": -1.077188565180852, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 911, "native_id": "63bb6128026ce24209583d0eea75fc27", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.542187213897705, "incorrect_loss_raw": 4.558429419994354, "correct_loss_per_char": 0.7570312023162842, "incorrect_loss_per_char": 0.7272306876523155, "correct_loss_per_token": 4.542187213897705, "incorrect_loss_per_token": 4.240867584943771, "correct_loss_uncond": -8.83219861984253, "incorrect_loss_uncond": -9.939651787281036}, "model_output": [{"sum_logits": -2.540494680404663, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -17.61488914489746, "logits_per_token": -1.2702473402023315, "logits_per_char": -0.21170789003372192, "num_chars": 12}, {"sum_logits": -2.4203553199768066, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -2.4203553199768066, "logits_per_char": -0.30254441499710083, "num_chars": 8}, {"sum_logits": -8.724563598632812, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.466949462890625, "logits_per_token": -8.724563598632812, "logits_per_char": -1.7449127197265626, "num_chars": 5}, {"sum_logits": -4.542187213897705, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -4.542187213897705, "logits_per_char": -0.7570312023162842, "num_chars": 6}, {"sum_logits": -4.548304080963135, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.706184387207031, "logits_per_token": -4.548304080963135, "logits_per_char": -0.6497577258518764, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 912, "native_id": "e8a9142d2402f818273dd62cf5a7b559_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.032816410064697, "incorrect_loss_raw": 12.31900143623352, "correct_loss_per_char": 0.8388027350107828, "incorrect_loss_per_char": 1.267151311276451, "correct_loss_per_token": 5.032816410064697, "incorrect_loss_per_token": 8.510495066642761, "correct_loss_uncond": -9.298222064971924, "incorrect_loss_uncond": -4.878018140792847}, "model_output": [{"sum_logits": -14.542323112487793, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -20.35413360595703, "logits_per_token": -7.2711615562438965, "logits_per_char": -1.6158136791653104, "num_chars": 9}, {"sum_logits": -9.379742622375488, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -9.379742622375488, "logits_per_char": -1.3399632317679269, "num_chars": 7}, {"sum_logits": -5.032816410064697, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.331038475036621, "logits_per_token": -5.032816410064697, "logits_per_char": -0.8388027350107828, "num_chars": 6}, {"sum_logits": -13.40964412689209, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -13.40964412689209, "logits_per_char": -1.117470343907674, "num_chars": 12}, {"sum_logits": -11.944295883178711, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.23796844482422, "logits_per_token": -3.9814319610595703, "logits_per_char": -0.9953579902648926, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 913, "native_id": "ead9c9744aee08678759158efe005175", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.495852470397949, "incorrect_loss_raw": 9.154430747032166, "correct_loss_per_char": 0.46398946217128206, "incorrect_loss_per_char": 1.090354225370619, "correct_loss_per_token": 6.495852470397949, "incorrect_loss_per_token": 8.224749863147736, "correct_loss_uncond": -7.193366050720215, "incorrect_loss_uncond": -3.1202014684677124}, "model_output": [{"sum_logits": -6.495852470397949, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.689218521118164, "logits_per_token": -6.495852470397949, "logits_per_char": -0.46398946217128206, "num_chars": 14}, {"sum_logits": -13.025752067565918, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.5693359375, "logits_per_token": -13.025752067565918, "logits_per_char": -1.184159278869629, "num_chars": 11}, {"sum_logits": -7.4374470710754395, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.12315559387207, "logits_per_token": -3.7187235355377197, "logits_per_char": -0.826383007897271, "num_chars": 9}, {"sum_logits": -7.354222297668457, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -8.672348022460938, "logits_per_token": -7.354222297668457, "logits_per_char": -1.4708444595336914, "num_chars": 5}, {"sum_logits": -8.800301551818848, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.733689308166504, "logits_per_token": -8.800301551818848, "logits_per_char": -0.8800301551818848, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 914, "native_id": "ab8bf60f76bc6119459271140ccae781", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.770787000656128, "incorrect_loss_raw": 10.990532636642456, "correct_loss_per_char": 0.25138580004374184, "incorrect_loss_per_char": 0.8905642970577701, "correct_loss_per_token": 1.885393500328064, "incorrect_loss_per_token": 4.897808790206909, "correct_loss_uncond": -16.37567448616028, "incorrect_loss_uncond": -7.5265820026397705}, "model_output": [{"sum_logits": -5.602241516113281, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.542440414428711, "logits_per_token": -2.8011207580566406, "logits_per_char": -0.4001601082938058, "num_chars": 14}, {"sum_logits": -23.099620819091797, "num_tokens": 4, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -26.113605499267578, "logits_per_token": -5.774905204772949, "logits_per_char": -1.1549810409545898, "num_chars": 20}, {"sum_logits": -8.490118026733398, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.107189178466797, "logits_per_token": -4.245059013366699, "logits_per_char": -0.6530860020564153, "num_chars": 13}, {"sum_logits": -3.770787000656128, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.146461486816406, "logits_per_token": -1.885393500328064, "logits_per_char": -0.25138580004374184, "num_chars": 15}, {"sum_logits": -6.770150184631348, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -6.770150184631348, "logits_per_char": -1.3540300369262694, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 915, "native_id": "3c6e2d95a63316b31986e8c7979582c9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.15522575378418, "incorrect_loss_raw": 13.059134602546692, "correct_loss_per_char": 1.2253732681274414, "incorrect_loss_per_char": 1.2604200296587758, "correct_loss_per_token": 8.57761287689209, "incorrect_loss_per_token": 7.228463172912598, "correct_loss_uncond": -2.0843143463134766, "incorrect_loss_uncond": -4.555797696113586}, "model_output": [{"sum_logits": -10.579681396484375, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.35748291015625, "logits_per_token": -10.579681396484375, "logits_per_char": -2.115936279296875, "num_chars": 5}, {"sum_logits": -5.540607929229736, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.71825122833252, "logits_per_token": -5.540607929229736, "logits_per_char": -0.5540607929229736, "num_chars": 10}, {"sum_logits": -17.15522575378418, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.239540100097656, "logits_per_token": -8.57761287689209, "logits_per_char": -1.2253732681274414, "num_chars": 14}, {"sum_logits": -15.058004379272461, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.773582458496094, "logits_per_token": -7.5290021896362305, "logits_per_char": -1.3689094890247693, "num_chars": 11}, {"sum_logits": -21.058244705200195, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.61041259765625, "logits_per_token": -5.264561176300049, "logits_per_char": -1.0027735573904855, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 916, "native_id": "5c171b9837af49211891ce40e4a10204", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.7127999067306519, "incorrect_loss_raw": 9.738688230514526, "correct_loss_per_char": 0.2446857009615217, "incorrect_loss_per_char": 1.347538322210312, "correct_loss_per_token": 1.7127999067306519, "incorrect_loss_per_token": 8.107115030288696, "correct_loss_uncond": -12.506393551826477, "incorrect_loss_uncond": -5.287774085998535}, "model_output": [{"sum_logits": -9.348692893981934, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.889263153076172, "logits_per_token": -9.348692893981934, "logits_per_char": -1.3355275562831335, "num_chars": 7}, {"sum_logits": -9.065350532531738, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -9.065350532531738, "logits_per_char": -1.2950500760759627, "num_chars": 7}, {"sum_logits": -10.751270294189453, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -10.751270294189453, "logits_per_char": -1.535895756312779, "num_chars": 7}, {"sum_logits": -1.7127999067306519, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -1.7127999067306519, "logits_per_char": -0.2446857009615217, "num_chars": 7}, {"sum_logits": -9.78943920135498, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.850818634033203, "logits_per_token": -3.26314640045166, "logits_per_char": -1.2236799001693726, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 917, "native_id": "56d0fc282a144565f2c852415c6fa92c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.432541847229004, "incorrect_loss_raw": 11.917981386184692, "correct_loss_per_char": 0.5847765315662731, "incorrect_loss_per_char": 0.8624653435888745, "correct_loss_per_token": 6.432541847229004, "incorrect_loss_per_token": 8.5873863697052, "correct_loss_uncond": -7.711785316467285, "incorrect_loss_uncond": -5.761640787124634}, "model_output": [{"sum_logits": -13.564940452575684, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.700839042663574, "logits_per_token": -13.564940452575684, "logits_per_char": -1.130411704381307, "num_chars": 12}, {"sum_logits": -7.462224960327148, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.405741691589355, "logits_per_token": -7.462224960327148, "logits_per_char": -0.4974816640218099, "num_chars": 15}, {"sum_logits": -6.432541847229004, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.144327163696289, "logits_per_token": -6.432541847229004, "logits_per_char": -0.5847765315662731, "num_chars": 11}, {"sum_logits": -17.058120727539062, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -26.05291748046875, "logits_per_token": -8.529060363769531, "logits_per_char": -1.1372080485026042, "num_chars": 15}, {"sum_logits": -9.586639404296875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.558990478515625, "logits_per_token": -4.7933197021484375, "logits_per_char": -0.6847599574497768, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 918, "native_id": "5b8a3081c3235d62bc77e2d15f3ad454", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.7641838192939758, "incorrect_loss_raw": 11.320981621742249, "correct_loss_per_char": 0.10916911704199654, "incorrect_loss_per_char": 1.1987541450394525, "correct_loss_per_token": 0.7641838192939758, "incorrect_loss_per_token": 7.8315194845199585, "correct_loss_uncond": -14.055494487285614, "incorrect_loss_uncond": -3.153940796852112}, "model_output": [{"sum_logits": -0.7641838192939758, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -14.81967830657959, "logits_per_token": -0.7641838192939758, "logits_per_char": -0.10916911704199654, "num_chars": 7}, {"sum_logits": -12.554300308227539, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -12.554300308227539, "logits_per_char": -1.3949222564697266, "num_chars": 9}, {"sum_logits": -4.813929080963135, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.11130428314209, "logits_per_token": -4.813929080963135, "logits_per_char": -0.8023215134938558, "num_chars": 6}, {"sum_logits": -12.700077056884766, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -6.350038528442383, "logits_per_char": -0.9071483612060547, "num_chars": 14}, {"sum_logits": -15.215620040893555, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.607810020446777, "logits_per_char": -1.6906244489881728, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 919, "native_id": "e43c4eaa04243ddee30f29171718eb92", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.342575073242188, "incorrect_loss_raw": 8.006468057632446, "correct_loss_per_char": 1.0311431884765625, "incorrect_loss_per_char": 1.1526119735505846, "correct_loss_per_token": 5.671287536621094, "incorrect_loss_per_token": 8.006468057632446, "correct_loss_uncond": -10.78135871887207, "incorrect_loss_uncond": -5.99996542930603}, "model_output": [{"sum_logits": -11.342575073242188, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -22.123933792114258, "logits_per_token": -5.671287536621094, "logits_per_char": -1.0311431884765625, "num_chars": 11}, {"sum_logits": -7.461494445800781, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -7.461494445800781, "logits_per_char": -0.7461494445800781, "num_chars": 10}, {"sum_logits": -4.135761260986328, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -4.135761260986328, "logits_per_char": -0.4595290289984809, "num_chars": 9}, {"sum_logits": -13.786327362060547, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -14.295975685119629, "logits_per_token": -13.786327362060547, "logits_per_char": -2.2977212270100913, "num_chars": 6}, {"sum_logits": -6.642289161682129, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -6.642289161682129, "logits_per_char": -1.1070481936136882, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 920, "native_id": "84a736d4b702a6869d8fa8523aee6f1b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.601230621337891, "incorrect_loss_raw": 15.223834991455078, "correct_loss_per_char": 0.8251538276672363, "incorrect_loss_per_char": 1.4864495707437548, "correct_loss_per_token": 6.601230621337891, "incorrect_loss_per_token": 10.773789723714193, "correct_loss_uncond": -9.020824432373047, "incorrect_loss_uncond": -1.48818039894104}, "model_output": [{"sum_logits": -6.601230621337891, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.622055053710938, "logits_per_token": -6.601230621337891, "logits_per_char": -0.8251538276672363, "num_chars": 8}, {"sum_logits": -14.804036140441895, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -14.804036140441895, "logits_per_char": -1.8505045175552368, "num_chars": 8}, {"sum_logits": -26.700271606445312, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.704574584960938, "logits_per_token": -8.900090535481771, "logits_per_char": -1.570604212143842, "num_chars": 17}, {"sum_logits": -7.731924057006836, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -7.731924057006836, "logits_per_char": -0.8591026730007596, "num_chars": 9}, {"sum_logits": -11.65910816192627, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -11.65910816192627, "logits_per_char": -1.6655868802751814, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 921, "native_id": "72611791cdcb040f2d699827fb9cebc4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.798402786254883, "incorrect_loss_raw": 12.61858606338501, "correct_loss_per_char": 0.339060990706734, "incorrect_loss_per_char": 1.0665473209315048, "correct_loss_per_token": 3.8992013931274414, "incorrect_loss_per_token": 7.227981090545654, "correct_loss_uncond": -12.232097625732422, "incorrect_loss_uncond": -4.269907236099243}, "model_output": [{"sum_logits": -16.283370971679688, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.280351638793945, "logits_per_token": -8.141685485839844, "logits_per_char": -1.4803064519708806, "num_chars": 11}, {"sum_logits": -11.229507446289062, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.433670997619629, "logits_per_token": -5.614753723144531, "logits_per_char": -0.9357922871907552, "num_chars": 12}, {"sum_logits": -7.798402786254883, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.030500411987305, "logits_per_token": -3.8992013931274414, "logits_per_char": -0.339060990706734, "num_chars": 23}, {"sum_logits": -15.611961364746094, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.795581817626953, "logits_per_token": -7.805980682373047, "logits_per_char": -1.115140097481864, "num_chars": 14}, {"sum_logits": -7.349504470825195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.044368743896484, "logits_per_token": -7.349504470825195, "logits_per_char": -0.7349504470825196, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 922, "native_id": "4477fb61fde4bb8695c241dfc366b554", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.0394058227539062, "incorrect_loss_raw": 10.654938578605652, "correct_loss_per_char": 0.2549257278442383, "incorrect_loss_per_char": 1.2089216510454814, "correct_loss_per_token": 1.0197029113769531, "incorrect_loss_per_token": 8.34370082616806, "correct_loss_uncond": -14.615642547607422, "incorrect_loss_uncond": -5.4095529317855835}, "model_output": [{"sum_logits": -10.720093727111816, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.968193054199219, "logits_per_token": -5.360046863555908, "logits_per_char": -0.8933411439259847, "num_chars": 12}, {"sum_logits": -7.769808292388916, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -3.884904146194458, "logits_per_char": -1.1099726131984167, "num_chars": 7}, {"sum_logits": -10.327781677246094, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -10.327781677246094, "logits_per_char": -0.8606484731038412, "num_chars": 12}, {"sum_logits": -2.0394058227539062, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.655048370361328, "logits_per_token": -1.0197029113769531, "logits_per_char": -0.2549257278442383, "num_chars": 8}, {"sum_logits": -13.802070617675781, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.249696731567383, "logits_per_token": -13.802070617675781, "logits_per_char": -1.971724373953683, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 923, "native_id": "ce246bc94a54431b9c0530e71d2456b5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.513113021850586, "incorrect_loss_raw": 12.396445155143738, "correct_loss_per_char": 0.6260927518208822, "incorrect_loss_per_char": 1.191015471683608, "correct_loss_per_token": 3.756556510925293, "incorrect_loss_per_token": 6.198222577571869, "correct_loss_uncond": -12.656160354614258, "incorrect_loss_uncond": -5.21749746799469}, "model_output": [{"sum_logits": -7.858595371246338, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -3.929297685623169, "logits_per_char": -0.9823244214057922, "num_chars": 8}, {"sum_logits": -12.070548057556152, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.85031509399414, "logits_per_token": -6.035274028778076, "logits_per_char": -1.2070548057556152, "num_chars": 10}, {"sum_logits": -16.687650680541992, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -8.343825340270996, "logits_per_char": -1.8541834089491103, "num_chars": 9}, {"sum_logits": -12.968986511230469, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.02060890197754, "logits_per_token": -6.484493255615234, "logits_per_char": -0.720499250623915, "num_chars": 18}, {"sum_logits": -7.513113021850586, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.169273376464844, "logits_per_token": -3.756556510925293, "logits_per_char": -0.6260927518208822, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 924, "native_id": "2eef2d255fe629414f4d24ade8590102", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.283451080322266, "incorrect_loss_raw": 15.084130764007568, "correct_loss_per_char": 0.8092723422580295, "incorrect_loss_per_char": 1.455712304013804, "correct_loss_per_token": 7.283451080322266, "incorrect_loss_per_token": 8.556683897972107, "correct_loss_uncond": -6.791680335998535, "incorrect_loss_uncond": -3.9442412853240967}, "model_output": [{"sum_logits": -24.80506134033203, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -27.140914916992188, "logits_per_token": -12.402530670166016, "logits_per_char": -2.255005576393821, "num_chars": 11}, {"sum_logits": -8.116948127746582, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -8.116948127746582, "logits_per_char": -1.0146185159683228, "num_chars": 8}, {"sum_logits": -7.283451080322266, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.0751314163208, "logits_per_token": -7.283451080322266, "logits_per_char": -0.8092723422580295, "num_chars": 9}, {"sum_logits": -12.419364929199219, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.228248596191406, "logits_per_token": -6.209682464599609, "logits_per_char": -0.8870974949428013, "num_chars": 14}, {"sum_logits": -14.995148658752441, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.497574329376221, "logits_per_char": -1.6661276287502713, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 925, "native_id": "2f85d53721ccc8b3fa4cfc184186d124", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 11.274713516235352, "incorrect_loss_raw": 12.93726658821106, "correct_loss_per_char": 1.0249739560213955, "incorrect_loss_per_char": 1.3627928043113036, "correct_loss_per_token": 11.274713516235352, "incorrect_loss_per_token": 10.767669677734375, "correct_loss_uncond": -2.6607446670532227, "incorrect_loss_uncond": -0.7271788120269775}, "model_output": [{"sum_logits": -11.274713516235352, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -13.935458183288574, "logits_per_token": -11.274713516235352, "logits_per_char": -1.0249739560213955, "num_chars": 11}, {"sum_logits": -17.356775283813477, "num_tokens": 2, "num_tokens_all": 164, "is_greedy": false, "sum_logits_uncond": -16.58812141418457, "logits_per_token": -8.678387641906738, "logits_per_char": -1.0209867814007927, "num_chars": 17}, {"sum_logits": -10.832427024841309, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -11.153367042541504, "logits_per_token": -10.832427024841309, "logits_per_char": -1.3540533781051636, "num_chars": 8}, {"sum_logits": -12.757694244384766, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -13.844734191894531, "logits_per_token": -12.757694244384766, "logits_per_char": -1.2757694244384765, "num_chars": 10}, {"sum_logits": -10.802169799804688, "num_tokens": 1, "num_tokens_all": 163, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -10.802169799804688, "logits_per_char": -1.8003616333007812, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 926, "native_id": "2192c5c2145a6e03755ad89a02e64055", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.892129421234131, "incorrect_loss_raw": 9.687282085418701, "correct_loss_per_char": 0.6892129421234131, "incorrect_loss_per_char": 1.1034552166816303, "correct_loss_per_token": 3.4460647106170654, "incorrect_loss_per_token": 5.422571182250977, "correct_loss_uncond": -12.827196598052979, "incorrect_loss_uncond": -8.053523778915405}, "model_output": [{"sum_logits": -13.571050643920898, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.115755081176758, "logits_per_token": -4.523683547973633, "logits_per_char": -1.507894515991211, "num_chars": 9}, {"sum_logits": -6.892129421234131, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.71932601928711, "logits_per_token": -3.4460647106170654, "logits_per_char": -0.6892129421234131, "num_chars": 10}, {"sum_logits": -9.15512466430664, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -9.15512466430664, "logits_per_char": -1.3078749520438058, "num_chars": 7}, {"sum_logits": -7.000255584716797, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.732988357543945, "logits_per_token": -3.5001277923583984, "logits_per_char": -0.7778061760796441, "num_chars": 9}, {"sum_logits": -9.022697448730469, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -4.511348724365234, "logits_per_char": -0.8202452226118608, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 927, "native_id": "bea07406aaadeef50110883b6932d86a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.410369396209717, "incorrect_loss_raw": 6.263350486755371, "correct_loss_per_char": 0.7350615660349528, "incorrect_loss_per_char": 0.6275641287957039, "correct_loss_per_token": 4.410369396209717, "incorrect_loss_per_token": 5.217335224151611, "correct_loss_uncond": -6.700934886932373, "incorrect_loss_uncond": -7.90861439704895}, "model_output": [{"sum_logits": -4.410369396209717, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.11130428314209, "logits_per_token": -4.410369396209717, "logits_per_char": -0.7350615660349528, "num_chars": 6}, {"sum_logits": -4.297301769256592, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.10653305053711, "logits_per_token": -4.297301769256592, "logits_per_char": -0.3906637972051447, "num_chars": 11}, {"sum_logits": -9.483728408813477, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.899171829223633, "logits_per_token": -9.483728408813477, "logits_per_char": -1.1854660511016846, "num_chars": 8}, {"sum_logits": -2.904249668121338, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.877777099609375, "logits_per_token": -2.904249668121338, "logits_per_char": -0.2904249668121338, "num_chars": 10}, {"sum_logits": -8.368122100830078, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.804377555847168, "logits_per_token": -4.184061050415039, "logits_per_char": -0.6437017000638522, "num_chars": 13}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 928, "native_id": "7a58e7e7bf76658751e850f790922aba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.5510700941085815, "incorrect_loss_raw": 9.091553211212158, "correct_loss_per_char": 0.17234112156762016, "incorrect_loss_per_char": 1.203573567526681, "correct_loss_per_token": 1.5510700941085815, "incorrect_loss_per_token": 7.965517997741699, "correct_loss_uncond": -14.419041752815247, "incorrect_loss_uncond": -6.131724834442139}, "model_output": [{"sum_logits": -9.318901062011719, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.04128646850586, "logits_per_token": -9.318901062011719, "logits_per_char": -1.3312715802873885, "num_chars": 7}, {"sum_logits": -9.008281707763672, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.788606643676758, "logits_per_token": -4.504140853881836, "logits_per_char": -1.2868973868233817, "num_chars": 7}, {"sum_logits": -6.041341781616211, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -6.041341781616211, "logits_per_char": -0.863048825945173, "num_chars": 7}, {"sum_logits": -11.997688293457031, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.519027709960938, "logits_per_token": -11.997688293457031, "logits_per_char": -1.3330764770507812, "num_chars": 9}, {"sum_logits": -1.5510700941085815, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -1.5510700941085815, "logits_per_char": -0.17234112156762016, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 929, "native_id": "76b2c6d254f9127b4fd66d90e1a330e7", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.4204816818237305, "incorrect_loss_raw": 18.75492262840271, "correct_loss_per_char": 0.7367469469706217, "incorrect_loss_per_char": 1.2843452875384065, "correct_loss_per_token": 4.4204816818237305, "incorrect_loss_per_token": 8.766696333885193, "correct_loss_uncond": -9.548497200012207, "incorrect_loss_uncond": -0.17053556442260742}, "model_output": [{"sum_logits": -24.250877380371094, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -21.485973358154297, "logits_per_token": -8.083625793457031, "logits_per_char": -1.732205527169364, "num_chars": 14}, {"sum_logits": -4.4204816818237305, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.968978881835938, "logits_per_token": -4.4204816818237305, "logits_per_char": -0.7367469469706217, "num_chars": 6}, {"sum_logits": -25.175931930541992, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.956085205078125, "logits_per_token": -8.391977310180664, "logits_per_char": -1.480937172384823, "num_chars": 17}, {"sum_logits": -11.589483261108398, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.629544258117676, "logits_per_token": -11.589483261108398, "logits_per_char": -1.2877203623453777, "num_chars": 9}, {"sum_logits": -14.003397941589355, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.630229949951172, "logits_per_token": -7.001698970794678, "logits_per_char": -0.6365180882540616, "num_chars": 22}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 930, "native_id": "cdd3d074031fbd3efeb4f9408abef04e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.011469841003418, "incorrect_loss_raw": 10.381240606307983, "correct_loss_per_char": 0.6674313227335612, "incorrect_loss_per_char": 0.8463290752786579, "correct_loss_per_token": 2.5028674602508545, "incorrect_loss_per_token": 5.190620303153992, "correct_loss_uncond": -7.321427345275879, "incorrect_loss_uncond": -7.969406366348267}, "model_output": [{"sum_logits": -12.072505950927734, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -6.036252975463867, "logits_per_char": -1.0060421625773113, "num_chars": 12}, {"sum_logits": -9.641298294067383, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.52931785583496, "logits_per_token": -4.820649147033691, "logits_per_char": -0.6427532196044922, "num_chars": 15}, {"sum_logits": -10.011469841003418, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.332897186279297, "logits_per_token": -2.5028674602508545, "logits_per_char": -0.6674313227335612, "num_chars": 15}, {"sum_logits": -8.513136863708496, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.571249008178711, "logits_per_token": -4.256568431854248, "logits_per_char": -0.709428071975708, "num_chars": 12}, {"sum_logits": -11.29802131652832, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.364120483398438, "logits_per_token": -5.64901065826416, "logits_per_char": -1.02709284695712, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 931, "native_id": "359aed918343d228e67cef329b693904", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.46816635131836, "incorrect_loss_raw": 13.881202936172485, "correct_loss_per_char": 0.736579350062779, "incorrect_loss_per_char": 1.2845518864118135, "correct_loss_per_token": 3.86704158782959, "incorrect_loss_per_token": 6.836242198944092, "correct_loss_uncond": -15.434574127197266, "incorrect_loss_uncond": -5.290611505508423}, "model_output": [{"sum_logits": -29.758529663085938, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -26.080947875976562, "logits_per_token": -9.919509887695312, "logits_per_char": -2.289117666391226, "num_chars": 13}, {"sum_logits": -8.603910446166992, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.013736724853516, "logits_per_token": -4.301955223083496, "logits_per_char": -0.6618392650897686, "num_chars": 13}, {"sum_logits": -15.46816635131836, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -30.902740478515625, "logits_per_token": -3.86704158782959, "logits_per_char": -0.736579350062779, "num_chars": 21}, {"sum_logits": -9.084635734558105, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.019948959350586, "logits_per_token": -9.084635734558105, "logits_per_char": -1.5141059557596843, "num_chars": 6}, {"sum_logits": -8.077735900878906, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.57262420654297, "logits_per_token": -4.038867950439453, "logits_per_char": -0.6731446584065756, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 932, "native_id": "cf02cca40a47c2deefd8b2e5a5ff2f70", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 22.84738540649414, "incorrect_loss_raw": 12.032632112503052, "correct_loss_per_char": 1.2024939687628495, "incorrect_loss_per_char": 2.235435778754098, "correct_loss_per_token": 5.711846351623535, "incorrect_loss_per_token": 8.34309983253479, "correct_loss_uncond": -8.94851303100586, "incorrect_loss_uncond": -2.9812071323394775}, "model_output": [{"sum_logits": -22.84738540649414, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -31.7958984375, "logits_per_token": -5.711846351623535, "logits_per_char": -1.2024939687628495, "num_chars": 19}, {"sum_logits": -7.616159439086914, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.670047760009766, "logits_per_token": -7.616159439086914, "logits_per_char": -1.5232318878173827, "num_chars": 5}, {"sum_logits": -11.97634506225586, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.84996795654297, "logits_per_token": -5.98817253112793, "logits_per_char": -1.7109064374651228, "num_chars": 7}, {"sum_logits": -10.9981107711792, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.923734664916992, "logits_per_token": -10.9981107711792, "logits_per_char": -2.19962215423584, "num_chars": 5}, {"sum_logits": -17.539913177490234, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.61160659790039, "logits_per_token": -8.769956588745117, "logits_per_char": -3.507982635498047, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 933, "native_id": "ac1abecdbbd7bcde6592ca645c2ecb1e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.341264724731445, "incorrect_loss_raw": 10.657180666923523, "correct_loss_per_char": 0.5958046231951032, "incorrect_loss_per_char": 0.9492440943916639, "correct_loss_per_token": 4.170632362365723, "incorrect_loss_per_token": 5.3285903334617615, "correct_loss_uncond": -11.309621810913086, "incorrect_loss_uncond": -8.612078309059143}, "model_output": [{"sum_logits": -8.341264724731445, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.65088653564453, "logits_per_token": -4.170632362365723, "logits_per_char": -0.5958046231951032, "num_chars": 14}, {"sum_logits": -9.226712226867676, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.070524215698242, "logits_per_token": -4.613356113433838, "logits_per_char": -0.5766695141792297, "num_chars": 16}, {"sum_logits": -11.845802307128906, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.36295509338379, "logits_per_token": -5.922901153564453, "logits_per_char": -0.9871501922607422, "num_chars": 12}, {"sum_logits": -18.224098205566406, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -9.112049102783203, "logits_per_char": -2.0248998006184897, "num_chars": 9}, {"sum_logits": -3.3321099281311035, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.166547775268555, "logits_per_token": -1.6660549640655518, "logits_per_char": -0.20825687050819397, "num_chars": 16}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 934, "native_id": "2adbb4fc0d5249dc411dda433f378591", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.151786804199219, "incorrect_loss_raw": 10.111267328262329, "correct_loss_per_char": 0.5592533458362926, "incorrect_loss_per_char": 1.2674605978859794, "correct_loss_per_token": 6.151786804199219, "incorrect_loss_per_token": 6.097079594930013, "correct_loss_uncond": -9.441572189331055, "incorrect_loss_uncond": -5.0249974727630615}, "model_output": [{"sum_logits": -13.492124557495117, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.20165252685547, "logits_per_token": -6.746062278747559, "logits_per_char": -1.4991249508327908, "num_chars": 9}, {"sum_logits": -5.821971893310547, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -2.9109859466552734, "logits_per_char": -0.5821971893310547, "num_chars": 10}, {"sum_logits": -11.531418800354004, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.623113632202148, "logits_per_token": -11.531418800354004, "logits_per_char": -1.921903133392334, "num_chars": 6}, {"sum_logits": -6.151786804199219, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -6.151786804199219, "logits_per_char": -0.5592533458362926, "num_chars": 11}, {"sum_logits": -9.599554061889648, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.74782943725586, "logits_per_token": -3.1998513539632163, "logits_per_char": -1.0666171179877386, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 935, "native_id": "5a1c8a9dbbb60e523cc1ba14a370729c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.446979522705078, "incorrect_loss_raw": 18.345725059509277, "correct_loss_per_char": 0.8893959045410156, "incorrect_loss_per_char": 1.3960368004902317, "correct_loss_per_token": 4.446979522705078, "incorrect_loss_per_token": 5.1096542517344155, "correct_loss_uncond": -8.001718521118164, "incorrect_loss_uncond": -4.70297384262085}, "model_output": [{"sum_logits": -41.27606201171875, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -37.00548553466797, "logits_per_token": -8.25521240234375, "logits_per_char": -3.439671834309896, "num_chars": 12}, {"sum_logits": -11.619543075561523, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.188064575195312, "logits_per_token": -3.8731810251871743, "logits_per_char": -0.6115548987137643, "num_chars": 19}, {"sum_logits": -8.886751174926758, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.906938552856445, "logits_per_token": -4.443375587463379, "logits_per_char": -0.8078864704478871, "num_chars": 11}, {"sum_logits": -11.600543975830078, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.09430694580078, "logits_per_token": -3.8668479919433594, "logits_per_char": -0.7250339984893799, "num_chars": 16}, {"sum_logits": -4.446979522705078, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.448698043823242, "logits_per_token": -4.446979522705078, "logits_per_char": -0.8893959045410156, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 936, "native_id": "3665b329f93f7c84edeabe394140f8d2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 15.495613098144531, "incorrect_loss_raw": 10.081557512283325, "correct_loss_per_char": 1.408692099831321, "incorrect_loss_per_char": 1.275538406674824, "correct_loss_per_token": 7.747806549072266, "incorrect_loss_per_token": 8.180510520935059, "correct_loss_uncond": -7.948722839355469, "incorrect_loss_uncond": -5.0432658195495605}, "model_output": [{"sum_logits": -5.503259658813477, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.319314002990723, "logits_per_token": -5.503259658813477, "logits_per_char": -1.3758149147033691, "num_chars": 4}, {"sum_logits": -15.495613098144531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.4443359375, "logits_per_token": -7.747806549072266, "logits_per_char": -1.408692099831321, "num_chars": 11}, {"sum_logits": -11.323312759399414, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -11.323312759399414, "logits_per_char": -1.2581458621554904, "num_chars": 9}, {"sum_logits": -8.291281700134277, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.167571067810059, "logits_per_token": -8.291281700134277, "logits_per_char": -1.381880283355713, "num_chars": 6}, {"sum_logits": -15.208375930786133, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.206947326660156, "logits_per_token": -7.604187965393066, "logits_per_char": -1.0863125664847237, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 937, "native_id": "dbcedaa6a6f1f68bc8f2bf7aef23294e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.491067886352539, "incorrect_loss_raw": 9.91916799545288, "correct_loss_per_char": 0.9151779810587565, "incorrect_loss_per_char": 1.2116201751761966, "correct_loss_per_token": 5.491067886352539, "incorrect_loss_per_token": 9.91916799545288, "correct_loss_uncond": -8.6705904006958, "incorrect_loss_uncond": -3.946838140487671}, "model_output": [{"sum_logits": -9.937468528747559, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -9.937468528747559, "logits_per_char": -1.2421835660934448, "num_chars": 8}, {"sum_logits": -8.143437385559082, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.015922546386719, "logits_per_token": -8.143437385559082, "logits_per_char": -0.9048263761732314, "num_chars": 9}, {"sum_logits": -13.050479888916016, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -13.050479888916016, "logits_per_char": -1.631309986114502, "num_chars": 8}, {"sum_logits": -8.545286178588867, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.252389907836914, "logits_per_token": -8.545286178588867, "logits_per_char": -1.0681607723236084, "num_chars": 8}, {"sum_logits": -5.491067886352539, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.16165828704834, "logits_per_token": -5.491067886352539, "logits_per_char": -0.9151779810587565, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 938, "native_id": "ba3a2b9ff289c106051163f840a6f5ba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.536155700683594, "incorrect_loss_raw": 14.198606848716736, "correct_loss_per_char": 0.6811539786202567, "incorrect_loss_per_char": 1.2274478899968135, "correct_loss_per_token": 3.1787185668945312, "incorrect_loss_per_token": 7.099303424358368, "correct_loss_uncond": -11.974040985107422, "incorrect_loss_uncond": -3.818588376045227}, "model_output": [{"sum_logits": -2.2800545692443848, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.356916427612305, "logits_per_token": -1.1400272846221924, "logits_per_char": -0.20727768811312589, "num_chars": 11}, {"sum_logits": -21.582460403442383, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.773582458496094, "logits_per_token": -10.791230201721191, "logits_per_char": -1.9620418548583984, "num_chars": 11}, {"sum_logits": -9.536155700683594, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.510196685791016, "logits_per_token": -3.1787185668945312, "logits_per_char": -0.6811539786202567, "num_chars": 14}, {"sum_logits": -13.586739540100098, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.698741912841797, "logits_per_token": -6.793369770050049, "logits_per_char": -1.3586739540100097, "num_chars": 10}, {"sum_logits": -19.345172882080078, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.239540100097656, "logits_per_token": -9.672586441040039, "logits_per_char": -1.3817980630057198, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 939, "native_id": "13fc28f53423a9b3a656c9431df1b3b5", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.601411819458008, "incorrect_loss_raw": 9.874842405319214, "correct_loss_per_char": 0.5601411819458008, "incorrect_loss_per_char": 0.9096468403950144, "correct_loss_per_token": 5.601411819458008, "incorrect_loss_per_token": 6.52603554725647, "correct_loss_uncond": -7.888820648193359, "incorrect_loss_uncond": -6.9180920124053955}, "model_output": [{"sum_logits": -12.202272415161133, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.709430694580078, "logits_per_token": -6.101136207580566, "logits_per_char": -0.6422248639558491, "num_chars": 19}, {"sum_logits": -8.12896728515625, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.019386291503906, "logits_per_token": -8.12896728515625, "logits_per_char": -1.1612810407366072, "num_chars": 7}, {"sum_logits": -4.579947471618652, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -4.579947471618652, "logits_per_char": -0.5088830524020724, "num_chars": 9}, {"sum_logits": -14.58818244934082, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.836742401123047, "logits_per_token": -7.29409122467041, "logits_per_char": -1.326198404485529, "num_chars": 11}, {"sum_logits": -5.601411819458008, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -5.601411819458008, "logits_per_char": -0.5601411819458008, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 940, "native_id": "3f4b48708d08f8bf7bec796531023f9c", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.40727424621582, "incorrect_loss_raw": 10.169975757598877, "correct_loss_per_char": 0.7345457077026367, "incorrect_loss_per_char": 1.6683633940560476, "correct_loss_per_token": 4.40727424621582, "incorrect_loss_per_token": 10.169975757598877, "correct_loss_uncond": -9.63813304901123, "incorrect_loss_uncond": -4.23552393913269}, "model_output": [{"sum_logits": -4.40727424621582, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -14.04540729522705, "logits_per_token": -4.40727424621582, "logits_per_char": -0.7345457077026367, "num_chars": 6}, {"sum_logits": -10.222297668457031, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -10.222297668457031, "logits_per_char": -1.7037162780761719, "num_chars": 6}, {"sum_logits": -9.348104476928711, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -16.30998420715332, "logits_per_token": -9.348104476928711, "logits_per_char": -1.3354434967041016, "num_chars": 7}, {"sum_logits": -10.283111572265625, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -12.444001197814941, "logits_per_token": -10.283111572265625, "logits_per_char": -1.4690159388950892, "num_chars": 7}, {"sum_logits": -10.82638931274414, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -14.821721076965332, "logits_per_token": -10.82638931274414, "logits_per_char": -2.165277862548828, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 941, "native_id": "c61790eb63ff6652b878ca051493c07d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.732335090637207, "incorrect_loss_raw": 8.478116631507874, "correct_loss_per_char": 0.440948853125939, "incorrect_loss_per_char": 0.8961552123010378, "correct_loss_per_token": 2.8661675453186035, "incorrect_loss_per_token": 5.515633225440979, "correct_loss_uncond": -16.566065788269043, "incorrect_loss_uncond": -8.660046458244324}, "model_output": [{"sum_logits": -3.1842827796936035, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -3.1842827796936035, "logits_per_char": -0.45489753995622906, "num_chars": 7}, {"sum_logits": -7.028316497802734, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.758267402648926, "logits_per_token": -7.028316497802734, "logits_per_char": -1.405663299560547, "num_chars": 5}, {"sum_logits": -5.732335090637207, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.29840087890625, "logits_per_token": -2.8661675453186035, "logits_per_char": -0.440948853125939, "num_chars": 13}, {"sum_logits": -9.653153419494629, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -4.8265767097473145, "logits_per_char": -0.643543561299642, "num_chars": 15}, {"sum_logits": -14.046713829040527, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.368896484375, "logits_per_token": -7.023356914520264, "logits_per_char": -1.080516448387733, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 942, "native_id": "e5ebbe0ea4097bb197ac525b49108362", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.380573570728302, "incorrect_loss_raw": 10.826144695281982, "correct_loss_per_char": 0.0380573570728302, "incorrect_loss_per_char": 1.5203065211956317, "correct_loss_per_token": 0.380573570728302, "incorrect_loss_per_token": 7.434713840484619, "correct_loss_uncond": -13.46812504529953, "incorrect_loss_uncond": -3.286604166030884}, "model_output": [{"sum_logits": -11.335784912109375, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.20197105407715, "logits_per_token": -5.6678924560546875, "logits_per_char": -0.8719834547776443, "num_chars": 13}, {"sum_logits": -15.795661926269531, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.119783401489258, "logits_per_token": -7.897830963134766, "logits_per_char": -2.632610321044922, "num_chars": 6}, {"sum_logits": -0.380573570728302, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -0.380573570728302, "logits_per_char": -0.0380573570728302, "num_chars": 10}, {"sum_logits": -7.399877548217773, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.415973663330078, "logits_per_token": -7.399877548217773, "logits_per_char": -1.4799755096435547, "num_chars": 5}, {"sum_logits": -8.77325439453125, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.71326732635498, "logits_per_token": -8.77325439453125, "logits_per_char": -1.0966567993164062, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 943, "native_id": "029e36d8f65982b142c319064dc5e32f", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.103140830993652, "incorrect_loss_raw": 13.589169979095459, "correct_loss_per_char": 1.1103140830993652, "incorrect_loss_per_char": 1.098036141884058, "correct_loss_per_token": 5.551570415496826, "incorrect_loss_per_token": 7.113887012004852, "correct_loss_uncond": -3.344087600708008, "incorrect_loss_uncond": -5.468531370162964}, "model_output": [{"sum_logits": -14.32767391204834, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.05949592590332, "logits_per_token": -4.775891304016113, "logits_per_char": -0.8954796195030212, "num_chars": 16}, {"sum_logits": -11.103140830993652, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.44722843170166, "logits_per_token": -5.551570415496826, "logits_per_char": -1.1103140830993652, "num_chars": 10}, {"sum_logits": -12.615846633911133, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.153244018554688, "logits_per_token": -6.307923316955566, "logits_per_char": -0.7884904146194458, "num_chars": 16}, {"sum_logits": -14.024591445922852, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -14.024591445922852, "logits_per_char": -2.0035130637032643, "num_chars": 7}, {"sum_logits": -13.388567924499512, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -3.347141981124878, "logits_per_char": -0.7046614697105006, "num_chars": 19}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 944, "native_id": "3d1a67f87b34303f97549ba83e5521c2", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.610339164733887, "incorrect_loss_raw": 10.509186744689941, "correct_loss_per_char": 1.2683898607889812, "incorrect_loss_per_char": 1.3227757138091247, "correct_loss_per_token": 3.8051695823669434, "incorrect_loss_per_token": 5.433796366055807, "correct_loss_uncond": -7.937177658081055, "incorrect_loss_uncond": -5.701867580413818}, "model_output": [{"sum_logits": -7.610339164733887, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.547516822814941, "logits_per_token": -3.8051695823669434, "logits_per_char": -1.2683898607889812, "num_chars": 6}, {"sum_logits": -10.560789108276367, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.849666595458984, "logits_per_token": -3.5202630360921225, "logits_per_char": -0.9600717371160333, "num_chars": 11}, {"sum_logits": -4.953886985778809, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.219193458557129, "logits_per_token": -4.953886985778809, "logits_per_char": -0.707698140825544, "num_chars": 7}, {"sum_logits": -14.346219062805176, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -7.173109531402588, "logits_per_char": -1.5940243403116863, "num_chars": 9}, {"sum_logits": -12.175851821899414, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.039688110351562, "logits_per_token": -6.087925910949707, "logits_per_char": -2.029308636983236, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 945, "native_id": "e050bce7048da1b3743a54153e91694e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.3859992027282715, "incorrect_loss_raw": 13.872894406318665, "correct_loss_per_char": 0.21929996013641356, "incorrect_loss_per_char": 1.1876399954160055, "correct_loss_per_token": 2.1929996013641357, "incorrect_loss_per_token": 6.461162169774374, "correct_loss_uncond": -14.970606327056885, "incorrect_loss_uncond": -5.602757811546326}, "model_output": [{"sum_logits": -4.3859992027282715, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.356605529785156, "logits_per_token": -2.1929996013641357, "logits_per_char": -0.21929996013641356, "num_chars": 20}, {"sum_logits": -12.657193183898926, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.222137451171875, "logits_per_token": -4.219064394632976, "logits_per_char": -1.054766098658244, "num_chars": 12}, {"sum_logits": -13.781893730163574, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.997291564941406, "logits_per_token": -6.890946865081787, "logits_per_char": -1.1484911441802979, "num_chars": 12}, {"sum_logits": -7.575710773468018, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.356603622436523, "logits_per_token": -7.575710773468018, "logits_per_char": -0.7575710773468017, "num_chars": 10}, {"sum_logits": -21.47677993774414, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.326576232910156, "logits_per_token": -7.158926645914714, "logits_per_char": -1.7897316614786785, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 946, "native_id": "8233ccb60dd0c0ff3b7ca5d73e5681f2", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.986966133117676, "incorrect_loss_raw": 15.271855592727661, "correct_loss_per_char": 0.49927589628431535, "incorrect_loss_per_char": 1.440318587062123, "correct_loss_per_token": 4.493483066558838, "incorrect_loss_per_token": 7.83891749382019, "correct_loss_uncond": -9.566658973693848, "incorrect_loss_uncond": -2.6594743728637695}, "model_output": [{"sum_logits": -18.20669174194336, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.381427764892578, "logits_per_token": -6.068897247314453, "logits_per_char": -1.4005147493802583, "num_chars": 13}, {"sum_logits": -8.986966133117676, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.553625106811523, "logits_per_token": -4.493483066558838, "logits_per_char": -0.49927589628431535, "num_chars": 18}, {"sum_logits": -13.227738380432129, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -13.227738380432129, "logits_per_char": -1.889676911490304, "num_chars": 7}, {"sum_logits": -16.60477066040039, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.843421936035156, "logits_per_token": -5.534923553466797, "logits_per_char": -1.3837308883666992, "num_chars": 12}, {"sum_logits": -13.048221588134766, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.976893424987793, "logits_per_token": -6.524110794067383, "logits_per_char": -1.0873517990112305, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 947, "native_id": "eb4b2cd0f2a69686e5a82250c5806b84", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0720016956329346, "incorrect_loss_raw": 14.26341962814331, "correct_loss_per_char": 0.11911129951477051, "incorrect_loss_per_char": 1.3534016631898427, "correct_loss_per_token": 1.0720016956329346, "incorrect_loss_per_token": 8.979400952657063, "correct_loss_uncond": -12.95378851890564, "incorrect_loss_uncond": -2.4355905055999756}, "model_output": [{"sum_logits": -10.1646728515625, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.649393081665039, "logits_per_token": -10.1646728515625, "logits_per_char": -1.4520961216517858, "num_chars": 7}, {"sum_logits": -16.72846221923828, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.044836044311523, "logits_per_token": -5.576154073079427, "logits_per_char": -0.836423110961914, "num_chars": 20}, {"sum_logits": -19.967533111572266, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.814163208007812, "logits_per_token": -9.983766555786133, "logits_per_char": -1.4262523651123047, "num_chars": 14}, {"sum_logits": -10.193010330200195, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -10.193010330200195, "logits_per_char": -1.698835055033366, "num_chars": 6}, {"sum_logits": -1.0720016956329346, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -14.025790214538574, "logits_per_token": -1.0720016956329346, "logits_per_char": -0.11911129951477051, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 948, "native_id": "d0bda97a087904320216e4d0b8a08a8d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.158077239990234, "incorrect_loss_raw": 17.84881830215454, "correct_loss_per_char": 0.868434088570731, "incorrect_loss_per_char": 1.7352913738964322, "correct_loss_per_token": 4.052692413330078, "incorrect_loss_per_token": 10.694032192230225, "correct_loss_uncond": -6.442970275878906, "incorrect_loss_uncond": -0.07858037948608398}, "model_output": [{"sum_logits": -15.383617401123047, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.86186981201172, "logits_per_token": -7.691808700561523, "logits_per_char": -1.183355184701773, "num_chars": 13}, {"sum_logits": -14.156984329223633, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.05655288696289, "logits_per_token": -14.156984329223633, "logits_per_char": -1.769623041152954, "num_chars": 8}, {"sum_logits": -20.153884887695312, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.32097625732422, "logits_per_token": -10.076942443847656, "logits_per_char": -2.0153884887695312, "num_chars": 10}, {"sum_logits": -12.158077239990234, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.60104751586914, "logits_per_token": -4.052692413330078, "logits_per_char": -0.868434088570731, "num_chars": 14}, {"sum_logits": -21.700786590576172, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.470195770263672, "logits_per_token": -10.850393295288086, "logits_per_char": -1.9727987809614702, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 949, "native_id": "e216381e9f0ddd1d248ee25fccca2b1f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0854694843292236, "incorrect_loss_raw": 14.390437364578247, "correct_loss_per_char": 0.13568368554115295, "incorrect_loss_per_char": 1.4166802165828227, "correct_loss_per_token": 1.0854694843292236, "incorrect_loss_per_token": 8.408265670140585, "correct_loss_uncond": -11.419516324996948, "incorrect_loss_uncond": -3.655649185180664}, "model_output": [{"sum_logits": -9.001267433166504, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.332571983337402, "logits_per_token": -9.001267433166504, "logits_per_char": -0.9001267433166504, "num_chars": 10}, {"sum_logits": -8.713443756103516, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.555574417114258, "logits_per_token": -8.713443756103516, "logits_per_char": -1.4522406260172527, "num_chars": 6}, {"sum_logits": -15.816032409667969, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.750076293945312, "logits_per_token": -7.908016204833984, "logits_per_char": -1.1297166006905692, "num_chars": 14}, {"sum_logits": -24.031005859375, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -23.546123504638672, "logits_per_token": -8.010335286458334, "logits_per_char": -2.1846368963068183, "num_chars": 11}, {"sum_logits": -1.0854694843292236, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -12.504985809326172, "logits_per_token": -1.0854694843292236, "logits_per_char": -0.13568368554115295, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 950, "native_id": "b1fba9ad6193c6751ddb3f58f7f39b35", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.371786117553711, "incorrect_loss_raw": 10.733954668045044, "correct_loss_per_char": 0.3581190745035807, "incorrect_loss_per_char": 0.7983574151992798, "correct_loss_per_token": 2.6858930587768555, "incorrect_loss_per_token": 4.829464435577393, "correct_loss_uncond": -13.116958618164062, "incorrect_loss_uncond": -8.923998832702637}, "model_output": [{"sum_logits": -5.371786117553711, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.488744735717773, "logits_per_token": -2.6858930587768555, "logits_per_char": -0.3581190745035807, "num_chars": 15}, {"sum_logits": -10.384895324707031, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.562074661254883, "logits_per_token": -3.4616317749023438, "logits_per_char": -0.6490559577941895, "num_chars": 16}, {"sum_logits": -17.699512481689453, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -25.636783599853516, "logits_per_token": -5.899837493896484, "logits_per_char": -0.8849756240844726, "num_chars": 20}, {"sum_logits": -9.790044784545898, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.529138565063477, "logits_per_token": -4.895022392272949, "logits_per_char": -0.8158370653788248, "num_chars": 12}, {"sum_logits": -5.061366081237793, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.903817176818848, "logits_per_token": -5.061366081237793, "logits_per_char": -0.8435610135396322, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 951, "native_id": "3ceae7a18073050bd2c0448abef1f393", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3574919700622559, "incorrect_loss_raw": 11.343113660812378, "correct_loss_per_char": 0.096963712147304, "incorrect_loss_per_char": 1.827614026410239, "correct_loss_per_token": 1.3574919700622559, "incorrect_loss_per_token": 8.87713885307312, "correct_loss_uncond": -11.638617992401123, "incorrect_loss_uncond": -4.197251081466675}, "model_output": [{"sum_logits": -12.240476608276367, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.867777824401855, "logits_per_token": -12.240476608276367, "logits_per_char": -1.530059576034546, "num_chars": 8}, {"sum_logits": -1.3574919700622559, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -12.996109962463379, "logits_per_token": -1.3574919700622559, "logits_per_char": -0.096963712147304, "num_chars": 14}, {"sum_logits": -3.6297826766967773, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.765427589416504, "logits_per_token": -3.6297826766967773, "logits_per_char": -0.5185403823852539, "num_chars": 7}, {"sum_logits": -9.774396896362305, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.903755187988281, "logits_per_token": -9.774396896362305, "logits_per_char": -2.443599224090576, "num_chars": 4}, {"sum_logits": -19.727798461914062, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -23.62449836730957, "logits_per_token": -9.863899230957031, "logits_per_char": -2.8182569231305803, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 952, "native_id": "f1182e3a070f5a1be529843aa6e5c20c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.895305633544922, "incorrect_loss_raw": 10.592215538024902, "correct_loss_per_char": 0.7661450703938802, "incorrect_loss_per_char": 1.296365078393515, "correct_loss_per_token": 6.895305633544922, "incorrect_loss_per_token": 9.115561962127686, "correct_loss_uncond": -7.710872650146484, "incorrect_loss_uncond": -3.9589006900787354}, "model_output": [{"sum_logits": -11.813228607177734, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.396651268005371, "logits_per_token": -5.906614303588867, "logits_per_char": -1.6876040867396764, "num_chars": 7}, {"sum_logits": -10.525345802307129, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.006147384643555, "logits_per_token": -10.525345802307129, "logits_per_char": -0.9568496183915571, "num_chars": 11}, {"sum_logits": -10.525345802307129, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.006147384643555, "logits_per_token": -10.525345802307129, "logits_per_char": -0.9568496183915571, "num_chars": 11}, {"sum_logits": -6.895305633544922, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -6.895305633544922, "logits_per_char": -0.7661450703938802, "num_chars": 9}, {"sum_logits": -9.504941940307617, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.79551887512207, "logits_per_token": -9.504941940307617, "logits_per_char": -1.5841569900512695, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 953, "native_id": "5799089c131e26473697afc54d5f6964", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.991010904312134, "incorrect_loss_raw": 13.89302110671997, "correct_loss_per_char": 0.27191008221019397, "incorrect_loss_per_char": 1.4176930084563137, "correct_loss_per_token": 1.495505452156067, "incorrect_loss_per_token": 7.8751486937205, "correct_loss_uncond": -12.42712664604187, "incorrect_loss_uncond": -4.6044676303863525}, "model_output": [{"sum_logits": -8.303335189819336, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.487808227539062, "logits_per_token": -4.151667594909668, "logits_per_char": -0.553555679321289, "num_chars": 15}, {"sum_logits": -19.224308013916016, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.566539764404297, "logits_per_token": -6.408102671305339, "logits_per_char": -1.6020256678263347, "num_chars": 12}, {"sum_logits": -13.837207794189453, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.601668357849121, "logits_per_token": -13.837207794189453, "logits_per_char": -2.7674415588378904, "num_chars": 5}, {"sum_logits": -14.207233428955078, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.333938598632812, "logits_per_token": -7.103616714477539, "logits_per_char": -0.7477491278397409, "num_chars": 19}, {"sum_logits": -2.991010904312134, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.418137550354004, "logits_per_token": -1.495505452156067, "logits_per_char": -0.27191008221019397, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 954, "native_id": "7ce1f99e8185489a7113e6d18c71abb0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.742806911468506, "incorrect_loss_raw": 9.415029883384705, "correct_loss_per_char": 0.9485613822937011, "incorrect_loss_per_char": 1.2611542474655877, "correct_loss_per_token": 4.742806911468506, "incorrect_loss_per_token": 6.950202941894531, "correct_loss_uncond": -8.927064418792725, "incorrect_loss_uncond": -6.054986119270325}, "model_output": [{"sum_logits": -10.78395938873291, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -10.78395938873291, "logits_per_char": -1.5405656269618444, "num_chars": 7}, {"sum_logits": -7.212651252746582, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -3.606325626373291, "logits_per_char": -1.030378750392369, "num_chars": 7}, {"sum_logits": -7.1575446128845215, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.158048629760742, "logits_per_token": -7.1575446128845215, "logits_per_char": -1.4315089225769042, "num_chars": 5}, {"sum_logits": -4.742806911468506, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.66987133026123, "logits_per_token": -4.742806911468506, "logits_per_char": -0.9485613822937011, "num_chars": 5}, {"sum_logits": -12.505964279174805, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -6.252982139587402, "logits_per_char": -1.0421636899312336, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 955, "native_id": "69425fb4cd2dc034e9ff223d2d5676ec", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.598982334136963, "incorrect_loss_raw": 12.93138861656189, "correct_loss_per_char": 0.633248527844747, "incorrect_loss_per_char": 1.104303297653697, "correct_loss_per_token": 3.7994911670684814, "incorrect_loss_per_token": 7.346023797988892, "correct_loss_uncond": -9.546395778656006, "incorrect_loss_uncond": -5.757262229919434}, "model_output": [{"sum_logits": -16.672420501708984, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -8.336210250854492, "logits_per_char": -1.3893683751424153, "num_chars": 12}, {"sum_logits": -7.598982334136963, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -3.7994911670684814, "logits_per_char": -0.633248527844747, "num_chars": 12}, {"sum_logits": -7.042635917663574, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.392474174499512, "logits_per_token": -7.042635917663574, "logits_per_char": -0.7825151019626193, "num_chars": 9}, {"sum_logits": -16.58037757873535, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.69821548461914, "logits_per_token": -8.290188789367676, "logits_per_char": -0.9753163281609031, "num_chars": 17}, {"sum_logits": -11.430120468139648, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.013092041015625, "logits_per_token": -5.715060234069824, "logits_per_char": -1.2700133853488498, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 956, "native_id": "f75b22d5b88ac56ae7df030c1ebeded5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.723690032958984, "incorrect_loss_raw": 6.207429766654968, "correct_loss_per_char": 0.6748128618512835, "incorrect_loss_per_char": 0.7492389957110086, "correct_loss_per_token": 4.723690032958984, "incorrect_loss_per_token": 5.63217967748642, "correct_loss_uncond": -9.991025924682617, "incorrect_loss_uncond": -10.639456152915955}, "model_output": [{"sum_logits": -4.602000713348389, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -2.3010003566741943, "logits_per_char": -0.38350005944569904, "num_chars": 12}, {"sum_logits": -7.329758644104004, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -7.329758644104004, "logits_per_char": -0.9162198305130005, "num_chars": 8}, {"sum_logits": -6.111601829528809, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.724822998046875, "logits_per_token": -6.111601829528809, "logits_per_char": -1.018600304921468, "num_chars": 6}, {"sum_logits": -6.786357879638672, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -6.786357879638672, "logits_per_char": -0.6786357879638671, "num_chars": 10}, {"sum_logits": -4.723690032958984, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -4.723690032958984, "logits_per_char": -0.6748128618512835, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 957, "native_id": "4eb3e69c0d42a2287692d2b9d2cb5979", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.933454990386963, "incorrect_loss_raw": 10.649242639541626, "correct_loss_per_char": 1.1555758317311604, "incorrect_loss_per_char": 1.4940288318528068, "correct_loss_per_token": 6.933454990386963, "incorrect_loss_per_token": 9.24425494670868, "correct_loss_uncond": -6.661901950836182, "incorrect_loss_uncond": -2.153886079788208}, "model_output": [{"sum_logits": -10.309343338012695, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -10.309343338012695, "logits_per_char": -1.1454825931125217, "num_chars": 9}, {"sum_logits": -6.933454990386963, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.595356941223145, "logits_per_token": -6.933454990386963, "logits_per_char": -1.1555758317311604, "num_chars": 6}, {"sum_logits": -11.580599784851074, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -11.580599784851074, "logits_per_char": -2.3161199569702147, "num_chars": 5}, {"sum_logits": -9.46712589263916, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.1411771774292, "logits_per_token": -9.46712589263916, "logits_per_char": -1.57785431543986, "num_chars": 6}, {"sum_logits": -11.239901542663574, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.269561767578125, "logits_per_token": -5.619950771331787, "logits_per_char": -0.9366584618886312, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 958, "native_id": "7d937233b4a9043da0b976dbd42d141b", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.252606391906738, "incorrect_loss_raw": 6.935568571090698, "correct_loss_per_char": 0.4809697224543645, "incorrect_loss_per_char": 0.7404194351226565, "correct_loss_per_token": 6.252606391906738, "incorrect_loss_per_token": 6.935568571090698, "correct_loss_uncond": -8.601075172424316, "incorrect_loss_uncond": -6.988185167312622}, "model_output": [{"sum_logits": -10.707895278930664, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.939125061035156, "logits_per_token": -10.707895278930664, "logits_per_char": -1.1897661421034071, "num_chars": 9}, {"sum_logits": -6.568950653076172, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.295148849487305, "logits_per_token": -6.568950653076172, "logits_per_char": -0.46921076093401226, "num_chars": 14}, {"sum_logits": -5.1669816970825195, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.684188842773438, "logits_per_token": -5.1669816970825195, "logits_per_char": -0.8611636161804199, "num_chars": 6}, {"sum_logits": -6.252606391906738, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.853681564331055, "logits_per_token": -6.252606391906738, "logits_per_char": -0.4809697224543645, "num_chars": 13}, {"sum_logits": -5.2984466552734375, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.776552200317383, "logits_per_token": -5.2984466552734375, "logits_per_char": -0.44153722127278644, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 959, "native_id": "6bd176cc91a2a2088807ec446c008856", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9742839336395264, "incorrect_loss_raw": 9.391873836517334, "correct_loss_per_char": 0.1645236611366272, "incorrect_loss_per_char": 1.27053099738227, "correct_loss_per_token": 1.9742839336395264, "incorrect_loss_per_token": 7.671410083770752, "correct_loss_uncond": -14.002498865127563, "incorrect_loss_uncond": -6.228636980056763}, "model_output": [{"sum_logits": -1.9742839336395264, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -1.9742839336395264, "logits_per_char": -0.1645236611366272, "num_chars": 12}, {"sum_logits": -6.918210029602051, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.496217727661133, "logits_per_token": -6.918210029602051, "logits_per_char": -0.8647762537002563, "num_chars": 8}, {"sum_logits": -9.187579154968262, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -9.187579154968262, "logits_per_char": -1.1484473943710327, "num_chars": 8}, {"sum_logits": -13.763710021972656, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -6.881855010986328, "logits_per_char": -1.5293011135525174, "num_chars": 9}, {"sum_logits": -7.697996139526367, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.993163108825684, "logits_per_token": -7.697996139526367, "logits_per_char": -1.5395992279052735, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 960, "native_id": "c3890d43b84635d9e61c007ca2521d5b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.607632637023926, "incorrect_loss_raw": 16.16098666191101, "correct_loss_per_char": 0.6621255874633789, "incorrect_loss_per_char": 1.2024045859271215, "correct_loss_per_token": 4.303816318511963, "incorrect_loss_per_token": 6.539361894130707, "correct_loss_uncond": -10.595643043518066, "incorrect_loss_uncond": -3.1442911624908447}, "model_output": [{"sum_logits": -14.34388256072998, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.08214569091797, "logits_per_token": -3.585970640182495, "logits_per_char": -0.7549411874068411, "num_chars": 19}, {"sum_logits": -19.916790008544922, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.349485397338867, "logits_per_token": -9.958395004272461, "logits_per_char": -1.5320607698880708, "num_chars": 13}, {"sum_logits": -14.911943435668945, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.56057357788086, "logits_per_token": -7.455971717834473, "logits_per_char": -1.4911943435668946, "num_chars": 10}, {"sum_logits": -8.607632637023926, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.203275680541992, "logits_per_token": -4.303816318511963, "logits_per_char": -0.6621255874633789, "num_chars": 13}, {"sum_logits": -15.471330642700195, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -5.157110214233398, "logits_per_char": -1.0314220428466796, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 961, "native_id": "6195ed74cf445cb5d991e1076a080dde", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.202851295471191, "incorrect_loss_raw": 8.798505783081055, "correct_loss_per_char": 0.4771424073439378, "incorrect_loss_per_char": 0.6795003996995499, "correct_loss_per_token": 3.1014256477355957, "incorrect_loss_per_token": 3.8714509407679243, "correct_loss_uncond": -11.867146492004395, "incorrect_loss_uncond": -8.906369686126709}, "model_output": [{"sum_logits": -6.026481628417969, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.151260375976562, "logits_per_token": -3.0132408142089844, "logits_per_char": -0.4304629734584263, "num_chars": 14}, {"sum_logits": -12.66724681854248, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.017868041992188, "logits_per_token": -4.222415606180827, "logits_per_char": -1.151567892594771, "num_chars": 11}, {"sum_logits": -6.746898651123047, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.11442756652832, "logits_per_token": -3.3734493255615234, "logits_per_char": -0.5622415542602539, "num_chars": 12}, {"sum_logits": -6.202851295471191, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.069997787475586, "logits_per_token": -3.1014256477355957, "logits_per_char": -0.4771424073439378, "num_chars": 13}, {"sum_logits": -9.753396034240723, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.535945892333984, "logits_per_token": -4.876698017120361, "logits_per_char": -0.5737291784847484, "num_chars": 17}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 962, "native_id": "37644422df4bcd28b3f54bbf3fc2c0f8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.4853219985961914, "incorrect_loss_raw": 10.782703399658203, "correct_loss_per_char": 0.24755366643269858, "incorrect_loss_per_char": 1.228037411040002, "correct_loss_per_token": 0.7426609992980957, "incorrect_loss_per_token": 6.209983944892883, "correct_loss_uncond": -12.902801513671875, "incorrect_loss_uncond": -3.6836163997650146}, "model_output": [{"sum_logits": -12.914241790771484, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.534873008728027, "logits_per_token": -6.457120895385742, "logits_per_char": -1.8448916843959264, "num_chars": 7}, {"sum_logits": -1.4853219985961914, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.388123512268066, "logits_per_token": -0.7426609992980957, "logits_per_char": -0.24755366643269858, "num_chars": 6}, {"sum_logits": -11.489954948425293, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.80850601196289, "logits_per_token": -5.7449774742126465, "logits_per_char": -0.8207110677446637, "num_chars": 14}, {"sum_logits": -12.177558898925781, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.105926513671875, "logits_per_token": -6.088779449462891, "logits_per_char": -0.9367352999173678, "num_chars": 13}, {"sum_logits": -6.549057960510254, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.415973663330078, "logits_per_token": -6.549057960510254, "logits_per_char": -1.3098115921020508, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 963, "native_id": "23d97480fe45bace231503f8fc367a5b", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.499192237854004, "incorrect_loss_raw": 18.870776891708374, "correct_loss_per_char": 0.39279944556100027, "incorrect_loss_per_char": 1.0864607863482976, "correct_loss_per_token": 2.749596118927002, "incorrect_loss_per_token": 6.913701551301139, "correct_loss_uncond": -15.96972942352295, "incorrect_loss_uncond": -6.359027624130249}, "model_output": [{"sum_logits": -18.659399032592773, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -22.032726287841797, "logits_per_token": -9.329699516296387, "logits_per_char": -1.243959935506185, "num_chars": 15}, {"sum_logits": -14.731559753417969, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.72524642944336, "logits_per_token": -7.365779876708984, "logits_per_char": -1.2276299794514973, "num_chars": 12}, {"sum_logits": -28.24289321899414, "num_tokens": 7, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -40.337100982666016, "logits_per_token": -4.034699031284878, "logits_per_char": -1.0086747578212194, "num_chars": 28}, {"sum_logits": -13.849255561828613, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.82414436340332, "logits_per_token": -6.924627780914307, "logits_per_char": -0.8655784726142883, "num_chars": 16}, {"sum_logits": -5.499192237854004, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -21.468921661376953, "logits_per_token": -2.749596118927002, "logits_per_char": -0.39279944556100027, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 964, "native_id": "15556e26feaa5a8a29c9f30896e535d4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.685572147369385, "incorrect_loss_raw": 8.450793981552124, "correct_loss_per_char": 0.3346837248120989, "incorrect_loss_per_char": 0.5055272110061794, "correct_loss_per_token": 2.3427860736846924, "incorrect_loss_per_token": 3.7483416001001992, "correct_loss_uncond": -14.255950450897217, "incorrect_loss_uncond": -10.504625082015991}, "model_output": [{"sum_logits": -4.685572147369385, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -2.3427860736846924, "logits_per_char": -0.3346837248120989, "num_chars": 14}, {"sum_logits": -8.339754104614258, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.69821548461914, "logits_per_token": -4.169877052307129, "logits_per_char": -0.49057377085966225, "num_chars": 17}, {"sum_logits": -5.075468063354492, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.991018295288086, "logits_per_token": -2.537734031677246, "logits_per_char": -0.39042062025803786, "num_chars": 13}, {"sum_logits": -8.938624382019043, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.91702651977539, "logits_per_token": -4.4693121910095215, "logits_per_char": -0.5959082921346028, "num_chars": 15}, {"sum_logits": -11.449329376220703, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.215415954589844, "logits_per_token": -3.816443125406901, "logits_per_char": -0.5452061607724145, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 965, "native_id": "6be05d227f4f6fe727218fc8be9df340", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.387033462524414, "incorrect_loss_raw": 13.753406286239624, "correct_loss_per_char": 1.0322527885437012, "incorrect_loss_per_char": 1.225439977645874, "correct_loss_per_token": 6.193516731262207, "incorrect_loss_per_token": 7.564751903216044, "correct_loss_uncond": -4.91609001159668, "incorrect_loss_uncond": -2.871422290802002}, "model_output": [{"sum_logits": -15.688976287841797, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.95945167541504, "logits_per_token": -5.229658762613933, "logits_per_char": -1.3074146906534831, "num_chars": 12}, {"sum_logits": -13.835257530212402, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.20165252685547, "logits_per_token": -6.917628765106201, "logits_per_char": -1.5372508366902669, "num_chars": 9}, {"sum_logits": -10.734048843383789, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.246752738952637, "logits_per_token": -10.734048843383789, "logits_per_char": -1.073404884338379, "num_chars": 10}, {"sum_logits": -14.755342483520508, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.09145736694336, "logits_per_token": -7.377671241760254, "logits_per_char": -0.9836894989013671, "num_chars": 15}, {"sum_logits": -12.387033462524414, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.303123474121094, "logits_per_token": -6.193516731262207, "logits_per_char": -1.0322527885437012, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 966, "native_id": "3f3ba1d9a3bfe63df11247a968eaddce", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 20.26321029663086, "incorrect_loss_raw": 6.594210505485535, "correct_loss_per_char": 1.2664506435394287, "incorrect_loss_per_char": 0.9207656624771299, "correct_loss_per_token": 6.754403432210286, "incorrect_loss_per_token": 4.112065275510153, "correct_loss_uncond": -11.101312637329102, "incorrect_loss_uncond": -8.604690432548523}, "model_output": [{"sum_logits": -4.546777248382568, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.82797622680664, "logits_per_token": -1.5155924161275227, "logits_per_char": -0.6495396069117955, "num_chars": 7}, {"sum_logits": -20.26321029663086, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -31.36452293395996, "logits_per_token": -6.754403432210286, "logits_per_char": -1.2664506435394287, "num_chars": 16}, {"sum_logits": -8.035272598266602, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.456756591796875, "logits_per_token": -8.035272598266602, "logits_per_char": -1.0044090747833252, "num_chars": 8}, {"sum_logits": -7.314358711242676, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.76834774017334, "logits_per_token": -3.657179355621338, "logits_per_char": -1.2190597852071126, "num_chars": 6}, {"sum_logits": -6.480433464050293, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.742523193359375, "logits_per_token": -3.2402167320251465, "logits_per_char": -0.8100541830062866, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 967, "native_id": "ca9a3ccfb140aa66816f96ac983b6d9f_1", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.360262870788574, "incorrect_loss_raw": 7.4552470445632935, "correct_loss_per_char": 0.8933771451314291, "incorrect_loss_per_char": 0.7087331586413914, "correct_loss_per_token": 5.360262870788574, "incorrect_loss_per_token": 5.954705595970154, "correct_loss_uncond": -9.098785400390625, "incorrect_loss_uncond": -8.76440155506134}, "model_output": [{"sum_logits": -5.363826751708984, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.10545825958252, "logits_per_token": -5.363826751708984, "logits_per_char": -0.5363826751708984, "num_chars": 10}, {"sum_logits": -12.004331588745117, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.804922103881836, "logits_per_token": -6.002165794372559, "logits_per_char": -0.8002887725830078, "num_chars": 15}, {"sum_logits": -5.360262870788574, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -5.360262870788574, "logits_per_char": -0.8933771451314291, "num_chars": 6}, {"sum_logits": -4.200663089752197, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -4.200663089752197, "logits_per_char": -0.4667403433057997, "num_chars": 9}, {"sum_logits": -8.252166748046875, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -8.252166748046875, "logits_per_char": -1.0315208435058594, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 968, "native_id": "487cabfcd776d89748ee7e7bb681ad59", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 16.669893264770508, "incorrect_loss_raw": 15.509016036987305, "correct_loss_per_char": 1.1113262176513672, "incorrect_loss_per_char": 1.0200700763901476, "correct_loss_per_token": 5.556631088256836, "incorrect_loss_per_token": 6.250582893689473, "correct_loss_uncond": -3.2581138610839844, "incorrect_loss_uncond": -5.261995077133179}, "model_output": [{"sum_logits": -16.713911056518555, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.280540466308594, "logits_per_token": -5.571303685506185, "logits_per_char": -1.3928259213765461, "num_chars": 12}, {"sum_logits": -9.99911880493164, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.4194278717041, "logits_per_token": -4.99955940246582, "logits_per_char": -0.9090108004483309, "num_chars": 11}, {"sum_logits": -16.669893264770508, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.928007125854492, "logits_per_token": -5.556631088256836, "logits_per_char": -1.1113262176513672, "num_chars": 15}, {"sum_logits": -7.467613220214844, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.809311866760254, "logits_per_token": -7.467613220214844, "logits_per_char": -0.7467613220214844, "num_chars": 10}, {"sum_logits": -27.85542106628418, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -30.574764251708984, "logits_per_token": -6.963855266571045, "logits_per_char": -1.031682261714229, "num_chars": 27}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 969, "native_id": "6915dfdefe3b1cd5fd8886c8bb84929a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.840647220611572, "incorrect_loss_raw": 13.121977090835571, "correct_loss_per_char": 0.40338726838429767, "incorrect_loss_per_char": 1.3009192065587118, "correct_loss_per_token": 4.840647220611572, "incorrect_loss_per_token": 10.019461154937744, "correct_loss_uncond": -10.168917179107666, "incorrect_loss_uncond": -2.941035032272339}, "model_output": [{"sum_logits": -4.840647220611572, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.009564399719238, "logits_per_token": -4.840647220611572, "logits_per_char": -0.40338726838429767, "num_chars": 12}, {"sum_logits": -13.432195663452148, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.826990127563477, "logits_per_token": -13.432195663452148, "logits_per_char": -1.9188850947788783, "num_chars": 7}, {"sum_logits": -10.160823822021484, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -19.294719696044922, "logits_per_token": -5.080411911010742, "logits_per_char": -0.7257731301443917, "num_chars": 14}, {"sum_logits": -14.659303665161133, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.106880187988281, "logits_per_token": -7.329651832580566, "logits_per_char": -0.9772869110107422, "num_chars": 15}, {"sum_logits": -14.23558521270752, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.023458480834961, "logits_per_token": -14.23558521270752, "logits_per_char": -1.5817316903008356, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 970, "native_id": "ec224c1dbfb569cce7ec317fe987ae68", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.642332077026367, "incorrect_loss_raw": 12.142880201339722, "correct_loss_per_char": 1.1642332077026367, "incorrect_loss_per_char": 1.2505653492816082, "correct_loss_per_token": 5.821166038513184, "incorrect_loss_per_token": 6.7594451904296875, "correct_loss_uncond": -4.76930046081543, "incorrect_loss_uncond": -4.799604892730713}, "model_output": [{"sum_logits": -17.799728393554688, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.376291275024414, "logits_per_token": -8.899864196777344, "logits_per_char": -1.7799728393554688, "num_chars": 10}, {"sum_logits": -11.642332077026367, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.411632537841797, "logits_per_token": -5.821166038513184, "logits_per_char": -1.1642332077026367, "num_chars": 10}, {"sum_logits": -9.985595703125, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.17339324951172, "logits_per_token": -4.9927978515625, "logits_per_char": -0.9077814275568182, "num_chars": 11}, {"sum_logits": -15.282155990600586, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.698741912841797, "logits_per_token": -7.641077995300293, "logits_per_char": -1.5282155990600585, "num_chars": 10}, {"sum_logits": -5.504040718078613, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.521513938903809, "logits_per_token": -5.504040718078613, "logits_per_char": -0.7862915311540876, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 971, "native_id": "0cba8ddda21e29c8c53482e131d741cd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.876470565795898, "incorrect_loss_raw": 12.797497749328613, "correct_loss_per_char": 1.073039213816325, "incorrect_loss_per_char": 1.231645502646764, "correct_loss_per_token": 6.438235282897949, "incorrect_loss_per_token": 8.298508961995443, "correct_loss_uncond": -6.157819747924805, "incorrect_loss_uncond": -3.375839948654175}, "model_output": [{"sum_logits": -12.684853553771973, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.165050506591797, "logits_per_token": -4.228284517923991, "logits_per_char": -1.4094281726413302, "num_chars": 9}, {"sum_logits": -12.876470565795898, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.034290313720703, "logits_per_token": -6.438235282897949, "logits_per_char": -1.073039213816325, "num_chars": 12}, {"sum_logits": -14.30907917022705, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.585615158081055, "logits_per_token": -4.769693056742351, "logits_per_char": -0.7949488427903917, "num_chars": 18}, {"sum_logits": -12.09209156036377, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -12.09209156036377, "logits_per_char": -1.209209156036377, "num_chars": 10}, {"sum_logits": -12.10396671295166, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -12.10396671295166, "logits_per_char": -1.5129958391189575, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 972, "native_id": "e65559cd9f5d96b577caeb78d9033502", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.107667922973633, "incorrect_loss_raw": 13.719353556632996, "correct_loss_per_char": 0.4107667922973633, "incorrect_loss_per_char": 1.1154745202772425, "correct_loss_per_token": 4.107667922973633, "incorrect_loss_per_token": 8.166710019111633, "correct_loss_uncond": -9.7410306930542, "incorrect_loss_uncond": -4.246109843254089}, "model_output": [{"sum_logits": -10.551130294799805, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.07139778137207, "logits_per_token": -10.551130294799805, "logits_per_char": -0.8792608578999838, "num_chars": 12}, {"sum_logits": -4.107667922973633, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.848698616027832, "logits_per_token": -4.107667922973633, "logits_per_char": -0.4107667922973633, "num_chars": 10}, {"sum_logits": -7.443053722381592, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.821576118469238, "logits_per_token": -7.443053722381592, "logits_per_char": -1.063293388911656, "num_chars": 7}, {"sum_logits": -14.269475936889648, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.859943389892578, "logits_per_token": -7.134737968444824, "logits_per_char": -1.189122994740804, "num_chars": 12}, {"sum_logits": -22.613754272460938, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.108936309814453, "logits_per_token": -7.5379180908203125, "logits_per_char": -1.3302208395565258, "num_chars": 17}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 973, "native_id": "b8937a30f25093910c040f4e63e1d352", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.159029960632324, "incorrect_loss_raw": 15.713849306106567, "correct_loss_per_char": 0.25993937253952026, "incorrect_loss_per_char": 1.4196815519899755, "correct_loss_per_token": 2.079514980316162, "incorrect_loss_per_token": 7.8630852699279785, "correct_loss_uncond": -19.322745323181152, "incorrect_loss_uncond": -2.9644412994384766}, "model_output": [{"sum_logits": -17.374622344970703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.98822784423828, "logits_per_token": -8.687311172485352, "logits_per_char": -1.579511122270064, "num_chars": 11}, {"sum_logits": -17.6188907623291, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.43353271484375, "logits_per_token": -8.80944538116455, "logits_per_char": -1.355299289409931, "num_chars": 13}, {"sum_logits": -4.159029960632324, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.481775283813477, "logits_per_token": -2.079514980316162, "logits_per_char": -0.25993937253952026, "num_chars": 16}, {"sum_logits": -7.002434730529785, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.464104652404785, "logits_per_token": -7.002434730529785, "logits_per_char": -1.7506086826324463, "num_chars": 4}, {"sum_logits": -20.85944938659668, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.82729721069336, "logits_per_token": -6.953149795532227, "logits_per_char": -0.9933071136474609, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 974, "native_id": "aabe8eb218468fc63b6c9aa6d428c951", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.695493698120117, "incorrect_loss_raw": 8.87730598449707, "correct_loss_per_char": 1.3369367122650146, "incorrect_loss_per_char": 1.2567452061743962, "correct_loss_per_token": 5.347746849060059, "incorrect_loss_per_token": 7.572149276733398, "correct_loss_uncond": -6.61949348449707, "incorrect_loss_uncond": -5.54461145401001}, "model_output": [{"sum_logits": -6.764278888702393, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.019075393676758, "logits_per_token": -6.764278888702393, "logits_per_char": -1.3528557777404786, "num_chars": 5}, {"sum_logits": -10.695493698120117, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.314987182617188, "logits_per_token": -5.347746849060059, "logits_per_char": -1.3369367122650146, "num_chars": 8}, {"sum_logits": -10.441253662109375, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -21.159774780273438, "logits_per_token": -5.2206268310546875, "logits_per_char": -0.8701044718424479, "num_chars": 12}, {"sum_logits": -10.356975555419922, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.699202537536621, "logits_per_token": -10.356975555419922, "logits_per_char": -1.4795679364885603, "num_chars": 7}, {"sum_logits": -7.946715831756592, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.809617042541504, "logits_per_token": -7.946715831756592, "logits_per_char": -1.3244526386260986, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 975, "native_id": "43ba9669564217f2f909f33acbedaf95", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.882200241088867, "incorrect_loss_raw": 15.032065629959106, "correct_loss_per_char": 0.8487285886492048, "incorrect_loss_per_char": 1.8313496203133557, "correct_loss_per_token": 3.960733413696289, "incorrect_loss_per_token": 15.032065629959106, "correct_loss_uncond": -9.109018325805664, "incorrect_loss_uncond": 0.4197702407836914}, "model_output": [{"sum_logits": -13.654457092285156, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.279019355773926, "logits_per_token": -13.654457092285156, "logits_per_char": -2.275742848714193, "num_chars": 6}, {"sum_logits": -11.882200241088867, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.99121856689453, "logits_per_token": -3.960733413696289, "logits_per_char": -0.8487285886492048, "num_chars": 14}, {"sum_logits": -15.861108779907227, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.378949165344238, "logits_per_token": -15.861108779907227, "logits_per_char": -1.9826385974884033, "num_chars": 8}, {"sum_logits": -14.060208320617676, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -14.060208320617676, "logits_per_char": -1.5622453689575195, "num_chars": 9}, {"sum_logits": -16.552488327026367, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.18503475189209, "logits_per_token": -16.552488327026367, "logits_per_char": -1.5047716660933061, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 976, "native_id": "2b9b625c788584b8d41f1a74d740e126", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.575500965118408, "incorrect_loss_raw": 13.653973817825317, "correct_loss_per_char": 0.6195001072353787, "incorrect_loss_per_char": 1.3798492336156751, "correct_loss_per_token": 5.575500965118408, "incorrect_loss_per_token": 8.039317011833191, "correct_loss_uncond": -10.456538677215576, "incorrect_loss_uncond": -2.497507095336914}, "model_output": [{"sum_logits": -17.212099075317383, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.182411193847656, "logits_per_token": -8.606049537658691, "logits_per_char": -1.9124554528130426, "num_chars": 9}, {"sum_logits": -9.698640823364258, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.076739311218262, "logits_per_token": -9.698640823364258, "logits_per_char": -1.3855201176234655, "num_chars": 7}, {"sum_logits": -13.625287055969238, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.003299713134766, "logits_per_token": -6.812643527984619, "logits_per_char": -1.048099004305326, "num_chars": 13}, {"sum_logits": -5.575500965118408, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.032039642333984, "logits_per_token": -5.575500965118408, "logits_per_char": -0.6195001072353787, "num_chars": 9}, {"sum_logits": -14.07986831665039, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.343473434448242, "logits_per_token": -7.039934158325195, "logits_per_char": -1.173322359720866, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 977, "native_id": "eb6807290df71b040e2c7bcc5d11fdea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.296810150146484, "incorrect_loss_raw": 11.287556171417236, "correct_loss_per_char": 0.6648405075073243, "incorrect_loss_per_char": 1.439977988982812, "correct_loss_per_token": 6.648405075073242, "incorrect_loss_per_token": 9.5640230178833, "correct_loss_uncond": -8.123332977294922, "incorrect_loss_uncond": -2.8923375606536865}, "model_output": [{"sum_logits": -13.296810150146484, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -21.420143127441406, "logits_per_token": -6.648405075073242, "logits_per_char": -0.6648405075073243, "num_chars": 20}, {"sum_logits": -9.603440284729004, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.922107696533203, "logits_per_token": -9.603440284729004, "logits_per_char": -1.6005733807881672, "num_chars": 6}, {"sum_logits": -12.665209770202637, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -12.665209770202637, "logits_per_char": -1.5831512212753296, "num_chars": 8}, {"sum_logits": -13.788265228271484, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.374744415283203, "logits_per_token": -6.894132614135742, "logits_per_char": -1.0606357867901142, "num_chars": 13}, {"sum_logits": -9.09330940246582, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.321873664855957, "logits_per_token": -9.09330940246582, "logits_per_char": -1.5155515670776367, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 978, "native_id": "f06852fb4bb2764dc208a991d037f211", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.958361625671387, "incorrect_loss_raw": 11.41521668434143, "correct_loss_per_char": 3.2395904064178467, "incorrect_loss_per_char": 0.7201947573622967, "correct_loss_per_token": 6.479180812835693, "incorrect_loss_per_token": 3.9507397015889487, "correct_loss_uncond": -2.417057991027832, "incorrect_loss_uncond": -9.426607131958008}, "model_output": [{"sum_logits": -8.040102005004883, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.934810638427734, "logits_per_token": -2.6800340016682944, "logits_per_char": -0.40200510025024416, "num_chars": 20}, {"sum_logits": -8.919090270996094, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.968976974487305, "logits_per_token": -2.9730300903320312, "logits_per_char": -0.5246523688821232, "num_chars": 17}, {"sum_logits": -12.958361625671387, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.375419616699219, "logits_per_token": -6.479180812835693, "logits_per_char": -3.2395904064178467, "num_chars": 4}, {"sum_logits": -24.735706329345703, "num_tokens": 4, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -28.0704288482666, "logits_per_token": -6.183926582336426, "logits_per_char": -1.6490470886230468, "num_chars": 15}, {"sum_logits": -3.965968132019043, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -3.965968132019043, "logits_per_char": -0.30507447169377255, "num_chars": 13}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 979, "native_id": "5efadabaf61b5174916e3ab659bcd283", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.361961364746094, "incorrect_loss_raw": 7.15860903263092, "correct_loss_per_char": 1.262440151638455, "incorrect_loss_per_char": 0.7524381806754102, "correct_loss_per_token": 5.680980682373047, "incorrect_loss_per_token": 5.283764660358429, "correct_loss_uncond": -8.751012802124023, "incorrect_loss_uncond": -9.059249758720398}, "model_output": [{"sum_logits": -9.535809516906738, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -17.831636428833008, "logits_per_token": -4.767904758453369, "logits_per_char": -1.1919761896133423, "num_chars": 8}, {"sum_logits": -4.331722259521484, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -4.331722259521484, "logits_per_char": -0.3937929326837713, "num_chars": 11}, {"sum_logits": -9.303958892822266, "num_tokens": 1, "num_tokens_all": 188, "is_greedy": false, "sum_logits_uncond": -13.135459899902344, "logits_per_token": -9.303958892822266, "logits_per_char": -1.033773210313585, "num_chars": 9}, {"sum_logits": -5.462945461273193, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -2.7314727306365967, "logits_per_char": -0.3902103900909424, "num_chars": 14}, {"sum_logits": -11.361961364746094, "num_tokens": 2, "num_tokens_all": 189, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -5.680980682373047, "logits_per_char": -1.262440151638455, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 980, "native_id": "e9d4c747018ff81b8c0aefb5abc3c539", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.75139045715332, "incorrect_loss_raw": 12.133172035217285, "correct_loss_per_char": 0.8393850326538086, "incorrect_loss_per_char": 0.8565941097244383, "correct_loss_per_token": 5.87569522857666, "incorrect_loss_per_token": 5.065567302703857, "correct_loss_uncond": -9.599950790405273, "incorrect_loss_uncond": -8.43619680404663}, "model_output": [{"sum_logits": -11.75139045715332, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.351341247558594, "logits_per_token": -5.87569522857666, "logits_per_char": -0.8393850326538086, "num_chars": 14}, {"sum_logits": -10.530616760253906, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.020206451416016, "logits_per_token": -5.265308380126953, "logits_per_char": -0.5850342644585503, "num_chars": 18}, {"sum_logits": -9.070708274841309, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.521936416625977, "logits_per_token": -4.535354137420654, "logits_per_char": -0.9070708274841308, "num_chars": 10}, {"sum_logits": -13.346916198730469, "num_tokens": 5, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.973186492919922, "logits_per_token": -2.669383239746094, "logits_per_char": -0.6355674380347842, "num_chars": 21}, {"sum_logits": -15.584446907043457, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.76214599609375, "logits_per_token": -7.7922234535217285, "logits_per_char": -1.298703908920288, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 981, "native_id": "30a8cfd186f1aae5acd425a52d058863", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.463204383850098, "incorrect_loss_raw": 12.53712511062622, "correct_loss_per_char": 1.2438673973083496, "incorrect_loss_per_char": 1.2315409626279559, "correct_loss_per_token": 7.463204383850098, "incorrect_loss_per_token": 8.975780129432678, "correct_loss_uncond": -6.175302505493164, "incorrect_loss_uncond": -2.561758041381836}, "model_output": [{"sum_logits": -10.861908912658691, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.222362518310547, "logits_per_token": -5.430954456329346, "logits_per_char": -1.086190891265869, "num_chars": 10}, {"sum_logits": -7.463204383850098, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.638506889343262, "logits_per_token": -7.463204383850098, "logits_per_char": -1.2438673973083496, "num_chars": 6}, {"sum_logits": -11.974517822265625, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.077799797058105, "logits_per_token": -11.974517822265625, "logits_per_char": -1.1974517822265625, "num_chars": 10}, {"sum_logits": -9.683222770690918, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -9.683222770690918, "logits_per_char": -1.383317538670131, "num_chars": 7}, {"sum_logits": -17.62885093688965, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.61535930633545, "logits_per_token": -8.814425468444824, "logits_per_char": -1.2592036383492606, "num_chars": 14}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 982, "native_id": "9e7805871c8a276300a89fe910a90949", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.214783668518066, "incorrect_loss_raw": 11.525054812431335, "correct_loss_per_char": 0.6012319723765055, "incorrect_loss_per_char": 1.379506099765951, "correct_loss_per_token": 3.607391834259033, "incorrect_loss_per_token": 8.263009548187256, "correct_loss_uncond": -7.924190521240234, "incorrect_loss_uncond": -2.33464252948761}, "model_output": [{"sum_logits": -7.214783668518066, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.1389741897583, "logits_per_token": -3.607391834259033, "logits_per_char": -0.6012319723765055, "num_chars": 12}, {"sum_logits": -12.070467948913574, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.057981491088867, "logits_per_token": -6.035233974456787, "logits_per_char": -1.0973152680830522, "num_chars": 11}, {"sum_logits": -14.025894165039062, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.244917869567871, "logits_per_token": -7.012947082519531, "logits_per_char": -1.4025894165039063, "num_chars": 10}, {"sum_logits": -7.540262699127197, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -8.306073188781738, "logits_per_token": -7.540262699127197, "logits_per_char": -1.8850656747817993, "num_chars": 4}, {"sum_logits": -12.463594436645508, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.829816818237305, "logits_per_token": -12.463594436645508, "logits_per_char": -1.1330540396950461, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 983, "native_id": "047c2d8c65d297b39aa42821c1ca76a9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 17.384550094604492, "incorrect_loss_raw": 18.627042293548584, "correct_loss_per_char": 0.8692275047302246, "incorrect_loss_per_char": 1.2935002629573529, "correct_loss_per_token": 5.794850031534831, "incorrect_loss_per_token": 6.473323265711467, "correct_loss_uncond": -11.455770492553711, "incorrect_loss_uncond": -6.5767011642456055}, "model_output": [{"sum_logits": -18.71929931640625, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -25.767526626586914, "logits_per_token": -9.359649658203125, "logits_per_char": -1.5599416097005208, "num_chars": 12}, {"sum_logits": -17.384550094604492, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -28.840320587158203, "logits_per_token": -5.794850031534831, "logits_per_char": -0.8692275047302246, "num_chars": 20}, {"sum_logits": -10.903985977172852, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -5.451992988586426, "logits_per_char": -1.3629982471466064, "num_chars": 8}, {"sum_logits": -10.802509307861328, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.77212142944336, "logits_per_token": -5.401254653930664, "logits_per_char": -0.8309622544508714, "num_chars": 13}, {"sum_logits": -34.082374572753906, "num_tokens": 6, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -38.449466705322266, "logits_per_token": -5.680395762125651, "logits_per_char": -1.4200989405314128, "num_chars": 24}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 984, "native_id": "0bed77da54b6c54facd0ee6614aad72e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 13.591058731079102, "incorrect_loss_raw": 10.411582350730896, "correct_loss_per_char": 0.970789909362793, "incorrect_loss_per_char": 1.2579558881975357, "correct_loss_per_token": 4.5303529103597, "incorrect_loss_per_token": 8.610498428344727, "correct_loss_uncond": -7.988424301147461, "incorrect_loss_uncond": -4.488459229469299}, "model_output": [{"sum_logits": -13.591058731079102, "num_tokens": 3, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -21.579483032226562, "logits_per_token": -4.5303529103597, "logits_per_char": -0.970789909362793, "num_chars": 14}, {"sum_logits": -7.883643627166748, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.87862491607666, "logits_per_token": -7.883643627166748, "logits_per_char": -0.9854554533958435, "num_chars": 8}, {"sum_logits": -10.062819480895996, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -10.062819480895996, "logits_per_char": -1.1180910534328885, "num_chars": 9}, {"sum_logits": -14.408671379089355, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -15.848628997802734, "logits_per_token": -7.204335689544678, "logits_per_char": -1.600963486565484, "num_chars": 9}, {"sum_logits": -9.291194915771484, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.127243041992188, "logits_per_token": -9.291194915771484, "logits_per_char": -1.3273135593959264, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 985, "native_id": "32e2adee67aace0a98c830fb39463015", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 2.743302345275879, "incorrect_loss_raw": 10.990883648395538, "correct_loss_per_char": 0.30481137169731987, "incorrect_loss_per_char": 1.4170114329366972, "correct_loss_per_token": 1.3716511726379395, "incorrect_loss_per_token": 8.208485901355743, "correct_loss_uncond": -13.202340126037598, "incorrect_loss_uncond": -4.113756835460663}, "model_output": [{"sum_logits": -3.3383519649505615, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.156774520874023, "logits_per_token": -3.3383519649505615, "logits_per_char": -0.30348654226823285, "num_chars": 11}, {"sum_logits": -10.425111770629883, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.118462562561035, "logits_per_token": -10.425111770629883, "logits_per_char": -1.3031389713287354, "num_chars": 8}, {"sum_logits": -7.94088888168335, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -7.94088888168335, "logits_per_char": -1.58817777633667, "num_chars": 5}, {"sum_logits": -2.743302345275879, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.945642471313477, "logits_per_token": -1.3716511726379395, "logits_per_char": -0.30481137169731987, "num_chars": 9}, {"sum_logits": -22.25918197631836, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -11.12959098815918, "logits_per_char": -2.473242441813151, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 986, "native_id": "8272f08792b873885f93d4c148e307e5", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.119239807128906, "incorrect_loss_raw": 13.931608438491821, "correct_loss_per_char": 0.6119239807128907, "incorrect_loss_per_char": 1.913300326040813, "correct_loss_per_token": 2.0397466023763022, "incorrect_loss_per_token": 7.457868059476216, "correct_loss_uncond": -10.024545669555664, "incorrect_loss_uncond": -2.959743022918701}, "model_output": [{"sum_logits": -13.293591499328613, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.77623176574707, "logits_per_token": -6.646795749664307, "logits_per_char": -1.6616989374160767, "num_chars": 8}, {"sum_logits": -6.119239807128906, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.14378547668457, "logits_per_token": -2.0397466023763022, "logits_per_char": -0.6119239807128907, "num_chars": 10}, {"sum_logits": -21.960891723632812, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.886249542236328, "logits_per_token": -10.980445861816406, "logits_per_char": -3.137270246233259, "num_chars": 7}, {"sum_logits": -12.401579856872559, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.909761428833008, "logits_per_token": -4.133859952290853, "logits_per_char": -1.2401579856872558, "num_chars": 10}, {"sum_logits": -8.0703706741333, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.993163108825684, "logits_per_token": -8.0703706741333, "logits_per_char": -1.61407413482666, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 987, "native_id": "bc05bc6b4df7a3d25a361515fe8912ad", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.40085220336914, "incorrect_loss_raw": 12.848987102508545, "correct_loss_per_char": 0.9400852203369141, "incorrect_loss_per_char": 1.3125076823764377, "correct_loss_per_token": 3.133617401123047, "incorrect_loss_per_token": 5.9394611120224, "correct_loss_uncond": -7.502811431884766, "incorrect_loss_uncond": -3.7850522994995117}, "model_output": [{"sum_logits": -10.541333198547363, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.673110961914062, "logits_per_token": -5.270666599273682, "logits_per_char": -1.1712592442830403, "num_chars": 9}, {"sum_logits": -11.640778541564941, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -3.8802595138549805, "logits_per_char": -1.2934198379516602, "num_chars": 9}, {"sum_logits": -12.631149291992188, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -6.315574645996094, "logits_per_char": -1.4034610324435763, "num_chars": 9}, {"sum_logits": -9.40085220336914, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.903663635253906, "logits_per_token": -3.133617401123047, "logits_per_char": -0.9400852203369141, "num_chars": 10}, {"sum_logits": -16.582687377929688, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.492084503173828, "logits_per_token": -8.291343688964844, "logits_per_char": -1.3818906148274739, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 988, "native_id": "b893a6e7a2b172bd71f03c9dbee4f960", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.045939922332764, "incorrect_loss_raw": 8.784610509872437, "correct_loss_per_char": 0.6405399929393422, "incorrect_loss_per_char": 1.0956181680399275, "correct_loss_per_token": 7.045939922332764, "incorrect_loss_per_token": 7.746857762336731, "correct_loss_uncond": -7.748010158538818, "incorrect_loss_uncond": -6.396285772323608}, "model_output": [{"sum_logits": -8.302021980285645, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.805913925170898, "logits_per_token": -4.151010990142822, "logits_per_char": -1.0377527475357056, "num_chars": 8}, {"sum_logits": -12.996936798095703, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.432260513305664, "logits_per_token": -12.996936798095703, "logits_per_char": -1.624617099761963, "num_chars": 8}, {"sum_logits": -5.745047569274902, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.753283500671387, "logits_per_token": -5.745047569274902, "logits_per_char": -0.820721081324986, "num_chars": 7}, {"sum_logits": -7.045939922332764, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -7.045939922332764, "logits_per_char": -0.6405399929393422, "num_chars": 11}, {"sum_logits": -8.094435691833496, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.73212718963623, "logits_per_token": -8.094435691833496, "logits_per_char": -0.8993817435370551, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 989, "native_id": "cf8e30dd6956d03e3f0f0397112a8696", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.846451759338379, "incorrect_loss_raw": 15.595015287399292, "correct_loss_per_char": 0.7372043132781982, "incorrect_loss_per_char": 1.271957741748719, "correct_loss_per_token": 4.4232258796691895, "incorrect_loss_per_token": 8.668647050857544, "correct_loss_uncond": -8.360482215881348, "incorrect_loss_uncond": -4.970689535140991}, "model_output": [{"sum_logits": -8.846451759338379, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.206933975219727, "logits_per_token": -4.4232258796691895, "logits_per_char": -0.7372043132781982, "num_chars": 12}, {"sum_logits": -20.454858779907227, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -24.394817352294922, "logits_per_token": -10.227429389953613, "logits_per_char": -1.7045715649922688, "num_chars": 12}, {"sum_logits": -12.456509590148926, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -12.456509590148926, "logits_per_char": -1.5570636987686157, "num_chars": 8}, {"sum_logits": -16.462182998657227, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.2967472076416, "logits_per_token": -5.487394332885742, "logits_per_char": -1.1758702141898019, "num_chars": 14}, {"sum_logits": -13.006509780883789, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.34688377380371, "logits_per_token": -6.5032548904418945, "logits_per_char": -0.6503254890441894, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 990, "native_id": "159d50e325b59c6d29ec371500e173b4", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.93074893951416, "incorrect_loss_raw": 13.265396356582642, "correct_loss_per_char": 1.48268723487854, "incorrect_loss_per_char": 1.374853937442486, "correct_loss_per_token": 5.93074893951416, "incorrect_loss_per_token": 7.797635436058044, "correct_loss_uncond": -6.0036468505859375, "incorrect_loss_uncond": -2.3339104652404785}, "model_output": [{"sum_logits": -13.52553939819336, "num_tokens": 4, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.519821166992188, "logits_per_token": -3.38138484954834, "logits_per_char": -0.676276969909668, "num_chars": 20}, {"sum_logits": -8.780070304870605, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.67490005493164, "logits_per_token": -4.390035152435303, "logits_per_char": -0.675390023451585, "num_chars": 13}, {"sum_logits": -16.08226776123047, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.239937782287598, "logits_per_token": -16.08226776123047, "logits_per_char": -2.680377960205078, "num_chars": 6}, {"sum_logits": -14.673707962036133, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -7.336853981018066, "logits_per_char": -1.4673707962036133, "num_chars": 10}, {"sum_logits": -5.93074893951416, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.934395790100098, "logits_per_token": -5.93074893951416, "logits_per_char": -1.48268723487854, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 991, "native_id": "17eafc807b198236faf06a66f4c05313", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.8316419124603271, "incorrect_loss_raw": 15.686429500579834, "correct_loss_per_char": 0.06397245480464055, "incorrect_loss_per_char": 2.150721161706107, "correct_loss_per_token": 0.4158209562301636, "incorrect_loss_per_token": 11.024709860483805, "correct_loss_uncond": -13.175673723220825, "incorrect_loss_uncond": -0.05303835868835449}, "model_output": [{"sum_logits": -11.75545883178711, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.969649314880371, "logits_per_token": -11.75545883178711, "logits_per_char": -2.351091766357422, "num_chars": 5}, {"sum_logits": -6.980318069458008, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -6.980318069458008, "logits_per_char": -1.163386344909668, "num_chars": 6}, {"sum_logits": -0.8316419124603271, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -14.007315635681152, "logits_per_token": -0.4158209562301636, "logits_per_char": -0.06397245480464055, "num_chars": 13}, {"sum_logits": -27.970317840576172, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.94774627685547, "logits_per_token": -9.323439280192057, "logits_per_char": -2.797031784057617, "num_chars": 10}, {"sum_logits": -16.039623260498047, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.753283500671387, "logits_per_token": -16.039623260498047, "logits_per_char": -2.291374751499721, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 992, "native_id": "24eebfa678112100803da16dde148b2d", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.260816812515259, "incorrect_loss_raw": 6.957880020141602, "correct_loss_per_char": 0.46583097321646555, "incorrect_loss_per_char": 1.2591006869361514, "correct_loss_per_token": 3.260816812515259, "incorrect_loss_per_token": 6.397988319396973, "correct_loss_uncond": -12.010464429855347, "incorrect_loss_uncond": -7.33711838722229}, "model_output": [{"sum_logits": -4.479133605957031, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -2.2395668029785156, "logits_per_char": -0.639876229422433, "num_chars": 7}, {"sum_logits": -4.829892158508301, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -4.829892158508301, "logits_per_char": -0.8049820264180502, "num_chars": 6}, {"sum_logits": -8.824422836303711, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.11971378326416, "logits_per_token": -8.824422836303711, "logits_per_char": -2.2061057090759277, "num_chars": 4}, {"sum_logits": -9.698071479797363, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.537938117980957, "logits_per_token": -9.698071479797363, "logits_per_char": -1.3854387828281947, "num_chars": 7}, {"sum_logits": -3.260816812515259, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -3.260816812515259, "logits_per_char": -0.46583097321646555, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 993, "native_id": "ec882fc3a9bfaeae2a26fe31c2ef2c07", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 0, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.9802000522613525, "incorrect_loss_raw": 10.560059309005737, "correct_loss_per_char": 0.2828857217516218, "incorrect_loss_per_char": 0.7988291720604919, "correct_loss_per_token": 1.9802000522613525, "incorrect_loss_per_token": 5.5921701192855835, "correct_loss_uncond": -10.499810934066772, "incorrect_loss_uncond": -7.742926359176636}, "model_output": [{"sum_logits": -8.374218940734863, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -2.7914063135782876, "logits_per_char": -0.5582812627156576, "num_chars": 15}, {"sum_logits": -1.9802000522613525, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -1.9802000522613525, "logits_per_char": -0.2828857217516218, "num_chars": 7}, {"sum_logits": -13.128022193908691, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.36546516418457, "logits_per_token": -4.3760073979695635, "logits_per_char": -0.7722365996416878, "num_chars": 17}, {"sum_logits": -11.073458671569824, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.076425552368164, "logits_per_token": -5.536729335784912, "logits_per_char": -0.7909613336835589, "num_chars": 14}, {"sum_logits": -9.66453742980957, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.558618545532227, "logits_per_token": -9.66453742980957, "logits_per_char": -1.0738374922010634, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 994, "native_id": "0a006d16d9042e0c170935e5fbf7f9af", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.0011749267578125, "incorrect_loss_raw": 9.176892042160034, "correct_loss_per_char": 0.6251468658447266, "incorrect_loss_per_char": 1.1625508069992065, "correct_loss_per_token": 5.0011749267578125, "incorrect_loss_per_token": 7.8908268213272095, "correct_loss_uncond": -9.773357391357422, "incorrect_loss_uncond": -6.878593444824219}, "model_output": [{"sum_logits": -11.2478609085083, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.519027709960938, "logits_per_token": -11.2478609085083, "logits_per_char": -1.249762323167589, "num_chars": 9}, {"sum_logits": -10.288521766662598, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -17.000349044799805, "logits_per_token": -5.144260883331299, "logits_per_char": -1.7147536277770996, "num_chars": 6}, {"sum_logits": -8.792765617370605, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.917571067810059, "logits_per_token": -8.792765617370605, "logits_per_char": -0.9769739574856229, "num_chars": 9}, {"sum_logits": -6.378419876098633, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.784994125366211, "logits_per_token": -6.378419876098633, "logits_per_char": -0.7087133195665147, "num_chars": 9}, {"sum_logits": -5.0011749267578125, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.774532318115234, "logits_per_token": -5.0011749267578125, "logits_per_char": -0.6251468658447266, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 995, "native_id": "d33a81660058e570a18fb2eafa284a78", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.996932983398438, "incorrect_loss_raw": 11.278063774108887, "correct_loss_per_char": 0.7854952130998883, "incorrect_loss_per_char": 1.4768853043064927, "correct_loss_per_token": 5.498466491699219, "incorrect_loss_per_token": 9.614331722259521, "correct_loss_uncond": -7.827533721923828, "incorrect_loss_uncond": -3.690969944000244}, "model_output": [{"sum_logits": -10.996932983398438, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.824466705322266, "logits_per_token": -5.498466491699219, "logits_per_char": -0.7854952130998883, "num_chars": 14}, {"sum_logits": -10.69848346710205, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.598287582397461, "logits_per_token": -10.69848346710205, "logits_per_char": -1.1887203852335613, "num_chars": 9}, {"sum_logits": -12.459110260009766, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -12.459110260009766, "logits_per_char": -1.7798728942871094, "num_chars": 7}, {"sum_logits": -8.644804954528809, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.008196830749512, "logits_per_token": -8.644804954528809, "logits_per_char": -1.7289609909057617, "num_chars": 5}, {"sum_logits": -13.309856414794922, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.98604965209961, "logits_per_token": -6.654928207397461, "logits_per_char": -1.2099869467995383, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 996, "native_id": "1e09c3136a743b862e783700b7667028", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.591446876525879, "incorrect_loss_raw": 8.0959712266922, "correct_loss_per_char": 0.5992224433205344, "incorrect_loss_per_char": 1.1772909031973944, "correct_loss_per_token": 6.591446876525879, "incorrect_loss_per_token": 8.0959712266922, "correct_loss_uncond": -9.64777660369873, "incorrect_loss_uncond": -5.032070994377136}, "model_output": [{"sum_logits": -5.80303430557251, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.039102554321289, "logits_per_token": -5.80303430557251, "logits_per_char": -1.160606861114502, "num_chars": 5}, {"sum_logits": -5.413233757019043, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.45186996459961, "logits_per_token": -5.413233757019043, "logits_per_char": -0.6014704174465604, "num_chars": 9}, {"sum_logits": -12.864370346069336, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.509931564331055, "logits_per_token": -12.864370346069336, "logits_per_char": -1.2864370346069336, "num_chars": 10}, {"sum_logits": -8.30324649810791, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.51126480102539, "logits_per_token": -8.30324649810791, "logits_per_char": -1.660649299621582, "num_chars": 5}, {"sum_logits": -6.591446876525879, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.23922348022461, "logits_per_token": -6.591446876525879, "logits_per_char": -0.5992224433205344, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 997, "native_id": "5e851c47682bdf79ec7c139ecf124c9a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.093623161315918, "incorrect_loss_raw": 9.950289011001587, "correct_loss_per_char": 0.9093623161315918, "incorrect_loss_per_char": 1.1145640353361765, "correct_loss_per_token": 9.093623161315918, "incorrect_loss_per_token": 6.690768003463745, "correct_loss_uncond": -5.0037336349487305, "incorrect_loss_uncond": -6.254882335662842}, "model_output": [{"sum_logits": -17.327571868896484, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.620359420776367, "logits_per_token": -8.663785934448242, "logits_per_char": -1.7327571868896485, "num_chars": 10}, {"sum_logits": -6.985169410705566, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -6.985169410705566, "logits_per_char": -0.8731461763381958, "num_chars": 8}, {"sum_logits": -9.093623161315918, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.097356796264648, "logits_per_token": -9.093623161315918, "logits_per_char": -0.9093623161315918, "num_chars": 10}, {"sum_logits": -8.74859619140625, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -4.374298095703125, "logits_per_char": -0.7290496826171875, "num_chars": 12}, {"sum_logits": -6.739818572998047, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -14.046292304992676, "logits_per_token": -6.739818572998047, "logits_per_char": -1.1233030954996746, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 998, "native_id": "b148f18fb8b5a504b67078ef6ac29717", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.622906684875488, "incorrect_loss_raw": 17.435712337493896, "correct_loss_per_char": 0.8748096986250444, "incorrect_loss_per_char": 1.2199065919015921, "correct_loss_per_token": 4.811453342437744, "incorrect_loss_per_token": 7.942047754923503, "correct_loss_uncond": -10.214823722839355, "incorrect_loss_uncond": -2.8190598487854004}, "model_output": [{"sum_logits": -18.619401931762695, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.837158203125, "logits_per_token": -6.206467310587565, "logits_per_char": -1.0952589371625114, "num_chars": 17}, {"sum_logits": -19.417530059814453, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.476987838745117, "logits_per_token": -9.708765029907227, "logits_per_char": -1.1422076505773209, "num_chars": 17}, {"sum_logits": -17.4348201751709, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.651683807373047, "logits_per_token": -8.71741008758545, "logits_per_char": -1.4529016812642415, "num_chars": 12}, {"sum_logits": -9.622906684875488, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.837730407714844, "logits_per_token": -4.811453342437744, "logits_per_char": -0.8748096986250444, "num_chars": 11}, {"sum_logits": -14.271097183227539, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.053258895874023, "logits_per_token": -7.1355485916137695, "logits_per_char": -1.189258098602295, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 999, "native_id": "b6bbe013995fdb5def3d504319af0791", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9479549527168274, "incorrect_loss_raw": 11.609491050243378, "correct_loss_per_char": 0.13542213610240392, "incorrect_loss_per_char": 0.8692527681258495, "correct_loss_per_token": 0.9479549527168274, "incorrect_loss_per_token": 6.293343186378479, "correct_loss_uncond": -13.846343219280243, "incorrect_loss_uncond": -7.3493011593818665}, "model_output": [{"sum_logits": -17.04944610595703, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -8.524723052978516, "logits_per_char": -0.8973392687345806, "num_chars": 19}, {"sum_logits": -21.097164154052734, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -26.689613342285156, "logits_per_token": -10.548582077026367, "logits_per_char": -1.6228587810809796, "num_chars": 13}, {"sum_logits": -0.9479549527168274, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": true, "sum_logits_uncond": -14.79429817199707, "logits_per_token": -0.9479549527168274, "logits_per_char": -0.13542213610240392, "num_chars": 7}, {"sum_logits": -4.382572650909424, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.450950622558594, "logits_per_token": -2.191286325454712, "logits_per_char": -0.39841569553722034, "num_chars": 11}, {"sum_logits": -3.9087812900543213, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.03129005432129, "logits_per_token": -3.9087812900543213, "logits_per_char": -0.5583973271506173, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1000, "native_id": "0c2fa15a02d0b6ca6707e98fac7589e4", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.441748142242432, "incorrect_loss_raw": 17.02348041534424, "correct_loss_per_char": 0.28640779696012797, "incorrect_loss_per_char": 1.2012934715028794, "correct_loss_per_token": 2.720874071121216, "incorrect_loss_per_token": 8.51174020767212, "correct_loss_uncond": -11.365247249603271, "incorrect_loss_uncond": -1.972041368484497}, "model_output": [{"sum_logits": -5.441748142242432, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.806995391845703, "logits_per_token": -2.720874071121216, "logits_per_char": -0.28640779696012797, "num_chars": 19}, {"sum_logits": -17.795320510864258, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.75242805480957, "logits_per_token": -8.897660255432129, "logits_per_char": -1.7795320510864259, "num_chars": 10}, {"sum_logits": -14.487062454223633, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.3908634185791, "logits_per_token": -7.243531227111816, "logits_per_char": -0.804836803012424, "num_chars": 18}, {"sum_logits": -14.433822631835938, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.433670997619629, "logits_per_token": -7.216911315917969, "logits_per_char": -1.2028185526529949, "num_chars": 12}, {"sum_logits": -21.377716064453125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -25.40512466430664, "logits_per_token": -10.688858032226562, "logits_per_char": -1.0179864792596727, "num_chars": 21}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1001, "native_id": "a656e74a943f9e2698a25bbcfb4e96db", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.688699722290039, "incorrect_loss_raw": 9.023009777069092, "correct_loss_per_char": 0.8073916435241699, "incorrect_loss_per_char": 1.0530634100475007, "correct_loss_per_token": 9.688699722290039, "incorrect_loss_per_token": 9.023009777069092, "correct_loss_uncond": -5.087852478027344, "incorrect_loss_uncond": -4.54372763633728}, "model_output": [{"sum_logits": -10.251848220825195, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -10.251848220825195, "logits_per_char": -1.0251848220825195, "num_chars": 10}, {"sum_logits": -8.087821960449219, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -11.939125061035156, "logits_per_token": -8.087821960449219, "logits_per_char": -0.8986468844943576, "num_chars": 9}, {"sum_logits": -9.688699722290039, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.776552200317383, "logits_per_token": -9.688699722290039, "logits_per_char": -0.8073916435241699, "num_chars": 12}, {"sum_logits": -7.8003692626953125, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.238255500793457, "logits_per_token": -7.8003692626953125, "logits_per_char": -0.8667076958550347, "num_chars": 9}, {"sum_logits": -9.95199966430664, "num_tokens": 1, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -14.599336624145508, "logits_per_token": -9.95199966430664, "logits_per_char": -1.4217142377580916, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1002, "native_id": "8086f022f2d4a4888ae1f8c7e4541ab9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.0416741371154785, "incorrect_loss_raw": 11.412764549255371, "correct_loss_per_char": 0.3776046335697174, "incorrect_loss_per_char": 1.4663314660390219, "correct_loss_per_token": 2.0138913790384927, "incorrect_loss_per_token": 7.546353578567505, "correct_loss_uncond": -11.3450026512146, "incorrect_loss_uncond": -4.502715587615967}, "model_output": [{"sum_logits": -8.230350494384766, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.11971378326416, "logits_per_token": -8.230350494384766, "logits_per_char": -2.0575876235961914, "num_chars": 4}, {"sum_logits": -15.901622772216797, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.863563537597656, "logits_per_token": -7.950811386108398, "logits_per_char": -1.2232017517089844, "num_chars": 13}, {"sum_logits": -6.0416741371154785, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.386676788330078, "logits_per_token": -2.0138913790384927, "logits_per_char": -0.3776046335697174, "num_chars": 16}, {"sum_logits": -6.489419937133789, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -6.489419937133789, "logits_per_char": -1.081569989522298, "num_chars": 6}, {"sum_logits": -15.029664993286133, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.37063217163086, "logits_per_token": -7.514832496643066, "logits_per_char": -1.5029664993286134, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1003, "native_id": "5655a3002dd9a6b7dabede1dd26a5893", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.69258975982666, "incorrect_loss_raw": 6.68160355091095, "correct_loss_per_char": 0.7820982933044434, "incorrect_loss_per_char": 1.002996189253671, "correct_loss_per_token": 4.69258975982666, "incorrect_loss_per_token": 6.68160355091095, "correct_loss_uncond": -9.111001014709473, "incorrect_loss_uncond": -6.817323565483093}, "model_output": [{"sum_logits": -4.302538871765137, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.548184394836426, "logits_per_token": -4.302538871765137, "logits_per_char": -0.7170898119608561, "num_chars": 6}, {"sum_logits": -3.8423357009887695, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -3.8423357009887695, "logits_per_char": -0.6403892834981283, "num_chars": 6}, {"sum_logits": -10.775304794311523, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -10.775304794311523, "logits_per_char": -1.5393292563302177, "num_chars": 7}, {"sum_logits": -7.806234836578369, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.220428466796875, "logits_per_token": -7.806234836578369, "logits_per_char": -1.1151764052254813, "num_chars": 7}, {"sum_logits": -4.69258975982666, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.803590774536133, "logits_per_token": -4.69258975982666, "logits_per_char": -0.7820982933044434, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1004, "native_id": "17d9bfaee1efac51b1ca240125bc5977", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.549711227416992, "incorrect_loss_raw": 12.744841814041138, "correct_loss_per_char": 0.7033140818277995, "incorrect_loss_per_char": 1.0174975642136164, "correct_loss_per_token": 5.274855613708496, "incorrect_loss_per_token": 8.810775399208069, "correct_loss_uncond": -10.584833145141602, "incorrect_loss_uncond": -5.4675819873809814}, "model_output": [{"sum_logits": -14.399611473083496, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.578399658203125, "logits_per_token": -14.399611473083496, "logits_per_char": -0.9599740982055665, "num_chars": 15}, {"sum_logits": -10.79822826385498, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.681432723999023, "logits_per_token": -10.79822826385498, "logits_per_char": -1.3497785329818726, "num_chars": 8}, {"sum_logits": -17.07301139831543, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.000240325927734, "logits_per_token": -5.691003799438477, "logits_per_char": -1.1382007598876953, "num_chars": 15}, {"sum_logits": -10.549711227416992, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.134544372558594, "logits_per_token": -5.274855613708496, "logits_per_char": -0.7033140818277995, "num_chars": 15}, {"sum_logits": -8.708516120910645, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.589622497558594, "logits_per_token": -4.354258060455322, "logits_per_char": -0.6220368657793317, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1005, "native_id": "801431167b8bff06b9870abe9721536b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.325905799865723, "incorrect_loss_raw": 10.752883434295654, "correct_loss_per_char": 0.8139895333184136, "incorrect_loss_per_char": 1.0674912902858706, "correct_loss_per_token": 7.325905799865723, "incorrect_loss_per_token": 7.758254766464233, "correct_loss_uncond": -5.477634429931641, "incorrect_loss_uncond": -5.478879451751709}, "model_output": [{"sum_logits": -10.585100173950195, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.92629623413086, "logits_per_token": -10.585100173950195, "logits_per_char": -1.7641833623250325, "num_chars": 6}, {"sum_logits": -10.433713912963867, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.716171264648438, "logits_per_token": -5.216856956481934, "logits_per_char": -0.6955809275309245, "num_chars": 15}, {"sum_logits": -13.5233154296875, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.824705123901367, "logits_per_token": -6.76165771484375, "logits_per_char": -1.0402550330528846, "num_chars": 13}, {"sum_logits": -7.325905799865723, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.803540229797363, "logits_per_token": -7.325905799865723, "logits_per_char": -0.8139895333184136, "num_chars": 9}, {"sum_logits": -8.469404220581055, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.459878921508789, "logits_per_token": -8.469404220581055, "logits_per_char": -0.7699458382346414, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1006, "native_id": "85ebdd4f1a3c2ac900eee8e75e48ccaa", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.602978706359863, "incorrect_loss_raw": 7.389662384986877, "correct_loss_per_char": 0.35407528510460484, "incorrect_loss_per_char": 0.9126613791972872, "correct_loss_per_token": 4.602978706359863, "incorrect_loss_per_token": 6.058067560195923, "correct_loss_uncond": -10.039182662963867, "incorrect_loss_uncond": -6.9625102281570435}, "model_output": [{"sum_logits": -8.608756065368652, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.049534797668457, "logits_per_token": -8.608756065368652, "logits_per_char": -1.2298222950526647, "num_chars": 7}, {"sum_logits": -10.652758598327637, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -5.326379299163818, "logits_per_char": -1.3315948247909546, "num_chars": 8}, {"sum_logits": -5.35634183883667, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -5.35634183883667, "logits_per_char": -0.5951490932040744, "num_chars": 9}, {"sum_logits": -4.940793037414551, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -4.940793037414551, "logits_per_char": -0.4940793037414551, "num_chars": 10}, {"sum_logits": -4.602978706359863, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.64216136932373, "logits_per_token": -4.602978706359863, "logits_per_char": -0.35407528510460484, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1007, "native_id": "db1eb157671109bbb9113b0f71a6b957", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.151693344116211, "incorrect_loss_raw": 6.136795341968536, "correct_loss_per_char": 0.5501302572397085, "incorrect_loss_per_char": 0.9006011262536049, "correct_loss_per_token": 7.151693344116211, "incorrect_loss_per_token": 3.986014584700267, "correct_loss_uncond": -8.241385459899902, "incorrect_loss_uncond": -9.833997189998627}, "model_output": [{"sum_logits": -7.151693344116211, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.393078804016113, "logits_per_token": -7.151693344116211, "logits_per_char": -0.5501302572397085, "num_chars": 13}, {"sum_logits": -5.076826095581055, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -5.076826095581055, "logits_per_char": -0.8461376825968424, "num_chars": 6}, {"sum_logits": -2.2745680809020996, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": true, "sum_logits_uncond": -18.712093353271484, "logits_per_token": -0.7581893603006998, "logits_per_char": -0.14216050505638123, "num_chars": 16}, {"sum_logits": -3.022298574447632, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -3.022298574447632, "logits_per_char": -0.25185821453730267, "num_chars": 12}, {"sum_logits": -14.17348861694336, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.735245704650879, "logits_per_token": -7.08674430847168, "logits_per_char": -2.362248102823893, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1008, "native_id": "c02a3c2d4f726b9e1be99533a24a6ab4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8046724796295166, "incorrect_loss_raw": 11.039127349853516, "correct_loss_per_char": 0.6341120799382528, "incorrect_loss_per_char": 1.44112543984065, "correct_loss_per_token": 3.8046724796295166, "incorrect_loss_per_token": 8.731102466583252, "correct_loss_uncond": -9.569713354110718, "incorrect_loss_uncond": -4.281325340270996}, "model_output": [{"sum_logits": -18.46419906616211, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.142658233642578, "logits_per_token": -9.232099533081055, "logits_per_char": -2.051577674018012, "num_chars": 9}, {"sum_logits": -7.1738080978393555, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -7.1738080978393555, "logits_per_char": -1.4347616195678712, "num_chars": 5}, {"sum_logits": -9.947285652160645, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -9.947285652160645, "logits_per_char": -1.4210408074515206, "num_chars": 7}, {"sum_logits": -3.8046724796295166, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -3.8046724796295166, "logits_per_char": -0.6341120799382528, "num_chars": 6}, {"sum_logits": -8.571216583251953, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -8.571216583251953, "logits_per_char": -0.8571216583251953, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1009, "native_id": "3ed6391c539e6daa5b5fdb1b6d5d8ace", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 14.581085205078125, "incorrect_loss_raw": 11.027504920959473, "correct_loss_per_char": 1.1216219388521635, "incorrect_loss_per_char": 0.9915853131530632, "correct_loss_per_token": 7.2905426025390625, "incorrect_loss_per_token": 7.555765986442566, "correct_loss_uncond": -5.3153533935546875, "incorrect_loss_uncond": -5.135517597198486}, "model_output": [{"sum_logits": -14.581085205078125, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.896438598632812, "logits_per_token": -7.2905426025390625, "logits_per_char": -1.1216219388521635, "num_chars": 13}, {"sum_logits": -13.138506889343262, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -20.452566146850586, "logits_per_token": -6.569253444671631, "logits_per_char": -1.0106543761033278, "num_chars": 13}, {"sum_logits": -14.635404586791992, "num_tokens": 2, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.082725524902344, "logits_per_token": -7.317702293395996, "logits_per_char": -0.9756936391194662, "num_chars": 15}, {"sum_logits": -2.9676618576049805, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -10.5106201171875, "logits_per_token": -2.9676618576049805, "logits_per_char": -0.4946103096008301, "num_chars": 6}, {"sum_logits": -13.368446350097656, "num_tokens": 1, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -14.606178283691406, "logits_per_token": -13.368446350097656, "logits_per_char": -1.4853829277886286, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1010, "native_id": "1db19a32a3edbff9981976dc9ec800ce", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.656850337982178, "incorrect_loss_raw": 10.975782632827759, "correct_loss_per_char": 0.6380708614985148, "incorrect_loss_per_char": 1.9020155622845603, "correct_loss_per_token": 2.552283445994059, "incorrect_loss_per_token": 10.047675967216492, "correct_loss_uncond": -10.581118106842041, "incorrect_loss_uncond": -4.192433834075928}, "model_output": [{"sum_logits": -7.656850337982178, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.23796844482422, "logits_per_token": -2.552283445994059, "logits_per_char": -0.6380708614985148, "num_chars": 12}, {"sum_logits": -9.593772888183594, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -9.593772888183594, "logits_per_char": -1.9187545776367188, "num_chars": 5}, {"sum_logits": -11.609375953674316, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.822380065917969, "logits_per_token": -11.609375953674316, "logits_per_char": -1.658482279096331, "num_chars": 7}, {"sum_logits": -7.424853324890137, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.331665992736816, "logits_per_token": -3.7124266624450684, "logits_per_char": -1.4849706649780274, "num_chars": 5}, {"sum_logits": -15.275128364562988, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.21359634399414, "logits_per_token": -15.275128364562988, "logits_per_char": -2.5458547274271646, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1011, "native_id": "1e5a138b4c7d456c37abf4990b402bbe", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.66627311706543, "incorrect_loss_raw": 10.066336631774902, "correct_loss_per_char": 0.60602482882413, "incorrect_loss_per_char": 1.141090444275192, "correct_loss_per_token": 6.66627311706543, "incorrect_loss_per_token": 6.881518602371216, "correct_loss_uncond": -6.832511901855469, "incorrect_loss_uncond": -4.753991365432739}, "model_output": [{"sum_logits": -6.66627311706543, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.498785018920898, "logits_per_token": -6.66627311706543, "logits_per_char": -0.60602482882413, "num_chars": 11}, {"sum_logits": -9.883255004882812, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.935651779174805, "logits_per_token": -3.2944183349609375, "logits_per_char": -0.8984777277166193, "num_chars": 11}, {"sum_logits": -12.300870895385742, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.283376693725586, "logits_per_token": -6.150435447692871, "logits_per_char": -0.9462208381065955, "num_chars": 13}, {"sum_logits": -5.738531112670898, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.638725280761719, "logits_per_token": -5.738531112670898, "logits_per_char": -0.9564218521118164, "num_chars": 6}, {"sum_logits": -12.342689514160156, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -12.342689514160156, "logits_per_char": -1.7632413591657365, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1012, "native_id": "9402864beae075392d2ee6c10115fc21", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.226743698120117, "incorrect_loss_raw": 11.308372735977173, "correct_loss_per_char": 0.801910264151437, "incorrect_loss_per_char": 1.1688249031702678, "correct_loss_per_token": 5.613371849060059, "incorrect_loss_per_token": 8.02819538116455, "correct_loss_uncond": -8.234197616577148, "incorrect_loss_uncond": -5.856975078582764}, "model_output": [{"sum_logits": -11.878222465515137, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -11.878222465515137, "logits_per_char": -1.6968889236450195, "num_chars": 7}, {"sum_logits": -14.614189147949219, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.945842742919922, "logits_per_token": -7.307094573974609, "logits_per_char": -0.974279276529948, "num_chars": 15}, {"sum_logits": -11.627229690551758, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.84540367126465, "logits_per_token": -5.813614845275879, "logits_per_char": -0.5813614845275878, "num_chars": 20}, {"sum_logits": -11.226743698120117, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.460941314697266, "logits_per_token": -5.613371849060059, "logits_per_char": -0.801910264151437, "num_chars": 14}, {"sum_logits": -7.113849639892578, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -7.113849639892578, "logits_per_char": -1.4227699279785155, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1013, "native_id": "25136807f7b2e78b115698daa1677b4a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.664910793304443, "incorrect_loss_raw": 12.451319456100464, "correct_loss_per_char": 0.5109940528869629, "incorrect_loss_per_char": 1.2653052078353033, "correct_loss_per_token": 3.8324553966522217, "incorrect_loss_per_token": 7.187280535697937, "correct_loss_uncond": -10.36165475845337, "incorrect_loss_uncond": -3.77278470993042}, "model_output": [{"sum_logits": -15.963865280151367, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.156307220458984, "logits_per_token": -7.981932640075684, "logits_per_char": -1.7737628089057074, "num_chars": 9}, {"sum_logits": -16.25460433959961, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.701950073242188, "logits_per_token": -8.127302169799805, "logits_per_char": -1.0159127712249756, "num_chars": 16}, {"sum_logits": -9.893841743469238, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.663773536682129, "logits_per_token": -4.946920871734619, "logits_per_char": -0.9893841743469238, "num_chars": 10}, {"sum_logits": -7.664910793304443, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -3.8324553966522217, "logits_per_char": -0.5109940528869629, "num_chars": 15}, {"sum_logits": -7.692966461181641, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -7.692966461181641, "logits_per_char": -1.2821610768636067, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1014, "native_id": "bc10bf2bfae26a2226823d42956f6cf0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.584051132202148, "incorrect_loss_raw": 8.331869840621948, "correct_loss_per_char": 0.5056034088134765, "incorrect_loss_per_char": 0.8568473208518255, "correct_loss_per_token": 2.528017044067383, "incorrect_loss_per_token": 6.256280183792114, "correct_loss_uncond": -13.627382278442383, "incorrect_loss_uncond": -7.950746297836304}, "model_output": [{"sum_logits": -16.604717254638672, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -24.07889175415039, "logits_per_token": -8.302358627319336, "logits_per_char": -1.1069811503092448, "num_chars": 15}, {"sum_logits": -7.584051132202148, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -2.528017044067383, "logits_per_char": -0.5056034088134765, "num_chars": 15}, {"sum_logits": -5.622077465057373, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -5.622077465057373, "logits_per_char": -0.8031539235796247, "num_chars": 7}, {"sum_logits": -4.071857452392578, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -4.071857452392578, "logits_per_char": -0.8143714904785156, "num_chars": 5}, {"sum_logits": -7.02882719039917, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -7.02882719039917, "logits_per_char": -0.702882719039917, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1015, "native_id": "5a6559db6bae37e3a8af7350be212219", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.292279243469238, "incorrect_loss_raw": 12.437167406082153, "correct_loss_per_char": 0.35768993695576984, "incorrect_loss_per_char": 0.911354700344576, "correct_loss_per_token": 2.146139621734619, "incorrect_loss_per_token": 5.1866066455841064, "correct_loss_uncond": -13.36188793182373, "incorrect_loss_uncond": -5.644219160079956}, "model_output": [{"sum_logits": -17.25026512145996, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.415863037109375, "logits_per_token": -5.750088373819987, "logits_per_char": -1.2321617943899972, "num_chars": 14}, {"sum_logits": -6.245305061340332, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.687694549560547, "logits_per_token": -6.245305061340332, "logits_per_char": -0.8921864373343331, "num_chars": 7}, {"sum_logits": -14.902118682861328, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.968996047973633, "logits_per_token": -4.967372894287109, "logits_per_char": -0.6479182036026664, "num_chars": 23}, {"sum_logits": -4.292279243469238, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.65416717529297, "logits_per_token": -2.146139621734619, "logits_per_char": -0.35768993695576984, "num_chars": 12}, {"sum_logits": -11.350980758666992, "num_tokens": 3, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.252992630004883, "logits_per_token": -3.7836602528889975, "logits_per_char": -0.8731523660513071, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1016, "native_id": "7ae17f5aecacf18c94a47cc48deb6c36", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5837671756744385, "incorrect_loss_raw": 7.496317625045776, "correct_loss_per_char": 0.2559833696910313, "incorrect_loss_per_char": 0.9947997853869484, "correct_loss_per_token": 1.1945890585581462, "incorrect_loss_per_token": 5.7226439118385315, "correct_loss_uncond": -14.139655828475952, "incorrect_loss_uncond": -8.571797847747803}, "model_output": [{"sum_logits": -6.847367286682129, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.93790054321289, "logits_per_token": -3.4236836433410645, "logits_per_char": -0.5706139405568441, "num_chars": 12}, {"sum_logits": -8.483880996704102, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -8.483880996704102, "logits_per_char": -1.2119829995291573, "num_chars": 7}, {"sum_logits": -7.34202241897583, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.146514892578125, "logits_per_token": -3.671011209487915, "logits_per_char": -0.734202241897583, "num_chars": 10}, {"sum_logits": -3.5837671756744385, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.72342300415039, "logits_per_token": -1.1945890585581462, "logits_per_char": -0.2559833696910313, "num_chars": 14}, {"sum_logits": -7.311999797821045, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.277389526367188, "logits_per_token": -7.311999797821045, "logits_per_char": -1.462399959564209, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1017, "native_id": "5d809e0ee19badc66071653630ea7c51", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.774621486663818, "incorrect_loss_raw": 8.970896542072296, "correct_loss_per_char": 0.43405649878761987, "incorrect_loss_per_char": 0.778038870860515, "correct_loss_per_token": 2.387310743331909, "incorrect_loss_per_token": 5.326063960790634, "correct_loss_uncond": -10.77994966506958, "incorrect_loss_uncond": -6.144720017910004}, "model_output": [{"sum_logits": -4.774621486663818, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.554571151733398, "logits_per_token": -2.387310743331909, "logits_per_char": -0.43405649878761987, "num_chars": 11}, {"sum_logits": -6.724925518035889, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.311675071716309, "logits_per_token": -6.724925518035889, "logits_per_char": -0.6113568652759899, "num_chars": 11}, {"sum_logits": -3.2646567821502686, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.367666244506836, "logits_per_token": -1.6323283910751343, "logits_per_char": -0.21764378547668456, "num_chars": 15}, {"sum_logits": -12.717045783996582, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.119810104370117, "logits_per_token": -6.358522891998291, "logits_per_char": -1.5896307229995728, "num_chars": 8}, {"sum_logits": -13.176958084106445, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -6.588479042053223, "logits_per_char": -0.6935241096898129, "num_chars": 19}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1018, "native_id": "ad0943fc37034cd2b7e485021f8b1b8c", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.23225660622119904, "incorrect_loss_raw": 12.537776827812195, "correct_loss_per_char": 0.03870943437019984, "incorrect_loss_per_char": 1.3245925547240618, "correct_loss_per_token": 0.23225660622119904, "incorrect_loss_per_token": 8.001782059669495, "correct_loss_uncond": -13.142129227519035, "incorrect_loss_uncond": -4.634550929069519}, "model_output": [{"sum_logits": -8.100479125976562, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.337035179138184, "logits_per_token": -8.100479125976562, "logits_per_char": -1.0125598907470703, "num_chars": 8}, {"sum_logits": -18.28464698791504, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.689945220947266, "logits_per_token": -9.14232349395752, "logits_per_char": -1.6622406352650037, "num_chars": 11}, {"sum_logits": -5.762670040130615, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.290563583374023, "logits_per_token": -5.762670040130615, "logits_per_char": -0.8232385771615165, "num_chars": 7}, {"sum_logits": -0.23225660622119904, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -0.23225660622119904, "logits_per_char": -0.03870943437019984, "num_chars": 6}, {"sum_logits": -18.003311157226562, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.371767044067383, "logits_per_token": -9.001655578613281, "logits_per_char": -1.8003311157226562, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1019, "native_id": "c2a8c6814ed3e207771cfc23b3b42cf1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 13.315149307250977, "incorrect_loss_raw": 8.225627064704895, "correct_loss_per_char": 0.9510820933750698, "incorrect_loss_per_char": 1.169912494280759, "correct_loss_per_token": 6.657574653625488, "incorrect_loss_per_token": 5.549792846043905, "correct_loss_uncond": -6.866571426391602, "incorrect_loss_uncond": -8.900617241859436}, "model_output": [{"sum_logits": -14.271416664123535, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.930715560913086, "logits_per_token": -7.135708332061768, "logits_per_char": -2.3785694440205893, "num_chars": 6}, {"sum_logits": -13.315149307250977, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.181720733642578, "logits_per_token": -6.657574653625488, "logits_per_char": -0.9510820933750698, "num_chars": 14}, {"sum_logits": -5.351442813873291, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.117111206054688, "logits_per_token": -1.783814271291097, "logits_per_char": -0.3147907537572524, "num_chars": 17}, {"sum_logits": -5.447640419006348, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -5.447640419006348, "logits_per_char": -0.6809550523757935, "num_chars": 8}, {"sum_logits": -7.832008361816406, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -7.832008361816406, "logits_per_char": -1.3053347269694011, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1020, "native_id": "0b52cc905fff0ca69a45e6353d10e401", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.000810384750366, "incorrect_loss_raw": 9.322706818580627, "correct_loss_per_char": 0.4286871978214809, "incorrect_loss_per_char": 0.8520429650562231, "correct_loss_per_token": 3.000810384750366, "incorrect_loss_per_token": 3.3501110871632895, "correct_loss_uncond": -11.713905572891235, "incorrect_loss_uncond": -10.532144904136658}, "model_output": [{"sum_logits": -5.821011543273926, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.35814666748047, "logits_per_token": -2.910505771636963, "logits_per_char": -0.48508429527282715, "num_chars": 12}, {"sum_logits": -18.693134307861328, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.73434066772461, "logits_per_token": -6.231044769287109, "logits_per_char": -2.077014923095703, "num_chars": 9}, {"sum_logits": -8.240970611572266, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.49367332458496, "logits_per_token": -2.746990203857422, "logits_per_char": -0.4337352953459087, "num_chars": 19}, {"sum_logits": -3.000810384750366, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.714715957641602, "logits_per_token": -3.000810384750366, "logits_per_char": -0.4286871978214809, "num_chars": 7}, {"sum_logits": -4.53571081161499, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.8332462310791, "logits_per_token": -1.5119036038716633, "logits_per_char": -0.4123373465104537, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1021, "native_id": "30d0c2006613eec41ae814d76c17a798", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 0.7484455704689026, "incorrect_loss_raw": 9.527511894702911, "correct_loss_per_char": 0.09355569630861282, "incorrect_loss_per_char": 0.8596665286355549, "correct_loss_per_token": 0.7484455704689026, "incorrect_loss_per_token": 4.242916494607925, "correct_loss_uncond": -13.455856263637543, "incorrect_loss_uncond": -8.644447982311249}, "model_output": [{"sum_logits": -8.08875846862793, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -8.08875846862793, "logits_per_char": -1.617751693725586, "num_chars": 5}, {"sum_logits": -24.510948181152344, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -26.866695404052734, "logits_per_token": -6.127737045288086, "logits_per_char": -1.3617193433973525, "num_chars": 18}, {"sum_logits": -2.7272446155548096, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.839492797851562, "logits_per_token": -1.3636223077774048, "logits_per_char": -0.22727038462956747, "num_chars": 12}, {"sum_logits": -2.7830963134765625, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -1.3915481567382812, "logits_per_char": -0.23192469278971353, "num_chars": 12}, {"sum_logits": -0.7484455704689026, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -0.7484455704689026, "logits_per_char": -0.09355569630861282, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1022, "native_id": "f7a6d0d816d14210f3af5dabe21bf804", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.9972052574157715, "incorrect_loss_raw": 8.508249402046204, "correct_loss_per_char": 0.6663561397128634, "incorrect_loss_per_char": 0.9938125593321664, "correct_loss_per_token": 5.9972052574157715, "incorrect_loss_per_token": 4.693833231925964, "correct_loss_uncond": -7.561413288116455, "incorrect_loss_uncond": -7.485027432441711}, "model_output": [{"sum_logits": -5.9972052574157715, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.558618545532227, "logits_per_token": -5.9972052574157715, "logits_per_char": -0.6663561397128634, "num_chars": 9}, {"sum_logits": -10.64858341217041, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.677568435668945, "logits_per_token": -5.324291706085205, "logits_per_char": -1.3310729265213013, "num_chars": 8}, {"sum_logits": -12.099743843078613, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -18.902355194091797, "logits_per_token": -6.049871921539307, "logits_per_char": -1.2099743843078614, "num_chars": 10}, {"sum_logits": -3.5176682472229004, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -3.5176682472229004, "logits_per_char": -0.8794170618057251, "num_chars": 4}, {"sum_logits": -7.767002105712891, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.062938690185547, "logits_per_token": -3.8835010528564453, "logits_per_char": -0.5547858646937779, "num_chars": 14}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1023, "native_id": "c306ab28498b67c53decb9dde1d78bd5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.392000198364258, "incorrect_loss_raw": 8.948272585868835, "correct_loss_per_char": 3.0980000495910645, "incorrect_loss_per_char": 0.8685973243835645, "correct_loss_per_token": 12.392000198364258, "incorrect_loss_per_token": 7.06456313530604, "correct_loss_uncond": -1.2116069793701172, "incorrect_loss_uncond": -7.779788374900818}, "model_output": [{"sum_logits": -5.909997463226318, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.323654174804688, "logits_per_token": -2.954998731613159, "logits_per_char": -0.39399983088175455, "num_chars": 15}, {"sum_logits": -10.02452278137207, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -10.02452278137207, "logits_per_char": -1.2530653476715088, "num_chars": 8}, {"sum_logits": -6.869758605957031, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.827014923095703, "logits_per_token": -2.2899195353190103, "logits_per_char": -0.5284429696890024, "num_chars": 13}, {"sum_logits": -12.988811492919922, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -12.988811492919922, "logits_per_char": -1.2988811492919923, "num_chars": 10}, {"sum_logits": -12.392000198364258, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.603607177734375, "logits_per_token": -12.392000198364258, "logits_per_char": -3.0980000495910645, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1024, "native_id": "637c710ec9582fd9b9e8eaa3f3fe83bb", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.281222343444824, "incorrect_loss_raw": 9.105903029441833, "correct_loss_per_char": 0.9201358159383138, "incorrect_loss_per_char": 0.8578751402314619, "correct_loss_per_token": 4.140611171722412, "incorrect_loss_per_token": 5.841392695903778, "correct_loss_uncond": -11.831751823425293, "incorrect_loss_uncond": -7.649428486824036}, "model_output": [{"sum_logits": -9.194720268249512, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -4.597360134124756, "logits_per_char": -1.0216355853610568, "num_chars": 9}, {"sum_logits": -8.281222343444824, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -4.140611171722412, "logits_per_char": -0.9201358159383138, "num_chars": 9}, {"sum_logits": -6.7447991371154785, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.542440414428711, "logits_per_token": -3.3723995685577393, "logits_per_char": -0.4817713669368199, "num_chars": 14}, {"sum_logits": -10.176563262939453, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.53750228881836, "logits_per_token": -5.088281631469727, "logits_per_char": -0.7828125586876502, "num_chars": 13}, {"sum_logits": -10.30752944946289, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -10.30752944946289, "logits_per_char": -1.1452810499403212, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1025, "native_id": "9ae52783d8fdb5cc2e8caa01542c3341", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 14.978798866271973, "incorrect_loss_raw": 14.596182346343994, "correct_loss_per_char": 0.7132761364891416, "incorrect_loss_per_char": 0.8872342374589708, "correct_loss_per_token": 2.9957597732543944, "incorrect_loss_per_token": 6.184423208236694, "correct_loss_uncond": -6.994387626647949, "incorrect_loss_uncond": -5.299980401992798}, "model_output": [{"sum_logits": -10.12200927734375, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.768940925598145, "logits_per_token": -5.061004638671875, "logits_per_char": -0.8435007731119791, "num_chars": 12}, {"sum_logits": -17.5819091796875, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -23.020206451416016, "logits_per_token": -8.79095458984375, "logits_per_char": -0.9767727322048612, "num_chars": 18}, {"sum_logits": -12.862123489379883, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.351341247558594, "logits_per_token": -6.431061744689941, "logits_per_char": -0.9187231063842773, "num_chars": 14}, {"sum_logits": -17.818687438964844, "num_tokens": 4, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.444162368774414, "logits_per_token": -4.454671859741211, "logits_per_char": -0.8099403381347656, "num_chars": 22}, {"sum_logits": -14.978798866271973, "num_tokens": 5, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.973186492919922, "logits_per_token": -2.9957597732543944, "logits_per_char": -0.7132761364891416, "num_chars": 21}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1026, "native_id": "4f23829b96b38b5633ecc3325281726d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.368282318115234, "incorrect_loss_raw": 8.860754609107971, "correct_loss_per_char": 0.8947137196858724, "incorrect_loss_per_char": 1.2534178654352823, "correct_loss_per_token": 5.368282318115234, "incorrect_loss_per_token": 7.49421751499176, "correct_loss_uncond": -9.565269470214844, "incorrect_loss_uncond": -7.6121004819869995}, "model_output": [{"sum_logits": -2.9642128944396973, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.014530181884766, "logits_per_token": -2.9642128944396973, "logits_per_char": -0.32935698827107746, "num_chars": 9}, {"sum_logits": -10.794477462768555, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.701746940612793, "logits_per_token": -10.794477462768555, "logits_per_char": -1.7990795771280925, "num_chars": 6}, {"sum_logits": -10.932296752929688, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -21.426063537597656, "logits_per_token": -5.466148376464844, "logits_per_char": -1.0932296752929687, "num_chars": 10}, {"sum_logits": -5.368282318115234, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -14.933551788330078, "logits_per_token": -5.368282318115234, "logits_per_char": -0.8947137196858724, "num_chars": 6}, {"sum_logits": -10.752031326293945, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.749079704284668, "logits_per_token": -10.752031326293945, "logits_per_char": -1.792005221048991, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1027, "native_id": "3fcdc0b03e3c8b10692d642676931f4b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.6659493446350098, "incorrect_loss_raw": 8.006820797920227, "correct_loss_per_char": 0.3332436680793762, "incorrect_loss_per_char": 0.7837747125914603, "correct_loss_per_token": 2.6659493446350098, "incorrect_loss_per_token": 4.7565367221832275, "correct_loss_uncond": -13.512961864471436, "incorrect_loss_uncond": -8.98590075969696}, "model_output": [{"sum_logits": -2.6659493446350098, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.178911209106445, "logits_per_token": -2.6659493446350098, "logits_per_char": -0.3332436680793762, "num_chars": 8}, {"sum_logits": -4.269113540649414, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -4.269113540649414, "logits_per_char": -0.711518923441569, "num_chars": 6}, {"sum_logits": -5.584909439086914, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -5.584909439086914, "logits_per_char": -1.1169818878173827, "num_chars": 5}, {"sum_logits": -4.838411808013916, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -4.838411808013916, "logits_per_char": -0.439855618910356, "num_chars": 11}, {"sum_logits": -17.334848403930664, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -27.56207275390625, "logits_per_token": -4.333712100982666, "logits_per_char": -0.8667424201965332, "num_chars": 20}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1028, "native_id": "ddd606743cf71679438a85280f64593a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.290665626525879, "incorrect_loss_raw": 7.05051064491272, "correct_loss_per_char": 0.8817776044209799, "incorrect_loss_per_char": 0.8825627224785941, "correct_loss_per_token": 5.290665626525879, "incorrect_loss_per_token": 5.596224665641785, "correct_loss_uncond": -8.385550498962402, "incorrect_loss_uncond": -8.06770944595337}, "model_output": [{"sum_logits": -5.290665626525879, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -5.290665626525879, "logits_per_char": -0.8817776044209799, "num_chars": 6}, {"sum_logits": -11.63428783416748, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.177396774291992, "logits_per_token": -5.81714391708374, "logits_per_char": -1.163428783416748, "num_chars": 10}, {"sum_logits": -6.140192985534668, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -6.140192985534668, "logits_per_char": -0.8771704265049526, "num_chars": 7}, {"sum_logits": -4.148436546325684, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -4.148436546325684, "logits_per_char": -0.5926337923322406, "num_chars": 7}, {"sum_logits": -6.279125213623047, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -6.279125213623047, "logits_per_char": -0.8970178876604352, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1029, "native_id": "420641003ba20b966887dfac684efb17", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.236443519592285, "incorrect_loss_raw": 15.676631927490234, "correct_loss_per_char": 0.3596048355102539, "incorrect_loss_per_char": 1.3244277903102937, "correct_loss_per_token": 1.6182217597961426, "incorrect_loss_per_token": 9.077518383661907, "correct_loss_uncond": -15.952227592468262, "incorrect_loss_uncond": -1.665898084640503}, "model_output": [{"sum_logits": -9.633987426757812, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.972463607788086, "logits_per_token": -4.816993713378906, "logits_per_char": -0.9633987426757813, "num_chars": 10}, {"sum_logits": -3.236443519592285, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.188671112060547, "logits_per_token": -1.6182217597961426, "logits_per_char": -0.3596048355102539, "num_chars": 9}, {"sum_logits": -15.008995056152344, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.512303352355957, "logits_per_token": -15.008995056152344, "logits_per_char": -1.3644540960138494, "num_chars": 11}, {"sum_logits": -15.286127090454102, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.892047882080078, "logits_per_token": -5.095375696818034, "logits_per_char": -0.8991839464973, "num_chars": 17}, {"sum_logits": -22.77741813659668, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.993305206298828, "logits_per_token": -11.38870906829834, "logits_per_char": -2.0706743760542436, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1030, "native_id": "064c3074a682893d49c3c5b4f1e89984", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.837003707885742, "incorrect_loss_raw": 12.111454486846924, "correct_loss_per_char": 0.9796254634857178, "incorrect_loss_per_char": 1.3091022272302646, "correct_loss_per_token": 7.837003707885742, "incorrect_loss_per_token": 9.66097640991211, "correct_loss_uncond": -6.257282257080078, "incorrect_loss_uncond": -2.9226741790771484}, "model_output": [{"sum_logits": -7.0479936599731445, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -11.638154983520508, "logits_per_token": -7.0479936599731445, "logits_per_char": -1.4095987319946288, "num_chars": 5}, {"sum_logits": -8.22519588470459, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.533967018127441, "logits_per_token": -8.22519588470459, "logits_per_char": -0.9139106538560655, "num_chars": 9}, {"sum_logits": -7.837003707885742, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.09428596496582, "logits_per_token": -7.837003707885742, "logits_per_char": -0.9796254634857178, "num_chars": 8}, {"sum_logits": -19.603824615478516, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.25627899169922, "logits_per_token": -9.801912307739258, "logits_per_char": -1.7821658741344104, "num_chars": 11}, {"sum_logits": -13.568803787231445, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.708113670349121, "logits_per_token": -13.568803787231445, "logits_per_char": -1.1307336489359539, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1031, "native_id": "c640116ca6905d5256edadb616b3f76e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.512094259262085, "incorrect_loss_raw": 9.626364707946777, "correct_loss_per_char": 0.2791215843624539, "incorrect_loss_per_char": 1.281715942753686, "correct_loss_per_token": 2.512094259262085, "incorrect_loss_per_token": 6.616020043690999, "correct_loss_uncond": -11.258830308914185, "incorrect_loss_uncond": -7.560885190963745}, "model_output": [{"sum_logits": -11.157240867614746, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.240230560302734, "logits_per_token": -11.157240867614746, "logits_per_char": -1.8595401446024578, "num_chars": 6}, {"sum_logits": -7.175407409667969, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.190845489501953, "logits_per_token": -2.3918024698893228, "logits_per_char": -0.8969259262084961, "num_chars": 8}, {"sum_logits": -9.286149978637695, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.004813194274902, "logits_per_token": -9.286149978637695, "logits_per_char": -1.160768747329712, "num_chars": 8}, {"sum_logits": -2.512094259262085, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.77092456817627, "logits_per_token": -2.512094259262085, "logits_per_char": -0.2791215843624539, "num_chars": 9}, {"sum_logits": -10.8866605758667, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.3131103515625, "logits_per_token": -3.628886858622233, "logits_per_char": -1.2096289528740778, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1032, "native_id": "35ad89c198d5d6311a71c993bb7b6cba", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.582033157348633, "incorrect_loss_raw": 10.837995290756226, "correct_loss_per_char": 0.9164066314697266, "incorrect_loss_per_char": 1.2856186471142612, "correct_loss_per_token": 4.582033157348633, "incorrect_loss_per_token": 9.32889723777771, "correct_loss_uncond": -7.892915725708008, "incorrect_loss_uncond": -4.409088611602783}, "model_output": [{"sum_logits": -14.26083755493164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.486090660095215, "logits_per_token": -14.26083755493164, "logits_per_char": -1.782604694366455, "num_chars": 8}, {"sum_logits": -12.315681457519531, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.521513938903809, "logits_per_token": -12.315681457519531, "logits_per_char": -1.759383065359933, "num_chars": 7}, {"sum_logits": -4.7026777267456055, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.283600807189941, "logits_per_token": -4.7026777267456055, "logits_per_char": -0.6718111038208008, "num_chars": 7}, {"sum_logits": -12.072784423828125, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.69713020324707, "logits_per_token": -6.0363922119140625, "logits_per_char": -0.9286757249098557, "num_chars": 13}, {"sum_logits": -4.582033157348633, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -4.582033157348633, "logits_per_char": -0.9164066314697266, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1033, "native_id": "916bbd27545446ca5d83d07c10d013ea", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6198482513427734, "incorrect_loss_raw": 12.041248559951782, "correct_loss_per_char": 0.14725893194025214, "incorrect_loss_per_char": 1.4497109476536039, "correct_loss_per_token": 1.6198482513427734, "incorrect_loss_per_token": 8.844614267349243, "correct_loss_uncond": -13.342967987060547, "incorrect_loss_uncond": -5.290462255477905}, "model_output": [{"sum_logits": -10.276436805725098, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -10.276436805725098, "logits_per_char": -1.2845546007156372, "num_chars": 8}, {"sum_logits": -12.315483093261719, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": false, "sum_logits_uncond": -15.263690948486328, "logits_per_token": -12.315483093261719, "logits_per_char": -2.0525805155436196, "num_chars": 6}, {"sum_logits": -9.569549560546875, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -18.9415225982666, "logits_per_token": -4.7847747802734375, "logits_per_char": -0.6835392543247768, "num_chars": 14}, {"sum_logits": -16.003524780273438, "num_tokens": 2, "num_tokens_all": 166, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -8.001762390136719, "logits_per_char": -1.778169420030382, "num_chars": 9}, {"sum_logits": -1.6198482513427734, "num_tokens": 1, "num_tokens_all": 165, "is_greedy": true, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -1.6198482513427734, "logits_per_char": -0.14725893194025214, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1034, "native_id": "e40fd2c17fe2cde4bd4af540d35fd518", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.494983673095703, "incorrect_loss_raw": 12.239493131637573, "correct_loss_per_char": 0.9494983673095703, "incorrect_loss_per_char": 1.0494245515560259, "correct_loss_per_token": 4.747491836547852, "incorrect_loss_per_token": 6.9442139863967896, "correct_loss_uncond": -10.118967056274414, "incorrect_loss_uncond": -3.9167261123657227}, "model_output": [{"sum_logits": -6.595739364624023, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -6.595739364624023, "logits_per_char": -1.3191478729248047, "num_chars": 5}, {"sum_logits": -22.51597785949707, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -11.257988929748535, "logits_per_char": -1.1850514662893195, "num_chars": 19}, {"sum_logits": -9.785126686096191, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.715438842773438, "logits_per_token": -4.892563343048096, "logits_per_char": -0.5755956874174231, "num_chars": 17}, {"sum_logits": -10.061128616333008, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.57980728149414, "logits_per_token": -5.030564308166504, "logits_per_char": -1.1179031795925565, "num_chars": 9}, {"sum_logits": -9.494983673095703, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.613950729370117, "logits_per_token": -4.747491836547852, "logits_per_char": -0.9494983673095703, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1035, "native_id": "98a04457025f18c2287d5c610ff8000d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 9.618423461914062, "incorrect_loss_raw": 7.49531090259552, "correct_loss_per_char": 0.6870302472795758, "incorrect_loss_per_char": 1.1782408021745228, "correct_loss_per_token": 4.809211730957031, "incorrect_loss_per_token": 6.100966095924377, "correct_loss_uncond": -10.491649627685547, "incorrect_loss_uncond": -7.357808470726013}, "model_output": [{"sum_logits": -6.692243576049805, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.87264060974121, "logits_per_token": -6.692243576049805, "logits_per_char": -0.9560347965785435, "num_chars": 7}, {"sum_logits": -11.15475845336914, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.763057708740234, "logits_per_token": -5.57737922668457, "logits_per_char": -0.9295632044474283, "num_chars": 12}, {"sum_logits": -4.123903751373291, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.842041969299316, "logits_per_token": -4.123903751373291, "logits_per_char": -0.8247807502746582, "num_chars": 5}, {"sum_logits": -8.010337829589844, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -8.010337829589844, "logits_per_char": -2.002584457397461, "num_chars": 4}, {"sum_logits": -9.618423461914062, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.11007308959961, "logits_per_token": -4.809211730957031, "logits_per_char": -0.6870302472795758, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1036, "native_id": "f656a475f07d3adba9d1486eda8e834a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.1532416343688965, "incorrect_loss_raw": 14.415156364440918, "correct_loss_per_char": 0.5961034695307413, "incorrect_loss_per_char": 1.5209957051586795, "correct_loss_per_token": 3.5766208171844482, "incorrect_loss_per_token": 10.182028532028198, "correct_loss_uncond": -7.9880547523498535, "incorrect_loss_uncond": -2.3103086948394775}, "model_output": [{"sum_logits": -17.023635864257812, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.340621948242188, "logits_per_token": -5.6745452880859375, "logits_per_char": -1.2159739903041296, "num_chars": 14}, {"sum_logits": -15.493329048156738, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.15460205078125, "logits_per_token": -15.493329048156738, "logits_per_char": -2.582221508026123, "num_chars": 6}, {"sum_logits": -13.976819038391113, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.793950080871582, "logits_per_token": -13.976819038391113, "logits_per_char": -1.2706199125810103, "num_chars": 11}, {"sum_logits": -11.166841506958008, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.612686157226562, "logits_per_token": -5.583420753479004, "logits_per_char": -1.0151674097234553, "num_chars": 11}, {"sum_logits": -7.1532416343688965, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.14129638671875, "logits_per_token": -3.5766208171844482, "logits_per_char": -0.5961034695307413, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1037, "native_id": "c865b3547c2a2e3c3916d7be6ab25752", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.661423683166504, "incorrect_loss_raw": 12.053899765014648, "correct_loss_per_char": 0.7401581870185004, "incorrect_loss_per_char": 1.5802011728286742, "correct_loss_per_token": 6.661423683166504, "incorrect_loss_per_token": 10.343491554260254, "correct_loss_uncond": -10.040053367614746, "incorrect_loss_uncond": -3.419706106185913}, "model_output": [{"sum_logits": -9.861531257629395, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.031091690063477, "logits_per_token": -9.861531257629395, "logits_per_char": -1.6435885429382324, "num_chars": 6}, {"sum_logits": -14.565680503845215, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -14.565680503845215, "logits_per_char": -2.0808115005493164, "num_chars": 7}, {"sum_logits": -6.661423683166504, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.70147705078125, "logits_per_token": -6.661423683166504, "logits_per_char": -0.7401581870185004, "num_chars": 9}, {"sum_logits": -13.683265686035156, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.026565551757812, "logits_per_token": -6.841632843017578, "logits_per_char": -0.9122177124023437, "num_chars": 15}, {"sum_logits": -10.105121612548828, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -10.105121612548828, "logits_per_char": -1.6841869354248047, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1038, "native_id": "abd30bab9b96f902fead5378d4f4a1e4", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.965530395507812, "incorrect_loss_raw": 16.0198233127594, "correct_loss_per_char": 0.6228456497192383, "incorrect_loss_per_char": 1.167581512066193, "correct_loss_per_token": 3.321843465169271, "incorrect_loss_per_token": 9.685227235158283, "correct_loss_uncond": -8.318140029907227, "incorrect_loss_uncond": -2.0354931354522705}, "model_output": [{"sum_logits": -18.190271377563477, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.836193084716797, "logits_per_token": -9.095135688781738, "logits_per_char": -1.1368919610977173, "num_chars": 16}, {"sum_logits": -17.192684173583984, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.578399658203125, "logits_per_token": -17.192684173583984, "logits_per_char": -1.146178944905599, "num_chars": 15}, {"sum_logits": -9.965530395507812, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.28367042541504, "logits_per_token": -3.321843465169271, "logits_per_char": -0.6228456497192383, "num_chars": 16}, {"sum_logits": -11.370478630065918, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.739667892456055, "logits_per_token": -3.790159543355306, "logits_per_char": -0.8121770450047084, "num_chars": 14}, {"sum_logits": -17.32585906982422, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.067005157470703, "logits_per_token": -8.66292953491211, "logits_per_char": -1.575078097256747, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1039, "native_id": "a4b44a986e7f9045432e20ea75611df4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.37733268737793, "incorrect_loss_raw": 5.629870891571045, "correct_loss_per_char": 0.6444102067213792, "incorrect_loss_per_char": 0.6666351840609596, "correct_loss_per_token": 2.7924442291259766, "incorrect_loss_per_token": 4.42084014415741, "correct_loss_uncond": -10.715730667114258, "incorrect_loss_uncond": -7.563661098480225}, "model_output": [{"sum_logits": -5.417717933654785, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.67548656463623, "logits_per_token": -2.7088589668273926, "logits_per_char": -0.38697985240391325, "num_chars": 14}, {"sum_logits": -5.188545227050781, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.330512046813965, "logits_per_token": -5.188545227050781, "logits_per_char": -0.6485681533813477, "num_chars": 8}, {"sum_logits": -8.37733268737793, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.093063354492188, "logits_per_token": -2.7924442291259766, "logits_per_char": -0.6444102067213792, "num_chars": 13}, {"sum_logits": -4.254528045654297, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.525420188903809, "logits_per_token": -2.1272640228271484, "logits_per_char": -0.3545440038045247, "num_chars": 12}, {"sum_logits": -7.658692359924316, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.242709159851074, "logits_per_token": -7.658692359924316, "logits_per_char": -1.2764487266540527, "num_chars": 6}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1040, "native_id": "1f492f556fae64f72ce36b6caa242dd0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.656116008758545, "incorrect_loss_raw": 11.16935408115387, "correct_loss_per_char": 0.40623511208428276, "incorrect_loss_per_char": 1.4812341431776683, "correct_loss_per_token": 3.656116008758545, "incorrect_loss_per_token": 6.782444059848785, "correct_loss_uncond": -8.942140102386475, "incorrect_loss_uncond": -5.150977253913879}, "model_output": [{"sum_logits": -13.25953483581543, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.259456634521484, "logits_per_token": -6.629767417907715, "logits_per_char": -2.209922472635905, "num_chars": 6}, {"sum_logits": -9.582136154174805, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.410623550415039, "logits_per_token": -9.582136154174805, "logits_per_char": -1.3688765934535436, "num_chars": 7}, {"sum_logits": -3.656116008758545, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.59825611114502, "logits_per_token": -3.656116008758545, "logits_per_char": -0.40623511208428276, "num_chars": 9}, {"sum_logits": -7.155505657196045, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.741591453552246, "logits_per_token": -3.5777528285980225, "logits_per_char": -0.5111075469425747, "num_chars": 14}, {"sum_logits": -14.6802396774292, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.869653701782227, "logits_per_token": -7.3401198387146, "logits_per_char": -1.83502995967865, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1041, "native_id": "d0c67c7ae6f2361fe237110455127866", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.980893850326538, "incorrect_loss_raw": 14.07169795036316, "correct_loss_per_char": 0.4423215389251709, "incorrect_loss_per_char": 1.776696665978535, "correct_loss_per_token": 3.980893850326538, "incorrect_loss_per_token": 6.27309262752533, "correct_loss_uncond": -10.875165224075317, "incorrect_loss_uncond": -4.49040150642395}, "model_output": [{"sum_logits": -10.096535682678223, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.381830215454102, "logits_per_token": -5.048267841339111, "logits_per_char": -0.9178668802434747, "num_chars": 11}, {"sum_logits": -13.71264362335205, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.870458602905273, "logits_per_token": -6.856321811676025, "logits_per_char": -2.2854406038920083, "num_chars": 6}, {"sum_logits": -3.980893850326538, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.856059074401855, "logits_per_token": -3.980893850326538, "logits_per_char": -0.4423215389251709, "num_chars": 9}, {"sum_logits": -18.30615234375, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.297340393066406, "logits_per_token": -6.10205078125, "logits_per_char": -2.6151646205357144, "num_chars": 7}, {"sum_logits": -14.171460151672363, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.698768615722656, "logits_per_token": -7.085730075836182, "logits_per_char": -1.2883145592429421, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1042, "native_id": "7bb279e38a1c9eb47a0c7af979a131a2", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.838860511779785, "incorrect_loss_raw": 12.48927903175354, "correct_loss_per_char": 0.5260661932138296, "incorrect_loss_per_char": 0.9074916639111259, "correct_loss_per_token": 3.4194302558898926, "incorrect_loss_per_token": 8.295978665351868, "correct_loss_uncond": -8.16443920135498, "incorrect_loss_uncond": -5.006734848022461}, "model_output": [{"sum_logits": -8.147771835327148, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.442310333251953, "logits_per_token": -8.147771835327148, "logits_per_char": -0.7407065304842863, "num_chars": 11}, {"sum_logits": -13.222143173217773, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -20.584592819213867, "logits_per_token": -4.407381057739258, "logits_per_char": -0.8263839483261108, "num_chars": 16}, {"sum_logits": -12.67032241821289, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.977568626403809, "logits_per_token": -12.67032241821289, "logits_per_char": -1.2670322418212892, "num_chars": 10}, {"sum_logits": -6.838860511779785, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.003299713134766, "logits_per_token": -3.4194302558898926, "logits_per_char": -0.5260661932138296, "num_chars": 13}, {"sum_logits": -15.916878700256348, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.979583740234375, "logits_per_token": -7.958439350128174, "logits_per_char": -0.7958439350128174, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1043, "native_id": "3095078e4771053d9d5fa8d4f5f3dc38", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5721282958984375, "incorrect_loss_raw": 9.16565227508545, "correct_loss_per_char": 0.35721282958984374, "incorrect_loss_per_char": 0.825130009651184, "correct_loss_per_token": 3.5721282958984375, "incorrect_loss_per_token": 6.296928405761719, "correct_loss_uncond": -9.91810417175293, "incorrect_loss_uncond": -5.356346607208252}, "model_output": [{"sum_logits": -11.474895477294922, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.291610717773438, "logits_per_token": -5.737447738647461, "logits_per_char": -0.7171809673309326, "num_chars": 16}, {"sum_logits": -4.948762893676758, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -4.948762893676758, "logits_per_char": -0.9897525787353516, "num_chars": 5}, {"sum_logits": -3.5721282958984375, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -3.5721282958984375, "logits_per_char": -0.35721282958984374, "num_chars": 10}, {"sum_logits": -11.474895477294922, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.291610717773438, "logits_per_token": -5.737447738647461, "logits_per_char": -0.7171809673309326, "num_chars": 16}, {"sum_logits": -8.764055252075195, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.029825210571289, "logits_per_token": -8.764055252075195, "logits_per_char": -0.8764055252075196, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1044, "native_id": "b23edb651e623e5d1e03e8ed3937e8fc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.038089394569397, "incorrect_loss_raw": 10.637595534324646, "correct_loss_per_char": 0.25952234864234924, "incorrect_loss_per_char": 1.6159728038878667, "correct_loss_per_token": 1.038089394569397, "incorrect_loss_per_token": 7.115136504173279, "correct_loss_uncond": -14.261353850364685, "incorrect_loss_uncond": -4.7020968198776245}, "model_output": [{"sum_logits": -3.949669361114502, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.910656929016113, "logits_per_token": -3.949669361114502, "logits_per_char": -0.5642384801592145, "num_chars": 7}, {"sum_logits": -1.038089394569397, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -15.299443244934082, "logits_per_token": -1.038089394569397, "logits_per_char": -0.25952234864234924, "num_chars": 4}, {"sum_logits": -10.421040534973145, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.99152660369873, "logits_per_token": -10.421040534973145, "logits_per_char": -2.084208106994629, "num_chars": 5}, {"sum_logits": -12.318658828735352, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.97957706451416, "logits_per_token": -6.159329414367676, "logits_per_char": -2.053109804789225, "num_chars": 6}, {"sum_logits": -15.861013412475586, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -7.930506706237793, "logits_per_char": -1.7623348236083984, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1045, "native_id": "acf6b667e9353b1743b7c4f60a6a9017", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.829251289367676, "incorrect_loss_raw": 10.198722124099731, "correct_loss_per_char": 0.5886167526245117, "incorrect_loss_per_char": 1.0419497229650951, "correct_loss_per_token": 2.207312822341919, "incorrect_loss_per_token": 5.690067291259766, "correct_loss_uncond": -11.531975746154785, "incorrect_loss_uncond": -7.711552143096924}, "model_output": [{"sum_logits": -4.725649833679199, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.321873664855957, "logits_per_token": -4.725649833679199, "logits_per_char": -0.7876083056131998, "num_chars": 6}, {"sum_logits": -13.248382568359375, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.14734649658203, "logits_per_token": -6.6241912841796875, "logits_per_char": -1.0191063514122596, "num_chars": 13}, {"sum_logits": -8.829251289367676, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.36122703552246, "logits_per_token": -2.207312822341919, "logits_per_char": -0.5886167526245117, "num_chars": 15}, {"sum_logits": -14.179817199707031, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.09969711303711, "logits_per_token": -7.089908599853516, "logits_per_char": -1.5755352444118924, "num_chars": 9}, {"sum_logits": -8.64103889465332, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.072179794311523, "logits_per_token": -4.32051944732666, "logits_per_char": -0.7855489904230292, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1046, "native_id": "15b090801256085ad465e74af47cbee9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.107144355773926, "incorrect_loss_raw": 12.61903977394104, "correct_loss_per_char": 0.827922214161266, "incorrect_loss_per_char": 1.0967038238547409, "correct_loss_per_token": 4.553572177886963, "incorrect_loss_per_token": 8.828613519668579, "correct_loss_uncond": -10.859856605529785, "incorrect_loss_uncond": -3.2464747428894043}, "model_output": [{"sum_logits": -11.6080961227417, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.148365020751953, "logits_per_token": -11.6080961227417, "logits_per_char": -1.0552814657037908, "num_chars": 11}, {"sum_logits": -8.544652938842773, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.709712028503418, "logits_per_token": -8.544652938842773, "logits_per_char": -0.9494058820936415, "num_chars": 9}, {"sum_logits": -9.107144355773926, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.96700096130371, "logits_per_token": -4.553572177886963, "logits_per_char": -0.827922214161266, "num_chars": 11}, {"sum_logits": -19.2266788482666, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.606021881103516, "logits_per_token": -9.6133394241333, "logits_per_char": -1.3733342034476144, "num_chars": 14}, {"sum_logits": -11.096731185913086, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.99795913696289, "logits_per_token": -5.548365592956543, "logits_per_char": -1.008793744173917, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1047, "native_id": "790b3f583e9bc9424c771691ecc70c20", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.083164691925049, "incorrect_loss_raw": 10.481380939483643, "correct_loss_per_char": 0.39101266860961914, "incorrect_loss_per_char": 1.15500906742245, "correct_loss_per_token": 2.5415823459625244, "incorrect_loss_per_token": 5.694180488586426, "correct_loss_uncond": -15.327863216400146, "incorrect_loss_uncond": -6.836002588272095}, "model_output": [{"sum_logits": -8.604280471801758, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.947824478149414, "logits_per_token": -8.604280471801758, "logits_per_char": -1.7208560943603515, "num_chars": 5}, {"sum_logits": -11.013998985290527, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -5.506999492645264, "logits_per_char": -1.223777665032281, "num_chars": 9}, {"sum_logits": -7.3781633377075195, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.977584838867188, "logits_per_token": -3.6890816688537598, "logits_per_char": -0.5270116669791085, "num_chars": 14}, {"sum_logits": -5.083164691925049, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -20.411027908325195, "logits_per_token": -2.5415823459625244, "logits_per_char": -0.39101266860961914, "num_chars": 13}, {"sum_logits": -14.929080963134766, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.608455657958984, "logits_per_token": -4.976360321044922, "logits_per_char": -1.148390843318059, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1048, "native_id": "22b8219d43a38a1130e0a35ece152337", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.051790714263916, "incorrect_loss_raw": 7.735424518585205, "correct_loss_per_char": 0.6752984523773193, "incorrect_loss_per_char": 1.0324278593063354, "correct_loss_per_token": 4.051790714263916, "incorrect_loss_per_token": 7.735424518585205, "correct_loss_uncond": -8.115780353546143, "incorrect_loss_uncond": -6.950085639953613}, "model_output": [{"sum_logits": -4.051790714263916, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.167571067810059, "logits_per_token": -4.051790714263916, "logits_per_char": -0.6752984523773193, "num_chars": 6}, {"sum_logits": -7.954287528991699, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -7.954287528991699, "logits_per_char": -1.136326789855957, "num_chars": 7}, {"sum_logits": -4.345733642578125, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -4.345733642578125, "logits_per_char": -0.5432167053222656, "num_chars": 8}, {"sum_logits": -8.790003776550293, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -8.790003776550293, "logits_per_char": -1.4650006294250488, "num_chars": 6}, {"sum_logits": -9.851673126220703, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -9.851673126220703, "logits_per_char": -0.9851673126220704, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1049, "native_id": "5d4233146435ab0ca211e8ac9bfce76f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.358316421508789, "incorrect_loss_raw": 13.685847282409668, "correct_loss_per_char": 0.2798597017923991, "incorrect_loss_per_char": 1.2950005756484138, "correct_loss_per_token": 3.358316421508789, "incorrect_loss_per_token": 8.018271207809448, "correct_loss_uncond": -12.6184663772583, "incorrect_loss_uncond": -7.092973709106445}, "model_output": [{"sum_logits": -3.358316421508789, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -3.358316421508789, "logits_per_char": -0.2798597017923991, "num_chars": 12}, {"sum_logits": -16.258148193359375, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -23.274658203125, "logits_per_token": -8.129074096679688, "logits_per_char": -1.354845682779948, "num_chars": 12}, {"sum_logits": -15.997013092041016, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -25.36575698852539, "logits_per_token": -7.998506546020508, "logits_per_char": -1.7774458991156683, "num_chars": 9}, {"sum_logits": -13.085447311401367, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -19.46621322631836, "logits_per_token": -6.542723655700684, "logits_per_char": -0.8723631540934245, "num_chars": 15}, {"sum_logits": -9.402780532836914, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -9.402780532836914, "logits_per_char": -1.1753475666046143, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1050, "native_id": "be737cd4db844574ef594442ce6c9453", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.383723258972168, "incorrect_loss_raw": 8.56563127040863, "correct_loss_per_char": 1.2767446517944336, "incorrect_loss_per_char": 1.1756396736417498, "correct_loss_per_token": 6.383723258972168, "incorrect_loss_per_token": 6.864682555198669, "correct_loss_uncond": -7.768857002258301, "incorrect_loss_uncond": -5.631664156913757}, "model_output": [{"sum_logits": -6.383723258972168, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.152580261230469, "logits_per_token": -6.383723258972168, "logits_per_char": -1.2767446517944336, "num_chars": 5}, {"sum_logits": -13.723697662353516, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.185674667358398, "logits_per_token": -13.723697662353516, "logits_per_char": -1.3723697662353516, "num_chars": 10}, {"sum_logits": -3.466709852218628, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.478693962097168, "logits_per_token": -3.466709852218628, "logits_per_char": -0.6933419704437256, "num_chars": 5}, {"sum_logits": -3.4645278453826904, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.021862030029297, "logits_per_token": -3.4645278453826904, "logits_per_char": -0.6929055690765381, "num_chars": 5}, {"sum_logits": -13.607589721679688, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.102951049804688, "logits_per_token": -6.803794860839844, "logits_per_char": -1.943941388811384, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1051, "native_id": "550164b7cf4e03153484136f10122c70", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.006040573120117, "incorrect_loss_raw": 7.83167028427124, "correct_loss_per_char": 0.5628775358200073, "incorrect_loss_per_char": 0.5432376464207967, "correct_loss_per_token": 3.0020135243733725, "incorrect_loss_per_token": 3.3043679396311445, "correct_loss_uncond": -14.163108825683594, "incorrect_loss_uncond": -12.411345481872559}, "model_output": [{"sum_logits": -7.7746124267578125, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -24.820106506347656, "logits_per_token": -3.8873062133789062, "logits_per_char": -0.6478843688964844, "num_chars": 12}, {"sum_logits": -5.983510971069336, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.391292572021484, "logits_per_token": -1.994503657023112, "logits_per_char": -0.5983510971069336, "num_chars": 10}, {"sum_logits": -8.691701889038086, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -20.286928176879883, "logits_per_token": -2.8972339630126953, "logits_per_char": -0.48287232716878253, "num_chars": 18}, {"sum_logits": -9.006040573120117, "num_tokens": 3, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -23.16914939880371, "logits_per_token": -3.0020135243733725, "logits_per_char": -0.5628775358200073, "num_chars": 16}, {"sum_logits": -8.876855850219727, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.473735809326172, "logits_per_token": -4.438427925109863, "logits_per_char": -0.44384279251098635, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1052, "native_id": "a617eb4d27edea93e7fd630ce00c8219", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 1.0297470092773438, "incorrect_loss_raw": 16.67513918876648, "correct_loss_per_char": 0.17162450154622397, "incorrect_loss_per_char": 1.3907384480312195, "correct_loss_per_token": 1.0297470092773438, "incorrect_loss_per_token": 8.33756959438324, "correct_loss_uncond": -12.60942554473877, "incorrect_loss_uncond": -4.516150712966919}, "model_output": [{"sum_logits": -21.848411560058594, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -23.903873443603516, "logits_per_token": -10.924205780029297, "logits_per_char": -1.820700963338216, "num_chars": 12}, {"sum_logits": -16.320316314697266, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.20868492126465, "logits_per_token": -8.160158157348633, "logits_per_char": -1.6320316314697265, "num_chars": 10}, {"sum_logits": -1.0297470092773438, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": true, "sum_logits_uncond": -13.639172554016113, "logits_per_token": -1.0297470092773438, "logits_per_char": -0.17162450154622397, "num_chars": 6}, {"sum_logits": -13.146482467651367, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -25.95277214050293, "logits_per_token": -6.573241233825684, "logits_per_char": -1.0112678821270282, "num_chars": 13}, {"sum_logits": -15.385346412658691, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.6998291015625, "logits_per_token": -7.692673206329346, "logits_per_char": -1.0989533151899065, "num_chars": 14}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1053, "native_id": "bd47827418d5b8d7fb3502a398644435", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.9889302253723145, "incorrect_loss_raw": 16.478515148162842, "correct_loss_per_char": 0.7988930225372315, "incorrect_loss_per_char": 1.2242462914505283, "correct_loss_per_token": 3.9944651126861572, "incorrect_loss_per_token": 9.970460176467896, "correct_loss_uncond": -13.915862560272217, "incorrect_loss_uncond": -5.2759785652160645}, "model_output": [{"sum_logits": -17.568233489990234, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.55019760131836, "logits_per_token": -8.784116744995117, "logits_per_char": -1.351402576153095, "num_chars": 13}, {"sum_logits": -15.239114761352539, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.687076568603516, "logits_per_token": -7.6195573806762695, "logits_per_char": -1.0159409840901692, "num_chars": 15}, {"sum_logits": -13.849620819091797, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.6195068359375, "logits_per_token": -13.849620819091797, "logits_per_char": -1.1541350682576497, "num_chars": 12}, {"sum_logits": -7.9889302253723145, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.90479278564453, "logits_per_token": -3.9944651126861572, "logits_per_char": -0.7988930225372315, "num_chars": 10}, {"sum_logits": -19.257091522216797, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -26.16119384765625, "logits_per_token": -9.628545761108398, "logits_per_char": -1.3755065373011999, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1054, "native_id": "31487ab8b1e8f12e252590cc58bd19c2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.651052951812744, "incorrect_loss_raw": 10.505560874938965, "correct_loss_per_char": 0.456381618976593, "incorrect_loss_per_char": 1.589373815059662, "correct_loss_per_token": 3.651052951812744, "incorrect_loss_per_token": 8.840788841247559, "correct_loss_uncond": -12.347049236297607, "incorrect_loss_uncond": -5.483946084976196}, "model_output": [{"sum_logits": -3.651052951812744, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.998102188110352, "logits_per_token": -3.651052951812744, "logits_per_char": -0.456381618976593, "num_chars": 8}, {"sum_logits": -7.398133277893066, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -7.398133277893066, "logits_per_char": -0.6165111064910889, "num_chars": 12}, {"sum_logits": -11.29824447631836, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.56877326965332, "logits_per_token": -11.29824447631836, "logits_per_char": -2.259648895263672, "num_chars": 5}, {"sum_logits": -10.007689476013184, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.417911529541016, "logits_per_token": -10.007689476013184, "logits_per_char": -2.001537895202637, "num_chars": 5}, {"sum_logits": -13.31817626953125, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.99456024169922, "logits_per_token": -6.659088134765625, "logits_per_char": -1.47979736328125, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1055, "native_id": "ce2fd94212243f843b3f357046051f57", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.622008323669434, "incorrect_loss_raw": 11.303269386291504, "correct_loss_per_char": 0.8277510404586792, "incorrect_loss_per_char": 1.3869126117590702, "correct_loss_per_token": 6.622008323669434, "incorrect_loss_per_token": 7.7907634973526, "correct_loss_uncond": -7.6303815841674805, "incorrect_loss_uncond": -4.353058338165283}, "model_output": [{"sum_logits": -6.622008323669434, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.252389907836914, "logits_per_token": -6.622008323669434, "logits_per_char": -0.8277510404586792, "num_chars": 8}, {"sum_logits": -6.321250915527344, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.872519493103027, "logits_per_token": -6.321250915527344, "logits_per_char": -1.0535418192545574, "num_chars": 6}, {"sum_logits": -10.791779518127441, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.521110534667969, "logits_per_token": -10.791779518127441, "logits_per_char": -1.798629919687907, "num_chars": 6}, {"sum_logits": -12.597860336303711, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.744099617004395, "logits_per_token": -6.2989301681518555, "logits_per_char": -1.1452600305730647, "num_chars": 11}, {"sum_logits": -15.50218677520752, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.487581253051758, "logits_per_token": -7.75109338760376, "logits_per_char": -1.550218677520752, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1056, "native_id": "f87f40db71a56b5beda3194550202dc9_1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.719586133956909, "incorrect_loss_raw": 10.727097272872925, "correct_loss_per_char": 0.30996551116307575, "incorrect_loss_per_char": 1.3377948297394646, "correct_loss_per_token": 1.8597930669784546, "incorrect_loss_per_token": 7.286860346794128, "correct_loss_uncond": -16.839574098587036, "incorrect_loss_uncond": -6.594950914382935}, "model_output": [{"sum_logits": -6.502867698669434, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.970111846923828, "logits_per_token": -6.502867698669434, "logits_per_char": -0.7225408554077148, "num_chars": 9}, {"sum_logits": -8.883625984191895, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.831886291503906, "logits_per_token": -8.883625984191895, "logits_per_char": -1.7767251968383788, "num_chars": 5}, {"sum_logits": -3.719586133956909, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.559160232543945, "logits_per_token": -1.8597930669784546, "logits_per_char": -0.30996551116307575, "num_chars": 12}, {"sum_logits": -20.103191375732422, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -10.051595687866211, "logits_per_char": -2.2336879306369357, "num_chars": 9}, {"sum_logits": -7.418704032897949, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.009185791015625, "logits_per_token": -3.7093520164489746, "logits_per_char": -0.6182253360748291, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1057, "native_id": "0b25bbd9e9aa976655e1975e31331709", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.2811055183410645, "incorrect_loss_raw": 14.604675054550171, "correct_loss_per_char": 0.5281105518341065, "incorrect_loss_per_char": 1.1577308389875625, "correct_loss_per_token": 2.6405527591705322, "incorrect_loss_per_token": 7.043772300084432, "correct_loss_uncond": -13.047990322113037, "incorrect_loss_uncond": -4.38674259185791}, "model_output": [{"sum_logits": -18.961238861083984, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -25.362499237060547, "logits_per_token": -6.320412953694661, "logits_per_char": -1.0534021589491103, "num_chars": 18}, {"sum_logits": -5.2811055183410645, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.3290958404541, "logits_per_token": -2.6405527591705322, "logits_per_char": -0.5281105518341065, "num_chars": 10}, {"sum_logits": -4.251891136169434, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -11.454092979431152, "logits_per_token": -4.251891136169434, "logits_per_char": -0.8503782272338867, "num_chars": 5}, {"sum_logits": -17.3537540435791, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -20.06051254272461, "logits_per_token": -8.67687702178955, "logits_per_char": -1.7353754043579102, "num_chars": 10}, {"sum_logits": -17.851816177368164, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.088565826416016, "logits_per_token": -8.925908088684082, "logits_per_char": -0.9917675654093424, "num_chars": 18}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1058, "native_id": "925232b4c9bba945a38ac7ef0f15f8d0", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.178569316864014, "incorrect_loss_raw": 12.134346902370453, "correct_loss_per_char": 0.43154744307200116, "incorrect_loss_per_char": 1.1815916703204916, "correct_loss_per_token": 5.178569316864014, "incorrect_loss_per_token": 6.518198072910309, "correct_loss_uncond": -10.892828464508057, "incorrect_loss_uncond": -3.5476275086402893}, "model_output": [{"sum_logits": -3.608196973800659, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -3.608196973800659, "logits_per_char": -0.7216393947601318, "num_chars": 5}, {"sum_logits": -17.061870574951172, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.520990371704102, "logits_per_token": -8.530935287475586, "logits_per_char": -1.5510791431773792, "num_chars": 11}, {"sum_logits": -5.178569316864014, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.07139778137207, "logits_per_token": -5.178569316864014, "logits_per_char": -0.43154744307200116, "num_chars": 12}, {"sum_logits": -14.461216926574707, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -18.804922103881836, "logits_per_token": -7.2306084632873535, "logits_per_char": -0.9640811284383138, "num_chars": 15}, {"sum_logits": -13.406103134155273, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -6.703051567077637, "logits_per_char": -1.4895670149061415, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1059, "native_id": "3338109fcafaaa370c8900a53e1b3ed8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.338228225708008, "incorrect_loss_raw": 15.880958080291748, "correct_loss_per_char": 0.38130201612200054, "incorrect_loss_per_char": 1.1395458259967843, "correct_loss_per_token": 2.669114112854004, "incorrect_loss_per_token": 5.695831648508708, "correct_loss_uncond": -12.110219955444336, "incorrect_loss_uncond": -8.66901421546936}, "model_output": [{"sum_logits": -2.9692459106445312, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.701706886291504, "logits_per_token": -2.9692459106445312, "logits_per_char": -0.2699314464222301, "num_chars": 11}, {"sum_logits": -5.338228225708008, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.448448181152344, "logits_per_token": -2.669114112854004, "logits_per_char": -0.38130201612200054, "num_chars": 14}, {"sum_logits": -20.550613403320312, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -24.244056701660156, "logits_per_token": -10.275306701660156, "logits_per_char": -1.7125511169433594, "num_chars": 12}, {"sum_logits": -28.469127655029297, "num_tokens": 5, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -39.138370513916016, "logits_per_token": -5.693825531005859, "logits_per_char": -1.2940512570467861, "num_chars": 22}, {"sum_logits": -11.534845352172852, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.115755081176758, "logits_per_token": -3.8449484507242837, "logits_per_char": -1.2816494835747614, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1060, "native_id": "e172a93c72d305ee8262a8deb00d9fc3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.589056015014648, "incorrect_loss_raw": 11.84103775024414, "correct_loss_per_char": 1.065450668334961, "incorrect_loss_per_char": 1.0842972278594971, "correct_loss_per_token": 4.794528007507324, "incorrect_loss_per_token": 7.592590570449829, "correct_loss_uncond": -8.859884262084961, "incorrect_loss_uncond": -5.810750246047974}, "model_output": [{"sum_logits": -9.589056015014648, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.44894027709961, "logits_per_token": -4.794528007507324, "logits_per_char": -1.065450668334961, "num_chars": 9}, {"sum_logits": -6.849925518035889, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.638421058654785, "logits_per_token": -6.849925518035889, "logits_per_char": -1.1416542530059814, "num_chars": 6}, {"sum_logits": -20.155010223388672, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.78726577758789, "logits_per_token": -10.077505111694336, "logits_per_char": -1.1855888366699219, "num_chars": 17}, {"sum_logits": -6.526648044586182, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.688596725463867, "logits_per_token": -6.526648044586182, "logits_per_char": -1.087774674097697, "num_chars": 6}, {"sum_logits": -13.83256721496582, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.492868423461914, "logits_per_token": -6.91628360748291, "logits_per_char": -0.922171147664388, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1061, "native_id": "f1c2e37abf17d9e4ad16eb40f966c79f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.265099048614502, "incorrect_loss_raw": 13.793933033943176, "correct_loss_per_char": 0.4387582540512085, "incorrect_loss_per_char": 0.9532655965198171, "correct_loss_per_token": 2.632549524307251, "incorrect_loss_per_token": 4.78308721951076, "correct_loss_uncond": -13.372169017791748, "incorrect_loss_uncond": -9.155143141746521}, "model_output": [{"sum_logits": -5.107799530029297, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.921911239624023, "logits_per_token": -2.5538997650146484, "logits_per_char": -0.46434541182084516, "num_chars": 11}, {"sum_logits": -10.130637168884277, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.835834503173828, "logits_per_token": -5.065318584442139, "logits_per_char": -1.0130637168884278, "num_chars": 10}, {"sum_logits": -33.16152572631836, "num_tokens": 7, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -37.86862564086914, "logits_per_token": -4.73736081804548, "logits_per_char": -1.658076286315918, "num_chars": 20}, {"sum_logits": -6.7757697105407715, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -6.7757697105407715, "logits_per_char": -0.6775769710540771, "num_chars": 10}, {"sum_logits": -5.265099048614502, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.63726806640625, "logits_per_token": -2.632549524307251, "logits_per_char": -0.4387582540512085, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1062, "native_id": "d29252ddaf7c7ef491abcce342d7bb98", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 18.522136688232422, "incorrect_loss_raw": 11.081530094146729, "correct_loss_per_char": 1.8522136688232422, "incorrect_loss_per_char": 1.278667402267456, "correct_loss_per_token": 9.261068344116211, "incorrect_loss_per_token": 5.828847944736481, "correct_loss_uncond": -1.8685226440429688, "incorrect_loss_uncond": -6.801058053970337}, "model_output": [{"sum_logits": -14.027850151062012, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -7.013925075531006, "logits_per_char": -1.7534812688827515, "num_chars": 8}, {"sum_logits": -18.522136688232422, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.39065933227539, "logits_per_token": -9.261068344116211, "logits_per_char": -1.8522136688232422, "num_chars": 10}, {"sum_logits": -10.29059886932373, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.99152660369873, "logits_per_token": -10.29059886932373, "logits_per_char": -2.058119773864746, "num_chars": 5}, {"sum_logits": -4.035799980163574, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.035602569580078, "logits_per_token": -2.017899990081787, "logits_per_char": -0.5044749975204468, "num_chars": 8}, {"sum_logits": -15.971871376037598, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.677364349365234, "logits_per_token": -3.9929678440093994, "logits_per_char": -0.7985935688018799, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1063, "native_id": "8c3c6b34bdb650a6517bca3786406c99", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.687535762786865, "incorrect_loss_raw": 10.354810357093811, "correct_loss_per_char": 0.437502750983605, "incorrect_loss_per_char": 1.204433382831068, "correct_loss_per_token": 2.8437678813934326, "incorrect_loss_per_token": 7.242698431015015, "correct_loss_uncond": -13.923019886016846, "incorrect_loss_uncond": -4.733060956001282}, "model_output": [{"sum_logits": -9.627527236938477, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.382490158081055, "logits_per_token": -9.627527236938477, "logits_per_char": -0.8022939364115397, "num_chars": 12}, {"sum_logits": -11.015231132507324, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.53800392150879, "logits_per_token": -5.507615566253662, "logits_per_char": -1.2239145702785916, "num_chars": 9}, {"sum_logits": -13.881664276123047, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -16.868898391723633, "logits_per_token": -6.940832138061523, "logits_per_char": -1.067820328932542, "num_chars": 13}, {"sum_logits": -6.8948187828063965, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.562092781066895, "logits_per_token": -6.8948187828063965, "logits_per_char": -1.7237046957015991, "num_chars": 4}, {"sum_logits": -5.687535762786865, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.61055564880371, "logits_per_token": -2.8437678813934326, "logits_per_char": -0.437502750983605, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1064, "native_id": "ff1bf2ec835c9df8695ae0cfb5281646", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.03913688659668, "incorrect_loss_raw": 13.581622838973999, "correct_loss_per_char": 0.7198766980852399, "incorrect_loss_per_char": 1.455483325322469, "correct_loss_per_token": 2.51956844329834, "incorrect_loss_per_token": 8.323450803756714, "correct_loss_uncond": -15.827207565307617, "incorrect_loss_uncond": -3.1138217449188232}, "model_output": [{"sum_logits": -19.45130157470703, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.304346084594727, "logits_per_token": -9.725650787353516, "logits_per_char": -1.2967534383138022, "num_chars": 15}, {"sum_logits": -5.895998954772949, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.887779235839844, "logits_per_token": -5.895998954772949, "logits_per_char": -1.17919979095459, "num_chars": 5}, {"sum_logits": -16.960556030273438, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.601966857910156, "logits_per_token": -5.6535186767578125, "logits_per_char": -0.9422531127929688, "num_chars": 18}, {"sum_logits": -5.03913688659668, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.866344451904297, "logits_per_token": -2.51956844329834, "logits_per_char": -0.7198766980852399, "num_chars": 7}, {"sum_logits": -12.018634796142578, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.987686157226562, "logits_per_token": -12.018634796142578, "logits_per_char": -2.4037269592285155, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1065, "native_id": "c7526b682e64f355384631b35cd78fc9", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 13.065277099609375, "incorrect_loss_raw": 12.382665872573853, "correct_loss_per_char": 1.6331596374511719, "incorrect_loss_per_char": 1.5082551883412645, "correct_loss_per_token": 6.5326385498046875, "incorrect_loss_per_token": 10.656932592391968, "correct_loss_uncond": -8.115745544433594, "incorrect_loss_uncond": -2.942140579223633}, "model_output": [{"sum_logits": -11.655271530151367, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -11.655271530151367, "logits_per_char": -1.456908941268921, "num_chars": 8}, {"sum_logits": -13.065277099609375, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -21.18102264404297, "logits_per_token": -6.5326385498046875, "logits_per_char": -1.6331596374511719, "num_chars": 8}, {"sum_logits": -13.805866241455078, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.641830444335938, "logits_per_token": -6.902933120727539, "logits_per_char": -1.972266605922154, "num_chars": 7}, {"sum_logits": -11.875468254089355, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -11.875468254089355, "logits_per_char": -1.0795880230990322, "num_chars": 11}, {"sum_logits": -12.19405746459961, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.490277290344238, "logits_per_token": -12.19405746459961, "logits_per_char": -1.5242571830749512, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1066, "native_id": "0fba83d3997f048adcc31937221af77e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.332234859466553, "incorrect_loss_raw": 12.288463592529297, "correct_loss_per_char": 0.6188906942095075, "incorrect_loss_per_char": 1.6881367405255636, "correct_loss_per_token": 2.1661174297332764, "incorrect_loss_per_token": 7.639132499694824, "correct_loss_uncond": -13.278234958648682, "incorrect_loss_uncond": -3.7401766777038574}, "model_output": [{"sum_logits": -13.715417861938477, "num_tokens": 3, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.042510986328125, "logits_per_token": -4.571805953979492, "logits_per_char": -1.7144272327423096, "num_chars": 8}, {"sum_logits": -4.332234859466553, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.610469818115234, "logits_per_token": -2.1661174297332764, "logits_per_char": -0.6188906942095075, "num_chars": 7}, {"sum_logits": -11.76625919342041, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.72507095336914, "logits_per_token": -11.76625919342041, "logits_per_char": -2.353251838684082, "num_chars": 5}, {"sum_logits": -18.907424926757812, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.09620475769043, "logits_per_token": -9.453712463378906, "logits_per_char": -1.8907424926757812, "num_chars": 10}, {"sum_logits": -4.764752388000488, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.250774383544922, "logits_per_token": -4.764752388000488, "logits_per_char": -0.7941253980000814, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1067, "native_id": "a5456dc611aa93b81d7ab6ed8e160f85", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.286068916320801, "incorrect_loss_raw": 11.959866762161255, "correct_loss_per_char": 0.8095632129245334, "incorrect_loss_per_char": 1.7370735372815813, "correct_loss_per_token": 7.286068916320801, "incorrect_loss_per_token": 8.821117997169495, "correct_loss_uncond": -8.917580604553223, "incorrect_loss_uncond": -3.1848812103271484}, "model_output": [{"sum_logits": -14.623967170715332, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.777435302734375, "logits_per_token": -7.311983585357666, "logits_per_char": -2.9247934341430666, "num_chars": 5}, {"sum_logits": -10.48602294921875, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.455636978149414, "logits_per_token": -5.243011474609375, "logits_per_char": -1.4980032784598214, "num_chars": 7}, {"sum_logits": -12.387360572814941, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.473588943481445, "logits_per_token": -12.387360572814941, "logits_per_char": -1.3763733969794378, "num_chars": 9}, {"sum_logits": -7.286068916320801, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.203649520874023, "logits_per_token": -7.286068916320801, "logits_per_char": -0.8095632129245334, "num_chars": 9}, {"sum_logits": -10.342116355895996, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.872330665588379, "logits_per_token": -10.342116355895996, "logits_per_char": -1.1491240395439997, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1068, "native_id": "11416df796f63d2f0dddc846b9c139d3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.60936164855957, "incorrect_loss_raw": 9.708880186080933, "correct_loss_per_char": 0.8841134707132975, "incorrect_loss_per_char": 1.7824776589870452, "correct_loss_per_token": 5.304680824279785, "incorrect_loss_per_token": 7.3462218046188354, "correct_loss_uncond": -11.685384750366211, "incorrect_loss_uncond": -4.202983379364014}, "model_output": [{"sum_logits": -8.4959135055542, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.43154525756836, "logits_per_token": -4.2479567527771, "logits_per_char": -1.061989188194275, "num_chars": 8}, {"sum_logits": -11.887336730957031, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -11.887336730957031, "logits_per_char": -2.3774673461914064, "num_chars": 5}, {"sum_logits": -8.046916961669922, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -10.526909828186035, "logits_per_token": -8.046916961669922, "logits_per_char": -1.6093833923339844, "num_chars": 5}, {"sum_logits": -10.60936164855957, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.29474639892578, "logits_per_token": -5.304680824279785, "logits_per_char": -0.8841134707132975, "num_chars": 12}, {"sum_logits": -10.405353546142578, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.38377571105957, "logits_per_token": -5.202676773071289, "logits_per_char": -2.0810707092285154, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1069, "native_id": "c908d7c4633c5e6add9463bdd47cb27e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.245046615600586, "incorrect_loss_raw": 13.292277932167053, "correct_loss_per_char": 0.8245046615600586, "incorrect_loss_per_char": 0.9740589797496796, "correct_loss_per_token": 4.122523307800293, "incorrect_loss_per_token": 7.50190281867981, "correct_loss_uncond": -10.677446365356445, "incorrect_loss_uncond": -2.8616143465042114}, "model_output": [{"sum_logits": -6.832474708557129, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.849177360534668, "logits_per_token": -3.4162373542785645, "logits_per_char": -0.8540593385696411, "num_chars": 8}, {"sum_logits": -6.846110820770264, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -6.846110820770264, "logits_per_char": -0.6846110820770264, "num_chars": 10}, {"sum_logits": -21.61536407470703, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.315677642822266, "logits_per_token": -10.807682037353516, "logits_per_char": -1.0807682037353517, "num_chars": 20}, {"sum_logits": -17.87516212463379, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.960481643676758, "logits_per_token": -8.937581062316895, "logits_per_char": -1.2767972946166992, "num_chars": 14}, {"sum_logits": -8.245046615600586, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.92249298095703, "logits_per_token": -4.122523307800293, "logits_per_char": -0.8245046615600586, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1070, "native_id": "7e522a60756f854c5331125f998bc36b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.745902061462402, "incorrect_loss_raw": 12.942441463470459, "correct_loss_per_char": 0.47459020614624026, "incorrect_loss_per_char": 1.5336726009845734, "correct_loss_per_token": 4.745902061462402, "incorrect_loss_per_token": 9.466676235198975, "correct_loss_uncond": -8.683602333068848, "incorrect_loss_uncond": -3.4296982288360596}, "model_output": [{"sum_logits": -12.81045913696289, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.947824478149414, "logits_per_token": -12.81045913696289, "logits_per_char": -2.562091827392578, "num_chars": 5}, {"sum_logits": -20.854591369628906, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.270347595214844, "logits_per_token": -6.951530456542969, "logits_per_char": -1.1585884094238281, "num_chars": 18}, {"sum_logits": -8.45156192779541, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.066084861755371, "logits_per_token": -8.45156192779541, "logits_per_char": -1.2073659896850586, "num_chars": 7}, {"sum_logits": -4.745902061462402, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.42950439453125, "logits_per_token": -4.745902061462402, "logits_per_char": -0.47459020614624026, "num_chars": 10}, {"sum_logits": -9.653153419494629, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -9.653153419494629, "logits_per_char": -1.2066441774368286, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1071, "native_id": "f4a75bf3f115b826a8097edfd0ff2781", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.960758209228516, "incorrect_loss_raw": 8.625038623809814, "correct_loss_per_char": 0.5973838806152344, "incorrect_loss_per_char": 0.9588549812634786, "correct_loss_per_token": 4.480379104614258, "incorrect_loss_per_token": 4.892804503440857, "correct_loss_uncond": -9.940887451171875, "incorrect_loss_uncond": -7.4487526416778564}, "model_output": [{"sum_logits": -8.960758209228516, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.90164566040039, "logits_per_token": -4.480379104614258, "logits_per_char": -0.5973838806152344, "num_chars": 15}, {"sum_logits": -4.642281532287598, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -10.743063926696777, "logits_per_token": -4.642281532287598, "logits_per_char": -0.7737135887145996, "num_chars": 6}, {"sum_logits": -8.27407455444336, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.565013885498047, "logits_per_token": -4.13703727722168, "logits_per_char": -1.03425931930542, "num_chars": 8}, {"sum_logits": -7.855969429016113, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.15125274658203, "logits_per_token": -3.9279847145080566, "logits_per_char": -0.6546641190846761, "num_chars": 12}, {"sum_logits": -13.727828979492188, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.835834503173828, "logits_per_token": -6.863914489746094, "logits_per_char": -1.3727828979492187, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1072, "native_id": "02f43014a135cbd39f23b044c99de96e", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.609219074249268, "incorrect_loss_raw": 9.484798073768616, "correct_loss_per_char": 0.6609219074249267, "incorrect_loss_per_char": 0.9817388561724929, "correct_loss_per_token": 3.304609537124634, "incorrect_loss_per_token": 6.6827312707901, "correct_loss_uncond": -18.7096209526062, "incorrect_loss_uncond": -7.895141243934631}, "model_output": [{"sum_logits": -6.609219074249268, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -25.31884002685547, "logits_per_token": -3.304609537124634, "logits_per_char": -0.6609219074249267, "num_chars": 10}, {"sum_logits": -8.13259220123291, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -8.13259220123291, "logits_per_char": -1.1617988858904158, "num_chars": 7}, {"sum_logits": -7.390065670013428, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.858484268188477, "logits_per_token": -7.390065670013428, "logits_per_char": -0.8211184077792697, "num_chars": 9}, {"sum_logits": -12.385379791259766, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.176204681396484, "logits_per_token": -6.192689895629883, "logits_per_char": -1.0321149826049805, "num_chars": 12}, {"sum_logits": -10.03115463256836, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.213787078857422, "logits_per_token": -5.01557731628418, "logits_per_char": -0.9119231484153054, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1073, "native_id": "8cf478192696744b3427f7c109019af5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 8.977569580078125, "incorrect_loss_raw": 9.023478388786316, "correct_loss_per_char": 0.690582275390625, "incorrect_loss_per_char": 0.9537934129888361, "correct_loss_per_token": 4.4887847900390625, "incorrect_loss_per_token": 4.959550082683563, "correct_loss_uncond": -10.733514785766602, "incorrect_loss_uncond": -7.999793648719788}, "model_output": [{"sum_logits": -7.94075345993042, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.53115463256836, "logits_per_token": -3.97037672996521, "logits_per_char": -0.7218866781754927, "num_chars": 11}, {"sum_logits": -10.493413925170898, "num_tokens": 4, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.314939498901367, "logits_per_token": -2.6233534812927246, "logits_per_char": -0.5246706962585449, "num_chars": 20}, {"sum_logits": -8.829194068908691, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.239700317382812, "logits_per_token": -8.829194068908691, "logits_per_char": -1.7658388137817382, "num_chars": 5}, {"sum_logits": -8.830552101135254, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.007293701171875, "logits_per_token": -4.415276050567627, "logits_per_char": -0.8027774637395685, "num_chars": 11}, {"sum_logits": -8.977569580078125, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.711084365844727, "logits_per_token": -4.4887847900390625, "logits_per_char": -0.690582275390625, "num_chars": 13}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1074, "native_id": "4ccd43cdff044bc4c644dadff1ff1e0b", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.708944320678711, "incorrect_loss_raw": 12.026476383209229, "correct_loss_per_char": 1.070894432067871, "incorrect_loss_per_char": 1.3116511212454902, "correct_loss_per_token": 10.708944320678711, "incorrect_loss_per_token": 7.952462792396545, "correct_loss_uncond": -5.511741638183594, "incorrect_loss_uncond": -4.175552845001221}, "model_output": [{"sum_logits": -4.739442825317383, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.92300033569336, "logits_per_token": -4.739442825317383, "logits_per_char": -0.5266047583685981, "num_chars": 9}, {"sum_logits": -10.774353981018066, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.688596725463867, "logits_per_token": -10.774353981018066, "logits_per_char": -1.795725663503011, "num_chars": 6}, {"sum_logits": -10.708944320678711, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.220685958862305, "logits_per_token": -10.708944320678711, "logits_per_char": -1.070894432067871, "num_chars": 10}, {"sum_logits": -20.096208572387695, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -22.086942672729492, "logits_per_token": -10.048104286193848, "logits_per_char": -1.6746840476989746, "num_chars": 12}, {"sum_logits": -12.49590015411377, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.109577178955078, "logits_per_token": -6.247950077056885, "logits_per_char": -1.249590015411377, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1075, "native_id": "7b7941b883328ad39048d4dfb1eb5623", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.125812530517578, "incorrect_loss_raw": 11.359835386276245, "correct_loss_per_char": 1.520968755086263, "incorrect_loss_per_char": 1.1214061323526445, "correct_loss_per_token": 9.125812530517578, "incorrect_loss_per_token": 8.124094605445862, "correct_loss_uncond": -3.1821985244750977, "incorrect_loss_uncond": -5.719219923019409}, "model_output": [{"sum_logits": -8.114629745483398, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.11556625366211, "logits_per_token": -8.114629745483398, "logits_per_char": -0.9016255272759331, "num_chars": 9}, {"sum_logits": -10.221247673034668, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.750511169433594, "logits_per_token": -5.110623836517334, "logits_per_char": -0.7300891195024762, "num_chars": 14}, {"sum_logits": -15.664678573608398, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.141996383666992, "logits_per_token": -7.832339286804199, "logits_per_char": -1.4240616885098545, "num_chars": 11}, {"sum_logits": -11.438785552978516, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.308147430419922, "logits_per_token": -11.438785552978516, "logits_per_char": -1.4298481941223145, "num_chars": 8}, {"sum_logits": -9.125812530517578, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -9.125812530517578, "logits_per_char": -1.520968755086263, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1076, "native_id": "008b7ba0c039f6d0d542c6c90aae173c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.446738243103027, "incorrect_loss_raw": 12.155956745147705, "correct_loss_per_char": 1.0406125675548206, "incorrect_loss_per_char": 1.052994336327936, "correct_loss_per_token": 5.723369121551514, "incorrect_loss_per_token": 6.0779783725738525, "correct_loss_uncond": -8.81148624420166, "incorrect_loss_uncond": -6.888594150543213}, "model_output": [{"sum_logits": -12.531764030456543, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -17.546207427978516, "logits_per_token": -6.2658820152282715, "logits_per_char": -1.044313669204712, "num_chars": 12}, {"sum_logits": -9.021090507507324, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -18.867748260498047, "logits_per_token": -4.510545253753662, "logits_per_char": -0.693930039039025, "num_chars": 13}, {"sum_logits": -11.446738243103027, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -20.258224487304688, "logits_per_token": -5.723369121551514, "logits_per_char": -1.0406125675548206, "num_chars": 11}, {"sum_logits": -16.082698822021484, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -20.112974166870117, "logits_per_token": -8.041349411010742, "logits_per_char": -1.786966535780165, "num_chars": 9}, {"sum_logits": -10.988273620605469, "num_tokens": 2, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -19.651273727416992, "logits_per_token": -5.494136810302734, "logits_per_char": -0.6867671012878418, "num_chars": 16}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1077, "native_id": "4c968fa73699a38639ba3ffa1745bc21", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.000227928161621, "incorrect_loss_raw": 10.301952362060547, "correct_loss_per_char": 1.0000455856323243, "incorrect_loss_per_char": 1.3100229328328914, "correct_loss_per_token": 5.000227928161621, "incorrect_loss_per_token": 7.21057391166687, "correct_loss_uncond": -6.229462623596191, "incorrect_loss_uncond": -6.119231462478638}, "model_output": [{"sum_logits": -8.781966209411621, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -8.781966209411621, "logits_per_char": -1.7563932418823243, "num_chars": 5}, {"sum_logits": -5.000227928161621, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -5.000227928161621, "logits_per_char": -1.0000455856323243, "num_chars": 5}, {"sum_logits": -9.75300121307373, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.197805404663086, "logits_per_token": -4.876500606536865, "logits_per_char": -0.8866364739157937, "num_chars": 11}, {"sum_logits": -7.694815635681152, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -7.694815635681152, "logits_per_char": -1.099259376525879, "num_chars": 7}, {"sum_logits": -14.978026390075684, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.71932601928711, "logits_per_token": -7.489013195037842, "logits_per_char": -1.4978026390075683, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1078, "native_id": "b1d5cdbf8ef7b3954a6a352bd4df5866", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.090651512145996, "incorrect_loss_raw": 7.839027643203735, "correct_loss_per_char": 1.0181303024291992, "incorrect_loss_per_char": 1.06694431810668, "correct_loss_per_token": 5.090651512145996, "incorrect_loss_per_token": 6.24118435382843, "correct_loss_uncond": -9.996833801269531, "incorrect_loss_uncond": -7.1460254192352295}, "model_output": [{"sum_logits": -5.090651512145996, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -5.090651512145996, "logits_per_char": -1.0181303024291992, "num_chars": 5}, {"sum_logits": -5.385323524475098, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -12.384324073791504, "logits_per_token": -5.385323524475098, "logits_per_char": -0.598369280497233, "num_chars": 9}, {"sum_logits": -3.9080400466918945, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.4590482711792, "logits_per_token": -3.9080400466918945, "logits_per_char": -0.6513400077819824, "num_chars": 6}, {"sum_logits": -9.280000686645508, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.966054916381836, "logits_per_token": -9.280000686645508, "logits_per_char": -1.8560001373291015, "num_chars": 5}, {"sum_logits": -12.782746315002441, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.13078498840332, "logits_per_token": -6.391373157501221, "logits_per_char": -1.1620678468184038, "num_chars": 11}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1079, "native_id": "c3bc395561113c96ec43afd715da5061", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3262555599212646, "incorrect_loss_raw": 13.57911491394043, "correct_loss_per_char": 0.33232222284589497, "incorrect_loss_per_char": 1.7042515178521473, "correct_loss_per_token": 2.3262555599212646, "incorrect_loss_per_token": 9.788525263468424, "correct_loss_uncond": -11.357091188430786, "incorrect_loss_uncond": -2.163562059402466}, "model_output": [{"sum_logits": -2.3262555599212646, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.68334674835205, "logits_per_token": -2.3262555599212646, "logits_per_char": -0.33232222284589497, "num_chars": 7}, {"sum_logits": -13.372745513916016, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.241580963134766, "logits_per_token": -6.686372756958008, "logits_per_char": -2.2287909189860025, "num_chars": 6}, {"sum_logits": -14.30591869354248, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.3347749710083, "logits_per_token": -14.30591869354248, "logits_per_char": -1.78823983669281, "num_chars": 8}, {"sum_logits": -13.923816680908203, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.55308723449707, "logits_per_token": -13.923816680908203, "logits_per_char": -1.7404770851135254, "num_chars": 8}, {"sum_logits": -12.71397876739502, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.841264724731445, "logits_per_token": -4.237992922465007, "logits_per_char": -1.0594982306162517, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1080, "native_id": "d0bd5b5ee7319d1c4727e38d429dd54e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2245159149169922, "incorrect_loss_raw": 13.981880903244019, "correct_loss_per_char": 0.2040859858194987, "incorrect_loss_per_char": 1.4299848666696837, "correct_loss_per_token": 1.2245159149169922, "incorrect_loss_per_token": 9.527451992034912, "correct_loss_uncond": -13.062676429748535, "incorrect_loss_uncond": -1.2996108531951904}, "model_output": [{"sum_logits": -14.947110176086426, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.164570808410645, "logits_per_token": -14.947110176086426, "logits_per_char": -1.8683887720108032, "num_chars": 8}, {"sum_logits": -11.048263549804688, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -11.048263549804688, "logits_per_char": -1.2275848388671875, "num_chars": 9}, {"sum_logits": -12.822305679321289, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.136394500732422, "logits_per_token": -6.4111528396606445, "logits_per_char": -1.0685254732767742, "num_chars": 12}, {"sum_logits": -17.109844207763672, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.019540786743164, "logits_per_token": -5.703281402587891, "logits_per_char": -1.5554403825239702, "num_chars": 11}, {"sum_logits": -1.2245159149169922, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -1.2245159149169922, "logits_per_char": -0.2040859858194987, "num_chars": 6}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1081, "native_id": "81f5e741d970578867495ceea5a0c848", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.376733779907227, "incorrect_loss_raw": 13.008947849273682, "correct_loss_per_char": 0.7280431635239545, "incorrect_loss_per_char": 1.4905994865629408, "correct_loss_per_token": 4.125577926635742, "incorrect_loss_per_token": 6.7575976848602295, "correct_loss_uncond": -8.157308578491211, "incorrect_loss_uncond": -4.269247531890869}, "model_output": [{"sum_logits": -21.549663543701172, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -27.81195068359375, "logits_per_token": -5.387415885925293, "logits_per_char": -1.1972035302056208, "num_chars": 18}, {"sum_logits": -10.92618465423584, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.061530113220215, "logits_per_token": -10.92618465423584, "logits_per_char": -1.8210307757059734, "num_chars": 6}, {"sum_logits": -12.376733779907227, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.534042358398438, "logits_per_token": -4.125577926635742, "logits_per_char": -0.7280431635239545, "num_chars": 17}, {"sum_logits": -6.29521369934082, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.203143119812012, "logits_per_token": -6.29521369934082, "logits_per_char": -1.04920228322347, "num_chars": 6}, {"sum_logits": -13.264729499816895, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.036157608032227, "logits_per_token": -4.421576499938965, "logits_per_char": -1.8949613571166992, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1082, "native_id": "6714593a8d1f8ae39930c1f0316e9ffc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.35061779618263245, "incorrect_loss_raw": 12.995105266571045, "correct_loss_per_char": 0.05843629936377207, "incorrect_loss_per_char": 1.649172311737424, "correct_loss_per_token": 0.35061779618263245, "incorrect_loss_per_token": 10.839879274368286, "correct_loss_uncond": -12.937030404806137, "incorrect_loss_uncond": -2.622190237045288}, "model_output": [{"sum_logits": -10.865900993347168, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -18.327959060668945, "logits_per_token": -10.865900993347168, "logits_per_char": -1.8109834988911946, "num_chars": 6}, {"sum_logits": -10.191295623779297, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.918556213378906, "logits_per_token": -10.191295623779297, "logits_per_char": -1.273911952972412, "num_chars": 8}, {"sum_logits": -17.24180793762207, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.513633728027344, "logits_per_token": -8.620903968811035, "logits_per_char": -1.2315577098301478, "num_chars": 14}, {"sum_logits": -0.35061779618263245, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": true, "sum_logits_uncond": -13.28764820098877, "logits_per_token": -0.35061779618263245, "logits_per_char": -0.05843629936377207, "num_chars": 6}, {"sum_logits": -13.681416511535645, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.709033012390137, "logits_per_token": -13.681416511535645, "logits_per_char": -2.280236085255941, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1083, "native_id": "75cb55aec7e64f592c01eee5d4578dcd", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6748361587524414, "incorrect_loss_raw": 10.912510871887207, "correct_loss_per_char": 0.30623634656270343, "incorrect_loss_per_char": 1.6892260551452636, "correct_loss_per_token": 3.6748361587524414, "incorrect_loss_per_token": 10.912510871887207, "correct_loss_uncond": -9.448143005371094, "incorrect_loss_uncond": -0.9154973030090332}, "model_output": [{"sum_logits": -3.6748361587524414, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.122979164123535, "logits_per_token": -3.6748361587524414, "logits_per_char": -0.30623634656270343, "num_chars": 12}, {"sum_logits": -9.093496322631836, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -11.553074836730957, "logits_per_token": -9.093496322631836, "logits_per_char": -0.9093496322631835, "num_chars": 10}, {"sum_logits": -10.637548446655273, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.185674667358398, "logits_per_token": -10.637548446655273, "logits_per_char": -1.0637548446655274, "num_chars": 10}, {"sum_logits": -9.715243339538574, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -9.250422477722168, "logits_per_token": -9.715243339538574, "logits_per_char": -1.9430486679077148, "num_chars": 5}, {"sum_logits": -14.203755378723145, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.322860717773438, "logits_per_token": -14.203755378723145, "logits_per_char": -2.840751075744629, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1084, "native_id": "0b30831fb1862bc62339bdf930cbc447", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 22.027822494506836, "incorrect_loss_raw": 11.138362646102905, "correct_loss_per_char": 1.8356518745422363, "incorrect_loss_per_char": 1.1571074183025059, "correct_loss_per_token": 7.342607498168945, "incorrect_loss_per_token": 8.001762390136719, "correct_loss_uncond": -7.409086227416992, "incorrect_loss_uncond": -5.729139089584351}, "model_output": [{"sum_logits": -13.342947006225586, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.457244873046875, "logits_per_token": -6.671473503112793, "logits_per_char": -1.3342947006225585, "num_chars": 10}, {"sum_logits": -22.027822494506836, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -29.436908721923828, "logits_per_token": -7.342607498168945, "logits_per_char": -1.8356518745422363, "num_chars": 12}, {"sum_logits": -11.749855041503906, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.06351661682129, "logits_per_token": -5.874927520751953, "logits_per_char": -0.8392753601074219, "num_chars": 14}, {"sum_logits": -10.244840621948242, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.384324073791504, "logits_per_token": -10.244840621948242, "logits_per_char": -1.1383156246609158, "num_chars": 9}, {"sum_logits": -9.215807914733887, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -9.215807914733887, "logits_per_char": -1.3165439878191267, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1085, "native_id": "29c194d032a266a7160bff6f546a4d9d", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.361898422241211, "incorrect_loss_raw": 8.733763217926025, "correct_loss_per_char": 1.5904746055603027, "incorrect_loss_per_char": 1.0894837322689237, "correct_loss_per_token": 6.361898422241211, "incorrect_loss_per_token": 5.912552356719971, "correct_loss_uncond": -5.419817924499512, "incorrect_loss_uncond": -6.823200225830078}, "model_output": [{"sum_logits": -6.003467559814453, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -6.003467559814453, "logits_per_char": -0.5002889633178711, "num_chars": 12}, {"sum_logits": -6.490917205810547, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.06329345703125, "logits_per_token": -3.2454586029052734, "logits_per_char": -0.9272738865443638, "num_chars": 7}, {"sum_logits": -16.07876968383789, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.40606117248535, "logits_per_token": -8.039384841918945, "logits_per_char": -1.3398974736531575, "num_chars": 12}, {"sum_logits": -6.361898422241211, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -6.361898422241211, "logits_per_char": -1.5904746055603027, "num_chars": 4}, {"sum_logits": -6.361898422241211, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.781716346740723, "logits_per_token": -6.361898422241211, "logits_per_char": -1.5904746055603027, "num_chars": 4}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1086, "native_id": "ea33206992fb7ad1c3476e9673bb4a9c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.102778911590576, "incorrect_loss_raw": 10.573681592941284, "correct_loss_per_char": 0.591898242632548, "incorrect_loss_per_char": 1.2253492392026462, "correct_loss_per_token": 3.551389455795288, "incorrect_loss_per_token": 7.4020678997039795, "correct_loss_uncond": -9.756123065948486, "incorrect_loss_uncond": -5.575438976287842}, "model_output": [{"sum_logits": -14.240032196044922, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -7.120016098022461, "logits_per_char": -1.0953870920034556, "num_chars": 13}, {"sum_logits": -10.005404472351074, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.863768577575684, "logits_per_token": -10.005404472351074, "logits_per_char": -2.0010808944702148, "num_chars": 5}, {"sum_logits": -11.132877349853516, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.962568283081055, "logits_per_token": -5.566438674926758, "logits_per_char": -1.1132877349853516, "num_chars": 10}, {"sum_logits": -7.102778911590576, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.858901977539062, "logits_per_token": -3.551389455795288, "logits_per_char": -0.591898242632548, "num_chars": 12}, {"sum_logits": -6.916412353515625, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.747095108032227, "logits_per_token": -6.916412353515625, "logits_per_char": -0.6916412353515625, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1087, "native_id": "2b7dd91da5dde1560ace2cd82af926de", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.080160140991211, "incorrect_loss_raw": 13.71244764328003, "correct_loss_per_char": 0.2566800117492676, "incorrect_loss_per_char": 1.1348290305116993, "correct_loss_per_token": 1.5400800704956055, "incorrect_loss_per_token": 5.7896816333134975, "correct_loss_uncond": -19.19291114807129, "incorrect_loss_uncond": -7.567456245422363}, "model_output": [{"sum_logits": -11.510106086730957, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.859966278076172, "logits_per_token": -5.7550530433654785, "logits_per_char": -1.1510106086730958, "num_chars": 10}, {"sum_logits": -11.134767532348633, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.16405487060547, "logits_per_token": -3.7115891774495444, "logits_per_char": -0.7423178354899088, "num_chars": 15}, {"sum_logits": -17.742671966552734, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -25.43960952758789, "logits_per_token": -8.871335983276367, "logits_per_char": -1.6129701787775212, "num_chars": 11}, {"sum_logits": -14.462244987487793, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.65598487854004, "logits_per_token": -4.820748329162598, "logits_per_char": -1.0330174991062708, "num_chars": 14}, {"sum_logits": -3.080160140991211, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -1.5400800704956055, "logits_per_char": -0.2566800117492676, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1088, "native_id": "eb50f536830ba18ab987c7ff652e2aba", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.997836112976074, "incorrect_loss_raw": 16.550580263137817, "correct_loss_per_char": 0.6498918056488037, "incorrect_loss_per_char": 1.4987768820353917, "correct_loss_per_token": 6.498918056488037, "incorrect_loss_per_token": 11.361673672993977, "correct_loss_uncond": -5.3627519607543945, "incorrect_loss_uncond": -0.989551305770874}, "model_output": [{"sum_logits": -12.997836112976074, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.36058807373047, "logits_per_token": -6.498918056488037, "logits_per_char": -0.6498918056488037, "num_chars": 20}, {"sum_logits": -19.05211067199707, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -19.778059005737305, "logits_per_token": -9.526055335998535, "logits_per_char": -1.5876758893330891, "num_chars": 12}, {"sum_logits": -16.844356536865234, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.28049087524414, "logits_per_token": -5.614785512288411, "logits_per_char": -1.2031683240618025, "num_chars": 14}, {"sum_logits": -15.538333892822266, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.523576736450195, "logits_per_token": -15.538333892822266, "logits_per_char": -2.219761984688895, "num_chars": 7}, {"sum_logits": -14.7675199508667, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.578399658203125, "logits_per_token": -14.7675199508667, "logits_per_char": -0.98450133005778, "num_chars": 15}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1089, "native_id": "6bc3ebcfd04965c25bde71339955746c", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.809298038482666, "incorrect_loss_raw": 10.823717713356018, "correct_loss_per_char": 0.4232553376091851, "incorrect_loss_per_char": 1.4800924772307986, "correct_loss_per_token": 3.809298038482666, "incorrect_loss_per_token": 10.823717713356018, "correct_loss_uncond": -8.788989543914795, "incorrect_loss_uncond": -2.9474202394485474}, "model_output": [{"sum_logits": -11.6088285446167, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.794787406921387, "logits_per_token": -11.6088285446167, "logits_per_char": -1.4511035680770874, "num_chars": 8}, {"sum_logits": -3.809298038482666, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.598287582397461, "logits_per_token": -3.809298038482666, "logits_per_char": -0.4232553376091851, "num_chars": 9}, {"sum_logits": -11.77391242980957, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.312516212463379, "logits_per_token": -11.77391242980957, "logits_per_char": -1.6819874899727958, "num_chars": 7}, {"sum_logits": -12.753827095031738, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -12.753827095031738, "logits_per_char": -1.5942283868789673, "num_chars": 8}, {"sum_logits": -7.1583027839660645, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.524795532226562, "logits_per_token": -7.1583027839660645, "logits_per_char": -1.193050463994344, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1090, "native_id": "163898952cb6baf3a6440696e1352e86", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.048780918121338, "incorrect_loss_raw": 10.267699003219604, "correct_loss_per_char": 0.4405488073825836, "incorrect_loss_per_char": 1.2375955818191406, "correct_loss_per_token": 3.524390459060669, "incorrect_loss_per_token": 8.406864086786905, "correct_loss_uncond": -10.366557598114014, "incorrect_loss_uncond": -4.772876262664795}, "model_output": [{"sum_logits": -11.165009498596191, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -18.128067016601562, "logits_per_token": -3.721669832865397, "logits_per_char": -0.6202783054775662, "num_chars": 18}, {"sum_logits": -7.048780918121338, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.41533851623535, "logits_per_token": -3.524390459060669, "logits_per_char": -0.4405488073825836, "num_chars": 16}, {"sum_logits": -11.009239196777344, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.746891021728516, "logits_per_token": -11.009239196777344, "logits_per_char": -1.223248799641927, "num_chars": 9}, {"sum_logits": -9.056777000427246, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.017654418945312, "logits_per_token": -9.056777000427246, "logits_per_char": -0.6469126428876605, "num_chars": 14}, {"sum_logits": -9.839770317077637, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.269688606262207, "logits_per_token": -9.839770317077637, "logits_per_char": -2.459942579269409, "num_chars": 4}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1091, "native_id": "aa984e2b487d08889bc0c73bab5ac945", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.1331684589385986, "incorrect_loss_raw": 9.361634492874146, "correct_loss_per_char": 0.3555280764897664, "incorrect_loss_per_char": 1.4190107072242584, "correct_loss_per_token": 2.1331684589385986, "incorrect_loss_per_token": 9.361634492874146, "correct_loss_uncond": -12.513839483261108, "incorrect_loss_uncond": -4.82856559753418}, "model_output": [{"sum_logits": -7.602370262145996, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.196586608886719, "logits_per_token": -7.602370262145996, "logits_per_char": -1.900592565536499, "num_chars": 4}, {"sum_logits": -12.739568710327148, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.688596725463867, "logits_per_token": -12.739568710327148, "logits_per_char": -2.1232614517211914, "num_chars": 6}, {"sum_logits": -12.291948318481445, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.161336898803711, "logits_per_token": -12.291948318481445, "logits_per_char": -1.1174498471346768, "num_chars": 11}, {"sum_logits": -2.1331684589385986, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": true, "sum_logits_uncond": -14.647007942199707, "logits_per_token": -2.1331684589385986, "logits_per_char": -0.3555280764897664, "num_chars": 6}, {"sum_logits": -4.812650680541992, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -4.812650680541992, "logits_per_char": -0.5347389645046658, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1092, "native_id": "d78baca23e0a636a8961e17119047e63", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.5287845134735107, "incorrect_loss_raw": 8.381327033042908, "correct_loss_per_char": 0.7057569026947021, "incorrect_loss_per_char": 1.4507098237673441, "correct_loss_per_token": 3.5287845134735107, "incorrect_loss_per_token": 8.381327033042908, "correct_loss_uncond": -10.77643895149231, "incorrect_loss_uncond": -4.603632807731628}, "model_output": [{"sum_logits": -7.161840915679932, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -7.161840915679932, "logits_per_char": -1.4323681831359862, "num_chars": 5}, {"sum_logits": -8.265582084655762, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -8.265582084655762, "logits_per_char": -1.3775970141092937, "num_chars": 6}, {"sum_logits": -3.5287845134735107, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -3.5287845134735107, "logits_per_char": -0.7057569026947021, "num_chars": 5}, {"sum_logits": -9.741846084594727, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -11.682840347290039, "logits_per_token": -9.741846084594727, "logits_per_char": -1.9483692169189453, "num_chars": 5}, {"sum_logits": -8.356039047241211, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.722372055053711, "logits_per_token": -8.356039047241211, "logits_per_char": -1.0445048809051514, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1093, "native_id": "ac6378b5e8462dc1bde1155d706213d8", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.449216842651367, "incorrect_loss_raw": 10.692872047424316, "correct_loss_per_char": 1.037434736887614, "incorrect_loss_per_char": 0.8895467684898541, "correct_loss_per_token": 4.149738947550456, "incorrect_loss_per_token": 4.064392427603403, "correct_loss_uncond": -5.36427116394043, "incorrect_loss_uncond": -7.302815914154053}, "model_output": [{"sum_logits": -5.707996368408203, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.269561767578125, "logits_per_token": -2.8539981842041016, "logits_per_char": -0.4756663640340169, "num_chars": 12}, {"sum_logits": -9.548831939697266, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.99258041381836, "logits_per_token": -3.1829439798990884, "logits_per_char": -1.1936039924621582, "num_chars": 8}, {"sum_logits": -12.449216842651367, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.813488006591797, "logits_per_token": -4.149738947550456, "logits_per_char": -1.037434736887614, "num_chars": 12}, {"sum_logits": -13.367850303649902, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.737808227539062, "logits_per_token": -6.683925151824951, "logits_per_char": -1.2152591185136274, "num_chars": 11}, {"sum_logits": -14.146809577941895, "num_tokens": 4, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.98280143737793, "logits_per_token": -3.5367023944854736, "logits_per_char": -0.6736575989496141, "num_chars": 21}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1094, "native_id": "c1aebf059c5102f4e773f7fe4afe13f0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.054582118988037, "incorrect_loss_raw": 13.453053951263428, "correct_loss_per_char": 0.7054582118988038, "incorrect_loss_per_char": 1.0273499637413104, "correct_loss_per_token": 7.054582118988037, "incorrect_loss_per_token": 9.834746837615967, "correct_loss_uncond": -6.498351573944092, "incorrect_loss_uncond": -1.9787094593048096}, "model_output": [{"sum_logits": -13.876241683959961, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.642078399658203, "logits_per_token": -6.9381208419799805, "logits_per_char": -1.15635347366333, "num_chars": 12}, {"sum_logits": -15.070215225219727, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.34062957763672, "logits_per_token": -7.535107612609863, "logits_per_char": -0.8864832485423368, "num_chars": 17}, {"sum_logits": -10.997590065002441, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.81274700164795, "logits_per_token": -10.997590065002441, "logits_per_char": -0.9997809150002219, "num_chars": 11}, {"sum_logits": -7.054582118988037, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.552933692932129, "logits_per_token": -7.054582118988037, "logits_per_char": -0.7054582118988038, "num_chars": 10}, {"sum_logits": -13.868168830871582, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.931598663330078, "logits_per_token": -13.868168830871582, "logits_per_char": -1.0667822177593524, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1095, "native_id": "1017807310a25d3ea4a4ec305e91cba3", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.7722907066345215, "incorrect_loss_raw": 7.496467709541321, "correct_loss_per_char": 0.4191434118482802, "incorrect_loss_per_char": 0.992094259791904, "correct_loss_per_token": 1.8861453533172607, "incorrect_loss_per_token": 5.757032811641693, "correct_loss_uncond": -9.815826892852783, "incorrect_loss_uncond": -6.81138551235199}, "model_output": [{"sum_logits": -10.401388168334961, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -10.401388168334961, "logits_per_char": -1.1557097964816623, "num_chars": 9}, {"sum_logits": -9.276986122131348, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.84480857849121, "logits_per_token": -2.319246530532837, "logits_per_char": -0.9276986122131348, "num_chars": 10}, {"sum_logits": -3.7722907066345215, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.588117599487305, "logits_per_token": -1.8861453533172607, "logits_per_char": -0.4191434118482802, "num_chars": 9}, {"sum_logits": -5.295920372009277, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.884077072143555, "logits_per_token": -5.295920372009277, "logits_per_char": -0.8826533953348795, "num_chars": 6}, {"sum_logits": -5.011576175689697, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.756857872009277, "logits_per_token": -5.011576175689697, "logits_per_char": -1.0023152351379394, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1096, "native_id": "7192c9f5c513aac9042bad595ff5af9f", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.713358879089355, "incorrect_loss_raw": 11.388983964920044, "correct_loss_per_char": 1.079262097676595, "incorrect_loss_per_char": 1.1307728466315148, "correct_loss_per_token": 4.856679439544678, "incorrect_loss_per_token": 7.076452732086182, "correct_loss_uncond": -10.016253471374512, "incorrect_loss_uncond": -3.737488031387329}, "model_output": [{"sum_logits": -11.055685997009277, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.67788028717041, "logits_per_token": -11.055685997009277, "logits_per_char": -0.9213071664174398, "num_chars": 12}, {"sum_logits": -14.783537864685059, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.87059497833252, "logits_per_token": -7.391768932342529, "logits_per_char": -1.1371952203603892, "num_chars": 13}, {"sum_logits": -8.876362800598145, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.526481628417969, "logits_per_token": -4.438181400299072, "logits_per_char": -1.109545350074768, "num_chars": 8}, {"sum_logits": -10.840349197387695, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.430931091308594, "logits_per_token": -5.420174598693848, "logits_per_char": -1.355043649673462, "num_chars": 8}, {"sum_logits": -9.713358879089355, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.729612350463867, "logits_per_token": -4.856679439544678, "logits_per_char": -1.079262097676595, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1097, "native_id": "7c05e8d5a057085455eea243fbd1cd90", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.4342970848083496, "incorrect_loss_raw": 16.621871948242188, "correct_loss_per_char": 0.24530693462916783, "incorrect_loss_per_char": 1.370065781048366, "correct_loss_per_token": 1.7171485424041748, "incorrect_loss_per_token": 8.140950202941895, "correct_loss_uncond": -17.403582096099854, "incorrect_loss_uncond": -2.950270175933838}, "model_output": [{"sum_logits": -26.308658599853516, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -27.5250244140625, "logits_per_token": -6.577164649963379, "logits_per_char": -1.2527932666596913, "num_chars": 21}, {"sum_logits": -13.005472183227539, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.524559020996094, "logits_per_token": -6.5027360916137695, "logits_per_char": -1.300547218322754, "num_chars": 10}, {"sum_logits": -11.794443130493164, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.537405014038086, "logits_per_token": -11.794443130493164, "logits_per_char": -1.9657405217488606, "num_chars": 6}, {"sum_logits": -3.4342970848083496, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.837879180908203, "logits_per_token": -1.7171485424041748, "logits_per_char": -0.24530693462916783, "num_chars": 14}, {"sum_logits": -15.378913879394531, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.701580047607422, "logits_per_token": -7.689456939697266, "logits_per_char": -0.9611821174621582, "num_chars": 16}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1098, "native_id": "3cb91a71a6567da870eedf37becc97ef", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.036222457885742, "incorrect_loss_raw": 11.757179021835327, "correct_loss_per_char": 0.7530185381571451, "incorrect_loss_per_char": 1.4002805618324665, "correct_loss_per_token": 4.518111228942871, "incorrect_loss_per_token": 10.11881411075592, "correct_loss_uncond": -8.254405975341797, "incorrect_loss_uncond": -3.6609690189361572}, "model_output": [{"sum_logits": -9.036222457885742, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.29062843322754, "logits_per_token": -4.518111228942871, "logits_per_char": -0.7530185381571451, "num_chars": 12}, {"sum_logits": -13.106919288635254, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -21.045446395874023, "logits_per_token": -6.553459644317627, "logits_per_char": -1.1915381171486594, "num_chars": 11}, {"sum_logits": -8.667631149291992, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.7456693649292, "logits_per_token": -8.667631149291992, "logits_per_char": -0.9630701276991103, "num_chars": 9}, {"sum_logits": -13.816461563110352, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -13.816461563110352, "logits_per_char": -2.302743593851725, "num_chars": 6}, {"sum_logits": -11.437704086303711, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.92566204071045, "logits_per_token": -11.437704086303711, "logits_per_char": -1.143770408630371, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1099, "native_id": "9b4bbf3c4d24ecdb4b27320afb706808", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.85500717163086, "incorrect_loss_raw": 11.793787002563477, "correct_loss_per_char": 0.985500717163086, "incorrect_loss_per_char": 1.2392256990367292, "correct_loss_per_token": 3.2850023905436196, "incorrect_loss_per_token": 7.441603660583496, "correct_loss_uncond": -11.309700012207031, "incorrect_loss_uncond": -5.582162618637085}, "model_output": [{"sum_logits": -9.85500717163086, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.16470718383789, "logits_per_token": -3.2850023905436196, "logits_per_char": -0.985500717163086, "num_chars": 10}, {"sum_logits": -10.962200164794922, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.883235931396484, "logits_per_token": -3.654066721598307, "logits_per_char": -0.9135166803995768, "num_chars": 12}, {"sum_logits": -12.465617179870605, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -12.465617179870605, "logits_per_char": -2.0776028633117676, "num_chars": 6}, {"sum_logits": -15.150899887084961, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -5.050299962361653, "logits_per_char": -0.8912294051226448, "num_chars": 17}, {"sum_logits": -8.596430778503418, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.95835018157959, "logits_per_token": -8.596430778503418, "logits_per_char": -1.0745538473129272, "num_chars": 8}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1100, "native_id": "43df3a316880d8bab346c06bd43b94dd", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.722776412963867, "incorrect_loss_raw": 14.532426118850708, "correct_loss_per_char": 0.9306941032409668, "incorrect_loss_per_char": 1.2695961194333454, "correct_loss_per_token": 3.722776412963867, "incorrect_loss_per_token": 11.883646726608276, "correct_loss_uncond": -8.060027122497559, "incorrect_loss_uncond": -3.279123544692993}, "model_output": [{"sum_logits": -9.811326026916504, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.535224914550781, "logits_per_token": -9.811326026916504, "logits_per_char": -1.635221004486084, "num_chars": 6}, {"sum_logits": -21.190235137939453, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -27.811811447143555, "logits_per_token": -10.595117568969727, "logits_per_char": -1.2464844198787914, "num_chars": 17}, {"sum_logits": -13.83514404296875, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.169634819030762, "logits_per_token": -13.83514404296875, "logits_per_char": -0.9882245744977679, "num_chars": 14}, {"sum_logits": -3.722776412963867, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -11.782803535461426, "logits_per_token": -3.722776412963867, "logits_per_char": -0.9306941032409668, "num_chars": 4}, {"sum_logits": -13.292999267578125, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.729527473449707, "logits_per_token": -13.292999267578125, "logits_per_char": -1.2084544788707385, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1101, "native_id": "858a5eaa587fe0e266722228671a6bd1", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.325316429138184, "incorrect_loss_raw": 10.009652853012085, "correct_loss_per_char": 0.5750287662852894, "incorrect_loss_per_char": 1.116510395776658, "correct_loss_per_token": 6.325316429138184, "incorrect_loss_per_token": 5.886014540990193, "correct_loss_uncond": -7.017204284667969, "incorrect_loss_uncond": -6.580763816833496}, "model_output": [{"sum_logits": -6.325316429138184, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.342520713806152, "logits_per_token": -6.325316429138184, "logits_per_char": -0.5750287662852894, "num_chars": 11}, {"sum_logits": -9.723764419555664, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.75291919708252, "logits_per_token": -9.723764419555664, "logits_per_char": -0.9723764419555664, "num_chars": 10}, {"sum_logits": -14.006410598754883, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.51103401184082, "logits_per_token": -7.003205299377441, "logits_per_char": -2.0009157998221263, "num_chars": 7}, {"sum_logits": -8.28565788269043, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.857738494873047, "logits_per_token": -4.142828941345215, "logits_per_char": -0.6904714902242025, "num_chars": 12}, {"sum_logits": -8.022778511047363, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.239974975585938, "logits_per_token": -2.6742595036824546, "logits_per_char": -0.8022778511047364, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1102, "native_id": "34005ef0caafefc8585c9fcd50e94557", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.579814910888672, "incorrect_loss_raw": 14.058562278747559, "correct_loss_per_char": 0.6128439222063337, "incorrect_loss_per_char": 1.4452972044074346, "correct_loss_per_token": 4.289907455444336, "incorrect_loss_per_token": 8.777069568634033, "correct_loss_uncond": -9.556983947753906, "incorrect_loss_uncond": -1.133371353149414}, "model_output": [{"sum_logits": -11.12989330291748, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.016385078430176, "logits_per_token": -11.12989330291748, "logits_per_char": -2.225978660583496, "num_chars": 5}, {"sum_logits": -8.579814910888672, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.136798858642578, "logits_per_token": -4.289907455444336, "logits_per_char": -0.6128439222063337, "num_chars": 14}, {"sum_logits": -16.555259704589844, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.604555130004883, "logits_per_token": -4.138814926147461, "logits_per_char": -0.9197366502549913, "num_chars": 18}, {"sum_logits": -17.419052124023438, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -17.19159507751465, "logits_per_token": -8.709526062011719, "logits_per_char": -1.244218008858817, "num_chars": 14}, {"sum_logits": -11.130043983459473, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.955199241638184, "logits_per_token": -11.130043983459473, "logits_per_char": -1.391255497932434, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1103, "native_id": "f61d83f90b92a8d537989e55ee70542d", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.98728609085083, "incorrect_loss_raw": 11.4222913980484, "correct_loss_per_char": 0.6352078264409845, "incorrect_loss_per_char": 1.474137154251638, "correct_loss_per_token": 6.98728609085083, "incorrect_loss_per_token": 7.6001938581466675, "correct_loss_uncond": -6.461097240447998, "incorrect_loss_uncond": -4.429266571998596}, "model_output": [{"sum_logits": -12.977022171020508, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.64935302734375, "logits_per_token": -6.488511085510254, "logits_per_char": -1.1797292882745916, "num_chars": 11}, {"sum_logits": -7.835592269897461, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -10.421459197998047, "logits_per_token": -7.835592269897461, "logits_per_char": -1.3059320449829102, "num_chars": 6}, {"sum_logits": -17.59975814819336, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.79987907409668, "logits_per_char": -1.9555286831325955, "num_chars": 9}, {"sum_logits": -6.98728609085083, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -6.98728609085083, "logits_per_char": -0.6352078264409845, "num_chars": 11}, {"sum_logits": -7.276793003082275, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.858410835266113, "logits_per_token": -7.276793003082275, "logits_per_char": -1.4553586006164552, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1104, "native_id": "3bf06235a537adc9d85431846595b800", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.2494561672210693, "incorrect_loss_raw": 8.340007543563843, "correct_loss_per_char": 0.20824269453684488, "incorrect_loss_per_char": 1.4307394345601399, "correct_loss_per_token": 1.2494561672210693, "incorrect_loss_per_token": 8.340007543563843, "correct_loss_uncond": -12.446599245071411, "incorrect_loss_uncond": -6.624195337295532}, "model_output": [{"sum_logits": -7.655238151550293, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.568180084228516, "logits_per_token": -7.655238151550293, "logits_per_char": -1.275873025258382, "num_chars": 6}, {"sum_logits": -1.2494561672210693, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": true, "sum_logits_uncond": -13.69605541229248, "logits_per_token": -1.2494561672210693, "logits_per_char": -0.20824269453684488, "num_chars": 6}, {"sum_logits": -9.069561004638672, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.505199432373047, "logits_per_token": -9.069561004638672, "logits_per_char": -1.8139122009277344, "num_chars": 5}, {"sum_logits": -10.781859397888184, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.350635528564453, "logits_per_token": -10.781859397888184, "logits_per_char": -1.7969765663146973, "num_chars": 6}, {"sum_logits": -5.853371620178223, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.432796478271484, "logits_per_token": -5.853371620178223, "logits_per_char": -0.8361959457397461, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1105, "native_id": "79ec11d8072ce42779adfe0a19bd5374", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.1470232009887695, "incorrect_loss_raw": 11.484207153320312, "correct_loss_per_char": 0.7941136889987521, "incorrect_loss_per_char": 0.9174188534418742, "correct_loss_per_token": 7.1470232009887695, "incorrect_loss_per_token": 5.754545331001282, "correct_loss_uncond": -6.000313758850098, "incorrect_loss_uncond": -5.012925863265991}, "model_output": [{"sum_logits": -12.53022289276123, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.700693130493164, "logits_per_token": -6.265111446380615, "logits_per_char": -0.8353481928507487, "num_chars": 15}, {"sum_logits": -11.48122787475586, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.30280876159668, "logits_per_token": -3.827075958251953, "logits_per_char": -1.148122787475586, "num_chars": 10}, {"sum_logits": -13.499075889587402, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -17.270750045776367, "logits_per_token": -4.499691963195801, "logits_per_char": -0.7499486605326334, "num_chars": 18}, {"sum_logits": -8.426301956176758, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -8.426301956176758, "logits_per_char": -0.9362557729085287, "num_chars": 9}, {"sum_logits": -7.1470232009887695, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.147336959838867, "logits_per_token": -7.1470232009887695, "logits_per_char": -0.7941136889987521, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1106, "native_id": "2982d0eae1bf880f5930341af7665716", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.734870910644531, "incorrect_loss_raw": 12.747414588928223, "correct_loss_per_char": 0.47348709106445314, "incorrect_loss_per_char": 1.3451618221071033, "correct_loss_per_token": 4.734870910644531, "incorrect_loss_per_token": 6.507582823435465, "correct_loss_uncond": -9.642325401306152, "incorrect_loss_uncond": -2.934924364089966}, "model_output": [{"sum_logits": -13.181346893310547, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -6.590673446655273, "logits_per_char": -1.4645940992567275, "num_chars": 9}, {"sum_logits": -13.776490211486816, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.5921634038289385, "logits_per_char": -1.5307211346096463, "num_chars": 9}, {"sum_logits": -13.776490211486816, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -4.5921634038289385, "logits_per_char": -1.5307211346096463, "num_chars": 9}, {"sum_logits": -4.734870910644531, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -4.734870910644531, "logits_per_char": -0.47348709106445314, "num_chars": 10}, {"sum_logits": -10.255331039428711, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -10.255331039428711, "logits_per_char": -0.8546109199523926, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1107, "native_id": "ba9132ebf2bc3ad21e6a0631dc4e0a77", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.801454067230225, "incorrect_loss_raw": 6.268168151378632, "correct_loss_per_char": 0.4364958242936568, "incorrect_loss_per_char": 0.9311002967677592, "correct_loss_per_token": 2.4007270336151123, "incorrect_loss_per_token": 6.268168151378632, "correct_loss_uncond": -15.76705026626587, "incorrect_loss_uncond": -7.260848581790924}, "model_output": [{"sum_logits": -9.700338363647461, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -9.700338363647461, "logits_per_char": -1.6167230606079102, "num_chars": 6}, {"sum_logits": -5.203203201293945, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.96281623840332, "logits_per_token": -5.203203201293945, "logits_per_char": -0.4730184728449041, "num_chars": 11}, {"sum_logits": -7.640919208526611, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.650419235229492, "logits_per_token": -7.640919208526611, "logits_per_char": -1.2734865347544353, "num_chars": 6}, {"sum_logits": -2.528211832046509, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -11.92244815826416, "logits_per_token": -2.528211832046509, "logits_per_char": -0.36117311886378695, "num_chars": 7}, {"sum_logits": -4.801454067230225, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.568504333496094, "logits_per_token": -2.4007270336151123, "logits_per_char": -0.4364958242936568, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1108, "native_id": "d06de16a4aaeaef32b398c1213257b4a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.602502822875977, "incorrect_loss_raw": 16.530481815338135, "correct_loss_per_char": 0.5648531072279986, "incorrect_loss_per_char": 1.2403680649670688, "correct_loss_per_token": 3.200834274291992, "incorrect_loss_per_token": 7.493142445882161, "correct_loss_uncond": -11.730632781982422, "incorrect_loss_uncond": -2.7079720497131348}, "model_output": [{"sum_logits": -18.530363082885742, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.228906631469727, "logits_per_token": -6.176787694295247, "logits_per_char": -1.2353575388590494, "num_chars": 15}, {"sum_logits": -16.376686096191406, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.16928482055664, "logits_per_token": -8.188343048095703, "logits_per_char": -1.023542881011963, "num_chars": 16}, {"sum_logits": -9.602502822875977, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.3331356048584, "logits_per_token": -3.200834274291992, "logits_per_char": -0.5648531072279986, "num_chars": 17}, {"sum_logits": -17.839054107666016, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.559873580932617, "logits_per_token": -8.919527053833008, "logits_per_char": -1.4865878423055012, "num_chars": 12}, {"sum_logits": -13.375823974609375, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.995750427246094, "logits_per_token": -6.6879119873046875, "logits_per_char": -1.2159839976917615, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1109, "native_id": "eee9476bf29498b7d74b043afe316fc6", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.7668776512146, "incorrect_loss_raw": 14.858928442001343, "correct_loss_per_char": 0.9611462752024332, "incorrect_loss_per_char": 1.5751702581715379, "correct_loss_per_token": 5.7668776512146, "incorrect_loss_per_token": 8.109935522079468, "correct_loss_uncond": -8.202101230621338, "incorrect_loss_uncond": -1.1474266052246094}, "model_output": [{"sum_logits": -22.768667221069336, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.752277374267578, "logits_per_token": -7.589555740356445, "logits_per_char": -1.7514359400822566, "num_chars": 13}, {"sum_logits": -10.389437675476074, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.629544258117676, "logits_per_token": -10.389437675476074, "logits_per_char": -1.154381963941786, "num_chars": 9}, {"sum_logits": -17.725290298461914, "num_tokens": 3, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.635293006896973, "logits_per_token": -5.908430099487305, "logits_per_char": -1.9694766998291016, "num_chars": 9}, {"sum_logits": -5.7668776512146, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.968978881835938, "logits_per_token": -5.7668776512146, "logits_per_char": -0.9611462752024332, "num_chars": 6}, {"sum_logits": -8.552318572998047, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.008305549621582, "logits_per_token": -8.552318572998047, "logits_per_char": -1.4253864288330078, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1110, "native_id": "a85441d6a0e3f871d81a9f19b31360b7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.981341361999512, "incorrect_loss_raw": 10.3211909532547, "correct_loss_per_char": 0.7255764874545011, "incorrect_loss_per_char": 0.9094743068729128, "correct_loss_per_token": 3.990670680999756, "incorrect_loss_per_token": 5.593936085700989, "correct_loss_uncond": -11.371068000793457, "incorrect_loss_uncond": -7.754978537559509}, "model_output": [{"sum_logits": -13.748207092285156, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.920656204223633, "logits_per_token": -6.874103546142578, "logits_per_char": -1.3748207092285156, "num_chars": 10}, {"sum_logits": -12.371767044067383, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -23.35970687866211, "logits_per_token": -4.123922348022461, "logits_per_char": -0.7732354402542114, "num_chars": 16}, {"sum_logits": -7.590647220611572, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.312068939208984, "logits_per_token": -7.590647220611572, "logits_per_char": -0.9488309025764465, "num_chars": 8}, {"sum_logits": -7.981341361999512, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.35240936279297, "logits_per_token": -3.990670680999756, "logits_per_char": -0.7255764874545011, "num_chars": 11}, {"sum_logits": -7.5741424560546875, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.71224594116211, "logits_per_token": -3.7870712280273438, "logits_per_char": -0.5410101754324776, "num_chars": 14}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1111, "native_id": "f11a2975898033893d6a38f75d791fdf", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.223954200744629, "incorrect_loss_raw": 12.938376426696777, "correct_loss_per_char": 1.5559885501861572, "incorrect_loss_per_char": 1.0355267457472972, "correct_loss_per_token": 6.223954200744629, "incorrect_loss_per_token": 5.756831884384155, "correct_loss_uncond": -6.124007225036621, "incorrect_loss_uncond": -5.309457778930664}, "model_output": [{"sum_logits": -17.0965518951416, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.953899383544922, "logits_per_token": -5.698850631713867, "logits_per_char": -1.315119376549354, "num_chars": 13}, {"sum_logits": -8.45672607421875, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.60655403137207, "logits_per_token": -4.228363037109375, "logits_per_char": -0.6505173903245193, "num_chars": 13}, {"sum_logits": -12.021810531616211, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.278512954711914, "logits_per_token": -6.0109052658081055, "logits_per_char": -0.6010905265808105, "num_chars": 20}, {"sum_logits": -14.178417205810547, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -17.15237045288086, "logits_per_token": -7.089208602905273, "logits_per_char": -1.5753796895345051, "num_chars": 9}, {"sum_logits": -6.223954200744629, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.34796142578125, "logits_per_token": -6.223954200744629, "logits_per_char": -1.5559885501861572, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1112, "native_id": "a2977fd575faba162d04a490dabd1b9b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.8020405769348145, "incorrect_loss_raw": 8.954713463783264, "correct_loss_per_char": 0.6802040576934815, "incorrect_loss_per_char": 1.10519775946935, "correct_loss_per_token": 6.8020405769348145, "incorrect_loss_per_token": 6.70577085018158, "correct_loss_uncond": -6.603773593902588, "incorrect_loss_uncond": -6.442158102989197}, "model_output": [{"sum_logits": -5.599300384521484, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.688202857971191, "logits_per_token": -5.599300384521484, "logits_per_char": -0.7999000549316406, "num_chars": 7}, {"sum_logits": -5.190364837646484, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.153367042541504, "logits_per_token": -5.190364837646484, "logits_per_char": -0.6487956047058105, "num_chars": 8}, {"sum_logits": -6.8020405769348145, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.405814170837402, "logits_per_token": -6.8020405769348145, "logits_per_char": -0.6802040576934815, "num_chars": 10}, {"sum_logits": -7.037647724151611, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.638347625732422, "logits_per_token": -7.037647724151611, "logits_per_char": -1.1729412873586018, "num_chars": 6}, {"sum_logits": -17.991540908813477, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -24.107568740844727, "logits_per_token": -8.995770454406738, "logits_per_char": -1.7991540908813477, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1113, "native_id": "cd39e442204d3edf7acc185fd59c8a44", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.376741886138916, "incorrect_loss_raw": 7.059448003768921, "correct_loss_per_char": 0.6720927357673645, "incorrect_loss_per_char": 0.8835984604828286, "correct_loss_per_token": 5.376741886138916, "incorrect_loss_per_token": 5.050250291824341, "correct_loss_uncond": -12.836352825164795, "incorrect_loss_uncond": -8.130938529968262}, "model_output": [{"sum_logits": -5.25581693649292, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.584364891052246, "logits_per_token": -5.25581693649292, "logits_per_char": -0.8759694894154867, "num_chars": 6}, {"sum_logits": -4.448539733886719, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -4.448539733886719, "logits_per_char": -0.8897079467773438, "num_chars": 5}, {"sum_logits": -16.07358169555664, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.332252502441406, "logits_per_token": -8.03679084777832, "logits_per_char": -1.4612346995960583, "num_chars": 11}, {"sum_logits": -5.376741886138916, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.21309471130371, "logits_per_token": -5.376741886138916, "logits_per_char": -0.6720927357673645, "num_chars": 8}, {"sum_logits": -2.4598536491394043, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": true, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -2.4598536491394043, "logits_per_char": -0.30748170614242554, "num_chars": 8}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1114, "native_id": "c77e1039d78cdff197a370fcda0f2b9f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.964431643486023, "incorrect_loss_raw": 11.037112712860107, "correct_loss_per_char": 0.32740527391433716, "incorrect_loss_per_char": 1.7633105913798017, "correct_loss_per_token": 1.964431643486023, "incorrect_loss_per_token": 11.037112712860107, "correct_loss_uncond": -13.478633046150208, "incorrect_loss_uncond": -3.5342276096343994}, "model_output": [{"sum_logits": -1.964431643486023, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -15.44306468963623, "logits_per_token": -1.964431643486023, "logits_per_char": -0.32740527391433716, "num_chars": 6}, {"sum_logits": -12.802976608276367, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.640442848205566, "logits_per_token": -12.802976608276367, "logits_per_char": -1.8289966583251953, "num_chars": 7}, {"sum_logits": -8.806982040405273, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -8.806982040405273, "logits_per_char": -1.4678303400675456, "num_chars": 6}, {"sum_logits": -8.806982040405273, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.940177917480469, "logits_per_token": -8.806982040405273, "logits_per_char": -1.4678303400675456, "num_chars": 6}, {"sum_logits": -13.731510162353516, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.764562606811523, "logits_per_token": -13.731510162353516, "logits_per_char": -2.2885850270589194, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1115, "native_id": "f537f6bb8527724e0b1e1c1051326cd5", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.809744834899902, "incorrect_loss_raw": 11.041492223739624, "correct_loss_per_char": 1.3121938705444336, "incorrect_loss_per_char": 1.1538206146313594, "correct_loss_per_token": 5.904872417449951, "incorrect_loss_per_token": 7.621542692184448, "correct_loss_uncond": -8.01560115814209, "incorrect_loss_uncond": -5.417222738265991}, "model_output": [{"sum_logits": -5.769952774047852, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -5.769952774047852, "logits_per_char": -0.7212440967559814, "num_chars": 8}, {"sum_logits": -11.036419868469238, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.665517807006836, "logits_per_token": -11.036419868469238, "logits_per_char": -2.2072839736938477, "num_chars": 5}, {"sum_logits": -11.840198516845703, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.61815643310547, "logits_per_token": -5.920099258422852, "logits_per_char": -0.9107845012958233, "num_chars": 13}, {"sum_logits": -11.809744834899902, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.825345993041992, "logits_per_token": -5.904872417449951, "logits_per_char": -1.3121938705444336, "num_chars": 9}, {"sum_logits": -15.519397735595703, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.34688377380371, "logits_per_token": -7.759698867797852, "logits_per_char": -0.7759698867797852, "num_chars": 20}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1116, "native_id": "d3b145911a76fd6fbe9a23ab027be024", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.2812209129333496, "incorrect_loss_raw": 7.138520121574402, "correct_loss_per_char": 0.4687458447047642, "incorrect_loss_per_char": 1.3606546095439365, "correct_loss_per_token": 3.2812209129333496, "incorrect_loss_per_token": 7.138520121574402, "correct_loss_uncond": -8.868072986602783, "incorrect_loss_uncond": -6.773155331611633}, "model_output": [{"sum_logits": -3.2812209129333496, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -3.2812209129333496, "logits_per_char": -0.4687458447047642, "num_chars": 7}, {"sum_logits": -3.1047139167785645, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.333755493164062, "logits_per_token": -3.1047139167785645, "logits_per_char": -0.6209427833557128, "num_chars": 5}, {"sum_logits": -7.275032043457031, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.091991424560547, "logits_per_token": -7.275032043457031, "logits_per_char": -1.4550064086914063, "num_chars": 5}, {"sum_logits": -10.98453426361084, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.312009811401367, "logits_per_token": -10.98453426361084, "logits_per_char": -1.5692191805158342, "num_chars": 7}, {"sum_logits": -7.189800262451172, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -7.189800262451172, "logits_per_char": -1.797450065612793, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1117, "native_id": "dc2fa76467ff342abdb4cf142f92dddd", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.457526683807373, "incorrect_loss_raw": 9.651854634284973, "correct_loss_per_char": 0.1755376202719552, "incorrect_loss_per_char": 0.8184742000367905, "correct_loss_per_token": 1.2287633419036865, "incorrect_loss_per_token": 4.135273575782776, "correct_loss_uncond": -15.242302417755127, "incorrect_loss_uncond": -10.42656409740448}, "model_output": [{"sum_logits": -11.050459861755371, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.75716209411621, "logits_per_token": -2.7626149654388428, "logits_per_char": -0.7893185615539551, "num_chars": 14}, {"sum_logits": -2.457526683807373, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.6998291015625, "logits_per_token": -1.2287633419036865, "logits_per_char": -0.1755376202719552, "num_chars": 14}, {"sum_logits": -11.798826217651367, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.775413513183594, "logits_per_token": -5.899413108825684, "logits_per_char": -0.9832355181376139, "num_chars": 12}, {"sum_logits": -6.773940563201904, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.20847511291504, "logits_per_token": -3.386970281600952, "logits_per_char": -0.7526600625779893, "num_chars": 9}, {"sum_logits": -8.98419189453125, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.57262420654297, "logits_per_token": -4.492095947265625, "logits_per_char": -0.7486826578776041, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1118, "native_id": "246249cd7976358051a9811ff9c30736", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 4.730014801025391, "incorrect_loss_raw": 14.71460747718811, "correct_loss_per_char": 0.9460029602050781, "incorrect_loss_per_char": 1.5642476156319216, "correct_loss_per_token": 4.730014801025391, "incorrect_loss_per_token": 8.911229968070984, "correct_loss_uncond": -7.477945327758789, "incorrect_loss_uncond": -2.1311516761779785}, "model_output": [{"sum_logits": -21.958110809326172, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.36046600341797, "logits_per_token": -10.979055404663086, "logits_per_char": -1.689085446871244, "num_chars": 13}, {"sum_logits": -18.85266876220703, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.13232421875, "logits_per_token": -9.426334381103516, "logits_per_char": -1.5710557301839192, "num_chars": 12}, {"sum_logits": -5.616240501403809, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.38504695892334, "logits_per_token": -2.8081202507019043, "logits_per_char": -0.5105673183094371, "num_chars": 11}, {"sum_logits": -4.730014801025391, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.20796012878418, "logits_per_token": -4.730014801025391, "logits_per_char": -0.9460029602050781, "num_chars": 5}, {"sum_logits": -12.43140983581543, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.505199432373047, "logits_per_token": -12.43140983581543, "logits_per_char": -2.486281967163086, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1119, "native_id": "32be8cbc1b5a967310bcab8b80563481", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.373327732086182, "incorrect_loss_raw": 13.574729442596436, "correct_loss_per_char": 0.5373327732086182, "incorrect_loss_per_char": 1.2901590807765138, "correct_loss_per_token": 2.686663866043091, "incorrect_loss_per_token": 6.8990607261657715, "correct_loss_uncond": -9.779202938079834, "incorrect_loss_uncond": -3.2413501739501953}, "model_output": [{"sum_logits": -17.896081924438477, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.797748565673828, "logits_per_token": -5.965360641479492, "logits_per_char": -1.0527107014375574, "num_chars": 17}, {"sum_logits": -5.373327732086182, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.152530670166016, "logits_per_token": -2.686663866043091, "logits_per_char": -0.5373327732086182, "num_chars": 10}, {"sum_logits": -10.035100936889648, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.00012969970703, "logits_per_token": -5.017550468444824, "logits_per_char": -1.254387617111206, "num_chars": 8}, {"sum_logits": -19.508806228637695, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.600324630737305, "logits_per_token": -9.754403114318848, "logits_per_char": -2.1676451365152993, "num_chars": 9}, {"sum_logits": -6.858928680419922, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -9.86611557006836, "logits_per_token": -6.858928680419922, "logits_per_char": -0.6858928680419922, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1120, "native_id": "ad769851a59375865607452d3bf2a45d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.477869033813477, "incorrect_loss_raw": 12.149955034255981, "correct_loss_per_char": 0.7898224194844564, "incorrect_loss_per_char": 2.2023330132166543, "correct_loss_per_token": 4.738934516906738, "incorrect_loss_per_token": 12.149955034255981, "correct_loss_uncond": -5.745203971862793, "incorrect_loss_uncond": 0.7131218910217285}, "model_output": [{"sum_logits": -12.625787734985352, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.354521751403809, "logits_per_token": -12.625787734985352, "logits_per_char": -3.156446933746338, "num_chars": 4}, {"sum_logits": -9.477869033813477, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.22307300567627, "logits_per_token": -4.738934516906738, "logits_per_char": -0.7898224194844564, "num_chars": 12}, {"sum_logits": -12.625787734985352, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.354521751403809, "logits_per_token": -12.625787734985352, "logits_per_char": -3.156446933746338, "num_chars": 4}, {"sum_logits": -13.218027114868164, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.715654373168945, "logits_per_token": -13.218027114868164, "logits_per_char": -1.6522533893585205, "num_chars": 8}, {"sum_logits": -10.130217552185059, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.32263469696045, "logits_per_token": -10.130217552185059, "logits_per_char": -0.8441847960154215, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1121, "native_id": "5ea6b94d1a911365b06cf776919413e8", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.714598655700684, "incorrect_loss_raw": 8.899950385093689, "correct_loss_per_char": 0.36266143505389875, "incorrect_loss_per_char": 0.9063892280212557, "correct_loss_per_token": 4.714598655700684, "incorrect_loss_per_token": 5.8340799411137905, "correct_loss_uncond": -11.062273025512695, "incorrect_loss_uncond": -7.0540560483932495}, "model_output": [{"sum_logits": -13.008655548095703, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.533774375915527, "logits_per_token": -6.504327774047852, "logits_per_char": -1.4454061720106337, "num_chars": 9}, {"sum_logits": -4.714598655700684, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.776871681213379, "logits_per_token": -4.714598655700684, "logits_per_char": -0.36266143505389875, "num_chars": 13}, {"sum_logits": -6.48021125793457, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.360472679138184, "logits_per_token": -6.48021125793457, "logits_per_char": -0.7200234731038412, "num_chars": 9}, {"sum_logits": -8.638731002807617, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.419702529907227, "logits_per_token": -2.8795770009358725, "logits_per_char": -0.3926695910367099, "num_chars": 22}, {"sum_logits": -7.472203731536865, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.502076148986816, "logits_per_token": -7.472203731536865, "logits_per_char": -1.067457675933838, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1122, "native_id": "820df15b615d221e38a71fcc44461085", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.3538568019866943, "incorrect_loss_raw": 9.474343180656433, "correct_loss_per_char": 0.23538568019866943, "incorrect_loss_per_char": 0.7191842456658681, "correct_loss_per_token": 2.3538568019866943, "incorrect_loss_per_token": 4.7371715903282166, "correct_loss_uncond": -14.816076517105103, "incorrect_loss_uncond": -11.7003675699234}, "model_output": [{"sum_logits": -6.1034698486328125, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.877809524536133, "logits_per_token": -3.0517349243164062, "logits_per_char": -0.5086224873860677, "num_chars": 12}, {"sum_logits": -7.05279541015625, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -21.329002380371094, "logits_per_token": -3.526397705078125, "logits_per_char": -0.5877329508463541, "num_chars": 12}, {"sum_logits": -7.858462810516357, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -3.9292314052581787, "logits_per_char": -0.6548719008763632, "num_chars": 12}, {"sum_logits": -2.3538568019866943, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -2.3538568019866943, "logits_per_char": -0.23538568019866943, "num_chars": 10}, {"sum_logits": -16.882644653320312, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -20.21895980834961, "logits_per_token": -8.441322326660156, "logits_per_char": -1.1255096435546874, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1123, "native_id": "0a4a00ba435397c4a0496dd2c2426be7", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.218090057373047, "incorrect_loss_raw": 3.7204726934432983, "correct_loss_per_char": 1.031155722481864, "incorrect_loss_per_char": 0.7515735430376871, "correct_loss_per_token": 3.6090450286865234, "incorrect_loss_per_token": 3.7204726934432983, "correct_loss_uncond": -5.944326400756836, "incorrect_loss_uncond": -7.588307976722717}, "model_output": [{"sum_logits": -2.2193055152893066, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.128262519836426, "logits_per_token": -2.2193055152893066, "logits_per_char": -0.4438611030578613, "num_chars": 5}, {"sum_logits": -4.132522106170654, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.979043960571289, "logits_per_token": -4.132522106170654, "logits_per_char": -0.590360300881522, "num_chars": 7}, {"sum_logits": -3.208860397338867, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -9.524346351623535, "logits_per_token": -3.208860397338867, "logits_per_char": -0.6417720794677735, "num_chars": 5}, {"sum_logits": -5.321202754974365, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.603469848632812, "logits_per_token": -5.321202754974365, "logits_per_char": -1.3303006887435913, "num_chars": 4}, {"sum_logits": -7.218090057373047, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -13.162416458129883, "logits_per_token": -3.6090450286865234, "logits_per_char": -1.031155722481864, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1124, "native_id": "a7f29f4aebe0e3bcb77038fea71bf28c", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.024541854858398, "incorrect_loss_raw": 8.354566097259521, "correct_loss_per_char": 0.5030677318572998, "incorrect_loss_per_char": 1.0218152365623376, "correct_loss_per_token": 4.024541854858398, "incorrect_loss_per_token": 7.061532338460287, "correct_loss_uncond": -9.185985565185547, "incorrect_loss_uncond": -6.449768781661987}, "model_output": [{"sum_logits": -7.2794389724731445, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.16165828704834, "logits_per_token": -7.2794389724731445, "logits_per_char": -1.2132398287455242, "num_chars": 6}, {"sum_logits": -7.75820255279541, "num_tokens": 3, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.160179138183594, "logits_per_token": -2.58606751759847, "logits_per_char": -0.9697753190994263, "num_chars": 8}, {"sum_logits": -4.024541854858398, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.210527420043945, "logits_per_token": -4.024541854858398, "logits_per_char": -0.5030677318572998, "num_chars": 8}, {"sum_logits": -8.181306838989258, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.629813194274902, "logits_per_token": -8.181306838989258, "logits_per_char": -0.6293312953068659, "num_chars": 13}, {"sum_logits": -10.199316024780273, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.2656888961792, "logits_per_token": -10.199316024780273, "logits_per_char": -1.2749145030975342, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1125, "native_id": "ecd32cc0c17d4738a27bba3399f04591", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.288848876953125, "incorrect_loss_raw": 8.740072250366211, "correct_loss_per_char": 0.2938249376085069, "incorrect_loss_per_char": 1.0317735517126883, "correct_loss_per_token": 2.6444244384765625, "incorrect_loss_per_token": 5.029385566711426, "correct_loss_uncond": -14.45771598815918, "incorrect_loss_uncond": -8.273561239242554}, "model_output": [{"sum_logits": -5.2747955322265625, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.809586524963379, "logits_per_token": -5.2747955322265625, "logits_per_char": -0.5860883924696181, "num_chars": 9}, {"sum_logits": -9.938369750976562, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -18.948307037353516, "logits_per_token": -4.969184875488281, "logits_per_char": -0.7644899808443509, "num_chars": 13}, {"sum_logits": -5.288848876953125, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.746564865112305, "logits_per_token": -2.6444244384765625, "logits_per_char": -0.2938249376085069, "num_chars": 18}, {"sum_logits": -9.693618774414062, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.115375518798828, "logits_per_token": -4.846809387207031, "logits_per_char": -1.9387237548828125, "num_chars": 5}, {"sum_logits": -10.053504943847656, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -17.181264877319336, "logits_per_token": -5.026752471923828, "logits_per_char": -0.8377920786539713, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1126, "native_id": "8b2af2d865b7dc500427786c846eacaf", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.801715850830078, "incorrect_loss_raw": 10.55542516708374, "correct_loss_per_char": 1.0801715850830078, "incorrect_loss_per_char": 0.9169410049915314, "correct_loss_per_token": 5.400857925415039, "incorrect_loss_per_token": 7.203260779380798, "correct_loss_uncond": -5.751520156860352, "incorrect_loss_uncond": -5.625607490539551}, "model_output": [{"sum_logits": -14.2529296875, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -18.835248947143555, "logits_per_token": -7.12646484375, "logits_per_char": -1.187744140625, "num_chars": 12}, {"sum_logits": -7.09853458404541, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -13.933419227600098, "logits_per_token": -7.09853458404541, "logits_per_char": -0.8873168230056763, "num_chars": 8}, {"sum_logits": -8.305850982666016, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.814337730407715, "logits_per_token": -8.305850982666016, "logits_per_char": -0.7550773620605469, "num_chars": 11}, {"sum_logits": -10.801715850830078, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -16.55323600769043, "logits_per_token": -5.400857925415039, "logits_per_char": -1.0801715850830078, "num_chars": 10}, {"sum_logits": -12.564385414123535, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -17.141124725341797, "logits_per_token": -6.282192707061768, "logits_per_char": -0.8376256942749023, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1127, "native_id": "383282aace64dd49138bac2392f8b38e", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.289122581481934, "incorrect_loss_raw": 10.692426800727844, "correct_loss_per_char": 0.6611403226852417, "incorrect_loss_per_char": 1.48629439444769, "correct_loss_per_token": 5.289122581481934, "incorrect_loss_per_token": 7.460476875305176, "correct_loss_uncond": -9.71953296661377, "incorrect_loss_uncond": -5.487786650657654}, "model_output": [{"sum_logits": -9.47610855102539, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.008305549621582, "logits_per_token": -9.47610855102539, "logits_per_char": -1.5793514251708984, "num_chars": 6}, {"sum_logits": -5.289122581481934, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -5.289122581481934, "logits_per_char": -0.6611403226852417, "num_chars": 8}, {"sum_logits": -10.829676628112793, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.090351104736328, "logits_per_token": -5.4148383140563965, "logits_per_char": -1.8049461046854656, "num_chars": 6}, {"sum_logits": -15.025922775268555, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -15.61535930633545, "logits_per_token": -7.512961387634277, "logits_per_char": -1.073280198233468, "num_chars": 14}, {"sum_logits": -7.437999248504639, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -7.437999248504639, "logits_per_char": -1.4875998497009277, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1128, "native_id": "eaf6838d29bcd4ebf408da2f75aa65c3", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.78420877456665, "incorrect_loss_raw": 8.945841073989868, "correct_loss_per_char": 1.2973681290944417, "incorrect_loss_per_char": 1.109294363430568, "correct_loss_per_token": 7.78420877456665, "incorrect_loss_per_token": 8.945841073989868, "correct_loss_uncond": -4.543945789337158, "incorrect_loss_uncond": -4.325161695480347}, "model_output": [{"sum_logits": -9.622936248779297, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.308333396911621, "logits_per_token": -9.622936248779297, "logits_per_char": -1.202867031097412, "num_chars": 8}, {"sum_logits": -7.78420877456665, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.328154563903809, "logits_per_token": -7.78420877456665, "logits_per_char": -1.2973681290944417, "num_chars": 6}, {"sum_logits": -9.72188663482666, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.403600692749023, "logits_per_token": -9.72188663482666, "logits_per_char": -1.38884094783238, "num_chars": 7}, {"sum_logits": -7.98992919921875, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.879678726196289, "logits_per_token": -7.98992919921875, "logits_per_char": -1.14141845703125, "num_chars": 7}, {"sum_logits": -8.448612213134766, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.492398262023926, "logits_per_token": -8.448612213134766, "logits_per_char": -0.7040510177612305, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1129, "native_id": "7c8bc9c0e56389eef033bca40c88c151", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.686171293258667, "incorrect_loss_raw": 9.93454909324646, "correct_loss_per_char": 0.22384760777155557, "incorrect_loss_per_char": 1.5396100918451947, "correct_loss_per_token": 1.3430856466293335, "incorrect_loss_per_token": 8.63867437839508, "correct_loss_uncond": -14.459206819534302, "incorrect_loss_uncond": -5.118069171905518}, "model_output": [{"sum_logits": -10.366997718811035, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.413734436035156, "logits_per_token": -5.183498859405518, "logits_per_char": -1.0366997718811035, "num_chars": 10}, {"sum_logits": -13.637824058532715, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -13.637824058532715, "logits_per_char": -2.272970676422119, "num_chars": 6}, {"sum_logits": -8.937150001525879, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.888906478881836, "logits_per_token": -8.937150001525879, "logits_per_char": -1.4895250002543132, "num_chars": 6}, {"sum_logits": -2.686171293258667, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -1.3430856466293335, "logits_per_char": -0.22384760777155557, "num_chars": 12}, {"sum_logits": -6.796224594116211, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -6.796224594116211, "logits_per_char": -1.3592449188232423, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1130, "native_id": "ca60a46c9007e4b6213f50bfb5342fdd", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 17.3857421875, "incorrect_loss_raw": 8.182739496231079, "correct_loss_per_char": 1.4488118489583333, "incorrect_loss_per_char": 1.134072592901805, "correct_loss_per_token": 5.795247395833333, "incorrect_loss_per_token": 8.182739496231079, "correct_loss_uncond": -3.2346363067626953, "incorrect_loss_uncond": -5.401552438735962}, "model_output": [{"sum_logits": -12.069557189941406, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.09428596496582, "logits_per_token": -12.069557189941406, "logits_per_char": -1.5086946487426758, "num_chars": 8}, {"sum_logits": -17.3857421875, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.620378494262695, "logits_per_token": -5.795247395833333, "logits_per_char": -1.4488118489583333, "num_chars": 12}, {"sum_logits": -5.5841064453125, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.535295486450195, "logits_per_token": -5.5841064453125, "logits_per_char": -0.6204562717013888, "num_chars": 9}, {"sum_logits": -10.645589828491211, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.871313095092773, "logits_per_token": -10.645589828491211, "logits_per_char": -1.5207985469273158, "num_chars": 7}, {"sum_logits": -4.431704521179199, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -4.431704521179199, "logits_per_char": -0.8863409042358399, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1131, "native_id": "f50209f04d11690d7c8f30e29b35ff02", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.498312950134277, "incorrect_loss_raw": 12.994893789291382, "correct_loss_per_char": 0.49984663183038885, "incorrect_loss_per_char": 0.8367959296002108, "correct_loss_per_token": 2.7491564750671387, "incorrect_loss_per_token": 4.121465563774109, "correct_loss_uncond": -14.028656959533691, "incorrect_loss_uncond": -8.338731527328491}, "model_output": [{"sum_logits": -13.427311897277832, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.798698425292969, "logits_per_token": -4.475770632425944, "logits_per_char": -1.118942658106486, "num_chars": 12}, {"sum_logits": -5.498312950134277, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.52696990966797, "logits_per_token": -2.7491564750671387, "logits_per_char": -0.49984663183038885, "num_chars": 11}, {"sum_logits": -10.087953567504883, "num_tokens": 4, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.723575592041016, "logits_per_token": -2.5219883918762207, "logits_per_char": -0.7205681119646344, "num_chars": 14}, {"sum_logits": -13.586494445800781, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -23.001657485961914, "logits_per_token": -4.528831481933594, "logits_per_char": -0.7992055556353401, "num_chars": 17}, {"sum_logits": -14.877815246582031, "num_tokens": 3, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -28.810569763183594, "logits_per_token": -4.959271748860677, "logits_per_char": -0.7084673926943824, "num_chars": 21}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1132, "native_id": "d725f1c2e150a3221de31612123f3f46", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 2, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.251311302185059, "incorrect_loss_raw": 13.940483331680298, "correct_loss_per_char": 0.8057012557983398, "incorrect_loss_per_char": 0.9588521847426612, "correct_loss_per_token": 3.6256556510925293, "incorrect_loss_per_token": 6.256951491038005, "correct_loss_uncond": -9.95716381072998, "incorrect_loss_uncond": -6.5110015869140625}, "model_output": [{"sum_logits": -7.251311302185059, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.20847511291504, "logits_per_token": -3.6256556510925293, "logits_per_char": -0.8057012557983398, "num_chars": 9}, {"sum_logits": -15.851994514465332, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -22.481849670410156, "logits_per_token": -5.283998171488444, "logits_per_char": -0.9324702655567843, "num_chars": 17}, {"sum_logits": -12.592985153198242, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.284446716308594, "logits_per_token": -6.296492576599121, "logits_per_char": -0.6996102862887912, "num_chars": 18}, {"sum_logits": -18.49285125732422, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.758827209472656, "logits_per_token": -4.623212814331055, "logits_per_char": -1.3209179469517298, "num_chars": 14}, {"sum_logits": -8.824102401733398, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.280816078186035, "logits_per_token": -8.824102401733398, "logits_per_char": -0.8824102401733398, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1133, "native_id": "f7735d721dfdc94621154951d4eaa4cf", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.087941646575928, "incorrect_loss_raw": 9.187449932098389, "correct_loss_per_char": 0.5087941646575928, "incorrect_loss_per_char": 1.2827892390164461, "correct_loss_per_token": 5.087941646575928, "incorrect_loss_per_token": 9.187449932098389, "correct_loss_uncond": -8.40229082107544, "incorrect_loss_uncond": -4.543129920959473}, "model_output": [{"sum_logits": -11.777482986450195, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.471572875976562, "logits_per_token": -11.777482986450195, "logits_per_char": -1.0706802714954724, "num_chars": 11}, {"sum_logits": -8.62755012512207, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.47494888305664, "logits_per_token": -8.62755012512207, "logits_per_char": -1.725510025024414, "num_chars": 5}, {"sum_logits": -5.087941646575928, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -5.087941646575928, "logits_per_char": -0.5087941646575928, "num_chars": 10}, {"sum_logits": -6.533071517944336, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.404559135437012, "logits_per_token": -6.533071517944336, "logits_per_char": -0.9332959311349052, "num_chars": 7}, {"sum_logits": -9.811695098876953, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.57123851776123, "logits_per_token": -9.811695098876953, "logits_per_char": -1.4016707284109933, "num_chars": 7}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1134, "native_id": "eaf980db7e945b1cf6d648fa55ddcb5e", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.023592710494995, "incorrect_loss_raw": 9.913415908813477, "correct_loss_per_char": 0.335954745610555, "incorrect_loss_per_char": 1.4114692942963707, "correct_loss_per_token": 3.023592710494995, "incorrect_loss_per_token": 9.913415908813477, "correct_loss_uncond": -10.996153116226196, "incorrect_loss_uncond": -4.485871076583862}, "model_output": [{"sum_logits": -7.107746601104736, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.630324363708496, "logits_per_token": -7.107746601104736, "logits_per_char": -0.888468325138092, "num_chars": 8}, {"sum_logits": -3.023592710494995, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -3.023592710494995, "logits_per_char": -0.335954745610555, "num_chars": 9}, {"sum_logits": -12.83845329284668, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.903834342956543, "logits_per_token": -12.83845329284668, "logits_per_char": -2.5676906585693358, "num_chars": 5}, {"sum_logits": -17.103349685668945, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.348709106445312, "logits_per_token": -17.103349685668945, "logits_per_char": -1.9003721872965496, "num_chars": 9}, {"sum_logits": -2.604114055633545, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -14.714280128479004, "logits_per_token": -2.604114055633545, "logits_per_char": -0.28934600618150497, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1135, "native_id": "8bbfe8cd056d612e9d3190f278bef287", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.907491207122803, "incorrect_loss_raw": 16.158955812454224, "correct_loss_per_char": 1.7268728017807007, "incorrect_loss_per_char": 1.7147969378507073, "correct_loss_per_token": 6.907491207122803, "incorrect_loss_per_token": 10.025937000910442, "correct_loss_uncond": -8.985032558441162, "incorrect_loss_uncond": -1.881335973739624}, "model_output": [{"sum_logits": -13.30382251739502, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.238839149475098, "logits_per_token": -13.30382251739502, "logits_per_char": -1.2094384106722744, "num_chars": 11}, {"sum_logits": -19.98879623413086, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -6.66293207804362, "logits_per_char": -1.1758115431841683, "num_chars": 17}, {"sum_logits": -6.907491207122803, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.892523765563965, "logits_per_token": -6.907491207122803, "logits_per_char": -1.7268728017807007, "num_chars": 4}, {"sum_logits": -8.930782318115234, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.54236888885498, "logits_per_token": -8.930782318115234, "logits_per_char": -2.2326955795288086, "num_chars": 4}, {"sum_logits": -22.41242218017578, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.65792465209961, "logits_per_token": -11.20621109008789, "logits_per_char": -2.241242218017578, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1136, "native_id": "aa7c4c351cf8d59792aa68e3de339db4", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.44854736328125, "incorrect_loss_raw": 14.815065503120422, "correct_loss_per_char": 0.3135043057528409, "incorrect_loss_per_char": 1.0775411009788514, "correct_loss_per_token": 1.724273681640625, "incorrect_loss_per_token": 5.588656226793924, "correct_loss_uncond": -13.871774673461914, "incorrect_loss_uncond": -2.180216908454895}, "model_output": [{"sum_logits": -16.775917053222656, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.68995475769043, "logits_per_token": -5.591972351074219, "logits_per_char": -1.1183944702148438, "num_chars": 15}, {"sum_logits": -3.44854736328125, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.320322036743164, "logits_per_token": -1.724273681640625, "logits_per_char": -0.3135043057528409, "num_chars": 11}, {"sum_logits": -21.222379684448242, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.849449157714844, "logits_per_token": -7.074126561482747, "logits_per_char": -1.3263987302780151, "num_chars": 16}, {"sum_logits": -3.901806354522705, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.579615592956543, "logits_per_token": -3.901806354522705, "logits_per_char": -0.780361270904541, "num_chars": 5}, {"sum_logits": -17.360158920288086, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.862110137939453, "logits_per_token": -5.786719640096028, "logits_per_char": -1.0850099325180054, "num_chars": 16}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1137, "native_id": "23df3bac9cfcb156f4cfd8a05f21c5e2", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.938675880432129, "incorrect_loss_raw": 10.1444993019104, "correct_loss_per_char": 1.2154084311591253, "incorrect_loss_per_char": 0.8779848456382752, "correct_loss_per_token": 5.4693379402160645, "incorrect_loss_per_token": 5.0722496509552, "correct_loss_uncond": -6.930403709411621, "incorrect_loss_uncond": -8.952489852905273}, "model_output": [{"sum_logits": -10.938675880432129, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.86907958984375, "logits_per_token": -5.4693379402160645, "logits_per_char": -1.2154084311591253, "num_chars": 9}, {"sum_logits": -14.150552749633789, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.187362670898438, "logits_per_token": -7.0752763748168945, "logits_per_char": -0.9433701833089193, "num_chars": 15}, {"sum_logits": -15.851348876953125, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.304346084594727, "logits_per_token": -7.9256744384765625, "logits_per_char": -1.056756591796875, "num_chars": 15}, {"sum_logits": -6.020879745483398, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.82585906982422, "logits_per_token": -3.010439872741699, "logits_per_char": -0.7526099681854248, "num_chars": 8}, {"sum_logits": -4.555215835571289, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.070388793945312, "logits_per_token": -2.2776079177856445, "logits_per_char": -0.7592026392618815, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1138, "native_id": "d21777d771dc6fd08e769d378651817e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.039327621459961, "incorrect_loss_raw": 13.368001699447632, "correct_loss_per_char": 0.8217570564963601, "incorrect_loss_per_char": 1.1339052530852232, "correct_loss_per_token": 4.5196638107299805, "incorrect_loss_per_token": 6.830141544342041, "correct_loss_uncond": -8.935934066772461, "incorrect_loss_uncond": -3.771315813064575}, "model_output": [{"sum_logits": -12.03761100769043, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -16.398208618164062, "logits_per_token": -6.018805503845215, "logits_per_char": -1.0943282734264026, "num_chars": 11}, {"sum_logits": -16.861270904541016, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -21.62520980834961, "logits_per_token": -8.430635452270508, "logits_per_char": -1.6861270904541015, "num_chars": 10}, {"sum_logits": -17.55299949645996, "num_tokens": 3, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -17.621944427490234, "logits_per_token": -5.85099983215332, "logits_per_char": -0.877649974822998, "num_chars": 20}, {"sum_logits": -7.020125389099121, "num_tokens": 1, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -12.911907196044922, "logits_per_token": -7.020125389099121, "logits_per_char": -0.8775156736373901, "num_chars": 8}, {"sum_logits": -9.039327621459961, "num_tokens": 2, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -17.975261688232422, "logits_per_token": -4.5196638107299805, "logits_per_char": -0.8217570564963601, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1139, "native_id": "611a4cc0e288b8a11afa923f48cb2ab4", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.499363422393799, "incorrect_loss_raw": 10.57921016216278, "correct_loss_per_char": 0.5356688158852714, "incorrect_loss_per_char": 1.2079937894967039, "correct_loss_per_token": 3.7496817111968994, "incorrect_loss_per_token": 5.732616563638051, "correct_loss_uncond": -8.265751361846924, "incorrect_loss_uncond": -6.339850068092346}, "model_output": [{"sum_logits": -17.703357696533203, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -20.75762939453125, "logits_per_token": -5.901119232177734, "logits_per_char": -2.2129197120666504, "num_chars": 8}, {"sum_logits": -11.463120460510254, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -11.463120460510254, "logits_per_char": -1.6375886372157507, "num_chars": 7}, {"sum_logits": -7.096634387969971, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.629249572753906, "logits_per_token": -3.5483171939849854, "logits_per_char": -0.6451485807245428, "num_chars": 11}, {"sum_logits": -6.053728103637695, "num_tokens": 3, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.14006805419922, "logits_per_token": -2.017909367879232, "logits_per_char": -0.336318227979872, "num_chars": 18}, {"sum_logits": -7.499363422393799, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.765114784240723, "logits_per_token": -3.7496817111968994, "logits_per_char": -0.5356688158852714, "num_chars": 14}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1140, "native_id": "8e7941ce31996ca83cc0a68f7313c96d", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 0.9082806706428528, "incorrect_loss_raw": 9.86074948310852, "correct_loss_per_char": 0.1135350838303566, "incorrect_loss_per_char": 1.1105524730114709, "correct_loss_per_token": 0.9082806706428528, "incorrect_loss_per_token": 9.86074948310852, "correct_loss_uncond": -15.353695571422577, "incorrect_loss_uncond": -4.066369533538818}, "model_output": [{"sum_logits": -11.505881309509277, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.68825626373291, "logits_per_token": -11.505881309509277, "logits_per_char": -1.6436973299298967, "num_chars": 7}, {"sum_logits": -0.9082806706428528, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -16.26197624206543, "logits_per_token": -0.9082806706428528, "logits_per_char": -0.1135350838303566, "num_chars": 8}, {"sum_logits": -6.399911880493164, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -12.453316688537598, "logits_per_token": -6.399911880493164, "logits_per_char": -0.6399911880493164, "num_chars": 10}, {"sum_logits": -12.807101249694824, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.020503044128418, "logits_per_token": -12.807101249694824, "logits_per_char": -1.0672584374745686, "num_chars": 12}, {"sum_logits": -8.730103492736816, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -15.54640007019043, "logits_per_token": -8.730103492736816, "logits_per_char": -1.091262936592102, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1141, "native_id": "ea02772e27f5bd40eced3b65e8c6427f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 10.58712387084961, "incorrect_loss_raw": 10.366716027259827, "correct_loss_per_char": 0.8143941439115084, "incorrect_loss_per_char": 1.9054197981244043, "correct_loss_per_token": 10.58712387084961, "incorrect_loss_per_token": 10.366716027259827, "correct_loss_uncond": -5.719446182250977, "incorrect_loss_uncond": -4.109194397926331}, "model_output": [{"sum_logits": -10.975411415100098, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.269688606262207, "logits_per_token": -10.975411415100098, "logits_per_char": -2.7438528537750244, "num_chars": 4}, {"sum_logits": -10.58712387084961, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.306570053100586, "logits_per_token": -10.58712387084961, "logits_per_char": -0.8143941439115084, "num_chars": 13}, {"sum_logits": -14.145249366760254, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.326818466186523, "logits_per_token": -14.145249366760254, "logits_per_char": -2.357541561126709, "num_chars": 6}, {"sum_logits": -8.571462631225586, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -13.935919761657715, "logits_per_token": -8.571462631225586, "logits_per_char": -1.2244946616036552, "num_chars": 7}, {"sum_logits": -7.774740695953369, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.371214866638184, "logits_per_token": -7.774740695953369, "logits_per_char": -1.2957901159922283, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1142, "native_id": "de54d03e69d9765872f95ff06ed21499", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 11.940380096435547, "incorrect_loss_raw": 13.507567882537842, "correct_loss_per_char": 0.8528842926025391, "incorrect_loss_per_char": 2.304982092645433, "correct_loss_per_token": 5.970190048217773, "incorrect_loss_per_token": 13.507567882537842, "correct_loss_uncond": -5.251214981079102, "incorrect_loss_uncond": -1.2099535465240479}, "model_output": [{"sum_logits": -14.880838394165039, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -13.196586608886719, "logits_per_token": -14.880838394165039, "logits_per_char": -3.7202095985412598, "num_chars": 4}, {"sum_logits": -11.940380096435547, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.19159507751465, "logits_per_token": -5.970190048217773, "logits_per_char": -0.8528842926025391, "num_chars": 14}, {"sum_logits": -14.89883804321289, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.15831184387207, "logits_per_token": -14.89883804321289, "logits_per_char": -2.483139673868815, "num_chars": 6}, {"sum_logits": -14.105436325073242, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.019745826721191, "logits_per_token": -14.105436325073242, "logits_per_char": -1.5672707027859158, "num_chars": 9}, {"sum_logits": -10.145158767700195, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.495441436767578, "logits_per_token": -10.145158767700195, "logits_per_char": -1.4493083953857422, "num_chars": 7}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1143, "native_id": "b231a732a3fdf0621391e7e385f8d651", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 7.031156063079834, "incorrect_loss_raw": 9.50895071029663, "correct_loss_per_char": 0.7031156063079834, "incorrect_loss_per_char": 0.9451285733116997, "correct_loss_per_token": 3.515578031539917, "incorrect_loss_per_token": 7.238925933837891, "correct_loss_uncond": -12.94920015335083, "incorrect_loss_uncond": -6.0786659717559814}, "model_output": [{"sum_logits": -9.014841079711914, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.387165069580078, "logits_per_token": -4.507420539855957, "logits_per_char": -0.5008245044284396, "num_chars": 18}, {"sum_logits": -9.145357131958008, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -14.329689979553223, "logits_per_token": -4.572678565979004, "logits_per_char": -0.8313961029052734, "num_chars": 11}, {"sum_logits": -11.408853530883789, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.835965156555176, "logits_per_token": -11.408853530883789, "logits_per_char": -1.0371685028076172, "num_chars": 11}, {"sum_logits": -7.031156063079834, "num_tokens": 2, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -19.980356216430664, "logits_per_token": -3.515578031539917, "logits_per_char": -0.7031156063079834, "num_chars": 10}, {"sum_logits": -8.466751098632812, "num_tokens": 1, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.797646522521973, "logits_per_token": -8.466751098632812, "logits_per_char": -1.4111251831054688, "num_chars": 6}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1144, "native_id": "b9121c3228f961c5ad68958c702cd94b", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.078719139099121, "incorrect_loss_raw": 11.682450890541077, "correct_loss_per_char": 0.9162471944635565, "incorrect_loss_per_char": 1.1275097546123323, "correct_loss_per_token": 5.0393595695495605, "incorrect_loss_per_token": 6.748597145080566, "correct_loss_uncond": -9.816567420959473, "incorrect_loss_uncond": -6.15380585193634}, "model_output": [{"sum_logits": -11.028417587280273, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.71932601928711, "logits_per_token": -5.514208793640137, "logits_per_char": -1.1028417587280273, "num_chars": 10}, {"sum_logits": -7.258973598480225, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.564921379089355, "logits_per_token": -7.258973598480225, "logits_per_char": -1.0369962283543177, "num_chars": 7}, {"sum_logits": -10.078719139099121, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.895286560058594, "logits_per_token": -5.0393595695495605, "logits_per_char": -0.9162471944635565, "num_chars": 11}, {"sum_logits": -13.260743141174316, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -6.630371570587158, "logits_per_char": -1.105061928431193, "num_chars": 12}, {"sum_logits": -15.181669235229492, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.92172622680664, "logits_per_token": -7.590834617614746, "logits_per_char": -1.265139102935791, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1145, "native_id": "4015ab002ff8c233d1c7ef26f5156b88", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.778253555297852, "incorrect_loss_raw": 13.131942749023438, "correct_loss_per_char": 0.7980230504816229, "incorrect_loss_per_char": 1.1550233573703976, "correct_loss_per_token": 4.389126777648926, "incorrect_loss_per_token": 6.809543609619141, "correct_loss_uncond": -8.925262451171875, "incorrect_loss_uncond": -6.403219699859619}, "model_output": [{"sum_logits": -8.865930557250977, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.747547149658203, "logits_per_token": -8.865930557250977, "logits_per_char": -1.266561508178711, "num_chars": 7}, {"sum_logits": -13.834705352783203, "num_tokens": 4, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.164539337158203, "logits_per_token": -3.458676338195801, "logits_per_char": -1.1528921127319336, "num_chars": 12}, {"sum_logits": -17.063446044921875, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -23.579856872558594, "logits_per_token": -8.531723022460938, "logits_per_char": -1.2188175746372767, "num_chars": 14}, {"sum_logits": -12.763689041137695, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.648706436157227, "logits_per_token": -6.381844520568848, "logits_per_char": -0.9818222339336689, "num_chars": 13}, {"sum_logits": -8.778253555297852, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.703516006469727, "logits_per_token": -4.389126777648926, "logits_per_char": -0.7980230504816229, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1146, "native_id": "0197ade3bb26d163ab2e284c960c626f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 4, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 4.792556285858154, "incorrect_loss_raw": 8.936043739318848, "correct_loss_per_char": 0.798759380976359, "incorrect_loss_per_char": 1.2004251822248682, "correct_loss_per_token": 4.792556285858154, "incorrect_loss_per_token": 5.122670531272888, "correct_loss_uncond": -8.18407678604126, "incorrect_loss_uncond": -7.256165027618408}, "model_output": [{"sum_logits": -4.792556285858154, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.976633071899414, "logits_per_token": -4.792556285858154, "logits_per_char": -0.798759380976359, "num_chars": 6}, {"sum_logits": -9.930204391479492, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.138530731201172, "logits_per_token": -4.965102195739746, "logits_per_char": -0.9027458537708629, "num_chars": 11}, {"sum_logits": -12.412853240966797, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.654151916503906, "logits_per_token": -6.206426620483398, "logits_per_char": -1.7732647487095423, "num_chars": 7}, {"sum_logits": -5.237189292907715, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -11.538265228271484, "logits_per_token": -5.237189292907715, "logits_per_char": -1.3092973232269287, "num_chars": 4}, {"sum_logits": -8.163928031921387, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.43788719177246, "logits_per_token": -4.081964015960693, "logits_per_char": -0.8163928031921387, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1147, "native_id": "a90f9197a13c64089c9ba95bcba275ad", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 7.974071502685547, "incorrect_loss_raw": 10.240028381347656, "correct_loss_per_char": 0.6645059585571289, "incorrect_loss_per_char": 1.0189059650055086, "correct_loss_per_token": 3.9870357513427734, "incorrect_loss_per_token": 6.319852709770203, "correct_loss_uncond": -11.202133178710938, "incorrect_loss_uncond": -9.863004684448242}, "model_output": [{"sum_logits": -9.598708152770996, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -9.598708152770996, "logits_per_char": -1.0665231280856662, "num_chars": 9}, {"sum_logits": -9.187844276428223, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.531917572021484, "logits_per_token": -4.593922138214111, "logits_per_char": -0.9187844276428223, "num_chars": 10}, {"sum_logits": -8.199182510375977, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.664451599121094, "logits_per_token": -4.099591255187988, "logits_per_char": -0.8199182510375976, "num_chars": 10}, {"sum_logits": -7.974071502685547, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -19.176204681396484, "logits_per_token": -3.9870357513427734, "logits_per_char": -0.6645059585571289, "num_chars": 12}, {"sum_logits": -13.97437858581543, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -22.107410430908203, "logits_per_token": -6.987189292907715, "logits_per_char": -1.2703980532559482, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1148, "native_id": "684204df916cc58d47293960f9c6ed9f", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.363945960998535, "incorrect_loss_raw": 8.407233595848083, "correct_loss_per_char": 0.7662779944283622, "incorrect_loss_per_char": 0.841867840246403, "correct_loss_per_token": 5.363945960998535, "incorrect_loss_per_token": 6.9146599769592285, "correct_loss_uncond": -6.213164329528809, "incorrect_loss_uncond": -6.302844166755676}, "model_output": [{"sum_logits": -11.94058895111084, "num_tokens": 2, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.652633666992188, "logits_per_token": -5.97029447555542, "logits_per_char": -0.9185068423931415, "num_chars": 13}, {"sum_logits": -6.007994651794434, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.009564399719238, "logits_per_token": -6.007994651794434, "logits_per_char": -0.5006662209828695, "num_chars": 12}, {"sum_logits": -6.807542324066162, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.989152908325195, "logits_per_token": -6.807542324066162, "logits_per_char": -0.6807542324066163, "num_chars": 10}, {"sum_logits": -8.872808456420898, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.188960075378418, "logits_per_token": -8.872808456420898, "logits_per_char": -1.2675440652029855, "num_chars": 7}, {"sum_logits": -5.363945960998535, "num_tokens": 1, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -11.577110290527344, "logits_per_token": -5.363945960998535, "logits_per_char": -0.7662779944283622, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1149, "native_id": "a2aa95861ef74bf1ecfc55db505e3982", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.916129112243652, "incorrect_loss_raw": 11.754337549209595, "correct_loss_per_char": 1.0610752741495768, "incorrect_loss_per_char": 1.264638327023922, "correct_loss_per_token": 7.958064556121826, "incorrect_loss_per_token": 6.2506072123845415, "correct_loss_uncond": -5.8757219314575195, "incorrect_loss_uncond": -5.386258840560913}, "model_output": [{"sum_logits": -10.254280090332031, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -19.252992630004883, "logits_per_token": -3.4180933634440103, "logits_per_char": -0.788790776179387, "num_chars": 13}, {"sum_logits": -11.448400497436523, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -13.676216125488281, "logits_per_token": -11.448400497436523, "logits_per_char": -1.908066749572754, "num_chars": 6}, {"sum_logits": -15.128398895263672, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.515771865844727, "logits_per_token": -5.042799631754558, "logits_per_char": -1.5128398895263673, "num_chars": 10}, {"sum_logits": -15.916129112243652, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.791851043701172, "logits_per_token": -7.958064556121826, "logits_per_char": -1.0610752741495768, "num_chars": 15}, {"sum_logits": -10.186270713806152, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.11740493774414, "logits_per_token": -5.093135356903076, "logits_per_char": -0.8488558928171793, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1150, "native_id": "8555dd9667d010018961a2f7d1c22704", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.6079010963439941, "incorrect_loss_raw": 7.02783989906311, "correct_loss_per_char": 0.3215802192687988, "incorrect_loss_per_char": 0.9371276541017695, "correct_loss_per_token": 1.6079010963439941, "incorrect_loss_per_token": 6.088248252868652, "correct_loss_uncond": -13.331912517547607, "incorrect_loss_uncond": -7.219117164611816}, "model_output": [{"sum_logits": -13.297615051269531, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.423558235168457, "logits_per_token": -13.297615051269531, "logits_per_char": -1.8996592930385046, "num_chars": 7}, {"sum_logits": -3.211029052734375, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.557395935058594, "logits_per_token": -3.211029052734375, "logits_per_char": -0.642205810546875, "num_chars": 5}, {"sum_logits": -7.516733169555664, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.47786808013916, "logits_per_token": -3.758366584777832, "logits_per_char": -0.8351925743950738, "num_chars": 9}, {"sum_logits": -1.6079010963439941, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": true, "sum_logits_uncond": -14.939813613891602, "logits_per_token": -1.6079010963439941, "logits_per_char": -0.3215802192687988, "num_chars": 5}, {"sum_logits": -4.085982322692871, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.529006004333496, "logits_per_token": -4.085982322692871, "logits_per_char": -0.37145293842662463, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1151, "native_id": "84a761f516efce04ab27d7ca8dd25255", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 12.128806114196777, "incorrect_loss_raw": 8.962397694587708, "correct_loss_per_char": 0.9329850857074444, "incorrect_loss_per_char": 0.798717530086787, "correct_loss_per_token": 4.042935371398926, "incorrect_loss_per_token": 6.608371555805206, "correct_loss_uncond": -5.076970100402832, "incorrect_loss_uncond": -6.535130620002747}, "model_output": [{"sum_logits": -11.738487243652344, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.676286697387695, "logits_per_token": -5.869243621826172, "logits_per_char": -0.7336554527282715, "num_chars": 16}, {"sum_logits": -12.128806114196777, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.20577621459961, "logits_per_token": -4.042935371398926, "logits_per_char": -0.9329850857074444, "num_chars": 13}, {"sum_logits": -13.330160140991211, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.00574779510498, "logits_per_token": -13.330160140991211, "logits_per_char": -1.481128904554579, "num_chars": 9}, {"sum_logits": -3.6872215270996094, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.389070510864258, "logits_per_token": -3.6872215270996094, "logits_per_char": -0.3352019570090554, "num_chars": 11}, {"sum_logits": -7.093721866607666, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.919008255004883, "logits_per_token": -3.546860933303833, "logits_per_char": -0.6448838060552423, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1152, "native_id": "45a6becd307342669d9d17474e50b97a", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.507264137268066, "incorrect_loss_raw": 11.807751893997192, "correct_loss_per_char": 0.6180743610157686, "incorrect_loss_per_char": 1.2898357108200624, "correct_loss_per_token": 2.6268160343170166, "incorrect_loss_per_token": 6.638169328371684, "correct_loss_uncond": -13.71909236907959, "incorrect_loss_uncond": -6.675760746002197}, "model_output": [{"sum_logits": -4.018553733825684, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.857738494873047, "logits_per_token": -2.009276866912842, "logits_per_char": -0.33487947781880695, "num_chars": 12}, {"sum_logits": -10.585057258605957, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.973843574523926, "logits_per_token": -10.585057258605957, "logits_per_char": -2.1170114517211913, "num_chars": 5}, {"sum_logits": -10.507264137268066, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -24.226356506347656, "logits_per_token": -2.6268160343170166, "logits_per_char": -0.6180743610157686, "num_chars": 17}, {"sum_logits": -18.49526596069336, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.47528839111328, "logits_per_token": -9.24763298034668, "logits_per_char": -1.4227127662071815, "num_chars": 13}, {"sum_logits": -14.13213062286377, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -21.627180099487305, "logits_per_token": -4.710710207621257, "logits_per_char": -1.28473914753307, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1153, "native_id": "c509c499bace6de324b39c0d4d0c30fa", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.507179260253906, "incorrect_loss_raw": 8.246309161186218, "correct_loss_per_char": 1.0724541800362724, "incorrect_loss_per_char": 1.294616621732712, "correct_loss_per_token": 7.507179260253906, "incorrect_loss_per_token": 8.246309161186218, "correct_loss_uncond": -8.037012100219727, "incorrect_loss_uncond": -5.092533707618713}, "model_output": [{"sum_logits": -3.138698101043701, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.97678279876709, "logits_per_token": -3.138698101043701, "logits_per_char": -0.2615581750869751, "num_chars": 12}, {"sum_logits": -4.072120666503906, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -11.836273193359375, "logits_per_token": -4.072120666503906, "logits_per_char": -0.8144241333007812, "num_chars": 5}, {"sum_logits": -11.742425918579102, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.35525894165039, "logits_per_token": -11.742425918579102, "logits_per_char": -2.3484851837158205, "num_chars": 5}, {"sum_logits": -14.031991958618164, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -14.031991958618164, "logits_per_char": -1.7539989948272705, "num_chars": 8}, {"sum_logits": -7.507179260253906, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.544191360473633, "logits_per_token": -7.507179260253906, "logits_per_char": -1.0724541800362724, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1154, "native_id": "77ddc9134bb27f9962aa2ed5ec5a5ef9", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.045218467712402, "incorrect_loss_raw": 15.085712671279907, "correct_loss_per_char": 0.5022609233856201, "incorrect_loss_per_char": 1.5054872433344524, "correct_loss_per_token": 5.022609233856201, "incorrect_loss_per_token": 9.963044246037802, "correct_loss_uncond": -6.283173561096191, "incorrect_loss_uncond": -2.0560173988342285}, "model_output": [{"sum_logits": -18.84445571899414, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -19.139053344726562, "logits_per_token": -9.42222785949707, "logits_per_char": -1.5703713099161785, "num_chars": 12}, {"sum_logits": -10.045218467712402, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.328392028808594, "logits_per_token": -5.022609233856201, "logits_per_char": -0.5022609233856201, "num_chars": 20}, {"sum_logits": -12.827406883239746, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.454044342041016, "logits_per_token": -12.827406883239746, "logits_per_char": -2.1379011472066245, "num_chars": 6}, {"sum_logits": -12.068319320678711, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.762389183044434, "logits_per_token": -12.068319320678711, "logits_per_char": -1.206831932067871, "num_chars": 10}, {"sum_logits": -16.60266876220703, "num_tokens": 3, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -21.21143341064453, "logits_per_token": -5.534222920735677, "logits_per_char": -1.1068445841471355, "num_chars": 15}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1155, "native_id": "715583129369c0c5c9f499c93a1c095e", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 10.096134185791016, "incorrect_loss_raw": 11.956534624099731, "correct_loss_per_char": 1.121792687310113, "incorrect_loss_per_char": 1.1882724530723583, "correct_loss_per_token": 3.3653780619303384, "incorrect_loss_per_token": 7.320894161860148, "correct_loss_uncond": -8.208972930908203, "incorrect_loss_uncond": -4.105136394500732}, "model_output": [{"sum_logits": -12.51518440246582, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.247249603271484, "logits_per_token": -4.171728134155273, "logits_per_char": -1.7878834860665458, "num_chars": 7}, {"sum_logits": -15.29865837097168, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.117111206054688, "logits_per_token": -5.0995527903238935, "logits_per_char": -0.8999210806453929, "num_chars": 17}, {"sum_logits": -10.470041275024414, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.179875373840332, "logits_per_token": -10.470041275024414, "logits_per_char": -0.8725034395853678, "num_chars": 12}, {"sum_logits": -9.542254447937012, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -12.702447891235352, "logits_per_token": -9.542254447937012, "logits_per_char": -1.1927818059921265, "num_chars": 8}, {"sum_logits": -10.096134185791016, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.30510711669922, "logits_per_token": -3.3653780619303384, "logits_per_char": -1.121792687310113, "num_chars": 9}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1156, "native_id": "a478e8b7c049781574f7fbb11ba1eec0", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 6.231928825378418, "incorrect_loss_raw": 8.345441699028015, "correct_loss_per_char": 0.6924365361531576, "incorrect_loss_per_char": 1.009562566424861, "correct_loss_per_token": 6.231928825378418, "incorrect_loss_per_token": 7.1170347929000854, "correct_loss_uncond": -8.593381881713867, "incorrect_loss_uncond": -5.4177387952804565}, "model_output": [{"sum_logits": -9.827255249023438, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.666664123535156, "logits_per_token": -4.913627624511719, "logits_per_char": -0.8189379374186198, "num_chars": 12}, {"sum_logits": -6.231928825378418, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.825310707092285, "logits_per_token": -6.231928825378418, "logits_per_char": -0.6924365361531576, "num_chars": 9}, {"sum_logits": -9.325002670288086, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -13.826044082641602, "logits_per_token": -9.325002670288086, "logits_per_char": -0.8477275154807351, "num_chars": 11}, {"sum_logits": -4.110894680023193, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -12.674345016479492, "logits_per_token": -4.110894680023193, "logits_per_char": -0.6851491133371989, "num_chars": 6}, {"sum_logits": -10.118614196777344, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.885668754577637, "logits_per_token": -10.118614196777344, "logits_per_char": -1.6864356994628906, "num_chars": 6}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1157, "native_id": "f427f9de6bf580314531baf86de8acbc", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.903928756713867, "incorrect_loss_raw": 13.086063623428345, "correct_loss_per_char": 0.557704108101981, "incorrect_loss_per_char": 1.9303822835286457, "correct_loss_per_token": 3.903928756713867, "incorrect_loss_per_token": 13.086063623428345, "correct_loss_uncond": -10.375219345092773, "incorrect_loss_uncond": -0.4286067485809326}, "model_output": [{"sum_logits": -14.28978443145752, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.789668083190918, "logits_per_token": -14.28978443145752, "logits_per_char": -2.3816307385762534, "num_chars": 6}, {"sum_logits": -3.903928756713867, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.27914810180664, "logits_per_token": -3.903928756713867, "logits_per_char": -0.557704108101981, "num_chars": 7}, {"sum_logits": -13.757711410522461, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.351519584655762, "logits_per_token": -13.757711410522461, "logits_per_char": -2.7515422821044924, "num_chars": 5}, {"sum_logits": -17.949548721313477, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -17.949548721313477, "logits_per_char": -1.7949548721313477, "num_chars": 10}, {"sum_logits": -6.347209930419922, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -12.747560501098633, "logits_per_token": -6.347209930419922, "logits_per_char": -0.7934012413024902, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1158, "native_id": "0f7425ecbe369bf41a230aab92d84132", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.01090145111084, "incorrect_loss_raw": 7.617249250411987, "correct_loss_per_char": 0.9175751209259033, "incorrect_loss_per_char": 0.930856011130593, "correct_loss_per_token": 5.50545072555542, "incorrect_loss_per_token": 5.656939268112183, "correct_loss_uncond": -5.455779075622559, "incorrect_loss_uncond": -7.500040531158447}, "model_output": [{"sum_logits": -9.801549911499023, "num_tokens": 5, "num_tokens_all": 156, "is_greedy": false, "sum_logits_uncond": -17.36239242553711, "logits_per_token": -1.9603099822998047, "logits_per_char": -0.5445305506388346, "num_chars": 18}, {"sum_logits": -11.01090145111084, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.4666805267334, "logits_per_token": -5.50545072555542, "logits_per_char": -0.9175751209259033, "num_chars": 12}, {"sum_logits": -6.746894836425781, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -6.746894836425781, "logits_per_char": -1.1244824727376301, "num_chars": 6}, {"sum_logits": -10.413562774658203, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.20539665222168, "logits_per_token": -10.413562774658203, "logits_per_char": -1.7355937957763672, "num_chars": 6}, {"sum_logits": -3.5069894790649414, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.593358993530273, "logits_per_token": -3.5069894790649414, "logits_per_char": -0.3188172253695401, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1159, "native_id": "c872c08a95dd28a16479b76f240a4ad5", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 4.545307636260986, "incorrect_loss_raw": 10.884706377983093, "correct_loss_per_char": 0.7575512727101644, "incorrect_loss_per_char": 0.991909219144465, "correct_loss_per_token": 4.545307636260986, "incorrect_loss_per_token": 8.694617629051208, "correct_loss_uncond": -7.7627034187316895, "incorrect_loss_uncond": -3.0328181982040405}, "model_output": [{"sum_logits": -9.073147773742676, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -13.907217979431152, "logits_per_token": -9.073147773742676, "logits_per_char": -0.6048765182495117, "num_chars": 15}, {"sum_logits": -9.466245651245117, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.811025619506836, "logits_per_token": -9.466245651245117, "logits_per_char": -0.9466245651245118, "num_chars": 10}, {"sum_logits": -7.478722095489502, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -11.577110290527344, "logits_per_token": -7.478722095489502, "logits_per_char": -1.0683888707842146, "num_chars": 7}, {"sum_logits": -4.545307636260986, "num_tokens": 1, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -12.308011054992676, "logits_per_token": -4.545307636260986, "logits_per_char": -0.7575512727101644, "num_chars": 6}, {"sum_logits": -17.520709991455078, "num_tokens": 2, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -17.374744415283203, "logits_per_token": -8.760354995727539, "logits_per_char": -1.3477469224196215, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1160, "native_id": "08d908ed723f813574992195d61386a2", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.555986404418945, "incorrect_loss_raw": 11.193049550056458, "correct_loss_per_char": 1.0505442185835405, "incorrect_loss_per_char": 1.4889923178788387, "correct_loss_per_token": 5.777993202209473, "incorrect_loss_per_token": 8.263815919558207, "correct_loss_uncond": -10.101613998413086, "incorrect_loss_uncond": -7.336698412895203}, "model_output": [{"sum_logits": -5.317371845245361, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -17.44467544555664, "logits_per_token": -1.7724572817484539, "logits_per_char": -0.44311432043711346, "num_chars": 12}, {"sum_logits": -11.555986404418945, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -21.65760040283203, "logits_per_token": -5.777993202209473, "logits_per_char": -1.0505442185835405, "num_chars": 11}, {"sum_logits": -13.878275871276855, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.718061447143555, "logits_per_token": -13.878275871276855, "logits_per_char": -1.734784483909607, "num_chars": 8}, {"sum_logits": -12.25802993774414, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.84897232055664, "logits_per_token": -4.086009979248047, "logits_per_char": -1.11436635797674, "num_chars": 11}, {"sum_logits": -13.318520545959473, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.107282638549805, "logits_per_token": -13.318520545959473, "logits_per_char": -2.6637041091918947, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1161, "native_id": "5365fd00ef8cec62ee5685e246a939db", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 4, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 13.032190322875977, "incorrect_loss_raw": 16.040884017944336, "correct_loss_per_char": 0.8145118951797485, "incorrect_loss_per_char": 1.1905515348210054, "correct_loss_per_token": 6.516095161437988, "incorrect_loss_per_token": 8.202933152516682, "correct_loss_uncond": -3.229036331176758, "incorrect_loss_uncond": -1.9264905452728271}, "model_output": [{"sum_logits": -17.659259796142578, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -19.928348541259766, "logits_per_token": -8.829629898071289, "logits_per_char": -1.038779988008387, "num_chars": 17}, {"sum_logits": -11.946432113647461, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -15.841264724731445, "logits_per_token": -3.982144037882487, "logits_per_char": -0.9955360094706217, "num_chars": 12}, {"sum_logits": -21.836828231811523, "num_tokens": 3, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -20.685462951660156, "logits_per_token": -7.278942743937175, "logits_per_char": -1.455788548787435, "num_chars": 15}, {"sum_logits": -12.721015930175781, "num_tokens": 1, "num_tokens_all": 149, "is_greedy": false, "sum_logits_uncond": -15.414422035217285, "logits_per_token": -12.721015930175781, "logits_per_char": -1.272101593017578, "num_chars": 10}, {"sum_logits": -13.032190322875977, "num_tokens": 2, "num_tokens_all": 150, "is_greedy": false, "sum_logits_uncond": -16.261226654052734, "logits_per_token": -6.516095161437988, "logits_per_char": -0.8145118951797485, "num_chars": 16}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1162, "native_id": "5649bd90dbb57e223fd843b7a4563a0f", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.617059707641602, "incorrect_loss_raw": 11.346019506454468, "correct_loss_per_char": 1.1234119415283204, "incorrect_loss_per_char": 1.3649399323122842, "correct_loss_per_token": 5.617059707641602, "incorrect_loss_per_token": 9.523208856582642, "correct_loss_uncond": -5.612630844116211, "incorrect_loss_uncond": -5.512589931488037}, "model_output": [{"sum_logits": -11.690424919128418, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.096864700317383, "logits_per_token": -11.690424919128418, "logits_per_char": -1.670060702732631, "num_chars": 7}, {"sum_logits": -8.717795372009277, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -16.2243709564209, "logits_per_token": -8.717795372009277, "logits_per_char": -1.0897244215011597, "num_chars": 8}, {"sum_logits": -10.393372535705566, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -14.462380409240723, "logits_per_token": -10.393372535705566, "logits_per_char": -1.4847675051007951, "num_chars": 7}, {"sum_logits": -5.617059707641602, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -11.229690551757812, "logits_per_token": -5.617059707641602, "logits_per_char": -1.1234119415283204, "num_chars": 5}, {"sum_logits": -14.58248519897461, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.650821685791016, "logits_per_token": -7.291242599487305, "logits_per_char": -1.2152070999145508, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1163, "native_id": "0a2195ae8d4706abc5721578c9991466", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 4.028216361999512, "incorrect_loss_raw": 9.294961929321289, "correct_loss_per_char": 0.33568469683329266, "incorrect_loss_per_char": 0.9296317517155348, "correct_loss_per_token": 2.014108180999756, "incorrect_loss_per_token": 5.473377108573914, "correct_loss_uncond": -18.24485492706299, "incorrect_loss_uncond": -8.833306312561035}, "model_output": [{"sum_logits": -6.607169151306152, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.169933319091797, "logits_per_token": -6.607169151306152, "logits_per_char": -0.6607169151306153, "num_chars": 10}, {"sum_logits": -4.028216361999512, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.2730712890625, "logits_per_token": -2.014108180999756, "logits_per_char": -0.33568469683329266, "num_chars": 12}, {"sum_logits": -10.164773941040039, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.6395206451416, "logits_per_token": -5.0823869705200195, "logits_per_char": -1.016477394104004, "num_chars": 10}, {"sum_logits": -11.19750690460205, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.88583755493164, "logits_per_token": -5.598753452301025, "logits_per_char": -1.0179551731456409, "num_chars": 11}, {"sum_logits": -9.210397720336914, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.817781448364258, "logits_per_token": -4.605198860168457, "logits_per_char": -1.0233775244818792, "num_chars": 9}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1164, "native_id": "5d15989039d46156b417c149728591de", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 4, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.635680675506592, "incorrect_loss_raw": 15.231261968612671, "correct_loss_per_char": 0.7372978528340658, "incorrect_loss_per_char": 1.8188485843794684, "correct_loss_per_token": 3.317840337753296, "incorrect_loss_per_token": 6.853194236755371, "correct_loss_uncond": -9.943295001983643, "incorrect_loss_uncond": -1.8391649723052979}, "model_output": [{"sum_logits": -18.807300567626953, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.066104888916016, "logits_per_token": -9.403650283813477, "logits_per_char": -2.6867572239467075, "num_chars": 7}, {"sum_logits": -6.635680675506592, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.578975677490234, "logits_per_token": -3.317840337753296, "logits_per_char": -0.7372978528340658, "num_chars": 9}, {"sum_logits": -15.074495315551758, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.495027542114258, "logits_per_token": -7.537247657775879, "logits_per_char": -1.8843119144439697, "num_chars": 8}, {"sum_logits": -14.844264030456543, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.748876571655273, "logits_per_token": -7.4221320152282715, "logits_per_char": -1.4844264030456542, "num_chars": 10}, {"sum_logits": -12.19898796081543, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.971698760986328, "logits_per_token": -3.0497469902038574, "logits_per_char": -1.219898796081543, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1165, "native_id": "6eb57102b44ab74163d8f9821cbdabd0", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 5.808938503265381, "incorrect_loss_raw": 9.534745872020721, "correct_loss_per_char": 0.528085318478671, "incorrect_loss_per_char": 0.7835743573578922, "correct_loss_per_token": 5.808938503265381, "incorrect_loss_per_token": 3.8754432002703347, "correct_loss_uncond": -6.259249210357666, "incorrect_loss_uncond": -9.145789682865143}, "model_output": [{"sum_logits": -5.808938503265381, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.068187713623047, "logits_per_token": -5.808938503265381, "logits_per_char": -0.528085318478671, "num_chars": 11}, {"sum_logits": -8.347840309143066, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.879416465759277, "logits_per_token": -4.173920154571533, "logits_per_char": -0.7588945735584606, "num_chars": 11}, {"sum_logits": -14.376472473144531, "num_tokens": 4, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -23.56168556213379, "logits_per_token": -3.594118118286133, "logits_per_char": -0.9584314982096355, "num_chars": 15}, {"sum_logits": -11.521404266357422, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -22.10120964050293, "logits_per_token": -3.840468088785807, "logits_per_char": -0.7680936177571615, "num_chars": 15}, {"sum_logits": -3.893266439437866, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.179830551147461, "logits_per_token": -3.893266439437866, "logits_per_char": -0.648877739906311, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1166, "native_id": "63861ac5e633db9090704ae315ef6f93", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.0892767906188965, "incorrect_loss_raw": 12.283137202262878, "correct_loss_per_char": 0.4413252558026995, "incorrect_loss_per_char": 1.4524824778238934, "correct_loss_per_token": 3.0892767906188965, "incorrect_loss_per_token": 9.19762122631073, "correct_loss_uncond": -10.389783382415771, "incorrect_loss_uncond": -3.5364409685134888}, "model_output": [{"sum_logits": -5.9774346351623535, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.30522346496582, "logits_per_token": -5.9774346351623535, "logits_per_char": -1.1954869270324706, "num_chars": 5}, {"sum_logits": -18.51309585571289, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -22.523784637451172, "logits_per_token": -6.171031951904297, "logits_per_char": -1.1570684909820557, "num_chars": 16}, {"sum_logits": -15.591085433959961, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.868921279907227, "logits_per_token": -15.591085433959961, "logits_per_char": -1.9488856792449951, "num_chars": 8}, {"sum_logits": -9.050932884216309, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -9.050932884216309, "logits_per_char": -1.5084888140360515, "num_chars": 6}, {"sum_logits": -3.0892767906188965, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -3.0892767906188965, "logits_per_char": -0.4413252558026995, "num_chars": 7}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1167, "native_id": "8058c566a4f488033d00e6520b17caea", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.412230491638184, "incorrect_loss_raw": 9.187166213989258, "correct_loss_per_char": 0.617685874303182, "incorrect_loss_per_char": 0.8246666543431336, "correct_loss_per_token": 7.412230491638184, "incorrect_loss_per_token": 6.709883173306783, "correct_loss_uncond": -6.316307067871094, "incorrect_loss_uncond": -6.739248514175415}, "model_output": [{"sum_logits": -9.824286460876465, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.689218521118164, "logits_per_token": -9.824286460876465, "logits_per_char": -0.7017347472054618, "num_chars": 14}, {"sum_logits": -9.65317440032959, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.3347749710083, "logits_per_token": -9.65317440032959, "logits_per_char": -1.2066468000411987, "num_chars": 8}, {"sum_logits": -9.630023002624512, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -16.905248641967773, "logits_per_token": -4.815011501312256, "logits_per_char": -0.802501916885376, "num_chars": 12}, {"sum_logits": -7.412230491638184, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -13.728537559509277, "logits_per_token": -7.412230491638184, "logits_per_char": -0.617685874303182, "num_chars": 12}, {"sum_logits": -7.641180992126465, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.776416778564453, "logits_per_token": -2.5470603307088218, "logits_per_char": -0.5877831532404973, "num_chars": 13}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1168, "native_id": "57b83653d82b27d32bc39228130f3516", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.164571762084961, "incorrect_loss_raw": 12.045938730239868, "correct_loss_per_char": 1.3955714702606201, "incorrect_loss_per_char": 1.315088912092074, "correct_loss_per_token": 11.164571762084961, "incorrect_loss_per_token": 10.012581586837769, "correct_loss_uncond": -2.241642951965332, "incorrect_loss_uncond": -2.6936216354370117}, "model_output": [{"sum_logits": -5.000879287719727, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.426194190979004, "logits_per_token": -5.000879287719727, "logits_per_char": -0.5556532541910807, "num_chars": 9}, {"sum_logits": -13.705368995666504, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -15.820996284484863, "logits_per_token": -13.705368995666504, "logits_per_char": -1.2459426359696821, "num_chars": 11}, {"sum_logits": -13.210649490356445, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.187056541442871, "logits_per_token": -13.210649490356445, "logits_per_char": -1.6513311862945557, "num_chars": 8}, {"sum_logits": -16.266857147216797, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.52399444580078, "logits_per_token": -8.133428573608398, "logits_per_char": -1.8074285719129775, "num_chars": 9}, {"sum_logits": -11.164571762084961, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -13.406214714050293, "logits_per_token": -11.164571762084961, "logits_per_char": -1.3955714702606201, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1169, "native_id": "410f907f817dd7aa8e73291a918d3d86", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.532546043395996, "incorrect_loss_raw": 8.599014401435852, "correct_loss_per_char": 1.2554243405659993, "incorrect_loss_per_char": 1.0642001880453777, "correct_loss_per_token": 7.532546043395996, "incorrect_loss_per_token": 6.968002617359161, "correct_loss_uncond": -9.029648780822754, "incorrect_loss_uncond": -8.394537568092346}, "model_output": [{"sum_logits": -12.3228120803833, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.501020431518555, "logits_per_token": -12.3228120803833, "logits_per_char": -2.053802013397217, "num_chars": 6}, {"sum_logits": -9.025151252746582, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.118462562561035, "logits_per_token": -9.025151252746582, "logits_per_char": -1.1281439065933228, "num_chars": 8}, {"sum_logits": -5.7152252197265625, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.156919479370117, "logits_per_token": -2.8576126098632812, "logits_per_char": -0.4082303728376116, "num_chars": 14}, {"sum_logits": -7.532546043395996, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -16.56219482421875, "logits_per_token": -7.532546043395996, "logits_per_char": -1.2554243405659993, "num_chars": 6}, {"sum_logits": -7.332869052886963, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.197805404663086, "logits_per_token": -3.6664345264434814, "logits_per_char": -0.6666244593533602, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1170, "native_id": "506c2dbfe7b00a82bfdf0507a8de88fb", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 9.95169448852539, "incorrect_loss_raw": 12.646481990814209, "correct_loss_per_char": 1.2439618110656738, "incorrect_loss_per_char": 1.4894141069110136, "correct_loss_per_token": 3.3172314961751304, "incorrect_loss_per_token": 8.38703242937724, "correct_loss_uncond": -6.9237213134765625, "incorrect_loss_uncond": -2.9941821098327637}, "model_output": [{"sum_logits": -12.356256484985352, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.137375831604004, "logits_per_token": -12.356256484985352, "logits_per_char": -1.7651794978550501, "num_chars": 7}, {"sum_logits": -12.897016525268555, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.921577453613281, "logits_per_token": -6.448508262634277, "logits_per_char": -1.6121270656585693, "num_chars": 8}, {"sum_logits": -15.88393497467041, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.837387084960938, "logits_per_token": -5.294644991556804, "logits_per_char": -0.6906058684639309, "num_chars": 23}, {"sum_logits": -9.44871997833252, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -9.44871997833252, "logits_per_char": -1.8897439956665039, "num_chars": 5}, {"sum_logits": -9.95169448852539, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.875415802001953, "logits_per_token": -3.3172314961751304, "logits_per_char": -1.2439618110656738, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1171, "native_id": "42520bf3f93f8de23670044e019001a3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.794578552246094, "incorrect_loss_raw": 8.883185148239136, "correct_loss_per_char": 0.5794578552246094, "incorrect_loss_per_char": 1.0056017557779948, "correct_loss_per_token": 2.897289276123047, "incorrect_loss_per_token": 6.184211730957031, "correct_loss_uncond": -14.523561477661133, "incorrect_loss_uncond": -7.07290244102478}, "model_output": [{"sum_logits": -8.730517387390137, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -12.955814361572266, "logits_per_token": -8.730517387390137, "logits_per_char": -1.4550862312316895, "num_chars": 6}, {"sum_logits": -13.175634384155273, "num_tokens": 3, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.109735488891602, "logits_per_token": -4.391878128051758, "logits_per_char": -1.0979695320129395, "num_chars": 12}, {"sum_logits": -9.602313995361328, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -16.858484268188477, "logits_per_token": -9.602313995361328, "logits_per_char": -1.0669237772623699, "num_chars": 9}, {"sum_logits": -4.024274826049805, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -18.90031623840332, "logits_per_token": -2.0121374130249023, "logits_per_char": -0.40242748260498046, "num_chars": 10}, {"sum_logits": -5.794578552246094, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -20.318140029907227, "logits_per_token": -2.897289276123047, "logits_per_char": -0.5794578552246094, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1172, "native_id": "5e260e1d96187716888cbd968010bb65", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 10.97463321685791, "incorrect_loss_raw": 9.007054209709167, "correct_loss_per_char": 0.6455666598151711, "incorrect_loss_per_char": 1.2491908124514988, "correct_loss_per_token": 3.65821107228597, "incorrect_loss_per_token": 6.403913617134094, "correct_loss_uncond": -11.747401237487793, "incorrect_loss_uncond": -6.054853081703186}, "model_output": [{"sum_logits": -12.293610572814941, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.6275577545166, "logits_per_token": -6.146805286407471, "logits_per_char": -1.0244675477345784, "num_chars": 12}, {"sum_logits": -8.613313674926758, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.374385833740234, "logits_per_token": -8.613313674926758, "logits_per_char": -1.4355522791544597, "num_chars": 6}, {"sum_logits": -8.531514167785645, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -16.688289642333984, "logits_per_token": -4.265757083892822, "logits_per_char": -1.218787738255092, "num_chars": 7}, {"sum_logits": -10.97463321685791, "num_tokens": 3, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -22.722034454345703, "logits_per_token": -3.65821107228597, "logits_per_char": -0.6455666598151711, "num_chars": 17}, {"sum_logits": -6.589778423309326, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.557395935058594, "logits_per_token": -6.589778423309326, "logits_per_char": -1.3179556846618652, "num_chars": 5}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1173, "native_id": "ed50555f8db2b8f66caf9868dcd7e13b", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 1, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.651573657989502, "incorrect_loss_raw": 10.26155400276184, "correct_loss_per_char": 0.7390637397766113, "incorrect_loss_per_char": 1.0169742193486955, "correct_loss_per_token": 3.325786828994751, "incorrect_loss_per_token": 6.306230306625366, "correct_loss_uncond": -8.279036045074463, "incorrect_loss_uncond": -7.778901815414429}, "model_output": [{"sum_logits": -6.651573657989502, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -14.930609703063965, "logits_per_token": -3.325786828994751, "logits_per_char": -0.7390637397766113, "num_chars": 9}, {"sum_logits": -16.180910110473633, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -29.10370635986328, "logits_per_token": -8.090455055236816, "logits_per_char": -1.3484091758728027, "num_chars": 12}, {"sum_logits": -15.461679458618164, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.26066017150879, "logits_per_token": -7.730839729309082, "logits_per_char": -0.9663549661636353, "num_chars": 16}, {"sum_logits": -1.4354162216186523, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": true, "sum_logits_uncond": -13.479942321777344, "logits_per_token": -1.4354162216186523, "logits_per_char": -0.15949069129096138, "num_chars": 9}, {"sum_logits": -7.968210220336914, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -10.317514419555664, "logits_per_token": -7.968210220336914, "logits_per_char": -1.5936420440673829, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1174, "native_id": "a8c284637dabc87745a7eb05d4f7fcbc", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.9275363683700562, "incorrect_loss_raw": 12.168852806091309, "correct_loss_per_char": 0.2141707075966729, "incorrect_loss_per_char": 1.4403669073468164, "correct_loss_per_token": 1.9275363683700562, "incorrect_loss_per_token": 7.548291969299316, "correct_loss_uncond": -11.87792456150055, "incorrect_loss_uncond": -3.6549432277679443}, "model_output": [{"sum_logits": -23.10280418395996, "num_tokens": 5, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.619035720825195, "logits_per_token": -4.620560836791992, "logits_per_char": -1.155140209197998, "num_chars": 20}, {"sum_logits": -6.964485168457031, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -6.964485168457031, "logits_per_char": -1.7411212921142578, "num_chars": 4}, {"sum_logits": -8.689926147460938, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.287192344665527, "logits_per_token": -8.689926147460938, "logits_per_char": -1.448321024576823, "num_chars": 6}, {"sum_logits": -1.9275363683700562, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.805460929870605, "logits_per_token": -1.9275363683700562, "logits_per_char": -0.2141707075966729, "num_chars": 9}, {"sum_logits": -9.918195724487305, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -12.480010986328125, "logits_per_token": -9.918195724487305, "logits_per_char": -1.4168851034981864, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1175, "native_id": "5758a0fb686071e95d95b1cfad5299a0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 4, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.81141185760498, "incorrect_loss_raw": 13.149198055267334, "correct_loss_per_char": 0.9009509881337484, "incorrect_loss_per_char": 1.0388141220266167, "correct_loss_per_token": 3.6038039525349936, "incorrect_loss_per_token": 8.051812887191772, "correct_loss_uncond": -8.032010078430176, "incorrect_loss_uncond": -3.6914865970611572}, "model_output": [{"sum_logits": -11.817710876464844, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.416316986083984, "logits_per_token": -11.817710876464844, "logits_per_char": -1.0743373524058948, "num_chars": 11}, {"sum_logits": -14.420863151550293, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -18.553625106811523, "logits_per_token": -7.2104315757751465, "logits_per_char": -0.8011590639750162, "num_chars": 18}, {"sum_logits": -10.81141185760498, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -18.843421936035156, "logits_per_token": -3.6038039525349936, "logits_per_char": -0.9009509881337484, "num_chars": 12}, {"sum_logits": -15.370288848876953, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -15.976893424987793, "logits_per_token": -7.685144424438477, "logits_per_char": -1.2808574040730794, "num_chars": 12}, {"sum_logits": -10.987929344177246, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.415903091430664, "logits_per_token": -5.493964672088623, "logits_per_char": -0.9989026676524769, "num_chars": 11}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1176, "native_id": "d986f17acb3ed19c77e3ca3f98c026b9", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.710037231445312, "incorrect_loss_raw": 22.2524471282959, "correct_loss_per_char": 0.5394465128580729, "incorrect_loss_per_char": 1.3345488077013001, "correct_loss_per_token": 4.855018615722656, "incorrect_loss_per_token": 8.265472849210104, "correct_loss_uncond": -11.59536361694336, "incorrect_loss_uncond": -3.6754751205444336}, "model_output": [{"sum_logits": -27.148330688476562, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -28.192962646484375, "logits_per_token": -9.049443562825521, "logits_per_char": -1.1803622038468071, "num_chars": 23}, {"sum_logits": -19.15950584411621, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.411449432373047, "logits_per_token": -9.579752922058105, "logits_per_char": -1.4738081418550932, "num_chars": 13}, {"sum_logits": -27.673124313354492, "num_tokens": 4, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -33.76897430419922, "logits_per_token": -6.918281078338623, "logits_per_char": -1.317767824445452, "num_chars": 21}, {"sum_logits": -9.710037231445312, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -21.305400848388672, "logits_per_token": -4.855018615722656, "logits_per_char": -0.5394465128580729, "num_chars": 18}, {"sum_logits": -15.028827667236328, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -20.338302612304688, "logits_per_token": -7.514413833618164, "logits_per_char": -1.366257060657848, "num_chars": 11}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1177, "native_id": "4a4f6408fae400ce0beb5bea0f9913e9", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.3983612060546875, "incorrect_loss_raw": 8.041697144508362, "correct_loss_per_char": 0.08225654153262868, "incorrect_loss_per_char": 1.025834758130331, "correct_loss_per_token": 0.6991806030273438, "incorrect_loss_per_token": 5.974534630775452, "correct_loss_uncond": -17.137584686279297, "incorrect_loss_uncond": -8.503862977027893}, "model_output": [{"sum_logits": -11.033899307250977, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -15.931836128234863, "logits_per_token": -11.033899307250977, "logits_per_char": -1.379237413406372, "num_chars": 8}, {"sum_logits": -11.844088554382324, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -17.13184356689453, "logits_per_token": -5.922044277191162, "logits_per_char": -1.6920126506260462, "num_chars": 7}, {"sum_logits": -4.693211555480957, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.971271514892578, "logits_per_token": -2.3466057777404785, "logits_per_char": -0.5214679506089952, "num_chars": 9}, {"sum_logits": -4.5955891609191895, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.147289276123047, "logits_per_token": -4.5955891609191895, "logits_per_char": -0.51062101787991, "num_chars": 9}, {"sum_logits": -1.3983612060546875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": true, "sum_logits_uncond": -18.535945892333984, "logits_per_token": -0.6991806030273438, "logits_per_char": -0.08225654153262868, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1178, "native_id": "8c655f3a55bde41aad880f138d7a445d", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 1, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.937840461730957, "incorrect_loss_raw": 9.837411522865295, "correct_loss_per_char": 1.3875680923461915, "incorrect_loss_per_char": 2.2281681299209595, "correct_loss_per_token": 6.937840461730957, "incorrect_loss_per_token": 9.837411522865295, "correct_loss_uncond": -5.524565696716309, "incorrect_loss_uncond": -3.9456390142440796}, "model_output": [{"sum_logits": -10.427433013916016, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.47614574432373, "logits_per_token": -10.427433013916016, "logits_per_char": -2.606858253479004, "num_chars": 4}, {"sum_logits": -7.8156609535217285, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.158048629760742, "logits_per_token": -7.8156609535217285, "logits_per_char": -1.5631321907043456, "num_chars": 5}, {"sum_logits": -6.937840461730957, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.462406158447266, "logits_per_token": -6.937840461730957, "logits_per_char": -1.3875680923461915, "num_chars": 5}, {"sum_logits": -10.427433013916016, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.47614574432373, "logits_per_token": -10.427433013916016, "logits_per_char": -2.606858253479004, "num_chars": 4}, {"sum_logits": -10.679119110107422, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.021862030029297, "logits_per_token": -10.679119110107422, "logits_per_char": -2.1358238220214845, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1179, "native_id": "56417ee33b44f0d916bedfb6fd99b0ec", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 6.2062859535217285, "incorrect_loss_raw": 11.750711679458618, "correct_loss_per_char": 0.5642078139565208, "incorrect_loss_per_char": 1.1921508430488525, "correct_loss_per_token": 6.2062859535217285, "incorrect_loss_per_token": 6.3033759991327925, "correct_loss_uncond": -7.2420973777771, "incorrect_loss_uncond": -5.637267589569092}, "model_output": [{"sum_logits": -18.505590438842773, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -19.01320457458496, "logits_per_token": -6.168530146280925, "logits_per_char": -2.0561767154269748, "num_chars": 9}, {"sum_logits": -9.592691421508789, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -12.999371528625488, "logits_per_token": -9.592691421508789, "logits_per_char": -1.3703844887869698, "num_chars": 7}, {"sum_logits": -11.200235366821289, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.393962860107422, "logits_per_token": -5.6001176834106445, "logits_per_char": -0.7000147104263306, "num_chars": 16}, {"sum_logits": -6.2062859535217285, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -13.448383331298828, "logits_per_token": -6.2062859535217285, "logits_per_char": -0.5642078139565208, "num_chars": 11}, {"sum_logits": -7.704329490661621, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.14537811279297, "logits_per_token": -3.8521647453308105, "logits_per_char": -0.6420274575551351, "num_chars": 12}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1180, "native_id": "43fb083962f825ae651d88648bbd2f74", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 2, "predicted_index_per_char": 0, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 12.432933807373047, "incorrect_loss_raw": 14.528859853744507, "correct_loss_per_char": 0.8880667005266462, "incorrect_loss_per_char": 1.3937860574477758, "correct_loss_per_token": 6.216466903686523, "incorrect_loss_per_token": 7.358952164649963, "correct_loss_uncond": -8.481681823730469, "incorrect_loss_uncond": -2.513936996459961}, "model_output": [{"sum_logits": -9.711845397949219, "num_tokens": 1, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -9.711845397949219, "logits_per_char": -0.8093204498291016, "num_chars": 12}, {"sum_logits": -16.347082138061523, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.735669136047363, "logits_per_token": -8.173541069030762, "logits_per_char": -1.8163424597846136, "num_chars": 9}, {"sum_logits": -17.911334991455078, "num_tokens": 4, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -20.276830673217773, "logits_per_token": -4.4778337478637695, "logits_per_char": -1.3777949993426983, "num_chars": 13}, {"sum_logits": -12.432933807373047, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -20.914615631103516, "logits_per_token": -6.216466903686523, "logits_per_char": -0.8880667005266462, "num_chars": 14}, {"sum_logits": -14.145176887512207, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.43558692932129, "logits_per_token": -7.0725884437561035, "logits_per_char": -1.5716863208346896, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1181, "native_id": "aed771629c8dbd0c2587891e98030607", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 6.162103652954102, "incorrect_loss_raw": 9.455991506576538, "correct_loss_per_char": 1.2324207305908204, "incorrect_loss_per_char": 1.3035650522935958, "correct_loss_per_token": 6.162103652954102, "incorrect_loss_per_token": 9.455991506576538, "correct_loss_uncond": -6.3489274978637695, "incorrect_loss_uncond": -3.8669240474700928}, "model_output": [{"sum_logits": -11.339153289794922, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.615432739257812, "logits_per_token": -11.339153289794922, "logits_per_char": -1.889858881632487, "num_chars": 6}, {"sum_logits": -10.538262367248535, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.17702579498291, "logits_per_token": -10.538262367248535, "logits_per_char": -1.5054660524640764, "num_chars": 7}, {"sum_logits": -6.975340843200684, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.989152908325195, "logits_per_token": -6.975340843200684, "logits_per_char": -0.6975340843200684, "num_chars": 10}, {"sum_logits": -8.971209526062012, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.510050773620605, "logits_per_token": -8.971209526062012, "logits_per_char": -1.1214011907577515, "num_chars": 8}, {"sum_logits": -6.162103652954102, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.511031150817871, "logits_per_token": -6.162103652954102, "logits_per_char": -1.2324207305908204, "num_chars": 5}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1182, "native_id": "d0a42c8180b4e080aa071dd70fce7e03", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 11.599678039550781, "incorrect_loss_raw": 16.71597123146057, "correct_loss_per_char": 0.6444265577528212, "incorrect_loss_per_char": 1.3384168738410587, "correct_loss_per_token": 5.799839019775391, "incorrect_loss_per_token": 9.59399676322937, "correct_loss_uncond": -7.116512298583984, "incorrect_loss_uncond": -0.030317306518554688}, "model_output": [{"sum_logits": -18.052221298217773, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.136798858642578, "logits_per_token": -9.026110649108887, "logits_per_char": -1.2894443784441267, "num_chars": 14}, {"sum_logits": -11.599678039550781, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -18.716190338134766, "logits_per_token": -5.799839019775391, "logits_per_char": -0.6444265577528212, "num_chars": 18}, {"sum_logits": -21.934711456298828, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.652427673339844, "logits_per_token": -10.967355728149414, "logits_per_char": -1.5667651040213448, "num_chars": 14}, {"sum_logits": -9.888089179992676, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.72219181060791, "logits_per_token": -9.888089179992676, "logits_per_char": -1.6480148633321126, "num_chars": 6}, {"sum_logits": -16.988862991333008, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -16.473735809326172, "logits_per_token": -8.494431495666504, "logits_per_char": -0.8494431495666503, "num_chars": 20}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1183, "native_id": "533599262a5dae7c7137cfe69e0e24fb", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.9632647037506104, "incorrect_loss_raw": 9.15037202835083, "correct_loss_per_char": 0.24693872531255087, "incorrect_loss_per_char": 1.202743829621209, "correct_loss_per_token": 2.9632647037506104, "incorrect_loss_per_token": 9.15037202835083, "correct_loss_uncond": -12.759835958480835, "incorrect_loss_uncond": -4.547790765762329}, "model_output": [{"sum_logits": -3.548738479614258, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -14.377196311950684, "logits_per_token": -3.548738479614258, "logits_per_char": -0.3548738479614258, "num_chars": 10}, {"sum_logits": -2.9632647037506104, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -2.9632647037506104, "logits_per_char": -0.24693872531255087, "num_chars": 12}, {"sum_logits": -12.261116981506348, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.071558952331543, "logits_per_token": -12.261116981506348, "logits_per_char": -2.0435194969177246, "num_chars": 6}, {"sum_logits": -13.418791770935059, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.966282844543457, "logits_per_token": -13.418791770935059, "logits_per_char": -1.4909768634372287, "num_chars": 9}, {"sum_logits": -7.372840881347656, "num_tokens": 1, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -13.377613067626953, "logits_per_token": -7.372840881347656, "logits_per_char": -0.921605110168457, "num_chars": 8}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1184, "native_id": "edd1634d911614590c6b8ca730df95fe", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.338611125946045, "incorrect_loss_raw": 9.330140113830566, "correct_loss_per_char": 0.576237375086004, "incorrect_loss_per_char": 0.8782368155268879, "correct_loss_per_token": 3.1693055629730225, "incorrect_loss_per_token": 5.265468597412109, "correct_loss_uncond": -10.798180103302002, "incorrect_loss_uncond": -8.671652793884277}, "model_output": [{"sum_logits": -7.922176361083984, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -16.553054809570312, "logits_per_token": -3.961088180541992, "logits_per_char": -0.6601813634236654, "num_chars": 12}, {"sum_logits": -9.15165901184082, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.523197174072266, "logits_per_token": -4.57582950592041, "logits_per_char": -0.762638250986735, "num_chars": 12}, {"sum_logits": -15.443536758422852, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.929039001464844, "logits_per_token": -7.721768379211426, "logits_per_char": -1.40395788712935, "num_chars": 11}, {"sum_logits": -6.338611125946045, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.136791229248047, "logits_per_token": -3.1693055629730225, "logits_per_char": -0.576237375086004, "num_chars": 11}, {"sum_logits": -4.803188323974609, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -15.001880645751953, "logits_per_token": -4.803188323974609, "logits_per_char": -0.6861697605678013, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1185, "native_id": "9a544e9f4847c41a15fdf47ae7b98d8a", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 2, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.138350963592529, "incorrect_loss_raw": 13.196674108505249, "correct_loss_per_char": 0.8922938704490662, "incorrect_loss_per_char": 1.1360747558729989, "correct_loss_per_token": 7.138350963592529, "incorrect_loss_per_token": 6.920434196790059, "correct_loss_uncond": -7.76375150680542, "incorrect_loss_uncond": -4.424871206283569}, "model_output": [{"sum_logits": -8.967823028564453, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.100849151611328, "logits_per_token": -8.967823028564453, "logits_per_char": -1.1209778785705566, "num_chars": 8}, {"sum_logits": -7.138350963592529, "num_tokens": 1, "num_tokens_all": 159, "is_greedy": false, "sum_logits_uncond": -14.90210247039795, "logits_per_token": -7.138350963592529, "logits_per_char": -0.8922938704490662, "num_chars": 8}, {"sum_logits": -11.139670372009277, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -17.197744369506836, "logits_per_token": -5.569835186004639, "logits_per_char": -0.9283058643341064, "num_chars": 12}, {"sum_logits": -19.173137664794922, "num_tokens": 3, "num_tokens_all": 161, "is_greedy": false, "sum_logits_uncond": -20.787113189697266, "logits_per_token": -6.391045888264974, "logits_per_char": -1.3695098331996374, "num_chars": 14}, {"sum_logits": -13.506065368652344, "num_tokens": 2, "num_tokens_all": 160, "is_greedy": false, "sum_logits_uncond": -18.400474548339844, "logits_per_token": -6.753032684326172, "logits_per_char": -1.1255054473876953, "num_chars": 12}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1186, "native_id": "26bd85f05d29863ed777a4f1a4b8fa63", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 0, "correct_loss_raw": 3.0858054161071777, "incorrect_loss_raw": 14.96642518043518, "correct_loss_per_char": 0.3085805416107178, "incorrect_loss_per_char": 1.1004289633267885, "correct_loss_per_token": 3.0858054161071777, "incorrect_loss_per_token": 6.07584547996521, "correct_loss_uncond": -11.101104259490967, "incorrect_loss_uncond": -5.543704271316528}, "model_output": [{"sum_logits": -18.629121780395508, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.099693298339844, "logits_per_token": -6.209707260131836, "logits_per_char": -1.3306515557425362, "num_chars": 14}, {"sum_logits": -15.938943862915039, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.160675048828125, "logits_per_token": -7.9694719314575195, "logits_per_char": -1.138495990208217, "num_chars": 14}, {"sum_logits": -10.149946212768555, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -23.306211471557617, "logits_per_token": -5.074973106384277, "logits_per_char": -0.9227223829789595, "num_chars": 11}, {"sum_logits": -3.0858054161071777, "num_tokens": 1, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -14.186909675598145, "logits_per_token": -3.0858054161071777, "logits_per_char": -0.3085805416107178, "num_chars": 10}, {"sum_logits": -15.147688865661621, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -20.47393798828125, "logits_per_token": -5.049229621887207, "logits_per_char": -1.0098459243774414, "num_chars": 15}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1187, "native_id": "3884d82524f2337ce53ce64776293cf7", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.673162460327148, "incorrect_loss_raw": 8.13649034500122, "correct_loss_per_char": 0.8673162460327148, "incorrect_loss_per_char": 0.8141915396640176, "correct_loss_per_token": 4.336581230163574, "incorrect_loss_per_token": 5.521821588277817, "correct_loss_uncond": -10.568853378295898, "incorrect_loss_uncond": -8.261410474777222}, "model_output": [{"sum_logits": -8.673162460327148, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.242015838623047, "logits_per_token": -4.336581230163574, "logits_per_char": -0.8673162460327148, "num_chars": 10}, {"sum_logits": -10.319947242736816, "num_tokens": 2, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -19.932151794433594, "logits_per_token": -5.159973621368408, "logits_per_char": -0.6879964828491211, "num_chars": 15}, {"sum_logits": -11.137130737304688, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -15.308147430419922, "logits_per_token": -11.137130737304688, "logits_per_char": -1.392141342163086, "num_chars": 8}, {"sum_logits": -7.064935207366943, "num_tokens": 4, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -17.856563568115234, "logits_per_token": -1.7662338018417358, "logits_per_char": -0.37183869512457596, "num_chars": 19}, {"sum_logits": -4.0239481925964355, "num_tokens": 1, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -12.49474048614502, "logits_per_token": -4.0239481925964355, "logits_per_char": -0.8047896385192871, "num_chars": 5}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1188, "native_id": "acb3147d946db3b06a596d48e0be56cf", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.442172050476074, "incorrect_loss_raw": 10.09419059753418, "correct_loss_per_char": 1.0884344100952148, "incorrect_loss_per_char": 1.4883629587980416, "correct_loss_per_token": 5.442172050476074, "incorrect_loss_per_token": 8.269830703735352, "correct_loss_uncond": -9.367361068725586, "incorrect_loss_uncond": -6.157853126525879}, "model_output": [{"sum_logits": -5.442172050476074, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.80953311920166, "logits_per_token": -5.442172050476074, "logits_per_char": -1.0884344100952148, "num_chars": 5}, {"sum_logits": -14.594879150390625, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -23.01959991455078, "logits_per_token": -7.2974395751953125, "logits_per_char": -1.1226830115685096, "num_chars": 13}, {"sum_logits": -6.405459403991699, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -11.330245018005371, "logits_per_token": -6.405459403991699, "logits_per_char": -1.6013648509979248, "num_chars": 4}, {"sum_logits": -10.765412330627441, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.082191467285156, "logits_per_token": -10.765412330627441, "logits_per_char": -1.794235388437907, "num_chars": 6}, {"sum_logits": -8.611011505126953, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.576138496398926, "logits_per_token": -8.611011505126953, "logits_per_char": -1.4351685841878254, "num_chars": 6}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1189, "native_id": "52ab95f9216f1994e37cc08f7f258f13", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 9.007181167602539, "incorrect_loss_raw": 15.141317129135132, "correct_loss_per_char": 0.6004787445068359, "incorrect_loss_per_char": 1.079730648692377, "correct_loss_per_token": 4.5035905838012695, "incorrect_loss_per_token": 6.955510139465332, "correct_loss_uncond": -12.812929153442383, "incorrect_loss_uncond": -5.089158773422241}, "model_output": [{"sum_logits": -14.763562202453613, "num_tokens": 3, "num_tokens_all": 132, "is_greedy": false, "sum_logits_uncond": -19.59397315979004, "logits_per_token": -4.921187400817871, "logits_per_char": -1.1356586309579701, "num_chars": 13}, {"sum_logits": -22.697635650634766, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -25.221031188964844, "logits_per_token": -11.348817825317383, "logits_per_char": -1.3351550382726334, "num_chars": 17}, {"sum_logits": -9.007181167602539, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -21.820110321044922, "logits_per_token": -4.5035905838012695, "logits_per_char": -0.6004787445068359, "num_chars": 15}, {"sum_logits": -11.876830101013184, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -19.866668701171875, "logits_per_token": -5.938415050506592, "logits_per_char": -1.1876830101013183, "num_chars": 10}, {"sum_logits": -11.227240562438965, "num_tokens": 2, "num_tokens_all": 131, "is_greedy": false, "sum_logits_uncond": -16.240230560302734, "logits_per_token": -5.613620281219482, "logits_per_char": -0.6604259154375862, "num_chars": 17}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1190, "native_id": "f60641f550d5ee44ac1bedcaf6ad6357", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0394649505615234, "incorrect_loss_raw": 12.611126899719238, "correct_loss_per_char": 0.10394649505615235, "incorrect_loss_per_char": 1.4741252182802675, "correct_loss_per_token": 0.5197324752807617, "incorrect_loss_per_token": 7.55925989151001, "correct_loss_uncond": -15.320398330688477, "incorrect_loss_uncond": -2.9382519721984863}, "model_output": [{"sum_logits": -13.987070083618164, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.02305030822754, "logits_per_token": -6.993535041809082, "logits_per_char": -1.075928467970628, "num_chars": 13}, {"sum_logits": -1.0394649505615234, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": true, "sum_logits_uncond": -16.35986328125, "logits_per_token": -0.5197324752807617, "logits_per_char": -0.10394649505615235, "num_chars": 10}, {"sum_logits": -10.029571533203125, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.11971378326416, "logits_per_token": -10.029571533203125, "logits_per_char": -2.5073928833007812, "num_chars": 4}, {"sum_logits": -11.794694900512695, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -13.918330192565918, "logits_per_token": -5.897347450256348, "logits_per_char": -0.9828912417093912, "num_chars": 12}, {"sum_logits": -14.633171081542969, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -18.13642120361328, "logits_per_token": -7.316585540771484, "logits_per_char": -1.3302882801402698, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1191, "native_id": "d9835ede7a0ed79325de13ca95b85b78", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 3, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 10.484159469604492, "incorrect_loss_raw": 10.285005569458008, "correct_loss_per_char": 1.3105199337005615, "incorrect_loss_per_char": 1.0341087304165684, "correct_loss_per_token": 3.4947198232014975, "incorrect_loss_per_token": 6.44324517250061, "correct_loss_uncond": -6.729608535766602, "incorrect_loss_uncond": -6.457728385925293}, "model_output": [{"sum_logits": -10.43904972076416, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.13226890563965, "logits_per_token": -5.21952486038208, "logits_per_char": -0.8030038246741662, "num_chars": 13}, {"sum_logits": -11.004592895507812, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.17339324951172, "logits_per_token": -5.502296447753906, "logits_per_char": -1.0004175359552556, "num_chars": 11}, {"sum_logits": -10.405939102172852, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -13.585737228393555, "logits_per_token": -10.405939102172852, "logits_per_char": -1.3007423877716064, "num_chars": 8}, {"sum_logits": -10.484159469604492, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -17.213768005371094, "logits_per_token": -3.4947198232014975, "logits_per_char": -1.3105199337005615, "num_chars": 8}, {"sum_logits": -9.290440559387207, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -18.07953643798828, "logits_per_token": -4.6452202796936035, "logits_per_char": -1.0322711732652452, "num_chars": 9}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1192, "native_id": "2987db72e66f5fa0015ac64f9b3614ec", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.326866149902344, "incorrect_loss_raw": 11.236105918884277, "correct_loss_per_char": 0.6939055124918619, "incorrect_loss_per_char": 1.042152047675589, "correct_loss_per_token": 4.163433074951172, "incorrect_loss_per_token": 6.090879678726196, "correct_loss_uncond": -9.058433532714844, "incorrect_loss_uncond": -7.018589496612549}, "model_output": [{"sum_logits": -8.326866149902344, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -17.385299682617188, "logits_per_token": -4.163433074951172, "logits_per_char": -0.6939055124918619, "num_chars": 12}, {"sum_logits": -8.595014572143555, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -14.46192741394043, "logits_per_token": -8.595014572143555, "logits_per_char": -0.8595014572143554, "num_chars": 10}, {"sum_logits": -5.110416412353516, "num_tokens": 1, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -9.92572021484375, "logits_per_token": -5.110416412353516, "logits_per_char": -1.0220832824707031, "num_chars": 5}, {"sum_logits": -11.39335823059082, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.872962951660156, "logits_per_token": -5.69667911529541, "logits_per_char": -1.4241697788238525, "num_chars": 8}, {"sum_logits": -19.84563446044922, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -28.75817108154297, "logits_per_token": -4.961408615112305, "logits_per_char": -0.8628536721934443, "num_chars": 23}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1193, "native_id": "8b548832703a8c68a788e2f9c0e222ae", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 9.6727294921875, "incorrect_loss_raw": 11.629182815551758, "correct_loss_per_char": 1.9345458984375, "incorrect_loss_per_char": 1.986464045918177, "correct_loss_per_token": 9.6727294921875, "incorrect_loss_per_token": 10.191579699516296, "correct_loss_uncond": -2.5105247497558594, "incorrect_loss_uncond": -3.2687928676605225}, "model_output": [{"sum_logits": -9.6727294921875, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.18325424194336, "logits_per_token": -9.6727294921875, "logits_per_char": -1.9345458984375, "num_chars": 5}, {"sum_logits": -13.863892555236816, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.892448425292969, "logits_per_token": -13.863892555236816, "logits_per_char": -1.9805560793195451, "num_chars": 7}, {"sum_logits": -11.500824928283691, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.47467041015625, "logits_per_token": -5.750412464141846, "logits_per_char": -1.2778694364759657, "num_chars": 9}, {"sum_logits": -12.011455535888672, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.60795783996582, "logits_per_token": -12.011455535888672, "logits_per_char": -2.402291107177734, "num_chars": 5}, {"sum_logits": -9.140558242797852, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -11.616826057434082, "logits_per_token": -9.140558242797852, "logits_per_char": -2.285139560699463, "num_chars": 4}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1194, "native_id": "1ddd239a2a6438a891cb411b82e7f450", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.625256299972534, "incorrect_loss_raw": 9.810524225234985, "correct_loss_per_char": 0.23865966363386673, "incorrect_loss_per_char": 1.7014533480008445, "correct_loss_per_token": 2.625256299972534, "incorrect_loss_per_token": 7.914458632469177, "correct_loss_uncond": -10.613688707351685, "incorrect_loss_uncond": -5.696498155593872}, "model_output": [{"sum_logits": -9.109057426452637, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -16.65142059326172, "logits_per_token": -9.109057426452637, "logits_per_char": -1.3012939180646623, "num_chars": 7}, {"sum_logits": -7.137168884277344, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -7.137168884277344, "logits_per_char": -1.0195955548967635, "num_chars": 7}, {"sum_logits": -15.168524742126465, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -18.170650482177734, "logits_per_token": -7.584262371063232, "logits_per_char": -2.5280874570210776, "num_chars": 6}, {"sum_logits": -7.827345848083496, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.934737205505371, "logits_per_token": -7.827345848083496, "logits_per_char": -1.956836462020874, "num_chars": 4}, {"sum_logits": -2.625256299972534, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.238945007324219, "logits_per_token": -2.625256299972534, "logits_per_char": -0.23865966363386673, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1195, "native_id": "6544a50bf9563d52dbd2034e81df0bf3", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 2.4859812259674072, "incorrect_loss_raw": 8.877927541732788, "correct_loss_per_char": 0.22599829326976428, "incorrect_loss_per_char": 0.9094851573308309, "correct_loss_per_token": 2.4859812259674072, "incorrect_loss_per_token": 5.249819517135621, "correct_loss_uncond": -11.662383794784546, "incorrect_loss_uncond": -7.939802408218384}, "model_output": [{"sum_logits": -7.80926513671875, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.630867004394531, "logits_per_token": -7.80926513671875, "logits_per_char": -0.8676961263020834, "num_chars": 9}, {"sum_logits": -10.927898406982422, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -20.827516555786133, "logits_per_token": -3.642632802327474, "logits_per_char": -1.0927898406982421, "num_chars": 10}, {"sum_logits": -5.9337968826293945, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.184347152709961, "logits_per_token": -5.9337968826293945, "logits_per_char": -0.5933796882629394, "num_chars": 10}, {"sum_logits": -2.4859812259674072, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.148365020751953, "logits_per_token": -2.4859812259674072, "logits_per_char": -0.22599829326976428, "num_chars": 11}, {"sum_logits": -10.840749740600586, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -16.628189086914062, "logits_per_token": -3.613583246866862, "logits_per_char": -1.0840749740600586, "num_chars": 10}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1196, "native_id": "5ff6ce8ad88459272ffe23d33db4970a", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 3, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.747282981872559, "incorrect_loss_raw": 13.126399278640747, "correct_loss_per_char": 0.8434103727340698, "incorrect_loss_per_char": 1.4948569048018685, "correct_loss_per_token": 6.747282981872559, "incorrect_loss_per_token": 9.126537322998047, "correct_loss_uncond": -8.261372566223145, "incorrect_loss_uncond": -4.1713409423828125}, "model_output": [{"sum_logits": -18.257362365722656, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -21.208097457885742, "logits_per_token": -9.128681182861328, "logits_per_char": -1.8257362365722656, "num_chars": 10}, {"sum_logits": -13.741533279418945, "num_tokens": 2, "num_tokens_all": 158, "is_greedy": false, "sum_logits_uncond": -17.121227264404297, "logits_per_token": -6.870766639709473, "logits_per_char": -1.5268370310465496, "num_chars": 9}, {"sum_logits": -9.534244537353516, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -9.534244537353516, "logits_per_char": -1.0593605041503906, "num_chars": 9}, {"sum_logits": -6.747282981872559, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -15.008655548095703, "logits_per_token": -6.747282981872559, "logits_per_char": -0.8434103727340698, "num_chars": 8}, {"sum_logits": -10.972456932067871, "num_tokens": 1, "num_tokens_all": 157, "is_greedy": false, "sum_logits_uncond": -14.753283500671387, "logits_per_token": -10.972456932067871, "logits_per_char": -1.5674938474382674, "num_chars": 7}], "label": 3, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1197, "native_id": "2ca05683157a3cd89d82016f13e560ec", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 3, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.754295825958252, "incorrect_loss_raw": 7.884958028793335, "correct_loss_per_char": 0.417143980662028, "incorrect_loss_per_char": 0.9884929373860359, "correct_loss_per_token": 3.754295825958252, "incorrect_loss_per_token": 5.232024371623993, "correct_loss_uncond": -11.101763248443604, "incorrect_loss_uncond": -8.389071226119995}, "model_output": [{"sum_logits": -4.715233325958252, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -12.666316032409668, "logits_per_token": -4.715233325958252, "logits_per_char": -0.9430466651916504, "num_chars": 5}, {"sum_logits": -3.754295825958252, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.856059074401855, "logits_per_token": -3.754295825958252, "logits_per_char": -0.417143980662028, "num_chars": 9}, {"sum_logits": -5.601129531860352, "num_tokens": 1, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.087485313415527, "logits_per_token": -5.601129531860352, "logits_per_char": -1.1202259063720703, "num_chars": 5}, {"sum_logits": -6.177273273468018, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -18.202571868896484, "logits_per_token": -3.088636636734009, "logits_per_char": -0.3860795795917511, "num_chars": 16}, {"sum_logits": -15.046195983886719, "num_tokens": 2, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -7.523097991943359, "logits_per_char": -1.5046195983886719, "num_chars": 10}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1198, "native_id": "1a8fbab20bbdf0bbf3961894662d5f7c", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.0837857723236084, "incorrect_loss_raw": 12.506731510162354, "correct_loss_per_char": 0.10837857723236084, "incorrect_loss_per_char": 1.4565999085166923, "correct_loss_per_token": 1.0837857723236084, "incorrect_loss_per_token": 10.905516624450684, "correct_loss_uncond": -11.214118242263794, "incorrect_loss_uncond": -0.7189109325408936}, "model_output": [{"sum_logits": -12.807011604309082, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.452452659606934, "logits_per_token": -12.807011604309082, "logits_per_char": -1.6008764505386353, "num_chars": 8}, {"sum_logits": -13.185281753540039, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -12.743459701538086, "logits_per_token": -13.185281753540039, "logits_per_char": -1.198661977594549, "num_chars": 11}, {"sum_logits": -12.80971908569336, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -14.505773544311523, "logits_per_token": -6.40485954284668, "logits_per_char": -1.4233021206325955, "num_chars": 9}, {"sum_logits": -11.224913597106934, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -11.200883865356445, "logits_per_token": -11.224913597106934, "logits_per_char": -1.6035590853009904, "num_chars": 7}, {"sum_logits": -1.0837857723236084, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": true, "sum_logits_uncond": -12.297904014587402, "logits_per_token": -1.0837857723236084, "logits_per_char": -0.10837857723236084, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1199, "native_id": "5b5d2a8b83282f61c68a870116042f64", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.147209167480469, "incorrect_loss_raw": 11.520405769348145, "correct_loss_per_char": 0.558837197043679, "incorrect_loss_per_char": 1.1068321901209215, "correct_loss_per_token": 3.0736045837402344, "incorrect_loss_per_token": 6.058923403422038, "correct_loss_uncond": -9.488651275634766, "incorrect_loss_uncond": -4.677735328674316}, "model_output": [{"sum_logits": -9.786396026611328, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -10.858423233032227, "logits_per_token": -9.786396026611328, "logits_per_char": -1.6310660044352214, "num_chars": 6}, {"sum_logits": -11.933587074279785, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -19.89373207092285, "logits_per_token": -3.9778623580932617, "logits_per_char": -0.7019757102517521, "num_chars": 17}, {"sum_logits": -10.256308555603027, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -17.669754028320312, "logits_per_token": -3.4187695185343423, "logits_per_char": -0.6837539037068685, "num_chars": 15}, {"sum_logits": -14.105331420898438, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -16.370655059814453, "logits_per_token": -7.052665710449219, "logits_per_char": -1.4105331420898437, "num_chars": 10}, {"sum_logits": -6.147209167480469, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -15.635860443115234, "logits_per_token": -3.0736045837402344, "logits_per_char": -0.558837197043679, "num_chars": 11}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1200, "native_id": "cfa081b5ba90dae4d7ddb5b7ad9d369a", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 3, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 7.659545421600342, "incorrect_loss_raw": 11.697078466415405, "correct_loss_per_char": 1.9148863554000854, "incorrect_loss_per_char": 1.6267618073357477, "correct_loss_per_token": 7.659545421600342, "incorrect_loss_per_token": 10.037100076675415, "correct_loss_uncond": -4.123258113861084, "incorrect_loss_uncond": -2.272916316986084}, "model_output": [{"sum_logits": -12.381394386291504, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.279019355773926, "logits_per_token": -12.381394386291504, "logits_per_char": -2.063565731048584, "num_chars": 6}, {"sum_logits": -11.168828010559082, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.019183158874512, "logits_per_token": -11.168828010559082, "logits_per_char": -1.8614713350931804, "num_chars": 6}, {"sum_logits": -9.958264350891113, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -14.49753475189209, "logits_per_token": -9.958264350891113, "logits_per_char": -1.1064738167656794, "num_chars": 9}, {"sum_logits": -13.279827117919922, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.08424186706543, "logits_per_token": -6.639913558959961, "logits_per_char": -1.4755363464355469, "num_chars": 9}, {"sum_logits": -7.659545421600342, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -11.782803535461426, "logits_per_token": -7.659545421600342, "logits_per_char": -1.9148863554000854, "num_chars": 4}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1201, "native_id": "009a7aabffe0583fc2df46656b29c326", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.8391783237457275, "incorrect_loss_raw": 11.32469654083252, "correct_loss_per_char": 0.2953214095189021, "incorrect_loss_per_char": 1.162746736738417, "correct_loss_per_token": 1.9195891618728638, "incorrect_loss_per_token": 6.928189992904663, "correct_loss_uncond": -11.857266187667847, "incorrect_loss_uncond": -5.04211163520813}, "model_output": [{"sum_logits": -17.33336067199707, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -20.477008819580078, "logits_per_token": -8.666680335998535, "logits_per_char": -1.9259289635552301, "num_chars": 9}, {"sum_logits": -10.126733779907227, "num_tokens": 1, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -14.86549186706543, "logits_per_token": -10.126733779907227, "logits_per_char": -1.446676254272461, "num_chars": 7}, {"sum_logits": -3.8391783237457275, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -15.696444511413574, "logits_per_token": -1.9195891618728638, "logits_per_char": -0.2953214095189021, "num_chars": 13}, {"sum_logits": -5.348136901855469, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -12.291243553161621, "logits_per_token": -2.6740684509277344, "logits_per_char": -0.4456780751546224, "num_chars": 12}, {"sum_logits": -12.490554809570312, "num_tokens": 2, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -17.83348846435547, "logits_per_token": -6.245277404785156, "logits_per_char": -0.8327036539713542, "num_chars": 15}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1202, "native_id": "2521b3fe6bfd6aeb91f9107dc7c4fbee", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 1.546706199645996, "incorrect_loss_raw": 10.779401123523712, "correct_loss_per_char": 0.1546706199645996, "incorrect_loss_per_char": 1.0379754948474111, "correct_loss_per_token": 1.546706199645996, "incorrect_loss_per_token": 8.248288929462433, "correct_loss_uncond": -12.654027938842773, "incorrect_loss_uncond": -4.9832958579063416}, "model_output": [{"sum_logits": -20.248897552490234, "num_tokens": 2, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -21.440462112426758, "logits_per_token": -10.124448776245117, "logits_per_char": -1.349926503499349, "num_chars": 15}, {"sum_logits": -12.432068824768066, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.558603286743164, "logits_per_token": -12.432068824768066, "logits_per_char": -1.3813409805297852, "num_chars": 9}, {"sum_logits": -1.546706199645996, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": true, "sum_logits_uncond": -14.20073413848877, "logits_per_token": -1.546706199645996, "logits_per_char": -0.1546706199645996, "num_chars": 10}, {"sum_logits": -6.499064922332764, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -12.871313095092773, "logits_per_token": -6.499064922332764, "logits_per_char": -0.9284378460475377, "num_chars": 7}, {"sum_logits": -3.937573194503784, "num_tokens": 1, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -14.18040943145752, "logits_per_token": -3.937573194503784, "logits_per_char": -0.492196649312973, "num_chars": 8}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1203, "native_id": "3fe45ab3bd4a844ea290050fc0ece8c1_1", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 8.439234733581543, "incorrect_loss_raw": 14.032194375991821, "correct_loss_per_char": 0.8439234733581543, "incorrect_loss_per_char": 1.219909881788587, "correct_loss_per_token": 8.439234733581543, "incorrect_loss_per_token": 8.391516208648682, "correct_loss_uncond": -6.267120361328125, "incorrect_loss_uncond": -2.3000357151031494}, "model_output": [{"sum_logits": -11.003352165222168, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -16.108352661132812, "logits_per_token": -11.003352165222168, "logits_per_char": -1.2225946850246854, "num_chars": 9}, {"sum_logits": -8.686210632324219, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.333471298217773, "logits_per_token": -4.343105316162109, "logits_per_char": -0.620443616594587, "num_chars": 14}, {"sum_logits": -18.831369400024414, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -17.617534637451172, "logits_per_token": -9.415684700012207, "logits_per_char": -1.569280783335368, "num_chars": 12}, {"sum_logits": -17.607845306396484, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -13.269561767578125, "logits_per_token": -8.803922653198242, "logits_per_char": -1.467320442199707, "num_chars": 12}, {"sum_logits": -8.439234733581543, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -14.706355094909668, "logits_per_token": -8.439234733581543, "logits_per_char": -0.8439234733581543, "num_chars": 10}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1204, "native_id": "a2e0f6b5651e5271fcff8d6f5c9adfee", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 7.167710781097412, "incorrect_loss_raw": 11.057767391204834, "correct_loss_per_char": 0.7167710781097412, "incorrect_loss_per_char": 0.9067699214725784, "correct_loss_per_token": 3.583855390548706, "incorrect_loss_per_token": 6.270893057187399, "correct_loss_uncond": -7.0944037437438965, "incorrect_loss_uncond": -4.972245931625366}, "model_output": [{"sum_logits": -7.167710781097412, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -14.262114524841309, "logits_per_token": -3.583855390548706, "logits_per_char": -0.7167710781097412, "num_chars": 10}, {"sum_logits": -9.508047103881836, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -13.737719535827637, "logits_per_token": -3.169349034627279, "logits_per_char": -0.8643679185347124, "num_chars": 11}, {"sum_logits": -9.105423927307129, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.490232467651367, "logits_per_token": -9.105423927307129, "logits_per_char": -0.9105423927307129, "num_chars": 10}, {"sum_logits": -13.566264152526855, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -17.976200103759766, "logits_per_token": -6.783132076263428, "logits_per_char": -0.8478915095329285, "num_chars": 16}, {"sum_logits": -12.051334381103516, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.91590118408203, "logits_per_token": -6.025667190551758, "logits_per_char": -1.0042778650919597, "num_chars": 12}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1205, "native_id": "d6900a01a9dd6627b4bb22b0f6d191a5", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.188724040985107, "incorrect_loss_raw": 15.386168956756592, "correct_loss_per_char": 0.6485905051231384, "incorrect_loss_per_char": 1.017174025482885, "correct_loss_per_token": 2.5943620204925537, "incorrect_loss_per_token": 4.490216493606567, "correct_loss_uncond": -13.899586200714111, "incorrect_loss_uncond": -8.267163276672363}, "model_output": [{"sum_logits": -20.435083389282227, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -27.998855590820312, "logits_per_token": -6.811694463094075, "logits_per_char": -1.8577348535711116, "num_chars": 11}, {"sum_logits": -14.906877517700195, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -22.84883689880371, "logits_per_token": -4.9689591725667315, "logits_per_char": -0.8768751481000114, "num_chars": 17}, {"sum_logits": -15.324155807495117, "num_tokens": 6, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -24.498493194580078, "logits_per_token": -2.554025967915853, "logits_per_char": -0.7297217051188151, "num_chars": 21}, {"sum_logits": -10.878559112548828, "num_tokens": 3, "num_tokens_all": 135, "is_greedy": false, "sum_logits_uncond": -19.26714324951172, "logits_per_token": -3.6261863708496094, "logits_per_char": -0.6043643951416016, "num_chars": 18}, {"sum_logits": -5.188724040985107, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.08831024169922, "logits_per_token": -2.5943620204925537, "logits_per_char": -0.6485905051231384, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1206, "native_id": "8f2976690c83be6b8fa3a1196dfd9722", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 4, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.198122024536133, "incorrect_loss_raw": 10.514516830444336, "correct_loss_per_char": 0.5465414683024089, "incorrect_loss_per_char": 1.1582187507833754, "correct_loss_per_token": 4.099061012268066, "incorrect_loss_per_token": 6.26580011844635, "correct_loss_uncond": -9.04948616027832, "incorrect_loss_uncond": -6.113154411315918}, "model_output": [{"sum_logits": -13.11074161529541, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -19.307220458984375, "logits_per_token": -6.555370807647705, "logits_per_char": -1.6388427019119263, "num_chars": 8}, {"sum_logits": -10.197932243347168, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -18.08420753479004, "logits_per_token": -5.098966121673584, "logits_per_char": -1.0197932243347168, "num_chars": 10}, {"sum_logits": -10.681059837341309, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -16.382843017578125, "logits_per_token": -5.340529918670654, "logits_per_char": -0.821619987487793, "num_chars": 13}, {"sum_logits": -8.068333625793457, "num_tokens": 1, "num_tokens_all": 151, "is_greedy": false, "sum_logits_uncond": -12.736413955688477, "logits_per_token": -8.068333625793457, "logits_per_char": -1.1526190893990653, "num_chars": 7}, {"sum_logits": -8.198122024536133, "num_tokens": 2, "num_tokens_all": 152, "is_greedy": false, "sum_logits_uncond": -17.247608184814453, "logits_per_token": -4.099061012268066, "logits_per_char": -0.5465414683024089, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1207, "native_id": "570be8c1edb8c638603dc5c8cae421cc", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 3, "predicted_index_uncond": 3, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 3.860045909881592, "incorrect_loss_raw": 11.075539231300354, "correct_loss_per_char": 0.5514351299830845, "incorrect_loss_per_char": 1.5563608547051748, "correct_loss_per_token": 3.860045909881592, "incorrect_loss_per_token": 8.286314606666565, "correct_loss_uncond": -8.289247989654541, "incorrect_loss_uncond": -5.98364531993866}, "model_output": [{"sum_logits": -7.188313007354736, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -13.908945083618164, "logits_per_token": -7.188313007354736, "logits_per_char": -1.797078251838684, "num_chars": 4}, {"sum_logits": -22.313796997070312, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -24.5127010345459, "logits_per_token": -11.156898498535156, "logits_per_char": -2.2313796997070314, "num_chars": 10}, {"sum_logits": -3.860045909881592, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.149293899536133, "logits_per_token": -3.860045909881592, "logits_per_char": -0.5514351299830845, "num_chars": 7}, {"sum_logits": -6.540205001831055, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -15.723100662231445, "logits_per_token": -6.540205001831055, "logits_per_char": -0.5450170834859213, "num_chars": 12}, {"sum_logits": -8.259841918945312, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -14.091991424560547, "logits_per_token": -8.259841918945312, "logits_per_char": -1.6519683837890624, "num_chars": 5}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1208, "native_id": "08d3175de59a639be02f2ebc032d56bd", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 11.589484214782715, "incorrect_loss_raw": 11.454414248466492, "correct_loss_per_char": 0.6817343655754539, "incorrect_loss_per_char": 1.441155172908117, "correct_loss_per_token": 5.794742107391357, "incorrect_loss_per_token": 6.471238096555074, "correct_loss_uncond": -7.326001167297363, "incorrect_loss_uncond": -8.246716141700745}, "model_output": [{"sum_logits": -19.778644561767578, "num_tokens": 3, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -28.19113540649414, "logits_per_token": -6.592881520589192, "logits_per_char": -1.648220380147298, "num_chars": 12}, {"sum_logits": -13.49388313293457, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -20.071252822875977, "logits_per_token": -6.746941566467285, "logits_per_char": -1.9276975904192244, "num_chars": 7}, {"sum_logits": -3.603635311126709, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.535295486450195, "logits_per_token": -3.603635311126709, "logits_per_char": -0.4004039234585232, "num_chars": 9}, {"sum_logits": -8.94149398803711, "num_tokens": 1, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.006837844848633, "logits_per_token": -8.94149398803711, "logits_per_char": -1.7882987976074218, "num_chars": 5}, {"sum_logits": -11.589484214782715, "num_tokens": 2, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -18.915485382080078, "logits_per_token": -5.794742107391357, "logits_per_char": -0.6817343655754539, "num_chars": 17}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1209, "native_id": "549cf641318edfc0510fa7c7dbb359e1", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.3838729858398438, "incorrect_loss_raw": 8.671321392059326, "correct_loss_per_char": 0.24170521327427455, "incorrect_loss_per_char": 1.2342487053265647, "correct_loss_per_token": 1.6919364929199219, "incorrect_loss_per_token": 7.260547637939453, "correct_loss_uncond": -16.285144805908203, "incorrect_loss_uncond": -8.145019769668579}, "model_output": [{"sum_logits": -8.696805000305176, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -14.456185340881348, "logits_per_token": -8.696805000305176, "logits_per_char": -2.174201250076294, "num_chars": 4}, {"sum_logits": -10.176053047180176, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.405698776245117, "logits_per_token": -10.176053047180176, "logits_per_char": -1.4537218638828822, "num_chars": 7}, {"sum_logits": -3.3838729858398438, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -19.669017791748047, "logits_per_token": -1.6919364929199219, "logits_per_char": -0.24170521327427455, "num_chars": 14}, {"sum_logits": -11.286190032958984, "num_tokens": 2, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -21.86818504333496, "logits_per_token": -5.643095016479492, "logits_per_char": -0.8061564309256417, "num_chars": 14}, {"sum_logits": -4.526237487792969, "num_tokens": 1, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -15.535295486450195, "logits_per_token": -4.526237487792969, "logits_per_char": -0.502915276421441, "num_chars": 9}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1210, "native_id": "dfa23d3422b7294843447b6950d2b476", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 15.892011642456055, "incorrect_loss_raw": 14.982888460159302, "correct_loss_per_char": 1.0594674428304036, "incorrect_loss_per_char": 1.0461779627883643, "correct_loss_per_token": 5.2973372141520185, "incorrect_loss_per_token": 7.491444230079651, "correct_loss_uncond": -5.10822868347168, "incorrect_loss_uncond": -4.64891791343689}, "model_output": [{"sum_logits": -15.168051719665527, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.134544372558594, "logits_per_token": -7.584025859832764, "logits_per_char": -1.011203447977702, "num_chars": 15}, {"sum_logits": -15.085912704467773, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -21.589622497558594, "logits_per_token": -7.542956352233887, "logits_per_char": -1.0775651931762695, "num_chars": 14}, {"sum_logits": -11.272590637207031, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -19.13974380493164, "logits_per_token": -5.636295318603516, "logits_per_char": -1.1272590637207032, "num_chars": 10}, {"sum_logits": -18.404998779296875, "num_tokens": 2, "num_tokens_all": 136, "is_greedy": false, "sum_logits_uncond": -16.663314819335938, "logits_per_token": -9.202499389648438, "logits_per_char": -0.9686841462787829, "num_chars": 19}, {"sum_logits": -15.892011642456055, "num_tokens": 3, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -21.000240325927734, "logits_per_token": -5.2973372141520185, "logits_per_char": -1.0594674428304036, "num_chars": 15}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1211, "native_id": "1fe90a4aee405e1aa2279442d28803ae", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 4, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.1803483963012695, "incorrect_loss_raw": 7.642058849334717, "correct_loss_per_char": 0.5150290330251058, "incorrect_loss_per_char": 0.8890606760978699, "correct_loss_per_token": 3.0901741981506348, "incorrect_loss_per_token": 3.8210294246673584, "correct_loss_uncond": -15.595170021057129, "incorrect_loss_uncond": -9.200369358062744}, "model_output": [{"sum_logits": -6.677468776702881, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -15.432003021240234, "logits_per_token": -3.3387343883514404, "logits_per_char": -0.741940975189209, "num_chars": 9}, {"sum_logits": -8.208415031433105, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -18.935102462768555, "logits_per_token": -4.104207515716553, "logits_per_char": -0.6840345859527588, "num_chars": 12}, {"sum_logits": -5.620319843292236, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.932453155517578, "logits_per_token": -2.810159921646118, "logits_per_char": -1.1240639686584473, "num_chars": 5}, {"sum_logits": -10.062031745910645, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -16.070154190063477, "logits_per_token": -5.031015872955322, "logits_per_char": -1.0062031745910645, "num_chars": 10}, {"sum_logits": -6.1803483963012695, "num_tokens": 2, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -21.7755184173584, "logits_per_token": -3.0901741981506348, "logits_per_char": -0.5150290330251058, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1212, "native_id": "01794dde3ca2991615f1aa2f63fb22e3", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 1, "predicted_index_per_char": 0, "predicted_index_uncond": 4, "correct_choice": 4, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 1, "correct_loss_raw": 5.002102851867676, "incorrect_loss_raw": 10.74232530593872, "correct_loss_per_char": 0.6252628564834595, "incorrect_loss_per_char": 1.0297225396884115, "correct_loss_per_token": 5.002102851867676, "incorrect_loss_per_token": 4.992943604787191, "correct_loss_uncond": -9.782590866088867, "incorrect_loss_uncond": -7.890766143798828}, "model_output": [{"sum_logits": -10.500808715820312, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -17.672609329223633, "logits_per_token": -5.250404357910156, "logits_per_char": -0.5526741429379112, "num_chars": 19}, {"sum_logits": -9.07725715637207, "num_tokens": 3, "num_tokens_all": 155, "is_greedy": false, "sum_logits_uncond": -18.811241149902344, "logits_per_token": -3.025752385457357, "logits_per_char": -0.907725715637207, "num_chars": 10}, {"sum_logits": -10.616564750671387, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -19.15056610107422, "logits_per_token": -5.308282375335693, "logits_per_char": -1.0616564750671387, "num_chars": 10}, {"sum_logits": -12.774670600891113, "num_tokens": 2, "num_tokens_all": 154, "is_greedy": false, "sum_logits_uncond": -18.89794921875, "logits_per_token": -6.387335300445557, "logits_per_char": -1.5968338251113892, "num_chars": 8}, {"sum_logits": -5.002102851867676, "num_tokens": 1, "num_tokens_all": 153, "is_greedy": false, "sum_logits_uncond": -14.784693717956543, "logits_per_token": -5.002102851867676, "logits_per_char": -0.6252628564834595, "num_chars": 8}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1213, "native_id": "f794e376672c98ac25d8f70506a26e68", "metrics": {"predicted_index_raw": 3, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 12.268699645996094, "incorrect_loss_raw": 14.296910285949707, "correct_loss_per_char": 0.876335688999721, "incorrect_loss_per_char": 1.70201325946384, "correct_loss_per_token": 6.134349822998047, "incorrect_loss_per_token": 12.56103503704071, "correct_loss_uncond": -7.337322235107422, "incorrect_loss_uncond": -0.13492107391357422}, "model_output": [{"sum_logits": -12.268699645996094, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -19.606021881103516, "logits_per_token": -6.134349822998047, "logits_per_char": -0.876335688999721, "num_chars": 14}, {"sum_logits": -14.930845260620117, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.275694847106934, "logits_per_token": -14.930845260620117, "logits_per_char": -1.6589828067355685, "num_chars": 9}, {"sum_logits": -16.402189254760742, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -14.932636260986328, "logits_per_token": -16.402189254760742, "logits_per_char": -1.3668491045633953, "num_chars": 12}, {"sum_logits": -11.967604637145996, "num_tokens": 1, "num_tokens_all": 133, "is_greedy": false, "sum_logits_uncond": -12.448840141296387, "logits_per_token": -11.967604637145996, "logits_per_char": -2.3935209274291993, "num_chars": 5}, {"sum_logits": -13.887001991271973, "num_tokens": 2, "num_tokens_all": 134, "is_greedy": false, "sum_logits_uncond": -16.070154190063477, "logits_per_token": -6.943500995635986, "logits_per_char": -1.3887001991271972, "num_chars": 10}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1214, "native_id": "ace8fa2943ba8414aebdb74b48906fae", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 0, "predicted_index_per_char": 3, "predicted_index_uncond": 2, "correct_choice": 4, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 21.114961624145508, "incorrect_loss_raw": 11.48377251625061, "correct_loss_per_char": 1.759580135345459, "incorrect_loss_per_char": 1.1620914503040476, "correct_loss_per_token": 5.278740406036377, "incorrect_loss_per_token": 6.199294010798137, "correct_loss_uncond": -6.542593002319336, "incorrect_loss_uncond": -5.7519612312316895}, "model_output": [{"sum_logits": -14.94845199584961, "num_tokens": 3, "num_tokens_all": 147, "is_greedy": false, "sum_logits_uncond": -16.520139694213867, "logits_per_token": -4.98281733194987, "logits_per_char": -1.6609391106499567, "num_chars": 9}, {"sum_logits": -8.64207935333252, "num_tokens": 1, "num_tokens_all": 145, "is_greedy": false, "sum_logits_uncond": -12.47563648223877, "logits_per_token": -8.64207935333252, "logits_per_char": -1.4403465588887532, "num_chars": 6}, {"sum_logits": -10.437801361083984, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -20.007043838500977, "logits_per_token": -5.218900680541992, "logits_per_char": -0.8029077970064603, "num_chars": 13}, {"sum_logits": -11.906757354736328, "num_tokens": 2, "num_tokens_all": 146, "is_greedy": false, "sum_logits_uncond": -19.940114974975586, "logits_per_token": -5.953378677368164, "logits_per_char": -0.7441723346710205, "num_chars": 16}, {"sum_logits": -21.114961624145508, "num_tokens": 4, "num_tokens_all": 148, "is_greedy": false, "sum_logits_uncond": -27.657554626464844, "logits_per_token": -5.278740406036377, "logits_per_char": -1.759580135345459, "num_chars": 12}], "label": 4, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1215, "native_id": "21ce6f7c5c3d1ad8cf234988c1ad471f", "metrics": {"predicted_index_raw": 0, "predicted_index_per_token": 0, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 5.902255058288574, "incorrect_loss_raw": 6.93070375919342, "correct_loss_per_char": 0.49185458819071454, "incorrect_loss_per_char": 1.0928488606498354, "correct_loss_per_token": 2.951127529144287, "incorrect_loss_per_token": 6.228540897369385, "correct_loss_uncond": -17.06831455230713, "incorrect_loss_uncond": -7.163995385169983}, "model_output": [{"sum_logits": -5.617302894592285, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -17.909461975097656, "logits_per_token": -2.8086514472961426, "logits_per_char": -0.8024718420846122, "num_chars": 7}, {"sum_logits": -5.902255058288574, "num_tokens": 2, "num_tokens_all": 141, "is_greedy": false, "sum_logits_uncond": -22.970569610595703, "logits_per_token": -2.951127529144287, "logits_per_char": -0.49185458819071454, "num_chars": 12}, {"sum_logits": -6.154685974121094, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -12.58038330078125, "logits_per_token": -6.154685974121094, "logits_per_char": -1.0257809956868489, "num_chars": 6}, {"sum_logits": -8.626968383789062, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -14.204301834106445, "logits_per_token": -8.626968383789062, "logits_per_char": -1.0783710479736328, "num_chars": 8}, {"sum_logits": -7.32385778427124, "num_tokens": 1, "num_tokens_all": 140, "is_greedy": false, "sum_logits_uncond": -11.684649467468262, "logits_per_token": -7.32385778427124, "logits_per_char": -1.464771556854248, "num_chars": 5}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1216, "native_id": "6c84e79d0595efd99596faa07c4961d0", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 0, "acc_per_char": 0, "acc_uncond": 0, "correct_loss_raw": 5.94776725769043, "incorrect_loss_raw": 7.9421539306640625, "correct_loss_per_char": 1.189553451538086, "incorrect_loss_per_char": 0.9206899424393973, "correct_loss_per_token": 5.94776725769043, "incorrect_loss_per_token": 5.274778723716736, "correct_loss_uncond": -7.903576850891113, "incorrect_loss_uncond": -7.428506135940552}, "model_output": [{"sum_logits": -5.94776725769043, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -13.851344108581543, "logits_per_token": -5.94776725769043, "logits_per_char": -1.189553451538086, "num_chars": 5}, {"sum_logits": -9.413216590881348, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -15.806502342224121, "logits_per_token": -4.706608295440674, "logits_per_char": -0.9413216590881348, "num_chars": 10}, {"sum_logits": -2.7035789489746094, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": true, "sum_logits_uncond": -12.71070384979248, "logits_per_token": -2.7035789489746094, "logits_per_char": -0.45059649149576825, "num_chars": 6}, {"sum_logits": -7.726035118103027, "num_tokens": 1, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -14.500212669372559, "logits_per_token": -7.726035118103027, "logits_per_char": -0.9657543897628784, "num_chars": 8}, {"sum_logits": -11.925785064697266, "num_tokens": 2, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -18.465221405029297, "logits_per_token": -5.962892532348633, "logits_per_char": -1.3250872294108074, "num_chars": 9}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1217, "native_id": "88f1fe6cfbcb1a25f25454341c789463", "metrics": {"predicted_index_raw": 1, "predicted_index_per_token": 1, "predicted_index_per_char": 1, "predicted_index_uncond": 1, "correct_choice": 1, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 3.6311023235321045, "incorrect_loss_raw": 10.308611392974854, "correct_loss_per_char": 0.24207348823547364, "incorrect_loss_per_char": 1.0960740759259178, "correct_loss_per_token": 1.8155511617660522, "incorrect_loss_per_token": 7.074796199798584, "correct_loss_uncond": -16.523104906082153, "incorrect_loss_uncond": -6.199044227600098}, "model_output": [{"sum_logits": -11.704944610595703, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -19.478294372558594, "logits_per_token": -5.852472305297852, "logits_per_char": -0.9754120508829752, "num_chars": 12}, {"sum_logits": -3.6311023235321045, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -20.154207229614258, "logits_per_token": -1.8155511617660522, "logits_per_char": -0.24207348823547364, "num_chars": 15}, {"sum_logits": -14.165576934814453, "num_tokens": 2, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -17.831636428833008, "logits_per_token": -7.082788467407227, "logits_per_char": -1.7706971168518066, "num_chars": 8}, {"sum_logits": -4.648235321044922, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -15.271281242370605, "logits_per_token": -4.648235321044922, "logits_per_char": -0.6640336172921317, "num_chars": 7}, {"sum_logits": -10.715688705444336, "num_tokens": 1, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -13.449410438537598, "logits_per_token": -10.715688705444336, "logits_per_char": -0.9741535186767578, "num_chars": 11}], "label": 1, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1218, "native_id": "5074bcaf0f700c9f3c8c563067af156a", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 4, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 0, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.347598075866699, "incorrect_loss_raw": 9.98235535621643, "correct_loss_per_char": 0.7052886750962999, "incorrect_loss_per_char": 1.1762210845947265, "correct_loss_per_token": 6.347598075866699, "incorrect_loss_per_token": 8.16938328742981, "correct_loss_uncond": -9.926844596862793, "incorrect_loss_uncond": -4.493027925491333}, "model_output": [{"sum_logits": -8.932394027709961, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -15.301833152770996, "logits_per_token": -8.932394027709961, "logits_per_char": -1.7864788055419922, "num_chars": 5}, {"sum_logits": -8.80131721496582, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.762389183044434, "logits_per_token": -8.80131721496582, "logits_per_char": -0.880131721496582, "num_chars": 10}, {"sum_logits": -6.347598075866699, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.274442672729492, "logits_per_token": -6.347598075866699, "logits_per_char": -0.7052886750962999, "num_chars": 9}, {"sum_logits": -11.317877769470215, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -12.624444961547852, "logits_per_token": -11.317877769470215, "logits_per_char": -1.1317877769470215, "num_chars": 10}, {"sum_logits": -10.877832412719727, "num_tokens": 3, "num_tokens_all": 144, "is_greedy": false, "sum_logits_uncond": -15.212865829467773, "logits_per_token": -3.625944137573242, "logits_per_char": -0.9064860343933105, "num_chars": 12}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1219, "native_id": "6a253e076cd2af00e17d9950d70daf47", "metrics": {"predicted_index_raw": 4, "predicted_index_per_token": 0, "predicted_index_per_char": 0, "predicted_index_uncond": 0, "correct_choice": 0, "acc_raw": 0, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 8.507508277893066, "incorrect_loss_raw": 9.67942750453949, "correct_loss_per_char": 0.5004416634054745, "incorrect_loss_per_char": 1.1712446996143886, "correct_loss_per_token": 4.253754138946533, "incorrect_loss_per_token": 9.67942750453949, "correct_loss_uncond": -9.981457710266113, "incorrect_loss_uncond": -4.827184319496155}, "model_output": [{"sum_logits": -8.507508277893066, "num_tokens": 2, "num_tokens_all": 143, "is_greedy": false, "sum_logits_uncond": -18.48896598815918, "logits_per_token": -4.253754138946533, "logits_per_char": -0.5004416634054745, "num_chars": 17}, {"sum_logits": -11.269792556762695, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -16.356603622436523, "logits_per_token": -11.269792556762695, "logits_per_char": -1.1269792556762694, "num_chars": 10}, {"sum_logits": -11.935630798339844, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.794161796569824, "logits_per_token": -11.935630798339844, "logits_per_char": -1.4919538497924805, "num_chars": 8}, {"sum_logits": -8.399734497070312, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -14.396621704101562, "logits_per_token": -8.399734497070312, "logits_per_char": -1.049966812133789, "num_chars": 8}, {"sum_logits": -7.112552165985107, "num_tokens": 1, "num_tokens_all": 142, "is_greedy": false, "sum_logits_uncond": -13.479060173034668, "logits_per_token": -7.112552165985107, "logits_per_char": -1.0160788808550154, "num_chars": 7}], "label": 0, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"} +{"doc_id": 1220, "native_id": "5af7c7860e3be61d4cfd814cc109f9d9", "metrics": {"predicted_index_raw": 2, "predicted_index_per_token": 2, "predicted_index_per_char": 2, "predicted_index_uncond": 2, "correct_choice": 2, "acc_raw": 1, "acc_per_token": 1, "acc_per_char": 1, "acc_uncond": 1, "correct_loss_raw": 6.387145042419434, "incorrect_loss_raw": 14.299842357635498, "correct_loss_per_char": 0.31935725212097166, "incorrect_loss_per_char": 1.1290642752163653, "correct_loss_per_token": 2.1290483474731445, "incorrect_loss_per_token": 6.1963050365448, "correct_loss_uncond": -12.357953071594238, "incorrect_loss_uncond": -7.358813285827637}, "model_output": [{"sum_logits": -10.238374710083008, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.736038208007812, "logits_per_token": -5.119187355041504, "logits_per_char": -0.6825583140055339, "num_chars": 15}, {"sum_logits": -15.257858276367188, "num_tokens": 4, "num_tokens_all": 139, "is_greedy": false, "sum_logits_uncond": -24.884029388427734, "logits_per_token": -3.814464569091797, "logits_per_char": -0.6633851424507473, "num_chars": 23}, {"sum_logits": -6.387145042419434, "num_tokens": 3, "num_tokens_all": 138, "is_greedy": false, "sum_logits_uncond": -18.745098114013672, "logits_per_token": -2.1290483474731445, "logits_per_char": -0.31935725212097166, "num_chars": 20}, {"sum_logits": -13.976730346679688, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -22.633941650390625, "logits_per_token": -6.988365173339844, "logits_per_char": -1.3976730346679687, "num_chars": 10}, {"sum_logits": -17.72640609741211, "num_tokens": 2, "num_tokens_all": 137, "is_greedy": false, "sum_logits_uncond": -19.380613327026367, "logits_per_token": -8.863203048706055, "logits_per_char": -1.7726406097412108, "num_chars": 10}], "label": 2, "task_hash": "648cdcc5233e8fead60944b3946367f7", "model_hash": "a15b9c896b1d6d099cb7fb72a8261ed0"}