chanind commited on
Commit
d4abb36
·
verified ·
1 Parent(s): 0efaf41

Upload sparse_probing/gemma-2-2b-res-snap-matryoshka-dc_blocks.8.hook_resid_post_eval_results.json with huggingface_hub

Browse files
sparse_probing/gemma-2-2b-res-snap-matryoshka-dc_blocks.8.hook_resid_post_eval_results.json ADDED
@@ -0,0 +1,675 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "eval_type_id": "sparse_probing",
3
+ "eval_config": {
4
+ "random_seed": 42,
5
+ "dataset_names": [
6
+ "LabHC/bias_in_bios_class_set1",
7
+ "LabHC/bias_in_bios_class_set2",
8
+ "LabHC/bias_in_bios_class_set3",
9
+ "canrager/amazon_reviews_mcauley_1and5",
10
+ "canrager/amazon_reviews_mcauley_1and5_sentiment",
11
+ "codeparrot/github-code",
12
+ "fancyzhx/ag_news",
13
+ "Helsinki-NLP/europarl"
14
+ ],
15
+ "probe_train_set_size": 4000,
16
+ "probe_test_set_size": 1000,
17
+ "context_length": 128,
18
+ "sae_batch_size": 125,
19
+ "llm_batch_size": 32,
20
+ "llm_dtype": "bfloat16",
21
+ "model_name": "gemma-2-2b",
22
+ "k_values": [
23
+ 1,
24
+ 2,
25
+ 5
26
+ ],
27
+ "lower_vram_usage": false
28
+ },
29
+ "eval_id": "acf88c24-09bd-4561-9b58-1bd96edb30c6",
30
+ "datetime_epoch_millis": 1745624730281,
31
+ "eval_result_metrics": {
32
+ "llm": {
33
+ "llm_test_accuracy": 0.9575375493615865,
34
+ "llm_top_1_test_accuracy": 0.6629437499999999,
35
+ "llm_top_2_test_accuracy": 0.73073125,
36
+ "llm_top_5_test_accuracy": 0.78996875,
37
+ "llm_top_10_test_accuracy": null,
38
+ "llm_top_20_test_accuracy": null,
39
+ "llm_top_50_test_accuracy": null,
40
+ "llm_top_100_test_accuracy": null
41
+ },
42
+ "sae": {
43
+ "sae_test_accuracy": 0.9548937939107418,
44
+ "sae_top_1_test_accuracy": 0.7905375000000001,
45
+ "sae_top_2_test_accuracy": 0.83363125,
46
+ "sae_top_5_test_accuracy": 0.8840875,
47
+ "sae_top_10_test_accuracy": null,
48
+ "sae_top_20_test_accuracy": null,
49
+ "sae_top_50_test_accuracy": null,
50
+ "sae_top_100_test_accuracy": null
51
+ }
52
+ },
53
+ "eval_result_details": [
54
+ {
55
+ "dataset_name": "LabHC/bias_in_bios_class_set1_results",
56
+ "llm_test_accuracy": 0.9702000498771668,
57
+ "llm_top_1_test_accuracy": 0.7175999999999999,
58
+ "llm_top_2_test_accuracy": 0.72,
59
+ "llm_top_5_test_accuracy": 0.7806,
60
+ "llm_top_10_test_accuracy": null,
61
+ "llm_top_20_test_accuracy": null,
62
+ "llm_top_50_test_accuracy": null,
63
+ "llm_top_100_test_accuracy": null,
64
+ "sae_test_accuracy": 0.9682000398635864,
65
+ "sae_top_1_test_accuracy": 0.7634000000000001,
66
+ "sae_top_2_test_accuracy": 0.8572,
67
+ "sae_top_5_test_accuracy": 0.9057999999999999,
68
+ "sae_top_10_test_accuracy": null,
69
+ "sae_top_20_test_accuracy": null,
70
+ "sae_top_50_test_accuracy": null,
71
+ "sae_top_100_test_accuracy": null
72
+ },
73
+ {
74
+ "dataset_name": "LabHC/bias_in_bios_class_set2_results",
75
+ "llm_test_accuracy": 0.9552000522613525,
76
+ "llm_top_1_test_accuracy": 0.6586000000000001,
77
+ "llm_top_2_test_accuracy": 0.7243999999999999,
78
+ "llm_top_5_test_accuracy": 0.7744000000000001,
79
+ "llm_top_10_test_accuracy": null,
80
+ "llm_top_20_test_accuracy": null,
81
+ "llm_top_50_test_accuracy": null,
82
+ "llm_top_100_test_accuracy": null,
83
+ "sae_test_accuracy": 0.9510000348091125,
84
+ "sae_top_1_test_accuracy": 0.7666000000000001,
85
+ "sae_top_2_test_accuracy": 0.818,
86
+ "sae_top_5_test_accuracy": 0.8615999999999999,
87
+ "sae_top_10_test_accuracy": null,
88
+ "sae_top_20_test_accuracy": null,
89
+ "sae_top_50_test_accuracy": null,
90
+ "sae_top_100_test_accuracy": null
91
+ },
92
+ {
93
+ "dataset_name": "LabHC/bias_in_bios_class_set3_results",
94
+ "llm_test_accuracy": 0.9330000519752503,
95
+ "llm_top_1_test_accuracy": 0.6738000000000001,
96
+ "llm_top_2_test_accuracy": 0.7247999999999999,
97
+ "llm_top_5_test_accuracy": 0.7607999999999999,
98
+ "llm_top_10_test_accuracy": null,
99
+ "llm_top_20_test_accuracy": null,
100
+ "llm_top_50_test_accuracy": null,
101
+ "llm_top_100_test_accuracy": null,
102
+ "sae_test_accuracy": 0.9276000499725342,
103
+ "sae_top_1_test_accuracy": 0.8112,
104
+ "sae_top_2_test_accuracy": 0.8320000000000001,
105
+ "sae_top_5_test_accuracy": 0.8582000000000001,
106
+ "sae_top_10_test_accuracy": null,
107
+ "sae_top_20_test_accuracy": null,
108
+ "sae_top_50_test_accuracy": null,
109
+ "sae_top_100_test_accuracy": null
110
+ },
111
+ {
112
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_results",
113
+ "llm_test_accuracy": 0.9218000411987305,
114
+ "llm_top_1_test_accuracy": 0.6165999999999998,
115
+ "llm_top_2_test_accuracy": 0.6402,
116
+ "llm_top_5_test_accuracy": 0.7112,
117
+ "llm_top_10_test_accuracy": null,
118
+ "llm_top_20_test_accuracy": null,
119
+ "llm_top_50_test_accuracy": null,
120
+ "llm_top_100_test_accuracy": null,
121
+ "sae_test_accuracy": 0.9240000486373902,
122
+ "sae_top_1_test_accuracy": 0.7786000000000001,
123
+ "sae_top_2_test_accuracy": 0.8202,
124
+ "sae_top_5_test_accuracy": 0.8430000000000002,
125
+ "sae_top_10_test_accuracy": null,
126
+ "sae_top_20_test_accuracy": null,
127
+ "sae_top_50_test_accuracy": null,
128
+ "sae_top_100_test_accuracy": null
129
+ },
130
+ {
131
+ "dataset_name": "canrager/amazon_reviews_mcauley_1and5_sentiment_results",
132
+ "llm_test_accuracy": 0.9630000591278076,
133
+ "llm_top_1_test_accuracy": 0.643,
134
+ "llm_top_2_test_accuracy": 0.698,
135
+ "llm_top_5_test_accuracy": 0.741,
136
+ "llm_top_10_test_accuracy": null,
137
+ "llm_top_20_test_accuracy": null,
138
+ "llm_top_50_test_accuracy": null,
139
+ "llm_top_100_test_accuracy": null,
140
+ "sae_test_accuracy": 0.9495000541210175,
141
+ "sae_top_1_test_accuracy": 0.728,
142
+ "sae_top_2_test_accuracy": 0.814,
143
+ "sae_top_5_test_accuracy": 0.834,
144
+ "sae_top_10_test_accuracy": null,
145
+ "sae_top_20_test_accuracy": null,
146
+ "sae_top_50_test_accuracy": null,
147
+ "sae_top_100_test_accuracy": null
148
+ },
149
+ {
150
+ "dataset_name": "codeparrot/github-code_results",
151
+ "llm_test_accuracy": 0.970400047302246,
152
+ "llm_top_1_test_accuracy": 0.6422000000000001,
153
+ "llm_top_2_test_accuracy": 0.7318,
154
+ "llm_top_5_test_accuracy": 0.7853999999999999,
155
+ "llm_top_10_test_accuracy": null,
156
+ "llm_top_20_test_accuracy": null,
157
+ "llm_top_50_test_accuracy": null,
158
+ "llm_top_100_test_accuracy": null,
159
+ "sae_test_accuracy": 0.9710000395774842,
160
+ "sae_top_1_test_accuracy": 0.6904,
161
+ "sae_top_2_test_accuracy": 0.7070000000000001,
162
+ "sae_top_5_test_accuracy": 0.8788,
163
+ "sae_top_10_test_accuracy": null,
164
+ "sae_top_20_test_accuracy": null,
165
+ "sae_top_50_test_accuracy": null,
166
+ "sae_top_100_test_accuracy": null
167
+ },
168
+ {
169
+ "dataset_name": "fancyzhx/ag_news_results",
170
+ "llm_test_accuracy": 0.9475000351667404,
171
+ "llm_top_1_test_accuracy": 0.62575,
172
+ "llm_top_2_test_accuracy": 0.75925,
173
+ "llm_top_5_test_accuracy": 0.83675,
174
+ "llm_top_10_test_accuracy": null,
175
+ "llm_top_20_test_accuracy": null,
176
+ "llm_top_50_test_accuracy": null,
177
+ "llm_top_100_test_accuracy": null,
178
+ "sae_test_accuracy": 0.9492500424385071,
179
+ "sae_top_1_test_accuracy": 0.8055,
180
+ "sae_top_2_test_accuracy": 0.8382499999999999,
181
+ "sae_top_5_test_accuracy": 0.8975,
182
+ "sae_top_10_test_accuracy": null,
183
+ "sae_top_20_test_accuracy": null,
184
+ "sae_top_50_test_accuracy": null,
185
+ "sae_top_100_test_accuracy": null
186
+ },
187
+ {
188
+ "dataset_name": "Helsinki-NLP/europarl_results",
189
+ "llm_test_accuracy": 0.9992000579833984,
190
+ "llm_top_1_test_accuracy": 0.726,
191
+ "llm_top_2_test_accuracy": 0.8474,
192
+ "llm_top_5_test_accuracy": 0.9296,
193
+ "llm_top_10_test_accuracy": null,
194
+ "llm_top_20_test_accuracy": null,
195
+ "llm_top_50_test_accuracy": null,
196
+ "llm_top_100_test_accuracy": null,
197
+ "sae_test_accuracy": 0.9986000418663025,
198
+ "sae_top_1_test_accuracy": 0.9805999999999999,
199
+ "sae_top_2_test_accuracy": 0.9823999999999999,
200
+ "sae_top_5_test_accuracy": 0.9938,
201
+ "sae_top_10_test_accuracy": null,
202
+ "sae_top_20_test_accuracy": null,
203
+ "sae_top_50_test_accuracy": null,
204
+ "sae_top_100_test_accuracy": null
205
+ }
206
+ ],
207
+ "sae_bench_commit_hash": "Unknown",
208
+ "sae_lens_id": "blocks.8.hook_resid_post",
209
+ "sae_lens_release_id": "gemma-2-2b-res-snap-matryoshka-dc",
210
+ "sae_lens_version": "5.9.1",
211
+ "sae_cfg_dict": {
212
+ "architecture": "jumprelu",
213
+ "d_in": 2304,
214
+ "d_sae": 32768,
215
+ "activation_fn_str": "relu",
216
+ "apply_b_dec_to_input": true,
217
+ "finetuning_scaling_factor": false,
218
+ "context_size": 1024,
219
+ "model_name": "gemma-2-2b",
220
+ "hook_name": "blocks.8.hook_resid_post",
221
+ "hook_layer": 8,
222
+ "hook_head_index": null,
223
+ "prepend_bos": true,
224
+ "dataset_path": "chanind/pile-uncopyrighted-gemma-1024-abbrv-1B",
225
+ "dataset_trust_remote_code": true,
226
+ "normalize_activations": "none",
227
+ "dtype": "torch.bfloat16",
228
+ "device": "cuda",
229
+ "sae_lens_training_version": "5.5.1",
230
+ "activation_fn_kwargs": {
231
+ "k": 40
232
+ },
233
+ "neuronpedia_id": null,
234
+ "model_from_pretrained_kwargs": {
235
+ "center_writing_weights": false
236
+ },
237
+ "seqpos_slice": [
238
+ null
239
+ ]
240
+ },
241
+ "eval_result_unstructured": {
242
+ "LabHC/bias_in_bios_class_set1_results": {
243
+ "sae_test_accuracy": {
244
+ "0": 0.9550000429153442,
245
+ "1": 0.9650000333786011,
246
+ "2": 0.9570000171661377,
247
+ "6": 0.9930000305175781,
248
+ "9": 0.971000075340271
249
+ },
250
+ "llm_test_accuracy": {
251
+ "0": 0.9540000557899475,
252
+ "1": 0.9720000624656677,
253
+ "2": 0.9550000429153442,
254
+ "6": 0.9920000433921814,
255
+ "9": 0.9780000448226929
256
+ },
257
+ "llm_top_1_test_accuracy": {
258
+ "0": 0.584,
259
+ "1": 0.663,
260
+ "2": 0.666,
261
+ "6": 0.823,
262
+ "9": 0.852
263
+ },
264
+ "llm_top_2_test_accuracy": {
265
+ "0": 0.585,
266
+ "1": 0.641,
267
+ "2": 0.686,
268
+ "6": 0.835,
269
+ "9": 0.853
270
+ },
271
+ "llm_top_5_test_accuracy": {
272
+ "0": 0.698,
273
+ "1": 0.713,
274
+ "2": 0.742,
275
+ "6": 0.889,
276
+ "9": 0.861
277
+ },
278
+ "sae_top_1_test_accuracy": {
279
+ "0": 0.569,
280
+ "1": 0.67,
281
+ "2": 0.798,
282
+ "6": 0.833,
283
+ "9": 0.947
284
+ },
285
+ "sae_top_2_test_accuracy": {
286
+ "0": 0.688,
287
+ "1": 0.814,
288
+ "2": 0.867,
289
+ "6": 0.977,
290
+ "9": 0.94
291
+ },
292
+ "sae_top_5_test_accuracy": {
293
+ "0": 0.873,
294
+ "1": 0.836,
295
+ "2": 0.882,
296
+ "6": 0.989,
297
+ "9": 0.949
298
+ }
299
+ },
300
+ "LabHC/bias_in_bios_class_set2_results": {
301
+ "sae_test_accuracy": {
302
+ "11": 0.9600000381469727,
303
+ "13": 0.956000030040741,
304
+ "14": 0.956000030040741,
305
+ "18": 0.9280000329017639,
306
+ "19": 0.9550000429153442
307
+ },
308
+ "llm_test_accuracy": {
309
+ "11": 0.9650000333786011,
310
+ "13": 0.9500000476837158,
311
+ "14": 0.9590000510215759,
312
+ "18": 0.940000057220459,
313
+ "19": 0.9620000720024109
314
+ },
315
+ "llm_top_1_test_accuracy": {
316
+ "11": 0.575,
317
+ "13": 0.685,
318
+ "14": 0.641,
319
+ "18": 0.662,
320
+ "19": 0.73
321
+ },
322
+ "llm_top_2_test_accuracy": {
323
+ "11": 0.748,
324
+ "13": 0.688,
325
+ "14": 0.664,
326
+ "18": 0.719,
327
+ "19": 0.803
328
+ },
329
+ "llm_top_5_test_accuracy": {
330
+ "11": 0.786,
331
+ "13": 0.752,
332
+ "14": 0.758,
333
+ "18": 0.726,
334
+ "19": 0.85
335
+ },
336
+ "sae_top_1_test_accuracy": {
337
+ "11": 0.735,
338
+ "13": 0.677,
339
+ "14": 0.892,
340
+ "18": 0.688,
341
+ "19": 0.841
342
+ },
343
+ "sae_top_2_test_accuracy": {
344
+ "11": 0.867,
345
+ "13": 0.775,
346
+ "14": 0.885,
347
+ "18": 0.725,
348
+ "19": 0.838
349
+ },
350
+ "sae_top_5_test_accuracy": {
351
+ "11": 0.947,
352
+ "13": 0.864,
353
+ "14": 0.888,
354
+ "18": 0.736,
355
+ "19": 0.873
356
+ }
357
+ },
358
+ "LabHC/bias_in_bios_class_set3_results": {
359
+ "sae_test_accuracy": {
360
+ "20": 0.9600000381469727,
361
+ "21": 0.9070000648498535,
362
+ "22": 0.9180000424385071,
363
+ "25": 0.9620000720024109,
364
+ "26": 0.8910000324249268
365
+ },
366
+ "llm_test_accuracy": {
367
+ "20": 0.9540000557899475,
368
+ "21": 0.9260000586509705,
369
+ "22": 0.9270000457763672,
370
+ "25": 0.9580000638961792,
371
+ "26": 0.9000000357627869
372
+ },
373
+ "llm_top_1_test_accuracy": {
374
+ "20": 0.738,
375
+ "21": 0.717,
376
+ "22": 0.62,
377
+ "25": 0.693,
378
+ "26": 0.601
379
+ },
380
+ "llm_top_2_test_accuracy": {
381
+ "20": 0.811,
382
+ "21": 0.749,
383
+ "22": 0.682,
384
+ "25": 0.715,
385
+ "26": 0.667
386
+ },
387
+ "llm_top_5_test_accuracy": {
388
+ "20": 0.823,
389
+ "21": 0.772,
390
+ "22": 0.719,
391
+ "25": 0.82,
392
+ "26": 0.67
393
+ },
394
+ "sae_top_1_test_accuracy": {
395
+ "20": 0.898,
396
+ "21": 0.827,
397
+ "22": 0.875,
398
+ "25": 0.866,
399
+ "26": 0.59
400
+ },
401
+ "sae_top_2_test_accuracy": {
402
+ "20": 0.909,
403
+ "21": 0.815,
404
+ "22": 0.873,
405
+ "25": 0.861,
406
+ "26": 0.702
407
+ },
408
+ "sae_top_5_test_accuracy": {
409
+ "20": 0.946,
410
+ "21": 0.832,
411
+ "22": 0.899,
412
+ "25": 0.878,
413
+ "26": 0.736
414
+ }
415
+ },
416
+ "canrager/amazon_reviews_mcauley_1and5_results": {
417
+ "sae_test_accuracy": {
418
+ "1": 0.9570000171661377,
419
+ "2": 0.9390000700950623,
420
+ "3": 0.9120000600814819,
421
+ "5": 0.9320000410079956,
422
+ "6": 0.8800000548362732
423
+ },
424
+ "llm_test_accuracy": {
425
+ "1": 0.9500000476837158,
426
+ "2": 0.9380000233650208,
427
+ "3": 0.9320000410079956,
428
+ "5": 0.9190000295639038,
429
+ "6": 0.8700000643730164
430
+ },
431
+ "llm_top_1_test_accuracy": {
432
+ "1": 0.718,
433
+ "2": 0.586,
434
+ "3": 0.61,
435
+ "5": 0.578,
436
+ "6": 0.591
437
+ },
438
+ "llm_top_2_test_accuracy": {
439
+ "1": 0.742,
440
+ "2": 0.606,
441
+ "3": 0.605,
442
+ "5": 0.604,
443
+ "6": 0.644
444
+ },
445
+ "llm_top_5_test_accuracy": {
446
+ "1": 0.755,
447
+ "2": 0.773,
448
+ "3": 0.63,
449
+ "5": 0.71,
450
+ "6": 0.688
451
+ },
452
+ "sae_top_1_test_accuracy": {
453
+ "1": 0.754,
454
+ "2": 0.885,
455
+ "3": 0.694,
456
+ "5": 0.823,
457
+ "6": 0.737
458
+ },
459
+ "sae_top_2_test_accuracy": {
460
+ "1": 0.852,
461
+ "2": 0.887,
462
+ "3": 0.74,
463
+ "5": 0.889,
464
+ "6": 0.733
465
+ },
466
+ "sae_top_5_test_accuracy": {
467
+ "1": 0.901,
468
+ "2": 0.893,
469
+ "3": 0.768,
470
+ "5": 0.897,
471
+ "6": 0.756
472
+ }
473
+ },
474
+ "canrager/amazon_reviews_mcauley_1and5_sentiment_results": {
475
+ "sae_test_accuracy": {
476
+ "1.0": 0.9500000476837158,
477
+ "5.0": 0.9490000605583191
478
+ },
479
+ "llm_test_accuracy": {
480
+ "1.0": 0.9620000720024109,
481
+ "5.0": 0.9640000462532043
482
+ },
483
+ "llm_top_1_test_accuracy": {
484
+ "1.0": 0.643,
485
+ "5.0": 0.643
486
+ },
487
+ "llm_top_2_test_accuracy": {
488
+ "1.0": 0.698,
489
+ "5.0": 0.698
490
+ },
491
+ "llm_top_5_test_accuracy": {
492
+ "1.0": 0.741,
493
+ "5.0": 0.741
494
+ },
495
+ "sae_top_1_test_accuracy": {
496
+ "1.0": 0.728,
497
+ "5.0": 0.728
498
+ },
499
+ "sae_top_2_test_accuracy": {
500
+ "1.0": 0.814,
501
+ "5.0": 0.814
502
+ },
503
+ "sae_top_5_test_accuracy": {
504
+ "1.0": 0.834,
505
+ "5.0": 0.834
506
+ }
507
+ },
508
+ "codeparrot/github-code_results": {
509
+ "sae_test_accuracy": {
510
+ "C": 0.9590000510215759,
511
+ "Python": 0.984000027179718,
512
+ "HTML": 0.984000027179718,
513
+ "Java": 0.9670000672340393,
514
+ "PHP": 0.9610000252723694
515
+ },
516
+ "llm_test_accuracy": {
517
+ "C": 0.9650000333786011,
518
+ "Python": 0.9820000529289246,
519
+ "HTML": 0.987000048160553,
520
+ "Java": 0.9630000591278076,
521
+ "PHP": 0.9550000429153442
522
+ },
523
+ "llm_top_1_test_accuracy": {
524
+ "C": 0.558,
525
+ "Python": 0.625,
526
+ "HTML": 0.803,
527
+ "Java": 0.619,
528
+ "PHP": 0.606
529
+ },
530
+ "llm_top_2_test_accuracy": {
531
+ "C": 0.675,
532
+ "Python": 0.699,
533
+ "HTML": 0.92,
534
+ "Java": 0.718,
535
+ "PHP": 0.647
536
+ },
537
+ "llm_top_5_test_accuracy": {
538
+ "C": 0.777,
539
+ "Python": 0.751,
540
+ "HTML": 0.937,
541
+ "Java": 0.784,
542
+ "PHP": 0.678
543
+ },
544
+ "sae_top_1_test_accuracy": {
545
+ "C": 0.627,
546
+ "Python": 0.63,
547
+ "HTML": 0.948,
548
+ "Java": 0.646,
549
+ "PHP": 0.601
550
+ },
551
+ "sae_top_2_test_accuracy": {
552
+ "C": 0.67,
553
+ "Python": 0.668,
554
+ "HTML": 0.947,
555
+ "Java": 0.661,
556
+ "PHP": 0.589
557
+ },
558
+ "sae_top_5_test_accuracy": {
559
+ "C": 0.876,
560
+ "Python": 0.964,
561
+ "HTML": 0.95,
562
+ "Java": 0.69,
563
+ "PHP": 0.914
564
+ }
565
+ },
566
+ "fancyzhx/ag_news_results": {
567
+ "sae_test_accuracy": {
568
+ "0": 0.937000036239624,
569
+ "1": 0.987000048160553,
570
+ "2": 0.9230000376701355,
571
+ "3": 0.9500000476837158
572
+ },
573
+ "llm_test_accuracy": {
574
+ "0": 0.9420000314712524,
575
+ "1": 0.9910000562667847,
576
+ "2": 0.9150000214576721,
577
+ "3": 0.9420000314712524
578
+ },
579
+ "llm_top_1_test_accuracy": {
580
+ "0": 0.637,
581
+ "1": 0.693,
582
+ "2": 0.567,
583
+ "3": 0.606
584
+ },
585
+ "llm_top_2_test_accuracy": {
586
+ "0": 0.805,
587
+ "1": 0.805,
588
+ "2": 0.684,
589
+ "3": 0.743
590
+ },
591
+ "llm_top_5_test_accuracy": {
592
+ "0": 0.825,
593
+ "1": 0.883,
594
+ "2": 0.777,
595
+ "3": 0.862
596
+ },
597
+ "sae_top_1_test_accuracy": {
598
+ "0": 0.733,
599
+ "1": 0.942,
600
+ "2": 0.74,
601
+ "3": 0.807
602
+ },
603
+ "sae_top_2_test_accuracy": {
604
+ "0": 0.825,
605
+ "1": 0.945,
606
+ "2": 0.74,
607
+ "3": 0.843
608
+ },
609
+ "sae_top_5_test_accuracy": {
610
+ "0": 0.881,
611
+ "1": 0.978,
612
+ "2": 0.838,
613
+ "3": 0.893
614
+ }
615
+ },
616
+ "Helsinki-NLP/europarl_results": {
617
+ "sae_test_accuracy": {
618
+ "en": 1.0,
619
+ "fr": 0.999000072479248,
620
+ "de": 0.999000072479248,
621
+ "es": 0.9970000386238098,
622
+ "nl": 0.9980000257492065
623
+ },
624
+ "llm_test_accuracy": {
625
+ "en": 0.999000072479248,
626
+ "fr": 1.0,
627
+ "de": 0.999000072479248,
628
+ "es": 0.999000072479248,
629
+ "nl": 0.999000072479248
630
+ },
631
+ "llm_top_1_test_accuracy": {
632
+ "en": 0.726,
633
+ "fr": 0.581,
634
+ "de": 0.794,
635
+ "es": 0.901,
636
+ "nl": 0.628
637
+ },
638
+ "llm_top_2_test_accuracy": {
639
+ "en": 0.849,
640
+ "fr": 0.888,
641
+ "de": 0.818,
642
+ "es": 0.959,
643
+ "nl": 0.723
644
+ },
645
+ "llm_top_5_test_accuracy": {
646
+ "en": 0.932,
647
+ "fr": 0.959,
648
+ "de": 0.916,
649
+ "es": 0.977,
650
+ "nl": 0.864
651
+ },
652
+ "sae_top_1_test_accuracy": {
653
+ "en": 1.0,
654
+ "fr": 0.987,
655
+ "de": 0.925,
656
+ "es": 0.992,
657
+ "nl": 0.999
658
+ },
659
+ "sae_top_2_test_accuracy": {
660
+ "en": 1.0,
661
+ "fr": 0.986,
662
+ "de": 0.935,
663
+ "es": 0.993,
664
+ "nl": 0.998
665
+ },
666
+ "sae_top_5_test_accuracy": {
667
+ "en": 0.998,
668
+ "fr": 0.996,
669
+ "de": 0.977,
670
+ "es": 0.998,
671
+ "nl": 1.0
672
+ }
673
+ }
674
+ }
675
+ }