{ "attention_layers": [ 18, 19, 20, 21, 22, 23 ], "auto_mapping": null, "base_model_name_or_path": "microsoft/phi-1_5", "cache_dtype": "float16", "cache_size": 262144, "cache_type": "FIFO", "compression_factor": 8, "context_size": 2, "global_cache": true, "inference_mode": true, "neighborhood_size": 2, "neurocache_type": "ONDEVICE", "retrieval_map": { "18": 18 }, "similarity_fn": "l2", "task_type": "CAUSAL_LM", "topk": 8 }