{
  "custom_generation_config": null,
  "model_params": {
    "model_name_or_path": "NousResearch/Meta-Llama-3-8B",
    "generation_config": {
      "bos_token_id": 128000,
      "do_sample": true,
      "eos_token_id": [128001, 198, 271],
      "max_length": 8192,
      "max_new_tokens": 64,
      "pad_token_id": 128001,
      "stop_strings": ["\n", "\n\n"],
      "temperature": 0.1,
      "top_k": 40,
      "top_p": 0.9,
      "transformers_version": "4.38.2",
      "trust_remote_code": [false]
    },
    "conversation_template": {
      "system_message_template": "{content}\n",
      "user_message_template": "{content}\n",
      "bot_message_template": "{content}\n\n",
      "bot_message_template_incomplete": "{content}",
      "user_role": "user",
      "bot_role": "bot",
      "system_role": "system",
      "suffix": "",
      "add_special_tokens": false,
      "eos_token": ["\n", "\n\n"],
      "global_prefix": "<|begin_of_text|>"
    },
    "load_in_8bit": false,
    "torch_dtype": "auto",
    "use_flash_attention_2": true,
    "device_map": "cuda:0",
    "use_fast_tokenizer": true,
    "leading_space": false,
    "space_token": null,
    "trust_remote_code": [false],
    "max_model_len": 8192
  },
  "task_params": {
    "max_len": 4000,
    "few_shot_count": 5,
    "batch_size": 8,
    "max_sample_per_dataset": 10000000000000,
    "method": "calculate_tokens_proba"
  }
}