Tool calling broken
We need to wait for the guys in llamacpp update, right?
We need to wait for the guys in llamacpp update, right?
Yes that is correct
We need to wait for the guys in llamacpp update, right?
Do you have the error btw?
We need to wait for the guys in llamacpp update, right?
Do you have the error btw?
I've updated llama.cpp to build b6100, but tool calling still doesn't seem to be supported. Is there anything else I can do to get tool calling working, @shimmyshimmer?
Here is the response payload I got for a prompt request with tools (no tool call was selected, although reasoning_content looks reasonable):
{
"choices": [
{
"finish_reason": "stop",
"index": 0,
"message": {
"role": "assistant",
"reasoning_content": "User request: \"Analyze Bitcoin price movements over the past 3 months.\" This is a factual question requiring research and data analysis. Category: Hand Off to Planner. Must call tool, no response.<|start|>assistant<|channel|>commentary to=functions.handoff_to_planner json<|message|>{\"research_topic\":\"Analyze Bitcoin price movements over the past 3 months\",\"locale\":\"en-US\"}",
"content": ""
}
}
],
"created": 1754525933,
"model": "gpt-oss-20b",
"system_fingerprint": "b6100-65c797c4",
"object": "chat.completion",
"usage": {
"completion_tokens": 82,
"prompt_tokens": 878,
"total_tokens": 960
},
"id": "chatcmpl-Vzn4NVMk4tv5v2ze6ja04aBwlvMZZA35",
"__verbose": {
"index": 0,
"content": "<|channel|>analysis<|message|>User request: \"Analyze Bitcoin price movements over the past 3 months.\" This is a factual question requiring research and data analysis. Category: Hand Off to Planner. Must call tool, no response.<|start|>assistant<|channel|>commentary to=functions.handoff_to_planner json<|message|>{\"research_topic\":\"Analyze Bitcoin price movements over the past 3 months\",\"locale\":\"en-US\"}",
"tokens": [],
"id_slot": 0,
"stop": true,
"model": "qwen/qwen-30b-a3b-2507",
"tokens_predicted": 82,
"tokens_evaluated": 878,
"generation_settings": {
"n_predict": -1,
"seed": 4294967295,
"temperature": 1.0,
"dynatemp_range": 0.0,
"dynatemp_exponent": 1.0,
"top_k": 0,
"top_p": 1.0,
"min_p": 0.0,
"top_n_sigma": -1.0,
"xtc_probability": 0.0,
"xtc_threshold": 0.10000000149011612,
"typical_p": 1.0,
"repeat_last_n": 64,
"repeat_penalty": 1.0,
"presence_penalty": 0.0,
"frequency_penalty": 0.0,
"dry_multiplier": 0.0,
"dry_base": 1.75,
"dry_allowed_length": 2,
"dry_penalty_last_n": 30016,
"dry_sequence_breakers": [
"\n",
":",
"\"",
"*"
],
"mirostat": 0,
"mirostat_tau": 5.0,
"mirostat_eta": 0.10000000149011612,
"stop": [],
"max_tokens": -1,
"n_keep": 0,
"n_discard": 0,
"ignore_eos": false,
"stream": false,
"logit_bias": [],
"n_probs": 0,
"min_keep": 0,
"grammar": "",
"grammar_lazy": false,
"grammar_triggers": [],
"preserved_tokens": [],
"chat_format": "GPT-OSS",
"reasoning_format": "auto",
"reasoning_in_content": false,
"thinking_forced_open": false,
"samplers": [
"penalties",
"dry",
"top_n_sigma",
"top_k",
"typ_p",
"top_p",
"min_p",
"xtc",
"temperature"
],
"speculative.n_max": 16,
"speculative.n_min": 0,
"speculative.p_min": 0.75,
"timings_per_token": false,
"post_sampling_probs": false,
"lora": []
},
"prompt": "...",
"has_new_line": false,
"truncated": false,
"stop_type": "eos",
"stopping_word": "",
"tokens_cached": 959,
"timings": {
"prompt_n": 1,
"prompt_ms": 760.097,
"prompt_per_token_ms": 760.097,
"prompt_per_second": 1.3156215588273603,
"predicted_n": 82,
"predicted_ms": 20656.992,
"predicted_per_token_ms": 251.91453658536582,
"predicted_per_second": 3.9696002205935894
}
},
"timings": {
"prompt_n": 1,
"prompt_ms": 760.097,
"prompt_per_token_ms": 760.097,
"prompt_per_second": 1.3156215588273603,
"predicted_n": 82,
"predicted_ms": 20656.992,
"predicted_per_token_ms": 251.91453658536582,
"predicted_per_second": 3.9696002205935894
}
}