"""
-
def update_chat_display(df, index):
- """Update the chat visualization for a specific index."""
+ """Update the chat visualization with improved dark theme support."""
if df is None or df.empty or index >= len(df):
return (
- "
',
)
row = df.iloc[index]
- # Format chat messages
messages = json.loads(row["conversation"])
+ response = row["response"]
+ formatted_response = format_response(response)
+
+ # Create list of all messages including the response
+ all_messages = [
+ format_chat_message(msg["role"], msg["content"]) for msg in messages
+ ]
+ all_messages.append(
+ format_chat_message("Assistant", formatted_response, is_response=True)
+ )
+
chat_html = f"""
-
- {"".join([format_chat_message(msg["role"], msg["content"])
- for msg in messages])}
+
+ {"".join(all_messages)}
"""
- # Format metrics
metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"])
-
- # Format tool info
tool_html = format_tool_info(row["tools_langchain"])
return chat_html, metrics_html, tool_html
-def filter_and_update_display(model, dataset, selected_scores, current_index):
+def filter_and_update_display(model, dataset, min_score, max_score, current_index):
try:
- # Get data and filter by scores
df_chat = get_chat_and_score_df(model, dataset)
- if selected_scores:
- df_chat = df_chat[df_chat["score"].isin(selected_scores)]
+ df_chat = df_chat[
+ (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
+ ]
if df_chat.empty:
return (
- "
No data available for selected filters
",
- "
No metrics available
",
- "
No tool information available
",
- gr.update(maximum=0, value=0),
+ '
No data available for selected filters
',
+ '
No metrics available
',
+ '
No tool information available
',
"0/0",
)
- # Update index bounds
max_index = len(df_chat) - 1
current_index = min(current_index, max_index)
-
- # Get displays for current index
chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index)
- return (
- chat_html,
- metrics_html,
- tool_html,
- gr.update(maximum=max_index, value=current_index),
- f"{current_index + 1}/{len(df_chat)}",
- )
+ index_display = f'
{current_index + 1}/{len(df_chat)}
'
+ return chat_html, metrics_html, tool_html, index_display
+
except Exception as e:
- print(f"Error in filter_and_update_display: {str(e)}")
+ error_html = f"""
+
+ Error: {str(e)}
+
+ """
return (
- f"
Error: {str(e)}
",
- "
No metrics available
",
- "
No tool information available
",
- gr.update(maximum=0, value=0),
+ error_html,
+ '
No metrics available
',
+ '
No tool information available
',
"0/0",
)
diff --git a/data_loader.py b/data_loader.py
index db4227e8d4a68e6c62e3052b57a39848dd35d1ca..6e68494dcd88724012dd2133abfdef6a70ece2eb 100644
--- a/data_loader.py
+++ b/data_loader.py
@@ -1,11 +1,25 @@
import pandas as pd
-from glob import glob
-import numpy as np
-from pathlib import Path
-DATASETS = [Path(file).stem for file in glob("datasets/*.parquet")]
-SCORES = [round(x, 2) for x in np.arange(0, 1.1, 0.1).tolist()]
+DATASETS = [
+ "BFCL_v3_irrelevance",
+ "BFCL_v3_multi_turn_base_multi_func_call",
+ "BFCL_v3_multi_turn_base_single_func_call",
+ "BFCL_v3_multi_turn_composite",
+ "BFCL_v3_multi_turn_long_context",
+ "BFCL_v3_multi_turn_miss_func",
+ "BFCL_v3_multi_turn_miss_param",
+ "tau_long_context",
+ "toolace_single_func_call_1",
+ "toolace_single_func_call_2",
+ "xlam_multiple_tool_multiple_call",
+ "xlam_multiple_tool_single_call",
+ "xlam_single_tool_multiple_call",
+ "xlam_single_tool_single_call",
+ "xlam_tool_miss",
+]
+
+SCORES = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
def load_data():
"""Load and preprocess the data."""
diff --git a/get_exp_data.ipynb b/get_exp_data.ipynb
new file mode 100644
index 0000000000000000000000000000000000000000..b0116a58e4930e84f4243e2d64131afc30707430
--- /dev/null
+++ b/get_exp_data.ipynb
@@ -0,0 +1,167 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from functools import lru_cache\n",
+ "from concurrent.futures import ThreadPoolExecutor\n",
+ "import promptquality as pq\n",
+ "from dotenv import load_dotenv\n",
+ "from data_loader import DATASETS, load_data\n",
+ "from tqdm.auto import tqdm\n",
+ "\n",
+ "load_dotenv()\n",
+ "pq.login(\"https://console.demo.rungalileo.io\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "project_name = \"agent-lb-v1\"\n",
+ "PROJECT_ID = pq.get_project_from_name(project_name).id\n",
+ "\n",
+ "\n",
+ "@lru_cache(maxsize=1000)\n",
+ "def get_output_df(model, dataset):\n",
+ " print(f\"Getting metrics for {model} {project_name} for dataset {dataset}\")\n",
+ " run_name = f\"{model} {dataset}\"\n",
+ " run_id = pq.get_run_from_name(run_name, PROJECT_ID).id\n",
+ " rows = pq.get_rows(\n",
+ " project_id=PROJECT_ID,\n",
+ " run_id=run_id,\n",
+ " task_type=None,\n",
+ " config=None,\n",
+ " starting_token=0,\n",
+ " limit=1000,\n",
+ " )\n",
+ "\n",
+ " rationales = [d.metrics.tool_selection_quality_rationale for d in rows]\n",
+ "\n",
+ " scores = [\n",
+ " round(d.metrics.tool_selection_quality, 2)\n",
+ " for d, rationale in zip(rows, rationales)\n",
+ " if rationale\n",
+ " ]\n",
+ " \n",
+ " explanations = [\n",
+ " d.metrics.tool_selection_quality_explanation\n",
+ " for d, rationale in zip(rows, rationales)\n",
+ " if rationale\n",
+ " ]\n",
+ " \n",
+ " responses = [d.response for d, rationale in zip(rows, rationales)\n",
+ " if rationale\n",
+ " ]\n",
+ " \n",
+ " rationales = [r for r in rationales if r]\n",
+ " mean_score = round(np.mean(scores), 2)\n",
+ " \n",
+ " data = {\n",
+ " \"response\": responses,\n",
+ " \"mean_score\": mean_score,\n",
+ " \"score\": scores,\n",
+ " \"rationale\": rationales,\n",
+ " \"explanation\": explanations,\n",
+ " }\n",
+ " return pd.DataFrame(data)\n",
+ "\n",
+ "def save_output_df(df, model, dataset):\n",
+ " os.makedirs(f\"output/{model}\", exist_ok=True)\n",
+ " df.to_parquet(f\"output/{model}/{dataset}.parquet\")\n",
+ "\n",
+ "def get_updated_df(df, df_output):\n",
+ " df = df.iloc[:len(df_output)].copy()\n",
+ " \n",
+ " df[\"response\"] = df_output[\"response\"].tolist()\n",
+ " df[\"rationale\"] = df_output[\"rationale\"].tolist()\n",
+ " df[\"explanation\"] = df_output[\"explanation\"].tolist()\n",
+ " df[\"score\"] = df_output[\"score\"].tolist()\n",
+ " cols = ['conversation', 'tools_langchain', 'n_turns',\n",
+ " 'len_query', 'n_tools', 'response', 'rationale', 'explanation', 'score']\n",
+ " return df[cols]\n",
+ "\n",
+ "\n",
+ "def get_chat_and_score_df(model, dataset):\n",
+ " df_output = pd.read_parquet(f\"output/{model}/{dataset}.parquet\")\n",
+ " df = pd.read_parquet(f\"datasets/{dataset}.parquet\")\n",
+ " df = get_updated_df(df, df_output)\n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def process_dataset(args):\n",
+ " model, dataset = args\n",
+ " if os.path.exists(f\"output/{model}/{dataset}.parquet\"):\n",
+ " return None\n",
+ " print(model, dataset)\n",
+ " df_output = get_output_df(model, dataset)\n",
+ " save_output_df(df_output, model, dataset)\n",
+ " return f\"Completed: {model} - {dataset}\"\n",
+ "\n",
+ "def process_model_datasets(model, datasets, max_workers=5):\n",
+ " with ThreadPoolExecutor(max_workers=max_workers) as executor:\n",
+ " # Create arguments list for each dataset\n",
+ " args_list = [(model, dataset) for dataset in datasets]\n",
+ " \n",
+ " # Process datasets in parallel with progress bar\n",
+ " list(tqdm(\n",
+ " executor.map(process_dataset, args_list),\n",
+ " total=len(datasets),\n",
+ " desc=f\"Datasets ({model})\",\n",
+ " position=1,\n",
+ " leave=False\n",
+ " ))\n",
+ "\n",
+ "\n",
+ "models = [\"accounts/fireworks/models/qwen2p5-72b-instruct\", \"meta-llama/Llama-3.3-70B-Instruct-Turbo\", \"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\"]\n",
+ "# models = load_data()[\"Model\"]\n",
+ "\n",
+ "# Process each model sequentially, but datasets in parallel\n",
+ "for model in tqdm(models, desc=\"Models\", position=0):\n",
+ " process_model_datasets(model, DATASETS)\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "langgraph",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.6"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..5e54732d481c1146f544d138dfb73e1524b7230d
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e2817c3fbce11e1585e889164baa236816796332f0f11b2cb9a1897417572927
+size 36407
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6ce69ec80d457cce39867d19fe5307896f2a4262
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8878a5ccc9bf16026ff0c951819f183607799c060e25f5a8f99ce0cba286f684
+size 25352
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..88f68ef27c9cffa79986ce13422a08950b1818e1
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c37d524e3c17f02429af0774587d4aa0b5d28d85ed1ef7cdcd9306122ead2a8
+size 22850
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..76f8cacd0924613b7da958cde30e05d5091c1f9e
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ba9fd63c299f61fdf3c59ad582ab374f13198efc03bbadb99bbe31c6bbf1a71
+size 42354
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..bff4fd766c593fdf2ed63732bee7439b662a7417
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1e22dc3995bca271de419792437a4e95756d010e05ccdad024b91acfbba9a742
+size 38027
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cd65552aff6e7f6f8aac20970dc85c487dc7e393
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7a31da12d9d42cafdd92f5c24d898f3423188b2a2d90cca908a9fe40ccc4fb35
+size 41590
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a0027694d1a719125a6d7d23da3a0520e4dbf6a0
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df9e322d8426cd7dccd971ee3aac870511ff3f5a2e07964df21de4ad0f24915c
+size 42727
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet b/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e2240d92ab66d88f038496e72eb2a04d197f36e1
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7d41adc0a7212cde1b5e107425f7396ddf64967fe270c180209f9363ea86ea63
+size 47080
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cb78937960b444175efa2cb887320ee444ff04a2
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4515b1d276f4d3737609be9d6a682d83a65c2a560cb626b739659bea11cee266
+size 13137
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9f015b5dc34ff20c1f6c88c6b0dea020e3738135
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c4f418e1e2dc111f4df5568882e929e697191f210349ecf83888727a6a60c1fc
+size 11495
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0e1ab8894df9e4553474030d50cbe5ba6f594f86
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba4341a56d0cbfc556ded33ba0d975fe9bf4a3f3071f78b44cba99a46dac1988
+size 103694
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..119a821b4414b13eacad4fbb241dbf0c219f83eb
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9fd1511267bb48fe464e545a0946e0c7952f8657864794c9d278874b08be111
+size 39286
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9321df474c6542233245160714a727fa5f77bac1
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57dc4c2b0927d4a4788b4736e8e2e506685892bb367141c756a2eacc536b5022
+size 30411
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0708fd5c6a276aae0995417bb832258ee912ef20
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93d602bbbe10a95b4db1079ea8ad5958e530b89ac357ec528c563e7662f20e3c
+size 43779
diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ec6fb5eb42ee2f97efa0ec55ae24d17e642f0a73
--- /dev/null
+++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3f82514fec1d787c447f91f28b35fce99c1aa7b485c53dbf90a656550d9270c
+size 49444
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d91c901d767928af4f7f9da4805d93ecd9f10d46
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1632880d864efbb48b5533c14a9ab6dfc3bd8c756d8612ca2baca0bcb7a69e4b
+size 41488
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e7bb00eb06aa23c328ca20fd5c9d04644ce6ef65
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6865f39478b20a8a4b30a19aab53fe35581db64f29a0a18ec6c28c4e0d90f93d
+size 28818
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a07090ff56422a6b73bc5c688d8def8ac7afafc5
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:649363984f056844ef6ebef6040f5c7b202ace7b2122512b1ddb4c0b2373a9ad
+size 24260
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a999d75386a5f32c352239834cc4a162d862a7f2
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f20ada4667753ca67fbbd4ee3fd160d883781e5fec4cadb5f7395c1d47f54d8b
+size 59035
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..98aa89993f3b23eb2a3a7453784267a566bbe35d
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:59e172a62f31efc4e91fbe02354ea9919597eadd6086103d13df9da476e9f7ba
+size 45652
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..35a82aadc9562294d5d16eefe626534b307c2c09
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57b4255e618139abbb1c5531baeed296c6b97cb991dce0b118d984f178d8473a
+size 49516
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6f081704239bd2c2a25779aeadc73609a6bbeae1
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:804d402cd62a9c6aeecbc1a29cafb076445b0a00d08bc113dc54120302f9d68f
+size 45782
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4f02fea6004ee11f898142d9c70ebe5417ad7ec1
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38557506e024d20030df47cf7222dde468fe88e2cdd402846c548466d095ef43
+size 106328
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..161d8687a057cd4a7b4d7bd8c950f95dbc2c2ef5
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac567ee462a1f2564922e3249d868fc1a6b46b346d2a5e4d537197fd726eafe4
+size 18349
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f2cce42ce3341dce1febb792e7a3e27a2308aec8
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f76072d412286daee3a23a2867db77d74f0d94d6179b0279f6b4d685807247e2
+size 14697
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f64c527dc002b440b8ac1dcde6f8aa161f9579ad
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f742f430b2ddd553f8e34b1b805b08e2b3369581b8ac24455bc408f310902a9
+size 103082
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..14c866e9952a222328a915a361c5241c9a512588
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:af9bf31720ee319f14bd8680183e907d367bf3f03e748431da05633ccd555ec0
+size 39935
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..736a76f80c044ac68edcc6c4bddce0670b1b1c1e
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a1b585b6c36cee9c9fd859db2371a2d35e3e5f5129503d7261ca4b72288c559e
+size 30561
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f74c54b5e9a8bf2902bf53c1de0f4c1d6a30955b
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4a86349e92d6aa556b37b8b56ecbd60e10ed0946cad03d261dc4a012ec03fa14
+size 44998
diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1d92e13e7d43ffe1f93284d24329807800262f49
--- /dev/null
+++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2c008962f6ff71452f42579f1b80ab354f2eae5a7e26f265c0ed32c239d17238
+size 75460
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..52d0d2466288998775d16b06624e7c55dba5c488
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:629cb48a309de2c0d82f81880d7cf6b1aa3d0066905f7384943d3c4b939c40f9
+size 56461
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2d2df7358631e27b26c5c7e2203dae6f2b27b81c
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6a087d39e959b3869a1b8846912c3860fc2b390b5d2ce906f16d2812f01175
+size 25800
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4bf9f2cf7a1f96dc6498dc29ee75fb2769994e4e
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2b76c198d7045d6c1c9dd6100e43c085f4c2a3eaf122e2038ee16e2149f6ae51
+size 24717
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..464f267817c5e86e63e520a519751ab9c7444cfd
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0617b626d043bb176a34c2109b5dd123fa92e25f0f8cfff5063a4cec39ad1b40
+size 50601
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..27dc7796f5b343c94eadda249e0a51d4c0ea4058
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ec5a4e24400d4be1c10e27660a4dca42e41f15a11419d17e978f3707abe01bf
+size 40562
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..64616dad2ac77dc52e4803b79be276dc71803684
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99e2fa627f8e43a1b533af74d928f57df2549552ca72ff0464eb27377d7c3be5
+size 49028
diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f59d45c544c2293b2f88cd62f97996fc7e01fac1
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35439010bcff4005c7c820c00ff33e3d6cc48647716cb54cb058e14f901a505b
+size 49827
diff --git a/output/claude-3-5-haiku-20241022/tau_long_context.parquet b/output/claude-3-5-haiku-20241022/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ce9b003e63702fc83c4910f242cf42bf865001ad
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2055060f303497e49eb292df2b2c57d57afca155e666c9a89cf91302beaa26a5
+size 42402
diff --git a/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet b/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9d1fed8dec5446061c1459c93010d32560b17141
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1919937f16d5bd9169a08735e95555cc583fdc849b2cd90c52f33d1d4d742968
+size 19861
diff --git a/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet b/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7465ae9470df9d5273c7a67296426bf4850a8342
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a7cfa18597a10d9ab722fbb0115dd5d78faec9ca3eed834e0393a24dc877659f
+size 13842
diff --git a/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2e6b3215afe469614a30d8cc25a27462bc954c52
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:db992d9979dafe082b443f0cbe163c88bad98cdf01d3248cf2affdd2f97dfe5d
+size 89313
diff --git a/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f684abe96e0f0959d785dd3cd43ac0051d4e4a3b
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d76776f1962307d8b2fbb6cba8caaea27c62f0fad096b4e3575e28d08947a7e
+size 40827
diff --git a/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet b/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3494291b4ed400e171e42c40618af68bf4387dbc
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35a6c8eecd89f7ec1092945f9597bf2db469164016cadf315f50fa075662b728
+size 27274
diff --git a/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet b/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e4f58d66d8ab3ef54614f2345e476abfe563adb3
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f603069ab1ef9094ce7e4b3a20e8253e821c3e59fbee1415c961c7fc87972ac
+size 49346
diff --git a/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet b/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6c9ef4921a4a0b9b12ffa88759c4b3c4f5033238
--- /dev/null
+++ b/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2aa32dcf4ced1c07020f08b18bf98012be350f8bcae06d7c5592fb6d98ac39f9
+size 56628
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a80a665e09ee2e4a0fd2360b81c15cf9cfbd1e19
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:183ba40828d01bb6c08607124b590017baa18e23304ca8f2818c39327928b69d
+size 47355
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..63bc8dfaf4720c3c75728502925ca137cd99a7bc
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:38f7e26b43084e3b991f28a5dfc2398f2f99aac1c17d9d67cf56dda0235e86b5
+size 25897
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8d572ae03a8d241bfcf849d72d6ade3f2f8e2fe6
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d01add1c6b6043ca5976b0bd2ec37aa3ef23f384a73e8e40f212138896cdea2
+size 25472
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..809eb1dcc62487f531c819d949b4d56381629416
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b122f693fc7465a00ddc1275647ffc7d3dfbe9455b5d363f701f0d8afba2ed9b
+size 51374
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..44c02fa85c5a924ac7e06a84e06ee53f82b06e13
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74f93575491c0db9dbec030cce34a7fa08f233bc5d638af284fc9f0660abe376
+size 41032
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d4c659c625a82d13e1fbac1d45be05c503f073ac
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:07b49eb0c3b1a971bc0018eb5ca264c568a382784d7c28535f3b1910bec44d4e
+size 51224
diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8d2bce33550055792d97d80446d14d026757ac34
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08462753aace1e06e36a150b1623b4068f8c9e14f023e766a7ccdfadd6411e16
+size 51509
diff --git a/output/claude-3-5-sonnet-20241022/tau_long_context.parquet b/output/claude-3-5-sonnet-20241022/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..823cf8ac19d1f17ed3522d5781d8c69b1883e393
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:593863c22b76cabcc3c7a48a651fac463efa13028597c229359a41b6388d12ce
+size 48252
diff --git a/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c04582e41ad3ca6dff83d18365a1a8eff87f2470
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:13d88fddb7edb096a3314189fe8c6f6b4c6f2ec37a1c81122f57f82e6741ad12
+size 20414
diff --git a/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fd4410f1ada24a334f57dddcd7c0e44298821353
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9800126a411b0b363eeb35fc41e1dd517a34ead3f5acffef6659703cdd3aa1d2
+size 13925
diff --git a/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6be31619e666a8ce6faabe9e0cdad4c31e381bcc
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0fec49a52067984f7666812b7c13bde0c3e537a89cb43fbcd82cf00ea283b70
+size 91529
diff --git a/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..08ef91886a31ea69d8dc303b7130b2c57d23b3fc
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a5caee275f8a18625a7cf8daec40584e42b20a6282f2b2b84814182e8bfe9c91
+size 42361
diff --git a/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..418ac29c4bc871c6f97969cec9877c44bdc587da
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:505cb9a3145c79feed04e7e39d50aa1abb17575847f8187577d0535f0301f450
+size 29049
diff --git a/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3c78850b41761a4b4d093cc1eca5714b3951e4ce
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cfe5e8d50e76eacf9bce74c139bbf7cc9512f617414b8f909657eeaf88873d39
+size 48333
diff --git a/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet b/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ea9c1f04317106b917b3175914d0860b0f099354
--- /dev/null
+++ b/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3fdbff91fab1aec4cac5e0d51149def4c5d2a6cbae29a0407a5056c1f5be47e8
+size 53233
diff --git a/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet b/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9a3aa5aa3817675c572d49e7ed4dcc7ec0ca40ce
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5249956839f8b75467416244a75563b50141ecff61b32ad2804b806b820b6af6
+size 27599
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..97b02ecf65505e2107a2fdc93c32122890d6e11f
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e0a81daa696e6a1b06eb61646915243019c028d8a51fbc7bf74a8b305e5ec513
+size 26376
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..330b4f420b04865496e803c34c8243407ef3651f
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dd2df42ea847e440673a079bc87991f77fa6701a1798012d55b078b8caf066c8
+size 21530
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..193b142a25c6045ea3b3c15246d83bbafc74e5db
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4bf75bddddb88394586817b6530fd517d27ffa06b6e3d369735affd103c2eff9
+size 43787
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4ef4acf9b0dd1e73e3a5958f68704e3bd2a0a5ff
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0513db147d6662a92aef4b69f8bb54ea7b45029cd08df573c4f05e128482004b
+size 39527
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..43de56edf186a05ee6dbc2c726427a28e36e9767
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b2a7cb7add0a51cc5e3ae7dadea03ec3c0c9be4a92393cae05df5672d69953c6
+size 41897
diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d5097ab21e59a6e652b5d2179e961e1f661f660c
--- /dev/null
+++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:dcae1f2141f50404ea07282e3408baba4dd0f0b3490f85bf1042da01ff293009
+size 42215
diff --git a/output/gemini-1.5-flash/tau_long_context.parquet b/output/gemini-1.5-flash/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c3200a15b1599ab19668fd2b5f2b7939e7d0f142
--- /dev/null
+++ b/output/gemini-1.5-flash/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc8cd5d90a35855e7606af76a4b7d128c1a97f68029093ae33d0aa7d343a0dfa
+size 46692
diff --git a/output/gemini-1.5-flash/toolace_single_func_call_1.parquet b/output/gemini-1.5-flash/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..765621308a5df79f493fb0b932ffe7074fdd1ff4
--- /dev/null
+++ b/output/gemini-1.5-flash/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e3f46ad06d06585ddced90bf3f0fbf716795b2682dd026ffa0f7d18d80ca8b0
+size 14004
diff --git a/output/gemini-1.5-flash/toolace_single_func_call_2.parquet b/output/gemini-1.5-flash/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1eb5b348f08be4bd31d0d1a1bb2197628b9fe498
--- /dev/null
+++ b/output/gemini-1.5-flash/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b9fc26fb06c55e6373e038285a08007451a1f6d40ff11277832e29d4c540bf1
+size 12151
diff --git a/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet b/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7f20af45d95ebd57b8d15d5c94ba1faa8a5027f5
--- /dev/null
+++ b/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b3705e6af4df10ed316ff6e9b22c9ec8bb432cb68ac5391527f2bcff3926dc5
+size 107671
diff --git a/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet b/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..eb54a9da2058e4478c9febfb05da3bce1b6ac8bc
--- /dev/null
+++ b/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5f2dff11755ec0ff5d3f6e84fe90f49c54afb38c00f743cd7d6858f37284d10
+size 42317
diff --git a/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet b/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..89010fc3ca95428da46136da48648fc438cb8f2a
--- /dev/null
+++ b/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3694c13eaa74271d20430cda72ea079866fd6bb11ff3247edf015578520658d9
+size 30892
diff --git a/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet b/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c29c6d042611eb508b29340530a6f5219405cbfa
--- /dev/null
+++ b/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57a3aad5dd793d31b6558221b305b20766e99361c6b78e53b896c432ce197ec1
+size 45169
diff --git a/output/gemini-1.5-flash/xlam_tool_miss.parquet b/output/gemini-1.5-flash/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2c848279ab6b841f99ac96204b73424e2431c56c
--- /dev/null
+++ b/output/gemini-1.5-flash/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf00d5e071b49f106144109c6d5fb01753ab08d6031da71a58ad37ce706c55c5
+size 47444
diff --git a/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet b/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..33eef5872fa64c85678662f81ad9f3afc7ef1441
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4612d61b195f16a6fe53c62fc5d28cfd5a661d769737eb623bb34bf8ab87928e
+size 28338
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ed8886d326e812362dbd3e52f6b87f74188e7b09
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cbde82c9cfd00b594dbbd0e3b1da13ec4f6f9d0ec9b0227a422cfd358750d906
+size 22381
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..bbb58ca69a1d7864b4b918241f8fbdec17f482ce
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:99cea91fdd703ea5949aa8278763c2e469cf06b6b4487314dd658fe91c1eac3d
+size 22202
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9b8581531c2dd95862e8743790e294100225a6b7
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:41ec4ad5cf3508dc086b147b2258e06dd2cf6829110fb6d16672a5faed11b97a
+size 41073
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d8e1a706c1f54bf1fc84ea63fc318fdf8e339862
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fc9448ddec190eb66d29dddc24243cdccbbf6176c06aef6f88c3c1ebc2d805f9
+size 36764
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d80edbbdbe6638fe3b44a66fa4dc2f0ccc9fdc11
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ff269a50b030cd799da9ba80965c6efb811734f2574895f9c4be3476e1bf17a8
+size 35004
diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0b315417b725fde4aae1366d1e201f25a81521d4
--- /dev/null
+++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0977062fc3effefec9599db7903721f7c282b3f1719cf42dca0b768b692e32da
+size 39353
diff --git a/output/gemini-1.5-pro/tau_long_context.parquet b/output/gemini-1.5-pro/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..61ae143803a610c40fa64950a9a3d0b0f6d5c1cb
--- /dev/null
+++ b/output/gemini-1.5-pro/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d3ad91af5a1002edc2819545c9d6af16192bf6783fd5209492c01f7b770be28f
+size 45651
diff --git a/output/gemini-1.5-pro/toolace_single_func_call_1.parquet b/output/gemini-1.5-pro/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..17308eeabda8ca302fa89d5be7d903c49acb7239
--- /dev/null
+++ b/output/gemini-1.5-pro/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cef131778ae6557b60a107aae9d8880a215c53acaeea90b94d032be8467f1a70
+size 15193
diff --git a/output/gemini-1.5-pro/toolace_single_func_call_2.parquet b/output/gemini-1.5-pro/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6a312494b76573ee1a0e7a9a81351d07dcbed5bc
--- /dev/null
+++ b/output/gemini-1.5-pro/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:20d8c065f95f21e9056242df690edc31790da5ad360a66896b766c5f6294bebf
+size 12087
diff --git a/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet b/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8327a22e8ba737ec31a862d2bb1c8e90c78b9572
--- /dev/null
+++ b/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2e1a48301a76bf92981adfff1920ed0b6b71c5d74c0865b4b7c2c6cddf23b0b1
+size 49774
diff --git a/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet b/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d151bab1d5dad1b12d4bb14f5499f00365edda3c
--- /dev/null
+++ b/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70b5a8fc9a8476f7963bd0dbefd58e59faaf2c55e3f0f46146f4ecb043e57bbd
+size 41034
diff --git a/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet b/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..daeae99f5d3890dd19cdcd8582097fd102a3e6cf
--- /dev/null
+++ b/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e00f16af167fbe6c302470f6b66591749f6f94c52b93fc636012bada7f511c9c
+size 30936
diff --git a/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet b/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c8ec1546e7353731b38bbe253b0dba129acdc459
--- /dev/null
+++ b/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ede8fe40323d0f17736b361cf615adafed1e82a320efd91016fbd479c504f2de
+size 40537
diff --git a/output/gemini-1.5-pro/xlam_tool_miss.parquet b/output/gemini-1.5-pro/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..899bc1a01ee759b404ac8e0a1e1721308419b968
--- /dev/null
+++ b/output/gemini-1.5-pro/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:06c68ecda86c3fd2a1488985b96443a17f2895429e95c4781d26d6fa9bf0bc41
+size 48728
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet b/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..693898c6fb21ea87be007e5dec6b2f328071d394
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9418f5b9e8302dccf22089063cb7fca50d276fbbb5f1a14ac87d9758f3a84785
+size 30928
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cacbc67d7cc74e559c6712e568d8e0c22c361c2d
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e7beb22784290c6880d218232c099e368365a02382e902a58e0408f7f7d0916
+size 22138
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7b289d304287e0ae5dbe336bf761fb83d5221e50
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0302490b6eab03294fa631f3861917ae77262208dd43c69987437cad87c7ae06
+size 21570
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ff0e1f0fd9c6316c9c14a7f56d46992240296efc
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b40fda69942889b0b9a4cd6d0e5ca5b4395e05b5627e9f07a35f3daabb7b319e
+size 40294
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7bdf5db27d666749cc4a6662dc8ca779683d9b1d
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ef5e919669de1102fe8ecabfdcc6a0aefafb6a05e37bda4b7ebcdce206505e27
+size 36774
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d6e269f16fe611c85f7447f15a4cc6ec42a59857
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0c0675c09704dfcb93c5c3f16468eb1c0eacf6b6d402ef441158a7a02516f48f
+size 39831
diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4655947cc601e260630625822ee078cafa270ffb
--- /dev/null
+++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4eb7f9916b63087e616d35f583c6d7dddd815ccdbe8e9f61a6e1ba7576973b8d
+size 37934
diff --git a/output/gemini-2.0-flash-001/tau_long_context.parquet b/output/gemini-2.0-flash-001/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2493c7792f0c825cac983bfc61ec1322f2a55a19
--- /dev/null
+++ b/output/gemini-2.0-flash-001/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d2bc30eefa28286f8b2b4d9958267c78eba08bb674bde423f64430bd66319916
+size 42499
diff --git a/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet b/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..84e9ef6b4a11bc19ed04cc5be1b4033f2aa165c1
--- /dev/null
+++ b/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f1ca02d956c0002c31978df685f309576361656f4942eb60ca00ec16566183a
+size 15363
diff --git a/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet b/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..eb47b90d357a5ef07f10c4d982e5679efd10fd1e
--- /dev/null
+++ b/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a090befc959e0aef2b8db1d3136af52d626b1a418ebbf72442a36a4f4952ac6a
+size 11943
diff --git a/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet b/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b537726470a5cb5411b6349aeb268d53187ad71b
--- /dev/null
+++ b/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec2153c4db805f63b35f9509b702e485d831b9e855186e884704d118b5f321f6
+size 107999
diff --git a/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet b/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..609481c4fe924f7a88a2d9920da1c600c49d5506
--- /dev/null
+++ b/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68f0f3315d3db9a991cc0d8b3964a551a6055cd54be4d728d3005bbfddead472
+size 41168
diff --git a/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet b/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d54588003197e1c9bd626524b577bf8f164be36e
--- /dev/null
+++ b/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:26ac115f0f57da9565aafc315c0275bcecb0b88827f9db238f2a2ccd6defc8c5
+size 30989
diff --git a/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet b/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..29d871730795c6fb3f4dfe778d30526bbd6c2415
--- /dev/null
+++ b/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82027447d8573256810f8f0430152612d3795fc13dfb948f6fed5af145c93757
+size 43836
diff --git a/output/gemini-2.0-flash-001/xlam_tool_miss.parquet b/output/gemini-2.0-flash-001/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..85a026834da6a13b5f6336ce65dae4530ea90fdb
--- /dev/null
+++ b/output/gemini-2.0-flash-001/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5121c050fd3db66c2e8268a8c2703ccb19f8675a3394609f4f4a2dba8efd31e7
+size 45973
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..85e55849bedd2ee342dab292dca85fdd7c631f6a
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d035642bc7cec4942ca79b3c4e19b06f7823a53ce64530f1a25ea5a534226fd7
+size 33211
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f95f8b7630963f46faa5f485442e6412148303f0
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3ff79e50a4731de0671cbed84004783afcea66e136cc9e7ff4d7773b82b5add9
+size 23423
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..91579af1f655e0ca048be4c1efe24f178b7e1683
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd79507b606df0599766938b8cd08d2d5874f2312b9b991c5f9816082a09af35
+size 21602
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8c9ce9dc5f181ce7b72f6297a5bd282a8649db78
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0094efbfe2bb9f463ca99866bbc5ec3c050fcad6c04125121a691f3d439cd8e5
+size 40253
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6b4d09e7891042c3d2cf88b88f1cb7e965a6a5e4
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:370f700f343609cd1741163dbe2c5dbb5c19941d02feaa51304aac66c3012553
+size 36183
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..64ff5a1147b304e04b3377b5fbacdc823acbca27
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b7b0f80e2f1180bcbe3f28061f861f18450efc9747b09678f709ccc8f48138f
+size 37994
diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a4a9246b18f7a220aec653c00542462c3130c16c
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:15db89e972e0f6be8097202f4e71f54a2dfb4a94e48bc37b669f0739f27388ad
+size 40102
diff --git a/output/gpt-4o-2024-11-20/tau_long_context.parquet b/output/gpt-4o-2024-11-20/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7904d1213a16f46410cb40111ccc381732f8831b
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3b6dc08bcf01b49c401368ad8b88be91d09e21be37a4e9643512e8ffbc9a14fe
+size 39940
diff --git a/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet b/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7aeb7261c24d4c7617bca62d9a9161ea432c914e
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5d0d503e8e58736f31d2228b42bc1b583e991a090c19ebf85062a40e11e8066e
+size 15209
diff --git a/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet b/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c41e49c4e38d05c4106aa89df2e0612cdc31c488
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12e6f73695cf56607aaae7b42c8178f3587642de6cee745bfd53f80b4d465b01
+size 10966
diff --git a/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet b/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..00a4a9c0834f26b1508cf4ce8824731fc10f4dc5
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:153ca6f78eb961159d099969c563773ccdc2ec973b0a7339615e99766b52d929
+size 101090
diff --git a/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet b/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..036dd1633f6eafe46bd144877124d6828985061b
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d5ad47d7c0b4c8c52a8243cd9c8e553c8ef0ff13f9c6b08ed7df7666c0089d6
+size 40026
diff --git a/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet b/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..632058b7f953b1177d134969d8b7dd62ba986f9a
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f27bd670790f9457ad58e4c1e08fa0ad4ca63a84be5dc2504c3a252cca8c805e
+size 31855
diff --git a/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet b/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d56aa7bef0cb6789e4372fffb99228e434691a20
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4655e438e12d954da0b1ce12e4c03ae897995509bf1c42c9290e8387472f512e
+size 48007
diff --git a/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet b/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..65d98d4effb6c5d026fba9d3e9aec9563dd005d8
--- /dev/null
+++ b/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1ca31c1e28060629449372af722cb3f6d01253de06b90a94078bbc1a6a3f8ac7
+size 49543
diff --git a/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet b/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..51f9ab5d77e907a70a5bfedd217f13bd27ba6870
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9a0c3bf98f298552eed56303e350616c100acfb8197d971974cc4a3d0fd27f41
+size 28707
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9b460d74997a5a1c4ce670bab9695ae49defddba
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:18e7f39d5eed2bdced751486fcf08023af79791741c331de5e0f6b3054a4fdf1
+size 24007
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b19e30d6c0dea8bc734a704a875502e7f3def4d0
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:752bb7faee66296dceb18f69e1eeb0b7c8e6709cb34155d119b07d8942ea0501
+size 22673
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fdf426b7cdc6a7459c9245b8e883670d5704b015
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5e43f408cfb62859b3cf29ba33b87fa10843cd2f618f8473420ac7b99d9d0c26
+size 40128
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..5878b1e8f2393b6540d3e72d7de508e8ef73ca16
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ed05f5776b15df238c7260483532ef68996d5ccd7f7ab1db37962272a84e1291
+size 39154
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b94d1e87b4d26b8eff920f0239c9c2c4e97f733f
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:34841ca690b9b245ee54d1c3a2721863cbe7ae343441e082258af3c1411ffbcc
+size 39671
diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..33c51b4d5462f351f90ebc0ac64938c112741d5e
--- /dev/null
+++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:35769131c4e582a3a7c7991650dc8b4be53f24690963c002bd5e4b05592c9fc9
+size 41463
diff --git a/output/gpt-4o-mini/tau_long_context.parquet b/output/gpt-4o-mini/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7835a91c98d643d90598a2ed4a63d4fecf433a07
--- /dev/null
+++ b/output/gpt-4o-mini/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:707a33e2bc4d82ca725a3d0f95b85a088abfcb753c97ecf42c2f9d9962a1f9b2
+size 46378
diff --git a/output/gpt-4o-mini/toolace_single_func_call_1.parquet b/output/gpt-4o-mini/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1a83065bfa3ca107985846a851837ff5612f4652
--- /dev/null
+++ b/output/gpt-4o-mini/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66b118eb4bebcc5e24045ef248f37d9d8d48963903e8650b6522b2c91f77f2d1
+size 15218
diff --git a/output/gpt-4o-mini/toolace_single_func_call_2.parquet b/output/gpt-4o-mini/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f6a1377ed0d77a6b423287525d775cbe51611773
--- /dev/null
+++ b/output/gpt-4o-mini/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:83060408277527b9140ebb6cdd093a3f407c91093b0ca469a1c90728c064095d
+size 12157
diff --git a/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet b/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d4234ff75e88f38ae6ee649cc06e6a8dbde81564
--- /dev/null
+++ b/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1c7198ccf61e7427817437f564b3a2345b3c93bd253739efeb7a735d25a22f2f
+size 101412
diff --git a/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet b/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..32751c29f75bd71f9a469fa610d200d42d898ded
--- /dev/null
+++ b/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:82ee2b0e69490b2a03a18c530ca029ba0cad4691e0c5a1504e8ed00b4d481dcb
+size 40464
diff --git a/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet b/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e842fc5afa13e792aa5a823ab95abbf349f4af5a
--- /dev/null
+++ b/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e14b262aee04ef4cd0d7ff12b050e12dd8d4d4ab543c968e1850082e4821f3e9
+size 30350
diff --git a/output/gpt-4o-mini/xlam_single_tool_single_call.parquet b/output/gpt-4o-mini/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a830be1540f9d1cd6d2a1053c599718ea5b0b6c4
--- /dev/null
+++ b/output/gpt-4o-mini/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a40bb3fa0c725d24b3e34011599d50847476f061260e05d3cb717758c570a603
+size 43587
diff --git a/output/gpt-4o-mini/xlam_tool_miss.parquet b/output/gpt-4o-mini/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3d80cf92d47819a4b83898ad0bd9b48add222981
--- /dev/null
+++ b/output/gpt-4o-mini/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:916a301206a81efea98bb017d04f7f19c4160a880ffaed682bd4535cf8bb0e0f
+size 55624
diff --git a/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet b/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e435d64de8fd47def0bdd37a26cf93f76cbbbf5d
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0cb65900fd4b1382a5b2f98820b7869f1a66e256a4f5a265900c786f10e57489
+size 38643
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d275c140536c8da8aa228f7599aa1aa2c0400bf5
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:270e7928520a15de3be350539397da80b14c40fdb4778eda4c3f539d09101599
+size 22410
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..523907a841e68546ce5cb2d85709c229b157a4e4
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:70b39d2fe6c9014c15c1f5fb153e4479ebcdee38cf5dc1e0e4e496331b2e0ddf
+size 21303
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9e42e846417bb4a2ef410a084bbb474f0ce2c810
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f051b0eb97310cd4452f75b1b806808bacdeeb332a9478351be51a39c1bc780
+size 39382
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0839b77412941ebfcc27ebe2d263cd2031c9a38f
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df381465902a3e4b1fd02115ae4bff668c1ef9552c7f88cea76790381c81def2
+size 35245
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c45438e963e46e294c28aed6f63961938be39145
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ec0d7beec730a4ab3938434fd1b7d876d1d9f55cceb204007e5213f5085bb92c
+size 39912
diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e2187410980d7173310c3e9f4de394822de318b3
--- /dev/null
+++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9841a5541a80490e3e4ff8f23e0583ec883298b587bbb45199309a7602a38ed0
+size 41529
diff --git a/output/ministral-8b-2410/tau_long_context.parquet b/output/ministral-8b-2410/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fbed7aa8040266215f3d96fd17fbb211684b2b4f
--- /dev/null
+++ b/output/ministral-8b-2410/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:28c0240198811bd731cf5ff066442a806f519cf208e7642b2161baee4b7c2cd3
+size 38416
diff --git a/output/ministral-8b-2410/toolace_single_func_call_1.parquet b/output/ministral-8b-2410/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..30718c71d45efe62b84c533e34858d184196f474
--- /dev/null
+++ b/output/ministral-8b-2410/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d653e7c1e7ec22f2e86adaeb972704efd7ddfaff4ec8c11ddb5b34c4655dfb63
+size 16172
diff --git a/output/ministral-8b-2410/toolace_single_func_call_2.parquet b/output/ministral-8b-2410/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..001c281ebda6bcc71b76ba713f9c38cdccea0863
--- /dev/null
+++ b/output/ministral-8b-2410/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5256c752355e6073b42f72f521624c3d304ecddc54897304e8daf208cd5d9e0f
+size 11889
diff --git a/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet b/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6a8bbd709096c859b32e5e8df39b218608ec9ddc
--- /dev/null
+++ b/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f36990e2b07b50edefef4c9194db0be223ee73ea1f4b5232934dc536cb6c8ffb
+size 94431
diff --git a/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet b/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..76a044fec5a92f211ef8bf26e5b0e08c84fb2647
--- /dev/null
+++ b/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16e5404eab455248d2ca129d2d46a06f1465dea1cbd30b05503cbace90304421
+size 37736
diff --git a/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet b/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9bdc2b838092e24730fa6c0f05c60494f0ec8d57
--- /dev/null
+++ b/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:72d0f720192d62e632d752ffef9c67b0091d596fa5d27610dca536c544cd34fe
+size 28011
diff --git a/output/ministral-8b-2410/xlam_single_tool_single_call.parquet b/output/ministral-8b-2410/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f3ceec1d673ab403a355607d5b901bdf507b2f0b
--- /dev/null
+++ b/output/ministral-8b-2410/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7b910566e53966b82884caf149e2be5baed35dbdb1d47293f37d3fd839d8f13d
+size 44101
diff --git a/output/ministral-8b-2410/xlam_tool_miss.parquet b/output/ministral-8b-2410/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6c0dfbd98d02a2afec940706e3f7eb48af59e613
--- /dev/null
+++ b/output/ministral-8b-2410/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3feeb44ac4b5dbb03891b2cc8e9b50fd09a4ae1f8f7fb84fba92970c4cce0218
+size 49060
diff --git a/output/mistral-large-2411/BFCL_v3_irrelevance.parquet b/output/mistral-large-2411/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f7389f20186e36d23bbce580ce28401bda94ede7
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:857d12f4b4e99fa9f00467bde5ebd982425da63755652a0fb3b97bc920e1933c
+size 30702
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9ad19e787d09e7d7fe050283b8fda2e3308ca8cb
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f52c51a0f15ee2d231105da9cb76f254d1af3c608ff9429884ac462f02e61e90
+size 22852
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..01cfd3b0a0f81593b789b5326eeeddfec7b985c4
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46e3ebb22d755439a3414edf86765a0cdb3559f441950ac88071e294faf473f3
+size 22001
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..60ea24b062b19f35cb1a590eb74a58b33118f2b0
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:94b7969bf0f6ea006359f33c4d195dbb2ab62d0b6103ddeec677729dead38017
+size 41223
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9be57196a626a03d366e489c1322bc1cd01d64cd
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e71d6b8cab2e9c3140ac09eb45e43512a2f362b12aa1a5c1e1b071a5b8043740
+size 35651
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6eb0317af5d2963b113261b049cc040003b02ef4
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c86ed19c77109d501de73d7f3f2c359749ceb7d605b1f71463f4859e923cd6d
+size 38813
diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3f3daffc5bc9a735c516b514d54a49f05ce9ae80
--- /dev/null
+++ b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9637d6210edb287c9b8c5f1b6fe6d92f51a5b6fc73a4141553db7665a7c01a95
+size 40458
diff --git a/output/mistral-large-2411/tau_long_context.parquet b/output/mistral-large-2411/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..49ba86f2ae2fa013b93345a45d637bd13c3d5934
--- /dev/null
+++ b/output/mistral-large-2411/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:068bbc01cc56b2721c87afbc9f6918518771ab87c5dd6847200fc86f09d10d77
+size 40230
diff --git a/output/mistral-large-2411/toolace_single_func_call_1.parquet b/output/mistral-large-2411/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d7389eef3539480ff8fb2bfee294e6b66989a745
--- /dev/null
+++ b/output/mistral-large-2411/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:79d25ee70a7c715fc5277c594d44f96f4f8d985423a7795bedee99f7e1d89b8f
+size 15179
diff --git a/output/mistral-large-2411/toolace_single_func_call_2.parquet b/output/mistral-large-2411/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fc072c8de1377fad2469dd8de541b861040318ba
--- /dev/null
+++ b/output/mistral-large-2411/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b850d7b0175b43f70bba156d5aa36604fa16f0dfccc60e334417b338864efb70
+size 10603
diff --git a/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet b/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8caebc59c020015d2645358f13c70a13af9e4fed
--- /dev/null
+++ b/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2d8289b3b6b4ca120bb104a1380020c0352950aa8fbe15f175256176fc7770be
+size 92619
diff --git a/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet b/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b7b4e7e4eb7f7bf119a55ca6e9bc14670c8ed438
--- /dev/null
+++ b/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f4d70af4a64c32a58e5132b3a1e80f6d99389880fb7db3d4d7f8e8e0bde1605
+size 39237
diff --git a/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet b/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9a1d8b7bdc5c271b9265712c889676d6f99cdede
--- /dev/null
+++ b/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6e96162198615b2adc1dbfffda3d70bc87395f45740f6471c1d5e15b05b3730d
+size 28523
diff --git a/output/mistral-large-2411/xlam_single_tool_single_call.parquet b/output/mistral-large-2411/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..03dfed26abee7a2572808be93faf5f7468618beb
--- /dev/null
+++ b/output/mistral-large-2411/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:36ceadddf825d0f9db71e231c7d71880b0baf5239297ba02e907ad280d8ea5b5
+size 43714
diff --git a/output/mistral-large-2411/xlam_tool_miss.parquet b/output/mistral-large-2411/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..576cfb23933f88568cd98f572ce7f2286941370b
--- /dev/null
+++ b/output/mistral-large-2411/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d579a3e339e5bcc021ce9dd7e5a971f6c1561a09d57737afebf0b041b04f863
+size 43655
diff --git a/output/mistral-small-2409/BFCL_v3_irrelevance.parquet b/output/mistral-small-2409/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c812b2e3e5d754ca3f7fe40919b37ec8b9b15e05
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:52a4b575dd9dc0b2e34a2d54f043e6c4ad67b9979b5c399fd860443e5a664703
+size 31374
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..5b221f434e1f46c900972d67c728ef8943ce9c2b
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:49740ae3894319d5a40c86e13ca0355a4dface0ba78b072fc0b0c8e175d7feba
+size 21949
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4ff4ea42085245f67562cbf239c2a4f0dc64f178
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:93a8714037833303f75bb7375259623b094b448f27d84a0ccdb39fc4d2b21fbe
+size 21432
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1fa6811594c97a83f5c76c40d2337ed03ea56102
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27d356a0cf2590098fe2e180efe41a26b26c2901f701993abd1e06439eb1006c
+size 40361
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4baa778a23b79a68c05f06e808cf160efdc27c87
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cae8d071ae4629a6b1c82c6c0b00736c6a20d699dfad2fd71eb51c423cc09255
+size 36116
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4df00cc9b7d6d29bbb958b068e8bb9a25d710313
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0bf76aa092de9c7c3304066c2c7345483b8612a052a0e6b15679cf7f41e2198a
+size 40792
diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..060225bd53bc884eb4b4826b3738d0dd4538fbcb
--- /dev/null
+++ b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:47e661baa5d200644f967d1d9b1f99f0242330233b595265842cb8bc7f554f81
+size 40699
diff --git a/output/mistral-small-2409/tau_long_context.parquet b/output/mistral-small-2409/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..03722624d50b833c62155ff608601d0967779493
--- /dev/null
+++ b/output/mistral-small-2409/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7145187a08f1bb1548a83cd1173fe767c3ef2bf710bf2b2230c62f2d97b3d7bd
+size 40104
diff --git a/output/mistral-small-2409/toolace_single_func_call_1.parquet b/output/mistral-small-2409/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3e93e7ad0e1d99250086b3d4b3293769c78f320b
--- /dev/null
+++ b/output/mistral-small-2409/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08a888330b39b9ad7e0c739bc38162e0e44040fb517490eb0d64214468dcb656
+size 16134
diff --git a/output/mistral-small-2409/toolace_single_func_call_2.parquet b/output/mistral-small-2409/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..35ebfee340e7ae0e4c4e711628c3767f10b74bef
--- /dev/null
+++ b/output/mistral-small-2409/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e58913a05a63dc6a4b2317f44a5d189594a711990e1c0b94bd290ec1a1fe61f3
+size 11793
diff --git a/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet b/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b8f667b5d277cb90eca06bb4531003b5f6665e2e
--- /dev/null
+++ b/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c8e27644df90a7d784ee19abcb4aad028b0539b5c28e9950aec46c24246e05ac
+size 92947
diff --git a/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet b/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cb72d33d1ad990d7e0fbeba45e3207811bd55b04
--- /dev/null
+++ b/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9f4c985a99c1b5ebf114cc8356593cea804aae60e45ee84755793bdc52e7d024
+size 38472
diff --git a/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet b/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c529d6209b50d59cdb22b22c448b037b82d1cd2a
--- /dev/null
+++ b/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:74c20414f38f72174ff08b6b6efcd8a4c8183c97616cfacbf3414c15fa613beb
+size 27899
diff --git a/output/mistral-small-2409/xlam_single_tool_single_call.parquet b/output/mistral-small-2409/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..03108398db031500332ba4abf6d395c01f9218f2
--- /dev/null
+++ b/output/mistral-small-2409/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:62a406bfd71d285dff63c9ad405075d8638d82236efa9f85b806e1c8d02b2e56
+size 42800
diff --git a/output/mistral-small-2409/xlam_tool_miss.parquet b/output/mistral-small-2409/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..63c1be62f771dbe889c7976e1d4708304d7a58a6
--- /dev/null
+++ b/output/mistral-small-2409/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6d9e0570b39b726a0df2d4d2e8306ab306c0d78761be1bc0e7d310b95cedb36a
+size 47367
diff --git a/output/mistral-small-2501/BFCL_v3_irrelevance.parquet b/output/mistral-small-2501/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d34fb9e950338ed26d4b30a42d45ee9b82a95336
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0d1f5575bac38de57aa3ffbd47a605aa2d50535c15c380aa589dce7da757c37c
+size 34457
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..09798ea3ce13451be90bc33a95504374710439f4
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f572de78c8b18b1de9cfa28b2cb86840ca05b000a8c5d754257e27784232ec99
+size 22288
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2898c11665c05478d06d9e5cb807eeedc82aa06f
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:158bb34db42a4149bd551a47b0ccd74ca370e92276c96556620731abca86c228
+size 21869
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4cc54d55541183ecbbe5c24c86b53f4e64dc5927
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8cd510d392dc22e33f5b5dd620501415a2d40a38736eb0dcfe8057a24ccb2c1e
+size 37681
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..344d2ee5aab9ea842f96d0186e1814004e838002
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f71a6db126ce6135c7a82f0cae4701041ba598173535169d2f5b2cf39fca6c45
+size 35179
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a17e8c4b19bc481bd2aaed3f6d666617a22e03a8
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0401813ef618a6da8c60238d2eec00118c03f4e28dc810bc50a7ac87a798ba25
+size 35564
diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6b0cccdb32b0ed36ab92a40ebb96df8ccbffbbc5
--- /dev/null
+++ b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4cc168c2931f9ed461efdb91ee809ade9f092613b3d4a542c5155df21b5e33fa
+size 36579
diff --git a/output/mistral-small-2501/tau_long_context.parquet b/output/mistral-small-2501/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d3833c00ae4ebd68b945248cd475ed031bede6ef
--- /dev/null
+++ b/output/mistral-small-2501/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2318522680f2c54050da9773099cf787ee680c047697fa37bb1f395c32dc1634
+size 37570
diff --git a/output/mistral-small-2501/toolace_single_func_call_1.parquet b/output/mistral-small-2501/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ef8e963d8afb011a69448b71b743ae11831cfe1a
--- /dev/null
+++ b/output/mistral-small-2501/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d052bb4bbdf8a4b418305d185297ddb098ddf818b381cd541136228305efc3ce
+size 14905
diff --git a/output/mistral-small-2501/toolace_single_func_call_2.parquet b/output/mistral-small-2501/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..865e09098ec07f9cf935537dc5d7c7b530b0a99f
--- /dev/null
+++ b/output/mistral-small-2501/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:39b1fb656c21148e21c585b58ce82647e748b0045659daecd42a8bfc3a0eecb9
+size 10302
diff --git a/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet b/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..50f35bb1d00348d61b6691b331484d2f6ba3b260
--- /dev/null
+++ b/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fa8db2630ce302e16ef948885632346cd8008154737649c2bb675972d09bf1ca
+size 91560
diff --git a/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet b/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..338c64f2dbcf865cdb7733b55c7b2e8e48cda245
--- /dev/null
+++ b/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8b02e39f1bf1b52b2a0269bdf464289cefdaf69f0311b21124e3a4caaf78530f
+size 37546
diff --git a/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet b/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..dc0fe37b94ff83d7cf8c3705d399edee8ef4280a
--- /dev/null
+++ b/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:159df7de0f244b043c97fdc99600f7da9b1de1a94f0f7b985bb337db71d34695
+size 26915
diff --git a/output/mistral-small-2501/xlam_single_tool_single_call.parquet b/output/mistral-small-2501/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3fcf53a16f24cc4ea99ca2ddf569f081d393b20e
--- /dev/null
+++ b/output/mistral-small-2501/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bd2e8fc7dc73648dd5051ccb4f65a17fa932a4ac60c8fe611731feb1d6cedc80
+size 41430
diff --git a/output/mistral-small-2501/xlam_tool_miss.parquet b/output/mistral-small-2501/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..5c9786e2efce611cc664e8d1884820bb845c854d
--- /dev/null
+++ b/output/mistral-small-2501/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:63a128128e5aa05109924c78f6c075ceeb67952576f63fa587726b263ff19a7a
+size 45045
diff --git a/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet b/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..827fa6ea428a62d37298f419fb66e821b2243ca6
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf8e45b0af8aa06deaea9b797ceeacbca8467886211da5044f06d0e829589604
+size 33228
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..eb5c611e875e6086e475a8f092e3fce51dbb4718
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f0bcb7368700a11dbaca015efd51e3df5bccf23cd4e021917b95c4919e78a6dc
+size 22701
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..a7c485059ecf2f62fd321e1155b50542671da8e2
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9e16c694aa2b14884e62b2ee13e413e52eac732e2fe00866665944fd843ca1ed
+size 22862
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..460949868028b3eb0b7b04b2dfe79dd41b88c771
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7ca98082f3d0ab1bff6ec03055fec9b76dc101168e28c4796c921a1c11fb873a
+size 42084
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..fe07129f7972fe81e40f132a2efb57b50e76bd4f
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5b7b263cf86953638b63ed46b632acc599d395109c04449e72095d12eac159f2
+size 32475
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8ad0e825635a46eafc1589fda1074c5815acb47f
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90ba6109fde88737682fcc0df1e61e8e37b6b55d6201f73542da97abc93c45d9
+size 35092
diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..973f92b82c168947472d82694ca6fed5f9248d08
--- /dev/null
+++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:de90d8ba891984767feab406ad7d8804a3895c10a1732ecf17f71eaf8947805e
+size 41446
diff --git a/output/o1-2024-12-17/tau_long_context.parquet b/output/o1-2024-12-17/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3a91794bfb2a9d237020801cce451fc548e3f4bd
--- /dev/null
+++ b/output/o1-2024-12-17/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8956a3ba615f5bf658bfb0aa39bee518f29754f48a8ac7230ae36047a7c372c1
+size 47664
diff --git a/output/o1-2024-12-17/toolace_single_func_call_1.parquet b/output/o1-2024-12-17/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..37dc632408a5b21c55adf3297fdb12efee84b1a2
--- /dev/null
+++ b/output/o1-2024-12-17/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df2cdeb823d381ec65771e95ecb0c8ff51b9f988a4af124cfd4bb3b65d365626
+size 13993
diff --git a/output/o1-2024-12-17/toolace_single_func_call_2.parquet b/output/o1-2024-12-17/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..cb73c1fa8b2c45cc8e117650d3cc42f5e4015c70
--- /dev/null
+++ b/output/o1-2024-12-17/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:063485c79b0ba1fb0400641df12df7d55bc3935912b8dce81c07662041684b7e
+size 10883
diff --git a/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet b/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2fa8c11bf129164125c0e377ad441e65f4cb6942
--- /dev/null
+++ b/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0ea33a82d1afdfee6e894d2b9a81ac463a36cf792f3e36166bc551eebf41c6df
+size 89207
diff --git a/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet b/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ae3af62c7e70b4ff9186c7aa5b05e4f4a05ce154
--- /dev/null
+++ b/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a4c874617dd8a9ad5b090c2627aafad3a0625040eced52387f1104c393479656
+size 41318
diff --git a/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet b/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7679b6cf1eabe601344499714792104f2bb011f0
--- /dev/null
+++ b/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:05941c92ca197ded8d7f03ed7a5a6eea427b35e520d98a14b9ce50d4a43ca80d
+size 28346
diff --git a/output/o1-2024-12-17/xlam_single_tool_single_call.parquet b/output/o1-2024-12-17/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..52fd12bb36cd18ea9a342a975dbf9ea137fe13fc
--- /dev/null
+++ b/output/o1-2024-12-17/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a8c4c5a64f6cb6f77c3a0742c9b7119f7c1444a2528775edc7029ad9ce8039ed
+size 50475
diff --git a/output/o1-2024-12-17/xlam_tool_miss.parquet b/output/o1-2024-12-17/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..d0ccdc8a682241bb4c7f8955155fa5bd85c7fe91
--- /dev/null
+++ b/output/o1-2024-12-17/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:599c5d16379ac412445750e09899ef6e105b08cda3873994c113612089b83141
+size 50779
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet b/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9e895d8d79ed7a51af75bff6a0be7ba7866dc224
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0251df7573102d184e1b2278978f136fadd3309f94027b76ed26ee96b95fafb2
+size 40772
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..200ca8e404037c3345b00afab48eca0676a108e6
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:681b126959dd0a482a24fe37646cacca6f7641e9777904fb6b1ec23a1a100144
+size 22986
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ed73de7a514f160579d0bdc39a4da2156a7bda29
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:67d6aa260eca1288ccf890a29ef708b4d83488da2f40cd8665061f803eaafead
+size 22841
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..651ebf19a7bc7388542379584d56a5870e3c5e68
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a9e129d5f6c881ef1389e0d003f841719d24d3ffbb0d4e68c974136acca80f02
+size 44739
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..44f9f1534f6b8a3bafe360e50423d4575c40ead9
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:16ccde7de2ee7995b0e85af8ec325a7777adc794ddee805ef28bde9812cbc78b
+size 37387
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..54f5ada7cfd50338c104c59988aa31bbda5f05eb
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:61cf3c67cb952d0900d69590df6f9a997aed1a658c074f2423a807bf00a9cb3c
+size 40550
diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..68af3ed747d24c734a4bc6267206bc518f1cac69
--- /dev/null
+++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:142b0d29c80723c298d34c111f5f698e81b4fcb99028514800faa3ef00292ffc
+size 40149
diff --git a/output/o3-mini-2025-01-31/tau_long_context.parquet b/output/o3-mini-2025-01-31/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..923d5189246a7cbd53766f3350a384991c787a36
--- /dev/null
+++ b/output/o3-mini-2025-01-31/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1b48df930e501711013b6a68ff943a0f202cf42121ba5a27cbb38becb6b27096
+size 53306
diff --git a/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet b/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..4c09b669467ee241714be765d9432ffa5eff369d
--- /dev/null
+++ b/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d470cf0ebfbc10c66ef1ffc0d870b3840f5a17825a1bde0ae20e7559ffd7c6fa
+size 14793
diff --git a/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet b/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..29f149b4d48ffe8f6100620209f5b99af1cbef0b
--- /dev/null
+++ b/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7a700ac0dcae0906a7223444d105fe535ad29ba1c21317b04a1e43acf234de0
+size 12425
diff --git a/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet b/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3b31d35eb213f9e336afdf75bb8e18b4140baeb7
--- /dev/null
+++ b/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bae876dcfc459e6a46a4169f36f0d01eb41cc30a37136488fd8dfced732c6e48
+size 89686
diff --git a/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet b/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..78a95a29211ddebcfe1ab92ac2f32778378269e1
--- /dev/null
+++ b/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f79a4f11fdd7db611a0e889cda2e2244bc803b659d3c8e849a85828a9dccca0d
+size 39152
diff --git a/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet b/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..3fedb01a603b9e576e9d46079ff5835b89878fda
--- /dev/null
+++ b/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f696bb5b6dad79e6bb4514ab4a125975d575d2f574536f9abf1354b44e893ed
+size 30359
diff --git a/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet b/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..56ba40cba26f66181963c82bab00468304026fc1
--- /dev/null
+++ b/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c9e885e1de218ee18ad556a5da25cb70680fa471f9d35a1f92d428b94008c255
+size 42511
diff --git a/output/o3-mini-2025-01-31/xlam_tool_miss.parquet b/output/o3-mini-2025-01-31/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..bda7a366d0d8f6f39878bd636625f8387b6d24d6
--- /dev/null
+++ b/output/o3-mini-2025-01-31/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fabbb9419be03d34acc719002618b38e841c0ef0930daf43229397e69bf6be9f
+size 54065
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet b/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..606fdb337838d9d288b8d7e05991fe4770055f18
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bdaff452ecbb0203d9710208941614db2ae98cfe38a73505a27cc1664953211f
+size 40306
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..79d1332f622c94a8c0f2da4de47f77a72c41095c
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:df59f21532f95785aecab0401403c1de55c5e1f129fe3d5cfdbad7748bdefbd9
+size 22264
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..e3e7e90085397be2a325941a849e4e9cc16a283b
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3f5ed6a5bb72e3cb89fafe32faa61f805c78cb2e184c48845e91582e42bff374
+size 24759
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..f68872a1056340ee253a1255a4d810e145844cef
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d753cb575bbda366b3151d13f5f20224ae37c553a7f50fd52a390b0e704601
+size 40008
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..8d8c4feb8bd567d602481f2c548cae41996a44ff
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:66254fa40b2e3adf4e843a5aecbe7ef0d4c1089bc65939b4cbff53e81b09825b
+size 38243
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c40391605f725d05a7355fc33fe12dc6d5bad069
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bb2e78ffc0d7d42c3bb94541da7e9d2e69ae792b18ca2cbceec9919ab191c2d
+size 43658
diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..be54a66ceef256fb6abe7921513a19add471b315
--- /dev/null
+++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ebb7fd52431bfa843a0a168def5740fc5bdb937483654ac72dfb86812460d1df
+size 40354
diff --git a/output/open-mistral-nemo-2407/tau_long_context.parquet b/output/open-mistral-nemo-2407/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..ee5acc922ed10ce1b73052b8bece3a6d4d55600e
--- /dev/null
+++ b/output/open-mistral-nemo-2407/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:043e030ac8a5c30d42a732af4cd35a5811e900da5dd7b4ddcc2b4d8bc5ec07c5
+size 45520
diff --git a/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet b/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..26a6bb9ac0cc18a6015525dba443922ae788e4cb
--- /dev/null
+++ b/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c71c8f7a2165d4d097dce1eb662a44fc45a4c26659dcd6ad03fb6b6f1643ee0f
+size 15778
diff --git a/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet b/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..11e425fb6de72ca83bd254ab9c1eace778c94aeb
--- /dev/null
+++ b/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0742484e5de6cb0de2207627046b52efeb372ca734f8dbc3931f7c01249278ad
+size 11793
diff --git a/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet b/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..881313d5f21bcc6dab1549e5b786ce263755c161
--- /dev/null
+++ b/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e034675ab307ccba53f1e074f80dccce097b23f0114a66db68885eb448b3e70f
+size 93944
diff --git a/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet b/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7b39bdb8318b3cacdc16b7c42a718b2319587b83
--- /dev/null
+++ b/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:0faea55eb2d9698e1b7622c9fbfcabfc14a64c498ea1fc063aa3e12d6c0a3706
+size 39269
diff --git a/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet b/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2b7d9f2566dc9beacc5ad08d308d81065fb80919
--- /dev/null
+++ b/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:edfd01e6497ac3c1adb50a4496911c662d90899d124166fad2420e4f8f294889
+size 28422
diff --git a/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet b/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..9352a9ab109c3ca494165d4efa3874e71fc835c3
--- /dev/null
+++ b/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12a6ed223b4a817d12beee36ad833e1d14b144afc7b9b4b6d3ccdc659fa6af28
+size 43566
diff --git a/output/open-mistral-nemo-2407/xlam_tool_miss.parquet b/output/open-mistral-nemo-2407/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..33e23110e7ef06d901b700fe6132efa04af829f5
--- /dev/null
+++ b/output/open-mistral-nemo-2407/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c3a686d3619bbcb86f40f2a12fadaab442245faa3abe4225f293ebdaabe773c2
+size 51517
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..5cbdff5273ed655891f9ad67494c707d9758e364
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c25cfdf1d1dc5c6b2ab1adb0432d48d8e1247ca784e2a1627375f877127503e0
+size 38324
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..b346039eb9fa02a0a9482505fc1d027df2268a1a
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6bda1dba70342c11f13a8d9a1176c7b50cc3e59a3009b0818299293cc3a011c3
+size 22015
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..6d7ac9f5c10cf2621235a0f06ed7ff45236d4f6b
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf027e54e639b8ae17bf7ab677ea982daa1ea8ba9983ca57280eadd91afb7155
+size 22667
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..0b3623617c253f60f9bc8ea104e620130f08737d
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d1276117805c09b1a22cc58e70672918ecc180ec181d87425c33c2482863ec1
+size 45055
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2a4893b3f8b4984ce0bef4ae40477e94b939b21d
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:86a20e72c28d8e1c692ecbfd2f11ac5a6efa0da1721e0a70c0d287a2b4c31891
+size 36947
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c7171dd32677628b9a9134e22b6914476dfd2cc0
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:048ec3197847ab01d84d13a91698fd05b501697ce1eaefe5ed6984634eacaeb7
+size 44135
diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..66aa7910baff001746eec20ab5581cddc683e8dc
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2858d953d6e9e07ab8470cfaac7772833ff82885093a2705e8d22eca4e0b0cac
+size 43816
diff --git a/output/qwen2.5-72b-instruct/tau_long_context.parquet b/output/qwen2.5-72b-instruct/tau_long_context.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c15864bcf58e1e42d50885bf04c2f6bf01074cbf
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/tau_long_context.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:3d1031a6909a0f2783bfb7cf8a89fcf473217ddb75c06829d8e722c7a0cb2ffa
+size 40162
diff --git a/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet b/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..38b3a9e90d904b327ec9f2fca3e19c382d9332dc
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9c0bced5349fd60838e8457a5eff5c20f5550b79e829cc6e02bfaab4f66be66b
+size 15262
diff --git a/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet b/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c9c4a0d1b55cab5d69952e85c9e0c33310e327af
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:194465e6bcbd2c9ca72a2bcaf1850ec0c2cf93a6b099a35256746ac156884caf
+size 12250
diff --git a/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet b/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..2320e63a68c6eb389913d44285041eca1a082f11
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:46f0ae6dcee6ac4f4984e88b03ad4cc6f3dae69eeb74fc88311748e73517c4cf
+size 87738
diff --git a/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet b/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..7b02f842fc3aafe702bafe132db7b97d1bdacc44
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:356d18bda1985b977c02ca7731dc6304abad56354ca2854530ba199826b7f005
+size 39426
diff --git a/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet b/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..c1e5bd1319c49e6b26c8251abd1e9279979de91d
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a30f45f7053bf4bfae96b525b233748796032dac6646903381935417c7a64954
+size 28692
diff --git a/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet b/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..88060b49662d444955ecf6d6a8fb7b907cd59139
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:039ffb80b746e7c25416fec9724f846eb0261a27690eb821d43ad493a47f8c5a
+size 40202
diff --git a/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet b/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet
new file mode 100644
index 0000000000000000000000000000000000000000..1e982be07ca95f724ee9f0760be8007b35190ed1
--- /dev/null
+++ b/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:278daacc873ff49258937965a0a7059c50bd62d930c3c0c6c091813e313e72c8
+size 49739
diff --git a/tabs/data_exploration.py b/tabs/data_exploration.py
index 6c4ae828a1ffefc45b4d83469a2bc761875f8e53..2c25f86cfb24e19dfe2a00e6215d257ef4155535 100644
--- a/tabs/data_exploration.py
+++ b/tabs/data_exploration.py
@@ -1,20 +1,22 @@
import gradio as gr
from chat import get_chat_and_score_df, update_chat_display
-
def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT):
- def filter_and_update_display(model, dataset, selected_scores, current_index):
+
+ def filter_and_update_display(model, dataset, min_score, max_score, current_index):
try:
df_chat = get_chat_and_score_df(model, dataset)
- if selected_scores:
- df_chat = df_chat[df_chat["score"].isin(selected_scores)]
+
+ # Filter by score range
+ df_chat = df_chat[
+ (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score)
+ ]
if df_chat.empty:
return (
"
No data available for selected filters
",
"
No metrics available
",
"
No tool information available
",
- gr.update(maximum=0, value=0),
"0/0",
)
@@ -28,7 +30,6 @@ def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT):
chat_html,
metrics_html,
tool_html,
- gr.update(maximum=max_index, value=current_index),
f"{current_index + 1}/{len(df_chat)}",
)
except Exception as e:
@@ -37,112 +38,129 @@ def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT):
f"
Error: {str(e)}
",
"
No metrics available
",
"
No tool information available
",
- gr.update(maximum=0, value=0),
"0/0",
)
with gr.Tab("Data Exploration"):
gr.HTML(HEADER_CONTENT)
- with gr.Row():
- filters_column = gr.Column(scale=1, min_width=300)
- with filters_column:
- gr.Markdown("# Exploration Filters")
- explore_model = gr.Dropdown(
- choices=MODELS,
- value=MODELS[0],
- label="Select Model",
- )
- explore_dataset = gr.Dropdown(
- choices=DATASETS,
- value=DATASETS[0],
- label="Select Dataset",
- )
- explore_scores = gr.Dropdown(
- choices=SCORES,
- value=SCORES,
- multiselect=True,
- label="Score Range",
- )
- gr.Markdown("## Navigation")
- index_slider = gr.Slider(
- minimum=0,
- maximum=0,
- step=1,
- value=0,
- label="Position",
- )
- index_text = gr.HTML("0/0")
- with gr.Row():
- prev_btn = gr.Button("← Previous")
- next_btn = gr.Button("Next →")
+ # All filters in a single row with consistent sizing
+ with gr.Row(equal_height=True):
+ explore_model = gr.Dropdown(
+ choices=MODELS,
+ value=MODELS[0],
+ label="Model",
+ container=True,
+ scale=1,
+ )
+ explore_dataset = gr.Dropdown(
+ choices=DATASETS,
+ value=DATASETS[0],
+ label="Dataset",
+ container=True,
+ scale=1,
+ )
+ min_score = gr.Slider(
+ minimum=min(SCORES),
+ maximum=max(SCORES),
+ value=min(SCORES),
+ step=0.1,
+ label="Minimum Score - TSQ",
+ container=True,
+ scale=1,
+ )
+ max_score = gr.Slider(
+ minimum=min(SCORES),
+ maximum=max(SCORES),
+ value=max(SCORES),
+ step=0.1,
+ label="Maximum Score - TSQ",
+ container=True,
+ scale=1,
+ )
+
+ # Navigation row
+ with gr.Row(variant="panel"):
+ index_display = gr.HTML( # Changed the variable name to index_display
+ value="0/0", elem_id="index-display", elem_classes="text-center"
+ )
+ with gr.Row():
+ prev_btn = gr.Button("← Previous", size="lg", variant="secondary")
+ next_btn = gr.Button("Next →", size="lg", variant="secondary")
- content_column = gr.Column(scale=4)
- with content_column:
- chat_display = gr.HTML()
- metrics_display = gr.HTML()
- tool_info_display = gr.HTML()
+ # Content area with equal column widths
+ with gr.Row(equal_height=True):
+ chat_display = gr.HTML()
+ metrics_display = gr.HTML()
+ tool_info_display = gr.HTML()
- def update_on_filter_change(model, dataset, scores, _):
- return filter_and_update_display(model, dataset, scores, 0)
+ current_index = gr.State(value=0)
- for control in [explore_model, explore_dataset, explore_scores]:
+ # Update display on filter change
+ def update_on_filter_change(model, dataset, min_score, max_score):
+ return filter_and_update_display(model, dataset, min_score, max_score, 0)
+
+ for control in [explore_model, explore_dataset, min_score, max_score]:
control.change(
update_on_filter_change,
- inputs=[explore_model, explore_dataset, explore_scores, gr.State(0)],
+ inputs=[explore_model, explore_dataset, min_score, max_score],
outputs=[
chat_display,
metrics_display,
tool_info_display,
- index_slider,
- index_text,
- ],
+ index_display,
+ ], # Changed to index_display
)
- def navigate(direction, current, model, dataset, scores):
- new_index = current + direction
- return filter_and_update_display(model, dataset, scores, new_index)
+ # Navigation functions
+ def navigate(direction, current_idx, model, dataset, min_score, max_score):
+ new_index = current_idx + direction
+ return (
+ *filter_and_update_display(
+ model, dataset, min_score, max_score, new_index
+ ),
+ new_index,
+ )
prev_btn.click(
- lambda idx, m, d, s: navigate(-1, idx, m, d, s),
- inputs=[index_slider, explore_model, explore_dataset, explore_scores],
+ lambda idx, m, d, min_s, max_s: navigate(-1, idx, m, d, min_s, max_s),
+ inputs=[
+ current_index,
+ explore_model,
+ explore_dataset,
+ min_score,
+ max_score,
+ ],
outputs=[
chat_display,
metrics_display,
tool_info_display,
- index_slider,
- index_text,
- ],
+ index_display,
+ current_index,
+ ], # Changed to index_display
)
next_btn.click(
- lambda idx, m, d, s: navigate(1, idx, m, d, s),
- inputs=[index_slider, explore_model, explore_dataset, explore_scores],
- outputs=[
- chat_display,
- metrics_display,
- tool_info_display,
- index_slider,
- index_text,
+ lambda idx, m, d, min_s, max_s: navigate(1, idx, m, d, min_s, max_s),
+ inputs=[
+ current_index,
+ explore_model,
+ explore_dataset,
+ min_score,
+ max_score,
],
- )
-
- index_slider.change(
- lambda idx, m, d, s: filter_and_update_display(m, d, s, int(idx)),
- inputs=[index_slider, explore_model, explore_dataset, explore_scores],
outputs=[
chat_display,
metrics_display,
tool_info_display,
- index_slider,
- index_text,
- ],
+ index_display,
+ current_index,
+ ], # Changed to index_display
)
return (
chat_display,
metrics_display,
tool_info_display,
- index_slider,
- index_text,
+ index_display, # Changed to index_display
)