diff --git a/app.py b/app.py index ab2878333e0363b403c2d33558ef1346b6e2fd6c..08e49addd66781f6869dc3a4068028d3bf52b8b2 100644 --- a/app.py +++ b/app.py @@ -3,7 +3,6 @@ import promptquality as pq from dotenv import load_dotenv load_dotenv() -pq.login("https://console.demo.rungalileo.io") from data_loader import ( load_data, @@ -36,9 +35,9 @@ def create_app(): mc_info, mc_plot = create_model_comparison_tab(df, HEADER_CONTENT) - # exp_outputs = create_exploration_tab( - # df, MODELS, DATASETS, SCORES, HEADER_CONTENT - # ) + exp_outputs = create_exploration_tab( + df, MODELS, DATASETS, SCORES, HEADER_CONTENT + ) # Initial loads app.load( @@ -55,10 +54,10 @@ def create_app(): outputs=[mc_info, mc_plot], ) - # app.load( - # fn=lambda: filter_and_update_display(MODELS[0], DATASETS[0], [], 0), - # outputs=exp_outputs, - # ) + app.load( + fn=lambda: filter_and_update_display(MODELS[0], DATASETS[0], 0, 1, 0), + outputs=exp_outputs, + ) return app diff --git a/chat.py b/chat.py index c7f4af390fef59462abec6fd3f7c974370faea1d..8333afff4fd1223912879a1638f4b293f31bd681 100644 --- a/chat.py +++ b/chat.py @@ -1,199 +1,350 @@ -# chat.py import gradio as gr -import json import pandas as pd -import numpy as np -from functools import lru_cache -import promptquality as pq - -project_name = "agent-lb-v1" -PROJECT_ID = pq.get_project_from_name(project_name).id - - -@lru_cache(maxsize=1000) -def get_model_score_for_dataset(model, dataset): - print(f"Getting metrics for {model} {project_name} for dataset {dataset}") - run_name = f"{model} {dataset}" - run_id = pq.get_run_from_name(run_name, PROJECT_ID).id - rows = pq.get_rows( - project_id=PROJECT_ID, - run_id=run_id, - task_type=None, - config=None, - starting_token=0, - limit=1000, - ) +import json - rationales = [d.metrics.tool_selection_quality_rationale for d in rows] - scores = [ - round(d.metrics.tool_selection_quality, 2) - for d, rationale in zip(rows, rationales) - if rationale - ] - explanations = [ - d.metrics.tool_selection_quality_explanation - for d, rationale in zip(rows, rationales) - if rationale + +def get_updated_df(df, df_output): + df = df.iloc[: len(df_output)].copy() + df["response"] = df_output["response"].tolist() + df["rationale"] = df_output["rationale"].tolist() + df["explanation"] = df_output["explanation"].tolist() + df["score"] = df_output["score"].tolist() + cols = [ + "conversation", + "tools_langchain", + "n_turns", + "len_query", + "n_tools", + "response", + "rationale", + "explanation", + "score", ] - rationales = [r for r in rationales if r] - mean_score = round(np.mean(scores), 2) - return { - "mean_score": mean_score, - "scores": scores, - "rationales": rationales, - "explanations": explanations, - } - - -def get_updated_df(df, data): - df["rationale"] = data["rationales"] - df["explanation"] = data["explanations"] - df["score"] = data["scores"] - return df + return df[cols] def get_chat_and_score_df(model, dataset): - data = get_model_score_for_dataset(model, dataset) + df_output = pd.read_parquet(f"output/{model}/{dataset}.parquet") df = pd.read_parquet(f"datasets/{dataset}.parquet") - df = get_updated_df(df, data) + df = get_updated_df(df, df_output) return df -def format_chat_message(role, content): - """Format individual chat messages with proper styling.""" +def format_chat_message(role, content, is_response=False): + """Format individual chat messages with alignment based on role.""" role_style = role.lower() + alignment = "flex-end" if role_style == "user" else "flex-start" + max_width = "80%" + + # Clean up any excessive whitespace while preserving intentional line breaks + cleaned_content = "\n".join(line.strip() for line in content.split("\n")) + + background_color = ( + "var(--response-bg)" if is_response else f"var(--message-bg-{role_style})" + ) + return f""" -
-
{role}
-
{content}
+
+
+
+ {role + (" Response" if is_response else "")} +
+
+ {cleaned_content} +
+
""" +def format_response(response): + """Format the response data, handling both JSON and text.""" + try: + # Try to parse as JSON + response_data = json.loads(response) + # Format JSON response nicely + formatted_response = json.dumps(response_data, indent=2) + except (json.JSONDecodeError, TypeError): + # If not JSON, use as is + formatted_response = str(response) + + return formatted_response + + +def parse_tool_schema(tool): + """Parse tool schema to extract name, description, and parameters properly.""" + name = tool.get("title", "Unnamed Tool") + description = tool.get("description", "No description available") + + parameters = {} + if "properties" in tool: + for param_name, param_data in tool["properties"].items(): + param_desc = param_data.get("description", "No description") + param_type = param_data.get("type", "unknown") + parameters[param_name] = f"{param_desc} (Type: {param_type})" + + return name, description, parameters + + def format_tool_info(tools): - """Format tool information with proper styling.""" + """Format tool information with improved schema parsing and dark theme support.""" if isinstance(tools, str): try: tools = json.loads(tools) except: - return "
No tool information available
" + return '
No tool information available
' if not tools: - return "
No tool information available
" + return '
No tool information available
' tool_html = "" for tool in tools: + name, description, parameters = parse_tool_schema(tool) tool_html += f""" -
-
{tool.get('name', 'Unnamed Tool')}
-
{tool.get('description', 'No description available')}
-
- {format_parameters(tool.get('parameters', {}))} +
+
+ {name} +
+
+ {description} +
+
+ {format_parameters(parameters)}
""" - return f'
{tool_html}
' - + return f""" +
+ + {tool_html} +
+ """ def format_parameters(parameters): if not parameters: - return "
No parameters
" + return '
No parameters
' params_html = "" for name, desc in parameters.items(): params_html += f""" -
- {name}: {desc} +
+
+ {name} +
+
+ {desc} +
""" return params_html - def format_metrics(score, rationale, explanation): - """Format metrics display with proper styling.""" + """Format metrics display with improved dark theme support.""" + score_color = ( + "var(--score-high)" + if score >= 0.7 + else "var(--score-med)" if score >= 0.4 else "var(--score-low)" + ) return f""" -
-
-

Score

-
{score:.2f}
+
+
+

TSQ Score

+
+ {score:.2f} +
-
-

Rationale

-
{rationale}
+
+

Rationale

+
+ {rationale} +
-
-

Explanation

-
{explanation}
+
+

Explanation

+
+ {explanation} +
""" - def update_chat_display(df, index): - """Update the chat visualization for a specific index.""" + """Update the chat visualization with improved dark theme support.""" if df is None or df.empty or index >= len(df): return ( - "
No data available
", - "
No metrics available
", - "
No tool information available
", + '
No data available
', + '
No metrics available
', + '
No tool information available
', ) row = df.iloc[index] - # Format chat messages messages = json.loads(row["conversation"]) + response = row["response"] + formatted_response = format_response(response) + + # Create list of all messages including the response + all_messages = [ + format_chat_message(msg["role"], msg["content"]) for msg in messages + ] + all_messages.append( + format_chat_message("Assistant", formatted_response, is_response=True) + ) + chat_html = f""" -
- {"".join([format_chat_message(msg["role"], msg["content"]) - for msg in messages])} +
+ {"".join(all_messages)}
""" - # Format metrics metrics_html = format_metrics(row["score"], row["rationale"], row["explanation"]) - - # Format tool info tool_html = format_tool_info(row["tools_langchain"]) return chat_html, metrics_html, tool_html -def filter_and_update_display(model, dataset, selected_scores, current_index): +def filter_and_update_display(model, dataset, min_score, max_score, current_index): try: - # Get data and filter by scores df_chat = get_chat_and_score_df(model, dataset) - if selected_scores: - df_chat = df_chat[df_chat["score"].isin(selected_scores)] + df_chat = df_chat[ + (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score) + ] if df_chat.empty: return ( - "
No data available for selected filters
", - "
No metrics available
", - "
No tool information available
", - gr.update(maximum=0, value=0), + '
No data available for selected filters
', + '
No metrics available
', + '
No tool information available
', "0/0", ) - # Update index bounds max_index = len(df_chat) - 1 current_index = min(current_index, max_index) - - # Get displays for current index chat_html, metrics_html, tool_html = update_chat_display(df_chat, current_index) - return ( - chat_html, - metrics_html, - tool_html, - gr.update(maximum=max_index, value=current_index), - f"{current_index + 1}/{len(df_chat)}", - ) + index_display = f'
{current_index + 1}/{len(df_chat)}
' + return chat_html, metrics_html, tool_html, index_display + except Exception as e: - print(f"Error in filter_and_update_display: {str(e)}") + error_html = f""" +
+ Error: {str(e)} +
+ """ return ( - f"
Error: {str(e)}
", - "
No metrics available
", - "
No tool information available
", - gr.update(maximum=0, value=0), + error_html, + '
No metrics available
', + '
No tool information available
', "0/0", ) diff --git a/data_loader.py b/data_loader.py index db4227e8d4a68e6c62e3052b57a39848dd35d1ca..6e68494dcd88724012dd2133abfdef6a70ece2eb 100644 --- a/data_loader.py +++ b/data_loader.py @@ -1,11 +1,25 @@ import pandas as pd -from glob import glob -import numpy as np -from pathlib import Path -DATASETS = [Path(file).stem for file in glob("datasets/*.parquet")] -SCORES = [round(x, 2) for x in np.arange(0, 1.1, 0.1).tolist()] +DATASETS = [ + "BFCL_v3_irrelevance", + "BFCL_v3_multi_turn_base_multi_func_call", + "BFCL_v3_multi_turn_base_single_func_call", + "BFCL_v3_multi_turn_composite", + "BFCL_v3_multi_turn_long_context", + "BFCL_v3_multi_turn_miss_func", + "BFCL_v3_multi_turn_miss_param", + "tau_long_context", + "toolace_single_func_call_1", + "toolace_single_func_call_2", + "xlam_multiple_tool_multiple_call", + "xlam_multiple_tool_single_call", + "xlam_single_tool_multiple_call", + "xlam_single_tool_single_call", + "xlam_tool_miss", +] + +SCORES = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0] def load_data(): """Load and preprocess the data.""" diff --git a/get_exp_data.ipynb b/get_exp_data.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..b0116a58e4930e84f4243e2d64131afc30707430 --- /dev/null +++ b/get_exp_data.ipynb @@ -0,0 +1,167 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import pandas as pd\n", + "import numpy as np\n", + "from functools import lru_cache\n", + "from concurrent.futures import ThreadPoolExecutor\n", + "import promptquality as pq\n", + "from dotenv import load_dotenv\n", + "from data_loader import DATASETS, load_data\n", + "from tqdm.auto import tqdm\n", + "\n", + "load_dotenv()\n", + "pq.login(\"https://console.demo.rungalileo.io\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "project_name = \"agent-lb-v1\"\n", + "PROJECT_ID = pq.get_project_from_name(project_name).id\n", + "\n", + "\n", + "@lru_cache(maxsize=1000)\n", + "def get_output_df(model, dataset):\n", + " print(f\"Getting metrics for {model} {project_name} for dataset {dataset}\")\n", + " run_name = f\"{model} {dataset}\"\n", + " run_id = pq.get_run_from_name(run_name, PROJECT_ID).id\n", + " rows = pq.get_rows(\n", + " project_id=PROJECT_ID,\n", + " run_id=run_id,\n", + " task_type=None,\n", + " config=None,\n", + " starting_token=0,\n", + " limit=1000,\n", + " )\n", + "\n", + " rationales = [d.metrics.tool_selection_quality_rationale for d in rows]\n", + "\n", + " scores = [\n", + " round(d.metrics.tool_selection_quality, 2)\n", + " for d, rationale in zip(rows, rationales)\n", + " if rationale\n", + " ]\n", + " \n", + " explanations = [\n", + " d.metrics.tool_selection_quality_explanation\n", + " for d, rationale in zip(rows, rationales)\n", + " if rationale\n", + " ]\n", + " \n", + " responses = [d.response for d, rationale in zip(rows, rationales)\n", + " if rationale\n", + " ]\n", + " \n", + " rationales = [r for r in rationales if r]\n", + " mean_score = round(np.mean(scores), 2)\n", + " \n", + " data = {\n", + " \"response\": responses,\n", + " \"mean_score\": mean_score,\n", + " \"score\": scores,\n", + " \"rationale\": rationales,\n", + " \"explanation\": explanations,\n", + " }\n", + " return pd.DataFrame(data)\n", + "\n", + "def save_output_df(df, model, dataset):\n", + " os.makedirs(f\"output/{model}\", exist_ok=True)\n", + " df.to_parquet(f\"output/{model}/{dataset}.parquet\")\n", + "\n", + "def get_updated_df(df, df_output):\n", + " df = df.iloc[:len(df_output)].copy()\n", + " \n", + " df[\"response\"] = df_output[\"response\"].tolist()\n", + " df[\"rationale\"] = df_output[\"rationale\"].tolist()\n", + " df[\"explanation\"] = df_output[\"explanation\"].tolist()\n", + " df[\"score\"] = df_output[\"score\"].tolist()\n", + " cols = ['conversation', 'tools_langchain', 'n_turns',\n", + " 'len_query', 'n_tools', 'response', 'rationale', 'explanation', 'score']\n", + " return df[cols]\n", + "\n", + "\n", + "def get_chat_and_score_df(model, dataset):\n", + " df_output = pd.read_parquet(f\"output/{model}/{dataset}.parquet\")\n", + " df = pd.read_parquet(f\"datasets/{dataset}.parquet\")\n", + " df = get_updated_df(df, df_output)\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def process_dataset(args):\n", + " model, dataset = args\n", + " if os.path.exists(f\"output/{model}/{dataset}.parquet\"):\n", + " return None\n", + " print(model, dataset)\n", + " df_output = get_output_df(model, dataset)\n", + " save_output_df(df_output, model, dataset)\n", + " return f\"Completed: {model} - {dataset}\"\n", + "\n", + "def process_model_datasets(model, datasets, max_workers=5):\n", + " with ThreadPoolExecutor(max_workers=max_workers) as executor:\n", + " # Create arguments list for each dataset\n", + " args_list = [(model, dataset) for dataset in datasets]\n", + " \n", + " # Process datasets in parallel with progress bar\n", + " list(tqdm(\n", + " executor.map(process_dataset, args_list),\n", + " total=len(datasets),\n", + " desc=f\"Datasets ({model})\",\n", + " position=1,\n", + " leave=False\n", + " ))\n", + "\n", + "\n", + "models = [\"accounts/fireworks/models/qwen2p5-72b-instruct\", \"meta-llama/Llama-3.3-70B-Instruct-Turbo\", \"meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo\"]\n", + "# models = load_data()[\"Model\"]\n", + "\n", + "# Process each model sequentially, but datasets in parallel\n", + "for model in tqdm(models, desc=\"Models\", position=0):\n", + " process_model_datasets(model, DATASETS)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "langgraph", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5e54732d481c1146f544d138dfb73e1524b7230d --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e2817c3fbce11e1585e889164baa236816796332f0f11b2cb9a1897417572927 +size 36407 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6ce69ec80d457cce39867d19fe5307896f2a4262 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8878a5ccc9bf16026ff0c951819f183607799c060e25f5a8f99ce0cba286f684 +size 25352 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..88f68ef27c9cffa79986ce13422a08950b1818e1 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c37d524e3c17f02429af0774587d4aa0b5d28d85ed1ef7cdcd9306122ead2a8 +size 22850 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..76f8cacd0924613b7da958cde30e05d5091c1f9e --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ba9fd63c299f61fdf3c59ad582ab374f13198efc03bbadb99bbe31c6bbf1a71 +size 42354 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bff4fd766c593fdf2ed63732bee7439b662a7417 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e22dc3995bca271de419792437a4e95756d010e05ccdad024b91acfbba9a742 +size 38027 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cd65552aff6e7f6f8aac20970dc85c487dc7e393 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a31da12d9d42cafdd92f5c24d898f3423188b2a2d90cca908a9fe40ccc4fb35 +size 41590 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a0027694d1a719125a6d7d23da3a0520e4dbf6a0 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df9e322d8426cd7dccd971ee3aac870511ff3f5a2e07964df21de4ad0f24915c +size 42727 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet b/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e2240d92ab66d88f038496e72eb2a04d197f36e1 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7d41adc0a7212cde1b5e107425f7396ddf64967fe270c180209f9363ea86ea63 +size 47080 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cb78937960b444175efa2cb887320ee444ff04a2 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4515b1d276f4d3737609be9d6a682d83a65c2a560cb626b739659bea11cee266 +size 13137 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9f015b5dc34ff20c1f6c88c6b0dea020e3738135 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4f418e1e2dc111f4df5568882e929e697191f210349ecf83888727a6a60c1fc +size 11495 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0e1ab8894df9e4553474030d50cbe5ba6f594f86 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba4341a56d0cbfc556ded33ba0d975fe9bf4a3f3071f78b44cba99a46dac1988 +size 103694 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..119a821b4414b13eacad4fbb241dbf0c219f83eb --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9fd1511267bb48fe464e545a0946e0c7952f8657864794c9d278874b08be111 +size 39286 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9321df474c6542233245160714a727fa5f77bac1 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57dc4c2b0927d4a4788b4736e8e2e506685892bb367141c756a2eacc536b5022 +size 30411 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0708fd5c6a276aae0995417bb832258ee912ef20 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93d602bbbe10a95b4db1079ea8ad5958e530b89ac357ec528c563e7662f20e3c +size 43779 diff --git a/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet b/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ec6fb5eb42ee2f97efa0ec55ae24d17e642f0a73 --- /dev/null +++ b/output/Llama-3.3-70B-Instruct-Turbo/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f82514fec1d787c447f91f28b35fce99c1aa7b485c53dbf90a656550d9270c +size 49444 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d91c901d767928af4f7f9da4805d93ecd9f10d46 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1632880d864efbb48b5533c14a9ab6dfc3bd8c756d8612ca2baca0bcb7a69e4b +size 41488 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e7bb00eb06aa23c328ca20fd5c9d04644ce6ef65 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6865f39478b20a8a4b30a19aab53fe35581db64f29a0a18ec6c28c4e0d90f93d +size 28818 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a07090ff56422a6b73bc5c688d8def8ac7afafc5 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:649363984f056844ef6ebef6040f5c7b202ace7b2122512b1ddb4c0b2373a9ad +size 24260 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a999d75386a5f32c352239834cc4a162d862a7f2 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f20ada4667753ca67fbbd4ee3fd160d883781e5fec4cadb5f7395c1d47f54d8b +size 59035 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..98aa89993f3b23eb2a3a7453784267a566bbe35d --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e172a62f31efc4e91fbe02354ea9919597eadd6086103d13df9da476e9f7ba +size 45652 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..35a82aadc9562294d5d16eefe626534b307c2c09 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57b4255e618139abbb1c5531baeed296c6b97cb991dce0b118d984f178d8473a +size 49516 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6f081704239bd2c2a25779aeadc73609a6bbeae1 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:804d402cd62a9c6aeecbc1a29cafb076445b0a00d08bc113dc54120302f9d68f +size 45782 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4f02fea6004ee11f898142d9c70ebe5417ad7ec1 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38557506e024d20030df47cf7222dde468fe88e2cdd402846c548466d095ef43 +size 106328 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..161d8687a057cd4a7b4d7bd8c950f95dbc2c2ef5 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac567ee462a1f2564922e3249d868fc1a6b46b346d2a5e4d537197fd726eafe4 +size 18349 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f2cce42ce3341dce1febb792e7a3e27a2308aec8 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f76072d412286daee3a23a2867db77d74f0d94d6179b0279f6b4d685807247e2 +size 14697 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f64c527dc002b440b8ac1dcde6f8aa161f9579ad --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f742f430b2ddd553f8e34b1b805b08e2b3369581b8ac24455bc408f310902a9 +size 103082 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..14c866e9952a222328a915a361c5241c9a512588 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:af9bf31720ee319f14bd8680183e907d367bf3f03e748431da05633ccd555ec0 +size 39935 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..736a76f80c044ac68edcc6c4bddce0670b1b1c1e --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b585b6c36cee9c9fd859db2371a2d35e3e5f5129503d7261ca4b72288c559e +size 30561 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f74c54b5e9a8bf2902bf53c1de0f4c1d6a30955b --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a86349e92d6aa556b37b8b56ecbd60e10ed0946cad03d261dc4a012ec03fa14 +size 44998 diff --git a/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1d92e13e7d43ffe1f93284d24329807800262f49 --- /dev/null +++ b/output/Meta-Llama-3.1-8B-Instruct-Turbo/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c008962f6ff71452f42579f1b80ab354f2eae5a7e26f265c0ed32c239d17238 +size 75460 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..52d0d2466288998775d16b06624e7c55dba5c488 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:629cb48a309de2c0d82f81880d7cf6b1aa3d0066905f7384943d3c4b939c40f9 +size 56461 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2d2df7358631e27b26c5c7e2203dae6f2b27b81c --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cc6a087d39e959b3869a1b8846912c3860fc2b390b5d2ce906f16d2812f01175 +size 25800 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4bf9f2cf7a1f96dc6498dc29ee75fb2769994e4e --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b76c198d7045d6c1c9dd6100e43c085f4c2a3eaf122e2038ee16e2149f6ae51 +size 24717 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..464f267817c5e86e63e520a519751ab9c7444cfd --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0617b626d043bb176a34c2109b5dd123fa92e25f0f8cfff5063a4cec39ad1b40 +size 50601 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..27dc7796f5b343c94eadda249e0a51d4c0ea4058 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5ec5a4e24400d4be1c10e27660a4dca42e41f15a11419d17e978f3707abe01bf +size 40562 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..64616dad2ac77dc52e4803b79be276dc71803684 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99e2fa627f8e43a1b533af74d928f57df2549552ca72ff0464eb27377d7c3be5 +size 49028 diff --git a/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f59d45c544c2293b2f88cd62f97996fc7e01fac1 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35439010bcff4005c7c820c00ff33e3d6cc48647716cb54cb058e14f901a505b +size 49827 diff --git a/output/claude-3-5-haiku-20241022/tau_long_context.parquet b/output/claude-3-5-haiku-20241022/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ce9b003e63702fc83c4910f242cf42bf865001ad --- /dev/null +++ b/output/claude-3-5-haiku-20241022/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2055060f303497e49eb292df2b2c57d57afca155e666c9a89cf91302beaa26a5 +size 42402 diff --git a/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet b/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9d1fed8dec5446061c1459c93010d32560b17141 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1919937f16d5bd9169a08735e95555cc583fdc849b2cd90c52f33d1d4d742968 +size 19861 diff --git a/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet b/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7465ae9470df9d5273c7a67296426bf4850a8342 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7cfa18597a10d9ab722fbb0115dd5d78faec9ca3eed834e0393a24dc877659f +size 13842 diff --git a/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2e6b3215afe469614a30d8cc25a27462bc954c52 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:db992d9979dafe082b443f0cbe163c88bad98cdf01d3248cf2affdd2f97dfe5d +size 89313 diff --git a/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f684abe96e0f0959d785dd3cd43ac0051d4e4a3b --- /dev/null +++ b/output/claude-3-5-haiku-20241022/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d76776f1962307d8b2fbb6cba8caaea27c62f0fad096b4e3575e28d08947a7e +size 40827 diff --git a/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet b/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3494291b4ed400e171e42c40618af68bf4387dbc --- /dev/null +++ b/output/claude-3-5-haiku-20241022/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35a6c8eecd89f7ec1092945f9597bf2db469164016cadf315f50fa075662b728 +size 27274 diff --git a/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet b/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e4f58d66d8ab3ef54614f2345e476abfe563adb3 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f603069ab1ef9094ce7e4b3a20e8253e821c3e59fbee1415c961c7fc87972ac +size 49346 diff --git a/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet b/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6c9ef4921a4a0b9b12ffa88759c4b3c4f5033238 --- /dev/null +++ b/output/claude-3-5-haiku-20241022/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2aa32dcf4ced1c07020f08b18bf98012be350f8bcae06d7c5592fb6d98ac39f9 +size 56628 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a80a665e09ee2e4a0fd2360b81c15cf9cfbd1e19 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:183ba40828d01bb6c08607124b590017baa18e23304ca8f2818c39327928b69d +size 47355 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..63bc8dfaf4720c3c75728502925ca137cd99a7bc --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38f7e26b43084e3b991f28a5dfc2398f2f99aac1c17d9d67cf56dda0235e86b5 +size 25897 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8d572ae03a8d241bfcf849d72d6ade3f2f8e2fe6 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d01add1c6b6043ca5976b0bd2ec37aa3ef23f384a73e8e40f212138896cdea2 +size 25472 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..809eb1dcc62487f531c819d949b4d56381629416 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b122f693fc7465a00ddc1275647ffc7d3dfbe9455b5d363f701f0d8afba2ed9b +size 51374 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..44c02fa85c5a924ac7e06a84e06ee53f82b06e13 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74f93575491c0db9dbec030cce34a7fa08f233bc5d638af284fc9f0660abe376 +size 41032 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d4c659c625a82d13e1fbac1d45be05c503f073ac --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:07b49eb0c3b1a971bc0018eb5ca264c568a382784d7c28535f3b1910bec44d4e +size 51224 diff --git a/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8d2bce33550055792d97d80446d14d026757ac34 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08462753aace1e06e36a150b1623b4068f8c9e14f023e766a7ccdfadd6411e16 +size 51509 diff --git a/output/claude-3-5-sonnet-20241022/tau_long_context.parquet b/output/claude-3-5-sonnet-20241022/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..823cf8ac19d1f17ed3522d5781d8c69b1883e393 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:593863c22b76cabcc3c7a48a651fac463efa13028597c229359a41b6388d12ce +size 48252 diff --git a/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c04582e41ad3ca6dff83d18365a1a8eff87f2470 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13d88fddb7edb096a3314189fe8c6f6b4c6f2ec37a1c81122f57f82e6741ad12 +size 20414 diff --git a/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fd4410f1ada24a334f57dddcd7c0e44298821353 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9800126a411b0b363eeb35fc41e1dd517a34ead3f5acffef6659703cdd3aa1d2 +size 13925 diff --git a/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6be31619e666a8ce6faabe9e0cdad4c31e381bcc --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0fec49a52067984f7666812b7c13bde0c3e537a89cb43fbcd82cf00ea283b70 +size 91529 diff --git a/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..08ef91886a31ea69d8dc303b7130b2c57d23b3fc --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a5caee275f8a18625a7cf8daec40584e42b20a6282f2b2b84814182e8bfe9c91 +size 42361 diff --git a/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..418ac29c4bc871c6f97969cec9877c44bdc587da --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:505cb9a3145c79feed04e7e39d50aa1abb17575847f8187577d0535f0301f450 +size 29049 diff --git a/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet b/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3c78850b41761a4b4d093cc1eca5714b3951e4ce --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cfe5e8d50e76eacf9bce74c139bbf7cc9512f617414b8f909657eeaf88873d39 +size 48333 diff --git a/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet b/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ea9c1f04317106b917b3175914d0860b0f099354 --- /dev/null +++ b/output/claude-3-5-sonnet-20241022/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fdbff91fab1aec4cac5e0d51149def4c5d2a6cbae29a0407a5056c1f5be47e8 +size 53233 diff --git a/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet b/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9a3aa5aa3817675c572d49e7ed4dcc7ec0ca40ce --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5249956839f8b75467416244a75563b50141ecff61b32ad2804b806b820b6af6 +size 27599 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..97b02ecf65505e2107a2fdc93c32122890d6e11f --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e0a81daa696e6a1b06eb61646915243019c028d8a51fbc7bf74a8b305e5ec513 +size 26376 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..330b4f420b04865496e803c34c8243407ef3651f --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd2df42ea847e440673a079bc87991f77fa6701a1798012d55b078b8caf066c8 +size 21530 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..193b142a25c6045ea3b3c15246d83bbafc74e5db --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bf75bddddb88394586817b6530fd517d27ffa06b6e3d369735affd103c2eff9 +size 43787 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4ef4acf9b0dd1e73e3a5958f68704e3bd2a0a5ff --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0513db147d6662a92aef4b69f8bb54ea7b45029cd08df573c4f05e128482004b +size 39527 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..43de56edf186a05ee6dbc2c726427a28e36e9767 --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b2a7cb7add0a51cc5e3ae7dadea03ec3c0c9be4a92393cae05df5672d69953c6 +size 41897 diff --git a/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d5097ab21e59a6e652b5d2179e961e1f661f660c --- /dev/null +++ b/output/gemini-1.5-flash/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcae1f2141f50404ea07282e3408baba4dd0f0b3490f85bf1042da01ff293009 +size 42215 diff --git a/output/gemini-1.5-flash/tau_long_context.parquet b/output/gemini-1.5-flash/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c3200a15b1599ab19668fd2b5f2b7939e7d0f142 --- /dev/null +++ b/output/gemini-1.5-flash/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc8cd5d90a35855e7606af76a4b7d128c1a97f68029093ae33d0aa7d343a0dfa +size 46692 diff --git a/output/gemini-1.5-flash/toolace_single_func_call_1.parquet b/output/gemini-1.5-flash/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..765621308a5df79f493fb0b932ffe7074fdd1ff4 --- /dev/null +++ b/output/gemini-1.5-flash/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e3f46ad06d06585ddced90bf3f0fbf716795b2682dd026ffa0f7d18d80ca8b0 +size 14004 diff --git a/output/gemini-1.5-flash/toolace_single_func_call_2.parquet b/output/gemini-1.5-flash/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1eb5b348f08be4bd31d0d1a1bb2197628b9fe498 --- /dev/null +++ b/output/gemini-1.5-flash/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b9fc26fb06c55e6373e038285a08007451a1f6d40ff11277832e29d4c540bf1 +size 12151 diff --git a/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet b/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7f20af45d95ebd57b8d15d5c94ba1faa8a5027f5 --- /dev/null +++ b/output/gemini-1.5-flash/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b3705e6af4df10ed316ff6e9b22c9ec8bb432cb68ac5391527f2bcff3926dc5 +size 107671 diff --git a/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet b/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eb54a9da2058e4478c9febfb05da3bce1b6ac8bc --- /dev/null +++ b/output/gemini-1.5-flash/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b5f2dff11755ec0ff5d3f6e84fe90f49c54afb38c00f743cd7d6858f37284d10 +size 42317 diff --git a/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet b/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..89010fc3ca95428da46136da48648fc438cb8f2a --- /dev/null +++ b/output/gemini-1.5-flash/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3694c13eaa74271d20430cda72ea079866fd6bb11ff3247edf015578520658d9 +size 30892 diff --git a/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet b/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c29c6d042611eb508b29340530a6f5219405cbfa --- /dev/null +++ b/output/gemini-1.5-flash/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:57a3aad5dd793d31b6558221b305b20766e99361c6b78e53b896c432ce197ec1 +size 45169 diff --git a/output/gemini-1.5-flash/xlam_tool_miss.parquet b/output/gemini-1.5-flash/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2c848279ab6b841f99ac96204b73424e2431c56c --- /dev/null +++ b/output/gemini-1.5-flash/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf00d5e071b49f106144109c6d5fb01753ab08d6031da71a58ad37ce706c55c5 +size 47444 diff --git a/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet b/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..33eef5872fa64c85678662f81ad9f3afc7ef1441 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4612d61b195f16a6fe53c62fc5d28cfd5a661d769737eb623bb34bf8ab87928e +size 28338 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ed8886d326e812362dbd3e52f6b87f74188e7b09 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cbde82c9cfd00b594dbbd0e3b1da13ec4f6f9d0ec9b0227a422cfd358750d906 +size 22381 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bbb58ca69a1d7864b4b918241f8fbdec17f482ce --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:99cea91fdd703ea5949aa8278763c2e469cf06b6b4487314dd658fe91c1eac3d +size 22202 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b8581531c2dd95862e8743790e294100225a6b7 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:41ec4ad5cf3508dc086b147b2258e06dd2cf6829110fb6d16672a5faed11b97a +size 41073 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d8e1a706c1f54bf1fc84ea63fc318fdf8e339862 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc9448ddec190eb66d29dddc24243cdccbbf6176c06aef6f88c3c1ebc2d805f9 +size 36764 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d80edbbdbe6638fe3b44a66fa4dc2f0ccc9fdc11 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ff269a50b030cd799da9ba80965c6efb811734f2574895f9c4be3476e1bf17a8 +size 35004 diff --git a/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0b315417b725fde4aae1366d1e201f25a81521d4 --- /dev/null +++ b/output/gemini-1.5-pro/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0977062fc3effefec9599db7903721f7c282b3f1719cf42dca0b768b692e32da +size 39353 diff --git a/output/gemini-1.5-pro/tau_long_context.parquet b/output/gemini-1.5-pro/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..61ae143803a610c40fa64950a9a3d0b0f6d5c1cb --- /dev/null +++ b/output/gemini-1.5-pro/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3ad91af5a1002edc2819545c9d6af16192bf6783fd5209492c01f7b770be28f +size 45651 diff --git a/output/gemini-1.5-pro/toolace_single_func_call_1.parquet b/output/gemini-1.5-pro/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..17308eeabda8ca302fa89d5be7d903c49acb7239 --- /dev/null +++ b/output/gemini-1.5-pro/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cef131778ae6557b60a107aae9d8880a215c53acaeea90b94d032be8467f1a70 +size 15193 diff --git a/output/gemini-1.5-pro/toolace_single_func_call_2.parquet b/output/gemini-1.5-pro/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6a312494b76573ee1a0e7a9a81351d07dcbed5bc --- /dev/null +++ b/output/gemini-1.5-pro/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20d8c065f95f21e9056242df690edc31790da5ad360a66896b766c5f6294bebf +size 12087 diff --git a/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet b/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8327a22e8ba737ec31a862d2bb1c8e90c78b9572 --- /dev/null +++ b/output/gemini-1.5-pro/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e1a48301a76bf92981adfff1920ed0b6b71c5d74c0865b4b7c2c6cddf23b0b1 +size 49774 diff --git a/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet b/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d151bab1d5dad1b12d4bb14f5499f00365edda3c --- /dev/null +++ b/output/gemini-1.5-pro/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b5a8fc9a8476f7963bd0dbefd58e59faaf2c55e3f0f46146f4ecb043e57bbd +size 41034 diff --git a/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet b/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..daeae99f5d3890dd19cdcd8582097fd102a3e6cf --- /dev/null +++ b/output/gemini-1.5-pro/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e00f16af167fbe6c302470f6b66591749f6f94c52b93fc636012bada7f511c9c +size 30936 diff --git a/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet b/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c8ec1546e7353731b38bbe253b0dba129acdc459 --- /dev/null +++ b/output/gemini-1.5-pro/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ede8fe40323d0f17736b361cf615adafed1e82a320efd91016fbd479c504f2de +size 40537 diff --git a/output/gemini-1.5-pro/xlam_tool_miss.parquet b/output/gemini-1.5-pro/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..899bc1a01ee759b404ac8e0a1e1721308419b968 --- /dev/null +++ b/output/gemini-1.5-pro/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:06c68ecda86c3fd2a1488985b96443a17f2895429e95c4781d26d6fa9bf0bc41 +size 48728 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet b/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..693898c6fb21ea87be007e5dec6b2f328071d394 --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9418f5b9e8302dccf22089063cb7fca50d276fbbb5f1a14ac87d9758f3a84785 +size 30928 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cacbc67d7cc74e559c6712e568d8e0c22c361c2d --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e7beb22784290c6880d218232c099e368365a02382e902a58e0408f7f7d0916 +size 22138 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7b289d304287e0ae5dbe336bf761fb83d5221e50 --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0302490b6eab03294fa631f3861917ae77262208dd43c69987437cad87c7ae06 +size 21570 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ff0e1f0fd9c6316c9c14a7f56d46992240296efc --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b40fda69942889b0b9a4cd6d0e5ca5b4395e05b5627e9f07a35f3daabb7b319e +size 40294 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7bdf5db27d666749cc4a6662dc8ca779683d9b1d --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef5e919669de1102fe8ecabfdcc6a0aefafb6a05e37bda4b7ebcdce206505e27 +size 36774 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d6e269f16fe611c85f7447f15a4cc6ec42a59857 --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0c0675c09704dfcb93c5c3f16468eb1c0eacf6b6d402ef441158a7a02516f48f +size 39831 diff --git a/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4655947cc601e260630625822ee078cafa270ffb --- /dev/null +++ b/output/gemini-2.0-flash-001/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4eb7f9916b63087e616d35f583c6d7dddd815ccdbe8e9f61a6e1ba7576973b8d +size 37934 diff --git a/output/gemini-2.0-flash-001/tau_long_context.parquet b/output/gemini-2.0-flash-001/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2493c7792f0c825cac983bfc61ec1322f2a55a19 --- /dev/null +++ b/output/gemini-2.0-flash-001/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2bc30eefa28286f8b2b4d9958267c78eba08bb674bde423f64430bd66319916 +size 42499 diff --git a/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet b/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..84e9ef6b4a11bc19ed04cc5be1b4033f2aa165c1 --- /dev/null +++ b/output/gemini-2.0-flash-001/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1f1ca02d956c0002c31978df685f309576361656f4942eb60ca00ec16566183a +size 15363 diff --git a/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet b/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eb47b90d357a5ef07f10c4d982e5679efd10fd1e --- /dev/null +++ b/output/gemini-2.0-flash-001/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a090befc959e0aef2b8db1d3136af52d626b1a418ebbf72442a36a4f4952ac6a +size 11943 diff --git a/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet b/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b537726470a5cb5411b6349aeb268d53187ad71b --- /dev/null +++ b/output/gemini-2.0-flash-001/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec2153c4db805f63b35f9509b702e485d831b9e855186e884704d118b5f321f6 +size 107999 diff --git a/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet b/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..609481c4fe924f7a88a2d9920da1c600c49d5506 --- /dev/null +++ b/output/gemini-2.0-flash-001/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68f0f3315d3db9a991cc0d8b3964a551a6055cd54be4d728d3005bbfddead472 +size 41168 diff --git a/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet b/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d54588003197e1c9bd626524b577bf8f164be36e --- /dev/null +++ b/output/gemini-2.0-flash-001/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26ac115f0f57da9565aafc315c0275bcecb0b88827f9db238f2a2ccd6defc8c5 +size 30989 diff --git a/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet b/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..29d871730795c6fb3f4dfe778d30526bbd6c2415 --- /dev/null +++ b/output/gemini-2.0-flash-001/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82027447d8573256810f8f0430152612d3795fc13dfb948f6fed5af145c93757 +size 43836 diff --git a/output/gemini-2.0-flash-001/xlam_tool_miss.parquet b/output/gemini-2.0-flash-001/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..85a026834da6a13b5f6336ce65dae4530ea90fdb --- /dev/null +++ b/output/gemini-2.0-flash-001/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5121c050fd3db66c2e8268a8c2703ccb19f8675a3394609f4f4a2dba8efd31e7 +size 45973 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..85e55849bedd2ee342dab292dca85fdd7c631f6a --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d035642bc7cec4942ca79b3c4e19b06f7823a53ce64530f1a25ea5a534226fd7 +size 33211 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f95f8b7630963f46faa5f485442e6412148303f0 --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ff79e50a4731de0671cbed84004783afcea66e136cc9e7ff4d7773b82b5add9 +size 23423 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..91579af1f655e0ca048be4c1efe24f178b7e1683 --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd79507b606df0599766938b8cd08d2d5874f2312b9b991c5f9816082a09af35 +size 21602 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8c9ce9dc5f181ce7b72f6297a5bd282a8649db78 --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0094efbfe2bb9f463ca99866bbc5ec3c050fcad6c04125121a691f3d439cd8e5 +size 40253 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6b4d09e7891042c3d2cf88b88f1cb7e965a6a5e4 --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:370f700f343609cd1741163dbe2c5dbb5c19941d02feaa51304aac66c3012553 +size 36183 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..64ff5a1147b304e04b3377b5fbacdc823acbca27 --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b7b0f80e2f1180bcbe3f28061f861f18450efc9747b09678f709ccc8f48138f +size 37994 diff --git a/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a4a9246b18f7a220aec653c00542462c3130c16c --- /dev/null +++ b/output/gpt-4o-2024-11-20/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15db89e972e0f6be8097202f4e71f54a2dfb4a94e48bc37b669f0739f27388ad +size 40102 diff --git a/output/gpt-4o-2024-11-20/tau_long_context.parquet b/output/gpt-4o-2024-11-20/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7904d1213a16f46410cb40111ccc381732f8831b --- /dev/null +++ b/output/gpt-4o-2024-11-20/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b6dc08bcf01b49c401368ad8b88be91d09e21be37a4e9643512e8ffbc9a14fe +size 39940 diff --git a/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet b/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7aeb7261c24d4c7617bca62d9a9161ea432c914e --- /dev/null +++ b/output/gpt-4o-2024-11-20/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5d0d503e8e58736f31d2228b42bc1b583e991a090c19ebf85062a40e11e8066e +size 15209 diff --git a/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet b/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c41e49c4e38d05c4106aa89df2e0612cdc31c488 --- /dev/null +++ b/output/gpt-4o-2024-11-20/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12e6f73695cf56607aaae7b42c8178f3587642de6cee745bfd53f80b4d465b01 +size 10966 diff --git a/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet b/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..00a4a9c0834f26b1508cf4ce8824731fc10f4dc5 --- /dev/null +++ b/output/gpt-4o-2024-11-20/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:153ca6f78eb961159d099969c563773ccdc2ec973b0a7339615e99766b52d929 +size 101090 diff --git a/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet b/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..036dd1633f6eafe46bd144877124d6828985061b --- /dev/null +++ b/output/gpt-4o-2024-11-20/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d5ad47d7c0b4c8c52a8243cd9c8e553c8ef0ff13f9c6b08ed7df7666c0089d6 +size 40026 diff --git a/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet b/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..632058b7f953b1177d134969d8b7dd62ba986f9a --- /dev/null +++ b/output/gpt-4o-2024-11-20/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f27bd670790f9457ad58e4c1e08fa0ad4ca63a84be5dc2504c3a252cca8c805e +size 31855 diff --git a/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet b/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d56aa7bef0cb6789e4372fffb99228e434691a20 --- /dev/null +++ b/output/gpt-4o-2024-11-20/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4655e438e12d954da0b1ce12e4c03ae897995509bf1c42c9290e8387472f512e +size 48007 diff --git a/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet b/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..65d98d4effb6c5d026fba9d3e9aec9563dd005d8 --- /dev/null +++ b/output/gpt-4o-2024-11-20/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1ca31c1e28060629449372af722cb3f6d01253de06b90a94078bbc1a6a3f8ac7 +size 49543 diff --git a/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet b/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..51f9ab5d77e907a70a5bfedd217f13bd27ba6870 --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a0c3bf98f298552eed56303e350616c100acfb8197d971974cc4a3d0fd27f41 +size 28707 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9b460d74997a5a1c4ce670bab9695ae49defddba --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18e7f39d5eed2bdced751486fcf08023af79791741c331de5e0f6b3054a4fdf1 +size 24007 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b19e30d6c0dea8bc734a704a875502e7f3def4d0 --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:752bb7faee66296dceb18f69e1eeb0b7c8e6709cb34155d119b07d8942ea0501 +size 22673 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fdf426b7cdc6a7459c9245b8e883670d5704b015 --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e43f408cfb62859b3cf29ba33b87fa10843cd2f618f8473420ac7b99d9d0c26 +size 40128 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5878b1e8f2393b6540d3e72d7de508e8ef73ca16 --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ed05f5776b15df238c7260483532ef68996d5ccd7f7ab1db37962272a84e1291 +size 39154 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b94d1e87b4d26b8eff920f0239c9c2c4e97f733f --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:34841ca690b9b245ee54d1c3a2721863cbe7ae343441e082258af3c1411ffbcc +size 39671 diff --git a/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..33c51b4d5462f351f90ebc0ac64938c112741d5e --- /dev/null +++ b/output/gpt-4o-mini/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:35769131c4e582a3a7c7991650dc8b4be53f24690963c002bd5e4b05592c9fc9 +size 41463 diff --git a/output/gpt-4o-mini/tau_long_context.parquet b/output/gpt-4o-mini/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7835a91c98d643d90598a2ed4a63d4fecf433a07 --- /dev/null +++ b/output/gpt-4o-mini/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:707a33e2bc4d82ca725a3d0f95b85a088abfcb753c97ecf42c2f9d9962a1f9b2 +size 46378 diff --git a/output/gpt-4o-mini/toolace_single_func_call_1.parquet b/output/gpt-4o-mini/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1a83065bfa3ca107985846a851837ff5612f4652 --- /dev/null +++ b/output/gpt-4o-mini/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66b118eb4bebcc5e24045ef248f37d9d8d48963903e8650b6522b2c91f77f2d1 +size 15218 diff --git a/output/gpt-4o-mini/toolace_single_func_call_2.parquet b/output/gpt-4o-mini/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f6a1377ed0d77a6b423287525d775cbe51611773 --- /dev/null +++ b/output/gpt-4o-mini/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83060408277527b9140ebb6cdd093a3f407c91093b0ca469a1c90728c064095d +size 12157 diff --git a/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet b/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d4234ff75e88f38ae6ee649cc06e6a8dbde81564 --- /dev/null +++ b/output/gpt-4o-mini/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c7198ccf61e7427817437f564b3a2345b3c93bd253739efeb7a735d25a22f2f +size 101412 diff --git a/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet b/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..32751c29f75bd71f9a469fa610d200d42d898ded --- /dev/null +++ b/output/gpt-4o-mini/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82ee2b0e69490b2a03a18c530ca029ba0cad4691e0c5a1504e8ed00b4d481dcb +size 40464 diff --git a/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet b/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e842fc5afa13e792aa5a823ab95abbf349f4af5a --- /dev/null +++ b/output/gpt-4o-mini/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e14b262aee04ef4cd0d7ff12b050e12dd8d4d4ab543c968e1850082e4821f3e9 +size 30350 diff --git a/output/gpt-4o-mini/xlam_single_tool_single_call.parquet b/output/gpt-4o-mini/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a830be1540f9d1cd6d2a1053c599718ea5b0b6c4 --- /dev/null +++ b/output/gpt-4o-mini/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a40bb3fa0c725d24b3e34011599d50847476f061260e05d3cb717758c570a603 +size 43587 diff --git a/output/gpt-4o-mini/xlam_tool_miss.parquet b/output/gpt-4o-mini/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3d80cf92d47819a4b83898ad0bd9b48add222981 --- /dev/null +++ b/output/gpt-4o-mini/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:916a301206a81efea98bb017d04f7f19c4160a880ffaed682bd4535cf8bb0e0f +size 55624 diff --git a/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet b/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e435d64de8fd47def0bdd37a26cf93f76cbbbf5d --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0cb65900fd4b1382a5b2f98820b7869f1a66e256a4f5a265900c786f10e57489 +size 38643 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d275c140536c8da8aa228f7599aa1aa2c0400bf5 --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:270e7928520a15de3be350539397da80b14c40fdb4778eda4c3f539d09101599 +size 22410 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..523907a841e68546ce5cb2d85709c229b157a4e4 --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70b39d2fe6c9014c15c1f5fb153e4479ebcdee38cf5dc1e0e4e496331b2e0ddf +size 21303 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9e42e846417bb4a2ef410a084bbb474f0ce2c810 --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8f051b0eb97310cd4452f75b1b806808bacdeeb332a9478351be51a39c1bc780 +size 39382 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0839b77412941ebfcc27ebe2d263cd2031c9a38f --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df381465902a3e4b1fd02115ae4bff668c1ef9552c7f88cea76790381c81def2 +size 35245 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c45438e963e46e294c28aed6f63961938be39145 --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec0d7beec730a4ab3938434fd1b7d876d1d9f55cceb204007e5213f5085bb92c +size 39912 diff --git a/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e2187410980d7173310c3e9f4de394822de318b3 --- /dev/null +++ b/output/ministral-8b-2410/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9841a5541a80490e3e4ff8f23e0583ec883298b587bbb45199309a7602a38ed0 +size 41529 diff --git a/output/ministral-8b-2410/tau_long_context.parquet b/output/ministral-8b-2410/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fbed7aa8040266215f3d96fd17fbb211684b2b4f --- /dev/null +++ b/output/ministral-8b-2410/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:28c0240198811bd731cf5ff066442a806f519cf208e7642b2161baee4b7c2cd3 +size 38416 diff --git a/output/ministral-8b-2410/toolace_single_func_call_1.parquet b/output/ministral-8b-2410/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..30718c71d45efe62b84c533e34858d184196f474 --- /dev/null +++ b/output/ministral-8b-2410/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d653e7c1e7ec22f2e86adaeb972704efd7ddfaff4ec8c11ddb5b34c4655dfb63 +size 16172 diff --git a/output/ministral-8b-2410/toolace_single_func_call_2.parquet b/output/ministral-8b-2410/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..001c281ebda6bcc71b76ba713f9c38cdccea0863 --- /dev/null +++ b/output/ministral-8b-2410/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5256c752355e6073b42f72f521624c3d304ecddc54897304e8daf208cd5d9e0f +size 11889 diff --git a/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet b/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6a8bbd709096c859b32e5e8df39b218608ec9ddc --- /dev/null +++ b/output/ministral-8b-2410/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f36990e2b07b50edefef4c9194db0be223ee73ea1f4b5232934dc536cb6c8ffb +size 94431 diff --git a/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet b/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..76a044fec5a92f211ef8bf26e5b0e08c84fb2647 --- /dev/null +++ b/output/ministral-8b-2410/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16e5404eab455248d2ca129d2d46a06f1465dea1cbd30b05503cbace90304421 +size 37736 diff --git a/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet b/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9bdc2b838092e24730fa6c0f05c60494f0ec8d57 --- /dev/null +++ b/output/ministral-8b-2410/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:72d0f720192d62e632d752ffef9c67b0091d596fa5d27610dca536c544cd34fe +size 28011 diff --git a/output/ministral-8b-2410/xlam_single_tool_single_call.parquet b/output/ministral-8b-2410/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f3ceec1d673ab403a355607d5b901bdf507b2f0b --- /dev/null +++ b/output/ministral-8b-2410/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b910566e53966b82884caf149e2be5baed35dbdb1d47293f37d3fd839d8f13d +size 44101 diff --git a/output/ministral-8b-2410/xlam_tool_miss.parquet b/output/ministral-8b-2410/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6c0dfbd98d02a2afec940706e3f7eb48af59e613 --- /dev/null +++ b/output/ministral-8b-2410/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3feeb44ac4b5dbb03891b2cc8e9b50fd09a4ae1f8f7fb84fba92970c4cce0218 +size 49060 diff --git a/output/mistral-large-2411/BFCL_v3_irrelevance.parquet b/output/mistral-large-2411/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f7389f20186e36d23bbce580ce28401bda94ede7 --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:857d12f4b4e99fa9f00467bde5ebd982425da63755652a0fb3b97bc920e1933c +size 30702 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9ad19e787d09e7d7fe050283b8fda2e3308ca8cb --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f52c51a0f15ee2d231105da9cb76f254d1af3c608ff9429884ac462f02e61e90 +size 22852 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..01cfd3b0a0f81593b789b5326eeeddfec7b985c4 --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46e3ebb22d755439a3414edf86765a0cdb3559f441950ac88071e294faf473f3 +size 22001 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..60ea24b062b19f35cb1a590eb74a58b33118f2b0 --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:94b7969bf0f6ea006359f33c4d195dbb2ab62d0b6103ddeec677729dead38017 +size 41223 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9be57196a626a03d366e489c1322bc1cd01d64cd --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e71d6b8cab2e9c3140ac09eb45e43512a2f362b12aa1a5c1e1b071a5b8043740 +size 35651 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6eb0317af5d2963b113261b049cc040003b02ef4 --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c86ed19c77109d501de73d7f3f2c359749ceb7d605b1f71463f4859e923cd6d +size 38813 diff --git a/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3f3daffc5bc9a735c516b514d54a49f05ce9ae80 --- /dev/null +++ b/output/mistral-large-2411/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9637d6210edb287c9b8c5f1b6fe6d92f51a5b6fc73a4141553db7665a7c01a95 +size 40458 diff --git a/output/mistral-large-2411/tau_long_context.parquet b/output/mistral-large-2411/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..49ba86f2ae2fa013b93345a45d637bd13c3d5934 --- /dev/null +++ b/output/mistral-large-2411/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:068bbc01cc56b2721c87afbc9f6918518771ab87c5dd6847200fc86f09d10d77 +size 40230 diff --git a/output/mistral-large-2411/toolace_single_func_call_1.parquet b/output/mistral-large-2411/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d7389eef3539480ff8fb2bfee294e6b66989a745 --- /dev/null +++ b/output/mistral-large-2411/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:79d25ee70a7c715fc5277c594d44f96f4f8d985423a7795bedee99f7e1d89b8f +size 15179 diff --git a/output/mistral-large-2411/toolace_single_func_call_2.parquet b/output/mistral-large-2411/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fc072c8de1377fad2469dd8de541b861040318ba --- /dev/null +++ b/output/mistral-large-2411/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b850d7b0175b43f70bba156d5aa36604fa16f0dfccc60e334417b338864efb70 +size 10603 diff --git a/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet b/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8caebc59c020015d2645358f13c70a13af9e4fed --- /dev/null +++ b/output/mistral-large-2411/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d8289b3b6b4ca120bb104a1380020c0352950aa8fbe15f175256176fc7770be +size 92619 diff --git a/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet b/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b7b4e7e4eb7f7bf119a55ca6e9bc14670c8ed438 --- /dev/null +++ b/output/mistral-large-2411/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f4d70af4a64c32a58e5132b3a1e80f6d99389880fb7db3d4d7f8e8e0bde1605 +size 39237 diff --git a/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet b/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9a1d8b7bdc5c271b9265712c889676d6f99cdede --- /dev/null +++ b/output/mistral-large-2411/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6e96162198615b2adc1dbfffda3d70bc87395f45740f6471c1d5e15b05b3730d +size 28523 diff --git a/output/mistral-large-2411/xlam_single_tool_single_call.parquet b/output/mistral-large-2411/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..03dfed26abee7a2572808be93faf5f7468618beb --- /dev/null +++ b/output/mistral-large-2411/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:36ceadddf825d0f9db71e231c7d71880b0baf5239297ba02e907ad280d8ea5b5 +size 43714 diff --git a/output/mistral-large-2411/xlam_tool_miss.parquet b/output/mistral-large-2411/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..576cfb23933f88568cd98f572ce7f2286941370b --- /dev/null +++ b/output/mistral-large-2411/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d579a3e339e5bcc021ce9dd7e5a971f6c1561a09d57737afebf0b041b04f863 +size 43655 diff --git a/output/mistral-small-2409/BFCL_v3_irrelevance.parquet b/output/mistral-small-2409/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c812b2e3e5d754ca3f7fe40919b37ec8b9b15e05 --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:52a4b575dd9dc0b2e34a2d54f043e6c4ad67b9979b5c399fd860443e5a664703 +size 31374 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5b221f434e1f46c900972d67c728ef8943ce9c2b --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:49740ae3894319d5a40c86e13ca0355a4dface0ba78b072fc0b0c8e175d7feba +size 21949 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4ff4ea42085245f67562cbf239c2a4f0dc64f178 --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93a8714037833303f75bb7375259623b094b448f27d84a0ccdb39fc4d2b21fbe +size 21432 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1fa6811594c97a83f5c76c40d2337ed03ea56102 --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27d356a0cf2590098fe2e180efe41a26b26c2901f701993abd1e06439eb1006c +size 40361 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4baa778a23b79a68c05f06e808cf160efdc27c87 --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cae8d071ae4629a6b1c82c6c0b00736c6a20d699dfad2fd71eb51c423cc09255 +size 36116 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4df00cc9b7d6d29bbb958b068e8bb9a25d710313 --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0bf76aa092de9c7c3304066c2c7345483b8612a052a0e6b15679cf7f41e2198a +size 40792 diff --git a/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..060225bd53bc884eb4b4826b3738d0dd4538fbcb --- /dev/null +++ b/output/mistral-small-2409/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47e661baa5d200644f967d1d9b1f99f0242330233b595265842cb8bc7f554f81 +size 40699 diff --git a/output/mistral-small-2409/tau_long_context.parquet b/output/mistral-small-2409/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..03722624d50b833c62155ff608601d0967779493 --- /dev/null +++ b/output/mistral-small-2409/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7145187a08f1bb1548a83cd1173fe767c3ef2bf710bf2b2230c62f2d97b3d7bd +size 40104 diff --git a/output/mistral-small-2409/toolace_single_func_call_1.parquet b/output/mistral-small-2409/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3e93e7ad0e1d99250086b3d4b3293769c78f320b --- /dev/null +++ b/output/mistral-small-2409/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:08a888330b39b9ad7e0c739bc38162e0e44040fb517490eb0d64214468dcb656 +size 16134 diff --git a/output/mistral-small-2409/toolace_single_func_call_2.parquet b/output/mistral-small-2409/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..35ebfee340e7ae0e4c4e711628c3767f10b74bef --- /dev/null +++ b/output/mistral-small-2409/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e58913a05a63dc6a4b2317f44a5d189594a711990e1c0b94bd290ec1a1fe61f3 +size 11793 diff --git a/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet b/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b8f667b5d277cb90eca06bb4531003b5f6665e2e --- /dev/null +++ b/output/mistral-small-2409/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c8e27644df90a7d784ee19abcb4aad028b0539b5c28e9950aec46c24246e05ac +size 92947 diff --git a/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet b/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cb72d33d1ad990d7e0fbeba45e3207811bd55b04 --- /dev/null +++ b/output/mistral-small-2409/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4c985a99c1b5ebf114cc8356593cea804aae60e45ee84755793bdc52e7d024 +size 38472 diff --git a/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet b/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c529d6209b50d59cdb22b22c448b037b82d1cd2a --- /dev/null +++ b/output/mistral-small-2409/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:74c20414f38f72174ff08b6b6efcd8a4c8183c97616cfacbf3414c15fa613beb +size 27899 diff --git a/output/mistral-small-2409/xlam_single_tool_single_call.parquet b/output/mistral-small-2409/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..03108398db031500332ba4abf6d395c01f9218f2 --- /dev/null +++ b/output/mistral-small-2409/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:62a406bfd71d285dff63c9ad405075d8638d82236efa9f85b806e1c8d02b2e56 +size 42800 diff --git a/output/mistral-small-2409/xlam_tool_miss.parquet b/output/mistral-small-2409/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..63c1be62f771dbe889c7976e1d4708304d7a58a6 --- /dev/null +++ b/output/mistral-small-2409/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d9e0570b39b726a0df2d4d2e8306ab306c0d78761be1bc0e7d310b95cedb36a +size 47367 diff --git a/output/mistral-small-2501/BFCL_v3_irrelevance.parquet b/output/mistral-small-2501/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d34fb9e950338ed26d4b30a42d45ee9b82a95336 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d1f5575bac38de57aa3ffbd47a605aa2d50535c15c380aa589dce7da757c37c +size 34457 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..09798ea3ce13451be90bc33a95504374710439f4 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f572de78c8b18b1de9cfa28b2cb86840ca05b000a8c5d754257e27784232ec99 +size 22288 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2898c11665c05478d06d9e5cb807eeedc82aa06f --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:158bb34db42a4149bd551a47b0ccd74ca370e92276c96556620731abca86c228 +size 21869 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4cc54d55541183ecbbe5c24c86b53f4e64dc5927 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8cd510d392dc22e33f5b5dd620501415a2d40a38736eb0dcfe8057a24ccb2c1e +size 37681 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..344d2ee5aab9ea842f96d0186e1814004e838002 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f71a6db126ce6135c7a82f0cae4701041ba598173535169d2f5b2cf39fca6c45 +size 35179 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a17e8c4b19bc481bd2aaed3f6d666617a22e03a8 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0401813ef618a6da8c60238d2eec00118c03f4e28dc810bc50a7ac87a798ba25 +size 35564 diff --git a/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6b0cccdb32b0ed36ab92a40ebb96df8ccbffbbc5 --- /dev/null +++ b/output/mistral-small-2501/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4cc168c2931f9ed461efdb91ee809ade9f092613b3d4a542c5155df21b5e33fa +size 36579 diff --git a/output/mistral-small-2501/tau_long_context.parquet b/output/mistral-small-2501/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d3833c00ae4ebd68b945248cd475ed031bede6ef --- /dev/null +++ b/output/mistral-small-2501/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2318522680f2c54050da9773099cf787ee680c047697fa37bb1f395c32dc1634 +size 37570 diff --git a/output/mistral-small-2501/toolace_single_func_call_1.parquet b/output/mistral-small-2501/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ef8e963d8afb011a69448b71b743ae11831cfe1a --- /dev/null +++ b/output/mistral-small-2501/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d052bb4bbdf8a4b418305d185297ddb098ddf818b381cd541136228305efc3ce +size 14905 diff --git a/output/mistral-small-2501/toolace_single_func_call_2.parquet b/output/mistral-small-2501/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..865e09098ec07f9cf935537dc5d7c7b530b0a99f --- /dev/null +++ b/output/mistral-small-2501/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:39b1fb656c21148e21c585b58ce82647e748b0045659daecd42a8bfc3a0eecb9 +size 10302 diff --git a/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet b/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..50f35bb1d00348d61b6691b331484d2f6ba3b260 --- /dev/null +++ b/output/mistral-small-2501/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa8db2630ce302e16ef948885632346cd8008154737649c2bb675972d09bf1ca +size 91560 diff --git a/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet b/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..338c64f2dbcf865cdb7733b55c7b2e8e48cda245 --- /dev/null +++ b/output/mistral-small-2501/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b02e39f1bf1b52b2a0269bdf464289cefdaf69f0311b21124e3a4caaf78530f +size 37546 diff --git a/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet b/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..dc0fe37b94ff83d7cf8c3705d399edee8ef4280a --- /dev/null +++ b/output/mistral-small-2501/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:159df7de0f244b043c97fdc99600f7da9b1de1a94f0f7b985bb337db71d34695 +size 26915 diff --git a/output/mistral-small-2501/xlam_single_tool_single_call.parquet b/output/mistral-small-2501/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3fcf53a16f24cc4ea99ca2ddf569f081d393b20e --- /dev/null +++ b/output/mistral-small-2501/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd2e8fc7dc73648dd5051ccb4f65a17fa932a4ac60c8fe611731feb1d6cedc80 +size 41430 diff --git a/output/mistral-small-2501/xlam_tool_miss.parquet b/output/mistral-small-2501/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5c9786e2efce611cc664e8d1884820bb845c854d --- /dev/null +++ b/output/mistral-small-2501/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:63a128128e5aa05109924c78f6c075ceeb67952576f63fa587726b263ff19a7a +size 45045 diff --git a/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet b/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..827fa6ea428a62d37298f419fb66e821b2243ca6 --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf8e45b0af8aa06deaea9b797ceeacbca8467886211da5044f06d0e829589604 +size 33228 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..eb5c611e875e6086e475a8f092e3fce51dbb4718 --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f0bcb7368700a11dbaca015efd51e3df5bccf23cd4e021917b95c4919e78a6dc +size 22701 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..a7c485059ecf2f62fd321e1155b50542671da8e2 --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e16c694aa2b14884e62b2ee13e413e52eac732e2fe00866665944fd843ca1ed +size 22862 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..460949868028b3eb0b7b04b2dfe79dd41b88c771 --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ca98082f3d0ab1bff6ec03055fec9b76dc101168e28c4796c921a1c11fb873a +size 42084 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..fe07129f7972fe81e40f132a2efb57b50e76bd4f --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5b7b263cf86953638b63ed46b632acc599d395109c04449e72095d12eac159f2 +size 32475 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8ad0e825635a46eafc1589fda1074c5815acb47f --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90ba6109fde88737682fcc0df1e61e8e37b6b55d6201f73542da97abc93c45d9 +size 35092 diff --git a/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..973f92b82c168947472d82694ca6fed5f9248d08 --- /dev/null +++ b/output/o1-2024-12-17/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:de90d8ba891984767feab406ad7d8804a3895c10a1732ecf17f71eaf8947805e +size 41446 diff --git a/output/o1-2024-12-17/tau_long_context.parquet b/output/o1-2024-12-17/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3a91794bfb2a9d237020801cce451fc548e3f4bd --- /dev/null +++ b/output/o1-2024-12-17/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8956a3ba615f5bf658bfb0aa39bee518f29754f48a8ac7230ae36047a7c372c1 +size 47664 diff --git a/output/o1-2024-12-17/toolace_single_func_call_1.parquet b/output/o1-2024-12-17/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..37dc632408a5b21c55adf3297fdb12efee84b1a2 --- /dev/null +++ b/output/o1-2024-12-17/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df2cdeb823d381ec65771e95ecb0c8ff51b9f988a4af124cfd4bb3b65d365626 +size 13993 diff --git a/output/o1-2024-12-17/toolace_single_func_call_2.parquet b/output/o1-2024-12-17/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..cb73c1fa8b2c45cc8e117650d3cc42f5e4015c70 --- /dev/null +++ b/output/o1-2024-12-17/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:063485c79b0ba1fb0400641df12df7d55bc3935912b8dce81c07662041684b7e +size 10883 diff --git a/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet b/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2fa8c11bf129164125c0e377ad441e65f4cb6942 --- /dev/null +++ b/output/o1-2024-12-17/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0ea33a82d1afdfee6e894d2b9a81ac463a36cf792f3e36166bc551eebf41c6df +size 89207 diff --git a/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet b/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ae3af62c7e70b4ff9186c7aa5b05e4f4a05ce154 --- /dev/null +++ b/output/o1-2024-12-17/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4c874617dd8a9ad5b090c2627aafad3a0625040eced52387f1104c393479656 +size 41318 diff --git a/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet b/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7679b6cf1eabe601344499714792104f2bb011f0 --- /dev/null +++ b/output/o1-2024-12-17/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:05941c92ca197ded8d7f03ed7a5a6eea427b35e520d98a14b9ce50d4a43ca80d +size 28346 diff --git a/output/o1-2024-12-17/xlam_single_tool_single_call.parquet b/output/o1-2024-12-17/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..52fd12bb36cd18ea9a342a975dbf9ea137fe13fc --- /dev/null +++ b/output/o1-2024-12-17/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a8c4c5a64f6cb6f77c3a0742c9b7119f7c1444a2528775edc7029ad9ce8039ed +size 50475 diff --git a/output/o1-2024-12-17/xlam_tool_miss.parquet b/output/o1-2024-12-17/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..d0ccdc8a682241bb4c7f8955155fa5bd85c7fe91 --- /dev/null +++ b/output/o1-2024-12-17/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:599c5d16379ac412445750e09899ef6e105b08cda3873994c113612089b83141 +size 50779 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet b/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9e895d8d79ed7a51af75bff6a0be7ba7866dc224 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0251df7573102d184e1b2278978f136fadd3309f94027b76ed26ee96b95fafb2 +size 40772 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..200ca8e404037c3345b00afab48eca0676a108e6 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:681b126959dd0a482a24fe37646cacca6f7641e9777904fb6b1ec23a1a100144 +size 22986 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ed73de7a514f160579d0bdc39a4da2156a7bda29 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67d6aa260eca1288ccf890a29ef708b4d83488da2f40cd8665061f803eaafead +size 22841 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..651ebf19a7bc7388542379584d56a5870e3c5e68 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e129d5f6c881ef1389e0d003f841719d24d3ffbb0d4e68c974136acca80f02 +size 44739 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..44f9f1534f6b8a3bafe360e50423d4575c40ead9 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16ccde7de2ee7995b0e85af8ec325a7777adc794ddee805ef28bde9812cbc78b +size 37387 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..54f5ada7cfd50338c104c59988aa31bbda5f05eb --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61cf3c67cb952d0900d69590df6f9a997aed1a658c074f2423a807bf00a9cb3c +size 40550 diff --git a/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..68af3ed747d24c734a4bc6267206bc518f1cac69 --- /dev/null +++ b/output/o3-mini-2025-01-31/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:142b0d29c80723c298d34c111f5f698e81b4fcb99028514800faa3ef00292ffc +size 40149 diff --git a/output/o3-mini-2025-01-31/tau_long_context.parquet b/output/o3-mini-2025-01-31/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..923d5189246a7cbd53766f3350a384991c787a36 --- /dev/null +++ b/output/o3-mini-2025-01-31/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1b48df930e501711013b6a68ff943a0f202cf42121ba5a27cbb38becb6b27096 +size 53306 diff --git a/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet b/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..4c09b669467ee241714be765d9432ffa5eff369d --- /dev/null +++ b/output/o3-mini-2025-01-31/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d470cf0ebfbc10c66ef1ffc0d870b3840f5a17825a1bde0ae20e7559ffd7c6fa +size 14793 diff --git a/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet b/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..29f149b4d48ffe8f6100620209f5b99af1cbef0b --- /dev/null +++ b/output/o3-mini-2025-01-31/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c7a700ac0dcae0906a7223444d105fe535ad29ba1c21317b04a1e43acf234de0 +size 12425 diff --git a/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet b/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3b31d35eb213f9e336afdf75bb8e18b4140baeb7 --- /dev/null +++ b/output/o3-mini-2025-01-31/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bae876dcfc459e6a46a4169f36f0d01eb41cc30a37136488fd8dfced732c6e48 +size 89686 diff --git a/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet b/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..78a95a29211ddebcfe1ab92ac2f32778378269e1 --- /dev/null +++ b/output/o3-mini-2025-01-31/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f79a4f11fdd7db611a0e889cda2e2244bc803b659d3c8e849a85828a9dccca0d +size 39152 diff --git a/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet b/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..3fedb01a603b9e576e9d46079ff5835b89878fda --- /dev/null +++ b/output/o3-mini-2025-01-31/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f696bb5b6dad79e6bb4514ab4a125975d575d2f574536f9abf1354b44e893ed +size 30359 diff --git a/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet b/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..56ba40cba26f66181963c82bab00468304026fc1 --- /dev/null +++ b/output/o3-mini-2025-01-31/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9e885e1de218ee18ad556a5da25cb70680fa471f9d35a1f92d428b94008c255 +size 42511 diff --git a/output/o3-mini-2025-01-31/xlam_tool_miss.parquet b/output/o3-mini-2025-01-31/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..bda7a366d0d8f6f39878bd636625f8387b6d24d6 --- /dev/null +++ b/output/o3-mini-2025-01-31/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fabbb9419be03d34acc719002618b38e841c0ef0930daf43229397e69bf6be9f +size 54065 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet b/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..606fdb337838d9d288b8d7e05991fe4770055f18 --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdaff452ecbb0203d9710208941614db2ae98cfe38a73505a27cc1664953211f +size 40306 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..79d1332f622c94a8c0f2da4de47f77a72c41095c --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:df59f21532f95785aecab0401403c1de55c5e1f129fe3d5cfdbad7748bdefbd9 +size 22264 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..e3e7e90085397be2a325941a849e4e9cc16a283b --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f5ed6a5bb72e3cb89fafe32faa61f805c78cb2e184c48845e91582e42bff374 +size 24759 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..f68872a1056340ee253a1255a4d810e145844cef --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4d753cb575bbda366b3151d13f5f20224ae37c553a7f50fd52a390b0e704601 +size 40008 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..8d8c4feb8bd567d602481f2c548cae41996a44ff --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:66254fa40b2e3adf4e843a5aecbe7ef0d4c1089bc65939b4cbff53e81b09825b +size 38243 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c40391605f725d05a7355fc33fe12dc6d5bad069 --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bb2e78ffc0d7d42c3bb94541da7e9d2e69ae792b18ca2cbceec9919ab191c2d +size 43658 diff --git a/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..be54a66ceef256fb6abe7921513a19add471b315 --- /dev/null +++ b/output/open-mistral-nemo-2407/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ebb7fd52431bfa843a0a168def5740fc5bdb937483654ac72dfb86812460d1df +size 40354 diff --git a/output/open-mistral-nemo-2407/tau_long_context.parquet b/output/open-mistral-nemo-2407/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..ee5acc922ed10ce1b73052b8bece3a6d4d55600e --- /dev/null +++ b/output/open-mistral-nemo-2407/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:043e030ac8a5c30d42a732af4cd35a5811e900da5dd7b4ddcc2b4d8bc5ec07c5 +size 45520 diff --git a/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet b/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..26a6bb9ac0cc18a6015525dba443922ae788e4cb --- /dev/null +++ b/output/open-mistral-nemo-2407/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c71c8f7a2165d4d097dce1eb662a44fc45a4c26659dcd6ad03fb6b6f1643ee0f +size 15778 diff --git a/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet b/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..11e425fb6de72ca83bd254ab9c1eace778c94aeb --- /dev/null +++ b/output/open-mistral-nemo-2407/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0742484e5de6cb0de2207627046b52efeb372ca734f8dbc3931f7c01249278ad +size 11793 diff --git a/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet b/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..881313d5f21bcc6dab1549e5b786ce263755c161 --- /dev/null +++ b/output/open-mistral-nemo-2407/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e034675ab307ccba53f1e074f80dccce097b23f0114a66db68885eb448b3e70f +size 93944 diff --git a/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet b/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7b39bdb8318b3cacdc16b7c42a718b2319587b83 --- /dev/null +++ b/output/open-mistral-nemo-2407/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0faea55eb2d9698e1b7622c9fbfcabfc14a64c498ea1fc063aa3e12d6c0a3706 +size 39269 diff --git a/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet b/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2b7d9f2566dc9beacc5ad08d308d81065fb80919 --- /dev/null +++ b/output/open-mistral-nemo-2407/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:edfd01e6497ac3c1adb50a4496911c662d90899d124166fad2420e4f8f294889 +size 28422 diff --git a/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet b/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..9352a9ab109c3ca494165d4efa3874e71fc835c3 --- /dev/null +++ b/output/open-mistral-nemo-2407/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:12a6ed223b4a817d12beee36ad833e1d14b144afc7b9b4b6d3ccdc659fa6af28 +size 43566 diff --git a/output/open-mistral-nemo-2407/xlam_tool_miss.parquet b/output/open-mistral-nemo-2407/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..33e23110e7ef06d901b700fe6132efa04af829f5 --- /dev/null +++ b/output/open-mistral-nemo-2407/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3a686d3619bbcb86f40f2a12fadaab442245faa3abe4225f293ebdaabe773c2 +size 51517 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet new file mode 100644 index 0000000000000000000000000000000000000000..5cbdff5273ed655891f9ad67494c707d9758e364 --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_irrelevance.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c25cfdf1d1dc5c6b2ab1adb0432d48d8e1247ca784e2a1627375f877127503e0 +size 38324 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..b346039eb9fa02a0a9482505fc1d027df2268a1a --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_multi_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6bda1dba70342c11f13a8d9a1176c7b50cc3e59a3009b0818299293cc3a011c3 +size 22015 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..6d7ac9f5c10cf2621235a0f06ed7ff45236d4f6b --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_base_single_func_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cf027e54e639b8ae17bf7ab677ea982daa1ea8ba9983ca57280eadd91afb7155 +size 22667 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet new file mode 100644 index 0000000000000000000000000000000000000000..0b3623617c253f60f9bc8ea104e620130f08737d --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_composite.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d1276117805c09b1a22cc58e70672918ecc180ec181d87425c33c2482863ec1 +size 45055 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2a4893b3f8b4984ce0bef4ae40477e94b939b21d --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:86a20e72c28d8e1c692ecbfd2f11ac5a6efa0da1721e0a70c0d287a2b4c31891 +size 36947 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c7171dd32677628b9a9134e22b6914476dfd2cc0 --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_func.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:048ec3197847ab01d84d13a91698fd05b501697ce1eaefe5ed6984634eacaeb7 +size 44135 diff --git a/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet new file mode 100644 index 0000000000000000000000000000000000000000..66aa7910baff001746eec20ab5581cddc683e8dc --- /dev/null +++ b/output/qwen2.5-72b-instruct/BFCL_v3_multi_turn_miss_param.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2858d953d6e9e07ab8470cfaac7772833ff82885093a2705e8d22eca4e0b0cac +size 43816 diff --git a/output/qwen2.5-72b-instruct/tau_long_context.parquet b/output/qwen2.5-72b-instruct/tau_long_context.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c15864bcf58e1e42d50885bf04c2f6bf01074cbf --- /dev/null +++ b/output/qwen2.5-72b-instruct/tau_long_context.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1031a6909a0f2783bfb7cf8a89fcf473217ddb75c06829d8e722c7a0cb2ffa +size 40162 diff --git a/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet b/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet new file mode 100644 index 0000000000000000000000000000000000000000..38b3a9e90d904b327ec9f2fca3e19c382d9332dc --- /dev/null +++ b/output/qwen2.5-72b-instruct/toolace_single_func_call_1.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9c0bced5349fd60838e8457a5eff5c20f5550b79e829cc6e02bfaab4f66be66b +size 15262 diff --git a/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet b/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c9c4a0d1b55cab5d69952e85c9e0c33310e327af --- /dev/null +++ b/output/qwen2.5-72b-instruct/toolace_single_func_call_2.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:194465e6bcbd2c9ca72a2bcaf1850ec0c2cf93a6b099a35256746ac156884caf +size 12250 diff --git a/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet b/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..2320e63a68c6eb389913d44285041eca1a082f11 --- /dev/null +++ b/output/qwen2.5-72b-instruct/xlam_multiple_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:46f0ae6dcee6ac4f4984e88b03ad4cc6f3dae69eeb74fc88311748e73517c4cf +size 87738 diff --git a/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet b/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..7b02f842fc3aafe702bafe132db7b97d1bdacc44 --- /dev/null +++ b/output/qwen2.5-72b-instruct/xlam_multiple_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:356d18bda1985b977c02ca7731dc6304abad56354ca2854530ba199826b7f005 +size 39426 diff --git a/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet b/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..c1e5bd1319c49e6b26c8251abd1e9279979de91d --- /dev/null +++ b/output/qwen2.5-72b-instruct/xlam_single_tool_multiple_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a30f45f7053bf4bfae96b525b233748796032dac6646903381935417c7a64954 +size 28692 diff --git a/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet b/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet new file mode 100644 index 0000000000000000000000000000000000000000..88060b49662d444955ecf6d6a8fb7b907cd59139 --- /dev/null +++ b/output/qwen2.5-72b-instruct/xlam_single_tool_single_call.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:039ffb80b746e7c25416fec9724f846eb0261a27690eb821d43ad493a47f8c5a +size 40202 diff --git a/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet b/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet new file mode 100644 index 0000000000000000000000000000000000000000..1e982be07ca95f724ee9f0760be8007b35190ed1 --- /dev/null +++ b/output/qwen2.5-72b-instruct/xlam_tool_miss.parquet @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:278daacc873ff49258937965a0a7059c50bd62d930c3c0c6c091813e313e72c8 +size 49739 diff --git a/tabs/data_exploration.py b/tabs/data_exploration.py index 6c4ae828a1ffefc45b4d83469a2bc761875f8e53..2c25f86cfb24e19dfe2a00e6215d257ef4155535 100644 --- a/tabs/data_exploration.py +++ b/tabs/data_exploration.py @@ -1,20 +1,22 @@ import gradio as gr from chat import get_chat_and_score_df, update_chat_display - def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT): - def filter_and_update_display(model, dataset, selected_scores, current_index): + + def filter_and_update_display(model, dataset, min_score, max_score, current_index): try: df_chat = get_chat_and_score_df(model, dataset) - if selected_scores: - df_chat = df_chat[df_chat["score"].isin(selected_scores)] + + # Filter by score range + df_chat = df_chat[ + (df_chat["score"] >= min_score) & (df_chat["score"] <= max_score) + ] if df_chat.empty: return ( "
No data available for selected filters
", "
No metrics available
", "
No tool information available
", - gr.update(maximum=0, value=0), "0/0", ) @@ -28,7 +30,6 @@ def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT): chat_html, metrics_html, tool_html, - gr.update(maximum=max_index, value=current_index), f"{current_index + 1}/{len(df_chat)}", ) except Exception as e: @@ -37,112 +38,129 @@ def create_exploration_tab(df, MODELS, DATASETS, SCORES, HEADER_CONTENT): f"
Error: {str(e)}
", "
No metrics available
", "
No tool information available
", - gr.update(maximum=0, value=0), "0/0", ) with gr.Tab("Data Exploration"): gr.HTML(HEADER_CONTENT) - with gr.Row(): - filters_column = gr.Column(scale=1, min_width=300) - with filters_column: - gr.Markdown("# Exploration Filters") - explore_model = gr.Dropdown( - choices=MODELS, - value=MODELS[0], - label="Select Model", - ) - explore_dataset = gr.Dropdown( - choices=DATASETS, - value=DATASETS[0], - label="Select Dataset", - ) - explore_scores = gr.Dropdown( - choices=SCORES, - value=SCORES, - multiselect=True, - label="Score Range", - ) - gr.Markdown("## Navigation") - index_slider = gr.Slider( - minimum=0, - maximum=0, - step=1, - value=0, - label="Position", - ) - index_text = gr.HTML("0/0") - with gr.Row(): - prev_btn = gr.Button("← Previous") - next_btn = gr.Button("Next →") + # All filters in a single row with consistent sizing + with gr.Row(equal_height=True): + explore_model = gr.Dropdown( + choices=MODELS, + value=MODELS[0], + label="Model", + container=True, + scale=1, + ) + explore_dataset = gr.Dropdown( + choices=DATASETS, + value=DATASETS[0], + label="Dataset", + container=True, + scale=1, + ) + min_score = gr.Slider( + minimum=min(SCORES), + maximum=max(SCORES), + value=min(SCORES), + step=0.1, + label="Minimum Score - TSQ", + container=True, + scale=1, + ) + max_score = gr.Slider( + minimum=min(SCORES), + maximum=max(SCORES), + value=max(SCORES), + step=0.1, + label="Maximum Score - TSQ", + container=True, + scale=1, + ) + + # Navigation row + with gr.Row(variant="panel"): + index_display = gr.HTML( # Changed the variable name to index_display + value="0/0", elem_id="index-display", elem_classes="text-center" + ) + with gr.Row(): + prev_btn = gr.Button("← Previous", size="lg", variant="secondary") + next_btn = gr.Button("Next →", size="lg", variant="secondary") - content_column = gr.Column(scale=4) - with content_column: - chat_display = gr.HTML() - metrics_display = gr.HTML() - tool_info_display = gr.HTML() + # Content area with equal column widths + with gr.Row(equal_height=True): + chat_display = gr.HTML() + metrics_display = gr.HTML() + tool_info_display = gr.HTML() - def update_on_filter_change(model, dataset, scores, _): - return filter_and_update_display(model, dataset, scores, 0) + current_index = gr.State(value=0) - for control in [explore_model, explore_dataset, explore_scores]: + # Update display on filter change + def update_on_filter_change(model, dataset, min_score, max_score): + return filter_and_update_display(model, dataset, min_score, max_score, 0) + + for control in [explore_model, explore_dataset, min_score, max_score]: control.change( update_on_filter_change, - inputs=[explore_model, explore_dataset, explore_scores, gr.State(0)], + inputs=[explore_model, explore_dataset, min_score, max_score], outputs=[ chat_display, metrics_display, tool_info_display, - index_slider, - index_text, - ], + index_display, + ], # Changed to index_display ) - def navigate(direction, current, model, dataset, scores): - new_index = current + direction - return filter_and_update_display(model, dataset, scores, new_index) + # Navigation functions + def navigate(direction, current_idx, model, dataset, min_score, max_score): + new_index = current_idx + direction + return ( + *filter_and_update_display( + model, dataset, min_score, max_score, new_index + ), + new_index, + ) prev_btn.click( - lambda idx, m, d, s: navigate(-1, idx, m, d, s), - inputs=[index_slider, explore_model, explore_dataset, explore_scores], + lambda idx, m, d, min_s, max_s: navigate(-1, idx, m, d, min_s, max_s), + inputs=[ + current_index, + explore_model, + explore_dataset, + min_score, + max_score, + ], outputs=[ chat_display, metrics_display, tool_info_display, - index_slider, - index_text, - ], + index_display, + current_index, + ], # Changed to index_display ) next_btn.click( - lambda idx, m, d, s: navigate(1, idx, m, d, s), - inputs=[index_slider, explore_model, explore_dataset, explore_scores], - outputs=[ - chat_display, - metrics_display, - tool_info_display, - index_slider, - index_text, + lambda idx, m, d, min_s, max_s: navigate(1, idx, m, d, min_s, max_s), + inputs=[ + current_index, + explore_model, + explore_dataset, + min_score, + max_score, ], - ) - - index_slider.change( - lambda idx, m, d, s: filter_and_update_display(m, d, s, int(idx)), - inputs=[index_slider, explore_model, explore_dataset, explore_scores], outputs=[ chat_display, metrics_display, tool_info_display, - index_slider, - index_text, - ], + index_display, + current_index, + ], # Changed to index_display ) return ( chat_display, metrics_display, tool_info_display, - index_slider, - index_text, + index_display, # Changed to index_display )