{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "hello\n" ] } ], "source": [ "print(\"hello\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/tamizh/miniconda3/envs/movies-app/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): huggingface.co:443\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/tokenizer_config.json HTTP/11\" 200 0\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/config.json HTTP/11\" 200 0\n", "DEBUG:bitsandbytes.cextension:Loading bitsandbytes native library from: /home/tamizh/miniconda3/envs/movies-app/lib/python3.11/site-packages/bitsandbytes/libbitsandbytes_cuda121.so\n", "INFO:accelerate.utils.modeling:We will use 90% of the memory on device 0 for storing the model, and 10% for the buffer to avoid OOM. 
You can set `max_memory` in to a higher value to use more memory (at your own risk).\n", "Loading checkpoint shards: 100%|██████████| 4/4 [00:07<00:00, 1.81s/it]\n", "DEBUG:urllib3.connectionpool:https://huggingface.co:443 \"HEAD /meta-llama/Meta-Llama-3.1-8B-Instruct/resolve/main/generation_config.json HTTP/11\" 200 0\n" ] } ], "source": [ "import re\n", "import json\n", "\n", "from functions import *\n", "from transformers import pipeline\n", "from tools import tools\n", "\n", "import functions\n", "import torch\n", "from transformers import (\n", " AutoModelForCausalLM,\n", " AutoTokenizer,\n", " BitsAndBytesConfig\n", ")\n", "\n", "from transformers import AutoTokenizer, AutoModelForCausalLM\n", "\n", "quantization_config = BitsAndBytesConfig(\n", " load_in_8bit=True,\n", " load_in_4bit=False,\n", " bnb_4bit_quant_type=\"nf4\",\n", " bnb_4bit_compute_dtype=torch.bfloat16\n", ")\n", "\n", "model_id = \"meta-llama/Meta-Llama-3.1-8B-Instruct\"\n", "tokenizer = AutoTokenizer.from_pretrained(model_id)\n", "model = AutoModelForCausalLM.from_pretrained(model_id, \n", " device_map=\"auto\", \n", " quantization_config=quantization_config)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "def generate_reasoning_chain(query, max_new_tokens=128):\n", "    \"\"\"Ask the model for a multi-step reasoning chain that answers `query`.\n", "\n", "    Builds a system + user conversation, renders it through the tokenizer's\n", "    chat template together with the notebook-level `tools` list, and lets the\n", "    model continue it.\n", "\n", "    Args:\n", "        query: The user's question; interpolated verbatim into the prompt.\n", "        max_new_tokens: Cap on the number of generated tokens (default 128).\n", "\n", "    Returns:\n", "        The first decoded sequence as a string. It contains the rendered\n", "        prompt followed by the model's continuation.\n", "    \"\"\"\n", "    user_message = f\"\"\"\n", " Given the user query: \"{query}\"\n", " Generate a multi-step reasoning chain to answer the query. Include steps for using available tools if necessary.\n", " \"\"\"\n", "\n", "    messages = [\n", "        {\"role\": \"system\", \"content\": \"You are a movie search assistant bot who uses TMDB to help users find movies. 
Think step by step and identify the sequence of function calls that will help to answer.\"},\n", "        {\"role\": \"user\", \"content\": user_message},\n", "    ]\n", "\n", "    # add_generation_prompt=True makes the template append the assistant\n", "    # header, so the model starts a reply rather than continuing after the\n", "    # user's <|eot_id|>.\n", "    tokenized_chat = tokenizer.apply_chat_template(\n", "        messages,\n", "        tools=tools,\n", "        add_generation_prompt=True,\n", "        tokenize=True,\n", "        return_tensors=\"pt\",\n", "    )\n", "    # The model was loaded with device_map=\"auto\"; move the prompt to the\n", "    # model's device so generate() does not mix devices.\n", "    tokenized_chat = tokenized_chat.to(model.device)\n", "\n", "    outputs = model.generate(tokenized_chat, max_new_tokens=max_new_tokens)\n", "    return tokenizer.batch_decode(outputs)[0]" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{{- bos_token }}\n", "{%- if custom_tools is defined %}\n", " {%- set tools = custom_tools %}\n", "{%- endif %}\n", "{%- if not tools_in_user_message is defined %}\n", " {%- set tools_in_user_message = true %}\n", "{%- endif %}\n", "{%- if not date_string is defined %}\n", " {%- set date_string = \"26 Jul 2024\" %}\n", "{%- endif %}\n", "{%- if not tools is defined %}\n", " {%- set tools = none %}\n", "{%- endif %}\n", "\n", "{#- This block extracts the system message, so we can slot it into the right place. 
#}\n", "{%- if messages[0]['role'] == 'system' %}\n", " {%- set system_message = messages[0]['content']|trim %}\n", " {%- set messages = messages[1:] %}\n", "{%- else %}\n", " {%- set system_message = \"\" %}\n", "{%- endif %}\n", "\n", "{#- System message + builtin tools #}\n", "{{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n", "{%- if builtin_tools is defined or tools is not none %}\n", " {{- \"Environment: ipython\\n\" }}\n", "{%- endif %}\n", "{%- if builtin_tools is defined %}\n", " {{- \"Tools: \" + builtin_tools | reject('equalto', 'code_interpreter') | join(\", \") + \"\\n\\n\"}}\n", "{%- endif %}\n", "{{- \"Cutting Knowledge Date: December 2023\\n\" }}\n", "{{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n", "{%- if tools is not none and not tools_in_user_message %}\n", " {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n", " {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n", " {{- \"Do not use variables.\\n\\n\" }}\n", " {%- for t in tools %}\n", " {{- t | tojson(indent=4) }}\n", " {{- \"\\n\\n\" }}\n", " {%- endfor %}\n", "{%- endif %}\n", "{{- system_message }}\n", "{{- \"<|eot_id|>\" }}\n", "\n", "{#- Custom tools are passed in a user message with some extra guidance #}\n", "{%- if tools_in_user_message and not tools is none %}\n", " {#- Extract the first user message so we can plug it in here #}\n", " {%- if messages | length != 0 %}\n", " {%- set first_user_message = messages[0]['content']|trim %}\n", " {%- set messages = messages[1:] %}\n", " {%- else %}\n", " {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n", "{%- endif %}\n", " {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n", " {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n", " {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n", " {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n", " {{- \"Do not use variables.\\n\\n\" }}\n", " {%- for t in tools %}\n", " {{- t | tojson(indent=4) }}\n", " {{- \"\\n\\n\" }}\n", " {%- endfor %}\n", " {{- first_user_message + \"<|eot_id|>\"}}\n", "{%- endif %}\n", "\n", "{%- for message in messages %}\n", " {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n", " {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n'+ message['content'] | trim + '<|eot_id|>' }}\n", " {%- elif 'tool_calls' in message %}\n", " {%- if not message.tool_calls|length == 1 %}\n", " {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n", " {%- endif %}\n", " {%- set tool_call = message.tool_calls[0].function %}\n", " {%- if builtin_tools is defined and tool_call.name in builtin_tools %}\n", " {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n", " {{- \"<|python_tag|>\" + tool_call.name + \".call(\" }}\n", " {%- for arg_name, arg_val in tool_call.arguments | items %}\n", " {{- arg_name + '=\"' + arg_val + '\"' }}\n", " {%- if not loop.last %}\n", " {{- \", \" }}\n", " {%- endif %}\n", " {%- endfor %}\n", " {{- \")\" }}\n", " {%- else %}\n", " {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n", " {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n", " {{- '\"parameters\": ' }}\n", " {{- tool_call.arguments | tojson }}\n", " {{- \"}\" }}\n", " {%- endif %}\n", " {%- if builtin_tools is defined %}\n", " {#- This means we're in ipython mode #}\n", " {{- \"<|eom_id|>\" }}\n", " {%- else %}\n", " {{- \"<|eot_id|>\" }}\n", " {%- endif %}\n", " {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n", " {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n", " {%- if message.content is mapping or message.content is iterable %}\n", " {{- message.content | tojson }}\n", " {%- else %}\n", " {{- message.content }}\n", " {%- endif %}\n", " {{- \"<|eot_id|>\" }}\n", " {%- endif %}\n", "{%- endfor %}\n", "{%- 
if add_generation_prompt %}\n", " {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n", "{%- endif %}\n", "\n" ] } ], "source": [ "print(tokenizer.chat_template)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[{'type': 'function',\n", " 'function': {'name': 'search_person',\n", " 'description': 'Search for people in the entertainment industry.',\n", " 'parameters': {'type': 'object',\n", " 'properties': {'query': {'type': 'string',\n", " 'description': 'The search query for the person'},\n", " 'include_adult': {'type': 'boolean',\n", " 'description': 'Include adult (pornography) content in the results',\n", " 'default': False},\n", " 'language': {'type': 'string',\n", " 'description': 'Language for the search results',\n", " 'default': 'en-US'},\n", " 'page': {'type': 'integer',\n", " 'description': 'Page number of results',\n", " 'default': 1}},\n", " 'required': ['query']}}},\n", " {'type': 'function',\n", " 'function': {'name': 'get_person_details',\n", " 'description': 'Get detailed information about a specific person.',\n", " 'parameters': {'type': 'object',\n", " 'properties': {'person_id': {'type': 'integer',\n", " 'description': 'The ID of the person to get details for'},\n", " 'language': {'type': 'string',\n", " 'description': 'Language for the person details',\n", " 'default': 'en-US'},\n", " 'append_to_response': {'type': 'string',\n", " 'description': \"Comma-separated list of additional details to append to the response (e.g., 'images,credits')\"}},\n", " 'required': ['person_id']}}}]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Scratch sketch of how the chat template is expected to lay out a prompt:\n", "# stub tool specs, a sample conversation, and a hand-written rendering.\n", "# The stubs are bound to `example_tools` so they do not overwrite the `tools`\n", "# list imported from the `tools` module, which the other cells pass to\n", "# apply_chat_template.\n", "example_tools = [\n", " {'type': 'function', 'function': {'name': 'search_person'}},\n", " {'type': 'function', 'function': {'name': 'get_person_details'}} \n", "]\n", "\n", "messages = [\n", " {\"role\": \"system\", \"content\": \"You are a movie search assistant bot who uses TMDB to help users find movies. 
Think step by step and identify the sequence of function calls that will help to answer.\"},\n", " {\"role\": \"user\", \"content\": \"\"\"Generate a multi-step reasoning chain to answer the query. Include steps for using available tools if necessary.\n", " Reasoning chain:\n", " \"\"\"},\n", " {\"role\": \"assistant\", \"content\": \"Model response\"},\n", " ]\n", "\n", "\n", "expected_rendered_text = \"\"\"\n", "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "\n", "Environment: ipython\n", "Cutting Knowledge Date: December 2023\n", "Today Date: 26 Jul 2024\n", "\n", "You are a movie search assistant bot who uses TMDB to help users find movies. Think step by step and identify the sequence of function calls that will help to answer.<|eot_id|>\n", "<|start_header_id|>user<|end_header_id|>\n", "Generate a multi-step reasoning chain to answer the query. Include steps for using available tools if necessary.\n", "<|eot_id|>\n", "<|start_header_id|>assistant<|end_header_id|>model_response<|eot_id|>\n", "<|start_header_id|>user<|end_header_id|>\n", "Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n", "\n", "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.Do not use variables.\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"discover_movie\"}\n", "}\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"get_person_details\"}\n", "}\n", "\"\"\"\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n", "\n", "Environment: ipython\n", "Cutting Knowledge Date: December 2023\n", "Today Date: 26 Jul 2024\n", "\n", "You are a movie search assistant bot who uses TMDB to help users find movies. 
Think step by step and identify the sequence of function calls that will help to answer.<|eot_id|><|start_header_id|>user<|end_header_id|>\n", "\n", "Given the following functions, please respond with a JSON for a function call with its proper arguments that best answers the given prompt.\n", "\n", "Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.Do not use variables.\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"discover_movie\",\n", " \"description\": \"Find movies using over 30 filters and sort options\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", " \"region\": {\n", " \"type\": \"string\",\n", " \"description\": \"ISO 3166-1 code to filter release dates\"\n", " },\n", " \"sort_by\": {\n", " \"type\": \"string\",\n", " \"description\": \"Sort the results\"\n", " },\n", " \"release_date.gte\": {\n", " \"type\": \"string\",\n", " \"description\": \"Filter and only include movies that have a release date (looking at all release dates) that is greater or equal to the specified value\"\n", " },\n", " \"release_date.lte\": {\n", " \"type\": \"string\",\n", " \"description\": \"Filter and only include movies that have a release date (looking at all release dates) that is less than or equal to the specified value\"\n", " },\n", " \"with_release_type\": {\n", " \"type\": \"integer\",\n", " \"description\": \"Specify a comma (AND) or pipe (OR) separated value to filter release types\"\n", " },\n", " \"year\": {\n", " \"type\": \"integer\",\n", " \"description\": \"Filter the results to only include movies that have a release year that equals the specified value\"\n", " },\n", " \"with_cast\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of person ID's to filter the results with\"\n", " },\n", " \"with_crew\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of person ID's to 
filter the results with\"\n", " },\n", " \"with_people\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of person ID's to filter the results with\"\n", " },\n", " \"with_companies\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of production company ID's to filter the results with\"\n", " },\n", " \"with_genres\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of genre ID's to filter the results with\"\n", " },\n", " \"without_genres\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of genre ID's to exclude from the results\"\n", " },\n", " \"with_keywords\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of keyword ID's to filter the results with\"\n", " },\n", " \"without_keywords\": {\n", " \"type\": \"string\",\n", " \"description\": \"A comma separated list of keyword ID's to exclude from the results\"\n", " }\n", " },\n", " \"required\": []\n", " }\n", " }\n", "}\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"get_movie_details\",\n", " \"description\": \"Get the top level details of a movie by ID\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", " \"movie_id\": {\n", " \"type\": \"integer\",\n", " \"description\": \"The ID of the movie to get details for\"\n", " },\n", " \"append_to_response\": {\n", " \"type\": \"string\",\n", " \"description\": \"Comma-separated list of sub requests to append to the response\"\n", " }\n", " },\n", " \"required\": [\n", " \"movie_id\"\n", " ]\n", " }\n", " }\n", "}\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"search_person\",\n", " \"description\": \"Search for people in the entertainment industry.\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", " \"query\": {\n", " \"type\": \"string\",\n", " \"description\": \"The search query 
for the person\"\n", " },\n", " \"include_adult\": {\n", " \"type\": \"boolean\",\n", " \"description\": \"Include adult (pornography) content in the results\",\n", " \"default\": false\n", " },\n", " \"language\": {\n", " \"type\": \"string\",\n", " \"description\": \"Language for the search results\",\n", " \"default\": \"en-US\"\n", " },\n", " \"page\": {\n", " \"type\": \"integer\",\n", " \"description\": \"Page number of results\",\n", " \"default\": 1\n", " }\n", " },\n", " \"required\": [\n", " \"query\"\n", " ]\n", " }\n", " }\n", "}\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"get_person_details\",\n", " \"description\": \"Get detailed information about a specific person.\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", " \"person_id\": {\n", " \"type\": \"integer\",\n", " \"description\": \"The ID of the person to get details for\"\n", " },\n", " \"language\": {\n", " \"type\": \"string\",\n", " \"description\": \"Language for the person details\",\n", " \"default\": \"en-US\"\n", " },\n", " \"append_to_response\": {\n", " \"type\": \"string\",\n", " \"description\": \"Comma-separated list of additional details to append to the response (e.g., 'images,credits')\"\n", " }\n", " },\n", " \"required\": [\n", " \"person_id\"\n", " ]\n", " }\n", " }\n", "}\n", "\n", "{\n", " \"type\": \"function\",\n", " \"function\": {\n", " \"name\": \"get_movie_genres\",\n", " \"description\": \"Get the list of official genres for movies.\",\n", " \"parameters\": {\n", " \"type\": \"object\",\n", " \"properties\": {\n", " \"language\": {\n", " \"type\": \"string\",\n", " \"description\": \"Language for the genre names\",\n", " \"default\": \"en-US\"\n", " }\n", " }\n", " }\n", " }\n", "}\n", "\n", "Given the user query: \"What are the genres of the movie 'The Dark Knight'?\"\n", " Generate a multi-step reasoning chain to answer the query. 
Include steps for using available tools if necessary.<|eot_id|>\n" ] } ], "source": [ "import textwrap\n", "\n", "\n", "# Render (without tokenizing) the prompt for a sample query so it can be inspected.\n", "query = \"What are the genres of the movie 'The Dark Knight'?\"\n", "\n", "user_message = f\"\"\"\n", " Given the user query: \"{query}\"\n", " Generate a multi-step reasoning chain to answer the query. Include steps for using available tools if necessary.\n", " \"\"\"\n", "messages = [\n", " {\"role\": \"system\", \"content\": \"You are a movie search assistant bot who uses TMDB to help users find movies. Think step by step and identify the sequence of function calls that will help to answer.\"},\n", " {\"role\": \"user\", \"content\": user_message},\n", " ]\n", "\n", "# tokenize=False returns the rendered text, so no return_tensors is passed.\n", "chat = tokenizer.apply_chat_template(\n", "    messages,\n", "    tools=tools,\n", "    add_generation_prompt=False,\n", "    tools_in_user_message=True,\n", "    tokenize=False,\n", ")\n", "print(chat)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "response = generate_reasoning_chain(\"What are the genres of the movie 'The Dark Knight'?\")\n", "print(response)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Reference layout of a tool-calling conversation (placeholders in {{ }}):\n", "#\n", "# <|begin_of_text|>\n", "# <|start_header_id|>system<|end_header_id|> {{ system_prompt }}<|eot_id|>\n", "# <|start_header_id|>user<|end_header_id|> {{ user_message_1 }}<|eot_id|>\n", "# <|start_header_id|>assistant<|end_header_id|> <|python_tag|>{{ model_tool_call_1 }}<|eom_id|>\n", "# <|start_header_id|>ipython<|end_header_id|> {{ tool_response }}<|eot_id|>\n", "# <|start_header_id|>assistant<|end_header_id|> {{ model_response_based_on_tool_response }}<|eot_id|>\n", "print(response)" ] } ], "metadata": { "kernelspec": { "display_name": "movies-app", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": 
"python", "pygments_lexer": "ipython3", "version": "3.11.9" } }, "nbformat": 4, "nbformat_minor": 2 }