jdaddyalbs
/

qwen3_sft_playwright_gguf

+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bdb9c28f-d4a7-49e1-9d5e-74a878e2edc3",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "from unsloth import FastLanguageModel, is_bfloat16_supported\n",
+    "from trl import SFTConfig, SFTTrainer\n",
+    "from datasets import load_dataset, Dataset\n",
+    "from unsloth import FastLanguageModel, is_bfloat16_supported\n",
+    "import torch\n",
+    "from mcp.types import Tool, ToolAnnotations\n",
+    "import os \n",
+    "import wandb\n",
+    "import torch\n",
+    "import json\n",
+    "from transformers import DataCollatorForSeq2Seq\n",
+    "from unsloth.chat_templates import train_on_responses_only\n",
+    "from urllib.parse import urlencode\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ca3b85c-fd3d-4a2f-a239-2a1949d608fc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# token used to upload models\n",
+    "#HF_TOKEN = \"\"\n",
+    "# wandb optional for logging training data\n",
+    "#os.environ['WANDB_PROJECT'] = \"\"\n",
+    "#os.environ['WANDB_API_KEY'] = \"\"\n",
+    "\n",
+    "#wandb.login()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1da19b51-f367-44c4-b4ad-f9dbe208ae56",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "max_seq_length = 18000 # Can increase for longer reasoning traces, works with 32GB GPU, may need to reduce to avoid out-of-memory errors\n",
+    "lora_rank = 32 # Larger rank = smarter, but slower\n",
+    "\n",
+    "\n",
+    "model, tokenizer = FastLanguageModel.from_pretrained(\n",
+    "    model_name = \"unsloth/Qwen3-4B-bnb-4bit\",\n",
+    "    #model_name = \"./qwen3-sft/checkpoint-765\", # uncomment to load a local checkpoint like this example\n",
+    "    max_seq_length = max_seq_length,\n",
+    "    load_in_4bit = True, # False for LoRA 16bit\n",
+    "    fast_inference = False,\n",
+    "    max_lora_rank = lora_rank,\n",
+    "    gpu_memory_utilization = 0.5, # Reduce if out of memory\n",
+    ")\n",
+    "\n",
+    "\n",
+    "model = FastLanguageModel.get_peft_model(\n",
+    "    model,\n",
+    "    r = lora_rank, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
+    "    target_modules = [\n",
+    "        \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
+    "        \"gate_proj\", \"up_proj\", \"down_proj\",\n",
+    "    ], # Remove QKVO if out of memory\n",
+    "    lora_alpha = lora_rank*2,\n",
+    "    use_gradient_checkpointing = \"unsloth\", # Enable long context finetuning\n",
+    "    random_state = 3407,\n",
+    ")\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d60f8797-20b3-44a8-9068-156488863427",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# may not be necessary, unsloth version may already have this template in the tokenizer\n",
+    "from unsloth.chat_templates import get_chat_template\n",
+    "\n",
+    "tokenizer = get_chat_template(\n",
+    "    tokenizer,\n",
+    "    chat_template = \"qwen-3\",\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a0754d2a-baf7-406b-8450-273c668a0f38",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# see dataset repo for details on dataset generation\n",
+    "dataset = load_dataset(\"jdaddyalbs/playwright-mcp-toolcalling\", split=\"unprocessed\").remove_columns([\"text\"])\n",
+    "# alternately you can load the preprocessed train test files directly (can be used with other qwen3 models with same chat template)\n",
+    "# if loading the files below, you can skip ahead to the \"trainer = SFTTrainer(....\" part\n",
+    "#eval_dataset = load_dataset(\"jdaddyalbs/playwright-mcp-toolcalling\", data_files=\"data/test.parquet\")['train']\n",
+    "#train_dataset = load_dataset(\"jdaddyalbs/playwright-mcp-toolcalling\", data_files=\"data/train.parquet\")['train']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e055354-d080-4a71-ae20-227a88c79fa7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# tools from microsoft's playwright-mcp v0.0.31\n",
+    "tools = load_dataset(\"jdaddyalbs/playwright-mcp-toolcalling\",data_files=\"tools.txt\")\n",
+    "tools = eval(\"\".join([tools['train']['text'][i] for i in range(len(tools['train']['text']))]))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "263ecd0f-41b0-4cf2-bb47-61d6d0725960",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert to valid json\n",
+    "tools_json = [\n",
+    "    {\n",
+    "        \"type\":\"function\",\n",
+    "        \"function\": {\n",
+    "            \"name\": tool.name,\n",
+    "            \"description\": tool.description,\n",
+    "            \"parameters\": tool.inputSchema\n",
+    "        }\n",
+    "    } for tool in tools\n",
+    "]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bb4290d2-8151-43c4-a9a0-28600c85a12b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# convert messages to correct format for model using chat template\n",
+    "def apply_template(messages):\n",
+    "    return tokenizer.apply_chat_template(\n",
+    "        messages,\n",
+    "        tools=tools_json,\n",
+    "        tokenize=False,\n",
+    "        add_generation_prompt=False,\n",
+    "        enable_thinking=True\n",
+    "    ) "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6ec133c-8c7d-469d-8380-fb6cf54a7264",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "messages = []\n",
+    "for i in range(len(dataset['messages'])):\n",
+    "    msgs = [json.loads(msg) for msg in dataset['messages'][i]]\n",
+    "    messages.append(apply_template(msgs))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71a5b8d5-0299-4c36-af9d-c028ecccf7cd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = dataset.add_column(\"text\",messages)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2f2bb399-bf1f-43a8-9f15-1efef2c35a80",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# I want to encourage the model to use tools, so I only give examples where tools are used\n",
+    "# May or may not be helpful\n",
+    "dataset = dataset.filter(lambda x: x[\"num_tools\"] > 0)\n",
+    "dataset = dataset.filter(lambda x: x[\"llm_match\"])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4a0ccd1a-d217-4ece-8773-73117ab355dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# keep seed constant to get repeatable split\n",
+    "ds = dataset.train_test_split(test_size = 0.1, seed=42)\n",
+    "train_dataset = ds['train']\n",
+    "eval_dataset = ds['test']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "659a2864-fa2f-4abc-b4ae-6d267f7c1bd9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Build the supervised fine-tuning trainer over the templated \"text\" column.\n",
+    "# With per_device_train_batch_size=1 and gradient_accumulation_steps=4, each optimizer step\n",
+    "# accumulates 4 samples per device; evaluation is configured to run every 50 steps.\n",
+    "# NOTE(review): train_on_responses_only is imported at the top of the notebook but never applied\n",
+    "# to this trainer -- presumably loss is computed over the whole rendered text; confirm this is intended.\n",
+    "# NOTE(review): fp16_full_eval is hard-coded to True and is_bfloat16_supported is never called --\n",
+    "# confirm this is appropriate on bf16-capable hardware.\n",
+    "trainer = SFTTrainer(\n",
+    "    model = model,\n",
+    "    tokenizer = tokenizer,\n",
+    "    train_dataset = train_dataset,\n",
+    "    eval_dataset = eval_dataset, # Can set up evaluation!\n",
+    "    data_collator = DataCollatorForSeq2Seq(tokenizer = tokenizer),\n",
+    "    args = SFTConfig(\n",
+    "        dataset_text_field = \"text\",\n",
+    "        per_device_train_batch_size = 1, # bigger batches takes up too much GPU memory\n",
+    "        gradient_accumulation_steps = 4, # 1 gradient update every 4 samples, higher should make training more stable but take longer\n",
+    "        warmup_steps = 5,\n",
+    "        num_train_epochs = 1, # Set this for 1 full training run.\n",
+    "        learning_rate = 2e-4, # Reduce to 2e-5 for long training runs\n",
+    "        logging_steps = 1,\n",
+    "        optim = \"adamw_8bit\",\n",
+    "        weight_decay = 0.01,\n",
+    "        lr_scheduler_type = \"linear\",\n",
+    "        seed = 3407,\n",
+    "        #report_to = \"wandb\", # Use this for WandB, comment out if not using\n",
+    "        output_dir='qwen3-sft',\n",
+    "        dataset_num_proc=2,\n",
+    "        eval_steps=50,\n",
+    "        fp16_full_eval = True,\n",
+    "        per_device_eval_batch_size = 1,\n",
+    "        eval_accumulation_steps = 1,\n",
+    "        eval_strategy = \"steps\",\n",
+    "    ),\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e9d1051-7813-4570-9fcf-183d70e845bb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# start the finetuning process\n",
+    "# resume_from_checkpoint=False starts a fresh run and the return value is kept in trainer_stats.\n",
+    "# NOTE(review): to continue an interrupted run, presumably pass True or a checkpoint path\n",
+    "# instead -- confirm against the Trainer.train documentation.\n",
+    "trainer_stats = trainer.train(resume_from_checkpoint=False)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7e8d7a36-a5b6-452c-9fcf-5bb4325bd5d0",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "#model.push_to_hub_gguf(\"jdaddyalbs/qwen3_sft_playwright_gguf\", tokenizer,token=HF_TOKEN, quantization_method='q8_0')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ad0eb650-ea62-4a3a-b968-88d6f792dc28",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "#model.push_to_hub_merged(\"jdaddyalbs/qwen3_sft_playwright\",tokenizer,token=HF_TOKEN,save_method=\"merged_16bit\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "40b285d6-49ff-4d8f-8220-7ad7e3ef4fa9",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "# example evaluate a single sample\n",
+    "idx = 51\n",
+    "print(eval_dataset[idx]['true_answer'])\n",
+    "print(eval_dataset[idx]['answer'])\n",
+    "\n",
+    "text = tokenizer.apply_chat_template(\n",
+    "    eval_dataset[idx][\"evil_messages\"][:2],\n",
+    "    tokenize = False,\n",
+    "    tools=tools_json,\n",
+    "    add_generation_prompt = True, # Must add for generation\n",
+    "    enable_thinking = True,\n",
+    ")\n",
+    "\n",
+    "from transformers import TextStreamer\n",
+    "out = model.generate(\n",
+    "    **tokenizer(text, return_tensors = \"pt\").to(\"cuda\"),\n",
+    "    temperature = 0.0001, top_p = 0.95, top_k = 20, # For thinking\n",
+    "    max_new_tokens = 2048,\n",
+    "    streamer = TextStreamer(tokenizer, skip_prompt = False),\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.13.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}