{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ae79196c-1b84-4cde-8051-6cb113b80373",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/Users/Pi/miniconda3/envs/myenv/lib/python3.10/site-packages/xgboost/training.py:183: UserWarning: [00:11:22] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:738: \n",
      "Parameters: { \"use_label_encoder\" } are not used.\n",
      "\n",
      "  bst.update(dtrain, iteration=i, fobj=obj)\n",
      "llama_model_load_from_file_impl: using device Metal (Apple M2 Pro) - 21845 MiB free\n",
      "llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from /Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf (version GGUF V2)\n",
      "llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.\n",
      "llama_model_loader: - kv   0:                       general.architecture str              = llama\n",
      "llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1\n",
      "llama_model_loader: - kv   2:                       llama.context_length u32              = 32768\n",
      "llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096\n",
      "llama_model_loader: - kv   4:                          llama.block_count u32              = 32\n",
      "llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336\n",
      "llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128\n",
      "llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32\n",
      "llama_model_loader: - kv   8:              llama.attention.head_count_kv u32              = 8\n",
      "llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010\n",
      "llama_model_loader: - kv  10:                       llama.rope.freq_base f32              = 10000.000000\n",
      "llama_model_loader: - kv  11:                          general.file_type u32              = 15\n",
      "llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = llama\n",
      "llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,32000]   = [\"<unk>\", \"<s>\", \"</s>\", \"<0x00>\", \"<...\n",
      "llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...\n",
      "llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...\n",
      "llama_model_loader: - kv  16:                tokenizer.ggml.bos_token_id u32              = 1\n",
      "llama_model_loader: - kv  17:                tokenizer.ggml.eos_token_id u32              = 2\n",
      "llama_model_loader: - kv  18:            tokenizer.ggml.unknown_token_id u32              = 0\n",
      "llama_model_loader: - kv  19:               general.quantization_version u32              = 2\n",
      "llama_model_loader: - type  f32:   65 tensors\n",
      "llama_model_loader: - type q4_K:  193 tensors\n",
      "llama_model_loader: - type q6_K:   33 tensors\n",
      "print_info: file format = GGUF V2\n",
      "print_info: file type   = Q4_K - Medium\n",
      "print_info: file size   = 4.07 GiB (4.83 BPW) \n",
      "init_tokenizer: initializing tokenizer for type 1\n",
      "load: control token:      2 '</s>' is not marked as EOG\n",
      "load: control token:      1 '<s>' is not marked as EOG\n",
      "load: special_eos_id is not in special_eog_ids - the tokenizer config may be incorrect\n",
      "load: special tokens cache size = 3\n",
      "load: token to piece cache size = 0.1637 MB\n",
      "print_info: arch             = llama\n",
      "print_info: vocab_only       = 0\n",
      "print_info: n_ctx_train      = 32768\n",
      "print_info: n_embd           = 4096\n",
      "print_info: n_layer          = 32\n",
      "print_info: n_head           = 32\n",
      "print_info: n_head_kv        = 8\n",
      "print_info: n_rot            = 128\n",
      "print_info: n_swa            = 0\n",
      "print_info: n_embd_head_k    = 128\n",
      "print_info: n_embd_head_v    = 128\n",
      "print_info: n_gqa            = 4\n",
      "print_info: n_embd_k_gqa     = 1024\n",
      "print_info: n_embd_v_gqa     = 1024\n",
      "print_info: f_norm_eps       = 0.0e+00\n",
      "print_info: f_norm_rms_eps   = 1.0e-05\n",
      "print_info: f_clamp_kqv      = 0.0e+00\n",
      "print_info: f_max_alibi_bias = 0.0e+00\n",
      "print_info: f_logit_scale    = 0.0e+00\n",
      "print_info: f_attn_scale     = 0.0e+00\n",
      "print_info: n_ff             = 14336\n",
      "print_info: n_expert         = 0\n",
      "print_info: n_expert_used    = 0\n",
      "print_info: causal attn      = 1\n",
      "print_info: pooling type     = 0\n",
      "print_info: rope type        = 0\n",
      "print_info: rope scaling     = linear\n",
      "print_info: freq_base_train  = 10000.0\n",
      "print_info: freq_scale_train = 1\n",
      "print_info: n_ctx_orig_yarn  = 32768\n",
      "print_info: rope_finetuned   = unknown\n",
      "print_info: ssm_d_conv       = 0\n",
      "print_info: ssm_d_inner      = 0\n",
      "print_info: ssm_d_state      = 0\n",
      "print_info: ssm_dt_rank      = 0\n",
      "print_info: ssm_dt_b_c_rms   = 0\n",
      "print_info: model type       = 7B\n",
      "print_info: model params     = 7.24 B\n",
      "print_info: general.name     = mistralai_mistral-7b-instruct-v0.1\n",
      "print_info: vocab type       = SPM\n",
      "print_info: n_vocab          = 32000\n",
      "print_info: n_merges         = 0\n",
      "print_info: BOS token        = 1 '<s>'\n",
      "print_info: EOS token        = 2 '</s>'\n",
      "print_info: UNK token        = 0 '<unk>'\n",
      "print_info: LF token         = 13 '<0x0A>'\n",
      "print_info: EOG token        = 2 '</s>'\n",
      "print_info: max token length = 48\n",
      "load_tensors: loading model tensors, this can take a while... (mmap = true)\n",
      "load_tensors: layer   0 assigned to device CPU\n",
      "load_tensors: layer   1 assigned to device CPU\n",
      "load_tensors: layer   2 assigned to device CPU\n",
      "load_tensors: layer   3 assigned to device CPU\n",
      "load_tensors: layer   4 assigned to device CPU\n",
      "load_tensors: layer   5 assigned to device CPU\n",
      "load_tensors: layer   6 assigned to device CPU\n",
      "load_tensors: layer   7 assigned to device CPU\n",
      "load_tensors: layer   8 assigned to device CPU\n",
      "load_tensors: layer   9 assigned to device CPU\n",
      "load_tensors: layer  10 assigned to device CPU\n",
      "load_tensors: layer  11 assigned to device CPU\n",
      "load_tensors: layer  12 assigned to device CPU\n",
      "load_tensors: layer  13 assigned to device CPU\n",
      "load_tensors: layer  14 assigned to device CPU\n",
      "load_tensors: layer  15 assigned to device CPU\n",
      "load_tensors: layer  16 assigned to device CPU\n",
      "load_tensors: layer  17 assigned to device CPU\n",
      "load_tensors: layer  18 assigned to device CPU\n",
      "load_tensors: layer  19 assigned to device CPU\n",
      "load_tensors: layer  20 assigned to device CPU\n",
      "load_tensors: layer  21 assigned to device CPU\n",
      "load_tensors: layer  22 assigned to device CPU\n",
      "load_tensors: layer  23 assigned to device CPU\n",
      "load_tensors: layer  24 assigned to device CPU\n",
      "load_tensors: layer  25 assigned to device CPU\n",
      "load_tensors: layer  26 assigned to device CPU\n",
      "load_tensors: layer  27 assigned to device CPU\n",
      "load_tensors: layer  28 assigned to device CPU\n",
      "load_tensors: layer  29 assigned to device CPU\n",
      "load_tensors: layer  30 assigned to device CPU\n",
      "load_tensors: layer  31 assigned to device CPU\n",
      "load_tensors: layer  32 assigned to device CPU\n",
      "load_tensors: tensor 'token_embd.weight' (q4_K) (and 290 others) cannot be used with preferred buffer type CPU_AARCH64, using CPU instead\n",
      "load_tensors: offloading 0 repeating layers to GPU\n",
      "load_tensors: offloaded 0/33 layers to GPU\n",
      "load_tensors:   CPU_Mapped model buffer size =  4165.37 MiB\n",
      ".................................................................................................\n",
      "llama_init_from_model: n_seq_max     = 1\n",
      "llama_init_from_model: n_ctx         = 2048\n",
      "llama_init_from_model: n_ctx_per_seq = 2048\n",
      "llama_init_from_model: n_batch       = 512\n",
      "llama_init_from_model: n_ubatch      = 512\n",
      "llama_init_from_model: flash_attn    = 0\n",
      "llama_init_from_model: freq_base     = 10000.0\n",
      "llama_init_from_model: freq_scale    = 1\n",
      "llama_init_from_model: n_ctx_per_seq (2048) < n_ctx_train (32768) -- the full capacity of the model will not be utilized\n",
      "ggml_metal_init: allocating\n",
      "ggml_metal_init: found device: Apple M2 Pro\n",
      "ggml_metal_init: picking default device: Apple M2 Pro\n",
      "ggml_metal_load_library: using embedded metal library\n",
      "ggml_metal_init: GPU name:   Apple M2 Pro\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyApple8  (1008)\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyCommon3 (3003)\n",
      "ggml_metal_init: GPU family: MTLGPUFamilyMetal3  (5001)\n",
      "ggml_metal_init: simdgroup reduction   = true\n",
      "ggml_metal_init: simdgroup matrix mul. = true\n",
      "ggml_metal_init: has residency sets    = true\n",
      "ggml_metal_init: has bfloat            = true\n",
      "ggml_metal_init: use bfloat            = false\n",
      "ggml_metal_init: hasUnifiedMemory      = true\n",
      "ggml_metal_init: recommendedMaxWorkingSetSize  = 22906.50 MB\n",
      "ggml_metal_init: loaded kernel_add                                    0x11f38a990 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_add_row                                0x12ef6f310 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sub                                    0x11f405df0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sub_row                                0x11f38b080 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul                                    0x11bc91a30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_row                                0x11f1aa2f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_div                                    0x12ee21f20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_div_row                                0x12ef82170 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_f32                             0x11bc91cc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_f16                             0x13b813e80 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_i32                             0x11f38b2e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_repeat_i16                             0x11f1aaa50 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_scale                                  0x12ef826e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_scale_4                                0x11bc92230 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_clamp                                  0x11f406d10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_tanh                                   0x11bc92580 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_relu                                   0x11f423300 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sigmoid                                0x13a3d6930 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu                                   0x11bc92a60 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_4                                 0x11f1aacb0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_quick                             0x11bc93220 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_gelu_quick_4                           0x11bc937a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_silu                                   0x11bc93ce0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_silu_4                                 0x13a3d7120 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_elu                                    0x13a3d7600 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f16                           0x11bc93fa0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f16_4                         0x12ef82940 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f32                           0x11bc94400 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_soft_max_f32_4                         0x13a3d7860 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_diag_mask_inf                          0x12ef82ba0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_diag_mask_inf_8                        0x13a3d7ac0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_f32                           0x13a3d7d20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_f16                           0x11bc94660 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_get_rows_bf16                     (not supported)\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_0                          0x13a3d7f80 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_1                          0x11bc948c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_0                          0x11bc94b20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_1                          0x11f423890 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q8_0                          0x11bc94d80 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q2_K                          0x11f1ab1d0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q3_K                          0x11bc94fe0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q4_K                          0x11bc95240 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q5_K                          0x11bc954a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_q6_K                          0x11bc95700 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_xxs                       0x11f1ab430 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_xs                        0x12ef82e00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq3_xxs                       0x13a3d81e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq3_s                         0x11bc95960 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq2_s                         0x12ee225b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq1_s                         0x11bc95bc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq1_m                         0x13a3d6e10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq4_nl                        0x11f423c10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_iq4_xs                        0x13a3d8440 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_get_rows_i32                           0x11bc95e30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rms_norm                               0x13a3d86a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_group_norm                             0x11f1ab690 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_norm                                   0x11bc96090 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_ssm_conv_f32                           0x12ef83060 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_ssm_scan_f32                           0x11bc962f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f32_f32                         0x12ef833e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32                   (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32_1row              (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_f32_l4                (not supported)\n",
      "ggml_metal_init: skipping kernel_mul_mv_bf16_bf16                  (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32                         0x13a3d8900 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32_1row                    0x11f1aba20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f32_l4                      0x11bc96580 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_f16_f16                         0x11bc967e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_0_f32                        0x12ef83760 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_1_f32                        0x12ef83ae0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_0_f32                        0x11bc96a40 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_1_f32                        0x13a3d8b60 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q8_0_f32                        0x13a3d8dc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_2                0x11bc96ca0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_3                0x11f38b7d0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_4                0x11f424100 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_f16_f32_r1_5                0x13a3d9020 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_2               0x13a3d9280 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_3               0x11bc96f00 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_4               0x13a3d94e0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_0_f32_r1_5               0x11bc97160 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_2               0x12ef83e60 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_3               0x11f1abf10 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_4               0x11bc973c0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_1_f32_r1_5               0x13a3d9740 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_2               0x13a3d99a0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_3               0x11bc97620 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_4               0x13a3d9c00 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_0_f32_r1_5               0x13a3d9e60 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_2               0x11bc97880 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_3               0x13a3da0c0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_4               0x11bc97ae0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_1_f32_r1_5               0x11f424360 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_2               0x11f1ac170 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_3               0x11f38ba30 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_4               0x13a3da320 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q8_0_f32_r1_5               0x11f1ac670 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_2               0x13a3da580 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_3               0x11f1ac8d0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_4               0x12ef841e0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q4_K_f32_r1_5               0x11bc97d40 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_2               0x13a3da7e0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_3               0x13a3daa40 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_4               0x11bc97fa0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q5_K_f32_r1_5               0x11bc98200 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_2               0x11f38bf30 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_3               0x13a3daca0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_4               0x11f424880 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_q6_K_f32_r1_5               0x12ef84740 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_2             0x13b814370 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_3             0x11f424ae0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_4             0x11f425270 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_ext_iq4_nl_f32_r1_5             0x12ef84ac0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q2_K_f32                        0x11bc98460 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q3_K_f32                        0x13b8145d0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q4_K_f32                        0x12ef84e40 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q5_K_f32                        0x11f4254d0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_q6_K_f32                        0x11f425730 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_xxs_f32                     0x12ef85250 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_xs_f32                      0x11f425ce0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq3_xxs_f32                     0x11f425f40 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq3_s_f32                       0x11f4262c0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq2_s_f32                       0x11f426640 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq1_s_f32                       0x11f426c30 | th_max =  448 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq1_m_f32                       0x12ef85720 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq4_nl_f32                      0x11f4271e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_iq4_xs_f32                      0x11f427440 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_f32_f32                      0x13a3daf00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_f16_f32                      0x13b814830 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mv_id_bf16_f32                (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_0_f32                     0x11f4277c0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_1_f32                     0x13a3db160 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_0_f32                     0x11f427b40 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_1_f32                     0x12ef85980 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q8_0_f32                     0x13b814a90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q2_K_f32                     0x13b814cf0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q3_K_f32                     0x12ef85d00 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q4_K_f32                     0x12ef86080 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q5_K_f32                     0x11bc987f0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_q6_K_f32                     0x13b815070 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_xxs_f32                  0x11bc98a50 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_xs_f32                   0x11f1acdd0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq3_xxs_f32                  0x11bc98cb0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq3_s_f32                    0x13a3db3c0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq2_s_f32                    0x13a3db620 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq1_s_f32                    0x11f4280f0 | th_max =  448 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq1_m_f32                    0x11bc98f10 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq4_nl_f32                   0x13b8153f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mv_id_iq4_xs_f32                   0x11f4285c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_f32_f32                         0x12ee22810 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_f16_f32                         0x11f428820 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mm_bf16_f32                   (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_0_f32                        0x11f428ba0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_1_f32                        0x12ef86400 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_0_f32                        0x11f428f20 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_1_f32                        0x12ef86780 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q8_0_f32                        0x11f4292a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q2_K_f32                        0x12ef86d30 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q3_K_f32                        0x13a3db880 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q4_K_f32                        0x11f429620 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q5_K_f32                        0x12ee22b90 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_q6_K_f32                        0x11bc99180 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_xxs_f32                     0x11f38c190 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_xs_f32                      0x11f4299a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq3_xxs_f32                     0x12ee22df0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq3_s_f32                       0x11bc993e0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq2_s_f32                       0x11bc99640 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq1_s_f32                       0x12ee233a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq1_m_f32                       0x11f429d20 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq4_nl_f32                      0x12ef86f90 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_iq4_xs_f32                      0x12ee23870 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_f32_f32                      0x13b815950 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_f16_f32                      0x12ef87320 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_mul_mm_id_bf16_f32                (not supported)\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_0_f32                     0x11f42a0a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_1_f32                     0x11bc998a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_0_f32                     0x11f42a420 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_1_f32                     0x12ef878d0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q8_0_f32                     0x11f42a7a0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q2_K_f32                     0x13b815bb0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q3_K_f32                     0x11f42ab20 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q4_K_f32                     0x12ee23ad0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q5_K_f32                     0x11f42afc0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_q6_K_f32                     0x12ef87c50 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_xxs_f32                  0x12ee23d30 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_xs_f32                   0x12ef88120 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq3_xxs_f32                  0x11f42b490 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq3_s_f32                    0x13a3dbae0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq2_s_f32                    0x12ef88380 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq1_s_f32                    0x11f42b6f0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq1_m_f32                    0x11bc99b10 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq4_nl_f32                   0x11f42ba80 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_mul_mm_id_iq4_xs_f32                   0x12ef885e0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_norm_f32                          0x12ef88840 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_norm_f16                          0x12ef88d60 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_neox_f32                          0x13a3dbd40 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_rope_neox_f16                          0x12ee240b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_f16                             0x12ef88fc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_f32                             0x13b815f30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_ext_f16                         0x11f42be00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_im2col_ext_f32                         0x11f42c180 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_conv_transpose_1d_f32_f32              0x11f42c500 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_conv_transpose_1d_f16_f32              0x11f42ca60 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_upscale_f32                            0x11bc99d70 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pad_f32                                0x11f42ccc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pad_reflect_1d_f32                     0x12ee24490 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_timestep_embedding_f32                 0x11bc9a000 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_arange_f32                             0x11bc9a260 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_argsort_f32_i32_asc                    0x11f42d040 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_argsort_f32_i32_desc                   0x11f42d3c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_leaky_relu_f32                         0x11f42ddc0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h64                 0x11f42e2b0 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h80                 0x12ef89620 | th_max =  640 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h96                 0x11f42e510 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h112                0x12ef89c80 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h128                0x11f42e770 | th_max =  512 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_f16_h256                0x12ee24980 | th_max =  512 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h64           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h80           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h96           (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h112          (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h128          (not supported)\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_bf16_h256          (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h64                0x12ee24be0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h80                0x12ef8a000 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h96                0x12ef8a260 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h112               0x12ef8a5e0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h128               0x11f42ecd0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_0_h256               0x13b8162b0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h64                0x11f42ef30 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h80                0x12ef8ab40 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h96                0x12ef8ada0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h112               0x12ef8b250 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h128               0x11f42f6c0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q4_1_h256               0x11bc9a4c0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h64                0x11bc9a990 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h80                0x11bc9abf0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h96                0x13a3dbfa0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h112               0x13a3dc200 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h128               0x11f38c3f0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_0_h256               0x11bc9ae50 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h64                0x12ef8b8e0 | th_max =  576 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h80                0x12ef8bb40 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h96                0x13a3dc460 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h112               0x11bc9b0b0 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h128               0x13a3dc6c0 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q5_1_h256               0x11bc9b310 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h64                0x13a3dc920 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h80                0x11bc9b570 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h96                0x11f38c770 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h112               0x13a3dcb80 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h128               0x13a3dcde0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_q8_0_h256               0x11bc9b7d0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h128            0x12ef8c040 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h128      (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h128           0x13a3dd040 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h128           0x13a3dd2a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h128           0x13a3dd500 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h128           0x11bc9bc90 | th_max =  768 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h128           0x11bc9bef0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_f16_h256            0x11bc9c150 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_flash_attn_ext_vec_bf16_h256      (not supported)\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_0_h256           0x11f1ad030 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q4_1_h256           0x11bc9c3b0 | th_max =  896 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_0_h256           0x11f38caf0 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q5_1_h256           0x11bc9c610 | th_max =  704 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_flash_attn_ext_vec_q8_0_h256           0x11bc9c870 | th_max =  832 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_set_f32                                0x11bc9cc90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_set_i32                                0x12ef8c2a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_f32                            0x13a3dd760 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_f16                            0x11f1ad530 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_cpy_f32_bf16                      (not supported)\n",
      "ggml_metal_init: loaded kernel_cpy_f16_f32                            0x13a3dd9c0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f16_f16                            0x13a3ddc20 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: skipping kernel_cpy_bf16_f32                      (not supported)\n",
      "ggml_metal_init: skipping kernel_cpy_bf16_bf16                     (not supported)\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q8_0                           0x11bc9cef0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q4_0                           0x11bc9d150 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q4_1                           0x11bc9d3b0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q5_0                           0x13a3dde80 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_q5_1                           0x11f1ad790 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_f32_iq4_nl                         0x12ef8c920 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q4_0_f32                           0x12ee250e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q4_0_f16                           0x11bc9d610 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q4_1_f32                           0x11bc9d870 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q4_1_f16                           0x11bc9dad0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q5_0_f32                           0x11bc9dd30 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q5_0_f16                           0x11bc9df90 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q5_1_f32                           0x13a3de0e0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q5_1_f16                           0x11bc9e1f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q8_0_f32                           0x13a3de340 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cpy_q8_0_f16                           0x11bc9e450 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_concat                                 0x11f1ad9f0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sqr                                    0x11f1adf00 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sqrt                                   0x11f38d440 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sin                                    0x11f1ae820 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_cos                                    0x13a3de820 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_sum_rows                               0x12ef8ce10 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_argmax                                 0x11f38d6a0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pool_2d_avg_f32                        0x11f1aeba0 | th_max = 1024 | th_width =   32\n",
      "ggml_metal_init: loaded kernel_pool_2d_max_f32                        0x13a3dea80 | th_max = 1024 | th_width =   32\n",
      "llama_kv_cache_init: kv_size = 2048, offload = 1, type_k = 'f16', type_v = 'f16', n_layer = 32, can_shift = 1\n",
      "llama_kv_cache_init: layer 0: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 1: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 2: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 3: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 4: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 5: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 6: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 7: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 8: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 9: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 10: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 11: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 12: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 13: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 14: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 15: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 16: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 17: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 18: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 19: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 20: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 21: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 22: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 23: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 24: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 25: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 26: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 27: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 28: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 29: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 30: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init: layer 31: n_embd_k_gqa = 1024, n_embd_v_gqa = 1024\n",
      "llama_kv_cache_init:        CPU KV buffer size =   256.00 MiB\n",
      "llama_init_from_model: KV self size  =  256.00 MiB, K (f16):  128.00 MiB, V (f16):  128.00 MiB\n",
      "llama_init_from_model:        CPU  output buffer size =     0.12 MiB\n",
      "llama_init_from_model:        CPU compute buffer size =   164.01 MiB\n",
      "llama_init_from_model: graph nodes  = 1030\n",
      "llama_init_from_model: graph splits = 514 (with bs=512), 1 (with bs=1)\n",
      "Metal : EMBED_LIBRARY = 1 | CPU : NEON = 1 | ARM_FMA = 1 | FP16_VA = 1 | MATMUL_INT8 = 1 | DOTPROD = 1 | ACCELERATE = 1 | AARCH64_REPACK = 1 | \n",
      "Model metadata: {'general.quantization_version': '2', 'tokenizer.ggml.unknown_token_id': '0', 'tokenizer.ggml.eos_token_id': '2', 'tokenizer.ggml.bos_token_id': '1', 'tokenizer.ggml.model': 'llama', 'llama.attention.head_count_kv': '8', 'llama.context_length': '32768', 'llama.attention.head_count': '32', 'llama.rope.freq_base': '10000.000000', 'llama.rope.dimension_count': '128', 'general.file_type': '15', 'llama.feed_forward_length': '14336', 'llama.embedding_length': '4096', 'llama.block_count': '32', 'general.architecture': 'llama', 'llama.attention.layer_norm_rms_epsilon': '0.000010', 'general.name': 'mistralai_mistral-7b-instruct-v0.1'}\n",
      "Using fallback chat format: llama-2\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "--- Multi-Turn Mental Health Chatbot ---\n",
      "This assistant simulates a counselor's conversation using AI.\n",
      "- Type something your patient/student might say\n",
      "- Type 'save' to export the conversation\n",
      "- Type 'exit' to quit\n",
      "\n",
      "Example:\n",
      "User: I feel like I’ll mess up my big presentation tomorrow.\n",
      "Counselor: It’s completely normal to feel nervous before a big event...\n",
      "\n"
     ]
    },
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      "User:  i'm nervous\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(Predicted: information, Confidence: 85.5%)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "llama_perf_context_print:        load time =     766.62 ms\n",
      "llama_perf_context_print: prompt eval time =     766.43 ms /    23 tokens (   33.32 ms per token,    30.01 tokens per second)\n",
      "llama_perf_context_print:        eval time =   17184.04 ms /   149 runs   (  115.33 ms per token,     8.67 tokens per second)\n",
      "llama_perf_context_print:       total time =   17971.68 ms /   172 tokens\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Counselor: From a mental health perspective, feeling nervous or anxious is a common and normal human experience. It's a natural response to stress or a challenging situation. However, if the feelings of nervousness persist or interfere with daily life, it could be a sign of an anxiety disorder.\n",
      "\n",
      "Anxiety disorders are a group of mental health conditions in which a person feels excessive and persistent worry or fear. These feelings can interfere with daily life, such as work, school, relationships, and personal activities. The symptoms of anxiety disorders vary from person to person, but typically include physical symptoms like sweating, trembling, and difficulty sleeping, as well as psychological symptoms like feelings of worry, fear, and avoidance\n"
     ]
    }
   ],
   "source": [
    "# Multi-Turn Mental Health Chatbot using XGBoost + Mistral-7B\n",
    "\n",
    "import os\n",
    "import pandas as pd\n",
    "from sklearn.feature_extraction.text import TfidfVectorizer\n",
    "from sklearn.preprocessing import LabelEncoder\n",
    "from sklearn.model_selection import train_test_split\n",
    "from xgboost import XGBClassifier\n",
    "from llama_cpp import Llama\n",
    "import json\n",
    "\n",
    "# --- Load and prepare the dataset ---\n",
    "df = pd.read_csv(\"dataset/Kaggle_Mental_Health_Conversations_train.csv\")\n",
    "df = df[['Context', 'Response']].dropna().copy()\n",
    "\n",
    "# Auto-labeling\n",
    "keywords_to_labels = {\n",
    "    'advice': ['try', 'should', 'suggest', 'recommend'],\n",
    "    'validation': ['understand', 'feel', 'valid', 'normal'],\n",
    "    'information': ['cause', 'often', 'disorder', 'symptom'],\n",
    "    'question': ['how', 'what', 'why', 'have you']\n",
    "}\n",
    "\n",
    "def auto_label_response(response):\n",
    "    response = response.lower()\n",
    "    for label, keywords in keywords_to_labels.items():\n",
    "        if any(word in response for word in keywords):\n",
    "            return label\n",
    "    return 'information'\n",
    "\n",
    "df['response_type'] = df['Response'].apply(auto_label_response)\n",
    "\n",
    "# Combine Context and Response for richer features\n",
    "df['combined_text'] = df['Context'] + \" \" + df['Response']\n",
    "\n",
    "# Encode labels\n",
    "le = LabelEncoder()\n",
    "y = le.fit_transform(df['response_type'])\n",
    "\n",
    "# TF-IDF Vectorization\n",
    "vectorizer = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))\n",
    "X = vectorizer.fit_transform(df['combined_text'])\n",
    "\n",
    "# Train/Test Split\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, stratify=y, random_state=42\n",
    ")\n",
    "\n",
    "# Train XGBoost Classifier\n",
    "xgb_model = XGBClassifier(\n",
    "    objective='multi:softmax',\n",
    "    num_class=len(le.classes_),\n",
    "    eval_metric='mlogloss',\n",
    "    use_label_encoder=False,\n",
    "    max_depth=6,\n",
    "    learning_rate=0.1,\n",
    "    n_estimators=100\n",
    ")\n",
    "xgb_model.fit(X_train, y_train)\n",
    "\n",
    "# Load Mistral model\n",
    "MODEL_PATH = os.path.expanduser(\"/Users/Pi/models/mistral/mistral-7b-instruct-v0.1.Q4_K_M.gguf\")\n",
    "llm = Llama(model_path=MODEL_PATH, n_ctx=2048, n_threads=os.cpu_count())\n",
    "\n",
    "# --- Chatbot Functions ---\n",
    "def predict_response_type(user_input):\n",
    "    vec = vectorizer.transform([user_input])\n",
    "    pred = xgb_model.predict(vec)\n",
    "    proba = xgb_model.predict_proba(vec).max()\n",
    "    label = le.inverse_transform(pred)[0]\n",
    "    return label, proba\n",
    "\n",
    "def build_prompt(user_input, response_type):\n",
    "    prompts = {\n",
    "        \"advice\": f\"A student said: \\\"{user_input}\\\". What advice should a counselor give to help?\",\n",
    "        \"validation\": f\"A student said: \\\"{user_input}\\\". How can a counselor validate the student's emotions?\",\n",
    "        \"information\": f\"A student said: \\\"{user_input}\\\". Explain what might be happening from a mental health perspective.\",\n",
    "        \"question\": f\"A student said: \\\"{user_input}\\\". What follow-up questions should a counselor ask?\"\n",
    "    }\n",
    "    return prompts.get(response_type, prompts[\"information\"])\n",
    "\n",
    "def generate_llm_response(user_input, response_type):\n",
    "    prompt = build_prompt(user_input, response_type)\n",
    "    result = llm(prompt, max_tokens=150, temperature=0.7)\n",
    "    return result['choices'][0]['text'].strip()\n",
    "\n",
    "def trim_memory(history, max_turns=6):\n",
    "    return history[-max_turns * 2:]\n",
    "\n",
    "def save_conversation(history):\n",
    "    with open(\"chat_history.json\", \"w\") as f:\n",
    "        json.dump(history, f, indent=2)\n",
    "    print(\"✅ Conversation saved to chat_history.json\\n\")\n",
    "\n",
    "def show_intro():\n",
    "    print(\"\\n--- Multi-Turn Mental Health Chatbot ---\")\n",
    "    print(\"This assistant simulates a counselor's conversation using AI.\")\n",
    "    print(\"- Type something your patient/student might say\")\n",
    "    print(\"- Type 'save' to export the conversation\")\n",
    "    print(\"- Type 'exit' to quit\\n\")\n",
    "\n",
    "    print(\"Example:\")\n",
    "    print(\"User: I feel like I’ll mess up my big presentation tomorrow.\")\n",
    "    print(\"Counselor: It’s completely normal to feel nervous before a big event...\\n\")\n",
    "\n",
    "# --- Run Chatbot ---\n",
    "history = []\n",
    "show_intro()\n",
    "\n",
    "while True:\n",
    "    user_input = input(\"User: \").strip()\n",
    "\n",
    "    if user_input.lower() == \"exit\":\n",
    "        print(\"Goodbye\")\n",
    "        break\n",
    "    elif user_input.lower() == \"save\":\n",
    "        save_conversation(history)\n",
    "        continue\n",
    "\n",
    "    predicted_type, confidence = predict_response_type(user_input)\n",
    "    print(f\"(Prfedicted: {predicted_type}, Confidence: {confidence:.1%})\")\n",
    "\n",
    "    llm_reply = generate_llm_response(user_input, predicted_type)\n",
    "\n",
    "    history.append({\"role\": \"user\", \"content\": user_input})\n",
    "    history.append({\"role\": \"assistant\", \"content\": llm_reply})\n",
    "    history = trim_memory(history)\n",
    "\n",
    "    print(\"Counselor:\", llm_reply)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8e2f9122-f3dd-4d09-bfb1-c0438df2e8f1",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (myenv)",
   "language": "python",
   "name": "myenv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.16"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}