John6666 committed on
Commit 8f48a77 · verified · 1 Parent(s): e548b67

Upload 6 files

Files changed (6)
  1. README.md +1 -1
  2. app.py +27 -25
  3. genimage.py +26 -16
  4. llmdolphin.py +44 -46
  5. llmenv.py +45 -0
  6. requirements.txt +5 -4
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👀😻
  colorFrom: red
  colorTo: purple
  sdk: gradio
- sdk_version: 5.34.2
+ sdk_version: 5.45.0
  app_file: app.py
  pinned: true
  license: apache-2.0
app.py CHANGED
@@ -21,7 +21,7 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
  """, elem_classes="title")
  state = gr.State(value={})
  with gr.Group():
- chatbot = gr.Chatbot(show_copy_button=True, show_share_button=False, layout="bubble", container=True)
+ chatbot = gr.Chatbot(type="messages", show_copy_button=True, show_share_button=False, layout="bubble", container=True)
  with gr.Row(equal_height=True):
  chat_msg = gr.Textbox(show_label=False, placeholder="Input text in English, Japanese, or any other languages and press Enter or click Send.", scale=4)
  chat_submit = gr.Button("Send", scale=1, variant="primary")
@@ -71,38 +71,40 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
  gr.Markdown("""# Chat with lots of Models and LLMs using llama.cpp
  This tab is copy of [CaioXapelaum/GGUF-Playground](https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground).<br>
  Don't worry about the strange appearance, **it's just a bug of Gradio!**""", elem_classes="title")
- pg_chatbot = gr.Chatbot(scale=1, show_copy_button=True, show_share_button=False)
- with gr.Accordion("Additional inputs", open=False):
- pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0], allow_custom_value=True, label="Model")
- pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0]), label="Model info")
- pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0]), label="Message format")
- pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message")
- with gr.Row():
- pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
- pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
- pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
- pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
- pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
- with gr.Accordion("Loras", open=True, visible=False):
- pg_chat_lora = gr.Dropdown(choices=get_dolphin_loras(), value=get_dolphin_loras()[0], allow_custom_value=True, label="Lora")
- pg_chat_lora_scale = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Lora scale")
- pg_chat_add_lora_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/ggml-org/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-F16-GGUF/blob/main/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-f16.gguf", lines=1)
- pg_chat_add_lora_submit = gr.Button("Update lists of loras")
- with gr.Accordion("Add models", open=True):
- pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
- pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
- pg_chat_add_submit = gr.Button("Update lists of models")
+ pg_chatbot = gr.Chatbot(scale=1, type="messages", show_copy_button=True, show_share_button=False)
+ #with gr.Accordion("Additional inputs", open=False):
+ pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0], allow_custom_value=True, label="Model", render=False)
+ pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0]), label="Model info", render=False)
+ pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0]), label="Message format", render=False)
+ pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
+ with gr.Row():
+ pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens", render=False)
+ pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", render=False)
+ pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p", render=False)
+ pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k", render=False)
+ pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty", render=False)
+ pg_chat_lora = gr.Dropdown(choices=get_dolphin_loras(), value=get_dolphin_loras()[0], allow_custom_value=True, label="Lora", render=False)
+ pg_chat_lora_scale = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Lora scale", render=False)
+ with gr.Accordion("Add models", open=False):
+ pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
+ pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
+ pg_chat_add_submit = gr.Button("Update lists of models")
+ with gr.Accordion("Loras", open=False, visible=False):
+ pg_chat_add_lora_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/ggml-org/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-F16-GGUF/blob/main/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-f16.gguf", lines=1)
+ pg_chat_add_lora_submit = gr.Button("Update lists of loras")
  gr.ChatInterface(
  fn=respond_playground,
  #title="Chat with lots of Models and LLMs using llama.cpp",
  #retry_btn="Retry",
  #undo_btn="Undo",
- #clear_btn="Clear",
- submit_btn="Send",
+ stop_btn=True,
+ submit_btn=True,
  #additional_inputs_accordion='gr.Accordion(label="Additional Inputs", open=False)',
  additional_inputs=[pg_chat_model, pg_chat_sysmsg, pg_chat_tokens, pg_chat_temperature, pg_chat_topp, pg_chat_topk, pg_chat_rp,
  pg_chat_lora, pg_chat_lora_scale, state],
- chatbot=pg_chatbot
+ chatbot=pg_chatbot,
+ multimodal=False,
+ type="messages",
  )
  gr.LoginButton()
  gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
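
The app.py changes above move both chat tabs from Gradio's legacy tuple-style history to the messages format, and hand pre-built `render=False` components to `gr.ChatInterface` instead of laying them out in an accordion. A minimal standalone sketch of that wiring in Gradio 5.x follows; the component names and the echo handler are illustrative, not the Space's actual code.

```python
import gradio as gr

def respond(message, history, system_message):
    # With type="messages", history arrives as a list of {"role", "content"} dicts.
    reply = f"[{system_message}] echo: {message}"
    for i in range(1, len(reply) + 1):
        yield reply[:i]  # ChatInterface renders streamed partial strings

with gr.Blocks() as demo:
    # Components built up front with render=False, then handed to ChatInterface,
    # which places them in its "Additional Inputs" area.
    sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
    gr.ChatInterface(
        fn=respond,
        type="messages",  # history is list[MessageDict] rather than list[tuple]
        chatbot=gr.Chatbot(type="messages", show_copy_button=True),
        additional_inputs=[sysmsg],
        submit_btn=True,
        stop_btn=True,
        multimodal=False,
    )

if __name__ == "__main__":
    demo.launch()
```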
genimage.py CHANGED
@@ -3,9 +3,11 @@ import gradio as gr
  import torch
  import gc, os, uuid, json
  from PIL import PngImagePlugin
+ from diffusers import DiffusionPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler


  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
  if os.getenv("SPACES_ZERO_GPU", None):
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False
@@ -14,13 +16,16 @@ if os.getenv("SPACES_ZERO_GPU", None):


  def load_pipeline():
- from diffusers import DiffusionPipeline
+ #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)
  pipe = DiffusionPipeline.from_pretrained(
- "John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
- custom_pipeline="lpw_stable_diffusion_xl",
+ #"John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
+ "Raelina/Raehoshi-illust-XL-6",
+ #custom_pipeline="lpw_stable_diffusion_xl",
  #custom_pipeline="nyanko7/sdxl_smoothed_energy_guidance",
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+ torch_dtype=dtype,
+ #vae=vae,
  )
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
  pipe.to("cpu")
  return pipe

@@ -69,30 +74,35 @@ pipe = load_pipeline()


  @torch.inference_mode()
- @spaces.GPU(duration=10)
+ @spaces.GPU(duration=15)
  def generate_image(prompt, neg_prompt, progress=gr.Progress(track_tqdm=True)):
  pipe.to(device)
- prompt += ", anime, masterpiece, best quality, very aesthetic, absurdres"
- neg_prompt += ", bad hands, bad feet, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract], photo, deformed, disfigured, low contrast, photo, deformed, disfigured, low contrast"
+ #prompt += ", masterpiece, best quality, very aesthetic, absurdres"
+ #neg_prompt += "bad hands, bad feet, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract], photo, deformed, disfigured, low contrast, photo, deformed, disfigured, low contrast"
+ neg_prompt += "bad quality, worst quality, poorly drawn, sketch, multiple views, bad anatomy, bad hands, missing fingers, extra fingers, extra digits, fewer digits, signature, watermark, username"
+ width = 1024
+ height = 1024
+ cfg = 6.0
+ steps = 28
  metadata = {
  "prompt": prompt,
  "negative_prompt": neg_prompt,
- "resolution": f"{1024} x {1024}",
- "guidance_scale": 7.0,
- "num_inference_steps": 28,
- "sampler": "Euler",
+ "resolution": f"{width} x {height}",
+ "guidance_scale": cfg,
+ "num_inference_steps": steps,
+ "sampler": "Euler a",
  }
  try:
  #positive_embeds, negative_embeds = token_auto_concat_embeds(pipe, prompt, neg_prompt)
  images = pipe(
  prompt=prompt,
  negative_prompt=neg_prompt,
- width=1024,
- height=1024,
- guidance_scale=7.0,# seg_scale=3.0, seg_applied_layers=["mid"],
- num_inference_steps=28,
+ width=width,
+ height=height,
+ guidance_scale=cfg,# seg_scale=3.0, seg_applied_layers=["mid"],
+ num_inference_steps=steps,
  output_type="pil",
- clip_skip=2,
+ #clip_skip=1,
  ).images
  if images:
  image_paths = [
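
For reference, the genimage.py hunks swap the checkpoint, introduce a shared module-level dtype, and replace the default sampler with Euler Ancestral. A condensed standalone usage sketch of that diffusers flow (the model id comes from the diff; prompts, device handling, and the output filename are placeholders for illustration):

```python
import torch
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

# Mirror the new module-level dtype choice: bf16 on GPU, fp32 on CPU.
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

pipe = DiffusionPipeline.from_pretrained("Raelina/Raehoshi-illust-XL-6", torch_dtype=dtype)
# Swap the checkpoint's default scheduler for Euler Ancestral ("Euler a"), keeping its config.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda" if torch.cuda.is_available() else "cpu")

image = pipe(
    prompt="1girl, looking at viewer",
    negative_prompt="bad quality, worst quality",
    width=1024,
    height=1024,
    guidance_scale=6.0,
    num_inference_steps=28,
).images[0]
image.save("sample.png")
```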
llmdolphin.py CHANGED
@@ -7,6 +7,7 @@ import gc
  import os
  import urllib
  from typing import Any
+ from gradio import MessageDict
  from huggingface_hub import hf_hub_download, HfApi
  from llama_cpp import Llama
  from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
@@ -376,10 +377,10 @@ def get_raw_prompt(msg: str):

  # https://llama-cpp-python.readthedocs.io/en/latest/api-reference/
  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def dolphin_respond(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -434,16 +435,12 @@
  messages = BasicChatHistory()

  for msn in history:
- user = {
- 'role': Roles.user,
- 'content': msn[0]
- }
- assistant = {
- 'role': Roles.assistant,
- 'content': msn[1]
- }
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  stream = agent.get_chat_response(
  message,
@@ -455,28 +452,28 @@

  progress(0.5, desc="Processing...")

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield [(outputs, None)]
+ history[-1]['content'] += output
+ yield history
  except Exception as e:
  print(e)
  raise gr.Error(f"Error: {e}")
- #yield [("", None)]
  finally:
  torch.cuda.empty_cache()
  gc.collect()


  def dolphin_parse(
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  state: dict,
  ):
  try:
  dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
  if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
  return "", gr.update(), gr.update()
- msg = history[-1][0]
+ msg = history[-1]["content"]
  raw_prompt = get_raw_prompt(msg)
  prompts = []
  if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
@@ -490,10 +487,10 @@


  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def dolphin_respond_auto(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -549,16 +546,12 @@
  messages = BasicChatHistory()

  for msn in history:
- user = {
- 'role': Roles.user,
- 'content': msn[0]
- }
- assistant = {
- 'role': Roles.assistant,
- 'content': msn[1]
- }
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  progress(0, desc="Translating...")
  stream = agent.get_chat_response(
@@ -571,13 +564,16 @@

  progress(0.5, desc="Processing...")

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield [(outputs, None)], gr.update(), gr.update()
+ history[-1]['content'] += output
+ yield history, gr.update(), gr.update()
  except Exception as e:
  print(e)
- yield [("", None)], gr.update(), gr.update()
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": message})
+ yield history, gr.update(), gr.update()
  finally:
  torch.cuda.empty_cache()
  gc.collect()
@@ -585,14 +581,14 @@ def dolphin_respond_auto(

  def dolphin_parse_simple(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  state: dict,
  ):
  try:
  #if not is_japanese(message): return message
  dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
  if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
- msg = history[-1][0]
+ msg = history[-1]["content"]
  raw_prompt = get_raw_prompt(msg)
  prompts = []
  if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
@@ -611,10 +607,10 @@ cv2.setNumThreads(1)


  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def respond_playground(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -669,10 +665,12 @@ def respond_playground(

  # Add user and assistant messages to the history
  for msn in history:
- user = {'role': Roles.user, 'content': msn[0]}
- assistant = {'role': Roles.assistant, 'content': msn[1]}
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  # Stream the response
  stream = agent.get_chat_response(
@@ -683,14 +681,14 @@
  print_output=False
  )

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield outputs
+ history[-1]['content'] += output
+ yield history
  except Exception as e:
  print(e)
  raise gr.Error(f"Error: {e}")
- #yield ""
  finally:
  torch.cuda.empty_cache()
  gc.collect()
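
The llmdolphin.py hunks all follow one pattern: the handlers now receive Gradio's messages-format history (a list of role/content dicts), replay it into llama-cpp-agent's BasicChatHistory, and yield the whole updated history while streaming instead of a list of (user, bot) tuples. A standalone sketch of that pattern is below; the llama-cpp-agent import paths are assumed to match the unchanged parts of llmdolphin.py.

```python
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles

def history_to_chat_history(history: list[dict]) -> BasicChatHistory:
    """Copy Gradio messages-format history into a BasicChatHistory."""
    messages = BasicChatHistory()
    for msn in history:
        if msn["role"] == "user":
            messages.add_message({'role': Roles.user, 'content': msn["content"]})
        elif msn["role"] == "assistant":
            messages.add_message({'role': Roles.assistant, 'content': msn["content"]})
    return messages

def stream_into_history(history: list[dict], message: str, stream):
    """Mirror the new yield pattern: append the user turn and an empty assistant
    turn, then grow the assistant turn as tokens arrive, yielding the full history."""
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})
    for output in stream:
        history[-1]["content"] += output
        yield history
```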
llmenv.py CHANGED
@@ -147,11 +147,17 @@ llm_models = {
  "SnowElf-12B-v2.Q4_K_M.gguf": ["mradermacher/SnowElf-12B-v2-GGUF", MessagesFormatterType.CHATML],
  "Queen-2.5-14B-aka.Q4_K_M.gguf": ["mradermacher/Queen-2.5-14B-aka-GGUF", MessagesFormatterType.OPEN_CHAT],
  "KnowledgeCore-12B.Q4_K_M.gguf": ["mradermacher/KnowledgeCore-12B-GGUF", MessagesFormatterType.CHATML],
+ "Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7-GGUF", MessagesFormatterType.CHATML],
  "PatriSlush-DarkRPReign-12B.Q4_K_M.gguf": ["mradermacher/PatriSlush-DarkRPReign-12B-GGUF", MessagesFormatterType.MISTRAL],
  "BianCang-Qwen2.5-14B-Instruct.Q4_K_M.gguf": ["mradermacher/BianCang-Qwen2.5-14B-Instruct-GGUF", MessagesFormatterType.OPEN_CHAT],
  "Simulation_LLM_wiki_14B_V2.Q4_K_M.gguf": ["mradermacher/Simulation_LLM_wiki_14B_V2-GGUF", MessagesFormatterType.OPEN_CHAT],
  "Neona-12B.i1-Q4_K_M.gguf": ["mradermacher/Neona-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "NeoSage-12B.Q4_K_M.gguf": ["mradermacher/NeoSage-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Patricide-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/Patricide-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
  "Pinecone-Rune-12b.Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-GGUF", MessagesFormatterType.MISTRAL],
+ "mn-12b-rp-without-dumb.Q4_K_M.gguf": ["mradermacher/mn-12b-rp-without-dumb-GGUF", MessagesFormatterType.MISTRAL],
+ "Denker-mistral-nemo-12B.Q4_K_M.gguf": ["mradermacher/Denker-mistral-nemo-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Goldcide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/Goldcide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
  "claude-3.7-sonnet-reasoning-gemma3-12B.Q4_K_M.gguf": ["mradermacher/claude-3.7-sonnet-reasoning-gemma3-12B-GGUF", MessagesFormatterType.ALPACA],
  "allura-org_MN-Lyrebird-12B-Q4_K_M.gguf": ["bartowski/allura-org_MN-Lyrebird-12B-GGUF", MessagesFormatterType.MISTRAL],
  "ape-fiction-2-mistral-nemo.Q4_K_M.gguf": ["mradermacher/ape-fiction-2-mistral-nemo-GGUF", MessagesFormatterType.MISTRAL],
@@ -172,6 +178,45 @@ llm_models = {
  #"": ["", MessagesFormatterType.OPEN_CHAT],
  #"": ["", MessagesFormatterType.CHATML],
  #"": ["", MessagesFormatterType.PHI_3],
+ "SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B.Q5_K_M.gguf": ["mradermacher/SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B-GGUF", MessagesFormatterType.LLAMA_3],
+ "care-japanese-llama3.1-8b.Q5_K_M.gguf": ["mradermacher/care-japanese-llama3.1-8b-GGUF", MessagesFormatterType.LLAMA_3],
+ "UltraPatriMerge-12B.Q4_K_M.gguf": ["mradermacher/UltraPatriMerge-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Llama-3.1-Amelia-MTFT-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-MTFT-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+ "llama3-archimate-merged.Q5_K_M.gguf": ["mradermacher/llama3-archimate-merged-GGUF", MessagesFormatterType.LLAMA_3],
+ "Mistral-Nemo-Base-2407-RP-Merge.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-Base-2407-RP-Merge-GGUF", MessagesFormatterType.CHATML],
+ "PatriMaid-12B-Forgottenslop-NeonMell.Q4_K_M.gguf": ["mradermacher/PatriMaid-12B-Forgottenslop-NeonMell-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-v3-medis-dilute-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3-medis-dilute-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-v3b-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3b-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "MN-Mystic-Rune-12B.Q4_K_S.gguf": ["mradermacher/MN-Mystic-Rune-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "MarinaraSpaghetti-NemoMix-Unleashed-12B-chat.Q4_K_M.gguf": ["mradermacher/MarinaraSpaghetti-NemoMix-Unleashed-12B-chat-GGUF", MessagesFormatterType.MISTRAL],
+ "GoldFox-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/GoldFox-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-Mell-v1-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-Mell-v1-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Shisa-DellaTest-12B.Q4_K_M.gguf": ["mradermacher/Shisa-DellaTest-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Q2.5-Coldbrew14B-FusionMix.i1-Q4_K_M.gguf": ["mradermacher/Q2.5-Coldbrew14B-FusionMix-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "nemo-instruct-books-model-stock.Q4_K_M.gguf": ["mradermacher/nemo-instruct-books-model-stock-GGUF", MessagesFormatterType.MISTRAL],
+ "FoxCide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/FoxCide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
+ "BMO-CaptianMaid-12B.i1-Q4_K_M.gguf": ["mradermacher/BMO-CaptianMaid-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Aurore-Reveil_Koto-Small-7B-IT-Q5_K_M.gguf": ["bartowski/Aurore-Reveil_Koto-Small-7B-IT-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a.i1-Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Kitsune-Symphony-V0.0-12B.Q4_K_M.gguf": ["mradermacher/Kitsune-Symphony-V0.0-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Anora-12b.i1-Q4_K_M.gguf": ["mradermacher/Anora-12b-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Minor-Repo-12B-omg.Q4_K_M.gguf": ["mradermacher/Minor-Repo-12B-omg-GGUF", MessagesFormatterType.MISTRAL],
+ "Luna.i1-Q5_K_M.gguf": ["mradermacher/Luna-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "OmegaMaid-DarkWay-FinalNoctis-12B.Q4_K_M.gguf": ["mradermacher/OmegaMaid-DarkWay-FinalNoctis-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "CaptainMaid-12B-VioletMell-V0.420.Q4_K_M.gguf": ["mradermacher/CaptainMaid-12B-VioletMell-V0.420-GGUF", MessagesFormatterType.MISTRAL],
+ "LatentSoup-modelstock-8b.Q5_K_M.gguf": ["mradermacher/LatentSoup-modelstock-8b-GGUF", MessagesFormatterType.LLAMA_3],
+ "Erotic-Model.v1.Q5_K_M.gguf": ["mradermacher/Erotic-Model.v1-GGUF", MessagesFormatterType.MISTRAL],
+ "Llama-3.1-Amelia-CD-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-CD-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+ "funny-nemo-embedding-merged.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-merged-GGUF", MessagesFormatterType.MISTRAL],
+ "EviOmni-nq_train-7B.Q5_K_M.gguf": ["mradermacher/EviOmni-nq_train-7B-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "NuMarkdown-8B-Thinking-fork.Q5_K_M.gguf": ["mradermacher/NuMarkdown-8B-Thinking-fork-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "Comet_12B_V.7.Q4_K_M.gguf": ["mradermacher/Comet_12B_V.7-GGUF", MessagesFormatterType.ALPACA],
+ "Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1-GGUF", MessagesFormatterType.LLAMA_3],
+ "NeuralMerge-9B-Dare.Q5_K_M.gguf": ["mradermacher/NeuralMerge-9B-Dare-GGUF", MessagesFormatterType.MISTRAL],
+ "CaptainErisNebula-12B-Chimera-v0.420.Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v0.420-GGUF", MessagesFormatterType.MISTRAL],
+ "Nemo-12B-OldSpice.Q4_K_M.gguf": ["mradermacher/Nemo-12B-OldSpice-GGUF", MessagesFormatterType.MISTRAL],
+ "funny-nemo-embedding-testing.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-testing-GGUF", MessagesFormatterType.MISTRAL],
+ "francois-v3.Q4_K_M.gguf": ["mradermacher/francois-v3-GGUF", MessagesFormatterType.CHATML],
  "KansenSakura-Eclipse-RP-12b.Q4_K_M.gguf": ["mradermacher/KansenSakura-Eclipse-RP-12b-GGUF", MessagesFormatterType.CHATML],
  "Sugoi-14B-Ultra-HF.Q4_K_M.gguf": ["mradermacher/Sugoi-14B-Ultra-HF-GGUF", MessagesFormatterType.OPEN_CHAT],
  "CaptainErisNebula-12B-Chimera-v1.1.i1-Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v1.1-i1-GGUF", MessagesFormatterType.MISTRAL],
requirements.txt CHANGED
@@ -1,11 +1,11 @@
- spaces
  huggingface_hub
  hf_xet
  hf_transfer
  scikit-build-core
  #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu124/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
  #git+https://github.com/Maximilian-Winter/llama-cpp-agent
- https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.14-cu124-AVX-linux-20250731/llama_cpp_python-0.3.14-cp310-cp310-linux_x86_64.whl
+ https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.16-cu124-AVX-linux-20250913/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
+ #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
  git+https://github.com/John6666cat/llama-cpp-agent
  pybind11>=2.12
  torch==2.4.0
@@ -13,11 +13,12 @@ torchvision
  accelerate
  transformers<=4.48.3
  optimum[onnxruntime]
- dartrs
+ #dartrs
+ git+https://github.com/John6666cat/dartrs
  translatepy
  diffusers
  rapidfuzz
  wrapt-timeout-decorator
  opencv-python
  numpy<2
- pydantic<=2.10.6
+ pydantic==2.10.6
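
The requirements changes drop the spaces entry, bump the prebuilt llama-cpp-python wheel to 0.3.16, switch dartrs to a fork, and pin pydantic to exactly 2.10.6. A quick, optional way to confirm what actually resolved inside the Space (illustrative snippet, not part of the commit):

```python
# Print the installed versions of the packages the new pins care about.
import importlib.metadata as md

for pkg in ("pydantic", "llama_cpp_python", "torch", "gradio"):
    try:
        print(pkg, md.version(pkg))
    except md.PackageNotFoundError:
        print(pkg, "is not installed")
```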