Upload 6 files
- README.md +1 -1
- app.py +27 -25
- genimage.py +26 -16
- llmdolphin.py +44 -46
- llmenv.py +45 -0
- requirements.txt +5 -4
README.md
CHANGED

@@ -4,7 +4,7 @@ emoji: ππ»
 colorFrom: red
 colorTo: purple
 sdk: gradio
-sdk_version: 5.
+sdk_version: 5.45.0
 app_file: app.py
 pinned: true
 license: apache-2.0
app.py
CHANGED

@@ -21,7 +21,7 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
 """, elem_classes="title")
 state = gr.State(value={})
 with gr.Group():
-chatbot = gr.Chatbot(show_copy_button=True, show_share_button=False, layout="bubble", container=True)
+chatbot = gr.Chatbot(type="messages", show_copy_button=True, show_share_button=False, layout="bubble", container=True)
 with gr.Row(equal_height=True):
 chat_msg = gr.Textbox(show_label=False, placeholder="Input text in English, Japanese, or any other languages and press Enter or click Send.", scale=4)
 chat_submit = gr.Button("Send", scale=1, variant="primary")

@@ -71,38 +71,40 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
 gr.Markdown("""# Chat with lots of Models and LLMs using llama.cpp
 This tab is copy of [CaioXapelaum/GGUF-Playground](https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground).<br>
 Don't worry about the strange appearance, **it's just a bug of Gradio!**""", elem_classes="title")
-pg_chatbot = gr.Chatbot(scale=1, show_copy_button=True, show_share_button=False)
-with gr.Accordion("Additional inputs", open=False):
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+pg_chatbot = gr.Chatbot(scale=1, type="messages", show_copy_button=True, show_share_button=False)
+#with gr.Accordion("Additional inputs", open=False):
+pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0], allow_custom_value=True, label="Model", render=False)
+pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0]), label="Model info", render=False)
+pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0]), label="Message format", render=False)
+pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
+with gr.Row():
+pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens", render=False)
+pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", render=False)
+pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p", render=False)
+pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k", render=False)
+pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty", render=False)
+pg_chat_lora = gr.Dropdown(choices=get_dolphin_loras(), value=get_dolphin_loras()[0], allow_custom_value=True, label="Lora", render=False)
+pg_chat_lora_scale = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Lora scale", render=False)
+with gr.Accordion("Add models", open=False):
+pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
+pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
+pg_chat_add_submit = gr.Button("Update lists of models")
+with gr.Accordion("Loras", open=False, visible=False):
+pg_chat_add_lora_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/ggml-org/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-F16-GGUF/blob/main/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-f16.gguf", lines=1)
+pg_chat_add_lora_submit = gr.Button("Update lists of loras")
 gr.ChatInterface(
 fn=respond_playground,
 #title="Chat with lots of Models and LLMs using llama.cpp",
 #retry_btn="Retry",
 #undo_btn="Undo",
-
-submit_btn=
+stop_btn=True,
+submit_btn=True,
 #additional_inputs_accordion='gr.Accordion(label="Additional Inputs", open=False)',
 additional_inputs=[pg_chat_model, pg_chat_sysmsg, pg_chat_tokens, pg_chat_temperature, pg_chat_topp, pg_chat_topk, pg_chat_rp,
 pg_chat_lora, pg_chat_lora_scale, state],
-chatbot=pg_chatbot
+chatbot=pg_chatbot,
+multimodal=False,
+type="messages",
 )
 gr.LoginButton()
 gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
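Note on the app.py change: both Chatbot components and the ChatInterface are switched to Gradio's openai-style "messages" history, and the playground controls are now created with render=False so that ChatInterface can place them as additional inputs. A minimal, self-contained sketch of that wiring under Gradio 5.x; the respond function and the two example inputs below are simplified stand-ins, not the Space's actual respond_playground and component set:

import gradio as gr

def respond(message, history, system_message, max_tokens):
    # With type="messages", history arrives as a list of {"role": ..., "content": ...} dicts.
    reply = f"({system_message}, max_tokens={max_tokens}) you said: {message}"
    partial = ""
    for ch in reply:
        partial += ch
        yield partial  # yielding growing strings streams the assistant turn

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(type="messages", show_copy_button=True)
    # render=False keeps these out of the layout until ChatInterface renders them as additional inputs.
    sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
    tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens", render=False)
    gr.ChatInterface(
        fn=respond,
        type="messages",
        chatbot=chatbot,
        additional_inputs=[sysmsg, tokens],
        submit_btn=True,
        stop_btn=True,
        multimodal=False,
    )

demo.launch()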
genimage.py
CHANGED

@@ -3,9 +3,11 @@ import gradio as gr
 import torch
 import gc, os, uuid, json
 from PIL import PngImagePlugin
+from diffusers import DiffusionPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler


 device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
 if os.getenv("SPACES_ZERO_GPU", None):
 torch.backends.cudnn.deterministic = True
 torch.backends.cudnn.benchmark = False

@@ -14,13 +16,16 @@ if os.getenv("SPACES_ZERO_GPU", None):


 def load_pipeline():
-
+#vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)
 pipe = DiffusionPipeline.from_pretrained(
-"John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
-
+#"John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
+"Raelina/Raehoshi-illust-XL-6",
+#custom_pipeline="lpw_stable_diffusion_xl",
 #custom_pipeline="nyanko7/sdxl_smoothed_energy_guidance",
-torch_dtype=
+torch_dtype=dtype,
+#vae=vae,
 )
+pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 pipe.to("cpu")
 return pipe

@@ -69,30 +74,35 @@ pipe = load_pipeline()


 @torch.inference_mode()
-@spaces.GPU(duration=
+@spaces.GPU(duration=15)
 def generate_image(prompt, neg_prompt, progress=gr.Progress(track_tqdm=True)):
 pipe.to(device)
-prompt += ",
-neg_prompt += "
+#prompt += ", masterpiece, best quality, very aesthetic, absurdres"
+#neg_prompt += "bad hands, bad feet, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract], photo, deformed, disfigured, low contrast, photo, deformed, disfigured, low contrast"
+neg_prompt += "bad quality, worst quality, poorly drawn, sketch, multiple views, bad anatomy, bad hands, missing fingers, extra fingers, extra digits, fewer digits, signature, watermark, username"
+width = 1024
+height = 1024
+cfg = 6.0
+steps = 28
 metadata = {
 "prompt": prompt,
 "negative_prompt": neg_prompt,
-"resolution": f"{
-"guidance_scale":
-"num_inference_steps":
-"sampler": "Euler",
+"resolution": f"{width} x {height}",
+"guidance_scale": cfg,
+"num_inference_steps": steps,
+"sampler": "Euler a",
 }
 try:
 #positive_embeds, negative_embeds = token_auto_concat_embeds(pipe, prompt, neg_prompt)
 images = pipe(
 prompt=prompt,
 negative_prompt=neg_prompt,
-width=
-height=
-guidance_scale=
-num_inference_steps=
+width=width,
+height=height,
+guidance_scale=cfg,# seg_scale=3.0, seg_applied_layers=["mid"],
+num_inference_steps=steps,
 output_type="pil",
-clip_skip=
+#clip_skip=1,
 ).images
 if images:
 image_paths = [
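The genimage.py diff builds a metadata dict (prompt, resolution, sampler, and so on) and imports PIL's PngImagePlugin, but the code that writes that metadata into the saved files sits outside the shown hunks. A hedged sketch of how such a dict could be embedded in the output PNG; save_with_metadata is a hypothetical helper, not a function from this commit:

import json, os, uuid
from PIL import Image, PngImagePlugin

def save_with_metadata(image: Image.Image, metadata: dict, out_dir: str = ".") -> str:
    # Store the generation parameters as a tEXt chunk so they travel with the PNG.
    info = PngImagePlugin.PngInfo()
    info.add_text("metadata", json.dumps(metadata))
    path = os.path.join(out_dir, f"{uuid.uuid4()}.png")
    image.save(path, "PNG", pnginfo=info)
    return path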
llmdolphin.py
CHANGED

@@ -7,6 +7,7 @@ import gc
 import os
 import urllib
 from typing import Any
+from gradio import MessageDict
 from huggingface_hub import hf_hub_download, HfApi
 from llama_cpp import Llama
 from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType

@@ -376,10 +377,10 @@ def get_raw_prompt(msg: str):

 # https://llama-cpp-python.readthedocs.io/en/latest/api-reference/
 @torch.inference_mode()
-@spaces.GPU(duration=
+@spaces.GPU(duration=30)
 def dolphin_respond(
 message: str,
-history: list[
+history: list[MessageDict],
 model: str = default_llm_model_filename,
 system_message: str = get_dolphin_sysprompt(),
 max_tokens: int = 1024,

@@ -434,16 +435,12 @@ def dolphin_respond(
 messages = BasicChatHistory()

 for msn in history:
-
-'role': Roles.user,
-
-
-
-
-'content': msn[1]
-}
-messages.add_message(user)
-messages.add_message(assistant)
+if msn["role"] == "user":
+user = {'role': Roles.user, 'content': msn["content"]}
+messages.add_message(user)
+elif msn["role"] == "assistant":
+assistant = {'role': Roles.assistant, 'content': msn["content"]}
+messages.add_message(assistant)

 stream = agent.get_chat_response(
 message,

@@ -455,28 +452,28 @@

 progress(0.5, desc="Processing...")

-
+history.append({"role": "user", "content": message})
+history.append({"role": "assistant", "content": ""})
 for output in stream:
-
-yield
+history[-1]['content'] += output
+yield history
 except Exception as e:
 print(e)
 raise gr.Error(f"Error: {e}")
-#yield [("", None)]
 finally:
 torch.cuda.empty_cache()
 gc.collect()


 def dolphin_parse(
-history: list[
+history: list[MessageDict],
 state: dict,
 ):
 try:
 dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
 if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
 return "", gr.update(), gr.update()
-msg = history[-1][
+msg = history[-1]["content"]
 raw_prompt = get_raw_prompt(msg)
 prompts = []
 if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):

@@ -490,10 +487,10 @@ def dolphin_parse(


 @torch.inference_mode()
-@spaces.GPU(duration=
+@spaces.GPU(duration=30)
 def dolphin_respond_auto(
 message: str,
-history: list[
+history: list[MessageDict],
 model: str = default_llm_model_filename,
 system_message: str = get_dolphin_sysprompt(),
 max_tokens: int = 1024,

@@ -549,16 +546,12 @@ def dolphin_respond_auto(
 messages = BasicChatHistory()

 for msn in history:
-
-'role': Roles.user,
-
-
-
-
-'content': msn[1]
-}
-messages.add_message(user)
-messages.add_message(assistant)
+if msn["role"] == "user":
+user = {'role': Roles.user, 'content': msn["content"]}
+messages.add_message(user)
+elif msn["role"] == "assistant":
+assistant = {'role': Roles.assistant, 'content': msn["content"]}
+messages.add_message(assistant)

 progress(0, desc="Translating...")
 stream = agent.get_chat_response(

@@ -571,13 +564,16 @@ def dolphin_respond_auto(

 progress(0.5, desc="Processing...")

-
+history.append({"role": "user", "content": message})
+history.append({"role": "assistant", "content": ""})
 for output in stream:
-
-yield
+history[-1]['content'] += output
+yield history, gr.update(), gr.update()
 except Exception as e:
 print(e)
-
+history.append({"role": "user", "content": message})
+history.append({"role": "assistant", "content": message})
+yield history, gr.update(), gr.update()
 finally:
 torch.cuda.empty_cache()
 gc.collect()

@@ -585,14 +581,14 @@ def dolphin_respond_auto(

 def dolphin_parse_simple(
 message: str,
-history: list[
+history: list[MessageDict],
 state: dict,
 ):
 try:
 #if not is_japanese(message): return message
 dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
 if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
-msg = history[-1][
+msg = history[-1]["content"]
 raw_prompt = get_raw_prompt(msg)
 prompts = []
 if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):

@@ -611,10 +607,10 @@ cv2.setNumThreads(1)


 @torch.inference_mode()
-@spaces.GPU(duration=
+@spaces.GPU(duration=30)
 def respond_playground(
 message: str,
-history: list[
+history: list[MessageDict],
 model: str = default_llm_model_filename,
 system_message: str = get_dolphin_sysprompt(),
 max_tokens: int = 1024,

@@ -669,10 +665,12 @@ def respond_playground(

 # Add user and assistant messages to the history
 for msn in history:
-
-
-
-
+if msn["role"] == "user":
+user = {'role': Roles.user, 'content': msn["content"]}
+messages.add_message(user)
+elif msn["role"] == "assistant":
+assistant = {'role': Roles.assistant, 'content': msn["content"]}
+messages.add_message(assistant)

 # Stream the response
 stream = agent.get_chat_response(

@@ -683,14 +681,14 @@ def respond_playground(
 print_output=False
 )

-
+history.append({"role": "user", "content": message})
+history.append({"role": "assistant", "content": ""})
 for output in stream:
-
-yield
+history[-1]['content'] += output
+yield history
 except Exception as e:
 print(e)
 raise gr.Error(f"Error: {e}")
-#yield ""
 finally:
 torch.cuda.empty_cache()
 gc.collect()
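The llmdolphin.py handlers now take Gradio's messages-format history (list[MessageDict]) and stream by appending a user turn plus an empty assistant turn, then growing the last assistant message with each token. A minimal, dependency-free sketch of that accumulate-and-yield pattern; fake_stream stands in for agent.get_chat_response, which is not reproduced here:

from typing import Iterator

MessageDict = dict[str, str]  # assumed shape: {"role": "user" | "assistant", "content": "..."}

def fake_stream(message: str) -> Iterator[str]:
    # Stand-in for the llama-cpp-agent token stream.
    yield from ("You ", "said: ", message)

def respond(message: str, history: list[MessageDict]) -> Iterator[list[MessageDict]]:
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})
    for output in fake_stream(message):
        history[-1]["content"] += output  # grow the last assistant turn token by token
        yield history                     # each yield refreshes the Chatbot in place

if __name__ == "__main__":
    for snapshot in respond("hello", []):
        print(snapshot[-1]["content"])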
llmenv.py
CHANGED

@@ -147,11 +147,17 @@ llm_models = {
 "SnowElf-12B-v2.Q4_K_M.gguf": ["mradermacher/SnowElf-12B-v2-GGUF", MessagesFormatterType.CHATML],
 "Queen-2.5-14B-aka.Q4_K_M.gguf": ["mradermacher/Queen-2.5-14B-aka-GGUF", MessagesFormatterType.OPEN_CHAT],
 "KnowledgeCore-12B.Q4_K_M.gguf": ["mradermacher/KnowledgeCore-12B-GGUF", MessagesFormatterType.CHATML],
+"Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7-GGUF", MessagesFormatterType.CHATML],
 "PatriSlush-DarkRPReign-12B.Q4_K_M.gguf": ["mradermacher/PatriSlush-DarkRPReign-12B-GGUF", MessagesFormatterType.MISTRAL],
 "BianCang-Qwen2.5-14B-Instruct.Q4_K_M.gguf": ["mradermacher/BianCang-Qwen2.5-14B-Instruct-GGUF", MessagesFormatterType.OPEN_CHAT],
 "Simulation_LLM_wiki_14B_V2.Q4_K_M.gguf": ["mradermacher/Simulation_LLM_wiki_14B_V2-GGUF", MessagesFormatterType.OPEN_CHAT],
 "Neona-12B.i1-Q4_K_M.gguf": ["mradermacher/Neona-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+"NeoSage-12B.Q4_K_M.gguf": ["mradermacher/NeoSage-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Patricide-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/Patricide-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
 "Pinecone-Rune-12b.Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-GGUF", MessagesFormatterType.MISTRAL],
+"mn-12b-rp-without-dumb.Q4_K_M.gguf": ["mradermacher/mn-12b-rp-without-dumb-GGUF", MessagesFormatterType.MISTRAL],
+"Denker-mistral-nemo-12B.Q4_K_M.gguf": ["mradermacher/Denker-mistral-nemo-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Goldcide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/Goldcide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
 "claude-3.7-sonnet-reasoning-gemma3-12B.Q4_K_M.gguf": ["mradermacher/claude-3.7-sonnet-reasoning-gemma3-12B-GGUF", MessagesFormatterType.ALPACA],
 "allura-org_MN-Lyrebird-12B-Q4_K_M.gguf": ["bartowski/allura-org_MN-Lyrebird-12B-GGUF", MessagesFormatterType.MISTRAL],
 "ape-fiction-2-mistral-nemo.Q4_K_M.gguf": ["mradermacher/ape-fiction-2-mistral-nemo-GGUF", MessagesFormatterType.MISTRAL],

@@ -172,6 +178,45 @@ llm_models = {
 #"": ["", MessagesFormatterType.OPEN_CHAT],
 #"": ["", MessagesFormatterType.CHATML],
 #"": ["", MessagesFormatterType.PHI_3],
+"SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B.Q5_K_M.gguf": ["mradermacher/SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B-GGUF", MessagesFormatterType.LLAMA_3],
+"care-japanese-llama3.1-8b.Q5_K_M.gguf": ["mradermacher/care-japanese-llama3.1-8b-GGUF", MessagesFormatterType.LLAMA_3],
+"UltraPatriMerge-12B.Q4_K_M.gguf": ["mradermacher/UltraPatriMerge-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Llama-3.1-Amelia-MTFT-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-MTFT-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+"llama3-archimate-merged.Q5_K_M.gguf": ["mradermacher/llama3-archimate-merged-GGUF", MessagesFormatterType.LLAMA_3],
+"Mistral-Nemo-Base-2407-RP-Merge.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-Base-2407-RP-Merge-GGUF", MessagesFormatterType.CHATML],
+"PatriMaid-12B-Forgottenslop-NeonMell.Q4_K_M.gguf": ["mradermacher/PatriMaid-12B-Forgottenslop-NeonMell-GGUF", MessagesFormatterType.MISTRAL],
+"Magnolia-v3-medis-dilute-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3-medis-dilute-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Magnolia-v3b-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3b-12B-GGUF", MessagesFormatterType.MISTRAL],
+"MN-Mystic-Rune-12B.Q4_K_S.gguf": ["mradermacher/MN-Mystic-Rune-12B-GGUF", MessagesFormatterType.MISTRAL],
+"MarinaraSpaghetti-NemoMix-Unleashed-12B-chat.Q4_K_M.gguf": ["mradermacher/MarinaraSpaghetti-NemoMix-Unleashed-12B-chat-GGUF", MessagesFormatterType.MISTRAL],
+"GoldFox-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/GoldFox-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
+"Magnolia-Mell-v1-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-Mell-v1-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Shisa-DellaTest-12B.Q4_K_M.gguf": ["mradermacher/Shisa-DellaTest-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Q2.5-Coldbrew14B-FusionMix.i1-Q4_K_M.gguf": ["mradermacher/Q2.5-Coldbrew14B-FusionMix-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+"nemo-instruct-books-model-stock.Q4_K_M.gguf": ["mradermacher/nemo-instruct-books-model-stock-GGUF", MessagesFormatterType.MISTRAL],
+"FoxCide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/FoxCide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
+"BMO-CaptianMaid-12B.i1-Q4_K_M.gguf": ["mradermacher/BMO-CaptianMaid-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+"Aurore-Reveil_Koto-Small-7B-IT-Q5_K_M.gguf": ["bartowski/Aurore-Reveil_Koto-Small-7B-IT-GGUF", MessagesFormatterType.OPEN_CHAT],
+"Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a.i1-Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a-i1-GGUF", MessagesFormatterType.MISTRAL],
+"Kitsune-Symphony-V0.0-12B.Q4_K_M.gguf": ["mradermacher/Kitsune-Symphony-V0.0-12B-GGUF", MessagesFormatterType.MISTRAL],
+"Anora-12b.i1-Q4_K_M.gguf": ["mradermacher/Anora-12b-i1-GGUF", MessagesFormatterType.MISTRAL],
+"Minor-Repo-12B-omg.Q4_K_M.gguf": ["mradermacher/Minor-Repo-12B-omg-GGUF", MessagesFormatterType.MISTRAL],
+"Luna.i1-Q5_K_M.gguf": ["mradermacher/Luna-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+"OmegaMaid-DarkWay-FinalNoctis-12B.Q4_K_M.gguf": ["mradermacher/OmegaMaid-DarkWay-FinalNoctis-12B-GGUF", MessagesFormatterType.MISTRAL],
+"CaptainMaid-12B-VioletMell-V0.420.Q4_K_M.gguf": ["mradermacher/CaptainMaid-12B-VioletMell-V0.420-GGUF", MessagesFormatterType.MISTRAL],
+"LatentSoup-modelstock-8b.Q5_K_M.gguf": ["mradermacher/LatentSoup-modelstock-8b-GGUF", MessagesFormatterType.LLAMA_3],
+"Erotic-Model.v1.Q5_K_M.gguf": ["mradermacher/Erotic-Model.v1-GGUF", MessagesFormatterType.MISTRAL],
+"Llama-3.1-Amelia-CD-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-CD-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+"funny-nemo-embedding-merged.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-merged-GGUF", MessagesFormatterType.MISTRAL],
+"EviOmni-nq_train-7B.Q5_K_M.gguf": ["mradermacher/EviOmni-nq_train-7B-GGUF", MessagesFormatterType.OPEN_CHAT],
+"NuMarkdown-8B-Thinking-fork.Q5_K_M.gguf": ["mradermacher/NuMarkdown-8B-Thinking-fork-GGUF", MessagesFormatterType.OPEN_CHAT],
+"Comet_12B_V.7.Q4_K_M.gguf": ["mradermacher/Comet_12B_V.7-GGUF", MessagesFormatterType.ALPACA],
+"Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1-GGUF", MessagesFormatterType.LLAMA_3],
+"NeuralMerge-9B-Dare.Q5_K_M.gguf": ["mradermacher/NeuralMerge-9B-Dare-GGUF", MessagesFormatterType.MISTRAL],
+"CaptainErisNebula-12B-Chimera-v0.420.Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v0.420-GGUF", MessagesFormatterType.MISTRAL],
+"Nemo-12B-OldSpice.Q4_K_M.gguf": ["mradermacher/Nemo-12B-OldSpice-GGUF", MessagesFormatterType.MISTRAL],
+"funny-nemo-embedding-testing.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-testing-GGUF", MessagesFormatterType.MISTRAL],
+"francois-v3.Q4_K_M.gguf": ["mradermacher/francois-v3-GGUF", MessagesFormatterType.CHATML],
 "KansenSakura-Eclipse-RP-12b.Q4_K_M.gguf": ["mradermacher/KansenSakura-Eclipse-RP-12b-GGUF", MessagesFormatterType.CHATML],
 "Sugoi-14B-Ultra-HF.Q4_K_M.gguf": ["mradermacher/Sugoi-14B-Ultra-HF-GGUF", MessagesFormatterType.OPEN_CHAT],
 "CaptainErisNebula-12B-Chimera-v1.1.i1-Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v1.1-i1-GGUF", MessagesFormatterType.MISTRAL],
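llmenv.py only gains entries in the llm_models mapping, where each GGUF filename points to its Hugging Face repo and chat-template format. As an illustration of how such an entry can be resolved to a local file, here is a hedged sketch; download_gguf is a hypothetical helper, the Space's own loader is not part of this diff:

from huggingface_hub import hf_hub_download
from llama_cpp_agent import MessagesFormatterType

# One entry in the same shape as llm_models: filename -> [repo_id, message format]
llm_models = {
    "Denker-mistral-nemo-12B.Q4_K_M.gguf": ["mradermacher/Denker-mistral-nemo-12B-GGUF", MessagesFormatterType.MISTRAL],
}

def download_gguf(filename: str) -> str:
    repo_id, _fmt = llm_models[filename]
    # Returns the local cache path of the downloaded GGUF file.
    return hf_hub_download(repo_id=repo_id, filename=filename)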
requirements.txt
CHANGED

@@ -1,11 +1,11 @@
-spaces
 huggingface_hub
 hf_xet
 hf_transfer
 scikit-build-core
 #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu124/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
 #git+https://github.com/Maximilian-Winter/llama-cpp-agent
-https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.
+https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.16-cu124-AVX-linux-20250913/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
+#https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
 git+https://github.com/John6666cat/llama-cpp-agent
 pybind11>=2.12
 torch==2.4.0

@@ -13,11 +13,12 @@ torchvision
 accelerate
 transformers<=4.48.3
 optimum[onnxruntime]
-dartrs
+#dartrs
+git+https://github.com/John6666cat/dartrs
 translatepy
 diffusers
 rapidfuzz
 wrapt-timeout-decorator
 opencv-python
 numpy<2
-pydantic
+pydantic==2.10.6
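requirements.txt swaps in a prebuilt llama-cpp-python 0.3.16 CUDA 12.4 wheel, installs dartrs from a fork, and pins pydantic to 2.10.6. A quick post-install sanity check, assuming the pinned packages expose the usual version metadata:

import llama_cpp, pydantic

# Expected to report 0.3.16 and 2.10.6 for the versions pinned above.
print(llama_cpp.__version__)
print(pydantic.VERSION)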