John6666 committed on
Commit 8f48a77 · verified · 1 Parent(s): e548b67

Upload 6 files

Files changed (6)
  1. README.md +1 -1
  2. app.py +27 -25
  3. genimage.py +26 -16
  4. llmdolphin.py +44 -46
  5. llmenv.py +45 -0
  6. requirements.txt +5 -4
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 👀😻
  colorFrom: red
  colorTo: purple
  sdk: gradio
- sdk_version: 5.34.2
+ sdk_version: 5.45.0
  app_file: app.py
  pinned: true
  license: apache-2.0
app.py CHANGED
@@ -21,7 +21,7 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
  """, elem_classes="title")
  state = gr.State(value={})
  with gr.Group():
- chatbot = gr.Chatbot(show_copy_button=True, show_share_button=False, layout="bubble", container=True)
+ chatbot = gr.Chatbot(type="messages", show_copy_button=True, show_share_button=False, layout="bubble", container=True)
  with gr.Row(equal_height=True):
  chat_msg = gr.Textbox(show_label=False, placeholder="Input text in English, Japanese, or any other languages and press Enter or click Send.", scale=4)
  chat_submit = gr.Button("Send", scale=1, variant="primary")
@@ -71,38 +71,40 @@ with gr.Blocks(theme='NoCrypt/miku@>=1.2.2', fill_width=True, css=css, delete_ca
  gr.Markdown("""# Chat with lots of Models and LLMs using llama.cpp
  This tab is copy of [CaioXapelaum/GGUF-Playground](https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground).<br>
  Don't worry about the strange appearance, **it's just a bug of Gradio!**""", elem_classes="title")
- pg_chatbot = gr.Chatbot(scale=1, show_copy_button=True, show_share_button=False)
- with gr.Accordion("Additional inputs", open=False):
- pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0], allow_custom_value=True, label="Model")
- pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0]), label="Model info")
- pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0]), label="Message format")
- pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message")
- with gr.Row():
- pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens")
- pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature")
- pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p")
- pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k")
- pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty")
- with gr.Accordion("Loras", open=True, visible=False):
- pg_chat_lora = gr.Dropdown(choices=get_dolphin_loras(), value=get_dolphin_loras()[0], allow_custom_value=True, label="Lora")
- pg_chat_lora_scale = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Lora scale")
- pg_chat_add_lora_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/ggml-org/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-F16-GGUF/blob/main/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-f16.gguf", lines=1)
- pg_chat_add_lora_submit = gr.Button("Update lists of loras")
- with gr.Accordion("Add models", open=True):
- pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
- pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
- pg_chat_add_submit = gr.Button("Update lists of models")
+ pg_chatbot = gr.Chatbot(scale=1, type="messages", show_copy_button=True, show_share_button=False)
+ #with gr.Accordion("Additional inputs", open=False):
+ pg_chat_model = gr.Dropdown(choices=get_dolphin_models(), value=get_dolphin_models()[0], allow_custom_value=True, label="Model", render=False)
+ pg_chat_model_info = gr.Markdown(value=get_dolphin_model_info(get_dolphin_models()[0]), label="Model info", render=False)
+ pg_chat_format = gr.Dropdown(choices=get_llm_formats(), value=get_dolphin_model_format(get_dolphin_models()[0]), label="Message format", render=False)
+ pg_chat_sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
+ with gr.Row():
+ pg_chat_tokens = gr.Slider(minimum=1, maximum=4096, value=2048, step=1, label="Max tokens", render=False)
+ pg_chat_temperature = gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature", render=False)
+ pg_chat_topp = gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p", render=False)
+ pg_chat_topk = gr.Slider(minimum=0, maximum=100, value=40, step=1, label="Top-k", render=False)
+ pg_chat_rp = gr.Slider(minimum=0.0, maximum=2.0, value=1.1, step=0.1, label="Repetition penalty", render=False)
+ pg_chat_lora = gr.Dropdown(choices=get_dolphin_loras(), value=get_dolphin_loras()[0], allow_custom_value=True, label="Lora", render=False)
+ pg_chat_lora_scale = gr.Slider(minimum=0.0, maximum=1.0, value=1.0, step=0.01, label="Lora scale", render=False)
+ with gr.Accordion("Add models", open=False):
+ pg_chat_add_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/mradermacher/MagnumChronos-i1-GGUF/blob/main/MagnumChronos.i1-Q4_K_M.gguf", lines=1)
+ pg_chat_add_format = gr.Dropdown(choices=get_llm_formats(), value=get_llm_formats()[0], label="Message format")
+ pg_chat_add_submit = gr.Button("Update lists of models")
+ with gr.Accordion("Loras", open=False, visible=False):
+ pg_chat_add_lora_text = gr.Textbox(label="URL or Repo ID", placeholder="https://huggingface.co/ggml-org/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-F16-GGUF/blob/main/LoRA-Qwen2.5-14B-Instruct-abliterated-v2-f16.gguf", lines=1)
+ pg_chat_add_lora_submit = gr.Button("Update lists of loras")
  gr.ChatInterface(
  fn=respond_playground,
  #title="Chat with lots of Models and LLMs using llama.cpp",
  #retry_btn="Retry",
  #undo_btn="Undo",
- #clear_btn="Clear",
- submit_btn="Send",
+ stop_btn=True,
+ submit_btn=True,
  #additional_inputs_accordion='gr.Accordion(label="Additional Inputs", open=False)',
  additional_inputs=[pg_chat_model, pg_chat_sysmsg, pg_chat_tokens, pg_chat_temperature, pg_chat_topp, pg_chat_topk, pg_chat_rp,
  pg_chat_lora, pg_chat_lora_scale, state],
- chatbot=pg_chatbot
+ chatbot=pg_chatbot,
+ multimodal=False,
+ type="messages",
  )
  gr.LoginButton()
  gr.DuplicateButton(value="Duplicate Space for private use (This demo does not work on CPU. Requires GPU Space)")
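
The app.py changes above move both chat tabs from Gradio's legacy tuple-style history to the messages format, and hand pre-built `render=False` components to `gr.ChatInterface` instead of laying them out in an accordion. A minimal standalone sketch of that wiring in Gradio 5.x follows; the component names and the echo handler are illustrative, not the Space's actual code.

```python
import gradio as gr

def respond(message, history, system_message):
    # With type="messages", history arrives as a list of {"role", "content"} dicts.
    reply = f"[{system_message}] echo: {message}"
    for i in range(1, len(reply) + 1):
        yield reply[:i]  # ChatInterface renders streamed partial strings

with gr.Blocks() as demo:
    # Components built up front with render=False, then handed to ChatInterface,
    # which places them in its "Additional Inputs" area.
    sysmsg = gr.Textbox(value="You are a helpful assistant.", label="System message", render=False)
    gr.ChatInterface(
        fn=respond,
        type="messages",  # history is list[MessageDict] rather than list[tuple]
        chatbot=gr.Chatbot(type="messages", show_copy_button=True),
        additional_inputs=[sysmsg],
        submit_btn=True,
        stop_btn=True,
        multimodal=False,
    )

if __name__ == "__main__":
    demo.launch()
```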
genimage.py CHANGED
@@ -3,9 +3,11 @@ import gradio as gr
  import torch
  import gc, os, uuid, json
  from PIL import PngImagePlugin
+ from diffusers import DiffusionPipeline, AutoencoderKL, EulerAncestralDiscreteScheduler


  device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+ dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
  if os.getenv("SPACES_ZERO_GPU", None):
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False
@@ -14,13 +16,16 @@ if os.getenv("SPACES_ZERO_GPU", None):


  def load_pipeline():
- from diffusers import DiffusionPipeline
+ #vae = AutoencoderKL.from_pretrained("madebyollin/sdxl-vae-fp16-fix", torch_dtype=dtype)
  pipe = DiffusionPipeline.from_pretrained(
- "John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
- custom_pipeline="lpw_stable_diffusion_xl",
+ #"John6666/rae-diffusion-xl-v2-sdxl-spo-pcm",
+ "Raelina/Raehoshi-illust-XL-6",
+ #custom_pipeline="lpw_stable_diffusion_xl",
  #custom_pipeline="nyanko7/sdxl_smoothed_energy_guidance",
- torch_dtype=torch.bfloat16 if torch.cuda.is_available() else torch.float32,
+ torch_dtype=dtype,
+ #vae=vae,
  )
+ pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
  pipe.to("cpu")
  return pipe

@@ -69,30 +74,35 @@ pipe = load_pipeline()


  @torch.inference_mode()
- @spaces.GPU(duration=10)
+ @spaces.GPU(duration=15)
  def generate_image(prompt, neg_prompt, progress=gr.Progress(track_tqdm=True)):
  pipe.to(device)
- prompt += ", anime, masterpiece, best quality, very aesthetic, absurdres"
- neg_prompt += ", bad hands, bad feet, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract], photo, deformed, disfigured, low contrast, photo, deformed, disfigured, low contrast"
+ #prompt += ", masterpiece, best quality, very aesthetic, absurdres"
+ #neg_prompt += "bad hands, bad feet, lowres, (bad), text, error, fewer, extra, missing, worst quality, jpeg artifacts, low quality, watermark, unfinished, displeasing, oldest, early, chromatic aberration, signature, extra digits, artistic error, username, scan, [abstract], photo, deformed, disfigured, low contrast, photo, deformed, disfigured, low contrast"
+ neg_prompt += "bad quality, worst quality, poorly drawn, sketch, multiple views, bad anatomy, bad hands, missing fingers, extra fingers, extra digits, fewer digits, signature, watermark, username"
+ width = 1024
+ height = 1024
+ cfg = 6.0
+ steps = 28
  metadata = {
  "prompt": prompt,
  "negative_prompt": neg_prompt,
- "resolution": f"{1024} x {1024}",
- "guidance_scale": 7.0,
- "num_inference_steps": 28,
- "sampler": "Euler",
+ "resolution": f"{width} x {height}",
+ "guidance_scale": cfg,
+ "num_inference_steps": steps,
+ "sampler": "Euler a",
  }
  try:
  #positive_embeds, negative_embeds = token_auto_concat_embeds(pipe, prompt, neg_prompt)
  images = pipe(
  prompt=prompt,
  negative_prompt=neg_prompt,
- width=1024,
- height=1024,
- guidance_scale=7.0,# seg_scale=3.0, seg_applied_layers=["mid"],
- num_inference_steps=28,
+ width=width,
+ height=height,
+ guidance_scale=cfg,# seg_scale=3.0, seg_applied_layers=["mid"],
+ num_inference_steps=steps,
  output_type="pil",
- clip_skip=2,
+ #clip_skip=1,
  ).images
  if images:
  image_paths = [
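
For reference, the genimage.py hunks swap the checkpoint, introduce a shared module-level dtype, and replace the default sampler with Euler Ancestral. A condensed standalone usage sketch of that diffusers flow (the model id comes from the diff; prompts, device handling, and the output filename are placeholders for illustration):

```python
import torch
from diffusers import DiffusionPipeline, EulerAncestralDiscreteScheduler

# Mirror the new module-level dtype choice: bf16 on GPU, fp32 on CPU.
dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32

pipe = DiffusionPipeline.from_pretrained("Raelina/Raehoshi-illust-XL-6", torch_dtype=dtype)
# Swap the checkpoint's default scheduler for Euler Ancestral ("Euler a"), keeping its config.
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
pipe.to("cuda" if torch.cuda.is_available() else "cpu")

image = pipe(
    prompt="1girl, looking at viewer",
    negative_prompt="bad quality, worst quality",
    width=1024,
    height=1024,
    guidance_scale=6.0,
    num_inference_steps=28,
).images[0]
image.save("sample.png")
```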
llmdolphin.py CHANGED
@@ -7,6 +7,7 @@ import gc
  import os
  import urllib
  from typing import Any
+ from gradio import MessageDict
  from huggingface_hub import hf_hub_download, HfApi
  from llama_cpp import Llama
  from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
@@ -376,10 +377,10 @@ def get_raw_prompt(msg: str):

  # https://llama-cpp-python.readthedocs.io/en/latest/api-reference/
  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def dolphin_respond(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -434,16 +435,12 @@
  messages = BasicChatHistory()

  for msn in history:
- user = {
- 'role': Roles.user,
- 'content': msn[0]
- }
- assistant = {
- 'role': Roles.assistant,
- 'content': msn[1]
- }
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  stream = agent.get_chat_response(
  message,
@@ -455,28 +452,28 @@

  progress(0.5, desc="Processing...")

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield [(outputs, None)]
+ history[-1]['content'] += output
+ yield history
  except Exception as e:
  print(e)
  raise gr.Error(f"Error: {e}")
- #yield [("", None)]
  finally:
  torch.cuda.empty_cache()
  gc.collect()


  def dolphin_parse(
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  state: dict,
  ):
  try:
  dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
  if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
  return "", gr.update(), gr.update()
- msg = history[-1][0]
+ msg = history[-1]["content"]
  raw_prompt = get_raw_prompt(msg)
  prompts = []
  if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
@@ -490,10 +487,10 @@


  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def dolphin_respond_auto(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -549,16 +546,12 @@
  messages = BasicChatHistory()

  for msn in history:
- user = {
- 'role': Roles.user,
- 'content': msn[0]
- }
- assistant = {
- 'role': Roles.assistant,
- 'content': msn[1]
- }
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  progress(0, desc="Translating...")
  stream = agent.get_chat_response(
@@ -571,13 +564,16 @@

  progress(0.5, desc="Processing...")

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield [(outputs, None)], gr.update(), gr.update()
+ history[-1]['content'] += output
+ yield history, gr.update(), gr.update()
  except Exception as e:
  print(e)
- yield [("", None)], gr.update(), gr.update()
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": message})
+ yield history, gr.update(), gr.update()
  finally:
  torch.cuda.empty_cache()
  gc.collect()
@@ -585,14 +581,14 @@ def dolphin_respond_auto(

  def dolphin_parse_simple(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  state: dict,
  ):
  try:
  #if not is_japanese(message): return message
  dolphin_sysprompt_mode = get_state(state, "dolphin_sysprompt_mode")
  if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
- msg = history[-1][0]
+ msg = history[-1]["content"]
  raw_prompt = get_raw_prompt(msg)
  prompts = []
  if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
@@ -611,10 +607,10 @@ cv2.setNumThreads(1)


  @torch.inference_mode()
- @spaces.GPU(duration=59)
+ @spaces.GPU(duration=30)
  def respond_playground(
  message: str,
- history: list[tuple[str, str]],
+ history: list[MessageDict],
  model: str = default_llm_model_filename,
  system_message: str = get_dolphin_sysprompt(),
  max_tokens: int = 1024,
@@ -669,10 +665,12 @@ def respond_playground(

  # Add user and assistant messages to the history
  for msn in history:
- user = {'role': Roles.user, 'content': msn[0]}
- assistant = {'role': Roles.assistant, 'content': msn[1]}
- messages.add_message(user)
- messages.add_message(assistant)
+ if msn["role"] == "user":
+ user = {'role': Roles.user, 'content': msn["content"]}
+ messages.add_message(user)
+ elif msn["role"] == "assistant":
+ assistant = {'role': Roles.assistant, 'content': msn["content"]}
+ messages.add_message(assistant)

  # Stream the response
  stream = agent.get_chat_response(
@@ -683,14 +681,14 @@
  print_output=False
  )

- outputs = ""
+ history.append({"role": "user", "content": message})
+ history.append({"role": "assistant", "content": ""})
  for output in stream:
- outputs += output
- yield outputs
+ history[-1]['content'] += output
+ yield history
  except Exception as e:
  print(e)
  raise gr.Error(f"Error: {e}")
- #yield ""
  finally:
  torch.cuda.empty_cache()
  gc.collect()
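
The llmdolphin.py hunks all follow one pattern: the handlers now receive Gradio's messages-format history (a list of role/content dicts), replay it into llama-cpp-agent's BasicChatHistory, and yield the whole updated history while streaming instead of a list of (user, bot) tuples. A standalone sketch of that pattern is below; the llama-cpp-agent import paths are assumed to match the unchanged parts of llmdolphin.py.

```python
from llama_cpp_agent.chat_history import BasicChatHistory
from llama_cpp_agent.chat_history.messages import Roles

def history_to_chat_history(history: list[dict]) -> BasicChatHistory:
    """Copy Gradio messages-format history into a BasicChatHistory."""
    messages = BasicChatHistory()
    for msn in history:
        if msn["role"] == "user":
            messages.add_message({'role': Roles.user, 'content': msn["content"]})
        elif msn["role"] == "assistant":
            messages.add_message({'role': Roles.assistant, 'content': msn["content"]})
    return messages

def stream_into_history(history: list[dict], message: str, stream):
    """Mirror the new yield pattern: append the user turn and an empty assistant
    turn, then grow the assistant turn as tokens arrive, yielding the full history."""
    history.append({"role": "user", "content": message})
    history.append({"role": "assistant", "content": ""})
    for output in stream:
        history[-1]["content"] += output
        yield history
```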
llmenv.py CHANGED
@@ -147,11 +147,17 @@ llm_models = {
  "SnowElf-12B-v2.Q4_K_M.gguf": ["mradermacher/SnowElf-12B-v2-GGUF", MessagesFormatterType.CHATML],
  "Queen-2.5-14B-aka.Q4_K_M.gguf": ["mradermacher/Queen-2.5-14B-aka-GGUF", MessagesFormatterType.OPEN_CHAT],
  "KnowledgeCore-12B.Q4_K_M.gguf": ["mradermacher/KnowledgeCore-12B-GGUF", MessagesFormatterType.CHATML],
+ "Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-2407-Role-Playing-Final-4data_ga16_lr6e7-GGUF", MessagesFormatterType.CHATML],
  "PatriSlush-DarkRPReign-12B.Q4_K_M.gguf": ["mradermacher/PatriSlush-DarkRPReign-12B-GGUF", MessagesFormatterType.MISTRAL],
  "BianCang-Qwen2.5-14B-Instruct.Q4_K_M.gguf": ["mradermacher/BianCang-Qwen2.5-14B-Instruct-GGUF", MessagesFormatterType.OPEN_CHAT],
  "Simulation_LLM_wiki_14B_V2.Q4_K_M.gguf": ["mradermacher/Simulation_LLM_wiki_14B_V2-GGUF", MessagesFormatterType.OPEN_CHAT],
  "Neona-12B.i1-Q4_K_M.gguf": ["mradermacher/Neona-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "NeoSage-12B.Q4_K_M.gguf": ["mradermacher/NeoSage-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Patricide-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/Patricide-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
  "Pinecone-Rune-12b.Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-GGUF", MessagesFormatterType.MISTRAL],
+ "mn-12b-rp-without-dumb.Q4_K_M.gguf": ["mradermacher/mn-12b-rp-without-dumb-GGUF", MessagesFormatterType.MISTRAL],
+ "Denker-mistral-nemo-12B.Q4_K_M.gguf": ["mradermacher/Denker-mistral-nemo-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Goldcide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/Goldcide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
  "claude-3.7-sonnet-reasoning-gemma3-12B.Q4_K_M.gguf": ["mradermacher/claude-3.7-sonnet-reasoning-gemma3-12B-GGUF", MessagesFormatterType.ALPACA],
  "allura-org_MN-Lyrebird-12B-Q4_K_M.gguf": ["bartowski/allura-org_MN-Lyrebird-12B-GGUF", MessagesFormatterType.MISTRAL],
  "ape-fiction-2-mistral-nemo.Q4_K_M.gguf": ["mradermacher/ape-fiction-2-mistral-nemo-GGUF", MessagesFormatterType.MISTRAL],
@@ -172,6 +178,45 @@ llm_models = {
  #"": ["", MessagesFormatterType.OPEN_CHAT],
  #"": ["", MessagesFormatterType.CHATML],
  #"": ["", MessagesFormatterType.PHI_3],
+ "SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B.Q5_K_M.gguf": ["mradermacher/SauerHuatuoSkyworkDeepWatt-o1-Llama-3.1-8B-GGUF", MessagesFormatterType.LLAMA_3],
+ "care-japanese-llama3.1-8b.Q5_K_M.gguf": ["mradermacher/care-japanese-llama3.1-8b-GGUF", MessagesFormatterType.LLAMA_3],
+ "UltraPatriMerge-12B.Q4_K_M.gguf": ["mradermacher/UltraPatriMerge-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Llama-3.1-Amelia-MTFT-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-MTFT-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+ "llama3-archimate-merged.Q5_K_M.gguf": ["mradermacher/llama3-archimate-merged-GGUF", MessagesFormatterType.LLAMA_3],
+ "Mistral-Nemo-Base-2407-RP-Merge.Q4_K_M.gguf": ["mradermacher/Mistral-Nemo-Base-2407-RP-Merge-GGUF", MessagesFormatterType.CHATML],
+ "PatriMaid-12B-Forgottenslop-NeonMell.Q4_K_M.gguf": ["mradermacher/PatriMaid-12B-Forgottenslop-NeonMell-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-v3-medis-dilute-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3-medis-dilute-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-v3b-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-v3b-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "MN-Mystic-Rune-12B.Q4_K_S.gguf": ["mradermacher/MN-Mystic-Rune-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "MarinaraSpaghetti-NemoMix-Unleashed-12B-chat.Q4_K_M.gguf": ["mradermacher/MarinaraSpaghetti-NemoMix-Unleashed-12B-chat-GGUF", MessagesFormatterType.MISTRAL],
+ "GoldFox-12B-Forgottenslop-Mell.i1-Q4_K_M.gguf": ["mradermacher/GoldFox-12B-Forgottenslop-Mell-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Magnolia-Mell-v1-12B.Q4_K_M.gguf": ["mradermacher/Magnolia-Mell-v1-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Shisa-DellaTest-12B.Q4_K_M.gguf": ["mradermacher/Shisa-DellaTest-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Q2.5-Coldbrew14B-FusionMix.i1-Q4_K_M.gguf": ["mradermacher/Q2.5-Coldbrew14B-FusionMix-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "nemo-instruct-books-model-stock.Q4_K_M.gguf": ["mradermacher/nemo-instruct-books-model-stock-GGUF", MessagesFormatterType.MISTRAL],
+ "FoxCide-12B-Forgottenslop-Mell.Q4_K_M.gguf": ["mradermacher/FoxCide-12B-Forgottenslop-Mell-GGUF", MessagesFormatterType.MISTRAL],
+ "BMO-CaptianMaid-12B.i1-Q4_K_M.gguf": ["mradermacher/BMO-CaptianMaid-12B-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Aurore-Reveil_Koto-Small-7B-IT-Q5_K_M.gguf": ["bartowski/Aurore-Reveil_Koto-Small-7B-IT-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a.i1-Q4_K_M.gguf": ["mradermacher/Pinecone-Rune-12b-Token-Surgery-Chatml-v0.1a-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Kitsune-Symphony-V0.0-12B.Q4_K_M.gguf": ["mradermacher/Kitsune-Symphony-V0.0-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "Anora-12b.i1-Q4_K_M.gguf": ["mradermacher/Anora-12b-i1-GGUF", MessagesFormatterType.MISTRAL],
+ "Minor-Repo-12B-omg.Q4_K_M.gguf": ["mradermacher/Minor-Repo-12B-omg-GGUF", MessagesFormatterType.MISTRAL],
+ "Luna.i1-Q5_K_M.gguf": ["mradermacher/Luna-i1-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "OmegaMaid-DarkWay-FinalNoctis-12B.Q4_K_M.gguf": ["mradermacher/OmegaMaid-DarkWay-FinalNoctis-12B-GGUF", MessagesFormatterType.MISTRAL],
+ "CaptainMaid-12B-VioletMell-V0.420.Q4_K_M.gguf": ["mradermacher/CaptainMaid-12B-VioletMell-V0.420-GGUF", MessagesFormatterType.MISTRAL],
+ "LatentSoup-modelstock-8b.Q5_K_M.gguf": ["mradermacher/LatentSoup-modelstock-8b-GGUF", MessagesFormatterType.LLAMA_3],
+ "Erotic-Model.v1.Q5_K_M.gguf": ["mradermacher/Erotic-Model.v1-GGUF", MessagesFormatterType.MISTRAL],
+ "Llama-3.1-Amelia-CD-8B-v1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-Amelia-CD-8B-v1-GGUF", MessagesFormatterType.LLAMA_3],
+ "funny-nemo-embedding-merged.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-merged-GGUF", MessagesFormatterType.MISTRAL],
+ "EviOmni-nq_train-7B.Q5_K_M.gguf": ["mradermacher/EviOmni-nq_train-7B-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "NuMarkdown-8B-Thinking-fork.Q5_K_M.gguf": ["mradermacher/NuMarkdown-8B-Thinking-fork-GGUF", MessagesFormatterType.OPEN_CHAT],
+ "Comet_12B_V.7.Q4_K_M.gguf": ["mradermacher/Comet_12B_V.7-GGUF", MessagesFormatterType.ALPACA],
+ "Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1.Q5_K_M.gguf": ["mradermacher/Llama-3.1-8B-Instruct-wildfeedback-seed-RPO-0.1-GGUF", MessagesFormatterType.LLAMA_3],
+ "NeuralMerge-9B-Dare.Q5_K_M.gguf": ["mradermacher/NeuralMerge-9B-Dare-GGUF", MessagesFormatterType.MISTRAL],
+ "CaptainErisNebula-12B-Chimera-v0.420.Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v0.420-GGUF", MessagesFormatterType.MISTRAL],
+ "Nemo-12B-OldSpice.Q4_K_M.gguf": ["mradermacher/Nemo-12B-OldSpice-GGUF", MessagesFormatterType.MISTRAL],
+ "funny-nemo-embedding-testing.Q4_K_M.gguf": ["mradermacher/funny-nemo-embedding-testing-GGUF", MessagesFormatterType.MISTRAL],
+ "francois-v3.Q4_K_M.gguf": ["mradermacher/francois-v3-GGUF", MessagesFormatterType.CHATML],
  "KansenSakura-Eclipse-RP-12b.Q4_K_M.gguf": ["mradermacher/KansenSakura-Eclipse-RP-12b-GGUF", MessagesFormatterType.CHATML],
  "Sugoi-14B-Ultra-HF.Q4_K_M.gguf": ["mradermacher/Sugoi-14B-Ultra-HF-GGUF", MessagesFormatterType.OPEN_CHAT],
  "CaptainErisNebula-12B-Chimera-v1.1.i1-Q4_K_M.gguf": ["mradermacher/CaptainErisNebula-12B-Chimera-v1.1-i1-GGUF", MessagesFormatterType.MISTRAL],
requirements.txt CHANGED
@@ -1,11 +1,11 @@
- spaces
  huggingface_hub
  hf_xet
  hf_transfer
  scikit-build-core
  #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.4-cu124/llama_cpp_python-0.3.4-cp310-cp310-linux_x86_64.whl
  #git+https://github.com/Maximilian-Winter/llama-cpp-agent
- https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.14-cu124-AVX-linux-20250731/llama_cpp_python-0.3.14-cp310-cp310-linux_x86_64.whl
+ https://github.com/John6666cat/llama-cpp-python/releases/download/v0.3.16-cu124-AVX-linux-20250913/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
+ #https://github.com/abetlen/llama-cpp-python/releases/download/v0.3.16-cu124/llama_cpp_python-0.3.16-cp310-cp310-linux_x86_64.whl
  git+https://github.com/John6666cat/llama-cpp-agent
  pybind11>=2.12
  torch==2.4.0
@@ -13,11 +13,12 @@ torchvision
  accelerate
  transformers<=4.48.3
  optimum[onnxruntime]
- dartrs
+ #dartrs
+ git+https://github.com/John6666cat/dartrs
  translatepy
  diffusers
  rapidfuzz
  wrapt-timeout-decorator
  opencv-python
  numpy<2
- pydantic<=2.10.6
+ pydantic==2.10.6
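
The requirements changes drop the spaces entry, bump the prebuilt llama-cpp-python wheel to 0.3.16, switch dartrs to a fork, and pin pydantic to exactly 2.10.6. A quick, optional way to confirm what actually resolved inside the Space (illustrative snippet, not part of the commit):

```python
# Print the installed versions of the packages the new pins care about.
import importlib.metadata as md

for pkg in ("pydantic", "llama_cpp_python", "torch", "gradio"):
    try:
        print(pkg, md.version(pkg))
    except md.PackageNotFoundError:
        print(pkg, "is not installed")
```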