Update app.py
app.py CHANGED
@@ -7,8 +7,21 @@ from huggingface_hub import hf_hub_download
 subprocess.run('pip install llama-cpp-python==0.2.75 --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124', shell=True)
 subprocess.run('pip install llama-cpp-agent==0.2.10', shell=True)
 
-hf_hub_download(
-
+hf_hub_download(
+    repo_id="bartowski/Meta-Llama-3-70B-Instruct-GGUF",
+    filename="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+    local_dir = "./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Llama-3-8B-Synthia-v3.5-GGUF",
+    filename="Llama-3-8B-Synthia-v3.5-f16.gguf",
+    local_dir = "./models"
+)
+hf_hub_download(
+    repo_id="bartowski/Mistral-7B-Instruct-v0.3-GGUF",
+    filename="Mistral-7B-Instruct-v0.3-f32.gguf",
+    local_dir = "./models"
+)
 
 css = """
 .message-row {
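Note: not part of the commit, but the three hf_hub_download calls differ only in repo and filename; a minimal table-driven sketch, using the same huggingface_hub API and the names from the hunk above:

    from huggingface_hub import hf_hub_download

    MODELS = [
        ("bartowski/Meta-Llama-3-70B-Instruct-GGUF", "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"),
        ("bartowski/Llama-3-8B-Synthia-v3.5-GGUF", "Llama-3-8B-Synthia-v3.5-f16.gguf"),
        ("bartowski/Mistral-7B-Instruct-v0.3-GGUF", "Mistral-7B-Instruct-v0.3-f32.gguf"),
    ]

    for repo_id, filename in MODELS:
        # sketch only: the same call the commit makes three times
        hf_hub_download(repo_id=repo_id, filename=filename, local_dir="./models")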
@@ -28,6 +41,14 @@ css = """
 }
 """
 
+def get_messages_formatter_type(model_name):
+    if "Llama" in model_name:
+        return MessagesFormatterType.LLAMA_3
+    elif "Mistral" in model_name:
+        return MessagesFormatterType.MISTRAL
+    else:
+        raise ValueError(f"Unsupported model: {model_name}")
+
 @spaces.GPU(duration=120)
 def respond(
     message,
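Note: the new helper is substring-based and tests "Llama" before "Mistral", so a filename containing both would resolve to LLAMA_3. A quick sanity check against the downloaded filenames (assuming MessagesFormatterType is imported from llama_cpp_agent, which version 0.2.10 exposes):

    from llama_cpp_agent import MessagesFormatterType

    assert get_messages_formatter_type("Meta-Llama-3-70B-Instruct-Q3_K_M.gguf") == MessagesFormatterType.LLAMA_3
    assert get_messages_formatter_type("Llama-3-8B-Synthia-v3.5-f16.gguf") == MessagesFormatterType.LLAMA_3
    assert get_messages_formatter_type("Mistral-7B-Instruct-v0.3-f32.gguf") == MessagesFormatterType.MISTRAL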
@@ -47,6 +68,8 @@ def respond(
     from llama_cpp_agent.chat_history import BasicChatHistory
     from llama_cpp_agent.chat_history.messages import Roles
 
+    chat_template = get_messages_formatter_type(model)
+
     llm = Llama(
         model_path=f"models/{model}",
         flash_attn=True,
@@ -60,7 +83,7 @@ def respond(
     agent = LlamaCppAgent(
         provider,
         system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=
+        predefined_messages_formatter_type=chat_template,
         debug_output=True
     )
 
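Note: hunks 3 and 4 together replace a hard-coded formatter with the per-model lookup. The wiring they imply, as a self-contained sketch (the LlamaCppPythonProvider setup is not visible in this diff and is assumed from llama-cpp-agent 0.2.10; model and system_message stand in for the respond() arguments):

    from llama_cpp import Llama
    from llama_cpp_agent import LlamaCppAgent
    from llama_cpp_agent.providers import LlamaCppPythonProvider

    model = "Meta-Llama-3-70B-Instruct-Q3_K_M.gguf"  # placeholder: respond() argument
    system_message = "You are a helpful assistant."  # placeholder: respond() argument

    llm = Llama(model_path=f"models/{model}", flash_attn=True)
    provider = LlamaCppPythonProvider(llm)  # assumed; provider setup is not shown in the diff
    agent = LlamaCppAgent(
        provider,
        system_prompt=f"{system_message}",
        predefined_messages_formatter_type=get_messages_formatter_type(model),
        debug_output=True,
    )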
@@ -83,11 +106,16 @@ def respond(
             'role': Roles.assistant,
             'content': msn[1]
         }
-
         messages.add_message(user)
         messages.add_message(assistant)
 
-    stream = agent.get_chat_response(
+    stream = agent.get_chat_response(
+        message,
+        llm_sampling_settings=settings,
+        chat_history=messages,
+        returns_streaming_generator=True,
+        print_output=False
+    )
 
     outputs = ""
     for output in stream:
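Note: with returns_streaming_generator=True and print_output=False, get_chat_response returns a generator of text fragments rather than a finished string. The loop after the hunk accumulates them; in a gr.ChatInterface callback the growing string is presumably re-yielded so the UI streams (sketch; the yield is assumed from the Gradio streaming pattern and is not visible in this diff):

    outputs = ""
    for output in stream:
        outputs += output
        yield outputs  # Gradio redraws the partial reply on each yield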
@@ -121,7 +149,13 @@ demo = gr.ChatInterface(
             step=0.1,
             label="Repetition penalty",
         ),
-        gr.Dropdown([
+        gr.Dropdown([
+            'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
+            'Llama-3-8B-Synthia-v3.5-f16.gguf'
+        ],
+        value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
+        label="Model"
+        ),
     ],
     theme=gr.themes.Soft(primary_hue="violet", secondary_hue="violet", neutral_hue="gray",font=[gr.themes.GoogleFont("Exo"), "ui-sans-serif", "system-ui", "sans-serif"]).set(
         body_background_fill_dark="#16141c",
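Note: the dropdown offers only two of the three files fetched in the first hunk; Mistral-7B-Instruct-v0.3-f32.gguf is downloaded and mapped by get_messages_formatter_type but cannot be selected. If that is unintended, the fix is one more choice:

    gr.Dropdown([
        'Meta-Llama-3-70B-Instruct-Q3_K_M.gguf',
        'Llama-3-8B-Synthia-v3.5-f16.gguf',
        'Mistral-7B-Instruct-v0.3-f32.gguf'  # downloaded above but currently unselectable
    ],
    value="Meta-Llama-3-70B-Instruct-Q3_K_M.gguf",
    label="Model"
    ),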