# Hugging Face Spaces chat app (Space status at capture time: "Sleeping")
import gradio as gr
from llama_cpp import Llama

# Load the Gemma 2B instruction-tuned model (GGUF, Q5_K_M quantization).
# Prefer downloading the weights from the Hugging Face Hub; if that fails
# (e.g. offline or rate-limited), fall back to a local copy of the same file.
try:
    llm = Llama.from_pretrained(
        repo_id="operablepattern/gemma-2b-it-Q",
        filename="*Q5_K_M.gguf",  # glob pattern selecting the Q5_K_M file
        chat_format="gemma",
        verbose=True,
    )
except Exception:
    # Narrowed from a bare `except:`, which would also swallow
    # SystemExit/KeyboardInterrupt. Any load failure falls back to disk.
    llm = Llama(
        model_path="./gemma-2b-it-Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )
def response(message, history):
    """Gradio ChatInterface callback: generate one reply to *message*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior turns as (user, assistant) pairs — gradio's default tuple
        format. NOTE(review): assumed from ChatInterface defaults; confirm
        against the installed gradio version.

    Returns
    -------
    str
        The model's reply with surrounding whitespace stripped.
    """
    print(message)
    print(history)
    # The model was loaded with chat_format="gemma", so use the chat
    # completion API. The previous raw-completion call (`llm(message, ...)`)
    # bypassed the chat template and silently discarded the history.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(messages=messages, max_tokens=32)
    print(output)
    return output["choices"][0]["message"]["content"].strip()
# Build and serve the chat UI. `queue()` enables request queuing so that
# concurrent users' generation requests are processed in order.
demo = gr.ChatInterface(
    fn=response,
    title="Chat with Gemma",
)
demo.queue().launch()