Spaces:

operablepattern
/

chat-with-gemma-2b

Sleeping

File size: 585 Bytes

d4cdbf0
20f6917
d4cdbf0
b12a44a
 
 
 
 
 
 
 
 
 
 
 
 
7c324c2
20f6917
a3cd95c
 
0934e21
f40ba5e
 
20f6917
 
 
7a45842
8fdda21

import gradio as gr
from llama_cpp import Llama

# Load the quantized Gemma model once at import time. The preferred source is
# the model repository named below; if that fails for any ordinary reason, fall
# back to a GGUF file expected in the current working directory.
try:
    llm = Llama.from_pretrained(
        repo_id="operablepattern/gemma-2b-it-Q",
        filename="*Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )
except Exception as exc:
    # Catch `Exception` rather than using a bare `except:` so that
    # KeyboardInterrupt / SystemExit raised while the model is being fetched
    # still stop the process instead of silently triggering the fallback.
    print(f"Llama.from_pretrained failed ({exc!r}); falling back to local model file")
    llm = Llama(
        model_path="./gemma-2b-it-Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )

def response(message, history):
    """Return the model's completion for ``message``.

    This is the callback handed to ``gr.ChatInterface``.

    Args:
        message: The text the user just submitted.
        history: Earlier conversation turns as supplied by the chat UI. It is
            only logged here; it is not fed to the model.

    Returns:
        The text generated by the model (capped at 32 new tokens).
    """
    # Debug logging of the incoming request.
    print(message)
    print(history)

    # NOTE(review): the model is loaded with chat_format="gemma", but this is a
    # plain completion call, so no chat template is applied to the prompt.
    # Consider `llm.create_chat_completion(...)` if templating is wanted.
    output = llm(message, max_tokens=32)
    print(output)

    # Previously this returned `message`, which made the UI echo the user's
    # input and discard the generated text. Return the completion instead.
    return output["choices"][0]["text"]

# Build the chat UI around `response`, then enable queuing and start the app.
demo = gr.ChatInterface(title="Chat with Gemma", fn=response)
demo.queue().launch()