# Hugging Face Spaces chat app (Space status at capture time: "Sleeping")
import gradio as gr
from llama_cpp import Llama

# Load the Gemma 2B instruction-tuned model (GGUF, Q5_K_M quantization).
# Prefer downloading the weights from the Hugging Face Hub; if that fails
# (e.g. offline or rate-limited), fall back to a local copy of the same file.
try:
    llm = Llama.from_pretrained(
        repo_id="operablepattern/gemma-2b-it-Q",
        filename="*Q5_K_M.gguf",  # glob pattern selecting the Q5_K_M file
        chat_format="gemma",
        verbose=True,
    )
except Exception:
    # Narrowed from a bare `except:`, which would also swallow
    # SystemExit/KeyboardInterrupt. Any load failure falls back to disk.
    llm = Llama(
        model_path="./gemma-2b-it-Q5_K_M.gguf",
        chat_format="gemma",
        verbose=True,
    )
def response(message, history):
    """Gradio ChatInterface callback: generate one reply to *message*.

    Parameters
    ----------
    message : str
        The latest user message.
    history : list
        Prior turns as (user, assistant) pairs — gradio's default tuple
        format. NOTE(review): assumed from ChatInterface defaults; confirm
        against the installed gradio version.

    Returns
    -------
    str
        The model's reply with surrounding whitespace stripped.
    """
    print(message)
    print(history)
    # The model was loaded with chat_format="gemma", so use the chat
    # completion API. The previous raw-completion call (`llm(message, ...)`)
    # bypassed the chat template and silently discarded the history.
    messages = []
    for user_turn, assistant_turn in history:
        messages.append({"role": "user", "content": user_turn})
        if assistant_turn:
            messages.append({"role": "assistant", "content": assistant_turn})
    messages.append({"role": "user", "content": message})
    output = llm.create_chat_completion(messages=messages, max_tokens=32)
    print(output)
    return output["choices"][0]["message"]["content"].strip()
# Build and serve the chat UI. `queue()` enables request queuing so that
# concurrent users' generation requests are processed in order.
demo = gr.ChatInterface(
    fn=response,
    title="Chat with Gemma",
)
demo.queue().launch()