Spaces:

dar-tau
/

run_inference

Sleeping

run_inference / app.py

Update app.py

b818b3f verified over 1 year ago

973 Bytes

	import os
	import gradio as gr
	import spaces
	from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline


	# Hugging Face Hub id of the chat model this app serves.
	model_name = "teknium/OpenHermes-2.5-Mistral-7B"
	# Hub access token, read from the environment; a missing `hf_token`
	# variable raises KeyError at import time.
	token = os.environ['hf_token']

	# Build the text-generation pipeline once at import time so every request
	# reuses the same loaded model. The token is forwarded so the Hub download
	# is authenticated (previously it was read but never used).
	pipe = pipeline("text-generation", model=model_name, token=token, device="cuda")


	# System message sent ahead of every user input; it instructs the model to
	# propose short auto-completions for partially typed chat text.
	system_prompt = '''You are given an input text for a chat interface. Propose auto-completion to the text. You have several roles:
	- Fight under-specification: if the user does not provide sufficient context, propose them a set of relevant suggestions.
	- Complete text: The text provided to you is in the making. If you have a good idea for how to complete - make suggestions.

	Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
	Suggest only up to 5 words ahead.
	'''

	@spaces.GPU
	def generate(text):
	    """Run the module-level pipeline on `text` and return its raw output.

	    The request is a two-turn conversation: the shared `system_prompt`
	    as the system message, followed by `text` as the user message.

	    Args:
	        text: The (possibly partial) user input to complete.

	    Returns:
	        Whatever `pipe` returns for the conversation, unmodified.
	    """
	    system_turn = {'role': 'system', 'content': system_prompt}
	    user_turn = {'role': 'user', 'content': text}
	    return pipe([system_turn, user_turn])