Spaces:
Sleeping
Sleeping
| import os | |
| import gradio as gr | |
| import spaces | |
| from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline | |
# Hugging Face Hub id of the chat model served by this app.
model_name = "teknium/OpenHermes-2.5-Mistral-7B"

# Read from the `hf_token` environment variable; raises KeyError at import
# time if the variable is not set.
# NOTE(review): presumably a Hugging Face Hub access token - confirm.
token = os.environ['hf_token']

# Text-generation pipeline placed on the GPU. The token is now forwarded to
# the pipeline; previously it was read above but never used anywhere.
pipe = pipeline(
    "text-generation",
    model=model_name,
    token=token,
    device="cuda",
)
# System message sent with every request: it instructs the model to propose
# short auto-completions for a partially typed chat input.
system_prompt = '''You are given an input text for a chat interface. Propose auto-completion to the text. You have several roles:
- Fight under-specification: if the user does not provide sufficient context, propose them a set of relevant suggestions.
- Complete text: The text provided to you is in the making. If you have a good idea for how to complete - make suggestions.
Make sure the suggestions are valid completions of the text! No need for them to complete the text completely.
Suggest only up to 5 words ahead.
'''
def generate(text):
    """Run the chat pipeline on `text` and return the pipeline's raw output.

    The module-level `system_prompt` is sent as the system message and
    `text` as the single user message; the result of `pipe(...)` is
    returned unchanged.
    """
    # NOTE(review): `spaces` is imported by this file but no `@spaces.GPU`
    # decorator is visible here - confirm whether one is needed for this
    # deployment.
    system_turn = {'role': 'system', 'content': system_prompt}
    user_turn = {'role': 'user', 'content': text}
    return pipe([system_turn, user_turn])