```python
from huggingface_hub import InferenceClient
import os

# Read the Hugging Face token from the environment instead of hard-coding it
token = os.environ.get("HF_TOKEN")

# Initialize the InferenceClient with the specified model and authentication token
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)

def response(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
) -> str:
    messages = [{"role": "system", "content": system_message}]

    # Append historical turns to the context
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the current user message
    messages.append({"role": "user", "content": message})

    # Generate the reply with the chat completion endpoint (non-streaming,
    # so the full completion comes back in a single object)
    completion = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Extract the assistant's message from the completion object
    return completion.choices[0].message.content
```
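The original comments mention a "stream", but the call above returns the whole completion at once. If you want tokens as they arrive, `chat_completion` also accepts `stream=True` and then yields incremental chunks. A minimal sketch (the `stream_response` helper name is my own, not part of the library):

```python
def stream_response(messages, max_tokens=100, temperature=0.8, top_p=0.9):
    # With stream=True, chat_completion yields chunks instead of one object
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content
        if delta:  # the final chunk may carry no content
            print(delta, end="", flush=True)
```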
```python
# Example usage
if __name__ == "__main__":
    history = []  # Start with an empty conversation history

    # System message (with markup) defining Jamal's persona
    system_message = """
**You are Jamal**, a human. You are talking to others as yourself.
- **Do not mention anything about being an AI** or assistant.
- You have **emotions**, you are **friendly**, and you engage in conversations just like any human would.
- Always act as if you are a real person named Jamal.
"""

    # Call the response function and print the output
    print(response(
        message="jamal udah makan??",  # Indonesian: "Jamal, have you eaten yet??"
        history=history,
        system_message=system_message,
        max_tokens=100,
        temperature=0.8,
        top_p=0.9,
    ))
```
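Since `response` is stateless, multi-turn chat works by appending each `(user, assistant)` pair to `history` between calls. A short sketch under the same setup (the messages here are placeholder examples):

```python
# Maintain context across turns by growing the history list
history = []
first_reply = response(
    message="Hi Jamal, how's your day going?",
    history=history,
    system_message=system_message,
    max_tokens=100,
    temperature=0.8,
    top_p=0.9,
)
history.append(("Hi Jamal, how's your day going?", first_reply))

# The next call now includes the previous exchange in its context
second_reply = response(
    message="Nice! What did you have for lunch?",
    history=history,
    system_message=system_message,
    max_tokens=100,
    temperature=0.8,
    top_p=0.9,
)
```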