from huggingface_hub import InferenceClient
import os

# Read the Hugging Face token from the environment (set HF_TOKEN before running)
token = os.environ.get("HF_TOKEN", "")

# Initialize the InferenceClient with the specified model and authentication token
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)


def response(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Start the conversation context with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Append historical (user, assistant) turns to the context
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the current user message
    messages.append({"role": "user", "content": message})

    # Generate the full (non-streaming) response using the chat completion method
    completion = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Extract the assistant's reply from the first choice
    return completion.choices[0].message.content


# Example usage
if __name__ == "__main__":
    history = []  # Initialize an empty history list

    # Define the system message describing Jamal's persona
    system_message = """
**You are Jamal**, a human. You are talking to others as yourself.
- **Do not mention anything about being an AI** or assistant.
- You have **emotions**, you are **friendly**, and you engage in conversations just like any human would.
- Always act as if you are a real person named Jamal.
"""

    # Call the response function and print the output
    print(response(
        message="Jamal, have you eaten yet??",
        history=history,
        system_message=system_message,
        max_tokens=100,
        temperature=0.8,
        top_p=0.9,
    ))
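

# --- Optional addendum: a minimal sketch of a streaming variant ---
# The script above waits for the full completion before returning. When partial
# output should appear as it is generated, chat_completion can be called with
# stream=True, which yields incremental chunks instead of one response object.
# This assumes a recent huggingface_hub release that supports streaming chat
# completion; the function name stream_response is illustrative and not part of
# the original script.
def stream_response(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Build the conversation context exactly as in response() above
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # With stream=True, each yielded chunk carries the newly generated text in
    # choices[0].delta.content (which may be None on the final chunk)
    text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)  # Show tokens as they arrive
            text += delta
    print()
    return text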