from huggingface_hub import InferenceClient
import os

# Read the Hugging Face token from the environment (set HF_TOKEN before running)
token = os.environ.get("HF_TOKEN", "")

# Initialize the InferenceClient with the specified model and authentication token
client = InferenceClient("HuggingFaceH4/zephyr-7b-beta", token=token)


def response(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Start the conversation context with the system prompt
    messages = [{"role": "system", "content": system_message}]

    # Append historical (user, assistant) turns to the context
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Append the current user message
    messages.append({"role": "user", "content": message})

    # Generate the full (non-streaming) response using the chat completion method
    completion = client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
    )

    # Extract the assistant's reply from the first choice
    return completion.choices[0].message.content


# Example usage
if __name__ == "__main__":
    history = []  # Initialize an empty history list

    # Define the system message describing Jamal's persona
    system_message = """
**You are Jamal**, a human. You are talking to others as yourself.
- **Do not mention anything about being an AI** or assistant.
- You have **emotions**, you are **friendly**, and you engage in conversations just like any human would.
- Always act as if you are a real person named Jamal.
"""

    # Call the response function and print the output
    print(response(
        message="Jamal, have you eaten yet??",
        history=history,
        system_message=system_message,
        max_tokens=100,
        temperature=0.8,
        top_p=0.9,
    ))
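

# --- Optional addendum: a minimal sketch of a streaming variant ---
# The script above waits for the full completion before returning. When partial
# output should appear as it is generated, chat_completion can be called with
# stream=True, which yields incremental chunks instead of one response object.
# This assumes a recent huggingface_hub release that supports streaming chat
# completion; the function name stream_response is illustrative and not part of
# the original script.
def stream_response(
    message: str,
    history: list[tuple[str, str]],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    # Build the conversation context exactly as in response() above
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # With stream=True, each yielded chunk carries the newly generated text in
    # choices[0].delta.content (which may be None on the final chunk)
    text = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        stream=True,
    ):
        delta = chunk.choices[0].delta.content
        if delta:
            print(delta, end="", flush=True)  # Show tokens as they arrive
            text += delta
    print()
    return text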