import openai
import gradio as gr

openai.api_key = "sk-R3HlMsYBk0NpAlLu2aA4B19054Ea4884A2Cf93D25662243d"
openai.api_base = "https://apai.zyai.online/v1"


def predict(message, history):
    # Convert Gradio's (user, assistant) history pairs into the OpenAI message format.
    history_openai_format = []
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",           # name of the chat model
        messages=history_openai_format,
        temperature=1,                   # higher values make the reply more random
        max_tokens=600,                  # maximum number of tokens in the reply
        top_p=1,
        frequency_penalty=0,             # in [-2, 2]; higher values penalize repeated tokens
        presence_penalty=0,              # in [-2, 2]; higher values push toward new content
        stream=True,                     # required: the loop below consumes streamed chunks
    )

    # Yield the partial reply as each streamed chunk arrives so Gradio can render it live.
    partial_message = ""
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if delta.get("content"):
            partial_message += delta["content"]
            yield partial_message


gr.ChatInterface(predict).queue().launch()
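# Alternative sketch, not from the original listing: the same streaming handler written
# for the openai>=1.0 SDK, where openai.ChatCompletion was removed in favour of a client
# object. The key, base URL, and model are reused from above purely for illustration,
# and the same gr.ChatInterface(predict).queue().launch() call works unchanged with it.
from openai import OpenAI

client = OpenAI(
    api_key="sk-R3HlMsYBk0NpAlLu2aA4B19054Ea4884A2Cf93D25662243d",
    base_url="https://apai.zyai.online/v1",
)


def predict(message, history):
    # Rebuild the conversation in the OpenAI chat message format.
    messages = []
    for human, assistant in history:
        messages.append({"role": "user", "content": human})
        messages.append({"role": "assistant", "content": assistant})
    messages.append({"role": "user", "content": message})

    # stream=True yields incremental chunks instead of one complete reply.
    stream = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=messages,
        temperature=1,
        max_tokens=600,
        stream=True,
    )

    partial_message = ""
    for chunk in stream:
        delta = chunk.choices[0].delta
        if delta.content:  # the final chunk carries an empty delta
            partial_message += delta.content
            yield partial_message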