import gradio as gr
from huggingface_hub import InferenceClient

# Step 1: Read your background info
with open("BACKGROUND_NEW.md", "r", encoding="utf-8") as f:
    background_text = f.read()
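# Assumption (not in the original): BACKGROUND_NEW.md lives next to this script; if it
# may be missing, a try/except FileNotFoundError with a fallback string would avoid a crash here.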

# Step 2: Set up the InferenceClient
client = InferenceClient("bunnycore/QwQen-3B-LCoT")
# Alternative models to try: HuggingFaceH4/zephyr-7b-beta, meta-llama/Llama-3.2-1B
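# Note (assumption, not in the original): gated or private models need authentication;
# InferenceClient accepts a token argument for that, e.g.
# client = InferenceClient(model_id, token=os.environ["HF_TOKEN"])
# (assuming HF_TOKEN is exported; os would need to be imported for this).
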
def respond(
    message,
    history: list[dict],
    system_message: str,
    max_tokens: int,
    temperature: float,
    top_p: float,
):
    if history is None:
        history = []

    # Include background text as part of the system message for context
    combined_system_message = f"{system_message}\n\n### Background Information ###\n{background_text}"

    # Start building the conversation history
    messages = [{"role": "system", "content": combined_system_message}]
    
    # Add prior turns; with type="messages", each history entry is already a
    # {"role": ..., "content": ...} dict, not a {"user": ..., "assistant": ...} pair
    for interaction in history:
        if interaction.get("role") in ("user", "assistant"):
            messages.append({"role": interaction["role"], "content": interaction["content"]})

    # Add the latest user message
    messages.append({"role": "user", "content": message})
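
    # For illustration (hypothetical content), messages now has the shape:
    # [{"role": "system", "content": system_message + background text},
    #  {"role": "user", "content": "What has Varun worked on?"}]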

    # Generate response
    response = ""
    for msg in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        # delta.content can be None on some chunks (e.g. the final one), so guard it
        token = msg.choices[0].delta.content or ""
        response += token
        yield response
    # print("----- SYSTEM MESSAGE -----")
    # print(messages[0]["content"])
    # print("----- FULL MESSAGES LIST -----")
    # for m in messages:
    #     print(m)
    # print("-------------------------")
    
# Step 3: Build a Gradio Blocks interface with two Tabs
with gr.Blocks() as demo:
    # Tab 1: GPT Chat Agent
    with gr.Tab("GPT Chat Agent"):
        gr.Markdown("## Welcome to Varun's GPT Agent")
        gr.Markdown("Feel free to ask questions about Varun’s journey, skills, and more!")
        chat = gr.ChatInterface(
            fn=respond,
            additional_inputs=[
                gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
                gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
                gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
                gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
            ],
            type="messages",  # Specify message type
        )
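
        # Note: the additional_inputs above are passed positionally to respond() as
        # system_message, max_tokens, temperature, and top_p.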

    # # Tab 2: Background Document
    # with gr.Tab("Varun's Background"):
    #     gr.Markdown("# About Varun")
    #     gr.Markdown(background_text)

# Step 4: Launch
if __name__ == "__main__":
    demo.launch()
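
# Usage note (standard Gradio launch() options, not used in the original):
# demo.launch(share=True) creates a temporary public link, and
# demo.launch(server_name="0.0.0.0", server_port=7860) serves on the local network.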