suayptalha committed on
Commit
7f20953
·
verified ·
1 Parent(s): 773a998

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -11
app.py CHANGED
@@ -15,7 +15,6 @@ from transformers import (
15
  import gradio as gr
16
  import spaces
17
 
18
- # Load model and tokenizer
19
  model_id = "microsoft/bitnet-b1.58-2B-4T"
20
 
21
  tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -49,7 +48,6 @@ def respond(
49
  Yields:
50
  The growing response text as new tokens are generated.
51
  """
52
- # Assemble messages
53
  messages = [{"role": "system", "content": system_message}]
54
  for user_msg, bot_msg in history:
55
  if user_msg:
@@ -58,13 +56,11 @@ def respond(
58
  messages.append({"role": "assistant", "content": bot_msg})
59
  messages.append({"role": "user", "content": message})
60
 
61
- # Prepare prompt and tokenize
62
  prompt = tokenizer.apply_chat_template(
63
  messages, tokenize=False, add_generation_prompt=True
64
  )
65
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
66
 
67
- # Set up streamer for real-time output
68
  streamer = TextIteratorStreamer(
69
  tokenizer, skip_prompt=True, skip_special_tokens=True
70
  )
@@ -76,24 +72,19 @@ def respond(
76
  top_p=top_p,
77
  do_sample=True,
78
  )
79
- # Start generation in a separate thread
80
  thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
81
  thread.start()
82
 
83
- # Stream tokens back to user
84
  response = ""
85
  for new_text in streamer:
86
  response += new_text
87
  yield response
88
 
89
- # Initialize Gradio chat interface
90
-
91
  demo = gr.ChatInterface(
92
  fn=respond,
93
  title="Bitnet-b1.58-2B-4T Chatbot",
94
- description="This chat application is powered by Microsoft BitNet-b1.58-2B-4T and designed for natural and fast conversations.",
95
  examples=[
96
- # Each example: [message, system_message, max_new_tokens, temperature, top_p]
97
  [
98
  "Hello! How are you?",
99
  "You are a helpful AI assistant.",
@@ -104,7 +95,7 @@ demo = gr.ChatInterface(
104
  [
105
  "Can you code a snake game in Python?",
106
  "You are a helpful AI assistant.",
107
- 512,
108
  0.7,
109
  0.95,
110
  ],
 
15
  import gradio as gr
16
  import spaces
17
 
 
18
  model_id = "microsoft/bitnet-b1.58-2B-4T"
19
 
20
  tokenizer = AutoTokenizer.from_pretrained(model_id)
 
48
  Yields:
49
  The growing response text as new tokens are generated.
50
  """
 
51
  messages = [{"role": "system", "content": system_message}]
52
  for user_msg, bot_msg in history:
53
  if user_msg:
 
56
  messages.append({"role": "assistant", "content": bot_msg})
57
  messages.append({"role": "user", "content": message})
58
 
 
59
  prompt = tokenizer.apply_chat_template(
60
  messages, tokenize=False, add_generation_prompt=True
61
  )
62
  inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
63
 
 
64
  streamer = TextIteratorStreamer(
65
  tokenizer, skip_prompt=True, skip_special_tokens=True
66
  )
 
72
  top_p=top_p,
73
  do_sample=True,
74
  )
 
75
  thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
76
  thread.start()
77
 
 
78
  response = ""
79
  for new_text in streamer:
80
  response += new_text
81
  yield response
82
 
 
 
83
  demo = gr.ChatInterface(
84
  fn=respond,
85
  title="Bitnet-b1.58-2B-4T Chatbot",
86
+ description="This chat application is powered by Microsoft's SOTA BitNet-b1.58-2B-4T and designed for natural and fast conversations.",
87
  examples=[
 
88
  [
89
  "Hello! How are you?",
90
  "You are a helpful AI assistant.",
 
95
  [
96
  "Can you code a snake game in Python?",
97
  "You are a helpful AI assistant.",
98
+ 2048,
99
  0.7,
100
  0.95,
101
  ],