Spaces:

suayptalha
/

Chat-with-Bitnet-b1.58-2B-4T

Running on Zero

App Files Files Community

suayptalha committed on Apr 20

Commit

7f20953

verified ·

1 Parent(s): 773a998

Update app.py

Browse files

Files changed (1) hide show

app.py +2 -11

app.py CHANGED Viewed

@@ -15,7 +15,6 @@ from transformers import (
 import gradio as gr
 import spaces
-# Load model and tokenizer
 model_id = "microsoft/bitnet-b1.58-2B-4T"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -49,7 +48,6 @@ def respond(
     Yields:
         The growing response text as new tokens are generated.
     """
-    # Assemble messages
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
@@ -58,13 +56,11 @@ def respond(
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
-    # Prepare prompt and tokenize
     prompt = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-    # Set up streamer for real-time output
     streamer = TextIteratorStreamer(
         tokenizer, skip_prompt=True, skip_special_tokens=True
     )
@@ -76,24 +72,19 @@ def respond(
         top_p=top_p,
         do_sample=True,
     )
-    # Start generation in a separate thread
     thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()
-    # Stream tokens back to user
     response = ""
     for new_text in streamer:
         response += new_text
         yield response
-# Initialize Gradio chat interface
 demo = gr.ChatInterface(
     fn=respond,
     title="Bitnet-b1.58-2B-4T Chatbot",
-    description="This chat application is powered by Microsoft BitNet-b1.58-2B-4T and designed for natural and fast conversations.",
     examples=[
-        # Each example: [message, system_message, max_new_tokens, temperature, top_p]
         [
             "Hello! How are you?",
             "You are a helpful AI assistant.",
@@ -104,7 +95,7 @@ demo = gr.ChatInterface(
         [
             "Can you code a snake game in Python?",
             "You are a helpful AI assistant.",
-            512,
             0.7,
             0.95,
         ],

 import gradio as gr
 import spaces
 model_id = "microsoft/bitnet-b1.58-2B-4T"
 tokenizer = AutoTokenizer.from_pretrained(model_id)
     Yields:
         The growing response text as new tokens are generated.
     """
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
     prompt = tokenizer.apply_chat_template(
         messages, tokenize=False, add_generation_prompt=True
     )
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
     streamer = TextIteratorStreamer(
         tokenizer, skip_prompt=True, skip_special_tokens=True
     )
         top_p=top_p,
         do_sample=True,
     )
     thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
     thread.start()
     response = ""
     for new_text in streamer:
         response += new_text
         yield response
 demo = gr.ChatInterface(
     fn=respond,
     title="Bitnet-b1.58-2B-4T Chatbot",
+    description="This chat application is powered by Microsoft's SOTA BitNet-b1.58-2B-4T and designed for natural and fast conversations.",
     examples=[
         [
             "Hello! How are you?",
             "You are a helpful AI assistant.",
         [
             "Can you code a snake game in Python?",
             "You are a helpful AI assistant.",
+            2048,
             0.7,
             0.95,
         ],