LenDigLearn committed
Commit 031ecb9 · 1 Parent(s): f4455e9

threading fix

Files changed (1):
  app.py +10 -3
app.py CHANGED
@@ -1,6 +1,7 @@
 import queue
 import gradio as gr
 import torch
+import threading
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
 """
@@ -14,10 +15,14 @@ model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloa
 class CustomIterable:
     def __init__(self):
         self._queue = queue.Queue()  # Thread-safe queue
+        self.first = True
 
     def put(self, item):
         """Add an element to the internal queue."""
-        self._queue.put(item)
+        if self.first:
+            self.first = False
+        else:
+            self._queue.put(item)
 
     def end(self):
         """Signal that no more elements will be added."""
@@ -61,8 +66,8 @@ def respond(
     streamer = CustomIterable()
 
     inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True)
-    outputs = model.generate(inputs, max_new_tokens=max_tokens, do_sample=True, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, streamer=streamer)
-
+    thread = threading.Thread(target=model.generate, args=([inputs]), kwargs={"max_new_tokens": max_tokens, "do_sample": True, "temperature": temperature, "top_p": top_p, "repetition_penalty": repetition_penalty, "streamer": streamer})
+    thread.start()
     response = ""
 
     for token in streamer:
@@ -70,6 +75,8 @@ def respond(
         response += decoded
         yield response
 
+    thread.join()
+
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """