Commit f4455e9
Parent: 87fc746
experiment with own streaming

Files changed:
- app.py (+52 -9)
- requirements.txt (+3 -0)

app.py CHANGED
@@ -1,12 +1,44 @@
+import queue
 import gradio as gr
-from huggingface_hub import InferenceClient
+import torch
+from transformers import AutoTokenizer, AutoModelForCausalLM
 
 """
 For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
 """
-client = InferenceClient(...)
+checkpoint = "LemiSt/SmolLM-135M-instruct-de-merged"
+tokenizer = AutoTokenizer.from_pretrained(checkpoint)
+model = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.bfloat16)
 
 
+class CustomIterable:
+    def __init__(self):
+        self._queue = queue.Queue()  # Thread-safe queue
+
+    def put(self, item):
+        """Add an element to the internal queue."""
+        self._queue.put(item)
+
+    def end(self):
+        """Signal that no more elements will be added."""
+        self._queue.put(None)  # Sentinel value to indicate the end of the queue
+
+    def __iter__(self):
+        """Return the iterator (self in this case)."""
+        return self
+
+    def __next__(self):
+        """Return the next element from the queue, blocking if necessary."""
+        try:
+            item = self._queue.get(block=True)  # Wait for an item
+        except queue.Empty:
+            raise StopIteration
+
+        if item is None:  # Sentinel value to end the iteration
+            raise StopIteration
+
+        return item
+
 def respond(
     message,
     history: list[tuple[str, str]],
@@ -14,6 +46,7 @@ def respond(
     max_tokens,
     temperature,
     top_p,
+    repetition_penalty
 ):
     messages = [{"role": "system", "content": system_message}]
 
@@ -25,14 +58,17 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    response = client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=False,
-        temperature=temperature,
-        top_p=top_p).choices[0].message.content
+    streamer = CustomIterable()
 
-    return response
+    inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True)
+    outputs = model.generate(inputs, max_new_tokens=max_tokens, do_sample=True, temperature=temperature, top_p=top_p, repetition_penalty=repetition_penalty, streamer=streamer)
+
+    response = ""
+
+    for token in streamer:
+        decoded = tokenizer.decode(token, skip_special_tokens=True)
+        response += decoded
+        yield response
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
@@ -50,6 +86,13 @@ demo = gr.ChatInterface(
             step=0.05,
             label="Top-p (nucleus sampling)",
         ),
+        gr.Slider(
+            minimum=0.1,
+            maximum=2.0,
+            value=1.2,
+            step=0.05,
+            label="Repetition penalty",
+        ),
     ],
 )
 
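Note on the streamer protocol: `generate` drives whatever object is passed as `streamer=` through the same `put()`/`end()` interface as `transformers`' `BaseStreamer`. `put()` is called first with the full prompt `input_ids` and then once per newly sampled token, and `end()` fires when generation finishes. Since `CustomIterable.put` enqueues values verbatim, the decode loop above also receives the prompt tensor, which the built-in `TextStreamer` filters out via `skip_prompt`. Incidentally, the `except queue.Empty` branch can never fire, because `get(block=True)` without a timeout waits indefinitely. A minimal standalone sketch of the queue-backed iterator itself, with a hypothetical producer thread standing in for `generate`:

import queue
import threading

class BlockingIterable:
    """Queue-backed iterator mirroring CustomIterable from this commit."""
    def __init__(self):
        self._queue = queue.Queue()  # thread-safe handoff between producer and consumer

    def put(self, item):
        self._queue.put(item)

    def end(self):
        self._queue.put(None)  # sentinel: no more items

    def __iter__(self):
        return self

    def __next__(self):
        item = self._queue.get(block=True)  # blocks until the producer puts something
        if item is None:
            raise StopIteration
        return item

stream = BlockingIterable()

def producer():
    for chunk in ["Hal", "lo", " Welt"]:  # stand-in for generate() emitting tokens
        stream.put(chunk)
    stream.end()

threading.Thread(target=producer).start()
print("".join(stream))  # consumer blocks per item, then prints "Hallo Welt"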
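A caveat with the flow as committed: `model.generate` is a blocking call, so the `for token in streamer:` loop only starts once generation has completely finished, and the chat output arrives in one burst rather than token by token. The usual pattern, and exactly what `transformers`' built-in `TextIteratorStreamer` exists for, is to run `generate` in a background thread. A hypothetical `respond_streaming` sketch under that assumption (same `tokenizer`/`model` globals as above; history handling omitted for brevity):

from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(message, history, system_message, max_tokens, temperature, top_p, repetition_penalty):
    # (history handling from the original respond() omitted for brevity)
    messages = [{"role": "system", "content": system_message}]
    messages.append({"role": "user", "content": message})

    # TextIteratorStreamer implements the same put()/end() protocol but decodes
    # internally and can skip the prompt tokens that generate() pushes in first.
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    inputs = tokenizer.apply_chat_template(messages, tokenize=True, return_tensors="pt", add_generation_prompt=True)

    thread = Thread(target=model.generate, kwargs=dict(
        inputs=inputs, max_new_tokens=max_tokens, do_sample=True,
        temperature=temperature, top_p=top_p,
        repetition_penalty=repetition_penalty, streamer=streamer))
    thread.start()  # generation now runs concurrently with the loop below

    response = ""
    for text in streamer:  # yields decoded text chunks as they are produced
        response += text
        yield response
    thread.join()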
requirements.txt CHANGED
@@ -1,2 +1,5 @@
 huggingface_hub==0.23.2
 minijinja==2.2.0
+--extra-index-url https://download.pytorch.org/whl/cpu
+torch==2.4.1
+transformers==4.45.2
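The `--extra-index-url` line points pip at the CPU-only PyTorch wheel index, presumably to avoid pulling the much larger CUDA build on a CPU-tier Space; pip honors such global options inside a requirements file, so the usual install still works:

pip install -r requirements.txt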