zhanghanxiao committed on
Commit
de8bf82
·
verified ·
1 Parent(s): f308a75

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -1,8 +1,6 @@
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
2
  from threading import Thread
3
  import gradio as gr
4
- import re
5
- import torch
6
 
7
  # load model and tokenizer
8
  model_name = "inclusionAI/Ling-mini-2.0"
@@ -38,18 +36,28 @@ def respond(
38
  tokenize=False,
39
  add_generation_prompt=True
40
  )
 
 
 
41
  model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
42
 
43
- generated_ids = model.generate(
44
- **model_inputs,
45
- max_new_tokens=512
 
 
 
46
  )
47
- generated_ids = [
48
- output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
49
- ]
 
 
 
 
50
 
51
- response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
52
- yield response
53
 
54
 
55
  """
 
1
  from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
2
  from threading import Thread
3
  import gradio as gr
 
 
4
 
5
  # load model and tokenizer
6
  model_name = "inclusionAI/Ling-mini-2.0"
 
36
  tokenize=False,
37
  add_generation_prompt=True
38
  )
39
+
40
+ streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
41
+
42
  model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
43
 
44
+ model_inputs.update(dict(max_new_tokens=512,streamer=streamer))
45
+
46
+ # Start a separate thread for model generation to allow streaming output
47
+ thread = Thread(
48
+ target=model.generate,
49
+ kwargs=model_inputs,
50
  )
51
+ thread.start()
52
+
53
+ # Accumulate and yield text tokens as they are generated
54
+ acc_text = ""
55
+ for text_token in streamer:
56
+ acc_text += text_token # Append the generated token to the accumulated text
57
+ yield acc_text # Yield the accumulated text
58
 
59
+ # Ensure the generation thread completes
60
+ thread.join()
61
 
62
 
63
  """