Moreza009 committed on
Commit
8d0b34d
·
1 Parent(s): e018ed2
Files changed (2) hide show
  1. app.py +18 -17
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,35 +1,36 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("Moreza009/aya23-8b-double-quantized")
 
 
 
 
8
 
9
 
10
def respond(
    message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* from the Inference API.

    Parameters
    ----------
    message : str
        The user's chat message.
    max_tokens : int
        Maximum number of tokens the endpoint may generate.
    temperature, top_p : float
        Sampling hyper-parameters forwarded to ``chat_completion``.

    Yields
    ------
    str
        The accumulated response text, re-yielded after every streamed
        chunk so Gradio can render incremental output.
    """
    # chat_completion expects a *list* of message dicts, not a bare dict.
    messages = [{"role": "user", "content": message}]

    response = ""
    # Iterate streamed chunks; use a distinct loop name so the `message`
    # parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # delta.content can be None on an empty/final chunk; guard so the
        # concatenation does not raise TypeError.
        if token:
            response += token
        yield response
33
 
34
  """
35
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
1
  import gradio as gr
 
2
 
3
  """
4
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
5
  """
6
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level singletons shared by respond(); loaded once at import time.
# NOTE(review): from_pretrained downloads weights on first use — assumes
# network access or a local cache. Device placement and dtype are left at
# transformers defaults; confirm this fits the deployment host.
tokenizer = AutoTokenizer.from_pretrained("Moreza009/aya23-8b-double-quantized")
model = AutoModelForCausalLM.from_pretrained("Moreza009/aya23-8b-double-quantized")
11
 
12
 
13
def respond(
    message,
    max_new_tokens=4000,
    temperature=0.3,
    top_p=0.7,
):
    """Generate a reply to *message* with the locally loaded model.

    Parameters
    ----------
    message : str
        The user's chat message.
    max_new_tokens : int
        Upper bound on newly generated tokens (prompt excluded).
    temperature, top_p : float
        Sampling hyper-parameters forwarded to ``model.generate``.

    Yields
    ------
    str
        The decoded reply text (a single yield; generation here is not
        token-streamed).
    """
    messages = [{"role": "user", "content": message}]
    # Build the model's chat-formatted prompt as a tensor of token ids.
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Decode only the newly generated tokens: decoding gen_tokens[0] whole
    # (as the original did) echoes the prompt chat template and special
    # tokens back to the user.
    new_tokens = gen_tokens[0][input_ids.shape[-1]:]
    yield tokenizer.decode(new_tokens, skip_special_tokens=True)
34
 
35
  """
36
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
requirements.txt CHANGED
@@ -1 +1,3 @@
 
 
1
  huggingface_hub==0.22.2
 
1
+ torch
2
+ transformers
3
  huggingface_hub==0.22.2