Moreza009 committed on
Commit
8d0b34d
·
1 Parent(s): e018ed2
Files changed (2) hide show
  1. app.py +18 -17
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,35 +1,36 @@
1
  import gradio as gr
2
- from huggingface_hub import InferenceClient
3
 
4
  """
5
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
6
  """
7
- client = InferenceClient("Moreza009/aya23-8b-double-quantized")
 
 
 
 
8
 
9
 
10
def respond(
    message,
    max_tokens,
    temperature,
    top_p,
):
    """Stream a chat completion for *message* from the Inference API.

    Parameters
    ----------
    message : str
        The user's chat message.
    max_tokens : int
        Maximum number of tokens the endpoint may generate.
    temperature, top_p : float
        Sampling hyper-parameters forwarded to ``chat_completion``.

    Yields
    ------
    str
        The accumulated response text, re-yielded after every streamed
        chunk so Gradio can render incremental output.
    """
    # chat_completion expects a *list* of message dicts, not a bare dict.
    messages = [{"role": "user", "content": message}]

    response = ""
    # Iterate streamed chunks; use a distinct loop name so the `message`
    # parameter is not shadowed.
    for chunk in client.chat_completion(
        messages,
        max_tokens=max_tokens,
        stream=True,
        temperature=temperature,
        top_p=top_p,
    ):
        token = chunk.choices[0].delta.content
        # delta.content can be None on an empty/final chunk; guard so the
        # concatenation does not raise TypeError.
        if token:
            response += token
        yield response
33
 
34
  """
35
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 
1
  import gradio as gr
 
2
 
3
  """
4
  For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
5
  """
6
# Load model directly
from transformers import AutoTokenizer, AutoModelForCausalLM

# Module-level singletons shared by respond(); loaded once at import time.
# NOTE(review): from_pretrained downloads weights on first use — assumes
# network access or a local cache. Device placement and dtype are left at
# transformers defaults; confirm this fits the deployment host.
tokenizer = AutoTokenizer.from_pretrained("Moreza009/aya23-8b-double-quantized")
model = AutoModelForCausalLM.from_pretrained("Moreza009/aya23-8b-double-quantized")
11
 
12
 
13
def respond(
    message,
    max_new_tokens=4000,
    temperature=0.3,
    top_p=0.7,
):
    """Generate a reply to *message* with the locally loaded model.

    Parameters
    ----------
    message : str
        The user's chat message.
    max_new_tokens : int
        Upper bound on newly generated tokens (prompt excluded).
    temperature, top_p : float
        Sampling hyper-parameters forwarded to ``model.generate``.

    Yields
    ------
    str
        The decoded reply text (a single yield; generation here is not
        token-streamed).
    """
    messages = [{"role": "user", "content": message}]
    # Build the model's chat-formatted prompt as a tensor of token ids.
    input_ids = tokenizer.apply_chat_template(
        messages,
        tokenize=True,
        add_generation_prompt=True,
        return_tensors="pt",
    )

    gen_tokens = model.generate(
        input_ids,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temperature,
        top_p=top_p,
    )

    # Decode only the newly generated tokens: decoding gen_tokens[0] whole
    # (as the original did) echoes the prompt chat template and special
    # tokens back to the user.
    new_tokens = gen_tokens[0][input_ids.shape[-1]:]
    yield tokenizer.decode(new_tokens, skip_special_tokens=True)
34
 
35
  """
36
  For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
requirements.txt CHANGED
@@ -1 +1,3 @@
 
 
1
  huggingface_hub==0.22.2
 
1
+ torch
2
+ transformers
3
  huggingface_hub==0.22.2