雷娃 committed
Commit 7de1f3b · 1 Parent(s): a68acd5

add local load models

Files changed (2)
  1. app.py +29 -14
  2. requirements.txt +5 -0
app.py CHANGED
@@ -1,7 +1,21 @@
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
+from threading import Thread
 import gradio as gr
+import re
+import torch
 from huggingface_hub import InferenceClient
 
 
+# load model and tokenizer
+model_name = "inclusionAI/Ling-mini-2.0"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForCausalLM.from_pretrained(
+    model_name,
+    torch_dtype="auto",
+    device_map="auto",
+    trust_remote_code=True
+).eval()
+
 def respond(
     message,
     history: list[dict[str, str]],
@@ -22,22 +36,23 @@ def respond(
 
     messages.append({"role": "user", "content": message})
 
-    response = ""
-
-    for message in client.chat_completion(
+    text = tokenizer.apply_chat_template(
         messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        choices = message.choices
-        token = ""
-        if len(choices) and choices[0].delta.content:
-            token = choices[0].delta.content
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt", return_token_type_ids=False).to(model.device)
+
+    generated_ids = model.generate(
+        **model_inputs,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
 
-        response += token
-        yield response
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    yield response
 
 
 """
requirements.txt ADDED
@@ -0,0 +1,5 @@
+gradio
+transformers
+torch
+accelerate
+openai
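
Of these, accelerate is what enables the device_map="auto" argument in the model load above; the openai entry is not imported by the new app.py in this commit.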