Rainnighttram committed · verified
Commit b602306 · 1 Parent(s): e37e48c

Update README.md

Files changed (1): README.md (+47 -1)
README.md CHANGED
@@ -1,4 +1,50 @@
  ---
  base_model:
  - Dream-org/Dream-v0-Instruct-7B
- ---
+ ---
+
+ ## Usage
+
+ Here's how to load and use the quantized model:
+
+ ```python
+ from transformers import AutoModel, AutoTokenizer
+
+ model_path = "Dream-v0-Instruct-7B-4bit"
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+ model = AutoModel.from_pretrained(
+     model_path,
+     device_map="auto",
+     trust_remote_code=True
+ )
+ # device_map="auto" already places the weights on the GPU; calling
+ # .to("cuda") on a dispatched model is redundant and can raise an error,
+ # so only switch to eval mode here.
+ model = model.eval()
+
+ messages = [
+     {"role": "user", "content": "Please make comparisons between UHF and LF RFID."}
+ ]
+
+ # Build the prompt with the model's chat template.
+ inputs = tokenizer.apply_chat_template(
+     messages, return_tensors="pt", return_dict=True, add_generation_prompt=True
+ )
+ input_ids = inputs.input_ids.to(device="cuda")
+ attention_mask = inputs.attention_mask.to(device="cuda")
+
+ # Diffusion decoding: `steps` sets the number of denoising iterations,
+ # `alg` selects the token-unmasking order.
+ output = model.diffusion_generate(
+     input_ids,
+     attention_mask=attention_mask,
+     max_new_tokens=512,
+     output_history=True,
+     return_dict_in_generate=True,
+     steps=512,
+     temperature=0.2,
+     top_p=0.95,
+     alg="entropy",
+     alg_temp=0.,
+ )
+
+ # Drop the prompt tokens and decode only the newly generated part.
+ generations = [
+     tokenizer.decode(g[len(p):].tolist())
+     for p, g in zip(input_ids, output.sequences)
+ ]
+
+ # Cut the output at the first end-of-sequence token.
+ print(generations[0].split(tokenizer.eos_token)[0])
+ ```
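The snippet above assumes the checkpoint at `Dream-v0-Instruct-7B-4bit` already carries its quantization settings. If the saved files do not embed a `quantization_config`, a minimal sketch of loading the base model in 4-bit with bitsandbytes would look like the following; the NF4 quant type and bfloat16 compute dtype are common defaults chosen for illustration, not values taken from this commit:

```python
import torch
from transformers import AutoModel, AutoTokenizer, BitsAndBytesConfig

# Hypothetical on-the-fly 4-bit load of the upstream base model.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",          # assumption: NF4 quantization
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumption: bf16 compute
)

base = "Dream-org/Dream-v0-Instruct-7B"
tokenizer = AutoTokenizer.from_pretrained(base, trust_remote_code=True)
model = AutoModel.from_pretrained(
    base,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
).eval()
```

From here, generation proceeds exactly as in the README snippet, since quantization only changes how the weights are stored and computed, not the `diffusion_generate` interface.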