Rainnighttram committed
Commit 35874da · verified · 1 Parent(s): 8219bdd

Update README.md

Files changed (1):
  1. README.md +84 -1
README.md CHANGED
@@ -4,6 +4,7 @@ base_model:
 ---
 
 ## Usage
+### Single Chat
 
 Here's how to load and use the quantized model:
 
@@ -47,4 +48,86 @@ generations = [
     for p, g in zip(input_ids, output.sequences)
 ]
 
-print(generations[0].split(tokenizer.eos_token)[0])
+print(generations[0].split(tokenizer.eos_token)[0])
+```
+
+### Multi-round Chat
+```python
+from transformers import AutoModel, AutoTokenizer
+
+def initialize_model():
+    model_path = "Rainnighttram/Dream-v0-Instruct-7B-4bit"
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    model = AutoModel.from_pretrained(
+        model_path,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    model = model.to("cuda").eval()
+    return model, tokenizer
+
+def generate_response(model, tokenizer, messages):
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        return_tensors="pt",
+        return_dict=True,
+        add_generation_prompt=True
+    )
+
+    input_ids = inputs.input_ids.to(device="cuda")
+    attention_mask = inputs.attention_mask.to(device="cuda")
+
+    output = model.diffusion_generate(
+        input_ids,
+        attention_mask=attention_mask,
+        max_new_tokens=512,
+        output_history=True,
+        return_dict_in_generate=True,
+        steps=512,
+        temperature=0.2,
+        top_p=0.95,
+        alg="entropy",
+        alg_temp=0.,
+    )
+
+    generations = [
+        tokenizer.decode(g[len(p):].tolist())
+        for p, g in zip(input_ids, output.sequences)
+    ]
+
+    return generations[0].split(tokenizer.eos_token)[0]
+
+def main():
+    # Initialize the model and tokenizer
+    print("Initializing model and tokenizer...")
+    model, tokenizer = initialize_model()
+
+    # Store the conversation history across turns
+    messages = []
+
+    print("Chat initialized. Type 'quit' to exit.")
+    print("-" * 50)
+
+    while True:
+        # Get user input
+        user_input = input("\nYou: ").strip()
+
+        # Check if the user wants to quit
+        if user_input.lower() == 'quit':
+            print("\nEnding conversation. Goodbye!")
+            break
+
+        # Add the user message to the conversation history
+        messages.append({"role": "user", "content": user_input})
+
+        # Generate a response conditioned on the full history
+        print("\nAssistant: ", end="")
+        response = generate_response(model, tokenizer, messages)
+        print(response)
+
+        # Add the assistant's response to the conversation history
+        messages.append({"role": "assistant", "content": response})
+
+if __name__ == "__main__":
+    main()
+```
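
The multi-round example added by this commit re-encodes the full `messages` list with `apply_chat_template` on every turn, so each `diffusion_generate` call is conditioned on the whole conversation so far. As a minimal non-interactive sketch of that same pattern (assuming the `initialize_model` and `generate_response` helpers from the diff above are in scope and a CUDA device is available; the prompts are purely illustrative):

```python
# Scripted two-turn exchange reusing the helpers from the committed
# multi-round example. Prompts are hypothetical; requires a CUDA device
# and the model's trust_remote_code implementation of diffusion_generate.
model, tokenizer = initialize_model()

messages = []
for prompt in [
    "What is a diffusion language model?",     # illustrative first turn
    "Summarize your answer in one sentence.",  # follow-up relies on history
]:
    messages.append({"role": "user", "content": prompt})
    reply = generate_response(model, tokenizer, messages)
    print(f"You: {prompt}\nAssistant: {reply}\n")
    # Append the reply so the next turn sees the full conversation
    messages.append({"role": "assistant", "content": reply})
```

Because the entire history is re-tokenized each turn, long conversations will eventually exceed the model's context window; trimming old entries from `messages` is left to the caller.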