Rainnighttram
/

Dream-v0-Instruct-7B-4bit

4-bit precision

Model card Files Files and versions Community

Rainnighttram committed on Apr 9

Commit

35874da

·

verified ·

1 Parent(s): 8219bdd

Update README.md

Files changed (1) hide show

README.md +84 -1

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ base_model:
 ---
 ## Usage
 Here's how to load and use the quantized model:
@@ -47,4 +48,86 @@ generations = [
      for p, g in zip(input_ids, output.sequences)
  ]
-print(generations[0].split(tokenizer.eos_token)[0])

 ---
 ## Usage
+### Single Chat
 Here's how to load and use the quantized model:
      for p, g in zip(input_ids, output.sequences)
  ]
+print(generations[0].split(tokenizer.eos_token)[0])
+```
+### Multi-round Chat
+```python
+from transformers import AutoModel, AutoTokenizer
+def initialize_model():
+    model_path = "Rainnighttram/Dream-v0-Instruct-7B-4bit"
+    tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+    model = AutoModel.from_pretrained(
+        model_path,
+        device_map="auto",
+        trust_remote_code=True
+    )
+    model = model.to("cuda").eval()
+    return model, tokenizer
+def generate_response(model, tokenizer, messages):
+    inputs = tokenizer.apply_chat_template(
+        messages,
+        return_tensors="pt",
+        return_dict=True,
+        add_generation_prompt=True
+    )
+    input_ids = inputs.input_ids.to(device="cuda")
+    attention_mask = inputs.attention_mask.to(device="cuda")
+    output = model.diffusion_generate(
+        input_ids,
+        attention_mask=attention_mask,
+        max_new_tokens=512,
+        output_history=True,
+        return_dict_in_generate=True,
+        steps=512,
+        temperature=0.2,
+        top_p=0.95,
+        alg="entropy",
+        alg_temp=0.,
+    )
+    generations = [
+        tokenizer.decode(g[len(p):].tolist())
+        for p, g in zip(input_ids, output.sequences)
+    ]
+    return generations[0].split(tokenizer.eos_token)[0]
+def main():
+    # Initialize the model and tokenizer
+    print("Initializing model and tokenizer...")
+    model, tokenizer = initialize_model()
+    # Store conversation history
+    messages = []
+    print("Chat initialized. Type 'quit' to exit.")
+    print("-" * 50)
+    while True:
+        # Get user input
+        user_input = input("\nYou: ").strip()
+        # Check if user wants to quit
+        if user_input.lower() == 'quit':
+            print("\nEnding conversation. Goodbye!")
+            break
+        # Add user message to conversation history
+        messages.append({"role": "user", "content": user_input})
+        # Generate response
+        print("\nAssistant: ", end="")
+        response = generate_response(model, tokenizer, messages)
+        print(response)
+        # Add assistant's response to conversation history
+        messages.append({"role": "assistant", "content": response})
+# Start the chat loop only when this file is executed as a script.
+if __name__ == "__main__":
+    main()
+```