Updated Readme

Browse files

Files changed (1) hide show

README.md +60 -0

README.md CHANGED Viewed

@@ -84,6 +84,66 @@ model_name =  model_id.split("/")[-1]
 !python exllamav2/test_inference.py -m {model_name}/ -p "Tell me a funny joke about Large Language Models meeting a Blackhole in an intergalactic Bar."
 ```
 ## Uses
 <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->

 !python exllamav2/test_inference.py -m {model_name}/ -p "Tell me a funny joke about Large Language Models meeting a Blackhole in an intergalactic Bar."
 ```
+```python
+import sys, os
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from exllamav2 import (
+    ExLlamaV2,
+    ExLlamaV2Config,
+    ExLlamaV2Cache,
+    ExLlamaV2Tokenizer,
+)
+from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler
+import time
+# Initialize model and cache
+model_directory = "/model_path/Llama-2-7b-chat-hf-5.0-bpw-exl2/"
+print("Loading model: " + model_directory)
+config = ExLlamaV2Config(model_directory)
+model = ExLlamaV2(config)
+cache = ExLlamaV2Cache(model, lazy=True)
+model.load_autosplit(cache)
+tokenizer = ExLlamaV2Tokenizer(config)
+# Initialize generator
+generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
+# Generate some text
+settings = ExLlamaV2Sampler.Settings()
+settings.temperature = 0.85
+settings.top_k = 50
+settings.top_p = 0.8
+settings.token_repetition_penalty = 1.01
+settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])
+prompt = "Tell me a funny joke about Large Language Models meeting a Blackhole in an intergalactic Bar."
+max_new_tokens = 512
+generator.warmup()
+time_begin = time.time()
+output = generator.generate_simple(prompt, settings, max_new_tokens, seed=1234)
+time_end = time.time()
+time_total = time_end - time_begin
+print(output)
+```
 ## Uses
 <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->