alokabhishek committed · Commit 2109f0b · verified · 1 Parent(s): 1795b14

Updated Readme

Files changed (1)
  1. README.md +60 -0
README.md CHANGED
@@ -84,6 +84,66 @@ model_name = model_id.split("/")[-1]
  !python exllamav2/test_inference.py -m {model_name}/ -p "Tell me a funny joke about Large Language Models meeting a Blackhole in an intergalactic Bar."
  ```
  
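+ You can also run inference directly from Python with the exllamav2 API. The script below mirrors the example inference script shipped with the exllamav2 repo; `model_directory` is a placeholder path that you should point at your local copy of the quantized weights:
+ 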
+ ```python
+ import sys, os
+ import time
+ 
+ # This path hack is only needed when running from inside a clone of the
+ # exllamav2 repo; skip it if exllamav2 is installed via pip.
+ sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+ 
+ from exllamav2 import (
+     ExLlamaV2,
+     ExLlamaV2Config,
+     ExLlamaV2Cache,
+     ExLlamaV2Tokenizer,
+ )
+ from exllamav2.generator import ExLlamaV2BaseGenerator, ExLlamaV2Sampler
+ 
+ # Initialize model and cache
+ model_directory = "/model_path/Llama-2-7b-chat-hf-5.0-bpw-exl2/"
+ print("Loading model: " + model_directory)
+ 
+ config = ExLlamaV2Config(model_directory)
+ model = ExLlamaV2(config)
+ cache = ExLlamaV2Cache(model, lazy=True)  # lazy cache is allocated during the autosplit load
+ model.load_autosplit(cache)               # load the model, auto-splitting across available GPUs
+ tokenizer = ExLlamaV2Tokenizer(config)
+ 
+ # Initialize generator
+ generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
+ 
+ # Sampling settings
+ settings = ExLlamaV2Sampler.Settings()
+ settings.temperature = 0.85
+ settings.top_k = 50
+ settings.top_p = 0.8
+ settings.token_repetition_penalty = 1.01
+ settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])
+ 
+ prompt = "Tell me a funny joke about Large Language Models meeting a Blackhole in an intergalactic Bar."
+ max_new_tokens = 512
+ 
+ # Generate some text and time the generation
+ generator.warmup()
+ time_begin = time.time()
+ output = generator.generate_simple(prompt, settings, max_new_tokens, seed=1234)
+ time_total = time.time() - time_begin
+ 
+ print(output)
+ print(f"Response generated in {time_total:.2f} seconds, {max_new_tokens / time_total:.2f} tokens/second")
+ ```
+ 
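+ Note: `settings.disallow_tokens(tokenizer, [tokenizer.eos_token_id])` bans the end-of-sequence token, so the model always produces the full `max_new_tokens`; remove that line if you want generation to stop naturally. The `lazy=True` cache together with `model.load_autosplit(cache)` lets exllamav2 distribute the weights across all available GPUs automatically.
+ 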
  ## Uses
  
  <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->