Spaces:
Paused
Paused
rodrigomasini
committed on
Commit
·
4b8d66c
1
Parent(s):
24eb0d4
Update app_v4.py
Browse files
app_v4.py
CHANGED
@@ -33,15 +33,20 @@ if device == "cuda:0":
|
|
33 |
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=False)
|
34 |
|
35 |
# Attempt to load the model, catch any OOM errors
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
pretrained_model_dir,
|
40 |
model_basename="Jackson2-4bit-128g-GPTQ",
|
41 |
use_safetensors=True,
|
42 |
device=device
|
43 |
)
|
44 |
model.eval() # Set the model to inference mode
|
|
|
|
|
|
|
|
|
|
|
45 |
model_loaded = True
|
46 |
except RuntimeError as e:
|
47 |
if 'CUDA out of memory' in str(e):
|
|
|
33 |
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=False)
|
34 |
|
35 |
# Attempt to load the model, catch any OOM errors
|
36 |
+
# st.cache_resource shares one instance across reruns; st.cache_data would
# serialize and copy the return value, which is not suitable for a model.
@st.cache_resource
def load_gptq_model():
    """Load the quantized GPTQ model and return it in inference mode.

    Reads the module-level ``pretrained_model_dir`` and ``device``. No
    exception is caught here, so loader errors reach the caller.

    Returns:
        The object returned by ``AutoGPTQForCausalLM.from_quantized``,
        after ``eval()`` has been called on it.
    """
    # Bind the loaded model to a local name so that eval() and the return
    # statement operate on the object that was just loaded.
    model = AutoGPTQForCausalLM.from_quantized(
        pretrained_model_dir,
        model_basename="Jackson2-4bit-128g-GPTQ",
        use_safetensors=True,
        device=device,
    )
    model.eval()  # Set the model to inference mode
    return model
|
46 |
+
|
47 |
+
model_loaded = False
|
48 |
+
try:
|
49 |
+
model = load_gptq_model()
|
50 |
model_loaded = True
|
51 |
except RuntimeError as e:
|
52 |
if 'CUDA out of memory' in str(e):
|