rodrigomasini committed
Commit 4b8d66c · 1 Parent(s): 24eb0d4

Update app_v4.py

Files changed (1):
  1. app_v4.py (+8 -3)
app_v4.py CHANGED
@@ -33,15 +33,20 @@ if device == "cuda:0":
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=False)
 
 # Attempt to load the model, catch any OOM errors
-model_loaded = False
-try:
-    model = AutoGPTQForCausalLM.from_quantized(
+@st.cache_data
+def load_gptq_model():
+    model = AutoGPTQForCausalLM.from_quantized(
         pretrained_model_dir,
         model_basename="Jackson2-4bit-128g-GPTQ",
         use_safetensors=True,
         device=device
     )
     model.eval()  # Set the model to inference mode
+    return model
+
+model_loaded = False
+try:
+    model = load_gptq_model()
     model_loaded = True
 except RuntimeError as e:
     if 'CUDA out of memory' in str(e):
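
For reference, a minimal, self-contained sketch of the cached-loader pattern this commit introduces. It is an illustration, not the app's actual code: the value of pretrained_model_dir and the OOM handler body are assumptions (the diff truncates before the handler), and it swaps the commit's @st.cache_data for @st.cache_resource, the decorator Streamlit recommends for non-serializable objects such as models, since st.cache_data pickles a copy of the return value on every rerun.

# Sketch of the cached model-loading pattern (not app_v4.py's exact code).
# Assumes streamlit >= 1.18, auto-gptq, and torch are installed.
import streamlit as st
import torch
from auto_gptq import AutoGPTQForCausalLM

pretrained_model_dir = "path/to/Jackson2-4bit-128g-GPTQ"  # hypothetical; set elsewhere in app_v4.py
device = "cuda:0" if torch.cuda.is_available() else "cpu"

@st.cache_resource  # cache the model object itself across Streamlit reruns
def load_gptq_model():
    model = AutoGPTQForCausalLM.from_quantized(
        pretrained_model_dir,
        model_basename="Jackson2-4bit-128g-GPTQ",
        use_safetensors=True,
        device=device,
    )
    model.eval()  # inference mode
    return model

model_loaded = False
try:
    model = load_gptq_model()
    model_loaded = True
except RuntimeError as e:
    if "CUDA out of memory" in str(e):
        st.error("GPU ran out of memory while loading the model.")  # assumed handler
    else:
        raise

The motivation for the change: Streamlit reruns the whole script on every widget interaction, so without a cache decorator the quantized model would be reloaded from disk on each rerun; caching the loader makes the OOM-guarded load a one-time cost.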