Commit b23a956
Parent(s): 08e3783

Update app_v3.py

app_v3.py CHANGED
```diff
@@ -38,15 +38,15 @@ if torch.cuda.is_available():
 
 #tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, use_fast=True, legacy=False)
 tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)
+device = "cuda:0" if torch.cuda.is_available() else "cpu"
 model = AutoGPTQForCausalLM.from_quantized(
     pretrained_model_dir,
     model_basename=model_basename,
     use_safetensors=True,
-    device=
-    #use_triton=use_triton,
-    #quantize_config=None
+    device=device,
 )
 
+
 viz = torch.cuda.memory_summary()
 st.write(viz)
 
@@ -55,7 +55,8 @@ user_input = st.text_input("Input a phrase")
 prompt_template = f'USER: {user_input}\nASSISTANT:'
 
 if st.button("Generate the prompt"):
-    inputs = tokenizer(prompt_template, return_tensors='pt')
+    inputs = tokenizer(prompt_template, return_tensors='pt', max_length=512, truncation=True, padding='max_length', batch_size=4)
+    #inputs = tokenizer(prompt_template, return_tensors='pt')
     #streamer = TextStreamer(tokenizer)
     #pipe = pipeline(
     #    "text-generation",
```