Commit · 4333c79
1 Parent(s): ba18e04
Update app.py

app.py CHANGED
@@ -98,40 +98,4 @@ if model_loaded:
 
     # Display GPU memory information after generation
     gpu_memory_after_generation = get_gpu_memory()
-    st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")
-
-    tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
-
-    quantize_config = BaseQuantizeConfig(
-        bits=4,
-        group_size=128,
-        desc_act=False
-    )
-
-    model = AutoGPTQForCausalLM.from_quantized(local_folder,
-                                               use_safetensors=True,
-                                               strict=use_strict,
-                                               model_basename=model_basename,
-                                               device="cuda:0",
-                                               use_triton=use_triton,
-                                               quantize_config=quantize_config)
-
-    pipe = pipeline(
-        "text-generation",
-        model=model,
-        tokenizer=tokenizer,
-        max_new_tokens=512,
-        temperature=0.1,
-        top_p=0.95,
-        repetition_penalty=1.15
-    )
-
-    user_input = st.text_input("Input a phrase")
-
-    prompt_template=f'''USER: {user_input}
-    ASSISTANT:'''
-
-    # Generate output when the "Generate" button is pressed
-    if st.button("Generate the prompt"):
-        output = pipe(prompt_template)[0]['generated_text']
-        st.text_area("Prompt", value=output)
+    st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")