Spaces:
Paused
Paused
Commit
·
4333c79
1
Parent(s):
ba18e04
Update app.py
Browse files
app.py
CHANGED
|
@@ -98,40 +98,4 @@ if model_loaded:
|
|
| 98 |
|
| 99 |
# Display GPU memory information after generation
|
| 100 |
gpu_memory_after_generation = get_gpu_memory()
|
| 101 |
-
st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")
|
| 102 |
-
|
| 103 |
-
tokenizer = AutoTokenizer.from_pretrained(local_folder, use_fast=False)
|
| 104 |
-
|
| 105 |
-
quantize_config = BaseQuantizeConfig(
|
| 106 |
-
bits=4,
|
| 107 |
-
group_size=128,
|
| 108 |
-
desc_act=False
|
| 109 |
-
)
|
| 110 |
-
|
| 111 |
-
model = AutoGPTQForCausalLM.from_quantized(local_folder,
|
| 112 |
-
use_safetensors=True,
|
| 113 |
-
strict=use_strict,
|
| 114 |
-
model_basename=model_basename,
|
| 115 |
-
device="cuda:0",
|
| 116 |
-
use_triton=use_triton,
|
| 117 |
-
quantize_config=quantize_config)
|
| 118 |
-
|
| 119 |
-
pipe = pipeline(
|
| 120 |
-
"text-generation",
|
| 121 |
-
model=model,
|
| 122 |
-
tokenizer=tokenizer,
|
| 123 |
-
max_new_tokens=512,
|
| 124 |
-
temperature=0.1,
|
| 125 |
-
top_p=0.95,
|
| 126 |
-
repetition_penalty=1.15
|
| 127 |
-
)
|
| 128 |
-
|
| 129 |
-
user_input = st.text_input("Input a phrase")
|
| 130 |
-
|
| 131 |
-
prompt_template=f'''USER: {user_input}
|
| 132 |
-
ASSISTANT:'''
|
| 133 |
-
|
| 134 |
-
# Generate output when the "Generate" button is pressed
|
| 135 |
-
if st.button("Generate the prompt"):
|
| 136 |
-
output = pipe(prompt_template)[0]['generated_text']
|
| 137 |
-
st.text_area("Prompt", value=output)
|
|
|
|
| 98 |
|
| 99 |
# Display GPU memory information after generation
|
| 100 |
gpu_memory_after_generation = get_gpu_memory()
|
| 101 |
+
st.write(f"GPU Memory Info after generation: {gpu_memory_after_generation}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|