Update app.py
app.py CHANGED
@@ -24,7 +24,7 @@ from espnet2.bin.tts_inference import Text2Speech
 repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
 filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
 cache_dir="~/.cache/huggingface/hub"
-hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
+#hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
 '''
 llm = Llama(
     model_path="~/.cache/huggingface/hub/vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
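For reference, a minimal sketch of what the download call being commented out above does, assuming huggingface_hub is installed: hf_hub_download fetches a single file from the Hub into the local cache and returns its path. The os.path.expanduser call is an addition here, since a literal "~" in cache_dir is generally not expanded automatically.

    import os
    from huggingface_hub import hf_hub_download

    repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
    filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
    # Expand "~" explicitly; the string from the diff would otherwise be taken literally.
    cache_dir = os.path.expanduser("~/.cache/huggingface/hub")

    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    print(local_path)  # absolute path of the cached .bin file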
@@ -32,7 +32,7 @@ llm = Llama(
     # seed=1337, # Uncomment to set a specific seed
     n_ctx=4096, # Uncomment to increase the context window
 )
-
+
 llm = Llama.from_pretrained(
     repo_id="Sosaka/Vicuna-7B-4bit-ggml",
     filename="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
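The Llama(...) and Llama.from_pretrained(...) calls in these hunks sit inside a ''' block, so they are effectively disabled. Below is a minimal sketch of the two llama-cpp-python loading paths they correspond to, using the repo and filename values from the diff; nothing here is verified against this Space. Note that current llama.cpp builds load GGUF files only, so this older GGML (ggjt) .bin would likely be rejected, which may be why the block stays commented out.

    from llama_cpp import Llama

    # Option 1: load a model file that is already on disk.
    llm = Llama(
        model_path="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",  # local path; adjust as needed
        n_ctx=4096,       # context window size
        verbose=False,
    )

    # Option 2: let llama-cpp-python fetch the file from the Hugging Face Hub
    # (requires huggingface_hub to be installed).
    llm = Llama.from_pretrained(
        repo_id="Sosaka/Vicuna-7B-4bit-ggml",
        filename="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
        n_ctx=4096,
        verbose=False,
    )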
@@ -40,7 +40,7 @@ llm = Llama.from_pretrained(
     n_ctx = 4096,
     verbose=False
 )
-
+'''
 try:
     nltk.data.find('taggers/averaged_perceptron_tagger_eng')
 except LookupError:
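The try/except around nltk.data.find is the usual check-then-download pattern; the body of the except branch falls outside the hunk, so the nltk.download call below is an assumption, not code from this commit.

    import nltk

    try:
        # Raises LookupError if the tagger data is not present locally.
        nltk.data.find('taggers/averaged_perceptron_tagger_eng')
    except LookupError:
        nltk.download('averaged_perceptron_tagger_eng')  # assumed fallback, not shown in the diff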
@@ -134,7 +134,6 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
     torch.backends.cudnn.deterministic = False
     torch.backends.cudnn.benchmark = True
     torch.set_float32_matmul_precision("highest")
-    '''
     vicuna_output = vicuna_model.generate(
         **vicuna_input,
         max_new_tokens = 512,
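The vicuna_model.generate(...) context lines look like a Hugging Face transformers call. A minimal sketch of how such a call is usually wrapped follows; the checkpoint name, tokenizer, and prompt are illustrative assumptions, and only vicuna_model, vicuna_input, and max_new_tokens appear in the diff.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Assumed checkpoint for illustration; the diff does not show how vicuna_model is created.
    vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
    vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.5")

    prompt = "USER: Describe the image. ASSISTANT:"  # assumed prompt format
    vicuna_input = vicuna_tokenizer(prompt, return_tensors="pt").to(vicuna_model.device)
    vicuna_output = vicuna_model.generate(
        **vicuna_input,
        max_new_tokens=512,  # value from the diff
    )
    print(vicuna_tokenizer.decode(vicuna_output[0], skip_special_tokens=True))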
@@ -149,6 +148,7 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
         stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
         echo=True # Echo the prompt back in the output
     )
+    '''
     if answer_mode == 'medium':
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
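The context lines in the last two hunks toggle PyTorch precision flags per answer_mode. Only the 'medium' branch and a few flags are visible in the diff; the sketch below groups them into one helper, with the non-'medium' branch mirroring the flags from the earlier hunk, which is an assumption about how the surrounding code is structured.

    import torch

    def configure_precision(answer_mode: str) -> None:
        if answer_mode == 'medium':
            # Allow TensorFloat-32 matmuls: faster, slightly reduced precision.
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
        else:
            # Assumed other branch, mirroring the flags shown in the earlier hunk.
            torch.backends.cudnn.deterministic = False
            torch.backends.cudnn.benchmark = True
            torch.set_float32_matmul_precision("highest")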