1inkusFace committed (verified)
Commit 2e72baf · 1 Parent(s): ace0c77

Update app.py

Files changed (1): app.py (+4 -4)
app.py CHANGED
@@ -24,7 +24,7 @@ from espnet2.bin.tts_inference import Text2Speech
 repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
 filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
 cache_dir="~/.cache/huggingface/hub"
-hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
+#hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
 '''
 llm = Llama(
 model_path="~/.cache/huggingface/hub/vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
@@ -32,7 +32,7 @@ llm = Llama(
 # seed=1337, # Uncomment to set a specific seed
 n_ctx=4096, # Uncomment to increase the context window
 )
-'''
+
 llm = Llama.from_pretrained(
 repo_id="Sosaka/Vicuna-7B-4bit-ggml",
 filename="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
@@ -40,7 +40,7 @@ llm = Llama.from_pretrained(
 n_ctx = 4096,
 verbose=False
 )
-
+'''
 try:
 nltk.data.find('taggers/averaged_perceptron_tagger_eng')
 except LookupError:
@@ -134,7 +134,6 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
 torch.backends.cudnn.deterministic = False
 torch.backends.cudnn.benchmark = True
 torch.set_float32_matmul_precision("highest")
-'''
 vicuna_output = vicuna_model.generate(
 **vicuna_input,
 max_new_tokens = 512,
@@ -149,6 +148,7 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
 stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
 echo=True # Echo the prompt back in the output
 )
+'''
 if answer_mode == 'medium':
 torch.backends.cuda.matmul.allow_tf32 = True
 torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
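
One pitfall visible in the quoted-out `llm = Llama(...)` block: `model_path` is given as a literal `"~/.cache/huggingface/hub/..."` string, and Python does not expand `~` in file paths, so llama.cpp would not find the file there. Below is a minimal sketch of the manual download-then-load pattern, not app.py's exact code, assuming `huggingface_hub` and a `llama-cpp-python` build that still reads ggjt-v3 `.bin` files (newer releases expect GGUF models). `hf_hub_download` returns the resolved local path, so the cache directory never has to be hard-coded:

    # Sketch only, not app.py's exact code.
    from huggingface_hub import hf_hub_download
    from llama_cpp import Llama

    repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
    filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"

    # hf_hub_download returns the absolute path of the cached file,
    # so nothing here depends on where ~/.cache/huggingface/hub really is.
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)

    llm = Llama(
        model_path=model_path,
        n_ctx=4096,     # same context size app.py requests
        verbose=False,
    )

`Llama.from_pretrained(repo_id=..., filename=...)`, as used in app.py, wraps this same download-then-construct sequence, forwarding extra keyword arguments such as `n_ctx` and `verbose` to the `Llama` constructor.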