Update app.py
app.py CHANGED
@@ -24,7 +24,7 @@ from espnet2.bin.tts_inference import Text2Speech
 repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
 filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
 cache_dir="~/.cache/huggingface/hub"
-hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
+#hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
 '''
 llm = Llama(
     model_path="~/.cache/huggingface/hub/vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
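For reference, a minimal sketch of what the download call being commented out above does, assuming huggingface_hub is installed: hf_hub_download fetches a single file from the Hub into the local cache and returns its path. The os.path.expanduser call is an addition here, since a literal "~" in cache_dir is generally not expanded automatically.

    import os
    from huggingface_hub import hf_hub_download

    repo_id = "Sosaka/Vicuna-7B-4bit-ggml"
    filename = "vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin"
    # Expand "~" explicitly; the string from the diff would otherwise be taken literally.
    cache_dir = os.path.expanduser("~/.cache/huggingface/hub")

    local_path = hf_hub_download(repo_id=repo_id, filename=filename, cache_dir=cache_dir)
    print(local_path)  # absolute path of the cached .bin file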
@@ -32,7 +32,7 @@ llm = Llama(
     # seed=1337, # Uncomment to set a specific seed
     n_ctx=4096, # Uncomment to increase the context window
 )
-
+
 llm = Llama.from_pretrained(
     repo_id="Sosaka/Vicuna-7B-4bit-ggml",
     filename="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
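The Llama(...) and Llama.from_pretrained(...) calls in these hunks sit inside a ''' block, so they are effectively disabled. Below is a minimal sketch of the two llama-cpp-python loading paths they correspond to, using the repo and filename values from the diff; nothing here is verified against this Space. Note that current llama.cpp builds load GGUF files only, so this older GGML (ggjt) .bin would likely be rejected, which may be why the block stays commented out.

    from llama_cpp import Llama

    # Option 1: load a model file that is already on disk.
    llm = Llama(
        model_path="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",  # local path; adjust as needed
        n_ctx=4096,       # context window size
        verbose=False,
    )

    # Option 2: let llama-cpp-python fetch the file from the Hugging Face Hub
    # (requires huggingface_hub to be installed).
    llm = Llama.from_pretrained(
        repo_id="Sosaka/Vicuna-7B-4bit-ggml",
        filename="vicuna-7B-1.1-ggml_q4_0-ggjt_v3.bin",
        n_ctx=4096,
        verbose=False,
    )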
@@ -40,7 +40,7 @@ llm = Llama.from_pretrained(
     n_ctx = 4096,
     verbose=False
 )
-
+'''
 try:
     nltk.data.find('taggers/averaged_perceptron_tagger_eng')
 except LookupError:
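The try/except around nltk.data.find is the usual check-then-download pattern; the body of the except branch falls outside the hunk, so the nltk.download call below is an assumption, not code from this commit.

    import nltk

    try:
        # Raises LookupError if the tagger data is not present locally.
        nltk.data.find('taggers/averaged_perceptron_tagger_eng')
    except LookupError:
        nltk.download('averaged_perceptron_tagger_eng')  # assumed fallback, not shown in the diff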
@@ -134,7 +134,6 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
     torch.backends.cudnn.deterministic = False
     torch.backends.cudnn.benchmark = True
     torch.set_float32_matmul_precision("highest")
-    '''
     vicuna_output = vicuna_model.generate(
         **vicuna_input,
         max_new_tokens = 512,
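The vicuna_model.generate(...) context lines look like a Hugging Face transformers call. A minimal sketch of how such a call is usually wrapped follows; the checkpoint name, tokenizer, and prompt are illustrative assumptions, and only vicuna_model, vicuna_input, and max_new_tokens appear in the diff.

    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Assumed checkpoint for illustration; the diff does not show how vicuna_model is created.
    vicuna_tokenizer = AutoTokenizer.from_pretrained("lmsys/vicuna-7b-v1.5")
    vicuna_model = AutoModelForCausalLM.from_pretrained("lmsys/vicuna-7b-v1.5")

    prompt = "USER: Describe the image. ASSISTANT:"  # assumed prompt format
    vicuna_input = vicuna_tokenizer(prompt, return_tensors="pt").to(vicuna_model.device)
    vicuna_output = vicuna_model.generate(
        **vicuna_input,
        max_new_tokens=512,  # value from the diff
    )
    print(vicuna_tokenizer.decode(vicuna_output[0], skip_special_tokens=True))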
@@ -149,6 +148,7 @@ def process_audio(img, microphone, audio_upload, state, answer_mode): # Added a
         stop=["Q:", "\n"], # Stop generating just before the model would generate a new question
         echo=True # Echo the prompt back in the output
     )
+    '''
     if answer_mode == 'medium':
         torch.backends.cuda.matmul.allow_tf32 = True
         torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
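The context lines in the last two hunks toggle PyTorch precision flags per answer_mode. Only the 'medium' branch and a few flags are visible in the diff; the sketch below groups them into one helper, with the non-'medium' branch mirroring the flags from the earlier hunk, which is an assumption about how the surrounding code is structured.

    import torch

    def configure_precision(answer_mode: str) -> None:
        if answer_mode == 'medium':
            # Allow TensorFloat-32 matmuls: faster, slightly reduced precision.
            torch.backends.cuda.matmul.allow_tf32 = True
            torch.backends.cuda.matmul.allow_bf16_reduced_precision_reduction = False
        else:
            # Assumed other branch, mirroring the flags shown in the earlier hunk.
            torch.backends.cudnn.deterministic = False
            torch.backends.cudnn.benchmark = True
            torch.set_float32_matmul_precision("highest")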