Tobias Bergmann committed · Commit 9329033
Parent(s): ef2124d

gguf pipe

Browse files:
- app.py (+26, -13)
- requirements.txt (+2, -1)
app.py CHANGED
@@ -1,31 +1,44 @@
-import deepsparse
+from llama_cpp import Llama
+from huggingface_hub import hf_hub_download
 import gradio as gr
 from typing import Tuple, List
 
 deepsparse.cpu.print_hardware_capability()
 
-MODEL_ID = "hf:mgoin/Meta-Llama-3-8B-Instruct-pruned50-quant-ds"
-
 DESCRIPTION = f"""
-# Chat with
-
-Model ID: {MODEL_ID[len("hf:"):]}
+# Chat with Arco 500M as GGUF on CPU
 """
 
 MAX_MAX_NEW_TOKENS = 1024
 DEFAULT_MAX_NEW_TOKENS = 200
 
-# Setup the engine
-
-pipe = Pipeline.create(
+# Download the GGUF file
+model_path = hf_hub_download(
+    repo_id="ijohn07/arco-plus-Q8_0-GGUF",
+    filename="arco-plus-q8_0.gguf",
+    repo_type="model"
+)
+# Load the GGUF model
+llm = Llama(model_path=model_path)
+
+# Setup the pipeline
+pipe = pipeline(
     task="text-generation",
-    model_path=MODEL_ID,
-    sequence_length=MAX_MAX_NEW_TOKENS,
-    prompt_sequence_length=8,
-    num_cores=8,
+    model=llm,  # Passes the loaded Llama model as the model
+    max_new_tokens=MAX_MAX_NEW_TOKENS,  # Sets the maximum number of tokens the model generates
 )
 
 
+# Setup the engine
+#pipe = Pipeline.create(
+#    task="text-generation",
+#    model_path=MODEL_ID,
+#    sequence_length=MAX_MAX_NEW_TOKENS,
+#    prompt_sequence_length=8,
+#    num_cores=8,
+#)
+
+
 def clear_and_save_textbox(message: str) -> Tuple[str, str]:
     return "", message
 
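A note on the new code path: as committed, app.py still calls `deepsparse.cpu.print_hardware_capability()` even though the `deepsparse` import was removed, and `pipe = pipeline(...)` refers to a `pipeline` factory that is never imported (the `transformers` pipeline also does not accept a `llama_cpp.Llama` object as its `model`). Below is a minimal runnable sketch of the intended download-and-generate flow using llama-cpp-python's own API directly; the repo id and filename come from the diff, while `n_ctx`, the example prompt, and the use of `create_chat_completion` are assumptions:

```python
# Sketch only: drive the GGUF model with llama-cpp-python directly,
# rather than through the transformers-style pipeline(...) call in the diff.
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

MAX_MAX_NEW_TOKENS = 1024  # same cap as in app.py

# Download the GGUF file from the Hub (cached locally after the first call)
model_path = hf_hub_download(
    repo_id="ijohn07/arco-plus-Q8_0-GGUF",
    filename="arco-plus-q8_0.gguf",
)

# Load the GGUF model; n_ctx bounds the context window (assumed value)
llm = Llama(model_path=model_path, n_ctx=2048)

# Generate a chat completion with llama-cpp-python's chat API
result = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Hello, who are you?"}],
    max_tokens=MAX_MAX_NEW_TOKENS,
)
print(result["choices"][0]["message"]["content"])
```

A Gradio event handler could then call `create_chat_completion` once per user message instead of going through `pipe`.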
requirements.txt CHANGED
@@ -1,3 +1,4 @@
-deepsparse-nightly==1.8.0.20240502
 transformers
+llama_cpp
+huggingface_hub
 gradio
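One packaging note on the new requirements: the module imported as `llama_cpp` is distributed on PyPI as `llama-cpp-python`, so the bare `llama_cpp` line may not resolve when the Space builds. A corrected requirements.txt would presumably read:

```
transformers
llama-cpp-python
huggingface_hub
gradio
```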