Christoph Holthaus
committed on
Commit · a2232d8
1 Parent(s): 493f720
improve
app.py CHANGED
@@ -5,10 +5,13 @@ from time import time
 import gradio as gr
 import psutil
 
+# load like this - use the variable everywhere
+model_path = os.getenv("MODEL_PATH")
+# show a warning when it is empty, and a brief description of how to set it
+
 # Initing things
-print("debug: init model")
+print(f"debug: init model: {model_path}")
 #llm = Llama(model_path="./model.bin") # LLaMa model
-llama_model_name = "TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF"
 print("! INITING DONE !")
 
 # Preparing things to work
@@ -45,13 +48,14 @@ MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
 #download model here
 # check localstorage, if not there, load, else use existing.
+# check gradio - how does it download? is there a function we can use?
 
 if torch.cuda.is_available():
     model_id = "mistralai/Mistral-7B-Instruct-v0.1"
     model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16, device_map="auto")
     tokenizer = AutoTokenizer.from_pretrained(model_id)
 
-
+# we need to make sure we only run one thread or we will probably run out of RAM
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
@@ -133,6 +137,7 @@ chat_interface = gr.ChatInterface(
         ),
     ],
     stop_btn=None,
+    # add more eval examples, e.g. a long list taken from teknium and others, maybe grouped by type
     examples=[
         ["Hello there! How are you doing?"],
         ["Can you explain briefly to me what is the Python programming language?"],
@@ -149,6 +154,7 @@ with gr.Blocks(css="style.css") as demo:
         value="Duplicate Space for private use",
         elem_id="duplicate-button",
         visible=os.getenv("SHOW_DUPLICATE_BUTTON") == "1",
+        # add
     )
     chat_interface.render()
 
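The MODEL_PATH lines added above only read the variable; the TODO about warning when it is empty is not implemented in this commit. A minimal sketch of that behavior, where the warning wording and the suggested command are assumptions:

import os

# hypothetical sketch: warn when MODEL_PATH is unset and explain how to set it
model_path = os.getenv("MODEL_PATH")
if not model_path:
    print(
        "WARNING: MODEL_PATH is not set. Point it at a local model file, e.g. "
        "`export MODEL_PATH=./model.bin`, or configure it as a variable in the Space settings."
    )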
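For the "#download model here / check localstorage" TODO, one option (an assumption, since the comment still asks whether Gradio offers something built in) is huggingface_hub's hf_hub_download, which downloads into the local cache and reuses an existing copy on later runs. The repo id below comes from the model name removed in this commit; the exact quantization filename is a guess and should be checked against the repo's file list:

from huggingface_hub import hf_hub_download

# first run downloads into the local Hugging Face cache; later runs reuse the cached file
model_file = hf_hub_download(
    repo_id="TheBloke/dolphin-2.2.1-AshhLimaRP-Mistral-7B-GGUF",
    filename="dolphin-2.2.1-ashhlimarp-mistral-7b.Q4_K_M.gguf",  # assumed filename, verify in the repo
)
print(f"debug: model available at {model_file}")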
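The new comment before generate() about running only one thread to avoid running out of RAM could be enforced with a module-level lock that serializes overlapping requests; a rough sketch, where the lock name and placement are assumptions:

import threading

generation_lock = threading.Lock()

def generate(message: str, chat_history: list[tuple[str, str]]) -> str:
    # only one request generates at a time; others block here until the lock is released
    with generation_lock:
        ...  # actual model call goes here
        return ""

Gradio's request queue can serve a similar purpose; a plain lock is shown only because it does not depend on a particular Gradio version.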
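One way to read the "group by type" TODO on the examples list is to keep a flat list and mark the groups with comments; the extra prompts below are illustrative placeholders, not taken from any particular eval set:

examples = [
    # small talk
    ["Hello there! How are you doing?"],
    # explanations
    ["Can you explain briefly to me what is the Python programming language?"],
    # coding
    ["Write a Python function that checks whether a string is a palindrome."],
    # arithmetic / reasoning
    ["I have 3 apples and eat one. How many do I have left?"],
]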