Spaces: Running on Zero
Update app.py
app.py CHANGED
@@ -1,4 +1,3 @@
-import os
 from threading import Thread
 from typing import Iterator
 
@@ -11,9 +10,15 @@ MAX_MAX_NEW_TOKENS = 2048
 DEFAULT_MAX_NEW_TOKENS = 1024
 MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))
 
+MODELS = {
+    "Nekochu/Luminia-13B-v3": "Default - Nekochu/Luminia-13B-v3",
+    "Nekochu/Llama-2-13B-German-ORPO": "German ORPO - Nekochu/Llama-2-13B-German-ORPO",
+}
+
 DESCRIPTION = """\
-#
-
+# Text Generation with Selectable Models
+
+This Space demonstrates text generation using different models. Choose a model from the dropdown and experience its creative capabilities!
 """
 
 LICENSE = """
@@ -21,41 +26,26 @@ LICENSE = """
 ---.
 """
 
-def load_model(model_id):
-    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
-    tokenizer = AutoTokenizer.from_pretrained(model_id)
-    tokenizer.use_default_system_prompt = False
-    return model, tokenizer
-
 if not torch.cuda.is_available():
-    DESCRIPTION += "\n<p>Running on CPU
+    DESCRIPTION += "\n<p>Running on CPU This demo does not work on CPU.</p>"
 
-if torch.cuda.is_available():
-    model_id = "Nekochu/Luminia-13B-v3"
-    model, tokenizer = load_model(model_id)
 
-MODELS = [
-    {"name": "Nekochu/Luminia-13B-v3", "id": "Nekochu/Luminia-13B-v3"},
-    {"name": "Nekochu/Llama-2-13B-German-ORPO", "id": "Nekochu/Llama-2-13B-German-ORPO"},
-    # Add more models here in the future
-]
-
-@spaces.GPU(duration=120)
 def generate(
-    model_dropdown: str,
-    custom_model_id: str,
     message: str,
     chat_history: list[tuple[str, str]],
     system_prompt: str,
+    model_id: str = None,  # Add default value for model_id
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-
-
-    model,
+    if not model_id:
+        raise ValueError("Please select a model from the dropdown.")
+    model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto", load_in_4bit=True)
+    tokenizer = AutoTokenizer.from_pretrained(model_id)
+    tokenizer.use_default_system_prompt = False
 
     conversation = []
     if system_prompt:
@@ -90,25 +80,20 @@ def generate(
     outputs.append(text)
     yield "".join(outputs)
 
-
-model_dropdown = gr.Dropdown(
-    choices=[model["name"] for model in MODELS],
-    value=MODELS[0]["name"],  # Default to the first model
-)
-custom_model_id_input = gr.Textbox(label="Or Enter Custom Model ID", placeholder="Enter model ID here")
+
+model_dropdown = gr.Dropdown(label="Select Model", choices=list(MODELS.values()))
 
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
         model_dropdown,
-        custom_model_id_input,
         gr.Textbox(label="System prompt", lines=6),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
             maximum=MAX_MAX_NEW_TOKENS,
             step=1,
-            value=
+            value=DEFAULT_MAX_NEW_TOKENS,
         ),
         gr.Slider(
             label="Temperature",