grahamwhiteuk committed
Commit da09cca · Parent: 05fd483

feat: granite 3.1 with model selection


Signed-off-by: Graham White <[email protected]>

Files changed (4):
  1. pyproject.toml +7 -3
  2. src/app.css +14 -0
  3. src/app.py +65 -26
  4. src/app_head.html +4 -0
pyproject.toml CHANGED
@@ -1,8 +1,12 @@
 [tool.poetry]
-name = "huggingface-gradio-template"
+name = "granite-3.1-8b-instruct"
 version = "0.1.0"
-description = "A boilerplate template for an IBM Granite Huggingface Spaces Gradio Demo"
-authors = ["James Sutton <[email protected]>"]
+description = "A demo of the IBM Granite 3.1 8b instruct model"
+authors = [
+    "James Sutton <[email protected]>",
+    "Graham White <[email protected]>",
+    "Michael Desmond <[email protected]>",
+]
 license = "Apache-2.0"
 readme = "README.md"
 package-mode = false
src/app.css CHANGED
@@ -1,3 +1,17 @@
 footer {
   display: none !important;
 }
+.gr_docs_link {
+  float: right;
+  font-size: var(--text-xs);
+  margin-top: -8px;
+}
+.gr_title {
+  display: flex;
+  align-items: center;
+}
+.gr_title img {
+  max-height: 40px;
+  margin-right: 1rem;
+  margin-bottom: -10px;
+}
src/app.py CHANGED
@@ -14,25 +14,28 @@ from themes.carbon import carbon_theme
 
 today_date = datetime.today().strftime("%B %-d, %Y")  # noqa: DTZ002
 
-MODEL_ID = "ibm-granite/granite-3.1-8b-instruct"
 SYS_PROMPT = f"""Knowledge Cutoff Date: April 2024.
 Today's Date: {today_date}.
 You are Granite, developed by IBM. You are a helpful AI assistant"""
 TITLE = "IBM Granite 3.1 8b Instruct"
 DESCRIPTION = "Try one of the sample prompts below or write your own. Remember, just like developers, \
 AI models can make mistakes."
-MAX_INPUT_TOKEN_LENGTH = 4096
+MAX_INPUT_TOKEN_LENGTH = 128_000
 MAX_NEW_TOKENS = 1024
 TEMPERATURE = 0.7
 TOP_P = 0.85
 TOP_K = 50
 REPETITION_PENALTY = 1.05
 
+model_list = ["granite-3.1-8b-instruct", "granite-3.1-2b-instruct"]
+
 if not torch.cuda.is_available():
     DESCRIPTION += "\nThis demo does not work on CPU."
 
-model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float16, device_map="auto")
-tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
+model = AutoModelForCausalLM.from_pretrained(
+    "ibm-granite/granite-3.1-8b-instruct", torch_dtype=torch.float16, device_map="auto"
+)
+tokenizer = AutoTokenizer.from_pretrained("ibm-granite/granite-3.1-8b-instruct")
 tokenizer.use_default_system_prompt = False
 
 
@@ -46,11 +49,13 @@ def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
     conversation.append({"role": "user", "content": message})
 
     # Convert messages to prompt format
-    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt", add_generation_prompt=True)
-
-    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
-        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
-        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
+    input_ids = tokenizer.apply_chat_template(
+        conversation,
+        return_tensors="pt",
+        add_generation_prompt=True,
+        truncation=True,
+        max_length=MAX_INPUT_TOKEN_LENGTH,
+    )
 
     input_ids = input_ids.to(model.device)
     streamer = TextIteratorStreamer(tokenizer, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
@@ -75,28 +80,62 @@ def generate(message: str, chat_history: list[dict]) -> Iterator[str]:
     yield "".join(outputs)
 
 
-chat_interface = gr.ChatInterface(
-    fn=generate,
-    stop_btn=None,
-    examples=[
-        ["Explain quantum computing"],
-        ["What is OpenShift?"],
-        ["Importance of low latency inference"],
-        ["Boosting productivity habits"],
-    ],
-    cache_examples=False,
-    type="messages",
-)
-
 css_file_path = Path(Path(__file__).parent / "app.css")
 head_file_path = Path(Path(__file__).parent / "app_head.html")
 
+
+def on_model_dropdown_change(model_name: str) -> list:
+    """Event handler for dropdown."""
+    global model
+    global tokenizer
+
+    model = AutoModelForCausalLM.from_pretrained(
+        f"ibm-granite/{model_name}", torch_dtype=torch.float16, device_map="auto"
+    )
+    tokenizer = AutoTokenizer.from_pretrained(f"ibm-granite/{model_name}")
+    tokenizer.use_default_system_prompt = False
+
+    # clear the chat interface when the model dropdown is changed
+    # works around https://github.com/gradio-app/gradio/issues/10343
+    return [None, []]
+
+
 with gr.Blocks(
     fill_height=True, css_paths=css_file_path, head_paths=head_file_path, theme=carbon_theme, title=TITLE
 ) as demo:
-    gr.Markdown(f"# {TITLE}")
-    gr.Markdown(DESCRIPTION)
-    chat_interface.render()
+    gr.HTML(
+        f"<img src='https://www.ibm.com/granite/docs/images/granite-cubes-352x368.webp'/><h1>{TITLE}</h1>",
+        elem_classes=["gr_title"],
+    )
+    gr.HTML(DESCRIPTION)
+    model_dropdown = gr.Dropdown(
+        choices=model_list,
+        value="granite-3.1-8b-instruct",
+        interactive=True,
+        label="Model",
+        filterable=False,
+    )
+    gr.HTML(
+        value='<a href="https://www.ibm.com/granite/docs/">View Documentation</a> <i class="fa fa-external-link"></i>',
+        elem_classes=["gr_docs_link"],
+    )
+    chat_interface = gr.ChatInterface(
+        fn=generate,
+        examples=[
+            ["Explain quantum computing"],
+            ["What is OpenShift?"],
+            ["Importance of low latency inference"],
+            ["Boosting productivity habits"],
+        ],
+        cache_examples=False,
+        type="messages",
+    )
+
+    model_dropdown.change(
+        fn=on_model_dropdown_change,
+        inputs=model_dropdown,
+        outputs=[chat_interface.chatbot, chat_interface.chatbot_state],
+    )
 
 if __name__ == "__main__":
-    demo.queue(max_size=20).launch()
+    demo.queue().launch()
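Two notes on the src/app.py changes above (reviewer commentary, not part of the commit):

First, apply_chat_template(..., truncation=True, max_length=...) hands trimming to the tokenizer, which clips from the side named by tokenizer.truncation_side (commonly "right", i.e. the newest tokens), whereas the deleted code kept the most recent MAX_INPUT_TOKEN_LENGTH tokens. At a 128,000-token limit this should rarely trigger, but a minimal sketch of preserving the old keep-the-most-recent behavior, assuming the surrounding app.py scope:

    tokenizer.truncation_side = "left"  # drop the oldest tokens first, as the old code did
    input_ids = tokenizer.apply_chat_template(
        conversation,
        return_tensors="pt",
        add_generation_prompt=True,
        truncation=True,
        max_length=MAX_INPUT_TOKEN_LENGTH,
    )

Second, on_model_dropdown_change calls from_pretrained on every switch, so each dropdown change blocks while the checkpoint reloads. A sketch of one alternative, memoizing both variants, assuming the Space has enough GPU memory to hold the 8b and 2b models at once (the helper name load_granite is hypothetical):

    from functools import lru_cache

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer


    @lru_cache(maxsize=2)  # one cache slot per Granite 3.1 variant
    def load_granite(model_name: str) -> tuple:
        """Load a Granite checkpoint once and reuse it on later switches."""
        model = AutoModelForCausalLM.from_pretrained(
            f"ibm-granite/{model_name}", torch_dtype=torch.float16, device_map="auto"
        )
        tokenizer = AutoTokenizer.from_pretrained(f"ibm-granite/{model_name}")
        tokenizer.use_default_system_prompt = False
        return model, tokenizer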
src/app_head.html CHANGED
@@ -1,3 +1,7 @@
+<link
+  rel="stylesheet"
+  href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"
+/>
 <script
   async
   src="https://www.googletagmanager.com/gtag/js?id=G-C6LFT227RC"