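"""Gradio demo that chains a FastAI image classifier with a local Llama-2 chat model.

An uploaded image is classified by the FastAI learner; the predicted label then
seeds a streamed response from a llama.cpp model.
"""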
import os
import gradio as gr
from fastai.vision.all import *
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Load the FastAI vision model
learn = load_learner('export.pkl')
labels = learn.dls.vocab

# Load the Llama language model
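# Note: this default is a GGML checkpoint; recent llama-cpp-python releases only
# read GGUF, so an older llama-cpp-python build (or a GGUF model file) may be needed.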
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7B-Chat-GGML"),
        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.ggmlv3.q5_0.bin"),
    ),
    n_ctx=2048,
    n_gpu_layers=50,
)

history = []
system_message = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  
"""

def generate_text(message, history):
    temp = ""
    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
    # Fold prior turns into the prompt, then append the new user message.
    for user_msg, assistant_msg in history:
        input_prompt += f"{user_msg} [/INST] {assistant_msg} </s><s> [INST] "

    input_prompt += f"{message} [/INST] "

    output = llm(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=[
            "</s>",
            "<s>",
            " \n",
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )
    # Accumulate streamed chunks and yield the running text for live display.
    for out in output:
        stream = copy.deepcopy(out)
        temp += stream["choices"][0]["text"]
        yield temp

    # Record the exchange as a (user, assistant) pair, matching the format the
    # prompt-building loop above expects.
    history.append((message, temp))

# Classify the image with the FastAI model, then stream a Llama response about the detected object
def predict_with_llama_and_generate_text(img):
    img = PILImage.create(img)
    pred, pred_idx, probs = learn.predict(img)
    detected_object = labels[pred_idx]
    
    response = f"The system has detected {detected_object}. Do you want to know about {detected_object}?"
    
    for llama_response in generate_text(response, history):
        yield llama_response

# Define the Gradio interface
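# Because the prediction function is a generator, Gradio streams each yielded
# chunk into the Textbox as the model produces tokens.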
gr.Interface(
    fn=predict_with_llama_and_generate_text,
    inputs=gr.Image(type="pil"),  # gr.inputs.Image(shape=...) was removed in Gradio 4; the learner's transforms handle resizing
    outputs=gr.Textbox(),
    title="Multimodal Assistant",
    description="An AI model that combines image classification with text generation.",
).launch()
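
# To run locally (assumed, unpinned dependency set; the original does not specify one):
#   pip install fastai gradio llama-cpp-python huggingface-hub
#   python app.py
# `export.pkl` must sit next to this script; the REPO_ID and MODEL_FILE
# environment variables can point at a different Llama checkpoint.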