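"""Gradio demo that chains a FastAI image classifier with a local Llama-2 chat model.

An uploaded image is classified by the FastAI learner; the predicted label then
seeds a streamed response from a llama.cpp model.
"""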
import os
import gradio as gr
from fastai.vision.all import *
import copy
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

# Load the FastAI vision model
learn = load_learner('export.pkl')
labels = learn.dls.vocab

# Load the Llama language model
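# Note: this default is a GGML checkpoint; recent llama-cpp-python releases only
# read GGUF, so an older llama-cpp-python build (or a GGUF model file) may be needed.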
llm = Llama(
    model_path=hf_hub_download(
        repo_id=os.environ.get("REPO_ID", "TheBloke/Llama-2-7B-Chat-GGML"),
        filename=os.environ.get("MODEL_FILE", "llama-2-7b-chat.ggmlv3.q5_0.bin"),
    ),
    n_ctx=2048,
    n_gpu_layers=50,
)

history = []
system_message = """
You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe.  
"""

def generate_text(message, history):
    temp = ""
    input_prompt = f"[INST] <<SYS>>\n{system_message}\n<</SYS>>\n\n "
    # Fold prior turns into the prompt, then append the new user message.
    for user_msg, assistant_msg in history:
        input_prompt += f"{user_msg} [/INST] {assistant_msg} </s><s> [INST] "

    input_prompt += f"{message} [/INST] "

    output = llm(
        input_prompt,
        temperature=0.15,
        top_p=0.1,
        top_k=40,
        repeat_penalty=1.1,
        max_tokens=1024,
        stop=[
            "</s>",
            "<s>",
            " \n",
            "ASSISTANT:",
            "USER:",
            "SYSTEM:",
        ],
        stream=True,
    )
    # Accumulate streamed chunks and yield the running text for live display.
    for out in output:
        stream = copy.deepcopy(out)
        temp += stream["choices"][0]["text"]
        yield temp

    # Record the exchange as a (user, assistant) pair, matching the format the
    # prompt-building loop above expects.
    history.append((message, temp))

# Classify the image with the FastAI model, then stream a Llama response about the detected object
def predict_with_llama_and_generate_text(img):
    img = PILImage.create(img)
    pred, pred_idx, probs = learn.predict(img)
    detected_object = labels[pred_idx]
    
    response = f"The system has detected {detected_object}. Do you want to know about {detected_object}?"
    
    for llama_response in generate_text(response, history):
        yield llama_response

# Define the Gradio interface
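# Because the prediction function is a generator, Gradio streams each yielded
# chunk into the Textbox as the model produces tokens.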
gr.Interface(
    fn=predict_with_llama_and_generate_text,
    inputs=gr.Image(type="pil"),  # gr.inputs.Image(shape=...) was removed in Gradio 4; the learner's transforms handle resizing
    outputs=gr.Textbox(),
    title="Multimodal Assistant",
    description="An AI model that combines image classification with text generation.",
).launch()
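
# To run locally (assumed, unpinned dependency set; the original does not specify one):
#   pip install fastai gradio llama-cpp-python huggingface-hub
#   python app.py
# `export.pkl` must sit next to this script; the REPO_ID and MODEL_FILE
# environment variables can point at a different Llama checkpoint.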