from huggingface_hub import InferenceClient, HfApi
import gradio as gr
import datetime
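
# Gradio app that lists text-generation models from the Hugging Face Hub,
# lets the user pick or search for one, and batch-tests each against the
# Hugging Face Inference API, streaming per-model results as JSON.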

api = HfApi()

# Seed the dropdown with every text-generation model currently listed on the Hub.
return_list = []
this_obj = list(api.list_models(filter="text-generation"))
for model_info in this_obj:
    return_list.append(model_info.id)

def search_models(query=None):
    # Refresh the dropdown choices, optionally narrowed by a search query.
    model_list = api.list_models(filter="text-generation", search=query)
    choices = [model_info.id for model_info in model_list]
    return gr.update(choices=choices, interactive=True)

def test_fn():
    # Probe each model with a short prompt and stream the accumulated results
    # (success text or error message, plus elapsed time) to the JSON output.
    out_box = []
    generate_kwargs = dict(
        temperature=0.9,
        max_new_tokens=30,
        top_p=0.95,
        repetition_penalty=1.0,
        do_sample=True,
        seed=42,
    )
    prompt = "what is a cat?"
    for model_info in this_obj:
        model = model_info.id
        start = datetime.datetime.now()
        try:
            client = InferenceClient(model)
            out = client.text_generation(prompt, **generate_kwargs)
            tot_time = datetime.datetime.now() - start
            print(f"{model}--{out}")
            out_box.append({"name": model, "error": "", "success": f"{out}", "time": str(tot_time)})
        except Exception as e:
            tot_time = datetime.datetime.now() - start
            print(f"{model}--Error :: {e}")
            out_box.append({"name": model, "error": f"Error :: {e}", "success": "", "time": str(tot_time)})
        yield out_box


def format_prompt(message, history):
    # Build a Mistral/Llama-style instruction prompt from the chat history.
    prompt = "<s>"
    for user_prompt, bot_response in history:
        prompt += f"[INST] {user_prompt} [/INST]"
        prompt += f" {bot_response}</s> "
    prompt += f"[INST] {message} [/INST]"
    return prompt

def generate(
    prompt, inf_client, stream, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
):
    client = InferenceClient(inf_client)

    # Keep temperature strictly positive; 0.0 is invalid when do_sample=True.
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)

    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    if stream:
        # With stream=True (and details=False) the client yields plain text
        # chunks; accumulate them into a single string.
        output = ""
        for chunk in client.text_generation(prompt, **generate_kwargs, stream=True):
            output += chunk
        return output
    return client.text_generation(prompt, **generate_kwargs)

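# Generation controls for the (currently disabled) ChatInterface demo at the bottom of the file.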
additional_inputs=[
    gr.Textbox(
        label="System Prompt",
        max_lines=1,
        interactive=True,
    ),
    gr.Slider(
        label="Temperature",
        value=0.9,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values produce more diverse outputs",
    ),
    gr.Slider(
        label="Max new tokens",
        value=1048,
        minimum=0,
        maximum=1048*10,
        step=64,
        interactive=True,
        info="The maximum numbers of new tokens",
    ),
    gr.Slider(
        label="Top-p (nucleus sampling)",
        value=0.90,
        minimum=0.0,
        maximum=1.0,
        step=0.05,
        interactive=True,
        info="Higher values sample more low-probability tokens",
    ),
    gr.Slider(
        label="Repetition penalty",
        value=1.2,
        minimum=1.0,
        maximum=2.0,
        step=0.05,
        interactive=True,
        info="Penalize repeated tokens",
    )
]

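# Sample prompts for the ChatInterface demo; unused while it is commented out.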
examples=[["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None, ],
          ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None,],
          ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None,],
          ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None,],
          ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None,],
          ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None,],
         ]
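
# Test-harness UI: a search box that filters the model dropdown, a button
# that kicks off the batch test, and a JSON panel that streams the results.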
with gr.Blocks() as app:
    gr.Markdown("""
    graph TD
A[Start] -->B[Step 1]
B --> C[Step 2]
C --> D[Step 3] --> A
D --> E[Step 4]
E --> F[End]""")
    with gr.Row():
        inp_query = gr.Textbox()
        models_dd = gr.Dropdown(choices=return_list, interactive=True)
    with gr.Row():
        button = gr.Button()
        stop_button = gr.Button("Stop")
    text = gr.JSON()
    inp_query.change(search_models, inp_query, models_dd)
    go = button.click(test_fn, None, text)
    stop_button.click(None, None, None, cancels=[go])
app.launch()

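# Alternative entry point kept for reference: a full chat UI wired to generate().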
"""gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"),
    additional_inputs=additional_inputs,
    title="Mixtral 46.7B",
    examples=examples,
    concurrency_limit=20,
).launch(show_api=False)"""