from huggingface_hub import InferenceClient, HfApi import gradio as gr import datetime return_list=[] api=HfApi() model_list = api.list_models(filter="text-generation") this_obj = list(model_list) for i,eb in enumerate(this_obj): return_list.append(this_obj[i].id) def search_models(a=None): model_list = api.list_models(filter="text-generation") this_obj = list(model_list) for i,eb in enumerate(this_obj): return_list.append(this_obj[i].id) return gr.update(choices=[m for m in return_list],interactive=True) ''' def search_models(query=None,amount=20): model_list = api.list_models(filter="text-generation", search =f'{query}') #print (model_list) #print (dir(model_list)) this_obj = list(model_list) #print (len(this_obj)) #print (this_obj[0]) #print (this_obj[0].id) for i,eb in enumerate(this_obj): return_list.append(this_obj[i].id) return gr.update(choices=[m for m in return_list],interactive=True) ''' def test_fn(): out_box=[] generate_kwargs = dict( temperature=0.9, max_new_tokens=30, top_p=0.95, repetition_penalty=1.0, do_sample=True, seed=42, ) prompt = "what is a cat?" for i,ea in enumerate(this_obj): model = this_obj[i].id try: client = InferenceClient(f'{model}') time = datetime.datetime.now() out = client.text_generation(prompt, **generate_kwargs) print (this_obj[i].id +"--"+out) tot_time=datetime.datetime.now()-time out_box.append({"name":this_obj[i].id, "error":f"Function 1 Error :: {e}", "success":f"{out}","time":tot_time}) yield (out_box) except Exception as e: print (f'{this_obj[i].id}--Function 2 Error :: {e}') tot_time=datetime.datetime.now()-time out_box.append({"name":this_obj[i].id, "error":f"Function 2 Error :: {e}", "success":"","time":tot_time}) yield (out_box) ''' try: client2 = InferenceClient(this_obj[i].id) time = datetime.datetime.now() out = client2.text_generation(prompt, **generate_kwargs, stream=False) print (this_obj[i].id +"--"+out1) tot_time=datetime.datetime.now()-time out_box.append({"name":this_obj[i].id, "error":f"Function 1 Error :: {e}", "success":f"{out1}","time":tot_time}) yield (out_box) except Exception as e: print (f'{this_obj[i].id}--Function 3 Error :: {e}') tot_time=datetime.datetime.now()-time out_box.append({"name":this_obj[i].id, "error":f"Function 3 Error :: {e}", "success":"","time":tot_time}) yield (out_box) ''' def format_prompt(message, history): prompt = "" for user_prompt, bot_response in history: prompt += f"[INST] {user_prompt} [/INST]" prompt += f" {bot_response} " prompt += f"[INST] {message} [/INST]" return prompt def generate( prompt, inf_client, stream, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0, ): client = InferenceClient(inf_client) temperature = float(temperature) if temperature < 1e-2: temperature = 1e-2 top_p = float(top_p) generate_kwargs = dict( temperature=temperature, max_new_tokens=30, top_p=top_p, repetition_penalty=repetition_penalty, do_sample=True, seed=42, ) output = client.text_generation(prompt, **generate_kwargs, stream=stream) ''' #formatted_prompt = format_prompt(f"{system_prompt}, {prompt}", history) stream = client.text_generation(prompt, **generate_kwargs, stream=stream) #stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=stream, details=True, return_full_text=False) output = "" for response in stream: output += response.token.text #yield output ''' return output additional_inputs=[ gr.Textbox( label="System Prompt", max_lines=1, interactive=True, ), gr.Slider( label="Temperature", value=0.9, minimum=0.0, maximum=1.0, step=0.05, interactive=True, info="Higher values produce more diverse outputs", ), gr.Slider( label="Max new tokens", value=1048, minimum=0, maximum=1048*10, step=64, interactive=True, info="The maximum numbers of new tokens", ), gr.Slider( label="Top-p (nucleus sampling)", value=0.90, minimum=0.0, maximum=1, step=0.05, interactive=True, info="Higher values sample more low-probability tokens", ), gr.Slider( label="Repetition penalty", value=1.2, minimum=1.0, maximum=2.0, step=0.05, interactive=True, info="Penalize repeated tokens", ) ] examples=[["I'm planning a vacation to Japan. Can you suggest a one-week itinerary including must-visit places and local cuisines to try?", None, None, None, None, None, ], ["Can you write a short story about a time-traveling detective who solves historical mysteries?", None, None, None, None, None,], ["I'm trying to learn French. Can you provide some common phrases that would be useful for a beginner, along with their pronunciations?", None, None, None, None, None,], ["I have chicken, rice, and bell peppers in my kitchen. Can you suggest an easy recipe I can make with these ingredients?", None, None, None, None, None,], ["Can you explain how the QuickSort algorithm works and provide a Python implementation?", None, None, None, None, None,], ["What are some unique features of Rust that make it stand out compared to other systems programming languages like C++?", None, None, None, None, None,], ] with gr.Blocks() as app: gr.Markdown(""" graph TD A[Start] -->B[Step 1] B --> C[Step 2] C --> D[Step 3] --> A D --> E[Step 4] E --> F[End]""") with gr.Row(): inp_query=gr.Textbox() models_dd=gr.Dropdown(choices=[m for m in return_list],interactive=True) with gr.Row(): button=gr.Button() stop_button=gr.Button("Stop") text=gr.JSON() inp_query.change(search_models,inp_query,models_dd) go=button.click(test_fn,None,text) stop_button.click(None,None,None,cancels=[go]) app.launch() """gr.ChatInterface( fn=generate, chatbot=gr.Chatbot(show_label=False, show_share_button=False, show_copy_button=True, likeable=True, layout="panel"), additional_inputs=additional_inputs, title="Mixtral 46.7B", examples=examples, concurrency_limit=20, ).launch(show_api=False)"""