Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	Update app.py
Browse files
    	
        app.py
    CHANGED
    
    | 
         @@ -1,7 +1,6 @@ 
     | 
|
| 1 | 
         
             
            import os
         
     | 
| 2 | 
         
             
            import torch
         
     | 
| 3 | 
         
             
            import gradio as gr
         
     | 
| 4 | 
         
            -
            import ipywidgets as widgets
         
     | 
| 5 | 
         
             
            from pathlib import Path
         
     | 
| 6 | 
         
             
            from transformers import AutoConfig, AutoTokenizer
         
     | 
| 7 | 
         
             
            from optimum.intel.openvino import OVModelForCausalLM
         
     | 
| 
         @@ -18,7 +17,6 @@ import requests 
     | 
|
| 18 | 
         | 
| 19 | 
         
             
            # Define the model loading function (same as in your notebook)
         
     | 
| 20 | 
         
             
            def convert_to_int4(model_id, model_configuration, enable_awq=False):
         
     | 
| 21 | 
         
            -
                # Model conversion logic here (same as in notebook)
         
     | 
| 22 | 
         
             
                compression_configs = {
         
     | 
| 23 | 
         
             
                    "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0},
         
     | 
| 24 | 
         
             
                    "default": {"sym": False, "group_size": 128, "ratio": 0.8},
         
     | 
| 
         @@ -45,10 +43,8 @@ def convert_to_int4(model_id, model_configuration, enable_awq=False): 
     | 
|
| 45 | 
         
             
                os.system(export_command)
         
     | 
| 46 | 
         
             
                return int4_model_dir
         
     | 
| 47 | 
         | 
| 48 | 
         
            -
             
     | 
| 49 | 
         
             
            # Model and tokenizer loading
         
     | 
| 50 | 
         
             
            def load_model(model_dir, device):
         
     | 
| 51 | 
         
            -
                # Load model using OpenVINO
         
     | 
| 52 | 
         
             
                ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
         
     | 
| 53 | 
         
             
                core = ov.Core()
         
     | 
| 54 | 
         
             
                model_name = model_configuration["model_id"]
         
     | 
| 
         @@ -64,7 +60,7 @@ def load_model(model_dir, device): 
     | 
|
| 64 | 
         | 
| 65 | 
         
             
                return ov_model, tok
         
     | 
| 66 | 
         | 
| 67 | 
         
            -
            #  
     | 
| 68 | 
         
             
            def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
         
     | 
| 69 | 
         
             
                input_ids = convert_history_to_token(history)
         
     | 
| 70 | 
         
             
                if input_ids.shape[1] > 2000:
         
     | 
| 
         @@ -99,23 +95,56 @@ def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id) 
     | 
|
| 99 | 
         
             
                    history[-1][1] = partial_text
         
     | 
| 100 | 
         
             
                    yield history
         
     | 
| 101 | 
         | 
| 102 | 
         
            -
            # Gradio interface  
     | 
| 103 | 
         
             
            def create_gradio_interface():
         
     | 
| 104 | 
         
            -
                 
     | 
| 105 | 
         
            -
                 
     | 
| 106 | 
         
            -
                
         
     | 
| 107 | 
         
            -
                # Choose model based on the selected language
         
     | 
| 108 | 
         
            -
                model_configuration = SUPPORTED_LLM_MODELS[model_language[0]][model_id.value]
         
     | 
| 109 | 
         
            -
                
         
     | 
| 110 | 
         
            -
                # Prepare model (convert to INT4, etc.)
         
     | 
| 111 | 
         
            -
                int4_model_dir = convert_to_int4(model_id.value, model_configuration)
         
     | 
| 112 | 
         
            -
                
         
     | 
| 113 | 
         
            -
                # Load model and tokenizer
         
     | 
| 114 | 
         
            -
                device = device_widget("CPU")
         
     | 
| 115 | 
         
            -
                ov_model, tok = load_model(int4_model_dir, device)
         
     | 
| 116 | 
         | 
| 117 | 
         
            -
                #  
     | 
| 118 | 
         
            -
                 
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 119 | 
         | 
| 120 | 
         
             
                return demo
         
     | 
| 121 | 
         | 
| 
         @@ -123,4 +152,3 @@ def create_gradio_interface(): 
     | 
|
| 123 | 
         
             
            if __name__ == "__main__":
         
     | 
| 124 | 
         
             
                app = create_gradio_interface()
         
     | 
| 125 | 
         
             
                app.launch(debug=True, share=True)  # share=True for public access
         
     | 
| 126 | 
         
            -
             
     | 
| 
         | 
|
| 1 | 
         
             
            import os
         
     | 
| 2 | 
         
             
            import torch
         
     | 
| 3 | 
         
             
            import gradio as gr
         
     | 
| 
         | 
|
| 4 | 
         
             
            from pathlib import Path
         
     | 
| 5 | 
         
             
            from transformers import AutoConfig, AutoTokenizer
         
     | 
| 6 | 
         
             
            from optimum.intel.openvino import OVModelForCausalLM
         
     | 
| 
         | 
|
| 17 | 
         | 
| 18 | 
         
             
            # Define the model loading function (same as in your notebook)
         
     | 
| 19 | 
         
             
            def convert_to_int4(model_id, model_configuration, enable_awq=False):
         
     | 
| 
         | 
|
| 20 | 
         
             
                compression_configs = {
         
     | 
| 21 | 
         
             
                    "qwen2.5-0.5b-instruct": {"sym": True, "group_size": 128, "ratio": 1.0},
         
     | 
| 22 | 
         
             
                    "default": {"sym": False, "group_size": 128, "ratio": 0.8},
         
     | 
| 
         | 
|
| 43 | 
         
             
                os.system(export_command)
         
     | 
| 44 | 
         
             
                return int4_model_dir
         
     | 
| 45 | 
         | 
| 
         | 
|
| 46 | 
         
             
            # Model and tokenizer loading
         
     | 
| 47 | 
         
             
            def load_model(model_dir, device):
         
     | 
| 
         | 
|
| 48 | 
         
             
                ov_config = {hints.performance_mode(): hints.PerformanceMode.LATENCY, streams.num(): "1", props.cache_dir(): ""}
         
     | 
| 49 | 
         
             
                core = ov.Core()
         
     | 
| 50 | 
         
             
                model_name = model_configuration["model_id"]
         
     | 
| 
         | 
|
| 60 | 
         | 
| 61 | 
         
             
                return ov_model, tok
         
     | 
| 62 | 
         | 
| 63 | 
         
            +
            # Gradio Interface for Bot interaction
         
     | 
| 64 | 
         
             
            def bot(history, temperature, top_p, top_k, repetition_penalty, conversation_id):
         
     | 
| 65 | 
         
             
                input_ids = convert_history_to_token(history)
         
     | 
| 66 | 
         
             
                if input_ids.shape[1] > 2000:
         
     | 
| 
         | 
|
| 95 | 
         
             
                    history[-1][1] = partial_text
         
     | 
| 96 | 
         
             
                    yield history
         
     | 
| 97 | 
         | 
| 98 | 
         
            +
            # Define a Gradio interface for user interaction
         
     | 
| 99 | 
         
             
            def create_gradio_interface():
         
     | 
| 100 | 
         
            +
                # Dropdown for selecting model language and model ID
         
     | 
| 101 | 
         
            +
                model_language = list(SUPPORTED_LLM_MODELS.keys())  # List of model languages
         
     | 
| 102 | 
         
            +
                model_id = gr.Dropdown(choices=model_language, value=model_language[0], label="Model Language")
         
     | 
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 
         | 
|
| 103 | 
         | 
| 104 | 
         
            +
                # Once model language is selected, show the respective model IDs
         
     | 
| 105 | 
         
            +
                def update_model_ids(model_language):
         
     | 
| 106 | 
         
            +
                    model_ids = list(SUPPORTED_LLM_MODELS[model_language].keys())
         
     | 
| 107 | 
         
            +
                    return gr.Dropdown.update(choices=model_ids, value=model_ids[0])
         
     | 
| 108 | 
         
            +
             
     | 
| 109 | 
         
            +
                model_id_selector = gr.Dropdown(choices=model_language, value=model_language[0], label="Model ID")
         
     | 
| 110 | 
         
            +
                model_id_selector.change(update_model_ids, inputs=model_language, outputs=model_id_selector)
         
     | 
| 111 | 
         
            +
             
     | 
| 112 | 
         
            +
                # Set up a checkbox for enabling AWQ compression
         
     | 
| 113 | 
         
            +
                enable_awq = gr.Checkbox(value=False, label="Enable AWQ for Compression")
         
     | 
| 114 | 
         
            +
             
     | 
| 115 | 
         
            +
                # Initialize model selection based on language and ID
         
     | 
| 116 | 
         
            +
                def load_model_on_select(model_language, model_id, enable_awq):
         
     | 
| 117 | 
         
            +
                    model_configuration = SUPPORTED_LLM_MODELS[model_language][model_id]
         
     | 
| 118 | 
         
            +
                    int4_model_dir = convert_to_int4(model_id, model_configuration, enable_awq)
         
     | 
| 119 | 
         
            +
                    
         
     | 
| 120 | 
         
            +
                    # Load the model and tokenizer
         
     | 
| 121 | 
         
            +
                    device = device_widget("CPU")  # or any device you want to use
         
     | 
| 122 | 
         
            +
                    ov_model, tok = load_model(int4_model_dir, device)
         
     | 
| 123 | 
         
            +
                    
         
     | 
| 124 | 
         
            +
                    # Return the loaded model and tokenizer
         
     | 
| 125 | 
         
            +
                    return ov_model, tok
         
     | 
| 126 | 
         
            +
             
     | 
| 127 | 
         
            +
                # Connect model selection UI to load model dynamically
         
     | 
| 128 | 
         
            +
                load_button = gr.Button("Load Model")
         
     | 
| 129 | 
         
            +
                load_button.click(load_model_on_select, inputs=[model_language, model_id, enable_awq], outputs=[gr.Textbox(label="Model Status")])
         
     | 
| 130 | 
         
            +
             
     | 
| 131 | 
         
            +
                # Create the Gradio chatbot interface
         
     | 
| 132 | 
         
            +
                chatbot = gr.Chatbot()
         
     | 
| 133 | 
         
            +
             
     | 
| 134 | 
         
            +
                # Parameters for bot generation
         
     | 
| 135 | 
         
            +
                temperature = gr.Slider(minimum=0, maximum=1, step=0.1, label="Temperature", value=0.7)
         
     | 
| 136 | 
         
            +
                top_p = gr.Slider(minimum=0, maximum=1, step=0.1, label="Top-p", value=0.9)
         
     | 
| 137 | 
         
            +
                top_k = gr.Slider(minimum=0, maximum=50, step=1, label="Top-k", value=50)
         
     | 
| 138 | 
         
            +
                repetition_penalty = gr.Slider(minimum=0, maximum=2, step=0.1, label="Repetition Penalty", value=1.0)
         
     | 
| 139 | 
         
            +
             
     | 
| 140 | 
         
            +
                # Run the Gradio interface
         
     | 
| 141 | 
         
            +
                demo = gr.Interface(
         
     | 
| 142 | 
         
            +
                    fn=bot,
         
     | 
| 143 | 
         
            +
                    inputs=[chatbot, temperature, top_p, top_k, repetition_penalty],
         
     | 
| 144 | 
         
            +
                    outputs=[chatbot],
         
     | 
| 145 | 
         
            +
                    title="OpenVINO Chatbot",
         
     | 
| 146 | 
         
            +
                    live=True
         
     | 
| 147 | 
         
            +
                )
         
     | 
| 148 | 
         | 
| 149 | 
         
             
                return demo
         
     | 
| 150 | 
         | 
| 
         | 
|
| 152 | 
         
             
            if __name__ == "__main__":
         
     | 
| 153 | 
         
             
                app = create_gradio_interface()
         
     | 
| 154 | 
         
             
                app.launch(debug=True, share=True)  # share=True for public access
         
     | 
| 
         |