Sean-Case
		
	committed on
		
		
					Commit 
							
							·
						
						114048b
	
1
								Parent(s):
							
							9795699
								
gpu_layers should now update correctly. Added code for creating distribution.
Browse files
- .gitignore +3 -1
- app.py +23 -9
- bootstrapper.py +63 -0
- chatfuncs/chatfuncs.py +26 -30
- requirements.txt +0 -1
    	
        .gitignore
    CHANGED
    
    | @@ -1,3 +1,5 @@ | |
| 1 | 
             
            *.pyc
         | 
| 2 | 
             
            *.ipynb
         | 
| 3 | 
            -
            *.pdf
         | 
|  | |
|  | 
|  | |
| 1 | 
             
            *.pyc
         | 
| 2 | 
             
            *.ipynb
         | 
| 3 | 
            +
            *.pdf
         | 
| 4 | 
            +
            */build
         | 
| 5 | 
            +
            */dist
         | 
    	
        app.py
    CHANGED
    
    | @@ -2,6 +2,7 @@ | |
| 2 |  | 
| 3 | 
             
            # +
         | 
| 4 | 
             
            import os
         | 
|  | |
| 5 |  | 
| 6 | 
             
            # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
         | 
| 7 | 
             
            #os.system("pip uninstall -y gradio")
         | 
| @@ -69,18 +70,31 @@ import chatfuncs.chatfuncs as chatf | |
| 69 | 
             
            chatf.embeddings = load_embeddings(embeddings_name)
         | 
| 70 | 
             
            chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
         | 
| 71 |  | 
|  | |
|  | |
| 72 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 73 |  | 
| 74 | 
            -
            def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConfig_gpu, CtransInitConfig_cpu=chatf.CtransInitConfig_cpu, torch_device=chatf.torch_device):
         | 
| 75 | 
            -
                print("Loading model")
         | 
| 76 | 
             
                if model_type == "Orca Mini":
         | 
| 77 | 
            -
             | 
| 78 | 
            -
                     | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 79 |  | 
| 80 | 
             
                    try:
         | 
| 81 | 
            -
                        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict( | 
| 82 | 
             
                    except:
         | 
| 83 | 
            -
                        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **asdict( | 
| 84 |  | 
| 85 | 
             
                    tokenizer = []
         | 
| 86 |  | 
| @@ -119,10 +133,10 @@ def load_model(model_type, gpu_layers, CtransInitConfig_gpu=chatf.CtransInitConf | |
| 119 | 
             
            # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
         | 
| 120 | 
             
            model_type = "Orca Mini"
         | 
| 121 |  | 
| 122 | 
            -
            load_model(model_type, chatf.gpu_layers, chatf. | 
| 123 |  | 
| 124 | 
             
            model_type = "Flan Alpaca"
         | 
| 125 | 
            -
            load_model(model_type, 0, chatf. | 
| 126 |  | 
| 127 | 
             
            def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
         | 
| 128 |  | 
| @@ -207,7 +221,7 @@ with block: | |
| 207 |  | 
| 208 | 
             
                with gr.Tab("Advanced features"):
         | 
| 209 | 
             
                    model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
         | 
| 210 | 
            -
                    gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1)
         | 
| 211 |  | 
| 212 | 
             
                gr.HTML(
         | 
| 213 | 
             
                    "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
         | 
|  | |
| 2 |  | 
| 3 | 
             
            # +
         | 
| 4 | 
             
            import os
         | 
| 5 | 
            +
            import copy
         | 
| 6 |  | 
| 7 | 
             
            # Need to overwrite version of gradio present in Huggingface spaces as it doesn't have like buttons/avatars (Oct 2023)
         | 
| 8 | 
             
            #os.system("pip uninstall -y gradio")
         | 
|  | |
| 70 | 
             
            chatf.embeddings = load_embeddings(embeddings_name)
         | 
| 71 | 
             
            chatf.vectorstore = get_faiss_store(faiss_vstore_folder="faiss_embedding",embeddings=globals()["embeddings"])
         | 
| 72 |  | 
| 73 | 
            +
            def load_model(model_type, gpu_layers, gpu_config=None, cpu_config=None, torch_device=None):
         | 
| 74 | 
            +
                print("Loading model")
         | 
| 75 |  | 
| 76 | 
            +
                # Default values inside the function
         | 
| 77 | 
            +
                if gpu_config is None:
         | 
| 78 | 
            +
                    gpu_config = chatf.gpu_config
         | 
| 79 | 
            +
                if cpu_config is None:
         | 
| 80 | 
            +
                    cpu_config = chatf.cpu_config
         | 
| 81 | 
            +
                if torch_device is None:
         | 
| 82 | 
            +
                    torch_device = chatf.torch_device
         | 
| 83 |  | 
|  | |
|  | |
| 84 | 
             
                if model_type == "Orca Mini":
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                    gpu_config.update_gpu(gpu_layers)
         | 
| 87 | 
            +
                    cpu_config.update_gpu(gpu_layers)
         | 
| 88 | 
            +
             | 
| 89 | 
            +
                    print("Loading with", cpu_config.gpu_layers, "model layers sent to GPU.")
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                    print(vars(gpu_config))
         | 
| 92 | 
            +
                    print(vars(cpu_config))
         | 
| 93 |  | 
| 94 | 
             
                    try:
         | 
| 95 | 
            +
                        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(cpu_config)) # **asdict(CtransRunConfig_cpu())
         | 
| 96 | 
             
                    except:
         | 
| 97 | 
            +
                        model = AutoModelForCausalLM.from_pretrained('juanjgit/orca_mini_3B-GGUF', model_type='llama', model_file='orca-mini-3b.q4_0.gguf', **vars(gpu_config)) #**asdict(CtransRunConfig_gpu())
         | 
| 98 |  | 
| 99 | 
             
                    tokenizer = []
         | 
| 100 |  | 
|  | |
| 133 | 
             
            # Both models are loaded on app initialisation so that users don't have to wait for the models to be downloaded
         | 
| 134 | 
             
            model_type = "Orca Mini"
         | 
| 135 |  | 
| 136 | 
            +
            load_model(model_type, chatf.gpu_layers, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
         | 
| 137 |  | 
| 138 | 
             
            model_type = "Flan Alpaca"
         | 
| 139 | 
            +
            load_model(model_type, 0, chatf.gpu_config, chatf.cpu_config, chatf.torch_device)
         | 
| 140 |  | 
| 141 | 
             
            def docs_to_faiss_save(docs_out:PandasDataFrame, embeddings=embeddings):
         | 
| 142 |  | 
|  | |
| 221 |  | 
| 222 | 
             
                with gr.Tab("Advanced features"):
         | 
| 223 | 
             
                    model_choice = gr.Radio(label="Choose a chat model", value="Flan Alpaca", choices = ["Flan Alpaca", "Orca Mini"])
         | 
| 224 | 
            +
                    gpu_layer_choice = gr.Slider(label="Choose number of model layers to send to GPU (please don't change if you don't know what you're doing).", value=0, minimum=0, maximum=6, step = 1, scale = 0)
         | 
| 225 |  | 
| 226 | 
             
                gr.HTML(
         | 
| 227 | 
             
                    "<center>This app is based on the models Flan Alpaca and Orca Mini. It powered by Gradio, Transformers, Ctransformers, and Langchain.</a></center>"
         | 
    	
        bootstrapper.py
    ADDED
    
    | @@ -0,0 +1,63 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            import sys
         | 
| 2 | 
            +
            import os
         | 
| 3 | 
            +
            import subprocess
         | 
| 4 | 
            +
            import logging
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            # Set up logging
         | 
| 7 | 
            +
            logging.basicConfig(filename='bootstrapper.log', level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            ENV_DIR = "app_env"
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            def create_virtual_env():
         | 
| 12 | 
            +
                logging.info("Checking for virtual environment at {}".format(ENV_DIR))
         | 
| 13 | 
            +
                
         | 
| 14 | 
            +
                if not os.path.exists(ENV_DIR):
         | 
| 15 | 
            +
                    logging.info("Virtual environment not found. Creating a new one.")
         | 
| 16 | 
            +
                    
         | 
| 17 | 
            +
                    # Import virtualenv and create a new environment
         | 
| 18 | 
            +
                    import virtualenv
         | 
| 19 | 
            +
                    virtualenv.create_environment(ENV_DIR)
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            def install_dependencies():
         | 
| 22 | 
            +
                logging.info("Installing dependencies.")
         | 
| 23 | 
            +
                
         | 
| 24 | 
            +
                # Ensure the requirements.txt file is bundled with your application
         | 
| 25 | 
            +
                requirements_path = "requirements.txt"
         | 
| 26 | 
            +
                
         | 
| 27 | 
            +
                # pip executable within the virtual environment
         | 
| 28 | 
            +
                pip_path = os.path.join(ENV_DIR, 'Scripts', 'pip')
         | 
| 29 | 
            +
                
         | 
| 30 | 
            +
                try:
         | 
| 31 | 
            +
                    subprocess.check_call([pip_path, "install", "-r", requirements_path])
         | 
| 32 | 
            +
                    logging.info("Dependencies installed successfully.")
         | 
| 33 | 
            +
                except Exception as e:
         | 
| 34 | 
            +
                    logging.error("Error installing dependencies: {}".format(e))
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            def main():
         | 
| 37 | 
            +
                #try:
         | 
| 38 | 
            +
                #	create_virtual_env()
         | 
| 39 | 
            +
                #except Exception as e:
         | 
| 40 | 
            +
                #	logging.error("An error occurred in the bootstrapper: {}".format(e), exc_info=True)
         | 
| 41 | 
            +
             | 
| 42 | 
            +
                try:
         | 
| 43 | 
            +
                    import langchain
         | 
| 44 | 
            +
                except ImportError:
         | 
| 45 | 
            +
                    logging.warning("Some dependencies are missing. Attempting to install.")
         | 
| 46 | 
            +
                    install_dependencies()
         | 
| 47 | 
            +
                
         | 
| 48 | 
            +
                # Now you can run your main application logic.
         | 
| 49 | 
            +
                # If it's in another file, you can use exec as shown before.
         | 
| 50 | 
            +
                try:
         | 
| 51 | 
            +
                    with open('app.py', 'r') as file:
         | 
| 52 | 
            +
                        exec(file.read())
         | 
| 53 | 
            +
                    logging.info("Main application executed successfully.")
         | 
| 54 | 
            +
                except Exception as e:
         | 
| 55 | 
            +
                    logging.error("Error executing main application: {}".format(e))
         | 
| 56 | 
            +
             | 
| 57 | 
            +
            if __name__ == "__main__":
         | 
| 58 | 
            +
                logging.info("Bootstrapper started.")
         | 
| 59 | 
            +
                try:
         | 
| 60 | 
            +
                    main()
         | 
| 61 | 
            +
                    logging.info("Bootstrapper finished.")
         | 
| 62 | 
            +
                except Exception as e:
         | 
| 63 | 
            +
                    logging.error("An error occurred in the bootstrapper: {}".format(e))
         | 
    	
        chatfuncs/chatfuncs.py
    CHANGED
    
    | @@ -95,38 +95,34 @@ context_length:int = 4096 | |
| 95 | 
             
            sample = True
         | 
| 96 |  | 
| 97 |  | 
| 98 | 
            -
            @dataclass
         | 
| 99 | 
             
            class CtransInitConfig_gpu:
         | 
| 100 | 
            -
                temperature | 
| 101 | 
            -
             | 
| 102 | 
            -
             | 
| 103 | 
            -
             | 
| 104 | 
            -
             | 
| 105 | 
            -
             | 
| 106 | 
            -
             | 
| 107 | 
            -
             | 
| 108 | 
            -
             | 
| 109 | 
            -
             | 
| 110 | 
            -
             | 
| 111 | 
            -
             | 
| 112 | 
            -
             | 
| 113 | 
            -
             | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 114 |  | 
| 115 | 
            -
            class CtransInitConfig_cpu:
         | 
| 116 | 
            -
                temperature: float = temperature
         | 
| 117 | 
            -
                top_k: int = top_k
         | 
| 118 | 
            -
                top_p: float = top_p
         | 
| 119 | 
            -
                repetition_penalty: float = repetition_penalty
         | 
| 120 | 
            -
                last_n_tokens: int = last_n_tokens
         | 
| 121 | 
            -
                max_new_tokens: int = max_new_tokens
         | 
| 122 | 
            -
                seed: int = seed
         | 
| 123 | 
            -
                reset: bool = reset
         | 
| 124 | 
            -
                stream: bool = stream
         | 
| 125 | 
            -
                threads: int = threads
         | 
| 126 | 
            -
                batch_size:int = batch_size
         | 
| 127 | 
            -
                context_length:int = context_length
         | 
| 128 | 
            -
                gpu_layers:int = 0
         | 
| 129 | 
            -
                #stop: list[str] = field(default_factory=lambda: [stop_string])
         | 
| 130 |  | 
| 131 | 
             
            @dataclass
         | 
| 132 | 
             
            class CtransGenGenerationConfig:
         | 
|  | |
| 95 | 
             
            sample = True
         | 
| 96 |  | 
| 97 |  | 
|  | |
| 98 | 
             
            class CtransInitConfig_gpu:
         | 
| 99 | 
            +
                def __init__(self, temperature=0.1, top_k=3, top_p=1, repetition_penalty=1.05, last_n_tokens=64, max_new_tokens=125, seed=42, reset=False, stream=True, threads=None, batch_size=1024, context_length=4096, gpu_layers=None):
         | 
| 100 | 
            +
                    self.temperature = temperature
         | 
| 101 | 
            +
                    self.top_k = top_k
         | 
| 102 | 
            +
                    self.top_p = top_p
         | 
| 103 | 
            +
                    self.repetition_penalty = repetition_penalty
         | 
| 104 | 
            +
                    self.last_n_tokens = last_n_tokens
         | 
| 105 | 
            +
                    self.max_new_tokens = max_new_tokens
         | 
| 106 | 
            +
                    self.seed = seed
         | 
| 107 | 
            +
                    self.reset = reset
         | 
| 108 | 
            +
                    self.stream = stream
         | 
| 109 | 
            +
                    self.threads = threads
         | 
| 110 | 
            +
                    self.batch_size = batch_size
         | 
| 111 | 
            +
                    self.context_length = context_length
         | 
| 112 | 
            +
                    self.gpu_layers = gpu_layers
         | 
| 113 | 
            +
                    # self.stop: list[str] = field(default_factory=lambda: [stop_string])
         | 
| 114 | 
            +
             | 
| 115 | 
            +
                def update_gpu(self, new_value):
         | 
| 116 | 
            +
                    self.gpu_layers = new_value
         | 
| 117 | 
            +
             | 
| 118 | 
            +
            class CtransInitConfig_cpu(CtransInitConfig_gpu):
         | 
| 119 | 
            +
                def __init__(self):
         | 
| 120 | 
            +
                    super().__init__()
         | 
| 121 | 
            +
                    self.gpu_layers = 0
         | 
| 122 | 
            +
             | 
| 123 | 
            +
            gpu_config = CtransInitConfig_gpu()
         | 
| 124 | 
            +
            cpu_config = CtransInitConfig_cpu()
         | 
| 125 |  | 
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
| 126 |  | 
| 127 | 
             
            @dataclass
         | 
| 128 | 
             
            class CtransGenGenerationConfig:
         | 
    	
        requirements.txt
    CHANGED
    
    | @@ -6,7 +6,6 @@ transformers | |
| 6 | 
             
            torch
         | 
| 7 | 
             
            sentence_transformers
         | 
| 8 | 
             
            faiss-cpu
         | 
| 9 | 
            -
            bitsandbytes
         | 
| 10 | 
             
            pypdf
         | 
| 11 | 
             
            python-docx
         | 
| 12 | 
             
            ctransformers[cuda]
         | 
|  | |
| 6 | 
             
            torch
         | 
| 7 | 
             
            sentence_transformers
         | 
| 8 | 
             
            faiss-cpu
         | 
|  | |
| 9 | 
             
            pypdf
         | 
| 10 | 
             
            python-docx
         | 
| 11 | 
             
            ctransformers[cuda]
         |