	Update app.py (#26)

commit 4f7853ffbdb4c32f60e40d9c739d3bf9ae7819dc

app.py CHANGED
@@ -23,14 +23,30 @@ def script_to_use(model_id, api):
     arch = arch[0]
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
-def process_model(model_id, q_method, hf_token):
+def process_model(model_id, q_method, hf_token, private_repo):
     model_name = model_id.split('/')[-1]
     fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
 
     try:
         api = HfApi(token=hf_token)
 
-
+        dl_pattern = ["*.md", "*.json", "*.model"]
+
+        pattern = (
+            "*.safetensors"
+            if any(
+                file.path.endswith(".safetensors")
+                for file in api.list_repo_tree(
+                    repo_id=model_id,
+                    recursive=True,
+                )
+            )
+            else "*.bin"
+        )
+
+        dl_pattern += [pattern]
+
+        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
         print("Model downloaded successfully!")
 
         conversion_script = script_to_use(model_id, api)
@@ -49,7 +65,7 @@ def process_model(model_id, q_method, hf_token):
         print("Quantised successfully!")
 
         # Create empty repo
-        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True)
+        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
         new_repo_id = new_repo_url.repo_id
         print("Repo created successfully!", new_repo_url)
 
@@ -58,6 +74,7 @@ def process_model(model_id, q_method, hf_token):
         except:
             card = ModelCard("")
         card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
+        card.data.tags += ["gguf-my-repo"]
         card.text = dedent(
             f"""
             # {new_repo_id}
@@ -84,7 +101,7 @@ def process_model(model_id, q_method, hf_token):
             llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
             ```
 
-            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the
+            Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
 
             ```
             git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
@@ -138,6 +155,11 @@ iface = gr.Interface(
             label="HF Write Token",
             info="https://hf.co/settings/token",
             type="password",
+        ),
+        gr.Checkbox(
+            value=False,
+            label="Private Repo",
+            info="Create a private repo under your username."
         )
     ],
     outputs=[
@@ -145,7 +167,7 @@ iface = gr.Interface(
         gr.Image(show_label=False),
     ],
     title="Create your own GGUF Quants, blazingly fast ⚡!",
-    description="The space takes
+    description="The space takes an HF repo as an input, quantises it and creates a Public repo containing the selected quant under your HF user namespace. You need to specify a write token obtained in https://hf.co/settings/tokens.",
     article="<p>Find your write token at <a href='https://huggingface.co/settings/tokens' target='_blank'>token settings</a></p>",
 
 )
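The heart of the change is the download filter: rather than snapshotting the whole source repo, the Space now lists the repo tree, prefers `*.safetensors` weights when any are present, falls back to `*.bin` otherwise, and downloads only those plus the metadata globs. A minimal standalone sketch of that logic, assuming `huggingface_hub` is installed and using `mistralai/Mistral-7B-v0.1` purely as a stand-in repo id:

```python
from huggingface_hub import HfApi, snapshot_download

api = HfApi()  # anonymous access is enough for public repos
model_id = "mistralai/Mistral-7B-v0.1"  # stand-in; any Hub model repo works
model_name = model_id.split("/")[-1]

# Metadata the conversion scripts always need.
dl_pattern = ["*.md", "*.json", "*.model"]

# Prefer safetensors when the repo ships them; otherwise fall back to .bin.
has_safetensors = any(
    f.path.endswith(".safetensors")
    for f in api.list_repo_tree(repo_id=model_id, recursive=True)
)
dl_pattern += ["*.safetensors" if has_safetensors else "*.bin"]

# Download only the matching files instead of the full snapshot.
path = snapshot_download(
    repo_id=model_id,
    local_dir=model_name,
    allow_patterns=dl_pattern,
)
print("Model downloaded to", path)
```

Wrapping the chosen glob in a list (`dl_pattern += [pattern]`) matters here: `list += str` would splice the string into the list character by character.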
| 46 | 
            +
             | 
| 47 | 
            +
                    dl_pattern += pattern
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                    snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False, token=hf_token, allow_patterns=dl_pattern)
         | 
| 50 | 
             
                    print("Model downloaded successully!")
         | 
| 51 |  | 
| 52 | 
             
                    conversion_script = script_to_use(model_id, api)
         | 
|  | |
| 65 | 
             
                    print("Quantised successfully!")
         | 
| 66 |  | 
| 67 | 
             
                    # Create empty repo
         | 
| 68 | 
            +
                    new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True, private=private_repo)
         | 
| 69 | 
             
                    new_repo_id = new_repo_url.repo_id
         | 
| 70 | 
             
                    print("Repo created successfully!", new_repo_url)
         | 
| 71 |  | 
|  | |
| 74 | 
             
                    except:
         | 
| 75 | 
             
                        card = ModelCard("")
         | 
| 76 | 
             
                    card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
         | 
| 77 | 
            +
                    card.data.tags += ["gguf-my-repo"]
         | 
| 78 | 
             
                    card.text = dedent(
         | 
| 79 | 
             
                        f"""
         | 
| 80 | 
             
                        # {new_repo_id}
         | 
|  | |
| 101 | 
             
                        llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
         | 
| 102 | 
             
                        ```
         | 
| 103 |  | 
| 104 | 
            +
                        Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
         | 
| 105 |  | 
| 106 | 
             
                        ```
         | 
| 107 | 
             
                        git clone https://github.com/ggerganov/llama.cpp && cd llama.cpp && make && ./main -m {qtype.split("/")[-1]} -n 128
         | 
|  | |
| 155 | 
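The model-card step appends rather than overwrites, so the source model's existing tags survive alongside `llama-cpp` and the new `gguf-my-repo` tag; a fresh `ModelCard("")` starts with `tags` set to `None`, hence the guard. A sketch of the same pattern, with `some-user/some-model` as a placeholder:

```python
from huggingface_hub import ModelCard

try:
    card = ModelCard.load("some-user/some-model")  # placeholder repo id
except Exception:
    card = ModelCard("")  # source repo has no README yet

# None-safe append: a blank card starts with tags == None.
card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
card.data.tags += ["gguf-my-repo"]
print(card.data.tags)  # ['llama-cpp', 'gguf-my-repo'] for a blank card
```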
             
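On the UI side, `gr.Interface` passes inputs to the handler positionally, so appending the `gr.Checkbox` as a fourth input is exactly what feeds the new `private_repo` parameter. A trimmed sketch of the wiring; the first two input labels and the dropdown choices are assumptions, only the token box and the checkbox are taken from the diff:

```python
import gradio as gr

def process_model(model_id, q_method, hf_token, private_repo):
    # Stub: the real Space downloads, converts, quantises and uploads here.
    return f"Would quantise {model_id} with {q_method} (private={private_repo})"

iface = gr.Interface(
    fn=process_model,
    inputs=[
        gr.Textbox(label="Hub Model ID"),  # assumed label
        gr.Dropdown(["Q4_K_M", "Q5_K_M"], label="Quantization Method"),  # assumed choices
        gr.Textbox(label="HF Write Token", type="password"),
        gr.Checkbox(
            value=False,
            label="Private Repo",
            info="Create a private repo under your username.",
        ),
    ],
    outputs="text",
)

if __name__ == "__main__":
    iface.launch()
```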
 
			

