Some cleaning in huggingface_hub integration (#13)

- Some cleaning in huggingface_hub integration (85fe931f3911f59e26cc6c4ca2f28c5d1affa26a)

Co-authored-by: Lucain Pouget <[email protected]>
app.py CHANGED
````diff
@@ -24,25 +24,23 @@ def script_to_use(model_id, api):
     return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
 def process_model(model_id, q_method, hf_token):
-    …
-    fp16 = f"{…
+    model_name = model_id.split('/')[-1]
+    fp16 = f"{model_name}/{model_name.lower()}.fp16.bin"
 
     try:
         api = HfApi(token=hf_token)
 
-        …
-        …
-        snapshot_download(repo_id=model_id, local_dir = f"{MODEL_NAME}", local_dir_use_symlinks=False)
+        snapshot_download(repo_id=model_id, local_dir=model_name, local_dir_use_symlinks=False)
         print("Model downloaded successully!")
 
         conversion_script = script_to_use(model_id, api)
-        fp16_conversion = f"python llama.cpp/{conversion_script} {…
+        fp16_conversion = f"python llama.cpp/{conversion_script} {model_name} --outtype f16 --outfile {fp16}"
         result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
         if result.returncode != 0:
             raise Exception(f"Error converting to fp16: {result.stderr}")
         print("Model converted to fp16 successully!")
 
-        qtype = f"{…
+        qtype = f"{model_name}/{model_name.lower()}.{q_method.upper()}.gguf"
         quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
         result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
         if result.returncode != 0:
@@ -50,20 +48,15 @@ def process_model(model_id, q_method, hf_token):
         print("Quantised successfully!")
 
         # Create empty repo
-        …
-        …
-        …
-            repo_type="model",
-            exist_ok=True,
-            token=hf_token
-        )
-        print("Repo created successfully!")
+        new_repo_url = api.create_repo(repo_id=f"{model_name}-{q_method}-GGUF", exist_ok=True)
+        new_repo_id = new_repo_url.repo_id
+        print("Repo created successfully!", new_repo_url)
 
         card = ModelCard.load(model_id)
         card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
         card.text = dedent(
             f"""
-            # {…
+            # {new_repo_id}
             This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp.
             Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
             ## Use with llama.cpp
@@ -73,39 +66,37 @@ def process_model(model_id, q_method, hf_token):
             ```
 
             ```bash
-            llama-cli --hf-repo {…
+            llama-cli --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
             ```
 
             ```bash
-            llama-server --hf-repo {…
+            llama-server --hf-repo {new_repo_id} --model {qtype.split("/")[-1]} -c 2048
             ```
             """
         )
-        card.save(os.path.join(…
+        card.save(os.path.join(model_name, "README-new.md"))
 
         api.upload_file(
             path_or_fileobj=qtype,
             path_in_repo=qtype.split("/")[-1],
-            repo_id=…
-            repo_type="model",
+            repo_id=new_repo_id,
         )
 
         api.upload_file(
-            path_or_fileobj=f"{…
+            path_or_fileobj=f"{model_name}/README-new.md",
             path_in_repo="README.md",
-            repo_id=…
-            repo_type="model",
+            repo_id=new_repo_id,
         )
         print("Uploaded successfully!")
 
         return (
-            f'Find your repo <a href=\'{…
+            f'Find your repo <a href=\'{new_repo_url}\' target="_blank" style="text-decoration:underline">here</a>',
             "llama.png",
         )
     except Exception as e:
         return (f"Error: {e}", "error.png")
     finally:
-        shutil.rmtree(…
+        shutil.rmtree(model_name, ignore_errors=True)
         print("Folder cleaned up successfully!")
 
 
````
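For context, the cleanup converges on the pattern the new code relies on: build one authenticated `HfApi` client from the user token, let `create_repo(..., exist_ok=True)` return a `RepoUrl` (a `str` subclass that also carries a `repo_id` attribute), and reuse that id in later `upload_file` calls instead of repeating `token=` and `repo_type="model"` on every call. A minimal sketch, with a hypothetical token, repo name, and local path standing in for the app's runtime values:

```python
from huggingface_hub import HfApi

# One authenticated client; subsequent calls reuse this token.
api = HfApi(token="hf_xxx")  # hypothetical user access token with write permission

# exist_ok=True makes repo creation idempotent; the returned RepoUrl is a str
# subclass, so it can be interpolated directly into the "Find your repo" link.
new_repo_url = api.create_repo(repo_id="some-model-Q4_K_M-GGUF", exist_ok=True)
new_repo_id = new_repo_url.repo_id  # e.g. "your-username/some-model-Q4_K_M-GGUF"

# repo_type defaults to "model", so it no longer needs to be passed per call.
api.upload_file(
    path_or_fileobj="some-model/some-model.q4_k_m.gguf",  # hypothetical local file
    path_in_repo="some-model.q4_k_m.gguf",
    repo_id=new_repo_id,
)
```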
 
			

 
		