add FP16 conversion support, fix GH #1320 (#41)
Commit aab92e1489520874e8e3be930f2ace2b72df3953
Co-authored-by: Cavit Erginsoy <[email protected]>
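For reference, the FP16 path this commit adds boils down to calling mlx_lm's convert() with quantize=False and dtype="float16", while the existing path keeps quantize=True with a bit width. A minimal local sketch of the two calls as they appear in the diff below; the model id and output path here are placeholders, not values from this commit:

from mlx_lm import convert

# FP16 conversion: keep float16 weights, no quantization
convert("some-org/some-model", mlx_path="mlx", quantize=False, dtype="float16")

# Quantized conversion: 4-bit shown, matching the Space's default "Q4" choice
convert("some-org/some-model", mlx_path="mlx", quantize=True, q_bits=4)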
app.py
CHANGED
@@ -108,16 +108,25 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     model_name = model_id.split('/')[-1]
     username = whoami(oauth_token.token)["name"]
     try:
-        # [10 lines deleted here; their content was not captured in this page snapshot]
+        if q_method == "FP16":
+            upload_repo = f"{username}/{model_name}-mlx-fp16"
+            with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
+                # The target directory must not exist
+                mlx_path = os.path.join(tmpdir, "mlx")
+                convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
+                print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
+                print("Upload done")
+        else:
+            q_bits = QUANT_PARAMS[q_method]
+            upload_repo = f"{username}/{model_name}-mlx-{q_bits}Bit"
+            with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
+                # The target directory must not exist
+                mlx_path = os.path.join(tmpdir, "mlx")
+                convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
+                print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
+                print("Upload done")
         return (
             f'Find your repo <a href="https://hf.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>',
             "llama.png",

@@ -143,9 +152,9 @@ with gr.Blocks(css=css) as demo:
     )

     q_method = gr.Dropdown(
-        ["Q2", "Q3", "Q4", "Q6", "Q8"],
-        label="
-        info="MLX
+        ["FP16", "Q2", "Q3", "Q4", "Q6", "Q8"],
+        label="Conversion Method",
+        info="MLX conversion type (FP16 for float16, Q2–Q8 for quantized models)",
         value="Q4",
         filterable=False,
         visible=True

@@ -161,8 +170,8 @@
         gr.Markdown(label="output"),
         gr.Image(show_label=False),
     ],
-    title="Create your own MLX
-    description="The space takes an HF repo as an input,
+    title="Create your own MLX Models, blazingly fast ⚡!",
+    description="The space takes an HF repo as an input, converts it to MLX format (FP16 or quantized), and creates a Public/Private repo under your HF user namespace.",
     api_name=False
 )
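QUANT_PARAMS is referenced by the new else branch but is defined elsewhere in app.py and is not part of this hunk. Given the dropdown choices and the "{q_bits}Bit" suffix in the upload repo name, it presumably maps each quantization label to the bit width handed to convert(); the mapping below is an assumption for illustration, not taken from the commit:

# Assumed mapping (defined outside this diff): dropdown label -> q_bits passed to convert()
QUANT_PARAMS = {"Q2": 2, "Q3": 3, "Q4": 4, "Q6": 6, "Q8": 8}

# With these values, choosing "Q4" for a model named "my-model" (illustrative name) would
# upload to f"{username}/my-model-mlx-4Bit", while "FP16" uploads to f"{username}/my-model-mlx-fp16".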