Joseph Pollack
committed on
adds network timeout wait
scripts/__pycache__/deploy_demo_space.cpython-313.pyc
ADDED: Binary file (53.2 kB)

scripts/__pycache__/push_to_huggingface.cpython-313.pyc
ADDED: Binary file (45.6 kB)

scripts/__pycache__/train_lora.cpython-313.pyc
ADDED: Binary file (22.6 kB)
scripts/deploy_demo_space.py
CHANGED
@@ -566,7 +566,7 @@ os.environ['BRAND_PROJECT_URL'] = {_json.dumps(self.brand_project_url)}
             f"app_file: app.py\n"
             f"pinned: false\n"
             f"short_description: Interactive demo for {self.model_id}\n"
-            + ("license: mit
+            + ("license: mit\\n" if self.demo_type != 'gpt' else "") +
             f"---\n\n"
         )
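The change above makes the `license: mit` key conditional: GPT-type demos omit it from the generated Space README front matter. A minimal sketch of the same conditional front-matter pattern; `build_readme_header`, `model_id`, and `demo_type` are hypothetical stand-ins for the class attributes in deploy_demo_space.py:

# Sketch: assemble a Space README's YAML front matter as one string,
# inserting optional keys conditionally (hypothetical helper).
def build_readme_header(model_id: str, demo_type: str) -> str:
    return (
        "---\n"
        "app_file: app.py\n"
        "pinned: false\n"
        f"short_description: Interactive demo for {model_id}\n"
        # Optional key: only non-GPT demos declare a license here.
        + ("license: mit\n" if demo_type != "gpt" else "")
        + "---\n\n"
    )

print(build_readme_header("my-org/voxtral-demo", "voxtral"))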
scripts/push_to_huggingface.py
CHANGED
@@ -137,54 +137,126 @@ class HuggingFacePusher:
 
     def _detect_artifact_type(self) -> str:
         """Detect whether output dir contains a full model or a LoRA adapter."""
+        logger.info(f"Detecting model artifacts in: {self.model_path}")
+
+        # Check if path exists
+        if not self.model_path.exists():
+            logger.error(f"❌ Model path does not exist: {self.model_path}")
+            return "unknown"
+
+        # List all files for debugging
+        all_files = list(self.model_path.rglob("*"))
+        logger.info(f"📁 Found {len(all_files)} files in model directory")
+        if len(all_files) <= 20:  # Only show if not too many files
+            for f in all_files:
+                logger.info(f"  - {f.relative_to(self.model_path)}")
+
+        # LoRA artifacts - be more flexible about file combinations
+        lora_config = self.model_path / "adapter_config.json"
+        lora_weights_safetensors = self.model_path / "adapter_model.safetensors"
+        lora_weights_bin = self.model_path / "adapter_model.bin"
+
+        has_lora_config = lora_config.exists()
+        has_lora_weights = lora_weights_safetensors.exists() or lora_weights_bin.exists()
+
+        if has_lora_config:
+            logger.info("✅ Found adapter_config.json")
+        if has_lora_weights:
+            logger.info("✅ Found LoRA weight files")
+
+        if has_lora_config and has_lora_weights:
+            logger.info("🎯 Detected LoRA adapter artifacts")
             return "lora"
+        elif has_lora_config:
+            logger.warning("⚠️ Found adapter_config.json but no weight files")
+        elif has_lora_weights:
+            logger.warning("⚠️ Found LoRA weight files but no adapter_config.json")
+
+        # Full model artifacts - also be more flexible
+        config_file = self.model_path / "config.json"
+        safetensors_model = self.model_path / "model.safetensors"
+        safetensors_index = self.model_path / "model.safetensors.index.json"
+        pytorch_model = self.model_path / "pytorch_model.bin"
+
+        has_config = config_file.exists()
+        has_weights = (safetensors_model.exists() or safetensors_index.exists() or pytorch_model.exists())
+
+        if has_config:
+            logger.info("✅ Found config.json")
+        if has_weights:
+            logger.info("✅ Found model weight files")
+
+        if has_config and has_weights:
+            logger.info("🎯 Detected full model artifacts")
             return "full"
+        elif has_config:
+            logger.warning("⚠️ Found config.json but no weight files")
+        elif has_weights:
+            logger.warning("⚠️ Found weight files but no config.json")
 
+        logger.error("❌ Could not detect model artifacts (neither full model nor LoRA)")
         return "unknown"
 
     def validate_model_path(self) -> bool:
         """Validate that the model path contains required files for Voxtral full or LoRA."""
         self.artifact_type = self._detect_artifact_type()
+
+        if self.artifact_type == "unknown":
+            logger.error("❌ Could not detect model type. Expected files:")
+            logger.error("   For LoRA: adapter_config.json + adapter_model.safetensors (or .bin)")
+            logger.error("   For Full Model: config.json + model.safetensors (or pytorch_model.bin)")
+            logger.error("   For Voxtral ASR: also look for processor_config.json, tokenizer.json, etc.")
+            return False
+
         if self.artifact_type == "lora":
+            # Check for required LoRA files
+            config_file = self.model_path / "adapter_config.json"
+            weights_file_safetensors = self.model_path / "adapter_model.safetensors"
+            weights_file_bin = self.model_path / "adapter_model.bin"
+
+            if not config_file.exists():
+                logger.error("❌ LoRA adapter missing required file: adapter_config.json")
                 return False
+
+            if not (weights_file_safetensors.exists() or weights_file_bin.exists()):
+                logger.error("❌ LoRA adapter missing weight files: adapter_model.safetensors or adapter_model.bin")
                 return False
+
+            logger.info("✅ LoRA adapter validation successful")
+            logger.info(f"   - Config: {config_file.name}")
+            if weights_file_safetensors.exists():
+                logger.info(f"   - Weights: {weights_file_safetensors.name}")
+            elif weights_file_bin.exists():
+                logger.info(f"   - Weights: {weights_file_bin.name}")
+
            return True
 
         if self.artifact_type == "full":
+            # Check for required full model files
+            config_file = self.model_path / "config.json"
+            safetensors_file = self.model_path / "model.safetensors"
+            safetensors_index = self.model_path / "model.safetensors.index.json"
+            pytorch_file = self.model_path / "pytorch_model.bin"
+
+            if not config_file.exists():
+                logger.error("❌ Full model missing required file: config.json")
                 return False
+
+            if not (safetensors_file.exists() or safetensors_index.exists() or pytorch_file.exists()):
+                logger.error("❌ Full model missing weight files: model.safetensors, model.safetensors.index.json, or pytorch_model.bin")
                 return False
+
+            logger.info("✅ Full model validation successful")
+            logger.info(f"   - Config: {config_file.name}")
+            if safetensors_file.exists():
+                logger.info(f"   - Weights: {safetensors_file.name}")
+            elif safetensors_index.exists():
+                logger.info(f"   - Weights: {safetensors_index.name} (sharded)")
+            elif pytorch_file.exists():
+                logger.info(f"   - Weights: {pytorch_file.name}")
+
            return True
 
-        logger.error("❌ Could not detect model artifacts (neither full model nor LoRA)")
         return False
 
     def create_model_card(self, training_config: Dict[str, Any], results: Dict[str, Any]) -> str:
@@ -455,9 +527,16 @@ MIT License
                    results: Optional[Dict[str, Any]] = None) -> bool:
         """Complete model push process"""
         logger.info(f"🚀 Starting model push to {self.repo_id}")
+        logger.info(f"📁 Model path: {self.model_path}")
+        logger.info(f"🎯 Repository: {self.repo_id}")
 
         # Validate model path
         if not self.validate_model_path():
+            logger.error("❌ Model validation failed. Please check:")
+            logger.error("   1. The model path exists and contains the expected files")
+            logger.error("   2. For LoRA models: adapter_config.json and adapter_model.* files")
+            logger.error("   3. For full models: config.json and model weight files")
+            logger.error("   4. Make sure the training completed successfully and saved the model")
             return False
 
         # Create repository
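Boiled down, the new `_detect_artifact_type` classifies the output directory by marker files, and is deliberately lenient: a lone adapter_config.json or a lone weight file logs a warning instead of failing outright, while `validate_model_path` reports exactly which files it expected. A condensed sketch of the same decision rule; `classify_artifacts` is a hypothetical standalone helper, not part of push_to_huggingface.py:

# Sketch: classify a training output directory by its marker files,
# mirroring the detection rule in the diff above (hypothetical helper).
from pathlib import Path

def classify_artifacts(model_path: Path) -> str:
    # LoRA adapters ship a config plus safetensors or bin weights.
    if (model_path / "adapter_config.json").exists() and (
        (model_path / "adapter_model.safetensors").exists()
        or (model_path / "adapter_model.bin").exists()
    ):
        return "lora"
    # Full models ship config.json plus single, sharded, or legacy weights.
    if (model_path / "config.json").exists() and any(
        (model_path / name).exists()
        for name in ("model.safetensors", "model.safetensors.index.json", "pytorch_model.bin")
    ):
        return "full"
    return "unknown"

print(classify_artifacts(Path("./outputs/voxtral-lora")))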
scripts/train_lora.py
CHANGED
@@ -24,6 +24,7 @@ Get your token from: https://huggingface.co/settings/tokens
 
 import argparse
 import json
+import time
 from pathlib import Path
 from datetime import datetime
 from typing import Tuple, Optional
@@ -285,50 +286,117 @@ def main():
     if not trackio_space:
         trackio_space = get_default_space_name("voxtral-lora-finetuning")
 
-    # Initialize trackio for experiment tracking
+    # Initialize trackio for experiment tracking with retry logic
+    trackio_enabled = False
     if trackio_space:
         print(f"Initializing trackio with space: {trackio_space}")
+        try:
+            trackio.init(
+                project="voxtral-lora-finetuning",
+                config={
+                    "model_checkpoint": model_checkpoint,
+                    "output_dir": output_dir,
+                    "batch_size": args.batch_size,
+                    "learning_rate": args.learning_rate,
+                    "epochs": args.epochs,
+                    "train_count": args.train_count,
+                    "eval_count": args.eval_count,
+                    "dataset_jsonl": args.dataset_jsonl,
+                    "dataset_name": args.dataset_name,
+                    "dataset_config": args.dataset_config,
+                    "lora_r": args.lora_r,
+                    "lora_alpha": args.lora_alpha,
+                    "lora_dropout": args.lora_dropout,
+                    "freeze_audio_tower": args.freeze_audio_tower,
+                },
+                space_id=trackio_space
+            )
+            trackio_enabled = True
+            print("✅ Trackio initialized successfully")
+        except Exception as e:
+            print(f"❌ Failed to initialize trackio with space: {e}")
+            print("⏳ Waiting 3 minutes for space to deploy before retrying...")
+            time.sleep(180)  # Wait 3 minutes (180 seconds)
+
+            print("🔄 Retrying trackio initialization with space...")
+            try:
+                trackio.init(
+                    project="voxtral-lora-finetuning",
+                    config={
+                        "model_checkpoint": model_checkpoint,
+                        "output_dir": output_dir,
+                        "batch_size": args.batch_size,
+                        "learning_rate": args.learning_rate,
+                        "epochs": args.epochs,
+                        "train_count": args.train_count,
+                        "eval_count": args.eval_count,
+                        "dataset_jsonl": args.dataset_jsonl,
+                        "dataset_name": args.dataset_name,
+                        "dataset_config": args.dataset_config,
+                        "lora_r": args.lora_r,
+                        "lora_alpha": args.lora_alpha,
+                        "lora_dropout": args.lora_dropout,
+                        "freeze_audio_tower": args.freeze_audio_tower,
+                    },
+                    space_id=trackio_space
+                )
+                trackio_enabled = True
+                print("✅ Trackio initialized successfully after retry")
+            except Exception as retry_e:
+                print(f"❌ Retry also failed: {retry_e}")
+                print("🔄 Falling back to local-only mode...")
+                try:
+                    trackio.init(
+                        project="voxtral-lora-finetuning",
+                        config={
+                            "model_checkpoint": model_checkpoint,
+                            "output_dir": output_dir,
+                            "batch_size": args.batch_size,
+                            "learning_rate": args.learning_rate,
+                            "epochs": args.epochs,
+                            "train_count": args.train_count,
+                            "eval_count": args.eval_count,
+                            "dataset_jsonl": args.dataset_jsonl,
+                            "dataset_name": args.dataset_name,
+                            "dataset_config": args.dataset_config,
+                            "lora_r": args.lora_r,
+                            "lora_alpha": args.lora_alpha,
+                            "lora_dropout": args.lora_dropout,
+                            "freeze_audio_tower": args.freeze_audio_tower,
+                        }
+                    )
+                    trackio_enabled = True
+                    print("✅ Trackio initialized in local-only mode")
+                except Exception as fallback_e:
+                    print(f"❌ Failed to initialize trackio in local mode: {fallback_e}")
+                    print("⚠️ Training will continue without experiment tracking")
     else:
         print("Initializing trackio in local-only mode")
+        try:
+            trackio.init(
+                project="voxtral-lora-finetuning",
+                config={
+                    "model_checkpoint": model_checkpoint,
+                    "output_dir": output_dir,
+                    "batch_size": args.batch_size,
+                    "learning_rate": args.learning_rate,
+                    "epochs": args.epochs,
+                    "train_count": args.train_count,
+                    "eval_count": args.eval_count,
+                    "dataset_jsonl": args.dataset_jsonl,
+                    "dataset_name": args.dataset_name,
+                    "dataset_config": args.dataset_config,
+                    "lora_r": args.lora_r,
+                    "lora_alpha": args.lora_alpha,
+                    "lora_dropout": args.lora_dropout,
+                    "freeze_audio_tower": args.freeze_audio_tower,
+                }
+            )
+            trackio_enabled = True
+            print("✅ Trackio initialized in local-only mode")
+        except Exception as e:
+            print(f"❌ Failed to initialize trackio: {e}")
+            print("⚠️ Training will continue without experiment tracking")
 
     print("Loading processor and model...")
     processor = VoxtralProcessor.from_pretrained(model_checkpoint)
@@ -397,8 +465,9 @@ def main():
     if eval_dataset:
         results = trainer.evaluate()
         print(f"Final evaluation results: {results}")
-        # Log final evaluation results
+        # Log final evaluation results if trackio is enabled
+        if trackio_enabled:
+            trackio.log(results)
 
     # Push dataset to Hub if requested
     if args.push_dataset and args.dataset_jsonl:
@@ -433,8 +502,9 @@ def main():
     except Exception as e:
         print(f"❌ Error pushing dataset: {e}")
 
-    # Finish trackio logging
+    # Finish trackio logging if enabled
+    if trackio_enabled:
+        trackio.finish()
 
     print("Training completed successfully!")
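One design note: the three `trackio.init` calls above duplicate the same config dict. A hedged sketch of how the retry-then-fallback chain could be factored into a single helper; `init_tracking` and `wait_seconds` are illustrative names, not part of train_lora.py:

# Sketch: one helper for the try / wait / retry / local-fallback chain
# (hypothetical; assumes trackio.init accepts project, config, space_id
# as shown in the diff above).
import time

def init_tracking(trackio, project, config, space_id=None, wait_seconds=180):
    # With a space: try, wait for the Space to deploy, retry, then fall
    # back to local-only mode. Without a space: one local-only attempt.
    attempts = [{"space_id": space_id}, {"space_id": space_id}, {}] if space_id else [{}]
    for i, extra in enumerate(attempts):
        try:
            trackio.init(project=project, config=config, **extra)
            return True  # tracking is live
        except Exception as e:
            print(f"trackio init attempt {i + 1} failed: {e}")
            if i == 0 and space_id:
                time.sleep(wait_seconds)  # give the Space time to deploy
    return False  # training continues without experiment tracking

Calling `trackio_enabled = init_tracking(trackio, "voxtral-lora-finetuning", config, space_id=trackio_space)` would then replace the nested try/except blocks while preserving the same behavior.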