Joseph Pollack committed
Commit eb0369d · unverified · 1 parent: 7ca96a1

adds functioning demo space for adapter config and adds model readme

requirements.txt CHANGED
@@ -1,15 +1,19 @@
-torch
-triton
+# PyTorch 2.8 ecosystem with CUDA support (required for TorchCodec 0.7)
+torch==2.8.0
 torchvision
-torchaudio
+torchaudio==2.8.0
+triton
+torchcodec==0.7
+# Core ML libraries
 datasets
 peft
 transformers
+# UI and deployment
 gradio
 gradio[mcp]
 trackio
 huggingface_hub
+# Audio processing
 soundfile
 librosa
 mistral-common
-torchcodec
 
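The version pins follow the pairing noted in the comment above: each TorchCodec release is built against a specific PyTorch minor version, and torchcodec 0.7 matches torch 2.8. A minimal sketch (not part of this commit) of a startup guard that surfaces a mismatched install early:

# Hypothetical guard, assuming the torchcodec 0.7 <-> torch 2.8 pairing
# stated in the requirements comment above.
import torch

if not torch.__version__.startswith("2.8"):
    raise RuntimeError(
        f"torchcodec==0.7 expects PyTorch 2.8.x, found {torch.__version__}"
    )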
templates/spaces/demo_voxtral/app.py CHANGED
@@ -2,41 +2,77 @@ import os
 import gradio as gr
 import torch
 from transformers import AutoProcessor
+try:
+    from transformers import AutoConfig
+except Exception:
+    AutoConfig = None
 try:
     from transformers import VoxtralForConditionalGeneration as VoxtralModelClass
 except Exception:
-    # Fallback for older transformers versions
-    from transformers import AutoModelForSeq2SeqLM as VoxtralModelClass
+    # Fallback for older transformers versions: prefer causal LM over seq2seq
+    from transformers import AutoModelForCausalLM as VoxtralModelClass
+try:
+    from peft import PeftModel, PeftConfig
+except Exception:
+    PeftModel = None
+    PeftConfig = None
+
 
 HF_MODEL_ID = os.getenv("HF_MODEL_ID", "mistralai/Voxtral-Mini-3B-2507")
+BASE_MODEL_ID = os.getenv("BASE_MODEL_ID", "mistralai/Voxtral-Mini-3B-2507")
 MODEL_NAME = os.getenv("MODEL_NAME", HF_MODEL_ID.split("/")[-1])
 HF_USERNAME = os.getenv("HF_USERNAME", "")
 MODEL_SUBFOLDER = os.getenv("MODEL_SUBFOLDER", "").strip()
 
-try:
-    processor = AutoProcessor.from_pretrained(HF_MODEL_ID)
-except Exception:
-    # Fallback: some repos may store processor files inside the subfolder
-    if MODEL_SUBFOLDER:
-        processor = AutoProcessor.from_pretrained(HF_MODEL_ID, subfolder=MODEL_SUBFOLDER)
-    else:
-        raise
+def _load_processor():
+    try:
+        return AutoProcessor.from_pretrained(HF_MODEL_ID)
+    except Exception:
+        # Fallback: some repos may store processor files inside the subfolder
+        if MODEL_SUBFOLDER:
+            try:
+                return AutoProcessor.from_pretrained(HF_MODEL_ID, subfolder=MODEL_SUBFOLDER)
+            except Exception:
+                pass
+        # Final fallback to base model's processor
+        return AutoProcessor.from_pretrained(BASE_MODEL_ID)
+
+processor = _load_processor()
 
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Use float32 on CPU; bfloat16 on CUDA if available
-if torch.cuda.is_available():
-    model_kwargs = {"device_map": "auto", "torch_dtype": torch.bfloat16}
-else:
-    model_kwargs = {"torch_dtype": torch.float32}
-
-if MODEL_SUBFOLDER:
-    model = VoxtralModelClass.from_pretrained(
-        HF_MODEL_ID, subfolder=MODEL_SUBFOLDER, **model_kwargs
-    )
-else:
-    model = VoxtralModelClass.from_pretrained(
-        HF_MODEL_ID, **model_kwargs
-    )
+dtype = torch.bfloat16 if device == "cuda" else torch.float32
+model_kwargs = {"device_map": "auto"} if device == "cuda" else {}
+
+def _from_pretrained_with_dtype(model_cls, model_id, **kwargs):
+    # Prefer new `dtype` kw; fall back to legacy `torch_dtype` if needed
+    try:
+        return model_cls.from_pretrained(model_id, dtype=dtype, **kwargs)
+    except TypeError:
+        return model_cls.from_pretrained(model_id, torch_dtype=dtype, **kwargs)
+
+
+
+model = None
+base_model = None
+
+# Prefer PEFT adapter-over-base path first, independent of adapter detection
+if PeftModel is not None:
+    try:
+        base_model = _from_pretrained_with_dtype(VoxtralModelClass, BASE_MODEL_ID, **model_kwargs)
+        if MODEL_SUBFOLDER:
+            model = PeftModel.from_pretrained(base_model, HF_MODEL_ID, subfolder=MODEL_SUBFOLDER)
+        else:
+            model = PeftModel.from_pretrained(base_model, HF_MODEL_ID)
+        model = model.to(dtype=dtype)
+    except Exception:
+        model = None
+
+# If PEFT path failed or PEFT is unavailable, fall back to the base model only
+if model is None:
+    if base_model is None:
+        base_model = _from_pretrained_with_dtype(VoxtralModelClass, BASE_MODEL_ID, **model_kwargs)
+    model = base_model
 
 # Simple language options (with Auto detection)
 LANGUAGES = {
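With this change the Space no longer loads HF_MODEL_ID directly: it first tries to apply HF_MODEL_ID as a PEFT adapter on top of BASE_MODEL_ID, and falls back to the plain base model if peft is missing or the adapter load fails. A hypothetical Space configuration (placeholder repo name, not from this commit) that exercises the adapter path:

# Placeholder values for illustration; in a real Space these would be set
# as Space variables rather than in code.
import os

os.environ["HF_MODEL_ID"] = "your-username/voxtral-lora-adapter"  # adapter repo (hypothetical)
os.environ["BASE_MODEL_ID"] = "mistralai/Voxtral-Mini-3B-2507"    # full base checkpoint
os.environ["MODEL_SUBFOLDER"] = ""  # only needed if adapter files sit in a subfolder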
templates/spaces/demo_voxtral/requirements.txt CHANGED
@@ -5,3 +5,7 @@ datasets
 soundfile
 librosa
 mistral-common
+peft
+huggingface_hub
+accelerate
+safetensors
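These additions mirror what the reworked app.py needs at runtime: peft provides PeftModel, and accelerate is required by transformers whenever device_map="auto" is requested. A small sketch (an assumption, not in the commit) of a pre-flight check for that dependency:

# transformers raises at load time if device_map="auto" is used without
# accelerate installed; this check just fails earlier with a clearer message.
import importlib.util

if importlib.util.find_spec("accelerate") is None:
    raise RuntimeError("accelerate is required for device_map='auto'")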