arunasrivastava committed
Commit 96e03ae · 1 Parent(s): 91a1fc2
.data/TIMIT.zip ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b79af42068b53045510d86854e2239a13ff77c4bd27803b40c28dce4bb5aeb0d
+ size 869007403
__pycache__/main.cpython-310.pyc ADDED
Binary file (14.1 kB)
 
app.py CHANGED
@@ -1,30 +1,32 @@
  import gradio as gr
  import pandas as pd
- import requests
  from pathlib import Path
- from datetime import datetime
  import logging
- import os
+ from datetime import datetime
+ import sys
+ import uuid
+ from typing import Dict, Any
+
+ # Add parent directory to path to import main
+ sys.path.append(str(Path(__file__).parent))
+ from main import (
+     StorageManager,
+     EvaluationRequest,
+     evaluate_model,
+     PATHS
+ )

  logging.basicConfig(level=logging.INFO)

- QUEUE_SPACE_URL = os.getenv(
-     'QUEUE_SPACE_URL',
-     'https://koellabs-ipa-transcription-en-queue.hf.space/api'
- ).rstrip('/')
+ # Initialize storage manager
+ storage_manager = StorageManager(PATHS)

  def load_leaderboard_data():
      try:
-         response = requests.get(f"{QUEUE_SPACE_URL}/leaderboard", timeout=10)
-         logging.info(f"Leaderboard request URL: {QUEUE_SPACE_URL}/leaderboard")
-         response.raise_for_status()
-         return pd.DataFrame(response.json())
-     except requests.RequestException as e:
+         return pd.DataFrame(storage_manager.load('leaderboard'))
+     except Exception as e:
          logging.error(f"Error loading leaderboard: {e}")
-         try:
-             return pd.read_json(Path("fake_queue/leaderboard.json"))
-         except:
-             return pd.DataFrame()
+         return pd.DataFrame()

  def format_leaderboard_df(df):
      if df.empty:
@@ -43,45 +45,85 @@ def format_leaderboard_df(df):
  def create_html_table(df):
      return df.to_html(escape=False, index=False, classes="styled-table")

- def submit_evaluation(model_name, submission_name, github_url):
+ def submit_evaluation(model_name: str, submission_name: str, github_url: str) -> str:
      if not model_name or not submission_name:
          return "⚠️ Please provide both model name and submission name."

-     request_data = {
-         "transcription_model": model_name,
-         "subset": "test",
-         "submission_name": submission_name,
-         "github_url": github_url if github_url else None
-     }
-
      try:
-         response = requests.post(
-             f"{QUEUE_SPACE_URL}/evaluate",
-             json=request_data,
-             timeout=10
+         # Generate a task ID
+         task_id = str(uuid.uuid4())
+
+         # Create evaluation request
+         request = EvaluationRequest(
+             transcription_model=model_name,
+             submission_name=submission_name,
+             github_url=github_url if github_url else None,
+             subset="test"
          )
-         logging.info(f"Submit request URL: {QUEUE_SPACE_URL}/evaluate")
-         response.raise_for_status()
-         task_id = response.json()["task_id"]
+
+         # Create task entry
+         task = {
+             "id": task_id,
+             "model": model_name,
+             "subset": "test",
+             "submission_name": submission_name,
+             "github_url": github_url,
+             "status": "queued",
+             "submitted_at": datetime.now().isoformat()
+         }
+
+         # Save task
+         tasks = storage_manager.load('tasks')
+         tasks.append(task)
+         storage_manager.save('tasks', tasks)
+
+         # Start evaluation in background
+         import asyncio
+         asyncio.run(evaluate_model(task_id, request))
+
          return f"✅ Evaluation submitted successfully! Task ID: {task_id}"
-     except requests.RequestException as e:
+     except Exception as e:
          return f"❌ Error: {str(e)}"

- def check_task_status(task_id):
-     if not task_id:
-         return "Please enter a task ID"
+ def check_status(query: str) -> Dict[str, Any]:
+     if not query:
+         return {"error": "Please enter a model name or task ID"}
+
      try:
-         response = requests.get(
-             f"{QUEUE_SPACE_URL}/tasks/{task_id}",
-             timeout=10
-         )
-         logging.info(f"Status check URL: {QUEUE_SPACE_URL}/tasks/{task_id}")
-         response.raise_for_status()
-         return response.json()
-     except requests.RequestException as e:
-         return f"Error checking status: {str(e)}"
+         results = storage_manager.load('results')
+         tasks = storage_manager.load('tasks')
+
+         # First try to find by task ID
+         result = next((r for r in results if r["task_id"] == query), None)
+         task = next((t for t in tasks if t["id"] == query), None)
+
+         # If not found, try to find by model name
+         if not result:
+             result = next((r for r in results if r["model"] == query), None)
+         if not task:
+             task = next((t for t in tasks if t["model"] == query), None)
+
+         if result:
+             # If we found results, return them
+             return {
+                 "status": "completed",
+                 "model": result["model"],
+                 "subset": result["subset"],
+                 "num_files": result["num_files"],
+                 "average_per": result["average_per"],
+                 "average_pwed": result["average_pwed"],
+                 "detailed_results": result["detailed_results"],
+                 "timestamp": result["timestamp"]
+             }
+         elif task:
+             # If we only found task status, return that
+             return task
+         else:
+             return {"error": f"No results found for '{query}'"}
+
+     except Exception as e:
+         logging.error(f"Error checking status: {e}")
+         return {"error": f"Error checking status: {str(e)}"}

  with gr.Blocks(css="""
  .styled-table {
@@ -127,19 +171,16 @@ with gr.Blocks(css="""
                  outputs=result
              )

-         with gr.TabItem("📊 Task Status"):
-             task_id = gr.Textbox(label="Task ID")
+         with gr.TabItem("📊 Model Status"):
+             query = gr.Textbox(label="Model Name or Task ID", placeholder="Enter model name (e.g., facebook/wav2vec2-lv-60-espeak-cv-ft)")
              status_btn = gr.Button("Check Status")
              status_output = gr.JSON(label="Status")

-             # Use a simple function wrapper to ensure direct HTTP request
-             def check_status_wrapper(task_id):
-                 return check_task_status(task_id)
-
              status_btn.click(
-                 fn=check_status_wrapper,
-                 inputs=task_id,
+                 fn=check_status,
+                 inputs=query,
                  outputs=status_output
              )

- demo.launch()
+ if __name__ == "__main__":
+     demo.launch()
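
Because the reworked app.py above imports StorageManager and evaluate_model from main instead of posting to a separate queue Space, the submission flow can be exercised from a local Python shell. A minimal sketch, assuming the Space's files (including .data/TIMIT.zip) are available locally; the model and submission names are taken from the existing leaderboard data, and calling submit_evaluation triggers a full TIMIT evaluation:

# Illustrative only: drives the functions defined in app.py above.
from app import submit_evaluation, check_status

# Queues a task, runs evaluate_model() via asyncio.run(), and returns
# "✅ Evaluation submitted successfully! Task ID: <uuid>" on success.
print(submit_evaluation(
    model_name="facebook/wav2vec2-lv-60-espeak-cv-ft",
    submission_name="facebook espeak",
    github_url="",
))

# check_status() accepts either a task ID or a model name.
print(check_status("facebook/wav2vec2-lv-60-espeak-cv-ft"))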
fake_queue/leaderboard.json CHANGED
@@ -1,7 +1,7 @@
  [
      {
          "submission_id": "8e6a3a00-59fa-4a24-861d-a132a8212658",
-         "submission_name": "facebook espeak",
+         "submission_name": "fake-facebook espeak",
          "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
          "average_per": 0.33667301260691423,
          "average_pwed": 0.1276725657099669,
main.py ADDED
@@ -0,0 +1,499 @@
+ import gradio as gr
+ from fastapi import FastAPI, HTTPException, BackgroundTasks
+ from pydantic import BaseModel, HttpUrl
+ from typing import List, Optional, Dict
+ import torch
+ import torchaudio
+ from transformers import AutoProcessor, AutoModelForCTC
+ import evaluate
+ import zipfile
+ from datetime import datetime
+ import json
+ import uuid
+ import os
+ from pathlib import Path
+
+ app = FastAPI(title="TIMIT Phoneme Transcription Leaderboard")
+
+ # Create Gradio interface
+ demo = gr.Interface(
+     fn=lambda x: x,
+     inputs=gr.Textbox(visible=False),
+     outputs=gr.Textbox(visible=False),
+     title="TIMIT Phoneme Transcription Queue",
+     description="API endpoints are available at /api/leaderboard, /api/evaluate, and /api/tasks/{task_id}"
+ )
+
+
+ # Get absolute path - Updated for HF Spaces
+ CURRENT_DIR = Path(__file__).parent.absolute()
+
+ # Constants - Updated for HF Spaces environment
+ TIMIT_PATH = CURRENT_DIR / ".data" / "TIMIT.zip"  # Move TIMIT.zip to root of space
+ QUEUE_DIR = CURRENT_DIR / "queue"
+ PATHS = {
+     'tasks': QUEUE_DIR / "tasks.json",
+     'results': QUEUE_DIR / "results.json",
+     'leaderboard': QUEUE_DIR / "leaderboard.json"
+ }
+
+ # Initialize evaluation metric
+ phone_errors = evaluate.load("ginic/phone_errors")
+
+ class TimitDataManager:
+     """Handles all TIMIT dataset operations"""
+
+     # TIMIT to IPA mapping with direct simplifications
+     TIMIT_TO_IPA = {
+         # Vowels (simplified)
+         'aa': 'ɑ',
+         'ae': 'æ',
+         'ah': 'ʌ',
+         'ao': 'ɔ',
+         'aw': 'aʊ',
+         'ay': 'aɪ',
+         'eh': 'ɛ',
+         'er': 'ɹ',  # Simplified from 'ɝ'
+         'ey': 'eɪ',
+         'ih': 'ɪ',
+         'ix': 'i',  # Simplified from 'ɨ'
+         'iy': 'i',
+         'ow': 'oʊ',
+         'oy': 'ɔɪ',
+         'uh': 'ʊ',
+         'uw': 'u',
+         'ux': 'u',  # Simplified from 'ʉ'
+         'ax': 'ə',
+         'ax-h': 'ə',  # Simplified from 'ə̥'
+         'axr': 'ɹ',  # Simplified from 'ɚ'
+
+         # Consonants
+         'b': '',
+         'bcl': 'b',
+         'd': '',
+         'dcl': 'd',
+         'g': '',
+         'gcl': 'g',
+         'p': '',
+         'pcl': 'p',
+         't': '',
+         'tcl': 't',
+         'k': '',
+         'kcl': 'k',
+         'dx': 'ɾ',
+         'q': 'ʔ',
+
+         # Fricatives
+         'jh': 'dʒ',
+         'ch': 'tʃ',
+         's': 's',
+         'sh': 'ʃ',
+         'z': 'z',
+         'zh': 'ʒ',
+         'f': 'f',
+         'th': 'θ',
+         'v': 'v',
+         'dh': 'ð',
+         'hh': 'h',
+         'hv': 'h',  # Simplified from 'ɦ'
+
+         # Nasals (simplified)
+         'm': 'm',
+         'n': 'n',
+         'ng': 'ŋ',
+         'em': 'm',  # Simplified from 'm̩'
+         'en': 'n',  # Simplified from 'n̩'
+         'eng': 'ŋ',  # Simplified from 'ŋ̍'
+         'nx': 'ɾ',  # Simplified from 'ɾ̃'
+
+         # Semivowels and Glides
+         'l': 'l',
+         'r': 'ɹ',
+         'w': 'w',
+         'wh': 'ʍ',
+         'y': 'j',
+         'el': 'l',  # Simplified from 'l̩'
+
+         # Special
+         'epi': '',  # Remove epenthetic silence
+         'h#': '',   # Remove start/end silence
+         'pau': '',  # Remove pause
+     }
+
+
+     def __init__(self, timit_path: Path):
+         self.timit_path = timit_path
+         self._zip = None
+         print(f"TimitDataManager initialized with path: {self.timit_path.absolute()}")
+         if not self.timit_path.exists():
+             raise FileNotFoundError(f"TIMIT dataset not found at {self.timit_path.absolute()}")
+         print("TIMIT dataset file exists!")
+
+     @property
+     def zip(self):
+         if not self._zip:
+             try:
+                 self._zip = zipfile.ZipFile(self.timit_path, 'r')
+                 print("Successfully opened TIMIT zip file")
+             except FileNotFoundError:
+                 raise FileNotFoundError(f"TIMIT dataset not found at {self.timit_path}")
+         return self._zip
+
+     def get_file_list(self, subset: str) -> List[str]:
+         """Get list of WAV files for given subset"""
+         files = [f for f in self.zip.namelist()
+                  if f.endswith('.WAV') and subset.lower() in f.lower()]
+         print(f"Found {len(files)} WAV files in {subset} subset")
+         if files:
+             print("First 3 files:", files[:3])
+         return files
+
+     def load_audio(self, filename: str) -> torch.Tensor:
+         """Load and preprocess audio file"""
+         with self.zip.open(filename) as wav_file:
+             waveform, sample_rate = torchaudio.load(wav_file)
+
+         if waveform.shape[0] > 1:
+             waveform = torch.mean(waveform, dim=0, keepdim=True)
+
+         if sample_rate != 16000:
+             waveform = torchaudio.transforms.Resample(sample_rate, 16000)(waveform)
+
+         waveform = (waveform - waveform.mean()) / (waveform.std() + 1e-7)
+
+         if waveform.dim() == 1:
+             waveform = waveform.unsqueeze(0)
+
+         return waveform
+
+     def get_phonemes(self, filename: str) -> str:
+         """Get cleaned phoneme sequence from PHN file and convert to IPA"""
+         phn_file = filename.replace('.WAV', '.PHN')
+         with self.zip.open(phn_file) as f:
+             phonemes = []
+             for line in f.read().decode('utf-8').splitlines():
+                 if line.strip():
+                     _, _, phone = line.split()
+                     phone = self.remove_stress_mark(phone)
+                     # Convert to IPA instead of using simplify_timit
+                     ipa = self.TIMIT_TO_IPA.get(phone.lower(), '')
+                     if ipa:
+                         phonemes.append(ipa)
+             return ''.join(phonemes)  # Join without spaces for IPA
+
+     def simplify_timit(self, phoneme: str) -> str:
+         """Apply substitutions to simplify TIMIT phonemes"""
+         return self.PHONE_SUBSTITUTIONS.get(phoneme, phoneme)
+
+     def remove_stress_mark(self, text: str) -> str:
+         """Removes the combining double inverted breve (͡) from text"""
+         if not isinstance(text, str):
+             raise TypeError("Input must be string")
+         return text.replace('͡', '')
+
+ class ModelManager:
+     """Handles model loading and inference"""
+
+     def __init__(self):
+         self.models = {}
+         self.processors = {}
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.batch_size = 32  # Added batch size parameter
+
+     def get_model_and_processor(self, model_name: str):
+         """Get or load model and processor"""
+         if model_name not in self.models:
+             print("Loading processor with phoneme tokenizer...")
+             processor = AutoProcessor.from_pretrained(model_name)
+
+             print("Loading model...", {model_name})
+             model = AutoModelForCTC.from_pretrained(model_name).to(self.device)
+
+             self.models[model_name] = model
+             self.processors[model_name] = processor
+
+         return self.models[model_name], self.processors[model_name]
+
+     def transcribe(self, audio_list: List[torch.Tensor], model_name: str) -> List[str]:
+         """Transcribe a batch of audio using specified model"""
+         model, processor = self.get_model_and_processor(model_name)
+         if not model or not processor:
+             raise Exception("Model and processor not loaded")
+
+         # Process audio in batches
+         all_predictions = []
+         for i in range(0, len(audio_list), self.batch_size):
+             batch_audio = audio_list[i:i + self.batch_size]
+
+             # Pad sequence within batch
+             max_length = max(audio.shape[-1] for audio in batch_audio)
+             padded_audio = torch.zeros((len(batch_audio), 1, max_length))
+             attention_mask = torch.zeros((len(batch_audio), max_length))
+
+             for j, audio in enumerate(batch_audio):
+                 padded_audio[j, :, :audio.shape[-1]] = audio
+                 attention_mask[j, :audio.shape[-1]] = 1
+
+             # Process batch
+             inputs = processor(
+                 padded_audio.squeeze(1).numpy(),
+                 sampling_rate=16000,
+                 return_tensors="pt",
+                 padding=True
+             )
+
+             input_values = inputs.input_values.to(self.device)
+             attention_mask = inputs.get("attention_mask", attention_mask).to(self.device)
+
+             with torch.no_grad():
+                 outputs = model(
+                     input_values=input_values,
+                     attention_mask=attention_mask
+                 )
+                 logits = outputs.logits
+             predicted_ids = torch.argmax(logits, dim=-1)
+             predictions = processor.batch_decode(predicted_ids, skip_special_tokens=True)
+             predictions = [pred.replace(' ', '') for pred in predictions]
+             all_predictions.extend(predictions)
+
+         return all_predictions
+
+ class StorageManager:
+     """Handles all JSON storage operations"""
+
+     def __init__(self, paths: Dict[str, Path]):
+         self.paths = paths
+         self._ensure_directories()
+
+     def _ensure_directories(self):
+         """Ensure all necessary directories and files exist"""
+         for path in self.paths.values():
+             path.parent.mkdir(parents=True, exist_ok=True)
+             if not path.exists():
+                 path.write_text('[]')
+
+     def load(self, key: str) -> List:
+         """Load JSON file"""
+         return json.loads(self.paths[key].read_text())
+
+     def save(self, key: str, data: List):
+         """Save data to JSON file"""
+         self.paths[key].write_text(json.dumps(data, indent=4, default=str, ensure_ascii=False))
+
+     def update_task(self, task_id: str, updates: Dict):
+         """Update specific task with new data"""
+         tasks = self.load('tasks')
+         for task in tasks:
+             if task['id'] == task_id:
+                 task.update(updates)
+                 break
+         self.save('tasks', tasks)
+
+ class EvaluationRequest(BaseModel):
+     """Request model for TIMIT evaluation"""
+     transcription_model: str
+     subset: str = "test"
+     max_samples: Optional[int] = None
+     submission_name: str
+     github_url: Optional[str] = None
+
+ # Initialize managers
+ timit_manager = TimitDataManager(TIMIT_PATH)
+ model_manager = ModelManager()
+ storage_manager = StorageManager(PATHS)
+
+ async def evaluate_model(task_id: str, request: EvaluationRequest):
+     """Background task to evaluate model on TIMIT"""
+     try:
+         storage_manager.update_task(task_id, {"status": "processing"})
+
+         files = timit_manager.get_file_list(request.subset)
+         if request.max_samples:
+             files = files[:request.max_samples]
+
+         results = []
+         total_per = total_pwed = 0
+
+         # Process files in batches
+         batch_size = model_manager.batch_size
+         for i in range(0, len(files), batch_size):
+             batch_files = files[i:i + batch_size]
+
+             # Load batch audio and ground truth
+             batch_audio = []
+             batch_ground_truth = []
+             for wav_file in batch_files:
+                 audio = timit_manager.load_audio(wav_file)
+                 ground_truth = timit_manager.get_phonemes(wav_file)
+                 batch_audio.append(audio)
+                 batch_ground_truth.append(ground_truth)
+
+             # Get predictions for batch
+             predictions = model_manager.transcribe(batch_audio, request.transcription_model)
+
+             # Calculate metrics for each file in batch
+             for j, (wav_file, prediction, ground_truth) in enumerate(zip(batch_files, predictions, batch_ground_truth)):
+                 # Convert Unicode to readable format
+                 # prediction_str = repr(prediction)[1:-1]  # Remove quotes but keep escaped unicode
+
+                 metrics = phone_errors.compute(
+                     predictions=[prediction],
+                     references=[ground_truth],
+                     is_normalize_pfer=True
+                 )
+
+                 per = metrics['phone_error_rates'][0]
+                 pwed = metrics['phone_feature_error_rates'][0]
+
+                 results.append({
+                     "file": wav_file,
+                     "ground_truth": ground_truth,
+                     "prediction": prediction,
+                     "per": per,
+                     "pwed": pwed
+                 })
+
+                 total_per += per
+                 total_pwed += pwed
+
+         if not results:
+             raise Exception("No files were successfully processed")
+
+         avg_per = total_per / len(results)
+         avg_pwed = total_pwed / len(results)
+
+         result = {
+             "task_id": task_id,
+             "model": request.transcription_model,
+             "subset": request.subset,
+             "num_files": len(results),
+             "average_per": avg_per,
+             "average_pwed": avg_pwed,
+             "detailed_results": results[:5],
+             "timestamp": datetime.now().isoformat()
+         }
+
+         # Save results
+         print("Saving results...")
+         current_results = storage_manager.load('results')
+         current_results.append(result)
+         storage_manager.save('results', current_results)
+
+         # Update leaderboard
+         print("Updating leaderboard...")
+         leaderboard = storage_manager.load('leaderboard')
+         entry = next((e for e in leaderboard
+                       if e["submission_name"] == request.submission_name), None)
+
+         if entry:
+             # Simply update with new scores
+             entry.update({
+                 "average_per": avg_per,
+                 "average_pwed": avg_pwed,
+                 "model": request.transcription_model,
+                 "subset": request.subset,
+                 "github_url": request.github_url,
+                 "submission_date": datetime.now().isoformat()
+             })
+         else:
+             leaderboard.append({
+                 "submission_id": str(uuid.uuid4()),
+                 "submission_name": request.submission_name,
+                 "model": request.transcription_model,
+                 "average_per": avg_per,
+                 "average_pwed": avg_pwed,
+                 "subset": request.subset,
+                 "github_url": request.github_url,
+                 "submission_date": datetime.now().isoformat()
+             })
+
+         storage_manager.save('leaderboard', leaderboard)
+         storage_manager.update_task(task_id, {"status": "completed"})
+         print("Evaluation completed successfully")
+
+     except Exception as e:
+         error_msg = f"Evaluation failed: {str(e)}"
+         print(error_msg)
+         storage_manager.update_task(task_id, {
+             "status": "failed",
+             "error": error_msg
+         })
+
+ # Initialize managers
+ def init_directories():
+     """Ensure all necessary directories exist"""
+     (CURRENT_DIR / ".data").mkdir(parents=True, exist_ok=True)
+     QUEUE_DIR.mkdir(parents=True, exist_ok=True)
+
+     for path in PATHS.values():
+         if not path.exists():
+             path.write_text('[]')
+
+ # Initialize your managers
+ init_directories()  # Your existing initialization function
+ timit_manager = TimitDataManager(TIMIT_PATH)
+ model_manager = ModelManager()
+ storage_manager = StorageManager(PATHS)
+
+ @app.get("/api/health")
+ async def health_check():
+     """Simple health check endpoint"""
+     return {"status": "healthy"}
+
+ @app.post("/api/evaluate")
+ async def submit_evaluation(
+     request: EvaluationRequest,
+     background_tasks: BackgroundTasks
+ ):
+     """Submit new evaluation task"""
+     task_id = str(uuid.uuid4())
+
+     task = {
+         "id": task_id,
+         "model": request.transcription_model,
+         "subset": request.subset,
+         "submission_name": request.submission_name,
+         "github_url": request.github_url,
+         "status": "queued",
+         "submitted_at": datetime.now().isoformat()
+     }
+
+     tasks = storage_manager.load('tasks')
+     tasks.append(task)
+     storage_manager.save('tasks', tasks)
+
+     background_tasks.add_task(evaluate_model, task_id, request)
+
+     return {
+         "message": "Evaluation task submitted successfully",
+         "task_id": task_id
+     }
+
+ @app.get("/api/tasks/{task_id}")
+ async def get_task(task_id: str):
+     """Get specific task status"""
+     tasks = storage_manager.load('tasks')
+     task = next((t for t in tasks if t["id"] == task_id), None)
+     if not task:
+         raise HTTPException(status_code=404, detail="Task not found")
+     return task
+
+ @app.get("/api/leaderboard")
+ async def get_leaderboard():
+     """Get current leaderboard"""
+     try:
+         leaderboard = storage_manager.load('leaderboard')
+         sorted_leaderboard = sorted(leaderboard, key=lambda x: (x["average_per"], x["average_pwed"]))
+         return sorted_leaderboard
+     except Exception as e:
+         print(f"Error loading leaderboard: {e}")
+         return []
+
+ # Note: We need to mount the FastAPI app after defining all routes
+ app = gr.mount_gradio_app(app, demo, path="/")
+
+ # For local development
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)
+
queue/leaderboard.json ADDED
@@ -0,0 +1,72 @@
+ [
+     {
+         "submission_id": "8e6a3a00-59fa-4a24-861d-a132a8212658",
+         "submission_name": "facebook espeak",
+         "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
+         "average_per": 0.33667301260691423,
+         "average_pwed": 0.1276725657099669,
+         "subset": "test",
+         "github_url": "https://github.com/facebookresearch/fairseq/blob/main/examples/wav2vec/README.md",
+         "submission_date": "2024-12-05T07:32:06.850230"
+     },
+     {
+         "submission_id": "70aceb68-ad86-4a83-9998-08adb27b4d5c",
+         "submission_name": "english phoneme model",
+         "model": "KoelLabs/xlsr-timit-b0",
+         "average_per": 0.12572285528714347,
+         "average_pwed": 0.06476636812791145,
+         "subset": "test",
+         "github_url": "https://github.com/KoelLabs/",
+         "submission_date": "2024-12-05T08:25:24.982477"
+     },
+     {
+         "submission_id": "80b57299-b3ab-4caf-ac4a-898c8398046e",
+         "submission_name": "speech 31 model",
+         "model": "speech31/wav2vec2-large-TIMIT-IPA",
+         "average_per": 0.4415425496841929,
+         "average_pwed": 0.18625930002594002,
+         "subset": "test",
+         "github_url": "https://huggingface.co/speech31/wav2vec2-large-TIMIT-IPA2",
+         "submission_date": "2024-12-05T09:36:14.570315"
+     },
+     {
+         "submission_id": "0cbcab0a-bd07-421f-82a0-480c9507a214",
+         "submission_name": "jubiliano model wav2vec2",
+         "model": "Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5",
+         "average_per": 0.6318471187460027,
+         "average_pwed": 0.222932144739126,
+         "subset": "test",
+         "github_url": "https://huggingface.co/Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5WithoutSpaces/tree/d5312009d8e620b183c334dfdd9ffc6b4f06f8c1",
+         "submission_date": "2024-12-05T10:17:21.334530"
+     },
+     {
+         "submission_id": "0fc29c54-3db2-46b6-aeee-c96484306751",
+         "submission_name": "xlsr 53 model",
+         "model": "facebook/wav2vec2-xlsr-53-espeak-cv-ft",
+         "average_per": 0.348845592557092,
+         "average_pwed": 0.1386742019529415,
+         "subset": "test",
+         "github_url": "https://github.com/facebookresearch/fairseq/blob/main/examples/wav2vec/README.md",
+         "submission_date": "2024-12-05T10:34:26.157054"
+     },
+     {
+         "submission_id": "a23026ec-acac-4481-9761-f9368b4b94f1",
+         "submission_name": "ginic model wav2vec2 finetuned on buckeye",
+         "model": "ginic/hyperparam_tuning_1_wav2vec2-large-xlsr-buckeye-ipa",
+         "average_per": 0.2766466385175833,
+         "average_pwed": 0.10410683992600853,
+         "subset": "test",
+         "github_url": "https://huggingface.co/ginic/vary_individuals_old_only_1_wav2vec2-large-xlsr-buckeye-ipa",
+         "submission_date": "2024-12-05T11:06:07.984825"
+     },
+     {
+         "submission_id": "e3bbf521-cc32-43a6-bf1c-5ddc6bce04ab",
+         "submission_name": "koel labs initial ",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "average_per": 0.24242141955346685,
+         "average_pwed": 0.17395311976938,
+         "subset": "test",
+         "github_url": "https://github.com/KoelLabs/ML/",
+         "submission_date": "2024-12-12T16:07:25.391145"
+     }
+ ]
queue/results.json ADDED
@@ -0,0 +1,370 @@
+ [
+     {
+         "task_id": "721b4c64-a825-42d3-bb0a-bdff9ee1ed0f",
+         "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.33667301260691423,
+         "average_pwed": 0.1276725657099669,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃiːhædjɚdɑːɹksuːɾɪnɡɹiːsiwɑːʃwɑːɾɚɹɑːljiː",
+                 "per": 0.3939393939393939,
+                 "pwed": 0.13888888888888887
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "doʊntæskmiːtəkæɹiɐnoɪliɹæɡlaɪkðæt",
+                 "per": 0.32142857142857145,
+                 "pwed": 0.13541666666666666
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæptənwʌzθɪnændhæɡɚdændhɪzbjuːɾɪfəlbuːtswɜːwɔːɹnændʃæbi",
+                 "per": 0.3617021276595745,
+                 "pwed": 0.13915094339622644
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðəɹiːzənzfɜːðɪsdaɪvsiːmdfuːlɪʃnaʊ",
+                 "per": 0.20689655172413793,
+                 "pwed": 0.022988505747126433
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "pɹədʌkʃənmeɪfɔːlfɑːɹbᵻloʊɛkspɛkteɪʃənz",
+                 "per": 0.36363636363636365,
+                 "pwed": 0.1392857142857143
+             }
+         ],
+         "timestamp": "2024-12-05T07:32:06.849017"
+     },
+     {
+         "task_id": "d6fe0956-b5b4-4105-835e-8dee1872ee4d",
+         "model": "KoelLabs/xlsr-timit-b0",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.12572285528714347,
+         "average_pwed": 0.06476636812791145,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃihædjɹdɑɹksuɾɪnɡɹisiwɑʃwɔɾɹʔɔljɪɹ",
+                 "per": 0.12121212121212122,
+                 "pwed": 0.037990196078431376
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "oʊnæskmitikæɹinɔɪliɹæɡlaɪkðæt",
+                 "per": 0.14285714285714285,
+                 "pwed": 0.10632183908045977
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæpinwəsθɪnhæɡɹdinizbjuɾiflbutswɹwɔɹninʃæbi",
+                 "per": 0.10638297872340426,
+                 "pwed": 0.0425531914893617
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðəɹiznzfɹðistaɪvsimdfuliʃnaʊ",
+                 "per": 0.13793103448275862,
+                 "pwed": 0.04166666666666667
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "pɹdʌkʃnmeɪfɔlfɑɹbloʊɛkspɛkeɪʃəns",
+                 "per": 0.21212121212121213,
+                 "pwed": 0.10858585858585859
+             }
+         ],
+         "timestamp": "2024-12-05T08:25:24.980111"
+     },
+     {
+         "task_id": "dbf4642a-fb13-402c-8a74-cc41fc4be599",
+         "model": "speech31/wav2vec2-large-TIMIT-IPA",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.4415425496841929,
+         "average_pwed": 0.18625930002594002,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃihædjʊrdɑrksutɪngrisiwɑʃwɔtərɔljɪrrrɪrɪrʃ",
+                 "per": 0.5757575757575758,
+                 "pwed": 0.25
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "doʊntæskmitɪkɛriənɔɪliræglaɪkðəttm",
+                 "per": 0.35714285714285715,
+                 "pwed": 0.172979797979798
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæptɪnwɑzθɪnəndhægərdəndhɪzbjutəfəlbutswərwɔrnəndʃæbi",
+                 "per": 0.40425531914893614,
+                 "pwed": 0.17500000000000004
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðərizɪənzfərðɪstaɪvsimdfulɪʃnaʊaʊaʊ",
+                 "per": 0.3793103448275862,
+                 "pwed": 0.18928571428571428
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "prədəkʃənmeɪfɔlfɑrbɪloʊɛkspɛkteɪʃənzd",
+                 "per": 0.3939393939393939,
+                 "pwed": 0.13626126126126126
+             }
+         ],
+         "timestamp": "2024-12-05T09:36:14.568321"
+     },
+     {
+         "task_id": "912449a4-d7ed-4af4-b5be-5c2c57ec09ff",
+         "model": "Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.6318471187460027,
+         "average_pwed": 0.222932144739126,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʒihɛldjydɑrksydənrisiwɑswadərɑlhir",
+                 "per": 0.5454545454545454,
+                 "pwed": 0.11764705882352941
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "dɑnraːstɪkmədəkaːrənoːjliralɪkaːn",
+                 "per": 0.7857142857142857,
+                 "pwed": 0.2341954022988506
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "xisʃktəʋɑstɪnɛnhɛɪɡərdɛnenzbjudəvɔlbutvɔːrʋɔrnənʃaːbi",
+                 "per": 0.6595744680851063,
+                 "pwed": 0.18382352941176472
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "dərizənsvərdəstajfzimtvuləsna",
+                 "per": 0.6206896551724138,
+                 "pwed": 0.11781609195402297
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "pːdkəmeːvɑlvɑrbəloɛkspɛkteːʃəns",
+                 "per": 0.5454545454545454,
+                 "pwed": 0.2171717171717172
+             }
+         ],
+         "timestamp": "2024-12-05T10:17:21.331572"
+     },
+     {
+         "task_id": "c79df17e-2bb2-4253-ae26-f7cc6ab21265",
+         "model": "facebook/wav2vec2-xlsr-53-espeak-cv-ft",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.348845592557092,
+         "average_pwed": 0.1386742019529415,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃiːhædjɚdksuːtɪnɡɹiːsiwɑːʃwɑːɾɚɑːljɪ",
+                 "per": 0.48484848484848486,
+                 "pwed": 0.21338383838383837
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "doːntæskmitəkæɹiənoɪliɹæɡlaɪkðæt",
+                 "per": 0.32142857142857145,
+                 "pwed": 0.12634408602150538
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæptənwʌzθɪnænhæɡɚdændhɪzbjuːɾɪfʊbuːtswɚwoːnəndʃæbi",
+                 "per": 0.3617021276595745,
+                 "pwed": 0.13095238095238093
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðəɹiːzənzfɚðəsdɑːvsiːmdfuːlɪʃnæ",
+                 "per": 0.3793103448275862,
+                 "pwed": 0.12068965517241376
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "pɹədʌkʃənmeɪfɑːlfɑːbəloʊɛkspɛkteɪʃənz",
+                 "per": 0.36363636363636365,
+                 "pwed": 0.14404761904761906
+             }
+         ],
+         "timestamp": "2024-12-05T10:34:26.154521"
+     },
+     {
+         "task_id": "f36060e6-a746-44dc-a527-54995b270053",
+         "model": "ginic/hyperparam_tuning_1_wav2vec2-large-xlsr-buckeye-ipa",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.2766466385175833,
+         "average_pwed": 0.10410683992600853,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃihædjɹ̩dɑɹksuɾɪnɡɹeɪsiwɑʃwɔɾɹ̩ɔljiɹ",
+                 "per": 0.24242424242424243,
+                 "pwed": 0.09926470588235292
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "doʊndæskmidɪkæɹiɛnɔɪliɹæɡlaɪkðæʔ",
+                 "per": 0.32142857142857145,
+                 "pwed": 0.14192708333333334
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæptɪnwʌzθɪnɛnhæɡɹ̩dɛnɪzbjuɾʌfl̩butswɹ̩wɔɹnɛnʃæbi",
+                 "per": 0.2553191489361702,
+                 "pwed": 0.05357142857142857
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðʌɹizʌnzfɹ̩ðʌstaɪvsimdfulɪʃnaʊ",
+                 "per": 0.20689655172413793,
+                 "pwed": 0.01293103448275862
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "pɹʌdʌkʃʌnmeɪfɔlfɑɹbʌloʊɛkspɛkteɪʃʌns",
+                 "per": 0.2727272727272727,
+                 "pwed": 0.10416666666666667
+             }
+         ],
+         "timestamp": "2024-12-05T11:06:07.981224"
+     },
+     {
+         "task_id": "47d56349-8111-4bda-a47f-e007dbedd36d",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.24242141955346685,
+         "average_pwed": 0.17395311976938,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃihædjɹdɑɹksuɾɪnɡɹisiwɑʃwɔɾɹʔɔljɪɹ",
+                 "per": 0.12121212121212122,
+                 "pwed": 0.037990196078431376
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "ɪoʊnæskmitikæɹinɔɪliɹæɡlaɪkðt",
+                 "per": 0.21428571428571427,
+                 "pwed": 0.1695402298850575
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæpinwəsθɪninhæɡɹdinhizbjuɾiflbutswɹwɔɹnintʃæbi",
+                 "per": 0.1276595744680851,
+                 "pwed": 0.06499999999999999
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðəɹiznzfɹðistaɪ",
+                 "per": 0.5862068965517241,
+                 "pwed": 0.4899425287356322
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "ɹidʌkʃinmeɪfɔlfɑɹbəloʊɛkspɛkeɪ",
+                 "per": 0.21212121212121213,
+                 "pwed": 0.1553030303030303
+             }
+         ],
+         "timestamp": "2024-12-12T15:53:07.584096"
+     },
+     {
+         "task_id": "51dd5735-63bd-4fe5-a588-c0fc079076e0",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "subset": "test",
+         "num_files": 1680,
+         "average_per": 0.24242141955346685,
+         "average_pwed": 0.17395311976938,
+         "detailed_results": [
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA1.WAV",
+                 "ground_truth": "ʃihædjɹdɑɹksuɾɪŋgɹisiwɑʃwɑɾɹʔɔljiɹ",
+                 "prediction": "ʃihædjɹdɑɹksuɾɪnɡɹisiwɑʃwɔɾɹʔɔljɪɹ",
+                 "per": 0.12121212121212122,
+                 "pwed": 0.037990196078431376
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SA2.WAV",
+                 "ground_truth": "oʊnæsmitikɛɹiinɔɪliɹæglaɪkðæt",
+                 "prediction": "ɪoʊnæskmitikæɹinɔɪliɹæɡlaɪkðt",
+                 "per": 0.21428571428571427,
+                 "pwed": 0.1695402298850575
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI1573.WAV",
+                 "ground_truth": "hɪzkæpinwəsθɪnænhægɹdinɪzbjuɾuflbutswɹwɔɹninʃæbi",
+                 "prediction": "hɪzkæpinwəsθɪninhæɡɹdinhizbjuɾiflbutswɹwɔɹnintʃæbi",
+                 "per": 0.1276595744680851,
+                 "pwed": 0.06499999999999999
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI2203.WAV",
+                 "ground_truth": "ðiɹizənzfɹðɪsdaɪvsimdfuliʃnaʊ",
+                 "prediction": "ðəɹiznzfɹðistaɪ",
+                 "per": 0.5862068965517241,
+                 "pwed": 0.4899425287356322
+             },
+             {
+                 "file": "data/TEST/DR1/FAKS0/SI943.WAV",
+                 "ground_truth": "ɹdʌkʃinmeɪfɔlfɑɹbəloʊəkspikeɪʃnts",
+                 "prediction": "ɹidʌkʃinmeɪfɔlfɑɹbəloʊɛkspɛkeɪ",
+                 "per": 0.21212121212121213,
+                 "pwed": 0.1553030303030303
+             }
+         ],
+         "timestamp": "2024-12-12T16:07:25.389475"
+     }
+ ]
queue/tasks.json ADDED
@@ -0,0 +1,92 @@
+ [
+     {
+         "id": "721b4c64-a825-42d3-bb0a-bdff9ee1ed0f",
+         "model": "facebook/wav2vec2-lv-60-espeak-cv-ft",
+         "subset": "test",
+         "submission_name": "facebook espeak",
+         "github_url": "https://github.com/facebookresearch/fairseq/blob/main/examples/wav2vec/README.md",
+         "status": "completed",
+         "submitted_at": "2024-12-05T07:19:03.076292"
+     },
+     {
+         "id": "d6fe0956-b5b4-4105-835e-8dee1872ee4d",
+         "model": "KoelLabs/xlsr-timit-b0",
+         "subset": "test",
+         "submission_name": "english phoneme model",
+         "github_url": "https://github.com/KoelLabs/",
+         "status": "completed",
+         "submitted_at": "2024-12-05T08:12:40.161444"
+     },
+     {
+         "id": "dbf4642a-fb13-402c-8a74-cc41fc4be599",
+         "model": "speech31/wav2vec2-large-TIMIT-IPA",
+         "subset": "test",
+         "submission_name": "speech 31 model",
+         "github_url": "https://huggingface.co/speech31/wav2vec2-large-TIMIT-IPA2",
+         "status": "completed",
+         "submitted_at": "2024-12-05T09:13:45.315361"
+     },
+     {
+         "id": "4e3b80be-b255-47f2-b4ae-18a12e232e8a",
+         "model": "Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5",
+         "subset": "test",
+         "submission_name": "Jubliano model",
+         "github_url": "https://huggingface.co/Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5WithoutSpaces/tree/d5312009d8e620b183c334dfdd9ffc6b4f06f8c1",
+         "status": "processing",
+         "submitted_at": "2024-12-05T09:36:14.571930"
+     },
+     {
+         "id": "912449a4-d7ed-4af4-b5be-5c2c57ec09ff",
+         "model": "Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5",
+         "subset": "test",
+         "submission_name": "jubiliano model wav2vec2",
+         "github_url": "https://huggingface.co/Jubliano/wav2vec2-large-xls-r-300m-ipa-INTERNATIONAL1.5WithoutSpaces/tree/d5312009d8e620b183c334dfdd9ffc6b4f06f8c1",
+         "status": "completed",
+         "submitted_at": "2024-12-05T10:01:40.502935"
+     },
+     {
+         "id": "c79df17e-2bb2-4253-ae26-f7cc6ab21265",
+         "model": "facebook/wav2vec2-xlsr-53-espeak-cv-ft",
+         "subset": "test",
+         "submission_name": "xlsr 53 model",
+         "github_url": "https://github.com/facebookresearch/fairseq/blob/main/examples/wav2vec/README.md",
+         "status": "completed",
+         "submitted_at": "2024-12-05T10:18:37.408664"
+     },
+     {
+         "id": "f36060e6-a746-44dc-a527-54995b270053",
+         "model": "ginic/hyperparam_tuning_1_wav2vec2-large-xlsr-buckeye-ipa",
+         "subset": "test",
+         "submission_name": "ginic model wav2vec2 finetuned on buckeye",
+         "github_url": "https://huggingface.co/ginic/vary_individuals_old_only_1_wav2vec2-large-xlsr-buckeye-ipa",
+         "status": "completed",
+         "submitted_at": "2024-12-05T10:36:02.340422"
+     },
+     {
+         "id": "abf6c247-9faf-46ef-b0fa-25f2669da922",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "subset": "test",
+         "submission_name": "Koel Labs early version of finetuned model ",
+         "github_url": "https://github.com/KoelLabs/ML",
+         "status": "processing",
+         "submitted_at": "2024-12-05T11:08:23.663553"
+     },
+     {
+         "id": "47d56349-8111-4bda-a47f-e007dbedd36d",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "subset": "test",
+         "submission_name": "koel labs initial ",
+         "github_url": "https://github.com/KoelLabs/ML/",
+         "status": "completed",
+         "submitted_at": "2024-12-12T15:28:12.923626"
+     },
+     {
+         "id": "51dd5735-63bd-4fe5-a588-c0fc079076e0",
+         "model": "KoelLabs/xlsr-timit-a0",
+         "subset": "test",
+         "submission_name": "koel labs initial ",
+         "github_url": "https://github.com/KoelLabs/ML/",
+         "status": "completed",
+         "submitted_at": "2024-12-12T15:53:07.620070"
+     }
+ ]