Joseph Pollack committed
Commit 7b2aced · unverified · 1 Parent(s): b55e1b0

improves dataset push to huggingface
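The interface now uploads the referenced audio files alongside the JSONL and rewrites `audio_path` to relative `audio/<name>` paths, and the script gains a `--test` flag that validates the JSONL without uploading. As a minimal sketch of the updated flow (not part of the commit): it assumes `interface.py` is importable from the repo root, a write token is exported as `HF_TOKEN`, and the `data.jsonl` path and `username/my-voxtral-data` repo id are placeholders.

```python
# Sketch: drive the updated dataset push directly from Python.
# Assumes data.jsonl rows look like {"audio_path": "/abs/path/clip.wav", "text": "..."}.
import os

assert os.getenv("HF_TOKEN"), "export a write token as HF_TOKEN first"

from interface import _push_dataset_to_hub  # helper added/extended in this commit

# Audio files referenced by data.jsonl are uploaded alongside the JSONL,
# and audio_path values are rewritten to relative "audio/<name>" paths.
status = _push_dataset_to_hub("data.jsonl", "username/my-voxtral-data")
print(status)  # "✅ Dataset pushed to: ..." or "❌ Failed to push dataset: ..."
```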

Files changed (3)
  1. README.md +1 -1
  2. interface.py +103 -10
  3. scripts/push_to_huggingface.py +209 -32
README.md CHANGED
@@ -1,6 +1,6 @@
 ---
 title: VoxFactory
-emoji: 📚
+emoji: 🌬️
 colorFrom: gray
 colorTo: red
 sdk: gradio
interface.py CHANGED
@@ -177,11 +177,12 @@ def _save_uploaded_dataset(files: list, transcripts: list[str]) -> str:


 def _push_dataset_to_hub(jsonl_path: str, repo_name: str, username: str = "") -> str:
-    """Push dataset to Hugging Face Hub"""
+    """Push dataset to Hugging Face Hub including audio files"""
     try:
         from huggingface_hub import HfApi, create_repo
         import json
         from pathlib import Path
+        import os

         token = os.getenv("HF_TOKEN") or os.getenv("HF_WRITE_TOKEN") or os.getenv("HUGGINGFACE_HUB_TOKEN")

@@ -210,16 +211,74 @@ def _push_dataset_to_hub(jsonl_path: str, repo_name: str, username: str = "") ->
         if not jsonl_file.exists():
             return f"❌ Dataset file not found: {jsonl_path}"

-        # Upload the JSONL file
+        # Read and process the JSONL to collect audio files and update paths
+        audio_files = []
+        updated_rows = []
+        total_audio_size = 0
+
+        with open(jsonl_file, "r", encoding="utf-8") as f:
+            for line_num, line in enumerate(f):
+                try:
+                    row = json.loads(line.strip())
+                    audio_path = row.get("audio_path", "")
+
+                    if audio_path:
+                        audio_file = Path(audio_path)
+                        if audio_file.exists():
+                            # Store the original file for upload
+                            audio_files.append(audio_file)
+                            total_audio_size += audio_file.stat().st_size
+
+                            # Update path to be relative for the dataset
+                            row["audio_path"] = f"audio/{audio_file.name}"
+                        else:
+                            print(f"⚠️ Warning: Audio file not found: {audio_path}")
+                            row["audio_path"] = ""  # Clear missing files
+
+                    updated_rows.append(row)
+                except json.JSONDecodeError as e:
+                    print(f"⚠️ Warning: Invalid JSON on line {line_num + 1}: {e}")
+                    continue
+
+        # Create updated JSONL with relative paths
+        temp_jsonl_path = jsonl_file.parent / "temp_data.jsonl"
+        with open(temp_jsonl_path, "w", encoding="utf-8") as f:
+            for row in updated_rows:
+                f.write(json.dumps(row, ensure_ascii=False) + "\n")
+
+        # Upload the updated JSONL file
         api.upload_file(
-            path_or_fileobj=str(jsonl_file),
+            path_or_fileobj=str(temp_jsonl_path),
             path_in_repo="data.jsonl",
             repo_id=repo_name,
             repo_type="dataset",
             token=token
         )

-        # Create a simple README for the dataset
+        # Clean up temp file
+        temp_jsonl_path.unlink()
+
+        # Upload audio files
+        uploaded_count = 0
+        for audio_file in audio_files:
+            try:
+                remote_path = f"audio/{audio_file.name}"
+                api.upload_file(
+                    path_or_fileobj=str(audio_file),
+                    path_in_repo=remote_path,
+                    repo_id=repo_name,
+                    repo_type="dataset",
+                    token=token
+                )
+                uploaded_count += 1
+                print(f"✅ Uploaded audio file: {audio_file.name}")
+            except Exception as e:
+                print(f"❌ Failed to upload {audio_file.name}: {e}")
+
+        # Calculate total dataset size
+        total_dataset_size = jsonl_file.stat().st_size + total_audio_size
+
+        # Create README for the dataset
         readme_content = f"""---
 dataset_info:
   features:
@@ -230,9 +289,15 @@ dataset_info:
   splits:
   - name: train
     num_bytes: {jsonl_file.stat().st_size}
-    num_examples: {sum(1 for _ in open(jsonl_file))}
-  download_size: {jsonl_file.stat().st_size}
-  dataset_size: {jsonl_file.stat().st_size}
+    num_examples: {len(updated_rows)}
+  download_size: {total_dataset_size}
+  dataset_size: {total_dataset_size}
+tags:
+- voxtral
+- asr
+- speech-to-text
+- fine-tuning
+- audio-dataset
 ---

 # Voxtral ASR Dataset
@@ -241,15 +306,43 @@ This dataset was created using the Voxtral ASR Fine-tuning Interface.

 ## Dataset Structure

-- **audio_path**: Path to the audio file
+- **audio_path**: Relative path to the audio file (stored in `audio/` directory)
 - **text**: Transcription of the audio

+## Dataset Statistics
+
+- **Number of examples**: {len(updated_rows)}
+- **Audio files uploaded**: {uploaded_count}
+- **Total dataset size**: {total_dataset_size:,} bytes
+
 ## Usage

 ```python
-from datasets import load_dataset
+from datasets import load_dataset, Audio
+
+# Load dataset
+dataset = load_dataset("{repo_name}")
+
+# Load audio data
+dataset = dataset.cast_column("audio_path", Audio())
+
+# Access first example
+print(dataset[0]["text"])
+print(dataset[0]["audio_path"])
+```
+
+## Loading with Audio Decoding
+
+```python
+from datasets import load_dataset, Audio

+# Load with automatic audio decoding
 dataset = load_dataset("{repo_name}")
+dataset = dataset.cast_column("audio_path", Audio(sampling_rate=16000))
+
+# The audio column will contain the decoded audio arrays
+audio_array = dataset[0]["audio_path"]["array"]
+sampling_rate = dataset[0]["audio_path"]["sampling_rate"]
 ```
 """

@@ -268,7 +361,7 @@ dataset = load_dataset("{repo_name}")

         readme_path.unlink()  # Clean up temp file

-        return f"✅ Dataset pushed to: https://huggingface.co/datasets/{repo_name}"
+        return f"✅ Dataset pushed to: https://huggingface.co/datasets/{repo_name}\n📊 Uploaded {len(updated_rows)} examples and {uploaded_count} audio files"

     except Exception as e:
         return f"❌ Failed to push dataset: {e}"
scripts/push_to_huggingface.py CHANGED
@@ -502,11 +502,11 @@ MIT License
         return True

     def push_dataset(self, dataset_path: str, dataset_repo_name: str) -> bool:
-        """Push dataset to Hugging Face Hub"""
+        """Push dataset to Hugging Face Hub including audio files"""
         logger.info(f"🚀 Starting dataset push to {dataset_repo_name}")

         try:
-            from huggingface_hub import create_repo
+            from huggingface_hub import create_repo, upload_file
             import json

             # Determine full dataset repo name
@@ -529,15 +529,44 @@ MIT License
                 logger.error(f"❌ Dataset file not found: {dataset_path}")
                 return False

-            # Count lines for metadata
-            with open(dataset_file, 'r', encoding='utf-8') as f:
-                num_examples = sum(1 for _ in f)
-
-            file_size = dataset_file.stat().st_size
+            # Read and process the JSONL to collect audio files and update paths
+            audio_files = []
+            updated_rows = []
+            total_audio_size = 0

-            # Upload the dataset file
+            with open(dataset_file, 'r', encoding='utf-8') as f:
+                for line_num, line in enumerate(f):
+                    try:
+                        row = json.loads(line.strip())
+                        audio_path = row.get("audio_path", "")
+
+                        if audio_path:
+                            audio_file = Path(audio_path)
+                            if audio_file.exists():
+                                # Store the original file for upload
+                                audio_files.append(audio_file)
+                                total_audio_size += audio_file.stat().st_size
+
+                                # Update path to be relative for the dataset
+                                row["audio_path"] = f"audio/{audio_file.name}"
+                            else:
+                                logger.warning(f"Audio file not found: {audio_path}")
+                                row["audio_path"] = ""  # Clear missing files
+
+                        updated_rows.append(row)
+                    except json.JSONDecodeError as e:
+                        logger.warning(f"Invalid JSON on line {line_num + 1}: {e}")
+                        continue
+
+            # Create updated JSONL with relative paths
+            temp_jsonl_path = dataset_file.parent / "temp_data.jsonl"
+            with open(temp_jsonl_path, "w", encoding="utf-8") as f:
+                for row in updated_rows:
+                    f.write(json.dumps(row, ensure_ascii=False) + "\n")
+
+            # Upload the updated JSONL file
             upload_file(
-                path_or_fileobj=str(dataset_file),
+                path_or_fileobj=str(temp_jsonl_path),
                 path_in_repo="data.jsonl",
                 repo_id=dataset_repo_name,
                 repo_type="dataset",
@@ -545,7 +574,30 @@ MIT License
             )
             logger.info(f"✅ Uploaded dataset file: {dataset_file.name}")

-            # Create a dataset README
+            # Clean up temp file
+            temp_jsonl_path.unlink()
+
+            # Upload audio files
+            uploaded_count = 0
+            for audio_file in audio_files:
+                try:
+                    remote_path = f"audio/{audio_file.name}"
+                    upload_file(
+                        path_or_fileobj=str(audio_file),
+                        path_in_repo=remote_path,
+                        repo_id=dataset_repo_name,
+                        repo_type="dataset",
+                        token=self.token
+                    )
+                    uploaded_count += 1
+                    logger.info(f"✅ Uploaded audio file: {audio_file.name}")
+                except Exception as e:
+                    logger.error(f"❌ Failed to upload {audio_file.name}: {e}")
+
+            # Calculate total dataset size
+            total_dataset_size = dataset_file.stat().st_size + total_audio_size
+
+            # Create a comprehensive dataset README
             readme_content = f"""---
 dataset_info:
   features:
@@ -555,18 +607,17 @@ dataset_info:
     dtype: string
   splits:
   - name: train
-    num_bytes: {file_size}
-    num_examples: {num_examples}
-  download_size: {file_size}
-  dataset_size: {file_size}
+    num_bytes: {dataset_file.stat().st_size}
+    num_examples: {len(updated_rows)}
+  download_size: {total_dataset_size}
+  dataset_size: {total_dataset_size}
 tags:
 - voxtral
 - asr
-- fine-tuning
-- conversational
 - speech-to-text
-- audio-to-text
-- tonic
+- fine-tuning
+- audio-dataset
+- tonic
 ---

 # Voxtral ASR Dataset
@@ -575,21 +626,53 @@ This dataset was created for fine-tuning Voxtral ASR models.

 ## Dataset Structure

-- **audio_path**: Path to the audio file
+- **audio_path**: Relative path to the audio file (stored in `audio/` directory)
 - **text**: Transcription of the audio

-## Statistics
+## Dataset Statistics

-- Number of examples: {num_examples}
-- File size: {file_size} bytes
+- **Number of examples**: {len(updated_rows)}
+- **Audio files uploaded**: {uploaded_count}
+- **Total dataset size**: {total_dataset_size:,} bytes

 ## Usage

 ```python
-from datasets import load_dataset
+from datasets import load_dataset, Audio

+# Load dataset
 dataset = load_dataset("{dataset_repo_name}")
+
+# Load audio data
+dataset = dataset.cast_column("audio_path", Audio())
+
+# Access first example
+print(dataset[0]["text"])
+print(dataset[0]["audio_path"])
 ```
+
+## Loading with Audio Decoding
+
+```python
+from datasets import load_dataset, Audio
+
+# Load with automatic audio decoding
+dataset = load_dataset("{dataset_repo_name}")
+dataset = dataset.cast_column("audio_path", Audio(sampling_rate=16000))
+
+# The audio column will contain the decoded audio arrays
+audio_array = dataset[0]["audio_path"]["array"]
+sampling_rate = dataset[0]["audio_path"]["sampling_rate"]
+```
+
+## Dataset Features
+
+This dataset contains audio files with corresponding transcriptions for Voxtral ASR model fine-tuning.
+All audio files are stored in the `audio/` directory and referenced using relative paths in the dataset.
+
+## License
+
+This dataset is created for research and educational purposes.
 """

         # Upload README
@@ -609,13 +692,97 @@ dataset = load_dataset("{dataset_repo_name}")

             logger.info(f"✅ Dataset README uploaded")
             logger.info(f"🎉 Dataset successfully pushed to: https://huggingface.co/datasets/{dataset_repo_name}")
+            logger.info(f"📊 Uploaded {len(updated_rows)} examples and {uploaded_count} audio files")

             return True

         except Exception as e:
             logger.error(f"❌ Failed to push dataset: {e}")
             return False
-
+
+    def test_dataset_push(self, dataset_path: str) -> bool:
+        """Test dataset validation without uploading to Hugging Face Hub"""
+        logger.info(f"🧪 Testing dataset validation for {dataset_path}")
+
+        try:
+            # Read the dataset file
+            dataset_file = Path(dataset_path)
+            if not dataset_file.exists():
+                logger.error(f"❌ Dataset file not found: {dataset_path}")
+                return False
+
+            # Read and process the JSONL to validate audio files
+            audio_files = []
+            updated_rows = []
+            total_audio_size = 0
+            missing_files = []
+            invalid_json_lines = []
+
+            with open(dataset_file, 'r', encoding='utf-8') as f:
+                for line_num, line in enumerate(f):
+                    try:
+                        row = json.loads(line.strip())
+                        audio_path = row.get("audio_path", "")
+
+                        if audio_path:
+                            audio_file = Path(audio_path)
+                            if audio_file.exists():
+                                # Store the file info for validation
+                                audio_files.append(audio_file)
+                                total_audio_size += audio_file.stat().st_size
+                            else:
+                                missing_files.append(str(audio_path))
+
+                        updated_rows.append(row)
+                    except json.JSONDecodeError as e:
+                        invalid_json_lines.append(f"Line {line_num + 1}: {e}")
+                        continue
+
+            # Report validation results
+            logger.info("📊 Dataset Validation Results:")
+            logger.info(f"   - Total examples: {len(updated_rows)}")
+            logger.info(f"   - Valid audio files: {len(audio_files)}")
+            logger.info(f"   - Total audio size: {total_audio_size:,} bytes")
+            logger.info(f"   - Missing audio files: {len(missing_files)}")
+            logger.info(f"   - Invalid JSON lines: {len(invalid_json_lines)}")
+
+            if missing_files:
+                logger.warning("⚠️ Missing audio files:")
+                for missing in missing_files[:5]:  # Show first 5
+                    logger.warning(f"   - {missing}")
+                if len(missing_files) > 5:
+                    logger.warning(f"   ... and {len(missing_files) - 5} more")
+
+            if invalid_json_lines:
+                logger.warning("⚠️ Invalid JSON lines:")
+                for invalid in invalid_json_lines[:3]:  # Show first 3
+                    logger.warning(f"   - {invalid}")
+                if len(invalid_json_lines) > 3:
+                    logger.warning(f"   ... and {len(invalid_json_lines) - 3} more")
+
+            # Show sample of how paths will be converted
+            if audio_files:
+                logger.info("🔄 Path conversion preview:")
+                for audio_file in audio_files[:3]:  # Show first 3
+                    logger.info(f"   - {str(audio_file)} → audio/{audio_file.name}")
+
+            # Overall validation status
+            if len(updated_rows) == 0:
+                logger.error("❌ No valid examples found in dataset")
+                return False
+
+            if len(missing_files) > 0:
+                logger.warning("⚠️ Some audio files are missing - they will be skipped during upload")
+            else:
+                logger.info("✅ All audio files found and valid")
+
+            logger.info("✅ Dataset validation completed successfully!")
+            return True
+
+        except Exception as e:
+            logger.error(f"❌ Failed to validate dataset: {e}")
+            return False
+
     def _load_training_config(self) -> Dict[str, Any]:
         """Load training configuration"""
         config_path = self.model_path / "training_config.json"
@@ -656,6 +823,7 @@ def parse_args():
     dataset_parser.add_argument('repo_name', type=str, help='Hugging Face dataset repository name')
     dataset_parser.add_argument('--token', type=str, default=None, help='Hugging Face token')
     dataset_parser.add_argument('--private', action='store_true', help='Make repository private')
+    dataset_parser.add_argument('--test', action='store_true', help='Test mode - validate dataset without uploading')

     return parser.parse_args()

@@ -710,15 +878,24 @@ def main():
             private=args.private
         )

-        # Push dataset
-        success = pusher.push_dataset(args.dataset_path, args.repo_name)
-
-        if success:
-            logger.info("✅ Dataset push completed successfully!")
-            logger.info(f"📊 View your dataset at: https://huggingface.co/datasets/{args.repo_name}")
+        if getattr(args, 'test', False):
+            # Test mode - validate dataset without uploading
+            success = pusher.test_dataset_push(args.dataset_path)
+            if success:
+                logger.info("✅ Dataset validation completed successfully!")
+            else:
+                logger.error("❌ Dataset validation failed!")
+                return 1
         else:
-            logger.error("❌ Dataset push failed!")
-            return 1
+            # Push dataset
+            success = pusher.push_dataset(args.dataset_path, args.repo_name)
+
+            if success:
+                logger.info("✅ Dataset push completed successfully!")
+                logger.info(f"📊 View your dataset at: https://huggingface.co/datasets/{args.repo_name}")
+            else:
+                logger.error("❌ Dataset push failed!")
+                return 1

     except Exception as e:
         logger.error(f"❌ Error during push: {e}")