Shokoufehhh committed on
Commit
c4644b2
1 Parent(s): a2cde0e

Upload config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +58 -0
config.yaml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # General model information
2
+ language: en
3
+ tags:
4
+ - speech-enhancement
5
+ - dereverberation
6
+ - diffusion-models
7
+ - generative-models
8
+ - pytorch
9
+ - audio-processing
10
+ license: mit
11
+ datasets:
12
+ - VoiceBank-DEMAND
13
+ - WSJ0-CHiME3
14
+ - WSJ0-REVERB
15
+ - EARS-WHAM
16
+ - EARS-Reverb
17
+ model_name: speech-enhancement-dereverberation-diffusion
18
+ model_type: diffusion-based-generative-model
19
+ library_name: pytorch
20
+ pipeline_tag: audio-to-audio
21
+
22
+ # Important files for the model
23
+ key_files:
24
+ - model.py
25
+ - train.py
26
+ - inference.py
27
+ - enhancement.py
28
+ - requirements.txt
29
+ - calc_metrics.py
30
+
31
+ # Pretrained model checkpoints
32
+ pretrained_checkpoints:
33
+ voicebank_demand:
34
+ description: SGMSE+ trained on VoiceBank-DEMAND
35
+ download_link: https://drive.google.com/drive/folders/1CSnkhUSoiv3RG0xg7WEcVapyLuwDaLbe?usp=sharing
36
+ gdown_id: 1_H3EXvhcYBhOZ9QNUcD5VZHc6ktrRbwQ
37
+ wsj0_chime3:
38
+ description: SGMSE+ trained on WSJ0-CHiME3
39
+ download_link: https://drive.google.com/drive/folders/1CSnkhUSoiv3RG0xg7WEcVapyLuwDaLbe?usp=sharing
40
+ gdown_id: 16K4DUdpmLhDNC7pJhBBc08pkSIn_yMPi
41
+ wsj0_reverb:
42
+ description: SGMSE+ trained on WSJ0-REVERB
43
+ download_link: https://drive.google.com/drive/folders/1082_PSEgrqoVVrNsAkSIcpLF1AAtzGwV?usp=sharing
44
+ gdown_id: 1eiOy0VjHh9V9ZUFTxu1Pq2w19izl9ejD
45
+ ears_wham:
46
+ description: SGMSE+ trained on EARS-WHAM
47
+ download_link: https://drive.google.com/drive/folders/1Tn6pVwjxUAy1DJ8167JCg3enuSi0hiw5?usp=sharing
48
+ gdown_id: 1t_DLLk8iPH6nj8M5wGeOP3jFPaz3i7K5
49
+ ears_reverb:
50
+ description: SGMSE+ trained on EARS-Reverb
51
+ download_link: https://drive.google.com/drive/folders/1PunXuLbuyGkknQCn_y-RCV2dTZBhyE3V?usp=sharing
52
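+ gdown_id: 1PunXuLbuyGkknQCn_y-RCV2dTZBhyE3V  # NOTE(review): same as the folder ID in download_link above, unlike the other entries; confirm this is the intended ID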
53
+
54
+ # Citation references for the model
55
+ citations:
56
+ - '@inproceedings{welker22speech, author={Simon Welker and Julius Richter and Timo Gerkmann}, title={Speech Enhancement with Score-Based Generative Models in the Complex {STFT} Domain}, year={2022}, booktitle={Proc. Interspeech 2022}, pages={2928--2932}, doi={10.21437/Interspeech.2022-10653}}'
57
+ - '@article{richter2023speech, title={Speech Enhancement and Dereverberation with Diffusion-based Generative Models}, author={Richter, Julius and Welker, Simon and Lemercier, Jean-Marie and Lay, Bunlong and Gerkmann, Timo}, journal={IEEE/ACM Transactions on Audio, Speech, and Language Processing}, volume={31}, pages={2351--2364}, year={2023}, doi={10.1109/TASLP.2023.3285241}}'
58
+ - '@inproceedings{richter2024ears, title={{EARS}: An Anechoic Fullband Speech Dataset Benchmarked for Speech Enhancement and Dereverberation}, author={Richter, Julius and Wu, Yi-Chiao and Krenn, Steven and Welker, Simon and Lay, Bunlong and Watanabe, Shinji and Richard, Alexander and Gerkmann, Timo}, booktitle={ISCA Interspeech}, year={2024}}'