aztro committed on
Commit
6bbd5b5
·
1 Parent(s): a4f9390

Initial commit with RVC demo

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ added_IVF403_Flat_nprobe_1_yebama_v2.index filter=lfs diff=lfs merge=lfs -text
added_IVF403_Flat_nprobe_1_yebama_v2.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0fb2e3b992e0d836b5e4976b660268ecd6bfb738f0f4fae239614d0264e805c
3
+ size 49714419
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import librosa
4
+ import numpy as np
5
+ from huggingface_hub import hf_hub_download
6
+ import zipfile
7
+ import os
8
+
9
+ # Descargar el modelo desde tu otra cuenta de Hugging Face
10
def load_model(repo_id="gitgato/yebama"):
    """Download the RVC model artifacts from the Hugging Face Hub.

    Fetches the checkpoint, the feature index and the configuration file
    from ``repo_id`` and returns where they were stored locally, so that a
    caller can hand the paths to an RVC loader.

    Args:
        repo_id: Hub repository that hosts the artifacts. Defaults to the
            repository the demo was written against.

    Returns:
        A ``(model_path, index_path, config_path)`` tuple of local file
        paths, in that order.
    """
    # File names must match the ones published in the repository; adjust
    # them if the repository layout changes.
    model_path = hf_hub_download(repo_id=repo_id, filename="yebama_e200_s3200.pth")
    index_path = hf_hub_download(
        repo_id=repo_id,
        filename="IVF403_Flat_nprobe_1_yebama_v2.index",
    )
    config_path = hf_hub_download(repo_id=repo_id, filename="config.json")

    # TODO: build the actual RVC model from these paths once an inference
    # implementation is available (this depends on the RVC library used).
    return model_path, index_path, config_path
21
+
22
def process_audio(audio_file):
    """Run the (placeholder) voice conversion on an uploaded audio file.

    Args:
        audio_file: Path of the uploaded audio file, or ``None`` when the
            form was submitted without any audio.

    Returns:
        ``None`` when no audio was provided; otherwise a
        ``(sample_rate, samples)`` tuple holding the audio loaded at
        44100 Hz.
    """
    # Submitting the form without an upload yields None; passing that to
    # librosa would raise, so return an empty result instead.
    if audio_file is None:
        return None

    # Load the upload at a fixed 44.1 kHz sample rate.
    audio, sr = librosa.load(audio_file, sr=44100)

    # TODO: replace this pass-through with real RVC inference, e.g.
    # processed_audio = model.infer(audio)
    processed_audio = audio

    return sr, processed_audio
33
+
34
# Gradio interface: one audio upload in, one converted audio clip out.
_audio_input = gr.Audio(type="filepath", label="Sube tu audio")
_audio_output = gr.Audio(label="Audio convertido")

iface = gr.Interface(
    fn=process_audio,
    inputs=_audio_input,
    outputs=_audio_output,
    title="Demo de Yebama RVC",
    description="Convierte tu voz con el modelo Yebama RVC.",
)

# Start serving the demo as soon as this script runs.
iface.launch()
config.json ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<pad>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "3": {
28
+ "content": "<unk>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "79": {
36
+ "content": "<mask>",
37
+ "lstrip": true,
38
+ "normalized": true,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "80": {
44
+ "content": "<ctc_blank>",
45
+ "lstrip": false,
46
+ "normalized": true,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": false
50
+ }
51
+ },
52
+ "bos_token": "<s>",
53
+ "clean_up_tokenization_spaces": true,
54
+ "eos_token": "</s>",
55
+ "mask_token": "<mask>",
56
+ "model_max_length": 600,
57
+ "normalize": false,
58
+ "pad_token": "<pad>",
59
+ "processor_class": "SpeechT5Processor",
60
+ "sp_model_kwargs": {},
61
+ "tokenizer_class": "SpeechT5Tokenizer",
62
+ "unk_token": "<unk>"
63
+ }
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=3.0
2
+ torch>=1.10
3
+ librosa>=0.9.0
4
+ huggingface_hub
5
+ numpy
response_1694593941525.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stability": 0.85,
3
+ "similarity_boost": 0.89,
4
+ "style": 0.18,
5
+ "use_speaker_boost": true
6
+ }
7
+
8
+
9
+ access-control-allow-headers: *
10
+ access-control-allow-methods: POST,OPTIONS,DELETE,GET
11
+ access-control-allow-origin: *
12
+ alt-svc: h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 content-length: 80
13
+ content-type: application/json
14
+ date: Wed,13 Sep 2023 08:32:18 GMT
15
+ server: uvicorn
16
+ via: 1.1 google
17
+ x-firefox-spdy: h2
18
+
19
+
20
+ {
21
+ "stability": 0,
22
+ "similarity_boost": 0,
23
+ "style": 0,
24
+ "use_speaker_boost": true
25
+ }
26
+
27
+ {
28
+ "detail": [
29
+ {
30
+ "loc": [
31
+ "string",
32
+ 0
33
+ ],
34
+ "msg": "string",
35
+ "type": "string"
36
+ }
37
+ ]
38
+ }
response_1694594214307.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stability": 0.93,
3
+ "similarity_boost": 0.96,
4
+ "style": 0.23,
5
+ "use_speaker_boost": true
6
+ }
7
+
8
+ access-control-allow-headers: *
9
+ access-control-allow-methods: POST,OPTIONS,DELETE,GET
10
+ access-control-allow-origin: *
11
+ alt-svc: h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
12
+ content-length: 80
13
+ content-type: application/json
14
+ date: Wed,13 Sep 2023 08:36:51 GMT
15
+ server: uvicorn
16
+ via: 1.1 google x-firefox-spdy: h2
17
+
18
+ {
19
+ "stability": 0,
20
+ "similarity_boost": 0,
21
+ "style": 0,
22
+ "use_speaker_boost": true
23
+ }
24
+
25
+ {
26
+ "detail": [
27
+ {
28
+ "loc": [
29
+ "string",
30
+ 0
31
+ ],
32
+ "msg": "string",
33
+ "type": "string"
34
+ }
35
+ ]
36
+ }
yebama.zip ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdba5e3d3acd56cb7ddf5d6e3d6f0867c36cef5c5cac63c4d45e2b28791e1426
3
+ size 104943041
yebama_e200_s3200.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c280e247f5f3a05a2d0f2220248577215110ae0730d1ef7c290bff56e1a46d66
3
+ size 55228322