texe / app.py
uptaleend's picture
Update app.py
c7e963f
raw
history blame contribute delete
No virus
2.35 kB
import io
import gradio as gr
import librosa
import numpy as np
import soundfile
import torch
from inference.infer_tool import Svc
import logging
# Raise the 'numba' logger to WARNING so its lower-severity messages are dropped.
logging.getLogger('numba').setLevel(logging.WARNING)
# Checkpoint and config paths handed to Svc below.
model_name = "logs/G_14000.pth"
config_name = "config/config_cris.json"
# Built once at import time; vc_fn reuses this instance on every call.
svc_model = Svc(model_name, config_name)
# Maps the speaker name chosen in the UI dropdown to the speaker id that
# vc_fn passes to svc_model.infer.
sid_map = {
"cris": "cris"
}
def _to_float32(audio):
    """Return *audio* as float32, scaling integer PCM by its dtype maximum.

    ``np.iinfo`` only accepts integer dtypes and raises ``ValueError`` for
    floats, so float input is cast without rescaling.
    NOTE(review): float input is presumably already in [-1, 1] -- confirm
    against what the Audio component actually delivers.
    """
    if np.issubdtype(audio.dtype, np.integer):
        return (audio / np.iinfo(audio.dtype).max).astype(np.float32)
    return audio.astype(np.float32)


def vc_fn(sid, input_audio, vc_transform):
    """Run voice conversion on an uploaded clip.

    Args:
        sid: Speaker name selected in the UI; looked up in ``sid_map``.
        input_audio: ``(sampling_rate, samples)`` tuple, or ``None`` when
            nothing was uploaded.
        vc_transform: Pitch-shift value forwarded unchanged to
            ``svc_model.infer``.

    Returns:
        ``(message, audio)``. On success ``audio`` is ``(32000, ndarray)``;
        on a rejected request it is ``None`` and ``message`` says why.
    """
    if input_audio is None:
        return "You need to upload an audio", None
    if sid not in sid_map:
        # Report the problem in the message box instead of raising KeyError.
        return f"Unknown speaker: {sid}", None

    sampling_rate, audio = input_audio
    audio = _to_float32(audio)
    if audio.ndim > 1:
        # Multi-channel input is (samples, channels); librosa wants channels first.
        audio = librosa.to_mono(audio.transpose(1, 0))
    if sampling_rate != 16000:
        audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
    print(audio.shape)

    # Hand the model an in-memory 16 kHz WAV rather than a file on disk.
    wav_buffer = io.BytesIO()
    soundfile.write(wav_buffer, audio, 16000, format="wav")
    wav_buffer.seek(0)

    # The second value returned by infer is not used; the output rate is
    # fixed at 32000.
    out_audio, _ = svc_model.infer(sid_map[sid], vc_transform, wav_buffer)
    return "Success", (32000, out_audio.cpu().numpy())
# Single-tab demo page: speaker picker, audio upload, pitch shift and a
# convert button that feeds vc_fn and shows its message and audio.
with gr.Blocks() as app:
    with gr.Tabs(), gr.TabItem("Basic"):
        gr.Markdown(value="""
这是sovits 3.0 32khz版本澄闪语音的在线demo
如果要在本地使用该demo,请使用git lfs clone 该仓库,安装requirements.txt后运行app.py即可
项目改写基于 https://huggingface.co/spaces/innnky/nyaru-svc-3.0
本地合成可以删除26、27两行代码以解除合成45s长度限制""")
        sid = gr.Dropdown(label="音色", choices=["cris"], value="cris")
        vc_input3 = gr.Audio(label="上传音频(长度小于45秒)")
        vc_transform = gr.Number(label="变调(整数,可以正负,半音数量,升高八度就是12,男生一般从12开始,女生可以试试从1开始微调)", value=0)
        vc_submit = gr.Button("转换", variant="primary")
        vc_output1 = gr.Textbox(label="Output Message")
        vc_output2 = gr.Audio(label="Output Audio")
    # Event wiring has to happen while the Blocks context is still open.
    vc_submit.click(
        fn=vc_fn,
        inputs=[sid, vc_input3, vc_transform],
        outputs=[vc_output1, vc_output2],
    )
app.launch()