import io
import os
os.system("chmod 777 ffmpeg")
import torch
import gradio as gr
import librosa
import numpy as np
import soundfile
import logging
from fairseq import checkpoint_utils
from my_utils import load_audio
from vc_infer_pipeline import VC
import traceback
from config import Config
from infer_pack.models import (
    SynthesizerTrnMs256NSFsid,
    SynthesizerTrnMs256NSFsid_nono,
    SynthesizerTrnMs768NSFsid,
    SynthesizerTrnMs768NSFsid_nono,
)
from i18n import I18nAuto

logging.getLogger("numba").setLevel(logging.WARNING)
logging.getLogger("markdown_it").setLevel(logging.WARNING)
logging.getLogger("urllib3").setLevel(logging.WARNING)
logging.getLogger("matplotlib").setLevel(logging.WARNING)

i18n = I18nAuto()
i18n.print()

config = Config()

models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
    ["hubert_base.pt"],
    suffix="",
)
hubert_model = models[0]
hubert_model = hubert_model.to(config.device)
hubert_model = hubert_model.float()
hubert_model.eval()

global n_spk, tgt_sr, net_g, vc, cpt, version
person = "weights/simple-guitar-crepe-guolv_e1000.pth"
print("loading %s" % person)
cpt = torch.load(person, map_location="cpu")
tgt_sr = cpt["config"][-1]
cpt["config"][-3] = cpt["weight"]["emb_g.weight"].shape[0]  # n_spk
net_g = SynthesizerTrnMs768NSFsid(*cpt["config"], is_half=False)
del net_g.enc_q
print(net_g.load_state_dict(cpt["weight"], strict=False))
net_g.eval().to(config.device)
net_g = net_g.float()
vc = VC(tgt_sr, config)
n_spk = cpt["config"][-3]
version="v2"

default_audio=load_audio("logs/mute/1_16k_wavs/mute.wav",16000)
def vc_single(
    # sid=0,
    input_audio_path,#待选取
    f0_up_key,#待选取
    f0_method,
    file_index="logs/added_IVF2225_Flat_nprobe_1_simple-guitar-crepe-guolv_v2.index",#写死
    index_rate=1,#写死1
    filter_radius=3,#不需要，随便写，3
    resample_sr=0,#写死0不需要
    rms_mix_rate=1,#写死1不需要
    protect=0.5,#写死0.5不需要
):
    global tgt_sr, net_g, vc, hubert_model, version
    if input_audio_path is None:
        return "You need to upload an audio", None
    f0_up_key = int(f0_up_key)
    try:
        audio = input_audio_path[1] / 32768.0
        if len(audio.shape) == 2:
            audio = np.mean(audio, -1)
        audio = librosa.resample(audio, orig_sr=input_audio_path[0], target_sr=16000)
        audio_max = np.abs(audio).max() / 0.95
        if audio_max > 1:
            audio /= audio_max
        times = [0, 0, 0]
        audio_opt = vc.pipeline(
            model=hubert_model,
            net_g=net_g,
            sid=0,
            audio=audio,
            input_audio_path="123",
            times=times,
            f0_up_key=f0_up_key,
            f0_method=f0_method,
            file_index=file_index,
            index_rate=index_rate,
            if_f0=1,
            filter_radius=filter_radius,
            tgt_sr=tgt_sr,
            resample_sr=resample_sr,
            rms_mix_rate=rms_mix_rate,
            version="v2",
            protect=protect,
            f0_file=None,
        )
        if resample_sr >= 16000 and tgt_sr != resample_sr:
            tgt_sr = resample_sr
        index_info = (
            "Using index:%s." % file_index
            if os.path.exists(file_index)
            else "Index not used."
        )
        return "Success.\n %s\nTime:\n npy:%ss, f0:%ss, infer:%ss" % (
            index_info,
            times[0],
            times[1],
            times[2],
        ), (tgt_sr, audio_opt)
    except:
        info = traceback.format_exc()
        print(info)
        return "报错了！信息如下：%s"%info, (16000, default_audio)

app = gr.Blocks()
with app:
    with gr.Tabs():
        with gr.TabItem("人声转吉他极简在线demo"):
            gr.Markdown(
                value="""
                变调越高吉他音越细，越低越沉闷
                """
            )
            vc_input = gr.Audio(label="上传音频")
            with gr.Column():
                with gr.Row():
                    vc_transform = gr.Slider(
                        minimum=-12,
                        maximum=12,
                        label="变调(半音数量,升八度12降八度-12)",
                        value=0,
                        step=1,
                        interactive=True,
                    )
                    f0method = gr.Radio(
                        label=i18n(
                            "选择音高提取算法:语音推荐dio歌声推荐pm"
                        ),
                        choices=["pm", "dio"],
                        value="dio",
                        interactive=True,
                    )
                with gr.Row():
                    but = gr.Button(i18n("转换"), variant="primary")
                    vc_output1 = gr.Textbox(label=i18n("输出信息"))
                    vc_output2 = gr.Audio(label=i18n("输出音频(右下角三个点,点了可以下载)"))
            but.click(
                vc_single,
                [
                    vc_input,
                    vc_transform,
                    f0method
                ],
                [vc_output1, vc_output2],
            )

app.launch(server_name="0.0.0.0",quiet=True)