import warnings
warnings.filterwarnings("ignore")
# External libraries
import requests
import argparse
import os
import tempfile
# Unused in the active demo; kept alongside the commented-out model code below
# import re
# import json
# import librosa
# import numpy as np
# import torch
# from torch import no_grad, LongTensor
# import commons
import gradio as gr
# import gradio.utils as gr_utils
# import gradio.processing_utils as gr_processing_utils
# Internal libraries
# from models import SynthesizerTrn
# from text import text_to_sequence, text_to_sequence_for_test, _clean_text
# from mel_processing import spectrogram_torch
# import utils
# from text.symbols import symbols
all_example = "My voice is my passport. Verify me."
eleven_voice_id = [
    "21m00Tcm4TlvDq8ikWAM",
    "29vD33N1CtxCmqQRPOHJ",
    "2EiwWnXFnvU5JabPnv8n",
    "5Q0t7uMcjvnagumLfvZi",
    "AZnzlk1XvdvUeBnXmlld",
    "CYw3kZ02Hs0563khs1Fj",
    "D38z5RcWu1voky8WS1ja",
    "EXAVITQu4vr4xnSDxMaL",
    "ErXwobaYiN019PkySvjV",
    "GBv7mTt0atIp3Br8iCZE",
    "IKne3meq5aSn9XLyUdCD",
    "JBFqnCBsd6RMkjVDRZzb",
    "LcfcDJNUP1GQjkzn1xUU",
    "MF3mGyEYCl7XYWbV9V6O",
    "N2lVS1w4EtoT3dr4eOWO",
    "ODq5zmih8GrVes37Dizd",
    "SOYHLrjzK2X1ezoPC6cr",
    "TX3LPaxmHKxFdv7VOQHJ",
    "ThT5KcBeYPX3keUQqHPh",
    "TxGEqnHWrfWFTfGW9XjX",
    "VR6AewLTigWG4xSOukaG",
    "XB0fDUnXU5powFXDhCwa",
    "Xb7hH8MSUJpSbSDYk0k2",
    "XrExE9yKIg1WjnnlVkGX",
    "ZQe5CZNOzWyzPSCn5a3c",
    "Zlb1dXrM653N07WRdFW3",
    "bVMeCyTHy58xNoL34h3p",
    "flq6f7yk4E4fJM5XTYuZ",
    "g5CIjZEefAph4nQFvHAz",
    "iP95p4xoKVk53GoZ742B",
    "jBpfuIE2acCO8z3wKNLl",
    "jsCqWAovK2LkecY7zXl4",
    "nPczCjzI2devNBz1zQrb",
    "oWAxZDx7w5VEj9dCyTzz",
    "onwK4e9ZLuTAKqWW03F9",
    "pFZP5JQG7iQjIQuC4Bku",
    "pMsXgVXv3BLzUgSXRplE",
    "pNInz6obpgDQGcFmaJgB",
    "piTKgcLEGmPE4e6mEKli",
    "pqHfZKP75CvOlQylNhV4",
    "t0jbNlBVZ17f02VDIeMI",
    "yoZ06aMxZJJ28mfd3POQ",
    "z9fAnlkpzviPz146aGWa",
    "zcAOhNBS3c14rBihAFp1",
    "zrHiDhphv9ZnVXBqCLjz",
]
eleven_name = [
    "Rachel",
    "Drew",
    "Clyde",
    "Paul",
    "Domi",
    "Dave",
    "Fin",
    "Sarah",
    "Antoni",
    "Thomas",
    "Charlie",
    "George",
    "Emily",
    "Elli",
    "Callum",
    "Patrick",
    "Harry",
    "Liam",
    "Dorothy",
    "Josh",
    "Arnold",
    "Charlotte",
    "Alice",
    "Matilda",
    "James",
    "Joseph",
    "Jeremy",
    "Michael",
    "Ethan",
    "Chris",
    "Gigi",
    "Freya",
    "Brian",
    "Grace",
    "Daniel",
    "Lily",
    "Serena",
    "Adam",
    "Nicole",
    "Bill",
    "Jessie",
    "Sam",
    "Glinda",
    "Giovanni",
    "Mimi",
]
# Maps display name -> ElevenLabs voice ID (despite the id-first variable name)
eleven_id_model_name_dict = dict(zip(eleven_name, eleven_voice_id))
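# All three vendor APIs below return raw MP3 bytes in the HTTP response body,
# while gr.Audio plays from a file path. This helper (a sketch added here, not
# part of any vendor SDK) writes the bytes to a temporary file for Gradio.
def save_response_audio(response):
    """Write MP3 bytes from a successful response to a temp file; return its path.

    Returns None when the request failed, so callers can surface the error.
    """
    if response.status_code != 200:
        return None
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as f:
        f.write(response.content)
        return f.name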
def openai(text, name):
    # OpenAI text-to-speech endpoint. The API key is read from the standard
    # OPENAI_API_KEY environment variable rather than being hardcoded.
    headers = {
        'Authorization': 'Bearer ' + os.environ.get('OPENAI_API_KEY', ''),
        'Content-Type': 'application/json',
    }
    json_data = {
        'model': 'tts-1-hd',
        'input': text,
        'voice': name,
    }
    response = requests.post('https://api.openai.com/v1/audio/speech', headers=headers, json=json_data)
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None
    return "Success", save_response_audio(response)
def elevenlabs(text, name):
    # ElevenLabs text-to-speech endpoint; `name` is a voice ID from
    # eleven_voice_id above. The ELEVEN_API_KEY environment-variable name is an
    # assumption; adjust it to match your deployment.
    url = f"https://api.elevenlabs.io/v1/text-to-speech/{name}"
    headers = {
        "Accept": "audio/mpeg",
        "Content-Type": "application/json",
        "xi-api-key": os.environ.get("ELEVEN_API_KEY", ""),
    }
    data = {
        "text": text,
        "model_id": "eleven_monolingual_v1",
        "voice_settings": {
            "stability": 0.5,
            "similarity_boost": 0.5,
        },
    }
    response = requests.post(url, json=data, headers=headers)
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None
    return "Success", save_response_audio(response)
microsoft_model_list = [
    "en-US-AvaMultilingualNeural"
]
def microsoft(text, name, style="Neural"):
    """Synthesize `text` with the Azure Cognitive Services TTS REST API.

    :param text: text to synthesize (interpolated into the SSML payload verbatim)
    :param name: Azure voice name, e.g. "en-US-AvaMultilingualNeural"
    :param style: reserved; not currently applied to the SSML payload
    :return: (status message, path to an MP3 file or None)
    """
    # The subscription key is read from the environment (the AZURE_SPEECH_KEY
    # variable name is an assumption); the endpoint is pinned to japaneast.
    headers = {
        'Ocp-Apim-Subscription-Key': os.environ.get('AZURE_SPEECH_KEY', ''),
        'Content-Type': 'application/ssml+xml',
        'X-Microsoft-OutputFormat': 'audio-16khz-128kbitrate-mono-mp3',
        'User-Agent': 'curl',
    }
    data = ("<speak version='1.0' xml:lang='en-US'>"
            f"<voice xml:lang='en-US' name='{name}'>"  # xml:gender='Female'
            f"{text}"
            "</voice>"
            "</speak>")
    response = requests.post(
        'https://japaneast.tts.speech.microsoft.com/cognitiveservices/v1',
        headers=headers,
        data=data,
    )
    if response.status_code != 200:
        return f"Error {response.status_code}: {response.text}", None
    return "Success", save_response_audio(response)
# def google(text, name):
#     # An OAuth access token is required, e.g. from:
#     #   gcloud auth print-access-token
#     headers = {
#         'Authorization': 'Bearer ' + os.environ.get('GCP_ACCESS_TOKEN', ''),
#         'x-goog-user-project': 'PROJECT_ID',
#         'Content-Type': 'application/json; charset=utf-8',
#     }
#     data = {
#         "input": {
#             "text": text},
#         "voice": {
#             "languageCode": "en-gb",
#             "name": "en-GB-Standard-A",
#             "ssmlGender": "FEMALE"
#         },
#         "audioConfig": {
#             "audioEncoding": "MP3"
#         }
#     }
#     # json= (not data=) so the dict is serialized as JSON
#     response = requests.post('https://texttospeech.googleapis.com/v1/text:synthesize', headers=headers, json=data)
#     return "Success", response
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--device', type=str, default='cuda')
    # `store_true` with default=True made --share a no-op; BooleanOptionalAction
    # keeps the old default while making --share/--no-share actually work.
    parser.add_argument("--share", action=argparse.BooleanOptionalAction, default=True, help="share gradio app")
    parser.add_argument("--port", type=int, default=8081, help="port")
    parser.add_argument('--model_info_path', type=str, default='/gluster/speech_data/info.json')
    args = parser.parse_args()
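    # Typical invocation (hypothetical script name):
    #   python tts_demo.py --port 8081 --no-share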
    # app = gr.Blocks()
    # with app:
    #     gr.Markdown("## TTS Demo")
    #     with gr.Tabs():
    #         with gr.TabItem("Microsoft"):
    #             tts_input1 = gr.TextArea(label="Text", value=all_example)
    #             tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
    #             tts_submit = gr.Button("Generate", variant="primary")
    #             tts_output1 = gr.Textbox(label="Output Message")
    #             tts_output2 = gr.Audio(label="Output Audio")
    #             tts_submit.click(microsoft, [tts_input1, tts_input2],
    #                              [tts_output1, tts_output2])
    # _, audio = microsoft(all_example, 'en-US-AvaMultilingualNeural')
    # _, audio = google(all_example, 'alloy')
    # print(audio)
    # with open("test4.mp3", "wb") as f:
    #     f.write(audio.content)
    # _, audio = elevenlabs(all_example, "21m00Tcm4TlvDq8ikWAM")
    # print(audio)
    # with open('output.mp3', 'wb') as f:
    #     for chunk in audio.iter_content(chunk_size=1024):
    #         if chunk:
    #             f.write(chunk)
    # device = torch.device(args.device)
    # models_tts = []
    # with open(args.model_info_path, "r", encoding="utf-8") as f:
    #     models_info = json.load(f)
    # for i, info in models_info.items():
    #     model_name = info["model_name"]
    #     author = info["author"]
    #     lang = info["lang"]
    #     example = info["example"]
    #     config_path = info["config_path"]
    #     model_path = info["model_path"]
    #     model_type = info["model_type"]
    #     hps = utils.get_hparams_from_file(config_path)
    #     if model_type == "vits":
    #         emotion_type = None
    #     elif model_type == "vits-emotion":
    #         emotion_type = "embedding"
    #     elif model_type == "vits-emotion-logits":
    #         emotion_type = "logits"
    #     model = SynthesizerTrn(
    #         len(symbols),
    #         hps.data.filter_length // 2 + 1,
    #         hps.train.segment_size // hps.data.hop_length,
    #         emotion_type=emotion_type,
    #         **hps.model)
    #     utils.load_checkpoint(model_path, model, None)
    #     model.eval().to(device)
    #     if model_type == "vits":
    #         # Plain TTS
    #         models_tts.append((model_name, author, lang, example, create_tts_fn(model, hps)))
    app = gr.Blocks()
    with app:
        gr.Markdown("## TTS Demo")
        with gr.Tabs():
            # with gr.TabItem("In-house"):
            #     with gr.Tabs():
            #         for i, (model_name, author, lang, example, tts_fn) in enumerate(models_tts):
            #             with gr.TabItem(model_name):
            #                 with gr.Column():
            #                     tts_input1 = gr.TextArea(label="Text", value=example)
            #                     tts_input2 = gr.Slider(label="Speed", value=1.0, minimum=0.4, maximum=3, step=0.1)
            #                     tts_input3 = gr.Slider(label="noise_scale", value=0.0, minimum=0.0, maximum=2, step=0.1)
            #                     tts_input4 = gr.Slider(label="noise_scale_w", value=0.0,
            #                                            minimum=0.0, maximum=2, step=0.1)
            #                     tts_input5 = gr.Slider(label="volume", value=1.0, minimum=0.1, maximum=4, step=0.1)
            #                     tts_submit = gr.Button("Generate", variant="primary")
            #                     tts_output1 = gr.Textbox(label="Output Message")
            #                     tts_output2 = gr.Audio(label="Output Audio")
            #                     tts_submit.click(tts_fn, [tts_input1, tts_input2, tts_input3, tts_input4, tts_input5],
            #                                      [tts_output1, tts_output2])
            # with gr.TabItem("Google"):
            #     tts_input1 = gr.TextArea(label="Text", value=all_example)
            #     tts_input2 = gr.Dropdown(google_model_list, label="name")
            #     tts_submit = gr.Button("Generate", variant="primary")
            #     tts_output1 = gr.Textbox(label="Output Message")
            #     tts_output2 = gr.Audio(label="Output Audio")
            #     tts_submit.click(google, [tts_input1, tts_input2],
            #                      [tts_output1, tts_output2])
            with gr.TabItem("Microsoft"):
                tts_input1 = gr.TextArea(label="Text", value=all_example)
                tts_input2 = gr.Dropdown(microsoft_model_list, label="name")
                tts_submit = gr.Button("Generate", variant="primary")
                tts_output1 = gr.Textbox(label="Output Message")
                tts_output2 = gr.Audio(label="Output Audio")
                tts_submit.click(microsoft, [tts_input1, tts_input2],
                                 [tts_output1, tts_output2])
            # with gr.TabItem("coefont"):
            #     tts_input1 = gr.TextArea(label="Text", value=all_example)
            #     tts_input2 = gr.Dropdown(coefont_model_list, label="name")
            #     tts_submit = gr.Button("Generate", variant="primary")
            #     tts_output1 = gr.Textbox(label="Output Message")
            #     tts_output2 = gr.Audio(label="Output Audio")
            #     tts_submit.click(coefont, [tts_input1, tts_input2],
            #                      [tts_output1, tts_output2])
    app.launch(show_api=False,
               share=args.share,
               server_name='0.0.0.0',
               server_port=args.port,
               show_error=True)