Spaces:

balacoon
/

revoice

Running

App Files Files Community

revoice / vc_service_request.py

clementruhm

vc_service_request.py: fix check

67f7fe4 over 2 years ago

raw

history blame

3.21 kB

	"""
	Copyright 2023 Balacoon

	contains implementation
	for voice conversion request
	"""

	import asyncio
	import base64
	import hashlib
	import json
	import os
	import ssl
	import time
	from typing import Optional, Tuple

	import numpy as np
	import resampy
	import websockets


	def prepare_audio(audio: Tuple[int, np.ndarray]) -> Optional[np.ndarray]:
	    """
	    converts audio to int16 format, 16khz mono

	    :param audio: pair of (sampling rate, samples); samples are either 1d
	        or 2d with exactly two channels along one of the axes
	    :return: 1d int16 array sampled at 16khz, or None when the audio is
	        empty or its layout can not be reduced to mono
	    """
	    sr, wav = audio
	    if wav.size == 0:
	        # nothing to convert; also keeps the peak computations below safe
	        return None

	    # ensure proper type
	    if wav.dtype == np.int32:
	        # measure the peak in int64: abs() of the most negative int32
	        # value overflows and would hide a full-scale signal
	        peak = np.abs(wav.astype(np.int64)).max()
	        # full-range int32 is scaled down, anything else is taken
	        # to be in int16 range already
	        scale = (32767.0 / 2**31) if peak > 32768 else 1.0
	        wav = (wav.astype(np.float32) * scale).astype(np.int16)
	    elif wav.dtype == np.float32 or wav.dtype == np.float64:
	        # floats within [-1, 1] are normalized audio, anything else is
	        # taken to be in int16 range already
	        scale = 32767.0 if np.max(np.abs(wav)) <= 1.0 else 1.0
	        wav = (wav * scale).astype(np.int16)

	    if wav.ndim == 2:
	        # average channels; the axes are checked exclusively because
	        # the array is 1d once either of them is averaged out
	        if wav.shape[0] == 2:
	            channel_axis = 0
	        elif wav.shape[1] == 2:
	            channel_axis = 1
	        else:
	            return None
	        # mean() yields float64, cast back to keep the int16 contract
	        wav = np.mean(wav, axis=channel_axis).astype(np.int16)
	    elif wav.ndim != 1:
	        return None

	    # ensure proper sampling rate
	    if sr != 16000:
	        # np.float64 is what the removed np.float alias used to mean
	        wav = wav.astype(np.float64) / 32768.0
	        wav = resampy.resample(wav, sr, 16000)
	        # resampling may overshoot full scale, clip so that the cast
	        # to int16 does not wrap around
	        wav = np.clip(wav * 32768.0, -32768.0, 32767.0).astype(np.int16)
	    return wav


	def create_signature() -> str:
	    """
	    builds the signature that is sent along with a request
	    to authenticate it.

	    the signature is a sha256 hex digest of the `api_secret`
	    environment variable concatenated with `int(time.time() / 1000)`

	    :return: hex digest string
	    :raises KeyError: if `api_secret` is not set in the environment
	    """
	    time_bucket = int(time.time() / 1000)
	    payload = f"{os.environ['api_secret']}{time_bucket}"
	    return hashlib.sha256(payload.encode()).hexdigest()


	async def async_service_request(source: np.ndarray, target: np.ndarray) -> np.ndarray:
	    """
	    sends source and target audio to the voice conversion service
	    over a websocket and collects the reply.

	    the websocket url is read from the `endpoint` environment variable,
	    the key from `api_key`. both arrays are sent as base64-encoded
	    raw bytes inside a single json message.

	    :param source: samples of the source audio
	    :param target: samples of the target audio
	    :return: all received messages interpreted as int16 samples and
	        concatenated, or None if nothing was received before
	        the connection got closed
	    """
	    tls_context = ssl.create_default_context()

	    async with websockets.connect(
	        os.environ["endpoint"], close_timeout=1024, ssl=tls_context
	    ) as connection:
	        payload = json.dumps(
	            {
	                "source": base64.b64encode(source.tobytes()).decode("utf-8"),
	                "target": base64.b64encode(target.tobytes()).decode("utf-8"),
	                "api_key": os.environ["api_key"],
	                "signature": create_signature(),
	            }
	        )
	        await connection.send(payload)

	        # read reply: keep receiving until the connection is closed
	        chunks = []
	        try:
	            while True:
	                message = await connection.recv()
	                chunks.append(np.frombuffer(message, dtype="int16"))
	        except websockets.exceptions.ConnectionClosed:
	            pass

	    if not chunks:
	        return None
	    return np.concatenate(chunks)


	def vc_service_request(
	    source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray]
	) -> Optional[Tuple[int, np.ndarray]]:
	    """
	    prepares audio (has to be 16khz mono)
	    and runs request to a voice conversion service

	    :param source_audio: pair of (sampling rate, samples) to convert
	    :param target_audio: pair of (sampling rate, samples) of the target
	    :return: pair of (16000, converted samples), or None when an input
	        can not be prepared, an input is too long, or the service
	        did not send any audio back
	    """
	    sample_rate = 16000
	    max_source_seconds = 60
	    max_target_seconds = 30

	    src = prepare_audio(source_audio)
	    tgt = prepare_audio(target_audio)
	    if src is None or tgt is None:
	        return None
	    if (
	        len(src) >= max_source_seconds * sample_rate
	        or len(tgt) >= max_target_seconds * sample_rate
	    ):
	        # input is way too long, dont return anything
	        return None

	    res = asyncio.run(async_service_request(src, tgt))
	    if res is None:
	        # empty reply: follow the same "no result" convention as above
	        # instead of handing back a tuple without samples
	        return None
	    return sample_rate, res