Spaces:

balacoon
/

revoice

Running

App Files Files Community

clementruhm committed on Oct 28, 2023

Commit

3e667ed

1 Parent(s): 7873cbe

service_request: accommodate requests for text synthesis

Browse files

Files changed (1) hide show

vc_service_request.py → service_request.py +25 -12

vc_service_request.py → service_request.py RENAMED Viewed

@@ -2,7 +2,7 @@
 Copyright 2023 Balacoon
 contains implementation
-for voice conversion request
 """
 import os
@@ -12,7 +12,7 @@ import hashlib
 import json
 import ssl
 import time
-from typing import Tuple
 import numpy as np
 import resampy
@@ -62,18 +62,23 @@ def create_signature(api_secret: str) -> str:
     return signature
-async def async_service_request(source: np.ndarray, target: np.ndarray, api_key: str, api_secret: str) -> np.ndarray:
     ssl_context = ssl.create_default_context()
     async with websockets.connect(
         os.environ["endpoint"], close_timeout=1024, ssl=ssl_context
     ) as websocket:
         request_dict = {
-            "source": base64.b64encode(source.tobytes()).decode("utf-8"),
             "target": base64.b64encode(target.tobytes()).decode("utf-8"),
             "api_key": api_key,
             "signature": create_signature(api_secret),
         }
         request = json.dumps(request_dict)
         await websocket.send(request)
@@ -81,7 +86,7 @@ async def async_service_request(source: np.ndarray, target: np.ndarray, api_key:
         result_lst = []
         while True:
             try:
-                data = await asyncio.wait_for(websocket.recv(), timeout=15)
                 result_lst.append(np.frombuffer(data, dtype="int16"))
             except websockets.exceptions.ConnectionClosed:
                 break
@@ -93,21 +98,29 @@ async def async_service_request(source: np.ndarray, target: np.ndarray, api_key:
         return result
-def vc_service_request(
-    source_audio: Tuple[int, np.ndarray], target_audio: Tuple[int, np.ndarray],
     api_key: str, api_secret: str,
 ) -> Tuple[int, np.ndarray]:
     """
     prepares audio (has to be 16khz mono)
     and runs request to a voice conversion service
     """
-    src = prepare_audio(source_audio)
     tgt = prepare_audio(target_audio)
-    if src is None or tgt is None:
         return
-    if len(src) >= 60 * 16000 or len(tgt) >= 30 * 16000:
-        # input is way too long, dont return anything
         return
-    res = asyncio.run(async_service_request(src, tgt, api_key, api_secret))
     return 16000, res

 Copyright 2023 Balacoon
 contains implementation
+for Revoice request
 """
 import os
 import json
 import ssl
 import time
+from typing import Tuple, Union
 import numpy as np
 import resampy
     return signature
+async def async_service_request(source_str: str, source: np.ndarray, target: np.ndarray, api_key: str, api_secret: str) -> np.ndarray:
     ssl_context = ssl.create_default_context()
     async with websockets.connect(
         os.environ["endpoint"], close_timeout=1024, ssl=ssl_context
     ) as websocket:
         request_dict = {
             "target": base64.b64encode(target.tobytes()).decode("utf-8"),
             "api_key": api_key,
             "signature": create_signature(api_secret),
         }
+        if source_str is not None:
+            request_dict["source_str"] = source_str
+        elif source is not None:
+            request_dict["source"] = base64.b64encode(source.tobytes()).decode("utf-8")
+        else:
+            return None
         request = json.dumps(request_dict)
         await websocket.send(request)
         result_lst = []
         while True:
             try:
+                data = await asyncio.wait_for(websocket.recv(), timeout=30)
                 result_lst.append(np.frombuffer(data, dtype="int16"))
             except websockets.exceptions.ConnectionClosed:
                 break
         return result
def service_request(
    source_str: Union[str, None],
    source_audio: Union[Tuple[int, np.ndarray], None],
    target_audio: Tuple[int, np.ndarray],
    api_key: str,
    api_secret: str,
) -> Union[Tuple[int, np.ndarray], None]:
    """
    validates inputs (audio has to be 16khz mono) and runs a request
    to the Revoice service.

    The source is given either as text (`source_str`) or as audio
    (`source_audio`); at least one of them has to be provided.
    Both are forwarded to `async_service_request`, which decides
    which one ends up in the request.

    Returns a `(16000, samples)` tuple with the result of the request,
    or None when the inputs are rejected:
      - target audio could not be prepared,
      - neither source text nor usable source audio is available,
      - target audio is 30 seconds or longer,
      - source audio is 60 seconds or longer,
      - source text is longer than 256 characters.
    """
    # source audio is optional: it is absent for text synthesis requests
    src = None
    if source_audio is not None:
        src = prepare_audio(source_audio)
    tgt = prepare_audio(target_audio)

    if tgt is None:
        return None
    if source_str is None and src is None:
        # nothing to convert or synthesize
        return None
    # durations are expressed in samples at 16khz
    if len(tgt) >= 30 * 16000:
        # target is too long
        return None
    if src is not None and len(src) >= 60 * 16000:
        # source audio is too long
        return None
    if source_str is not None and len(source_str) > 256:
        # source text is too long
        return None

    # pass the text under the name of the actual parameter; the previous
    # `src_str` was never defined and raised NameError on every valid request
    res = asyncio.run(
        async_service_request(source_str, src, tgt, api_key, api_secret)
    )
    return 16000, res