Spaces:
Runtime error
Runtime error
update
Browse files- .gitattributes +1 -0
- app.py +34 -12
- cached_outputs/0.wav +0 -0
- cached_outputs/1.wav +0 -0
- cached_outputs/2.wav +0 -0
- cached_outputs/3.wav +0 -0
- cached_outputs/4.wav +0 -0
- hash_code_for_cached_output.py +56 -0
- openvoicev2.mp4 +3 -0
- requirements.txt +2 -1
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.mp4 filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -6,7 +6,8 @@ import base64
|
|
| 6 |
import json
|
| 7 |
import time
|
| 8 |
import re
|
| 9 |
-
|
|
|
|
| 10 |
|
| 11 |
|
| 12 |
API_URL = os.environ.get("API_URL")
|
|
@@ -60,6 +61,24 @@ def predict(prompt, style, audio_file_pth, speed, agree):
|
|
| 60 |
None,
|
| 61 |
)
|
| 62 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 63 |
# first detect the input language
|
| 64 |
language_predicted = langid.classify(prompt)[0].strip()
|
| 65 |
print(f"Detected language:{language_predicted}")
|
|
@@ -224,22 +243,25 @@ examples = [
|
|
| 224 |
"examples/speaker3.mp3",
|
| 225 |
True,
|
| 226 |
],
|
| 227 |
-
|
| 228 |
]
|
| 229 |
|
| 230 |
with gr.Blocks(analytics_enabled=False) as demo:
|
| 231 |
|
| 232 |
with gr.Row():
|
| 233 |
-
gr.
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
|
| 237 |
-
|
| 238 |
-
|
| 239 |
-
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
with gr.Row():
|
| 244 |
gr.HTML(wrapped_markdown_content)
|
| 245 |
|
|
|
|
| 6 |
import json
|
| 7 |
import time
|
| 8 |
import re
|
| 9 |
+
import hashlib
|
| 10 |
+
import hash_code_for_cached_output
|
| 11 |
|
| 12 |
|
| 13 |
API_URL = os.environ.get("API_URL")
|
|
|
|
| 61 |
None,
|
| 62 |
)
|
| 63 |
|
| 64 |
+
# Before running inference, detect whether the request matches an example-table or default input.
|
| 65 |
+
# If so, serve a cached audio file instead. Note that this is done purely for demo efficiency.
|
| 66 |
+
# Hash codes were generated by `hash_code_for_cached_output.py`.
|
| 67 |
+
cached_outputs = {
|
| 68 |
+
"d0f5806f6e_60565a5c20_en_us" : "cached_outputs/0.wav",
|
| 69 |
+
"d0f5806f6e_420ab8211d_en_us" : "cached_outputs/1.wav",
|
| 70 |
+
"6e8a024342_0f96bf44f5_es_default" : "cached_outputs/2.wav",
|
| 71 |
+
"54ad3237d7_3fef5adc6f_zh_default" : "cached_outputs/3.wav",
|
| 72 |
+
"8190e911f8_9897b60a4e_jp_default" : "cached_outputs/4.wav"
|
| 73 |
+
}
|
| 74 |
+
unique_code = hash_code_for_cached_output.get_unique_code(audio_file_pth, style, prompt)
|
| 75 |
+
if unique_code in list(cached_outputs.keys()):
|
| 76 |
+
return (
|
| 77 |
+
'We get the cached output for you, since you are try to generating an example cloning.',
|
| 78 |
+
cached_outputs[unique_code],
|
| 79 |
+
audio_file_pth,
|
| 80 |
+
)
|
| 81 |
+
|
| 82 |
# first detect the input language
|
| 83 |
language_predicted = langid.classify(prompt)[0].strip()
|
| 84 |
print(f"Detected language:{language_predicted}")
|
|
|
|
| 243 |
"examples/speaker3.mp3",
|
| 244 |
True,
|
| 245 |
],
|
|
|
|
| 246 |
]
|
| 247 |
|
| 248 |
with gr.Blocks(analytics_enabled=False) as demo:
|
| 249 |
|
| 250 |
with gr.Row():
|
| 251 |
+
with gr.Column():
|
| 252 |
+
with gr.Row():
|
| 253 |
+
gr.Markdown(
|
| 254 |
+
"""
|
| 255 |
+
## <img src="https://huggingface.co/spaces/myshell-ai/OpenVoice/raw/main/logo.jpg" height="40"/>
|
| 256 |
+
"""
|
| 257 |
+
)
|
| 258 |
+
with gr.Row():
|
| 259 |
+
gr.Markdown(markdown_table_v2)
|
| 260 |
+
with gr.Row():
|
| 261 |
+
gr.Markdown(description)
|
| 262 |
+
with gr.Column():
|
| 263 |
+
gr.Video('./openvoicev2.mp4', autoplay=True)
|
| 264 |
+
|
| 265 |
with gr.Row():
|
| 266 |
gr.HTML(wrapped_markdown_content)
|
| 267 |
|
cached_outputs/0.wav
ADDED
|
Binary file (36.9 kB). View file
|
|
|
cached_outputs/1.wav
ADDED
|
Binary file (20.4 kB). View file
|
|
|
cached_outputs/2.wav
ADDED
|
Binary file (37.5 kB). View file
|
|
|
cached_outputs/3.wav
ADDED
|
Binary file (41.3 kB). View file
|
|
|
cached_outputs/4.wav
ADDED
|
Binary file (40.1 kB). View file
|
|
|
hash_code_for_cached_output.py
ADDED
|
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from pydub.utils import mediainfo
|
| 2 |
+
import hashlib
|
| 3 |
+
|
| 4 |
+
def audio_hash(audio_path):
    """Return a short content fingerprint for an audio file.

    Args:
        audio_path: Path to the audio file to fingerprint.

    Returns:
        The first 10 hex digits of the SHA-256 digest of the file's bytes,
        matching the keys stored in the demo's cached-output table.
    """
    hasher = hashlib.sha256()
    # Stream the file in fixed-size chunks instead of f.read() so large
    # audio files are not loaded into memory all at once. The resulting
    # digest is identical to hashing the whole file in one call.
    with open(audio_path, "rb") as f:
        for chunk in iter(lambda: f.read(65536), b""):
            hasher.update(chunk)

    return hasher.hexdigest()[:10]
|
| 12 |
+
|
| 13 |
+
def str_to_hash(input_str):
    """Return the first 10 hex digits of the SHA-256 digest of a string.

    The string is encoded as UTF-8 before hashing, so any Unicode text
    (including the CJK demo prompts) produces a stable fingerprint.
    """
    digest = hashlib.sha256(input_str.encode('utf-8')).hexdigest()
    return digest[:10]
|
| 20 |
+
|
| 21 |
+
def get_unique_code(reference_speaker, text, language):
    """Build a cache key of the form ``<audio hash>_<text hash>_<language>``.

    Args:
        reference_speaker: Path to the reference audio file.
        text: The prompt text to be synthesized.
        language: Language/style tag appended verbatim (e.g. ``en_us``).
    """
    parts = [audio_hash(reference_speaker), str_to_hash(text), language]
    return "_".join(parts)
|
| 23 |
+
|
| 24 |
+
if __name__ == '__main__':
    # Each entry mirrors one row of the demo's example table; running this
    # script prints the cache key that `app.py` looks up for that row.
    example_inputs = [
        {
            "text": "The bustling city square bustled with street performers, tourists, and local vendors.",
            "language": 'en_us',
            "reference_speaker": "examples/speaker0.mp3"
        },
        {
            "text": "Did you ever hear a folk tale about a giant turtle?",
            "language": 'en_us',
            "reference_speaker": "examples/speaker0.mp3"
        },
        {
            "text": "El resplandor del sol acaricia las olas, pintando el cielo con una paleta deslumbrante.",
            "language": 'es_default',
            "reference_speaker": "examples/speaker1.mp3",
        },
        {
            "text": "我最近在学习machine learning,希望能够在未来的artificial intelligence领域有所建树。",
            "language": 'zh_default',
            "reference_speaker": "examples/speaker2.mp3",
        },
        {
            "text": "彼は毎朝ジョギングをして体を健康に保っています。",
            "language": 'jp_default',
            "reference_speaker": "examples/speaker3.mp3",
        }
    ]

    for example in example_inputs:
        code = get_unique_code(
            example['reference_speaker'],
            example['text'],
            example['language'],
        )
        print(code)
|
| 56 |
+
|
openvoicev2.mp4
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8e623abfdd5d858005d494b7c04c527927534a9a63ca0005739e40f097d6d75e
|
| 3 |
+
size 12042795
|
requirements.txt
CHANGED
|
@@ -1 +1,2 @@
|
|
| 1 |
-
langid
|
|
|
|
|
|
| 1 |
+
langid
|
| 2 |
+
# NOTE: hashlib is part of the Python standard library and must NOT be listed
# here — the PyPI "hashlib" package is a Python-2-era shim whose install fails
# on Python 3 and can break the Space build.
|