Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,7 +1,6 @@
|
|
| 1 |
# Welcome to Team Tonic's MultiMed
|
| 2 |
|
| 3 |
from gradio_client import Client
|
| 4 |
-
import os
|
| 5 |
import numpy as np
|
| 6 |
import base64
|
| 7 |
import gradio as gr
|
|
@@ -10,10 +9,10 @@ import requests
|
|
| 10 |
import json
|
| 11 |
import dotenv
|
| 12 |
from scipy.io.wavfile import write
|
| 13 |
-
import PIL
|
| 14 |
import soundfile as sf
|
| 15 |
from openai import OpenAI
|
| 16 |
import time
|
|
|
|
| 17 |
from PIL import Image
|
| 18 |
import io
|
| 19 |
import hashlib
|
|
@@ -28,6 +27,8 @@ from transformers import AutoConfig, AutoTokenizer, AutoModelForSeq2SeqLM, AutoM
|
|
| 28 |
from peft import PeftModel, PeftConfig
|
| 29 |
import torch
|
| 30 |
import os
|
|
|
|
|
|
|
| 31 |
|
| 32 |
# Global variables to hold component references
|
| 33 |
components = {}
|
|
@@ -120,9 +121,10 @@ def process_speech(input_language, audio_input):
|
|
| 120 |
except Exception as e :
|
| 121 |
return f"{e}"
|
| 122 |
|
|
|
|
| 123 |
def convert_text_to_speech(input_text, target_language):
|
| 124 |
"""
|
| 125 |
-
Convert text to speech in the specified language and return the audio file path and the input text.
|
| 126 |
"""
|
| 127 |
try:
|
| 128 |
text_to_speech_result = seamless_client.predict(
|
|
@@ -136,17 +138,16 @@ def convert_text_to_speech(input_text, target_language):
|
|
| 136 |
api_name="/run" # API name
|
| 137 |
)
|
| 138 |
|
| 139 |
-
# Assuming the audio file path is in the second position
|
| 140 |
-
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
file_extension = os.path.splitext(file_name)[1]
|
| 145 |
-
shortened_file_name = file_name[:max_length - len(file_extension)] + file_extension
|
| 146 |
-
shortened_audio_file = os.path.join(dir_name, shortened_file_name)
|
| 147 |
|
| 148 |
-
|
| 149 |
-
return shortened_audio_file, input_text
|
| 150 |
except Exception as e:
|
| 151 |
return f"An error occurred during text-to-speech conversion: {e}", input_text
|
| 152 |
|
|
|
|
| 1 |
# Welcome to Team Tonic's MultiMed
|
| 2 |
|
| 3 |
from gradio_client import Client
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
import base64
|
| 6 |
import gradio as gr
|
|
|
|
| 9 |
import json
|
| 10 |
import dotenv
|
| 11 |
from scipy.io.wavfile import write
|
|
|
|
| 12 |
import soundfile as sf
|
| 13 |
from openai import OpenAI
|
| 14 |
import time
|
| 15 |
+
import PIL
|
| 16 |
from PIL import Image
|
| 17 |
import io
|
| 18 |
import hashlib
|
|
|
|
| 27 |
from peft import PeftModel, PeftConfig
|
| 28 |
import torch
|
| 29 |
import os
|
| 30 |
+
import uuid
|
| 31 |
+
|
| 32 |
|
| 33 |
# Global variables to hold component references
|
| 34 |
components = {}
|
|
|
|
| 121 |
except Exception as e :
|
| 122 |
return f"{e}"
|
| 123 |
|
| 124 |
+
|
| 125 |
def convert_text_to_speech(input_text, target_language):
|
| 126 |
"""
|
| 127 |
+
Convert text to speech in the specified language, rename the audio file with a unique identifier, and return both the new audio file path and the input text.
|
| 128 |
"""
|
| 129 |
try:
|
| 130 |
text_to_speech_result = seamless_client.predict(
|
|
|
|
| 138 |
api_name="/run" # API name
|
| 139 |
)
|
| 140 |
|
| 141 |
+
original_audio_file = text_to_speech_result[1] # Assuming the audio file path is in the second position
|
| 142 |
+
|
| 143 |
+
# Generate a new file name with a random UUID
|
| 144 |
+
new_file_name = f"audio_output_{uuid.uuid4()}.wav"
|
| 145 |
+
new_file_path = os.path.join(os.path.dirname(original_audio_file), new_file_name)
|
| 146 |
|
| 147 |
+
# Rename the file
|
| 148 |
+
os.rename(original_audio_file, new_file_path)
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
+
return new_file_path, input_text
|
|
|
|
| 151 |
except Exception as e:
|
| 152 |
return f"An error occurred during text-to-speech conversion: {e}", input_text
|
| 153 |
|