File size: 2,593 Bytes
9a869c1
 
 
 
c5a2dc4
 
9a869c1
 
a49f9e3
9a869c1
 
150cf0a
9a869c1
 
bbdfab0
4f8aef6
a49f9e3
 
 
 
4f8aef6
 
 
 
 
 
 
a49f9e3
5bf92ce
 
 
 
 
 
 
 
c5a2dc4
a49f9e3
150cf0a
a49f9e3
764d003
 
 
 
 
 
a49f9e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fba7226
c5a2dc4
c825d84
a49f9e3
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
import streamlit as st
from clarifai_grpc.channel.clarifai_channel import ClarifaiChannel
from clarifai_grpc.grpc.api import resources_pb2, service_pb2, service_pb2_grpc
from clarifai_grpc.grpc.api.status import status_code_pb2
import numpy as np


# Streamlit page configuration
st.set_page_config(page_title="Text to Speech", layout="centered")

# Streamlit sidebar elements
st.sidebar.title("Text to Audio")
# The Personal Access Token is collected through a masked (password-type) input.
PAT = st.sidebar.text_input("Enter your Personal Access Token:", type="password")

# Authentication details (hide or secure this in production!)
#
# Alternative model coordinates, kept for reference. These are written as real
# `#` comments rather than a bare triple-quoted string: Streamlit's "magic"
# feature writes standalone string literals to the page, so a string used as a
# block comment here would be shown to every user of the app.
#   USER_ID = 'openai'
#   APP_ID = 'tss'
#   MODEL_ID = 'openai-tts-1'
#   MODEL_VERSION_ID = 'fff6ce1fd487457da95b79241ac6f02d'

# Owner and application of the model that is invoked.
USER_ID = 'eleven-labs'
APP_ID = 'audio-generation'
# Change these to whatever model and text URL you want to use
MODEL_ID = 'speech-synthesis'
MODEL_VERSION_ID = 'f2cead3a965f4c419a61a4a9b501095c'

# pad buffer
def pad_buffer(audio):
    """Return ``audio`` zero-padded to a whole number of int16-sized elements.

    A buffer whose length is already a multiple of the int16 item size is
    returned unchanged; otherwise null bytes are appended to reach the next
    multiple.
    """
    sample_width = np.dtype(np.int16).itemsize
    remainder = len(audio) % sample_width
    if remainder == 0:
        # Already aligned: nothing to append.
        return audio
    missing = sample_width - remainder
    return audio + b'\0' * missing

# Streamlit UI
st.title("Text to Audio Conversion")
input_text = st.text_area("Enter text to convert to speech:", "I love your product very much")

# Keep the button disabled until a PAT has been entered in the sidebar.
submit_button = st.button("Convert", disabled=not PAT)

if submit_button and not input_text.strip():
    # Nothing to synthesize: skip the remote call entirely.
    st.warning("Please enter some text to convert.")
elif submit_button:
    channel = ClarifaiChannel.get_grpc_channel()
    stub = service_pb2_grpc.V2Stub(channel)
    # The PAT is sent as an authorization key in the request metadata.
    metadata = (('authorization', 'Key ' + PAT),)
    userDataObject = resources_pb2.UserAppIDSet(user_id=USER_ID, app_id=APP_ID)

    try:
        response = stub.PostModelOutputs(
            service_pb2.PostModelOutputsRequest(
                user_app_id=userDataObject,
                model_id=MODEL_ID,
                version_id=MODEL_VERSION_ID,
                inputs=[resources_pb2.Input(data=resources_pb2.Data(text=resources_pb2.Text(raw=input_text)))]
            ),
            metadata=metadata
        )

        if response.status.code != status_code_pb2.SUCCESS:
            # Report the failure exactly once. Raising here would only be
            # caught by the handler below and shown a second time.
            st.error("Error in model response: " + response.status.description)
        elif not response.outputs:
            # Guard the index below so an empty result gets a readable message
            # instead of a raw IndexError string.
            st.error("Error in model response: no outputs were returned")
        else:
            # Assuming the output is in audio format
            audio_output = response.outputs[0].data.audio.base64
            st.audio(pad_buffer(audio_output), format="audio/wav")
    except Exception as e:
        # Top-level boundary of the script: surface any failure (transport,
        # decoding, playback) in the UI instead of crashing the page.
        st.error(f"An error occurred: {e}")