File size: 6,020 Bytes
858c3c5
 
 
 
 
 
 
 
16957a4
0a318ac
858c3c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16957a4
 
858c3c5
20c9059
 
 
 
 
 
 
 
 
 
081800f
20c9059
 
 
956a43b
 
20c9059
16957a4
 
20c9059
 
956a43b
 
20c9059
956a43b
20c9059
16957a4
 
 
 
 
20c9059
 
16957a4
20c9059
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
16957a4
858c3c5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a318ac
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import streamlit as st
import os
import time
import glob
import os
import subprocess
import whisper
from whisper.utils import write_vtt
import stat
#import openai

#from gtts import gTTS
from gtts import *
from googletrans import Translator

# Make sure the scratch directory for generated audio exists.
# ``exist_ok=True`` covers the "already created on a previous run" case that
# the old bare ``except`` was there for, without also hiding unrelated
# failures (for example a read-only working directory).
os.makedirs("temp", exist_ok=True)

# Branded banner, passed as raw HTML so the inline styling is applied.
_BANNER_HTML = '<h3 style="text-align:center;text-decoration: lightblue underline;font-size:60px;color:red">Nairo24 <span style="color:#4f9bce;font-weight:bolder;font-size:60px;"> News</span></h3>'
st.markdown(_BANNER_HTML, unsafe_allow_html=True)

st.title("Text to speech")

# Single module-level translation client; text_to_speech() reads it as a global.
translator = Translator()

# Logo image displayed under the title, with an empty caption.
image = "Nairo24.png"
st.image(image, caption="", use_column_width=True)

# Display label -> language code. The same code is later handed to the
# translator (as ``src``/``dest``) and to gTTS (as ``lang``).
# One table drives both selectors, so the option list and the label-to-code
# mapping cannot drift apart and leave ``input_language`` or
# ``output_language`` unbound.
_LANGUAGE_CODES = {
    "English": "en",
    "Hindi": "hi",
    "Bengali": "bn",
    "korean": "ko",
    "Chinese": "zh-cn",
    "Japanese": "ja",
}

text = st.text_area("Enter text", value="", height=200)

# Dicts preserve insertion order, so the options appear in the order above.
in_lang = st.selectbox("Select your input language", tuple(_LANGUAGE_CODES))
input_language = _LANGUAGE_CODES[in_lang]

out_lang = st.selectbox("Select your output language", tuple(_LANGUAGE_CODES))
output_language = _LANGUAGE_CODES[out_lang]

# Accent label -> value passed to gTTS as ``tld``.
# NOTE(review): "Default" resolves to the same value as "Canada" ("ca");
# confirm that this is intentional.
_ACCENT_TLDS = {
    "Default": "ca",
    "India": "co.in",
    "United Kingdom": "co.uk",
    "United States": "com",
    "Canada": "ca",
    "Australia": "com.au",
    "Ireland": "ie",
    "South Africa": "co.za",
}

english_accent = st.selectbox("Select your english accent", tuple(_ACCENT_TLDS))
tld = _ACCENT_TLDS[english_accent]


def text_to_speech(input_language, output_language, text, tld):
    """Translate *text* and save the spoken translation as an MP3 in ``temp/``.

    Args:
        input_language: Language code passed to the translator as ``src``.
        output_language: Language code passed to the translator as ``dest``
            and to gTTS as ``lang``.
        text: The text to translate and speak.
        tld: Value passed to gTTS as ``tld``.

    Returns:
        A ``(file_stem, translated_text)`` tuple; the audio is written to
        ``temp/<file_stem>.mp3``.
    """
    translation = translator.translate(text, src=input_language, dest=output_language)
    trans_text = translation.text
    tts = gTTS(trans_text, lang=output_language, tld=tld, slow=False)

    # The file stem is derived from user-entered text, so neutralise anything
    # that could leave ``temp/`` or is not valid in a file name (path
    # separators, reserved punctuation, control characters such as newlines).
    unsafe_chars = set('<>:"/\\|?*')
    stem = "".join(
        "_" if ch in unsafe_chars or ord(ch) < 32 else ch for ch in text[:20]
    ).strip(" .")

    # Slicing a string never raises, so the previous try/except fallback was
    # unreachable; fall back explicitly when nothing usable is left.
    my_file_name = stem or "audio"

    tts.save(f"temp/{my_file_name}.mp3")
    return my_file_name, trans_text


display_output_text = st.checkbox("Display output text")

if st.button("convert"):
    if not text.strip():
        # Nothing to translate or speak: tell the user instead of sending an
        # empty request through the translator and gTTS.
        st.warning("Please enter some text to convert.")
    else:
        result, output_text = text_to_speech(input_language, output_language, text, tld)

        # Read the generated audio through a context manager so the file
        # handle is closed as soon as the bytes are in memory.
        with open(f"temp/{result}.mp3", "rb") as audio_file:
            audio_bytes = audio_file.read()

        st.markdown("## Your audio:")
        st.audio(audio_bytes, format="audio/mp3", start_time=0)

        if display_output_text:
            st.markdown("## Output text:")
            st.write(f" {output_text}")

        # Add download button for the generated MP3 file
        st.download_button("Download MP3", data=audio_bytes, file_name=f"{result}.mp3")


def remove_files(n):
    """Delete ``.mp3`` files in ``temp/`` last modified more than *n* days ago.

    Args:
        n: Age threshold in days; files whose modification time is older
            than ``n * 86400`` seconds before now are removed.
    """
    cutoff = time.time() - n * 86400

    # Match on the ".mp3" extension; the previous "*mp3" pattern also matched
    # any other file whose name merely ended in the letters "mp3".
    for path in glob.glob("temp/*.mp3"):
        try:
            if os.stat(path).st_mtime < cutoff:
                os.remove(path)
                print("Deleted ", path)
        except FileNotFoundError:
            # The file disappeared between listing and stat/remove; there is
            # nothing left to clean up for this entry.
            continue
                
# Purge generated MP3s older than seven days whenever this script executes.
remove_files(7)



### Multilingual AI: adding captions to videos ###



# Load the Whisper "tiny" model once at module level; translate() below reads
# this shared ``model`` object.
# NOTE(review): presumably the weights are downloaded on first use - confirm.
model = whisper.load_model("tiny")

def video2mp3(video_file, output_ext="mp3"):
    """Convert *video_file* to an audio file with ffmpeg.

    The output is written next to the input, with the input's extension
    replaced by *output_ext*. An existing output file is overwritten (``-y``).

    Args:
        video_file: Path of the media file to convert.
        output_ext: Extension (without the dot) of the file to produce.

    Returns:
        The path of the generated audio file.

    Raises:
        FileNotFoundError: If the ``ffmpeg`` executable cannot be found.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    filename, _ = os.path.splitext(video_file)
    audio_path = f"{filename}.{output_ext}"

    # check=True: surface a failed conversion here rather than returning the
    # path of a file that was never written.
    subprocess.run(
        ["ffmpeg", "-y", "-i", video_file, audio_path],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.STDOUT,
        check=True,
    )
    return audio_path

def translate(input_video):
    """Caption *input_video* using Whisper and burn the subtitles into a copy.

    Steps: extract the audio with ``video2mp3``, run the module-level Whisper
    ``model`` with ``task="translate"``, write the resulting segments to a
    ``.vtt`` file next to the audio, then have ffmpeg render the video with
    the ``subtitles`` filter.

    Args:
        input_video: Path of the video to caption.

    Returns:
        Path of the subtitled video (``<base>_subtitled.mp4``, where
        ``<base>`` is the audio path without its extension).

    Raises:
        subprocess.CalledProcessError: If the ffmpeg rendering step fails.
    """
    audio_file = video2mp3(input_video)

    options = dict(beam_size=5, best_of=5, fp16=False)
    translate_options = dict(task="translate", **options)
    result = model.transcribe(audio_file, **translate_options)

    # splitext only strips the final extension, so directories or file names
    # containing dots are handled (``split(".")[0]`` truncated them).
    base, _ = os.path.splitext(audio_file)

    # Write the subtitles to the same path ffmpeg is told to read. Previously
    # the file was written under a hard-coded "/content/" directory while
    # ffmpeg received the relative path, which only matched when the working
    # directory happened to be "/content".
    subtitle_file = base + ".vtt"
    output_video = base + "_subtitled.mp4"

    with open(subtitle_file, "w", encoding="utf-8") as vtt:
        write_vtt(result["segments"], file=vtt)

    # Argument list instead of a shell string: the paths are never parsed by a
    # shell. "-y" overwrites the output of an earlier run instead of stopping
    # at ffmpeg's overwrite prompt.
    subprocess.run(
        [
            "ffmpeg",
            "-y",
            "-i",
            input_video,
            "-vf",
            f"subtitles={subtitle_file}",
            output_video,
        ],
        check=True,
    )

    # Grant the owner read/write/execute on the input and the generated files
    # (the input was previously referenced by a hard-coded file name).
    for path in (input_video, subtitle_file, output_video):
        os.chmod(path, stat.S_IRWXU)

    return output_video



st.title("MultiLingual AI: Add Caption to Videos")

uploaded_file = st.file_uploader("Upload your video", type=["mp4"])

if uploaded_file is not None:
    st.video(uploaded_file)
    if st.button("Generate Subtitle Video"):
        try:
            # Save the upload to disk so ffmpeg/Whisper can read it by path.
            # getvalue() returns the whole buffer regardless of the current
            # stream position, which st.video() above may have moved.
            with open("temp_video.mp4", "wb") as f:
                f.write(uploaded_file.getvalue())

            output_video = translate("temp_video.mp4")

            # Display the output video
            st.video(output_video)
        finally:
            # Remove the temporary upload even when captioning fails, so a
            # failed run does not leave the file behind.
            if os.path.exists("temp_video.mp4"):
                os.remove("temp_video.mp4")

# Page footer: a small CSS rule plus the attribution/contact line, rendered
# as raw HTML.
_FOOTER_HTML = '''
    <style>
    .footer {
        font-size: 12px;
        color: #888888;
        text-align: center;
    }
    </style>
    <div class="footer">
        <p>Powered by <a href="https://openai.com/" style="text-decoration: underline;" target="_blank">OpenAI</a> - Developer Tel: <a style="text-decoration: underline;" target="_blank">+254704205553</a>
        </p>
    </div>
    '''
st.markdown(_FOOTER_HTML, unsafe_allow_html=True)