import io
import json
import os
import tempfile
from datetime import timedelta
from xml.sax.saxutils import escape as xml_escape

import azure.cognitiveservices.speech as speechsdk
import indic_transliteration
import numpy as np
import speech_recognition as sr
import streamlit as st
import whisper
from gtts import gTTS
from indic_transliteration import sanscript
from indic_transliteration.sanscript import SchemeMap, SCHEMES, transliterate
from moviepy.editor import VideoFileClip, AudioFileClip, TextClip, CompositeVideoClip
from pydub import AudioSegment
from translate import Translator


						|  | TAMIL_VOICES = { | 
					
						
						|  | 'Female 1': {'gender': 'female', 'age': 'adult', 'style': 'normal'}, | 
					
						
						|  | 'Female 2': {'gender': 'female', 'age': 'adult', 'style': 'formal'}, | 
					
						
						|  | 'Male 1': {'gender': 'male', 'age': 'adult', 'style': 'normal'}, | 
					
						
						|  | 'Male 2': {'gender': 'male', 'age': 'adult', 'style': 'formal'}, | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | TAMIL_PRONUNCIATIONS = { | 
					
						
						|  | 'zh': 'l', | 
					
						
						|  | 'L': 'l', | 
					
						
						|  | 'N': 'n', | 
					
						
						|  | 'R': 'r', | 
					
						
						|  | } | 
					
						
						|  |  | 
					
						
						|  | class TamilTextProcessor: | 
					
						
						|  | @staticmethod | 
					
						
						|  | def normalize_tamil_text(text): | 
					
						
						|  | """Normalize Tamil text for better pronunciation""" | 
					
						
						|  |  | 
					
						
						|  | tamil_numerals = {'௦': '0', '௧': '1', '௨': '2', '௩': '3', '௪': '4', | 
					
						
						|  | '௫': '5', '௬': '6', '௭': '7', '௮': '8', '௯': '9'} | 
					
						
						|  | for tamil_num, eng_num in tamil_numerals.items(): | 
					
						
						|  | text = text.replace(tamil_num, eng_num) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | text = text.replace('ஜ்ஞ', 'க்ய') | 
					
						
						|  |  | 
					
						
						|  | return text | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def split_tamil_sentences(text): | 
					
						
						|  | """Split Tamil text into natural sentence boundaries""" | 
					
						
						|  | sentence_endings = ['।', '.', '!', '?', '॥'] | 
					
						
						|  | sentences = [] | 
					
						
						|  | current_sentence = '' | 
					
						
						|  |  | 
					
						
						|  | for char in text: | 
					
						
						|  | current_sentence += char | 
					
						
						|  | if char in sentence_endings: | 
					
						
						|  | sentences.append(current_sentence.strip()) | 
					
						
						|  | current_sentence = '' | 
					
						
						|  |  | 
					
						
						|  | if current_sentence: | 
					
						
						|  | sentences.append(current_sentence.strip()) | 
					
						
						|  |  | 
					
						
						|  | return sentences | 
					
						
						|  |  | 
					
						
						|  | class TamilAudioProcessor: | 
					
						
						|  | @staticmethod | 
					
						
						|  | def adjust_tamil_audio(audio_segment): | 
					
						
						|  | """Adjust audio characteristics for Tamil speech""" | 
					
						
						|  |  | 
					
						
						|  | enhanced_audio = audio_segment.high_pass_filter(80) | 
					
						
						|  | enhanced_audio = enhanced_audio.low_pass_filter(8000) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | enhanced_audio = enhanced_audio.speedup(playback_speed=0.95) | 
					
						
						|  |  | 
					
						
						|  | return enhanced_audio | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def match_emotion(audio_segment, emotion_type): | 
					
						
						|  | """Adjust audio based on emotional context""" | 
					
						
						|  | if emotion_type == 'happy': | 
					
						
						|  | return audio_segment.apply_gain(2).high_pass_filter(100) | 
					
						
						|  | elif emotion_type == 'sad': | 
					
						
						|  | return audio_segment.apply_gain(-1).low_pass_filter(3000) | 
					
						
						|  | elif emotion_type == 'angry': | 
					
						
						|  | return audio_segment.apply_gain(4).high_pass_filter(200) | 
					
						
						|  | return audio_segment | 
					
						
						|  |  | 
					
						
						|  | class TamilVideoDubber: | 
					
						
						|  | def __init__(self, azure_key=None, azure_region=None): | 
					
						
						|  | self.whisper_model = whisper.load_model("base") | 
					
						
						|  | self.temp_files = [] | 
					
						
						|  | self.azure_key = azure_key | 
					
						
						|  | self.azure_region = azure_region | 
					
						
						|  |  | 
					
						
						|  | def __enter__(self): | 
					
						
						|  | return self | 
					
						
						|  |  | 
					
						
						|  | def __exit__(self, exc_type, exc_val, exc_tb): | 
					
						
						|  | self.cleanup() | 
					
						
						|  |  | 
					
						
						|  | def cleanup(self): | 
					
						
						|  | for temp_file in self.temp_files: | 
					
						
						|  | if os.path.exists(temp_file): | 
					
						
						|  | os.remove(temp_file) | 
					
						
						|  |  | 
					
						
						|  | def create_temp_file(self, suffix): | 
					
						
						|  | temp_file = tempfile.mktemp(suffix=suffix) | 
					
						
						|  | self.temp_files.append(temp_file) | 
					
						
						|  | return temp_file | 
					
						
						|  |  | 
					
						
						|  | def extract_audio_segments(self, video_path): | 
					
						
						|  | """Extract audio segments with emotion detection""" | 
					
						
						|  | video = VideoFileClip(video_path) | 
					
						
						|  | result = self.whisper_model.transcribe(video_path) | 
					
						
						|  |  | 
					
						
						|  | segments = [] | 
					
						
						|  | for segment in result["segments"]: | 
					
						
						|  |  | 
					
						
						|  | emotion = self.detect_emotion(segment["text"]) | 
					
						
						|  | segments.append({ | 
					
						
						|  | "text": segment["text"], | 
					
						
						|  | "start": segment["start"], | 
					
						
						|  | "end": segment["end"], | 
					
						
						|  | "duration": segment["end"] - segment["start"], | 
					
						
						|  | "emotion": emotion | 
					
						
						|  | }) | 
					
						
						|  |  | 
					
						
						|  | return segments, video.duration | 
					
						
						|  |  | 
					
						
						|  | def detect_emotion(self, text): | 
					
						
						|  | """Simple emotion detection based on text analysis""" | 
					
						
						|  | happy_words = ['happy', 'joy', 'laugh', 'smile', 'மகிழ்ச்சி'] | 
					
						
						|  | sad_words = ['sad', 'sorry', 'cry', 'வருத்தம்'] | 
					
						
						|  | angry_words = ['angry', 'hate', 'கோபம்'] | 
					
						
						|  |  | 
					
						
						|  | text_lower = text.lower() | 
					
						
						|  | if any(word in text_lower for word in happy_words): | 
					
						
						|  | return 'happy' | 
					
						
						|  | elif any(word in text_lower for word in sad_words): | 
					
						
						|  | return 'sad' | 
					
						
						|  | elif any(word in text_lower for word in angry_words): | 
					
						
						|  | return 'angry' | 
					
						
						|  | return 'neutral' | 
					
						
						|  |  | 
					
						
						|  | def translate_to_tamil(self, text): | 
					
						
						|  | """Translate text to Tamil with context preservation""" | 
					
						
						|  | translator = Translator(to_lang='ta') | 
					
						
						|  | translated = translator.translate(text) | 
					
						
						|  | return TamilTextProcessor.normalize_tamil_text(translated) | 
					
						
						|  |  | 
					
						
						|  | def generate_tamil_audio(self, text, voice_config, emotion='neutral'): | 
					
						
						|  | """Generate Tamil audio using Azure TTS or gTTS""" | 
					
						
						|  | if self.azure_key and self.azure_region: | 
					
						
						|  | return self._generate_azure_tamil_audio(text, voice_config, emotion) | 
					
						
						|  | else: | 
					
						
						|  | return self._generate_gtts_tamil_audio(text, emotion) | 
					
						
						|  |  | 
					
						
						|  | def _generate_azure_tamil_audio(self, text, voice_config, emotion): | 
					
						
						|  | """Generate Tamil audio using Azure Cognitive Services""" | 
					
						
						|  | speech_config = speechsdk.SpeechConfig( | 
					
						
						|  | subscription=self.azure_key, region=self.azure_region) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | speech_config.speech_synthesis_voice_name = "ta-IN-PallaviNeural" | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | speech_synthesizer = speechsdk.SpeechSynthesizer( | 
					
						
						|  | speech_config=speech_config) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | ssml_text = f""" | 
					
						
						|  | <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis"> | 
					
						
						|  | <voice name="ta-IN-PallaviNeural"> | 
					
						
						|  | <prosody rate="{self._get_emotion_rate(emotion)}" | 
					
						
						|  | pitch="{self._get_emotion_pitch(emotion)}"> | 
					
						
						|  | {text} | 
					
						
						|  | </prosody> | 
					
						
						|  | </voice> | 
					
						
						|  | </speak> | 
					
						
						|  | """ | 
					
						
						|  |  | 
					
						
						|  | result = speech_synthesizer.speak_ssml_async(ssml_text).get() | 
					
						
						|  |  | 
					
						
						|  | if result.reason == speechsdk.ResultReason.SynthesizingAudioCompleted: | 
					
						
						|  | return AudioSegment.from_wav(io.BytesIO(result.audio_data)) | 
					
						
						|  | else: | 
					
						
						|  | raise Exception("Speech synthesis failed") | 
					
						
						|  |  | 
					
						
						|  | def _generate_gtts_tamil_audio(self, text, emotion): | 
					
						
						|  | """Fallback to gTTS for Tamil audio generation""" | 
					
						
						|  | temp_path = self.create_temp_file(".mp3") | 
					
						
						|  | tts = gTTS(text=text, lang='ta') | 
					
						
						|  | tts.save(temp_path) | 
					
						
						|  |  | 
					
						
						|  | audio = AudioSegment.from_mp3(temp_path) | 
					
						
						|  |  | 
					
						
						|  | audio = TamilAudioProcessor.match_emotion(audio, emotion) | 
					
						
						|  | return audio | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def _get_emotion_rate(emotion): | 
					
						
						|  | """Get speech rate based on emotion""" | 
					
						
						|  | rates = { | 
					
						
						|  | 'happy': '1.1', | 
					
						
						|  | 'sad': '0.9', | 
					
						
						|  | 'angry': '1.2', | 
					
						
						|  | 'neutral': '1.0' | 
					
						
						|  | } | 
					
						
						|  | return rates.get(emotion, '1.0') | 
					
						
						|  |  | 
					
						
						|  | @staticmethod | 
					
						
						|  | def _get_emotion_pitch(emotion): | 
					
						
						|  | """Get pitch adjustment based on emotion""" | 
					
						
						|  | pitches = { | 
					
						
						|  | 'happy': '+1st', | 
					
						
						|  | 'sad': '-1st', | 
					
						
						|  | 'angry': '+2st', | 
					
						
						|  | 'neutral': '0st' | 
					
						
						|  | } | 
					
						
						|  | return pitches.get(emotion, '0st') | 
					
						
						|  |  | 
					
						
						|  | def main(): | 
					
						
						|  | st.title("Tamil Movie Dubbing System") | 
					
						
						|  | st.sidebar.header("Settings") | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | video_file = st.file_uploader("Upload your video", type=['mp4', 'mov', 'avi']) | 
					
						
						|  | if not video_file: | 
					
						
						|  | return | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | selected_voice = st.selectbox("Select Tamil voice", list(TAMIL_VOICES.keys())) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with st.expander("Advanced Settings"): | 
					
						
						|  | generate_subtitles = st.checkbox("Generate Tamil subtitles", value=True) | 
					
						
						|  | adjust_audio = st.checkbox("Enhance Tamil audio clarity", value=True) | 
					
						
						|  | emotion_detection = st.checkbox("Enable emotion detection", value=True) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | tamil_fonts = ["Latha", "Vijaya", "Mukta Malar"] | 
					
						
						|  | selected_font = st.selectbox("Select Tamil font", tamil_fonts) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if adjust_audio: | 
					
						
						|  | clarity_level = st.slider("Audio clarity level", 1, 5, 3) | 
					
						
						|  | bass_boost = st.slider("Bass boost", 0, 100, 50) | 
					
						
						|  |  | 
					
						
						|  | if st.button("Start Tamil Dubbing"): | 
					
						
						|  | with st.spinner("Processing your video..."): | 
					
						
						|  | try: | 
					
						
						|  | with TamilVideoDubber() as dubber: | 
					
						
						|  |  | 
					
						
						|  | temp_video_path = dubber.create_temp_file(".mp4") | 
					
						
						|  | with open(temp_video_path, "wb") as f: | 
					
						
						|  | f.write(video_file.read()) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | progress_bar = st.progress(0) | 
					
						
						|  | status_text = st.empty() | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | status_text.text("Analyzing video...") | 
					
						
						|  | segments, duration = dubber.extract_audio_segments( | 
					
						
						|  | temp_video_path) | 
					
						
						|  | progress_bar.progress(0.25) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | status_text.text("Generating Tamil audio...") | 
					
						
						|  | final_audio = AudioSegment.empty() | 
					
						
						|  |  | 
					
						
						|  | for i, segment in enumerate(segments): | 
					
						
						|  |  | 
					
						
						|  | tamil_text = dubber.translate_to_tamil(segment["text"]) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | segment_audio = dubber.generate_tamil_audio( | 
					
						
						|  | tamil_text, | 
					
						
						|  | TAMIL_VOICES[selected_voice], | 
					
						
						|  | segment["emotion"] if emotion_detection else 'neutral' | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if adjust_audio: | 
					
						
						|  | segment_audio = TamilAudioProcessor.adjust_tamil_audio( | 
					
						
						|  | segment_audio) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | if len(final_audio) < segment["start"] * 1000: | 
					
						
						|  | silence_duration = (segment["start"] * 1000 - | 
					
						
						|  | len(final_audio)) | 
					
						
						|  | final_audio += AudioSegment.silent( | 
					
						
						|  | duration=silence_duration) | 
					
						
						|  |  | 
					
						
						|  | final_audio += segment_audio | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | progress_bar.progress(0.25 + (0.5 * (i + 1) / | 
					
						
						|  | len(segments))) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | status_text.text("Creating final video...") | 
					
						
						|  | output_path = dubber.create_temp_file(".mp4") | 
					
						
						|  |  | 
					
						
						|  | video = VideoFileClip(temp_video_path) | 
					
						
						|  | video = video.set_audio(AudioFileClip(final_audio)) | 
					
						
						|  |  | 
					
						
						|  | if generate_subtitles: | 
					
						
						|  |  | 
					
						
						|  | subtitle_clips = [] | 
					
						
						|  | for segment in segments: | 
					
						
						|  | tamil_text = dubber.translate_to_tamil(segment["text"]) | 
					
						
						|  | subtitle_clip = TextClip( | 
					
						
						|  | tamil_text, | 
					
						
						|  | fontsize=24, | 
					
						
						|  | font=selected_font, | 
					
						
						|  | color='white', | 
					
						
						|  | stroke_color='black', | 
					
						
						|  | stroke_width=1 | 
					
						
						|  | ) | 
					
						
						|  | subtitle_clip = subtitle_clip.set_position( | 
					
						
						|  | ('center', 'bottom') | 
					
						
						|  | ).set_duration( | 
					
						
						|  | segment["end"] - segment["start"] | 
					
						
						|  | ).set_start(segment["start"]) | 
					
						
						|  | subtitle_clips.append(subtitle_clip) | 
					
						
						|  |  | 
					
						
						|  | video = CompositeVideoClip([video] + subtitle_clips) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | video.write_videofile(output_path, codec='libx264', | 
					
						
						|  | audio_codec='aac') | 
					
						
						|  | progress_bar.progress(1.0) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | st.success("Tamil dubbing completed!") | 
					
						
						|  | st.video(output_path) | 
					
						
						|  |  | 
					
						
						|  |  | 
					
						
						|  | with open(output_path, "rb") as f: | 
					
						
						|  | st.download_button( | 
					
						
						|  | "Download Tamil Dubbed Video", | 
					
						
						|  | f, | 
					
						
						|  | file_name="tamil_dubbed_video.mp4" | 
					
						
						|  | ) | 
					
						
						|  |  | 
					
						
						|  | except Exception as e: | 
					
						
						|  | st.error(f"An error occurred: {str(e)}") | 
					
						
						|  |  | 
					
						
						|  | if __name__ == "__main__": | 
					
						
						|  | main() |