Spaces:

QomSSLab
/

ASR

Sleeping

ASR

File size: 7,351 Bytes

17a8b4f

import os
import tempfile
import time

import gradio as gr
from huggingface_hub import hf_hub_download
from huggingface_hub import login
from TTS.utils.synthesizer import Synthesizer

# Uncomment for private models if needed
# login(token=os.environ.get("HF_TOKEN"))

# Stylesheet handed to gr.Blocks(css=...) below. The selectors target the
# class names attached to components through elem_classes in the layout
# (container, main-header, status-panel, input-panel, output-panel,
# examples-panel, footer, label, primary) and switch text areas and labels
# to right-to-left alignment for Persian text.
custom_css = """
.gradio-container {
    background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
    font-family: 'Vazirmatn', 'Tahoma', sans-serif;
}

.main-header {
    color: #2d3748;
    text-align: center;
    margin-bottom: 2rem;
    text-shadow: 1px 1px 2px rgba(0,0,0,0.1);
}

.container {
    max-width: 900px;
    margin: 0 auto;
    padding: 20px;
    background-color: white;
    border-radius: 12px;
    box-shadow: 0 10px 25px rgba(0,0,0,0.1);
}

.footer {
    text-align: center;
    margin-top: 2rem;
    color: #4a5568;
    font-size: 0.9rem;
}

/* Persian text alignment */
textarea, .label {
    text-align: right;
    direction: rtl;
}

/* Button styling */
button.primary {
    background: linear-gradient(to right, #4776E6, #8E54E9);
    border: none;
    border-radius: 8px;
    color: white;
    font-weight: bold;
    transition: all 0.3s ease;
}

button.primary:hover {
    transform: translateY(-2px);
    box-shadow: 0 7px 14px rgba(50, 50, 93, 0.1), 0 3px 6px rgba(0, 0, 0, 0.08);
}

.input-panel, .output-panel {
    background-color: rgba(255, 255, 255, 0.9);
    border-radius: 10px;
    padding: 15px;
    margin-bottom: 15px;
    border: 1px solid #e2e8f0;
}

.examples-panel {
    background-color: rgba(255, 255, 255, 0.8);
    border-radius: 10px;
    padding: 10px;
    border: 1px solid #e2e8f0;
}

.status-panel {
    background-color: #edf2f7;
    border-radius: 8px;
    padding: 10px;
    margin-bottom: 15px;
    text-align: center;
}
"""

def load_synthesizer():
    """Download the Persian VITS model files and build a CPU synthesizer.

    Fetches ``best_model.pth`` and ``config.json`` from the
    ``QomSSLab/vits-fa-voice`` Hub repository (cached under ``models``) and
    wraps them in a ``Synthesizer`` with CUDA disabled.

    Returns:
        The constructed ``Synthesizer`` instance.

    Raises:
        RuntimeError: If downloading the files or constructing the
            synthesizer fails. The original exception is chained as
            ``__cause__`` so the full traceback stays available.
    """
    repo_id = "QomSSLab/vits-fa-voice"
    cache_dir = "models"

    # Progress goes to the process log. Calling ``update`` on a component
    # instance never changes what the page shows, and the method is absent
    # in newer Gradio releases, where the call would abort loading before
    # the download even starts.
    print(f"Loading TTS model from {repo_id} ...")

    try:
        # Download model files from Hugging Face Hub
        model_path = hf_hub_download(
            repo_id=repo_id,
            filename="best_model.pth",
            cache_dir=cache_dir,
        )
        config_path = hf_hub_download(
            repo_id=repo_id,
            filename="config.json",
            cache_dir=cache_dir,
        )

        # Create synthesizer
        synthesizer = Synthesizer(
            tts_checkpoint=model_path,
            tts_config_path=config_path,
            use_cuda=False,  # Usually no GPU in free Spaces
        )
    except Exception as e:
        error_msg = f"خطا در بارگذاری مدل: {str(e)}"
        raise RuntimeError(error_msg) from e

    print("TTS model loaded successfully.")
    return synthesizer

def tts(text, speed=1.0):
    """Synthesize speech for *text* and return it as a WAV file path.

    Args:
        text: The text to speak. ``None``, empty, or whitespace-only input
            is rejected without touching the model.
        speed: Forwarded to ``synthesizer.tts`` as the ``speed`` keyword.
            NOTE(review): whether the loaded model honours this keyword is
            not visible here -- confirm against the installed TTS version.

    Returns:
        A ``(audio_path, message)`` tuple. ``audio_path`` is the path of the
        generated WAV file, or ``None`` when the input was rejected or
        synthesis failed; ``message`` is the Persian status text shown to
        the user.
    """
    # Guard against None as well as blank input; calling ``.strip()`` on
    # None would raise before any error handling is in place.
    if not text or not text.strip():
        return None, "لطفاً متنی وارد کنید."

    try:
        # Generate speech
        wav = synthesizer.tts(text, speed=speed)

        # Each request gets its own file so that concurrent requests cannot
        # overwrite one another's audio.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
            output_path = tmp.name
        synthesizer.save_wav(wav, output_path)
    except Exception as e:
        # UI boundary: report the failure in the result text instead of
        # letting the handler raise.
        print(f"Error: {str(e)}")
        error_msg = f"خطا در تولید صدا: {str(e)}"
        return None, error_msg

    return output_path, "تبدیل با موفقیت انجام شد."

# Module-level status component. It is kept defined before the model is
# loaded because load_synthesizer() and tts() look up this global name.
status_block = gr.Markdown("در حال آماده‌سازی سیستم...")

# Load the model before building the page so that the outcome can be rendered
# directly into the visible status area.
try:
    synthesizer = load_synthesizer()
except Exception as e:
    print(f"Error: {str(e)}")
    model_ready = False
    startup_status = f"❌ خطا در بارگذاری مدل: {str(e)}"
else:
    model_ready = True
    startup_status = "سیستم آماده استفاده است!"

with gr.Blocks(css=custom_css) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown("# سامانه تبدیل متن فارسی به گفتار", elem_classes="main-header")

        # Status area: shows whether the model loaded successfully
        with gr.Column(elem_classes="status-panel"):
            status_output = gr.Markdown(startup_status, elem_id="status")

        # Input panel
        with gr.Column(elem_classes="input-panel"):
            gr.Markdown("### متن ورودی", elem_classes="label")
            text_input = gr.Textbox(
                placeholder="متن فارسی خود را اینجا وارد کنید...",
                lines=5,
                label="",
                elem_classes="input-text"
            )

            with gr.Row():
                speed_slider = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.0,
                    step=0.1,
                    label="سرعت گفتار",
                    elem_classes="speed-slider"
                )

                # Disabled when the model failed to load, since there is
                # nothing for the button to call in that case.
                submit_btn = gr.Button(
                    "تبدیل به گفتار",
                    variant="primary",
                    elem_classes="primary",
                    interactive=model_ready
                )

        # Output panel
        with gr.Column(elem_classes="output-panel"):
            gr.Markdown("### خروجی صوتی", elem_classes="label")
            output_audio = gr.Audio(label="")
            result_text = gr.Markdown("")

        # Examples panel
        with gr.Column(elem_classes="examples-panel"):
            gr.Markdown("### نمونه‌های متنی", elem_classes="label")
            examples = gr.Examples(
                examples=[
                    ["سلام دنیا، این یک آزمایش برای سیستم تبدیل متن به گفتار فارسی است."],
                    ["امروز هوا بسیار خوب است و من احساس شادی می‌کنم."],
                    ["فناوری هوش مصنوعی به سرعت در حال پیشرفت است و به زودی در تمام جنبه‌های زندگی ما حضور خواهد داشت."]
                ],
                inputs=text_input,
                label="نمونه‌های متنی را امتحان کنید"
            )

        gr.Markdown(
            "**راهنما**: متن فارسی خود را در کادر بالا وارد کنید و دکمه تبدیل را فشار دهید. "
            "می‌توانید سرعت گفتار را با استفاده از نوار لغزنده تنظیم کنید.",
            elem_classes="footer"
        )

        gr.Markdown(
            "توسعه داده شده با استفاده از مدل VITS فارسی | [WaeliFatima/vits-fa-voice](https://huggingface.co/WaeliFatima/vits-fa-voice)",
            elem_classes="footer"
        )

    # Event listeners have to be registered while the Blocks context is still
    # open; registering them after the ``with`` block has closed is rejected
    # by Gradio and leaves the button without a handler.
    if model_ready:
        submit_btn.click(
            fn=tts,
            inputs=[text_input, speed_slider],
            outputs=[output_audio, result_text]
        )

# Launch the interface
demo.launch()