File size: 4,269 Bytes
be003b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d06f8f
be003b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d06f8f
 
 
be003b4
 
 
 
 
 
7d06f8f
be003b4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d06f8f
 
 
 
be003b4
7d06f8f
be003b4
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import gradio as gr
from transformers import MarianMTModel, MarianTokenizer
import torch
import nltk

# Fetch the Punkt sentence-tokenizer data used by sent_tokenize. Both resource
# names are requested; which one is actually read presumably depends on the
# installed NLTK version.
for _resource in ('punkt', 'punkt_tab'):
    nltk.download(_resource)

from nltk.tokenize import sent_tokenize

# Already-loaded (model, tokenizer) pairs, keyed by model name.
models_cache = {}

def load_model(model_name):
    """
    Return the (model, tokenizer) pair for ``model_name``, loading it on first use.

    A cache hit returns the stored pair directly. On a miss the tokenizer and
    then the model are created with ``from_pretrained``, the model is moved to
    CUDA when ``torch.cuda.is_available()`` is true, and the pair is stored in
    ``models_cache`` before being returned.
    """
    if model_name in models_cache:
        return models_cache[model_name]

    marian_tokenizer = MarianTokenizer.from_pretrained(model_name)
    marian_model = MarianMTModel.from_pretrained(model_name)
    placed_model = marian_model.to('cuda') if torch.cuda.is_available() else marian_model

    entry = (placed_model, marian_tokenizer)
    models_cache[model_name] = entry
    return entry

def translate_text(model_name, text):
    """
    Translate input text sentence by sentence using the specified model.

    Args:
        model_name: Model identifier handed to ``load_model``.
        text: Source text; it is split into sentences with ``sent_tokenize``
            and each sentence is translated on its own.

    Returns:
        The translated sentences joined by single spaces. If ``model_name``
        is empty or ``text`` is empty or whitespace-only, a prompt message is
        returned instead. If loading or translation raises, the exception is
        reported as an ``"Error: ..."`` string rather than propagated, so the
        caller always receives text it can display.
    """
    # Whitespace-only input has nothing to translate; treat it like empty input.
    if not model_name or not text or not text.strip():
        return "Please select a model and provide text for translation."

    try:
        model, tokenizer = load_model(model_name)

        translated_sentences = []
        for sentence in sent_tokenize(text):
            # truncation=True caps the input at the tokenizer's configured
            # maximum length, so a single overlong "sentence" (for example a
            # long passage without punctuation) cannot exceed what the model
            # accepts.
            tokens = tokenizer(
                sentence,
                return_tensors="pt",
                padding=True,
                truncation=True,
            )
            # Put the inputs on whatever device the loaded model lives on,
            # instead of re-checking global CUDA availability.
            tokens = tokens.to(model.device)

            translated = model.generate(**tokens)
            translated_sentences.append(
                tokenizer.decode(translated[0], skip_special_tokens=True)
            )

        # Join translated sentences back into a single string
        return " ".join(translated_sentences)

    except Exception as e:
        # UI boundary: surface the failure as text for the output box.
        return f"Error: {str(e)}"

# Display name -> Hugging Face model name, in the order shown to the user
model_mapping = {
    "English to Turkish": "Helsinki-NLP/opus-mt-tc-big-en-tr",
    "Turkish to English": "Helsinki-NLP/opus-mt-tc-big-tr-en",
    "English to French": "Helsinki-NLP/opus-mt-tc-big-en-fr",
    "French to English": "Helsinki-NLP/opus-mt-tc-big-fr-en",
    "English to German": "Helsinki-NLP/opus-mt-en-de",
    "German to English": "Helsinki-NLP/opus-mt-de-en",
    "English to Spanish": "Helsinki-NLP/opus-mt-tc-big-en-es",
    "Spanish to English": "Helsinki-NLP/opus-mt-es-en",
    "English to Arabic": "Helsinki-NLP/opus-mt-tc-big-en-ar",
    "Arabic to English": "Helsinki-NLP/opus-mt-tc-big-ar-en",
    "English to Urdu": "Helsinki-NLP/opus-mt-en-ur",
    "Urdu to English": "Helsinki-NLP/opus-mt-ur-en",
    "English to Hindi": "Helsinki-NLP/opus-mt-en-hi",
    "Hindi to English": "Helsinki-NLP/opus-mt-hi-en",
    "English to Chinese": "Helsinki-NLP/opus-mt-en-zh",
    "Chinese to English": "Helsinki-NLP/opus-mt-zh-en",
}

# Model options as an ordered list of (display name, model name) pairs
model_options = list(model_mapping.items())

# Build the UI: title, model picker, input box, action buttons, output box.
with gr.Blocks() as demo:
    def clear_inputs():
        """Return empty strings for the input and output textboxes."""
        return "", ""

    def translate_with_mapping(selected_name, text):
        """Resolve the dropdown's display name to a model name and translate."""
        # An unknown or unset selection maps to "", which translate_text
        # receives as the model name.
        return translate_text(model_mapping.get(selected_name, ""), text)

    gr.Markdown("# 🌍 Real-Time Sentence Translation")

    with gr.Row():
        model_dropdown = gr.Dropdown(
            choices=[label for label, _model_id in model_options],  # names only
            label="Select Translation Model",
            type="value",
        )

    with gr.Row():
        input_text = gr.Textbox(
            lines=5,
            label="Enter text (complete sentences)",
            placeholder="Type here...",
        )

    with gr.Row():
        translate_button = gr.Button("Translate")
        clear_button = gr.Button("Clear")

    output_text = gr.Textbox(interactive=False, label="Translated Text")

    # Translate: (selected display name, input text) -> output box
    translate_button.click(
        fn=translate_with_mapping, inputs=[model_dropdown, input_text], outputs=output_text
    )

    # Clear: blank both the input box and the output box
    clear_button.click(fn=clear_inputs, inputs=[], outputs=[input_text, output_text])

# Start the Gradio app
demo.launch()