Athspi committed on
Commit
1e0f1bc
·
verified ·
1 Parent(s): 5a84705

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -223
app.py CHANGED
@@ -1,224 +1,28 @@
1
  import gradio as gr
2
- import whisper
3
- import os
4
- from pydub import AudioSegment
5
- from transformers import WhisperForConditionalGeneration, WhisperProcessor
6
-
7
- # Mapping of model names to Whisper model sizes
8
- MODELS = {
9
- "Tiny (Fastest)": "tiny",
10
- "Base (Faster)": "base",
11
- "Small (Balanced)": "small",
12
- "Medium (Accurate)": "medium",
13
- "Large (Most Accurate)": "large",
14
- "Fine-Tuned Hindi": "yash-04/whisper-base-hindi", # Hindi fine-tuned model
15
- "Fine-Tuned Tamil": "mahimairaja/whisper-base-tamil" # Tamil fine-tuned model
16
- }
17
-
18
- # Mapping of full language names to language codes
19
- LANGUAGE_NAME_TO_CODE = {
20
- "Auto Detect": "Auto Detect",
21
- "English": "en",
22
- "Chinese": "zh",
23
- "German": "de",
24
- "Spanish": "es",
25
- "Russian": "ru",
26
- "Korean": "ko",
27
- "French": "fr",
28
- "Japanese": "ja",
29
- "Portuguese": "pt",
30
- "Turkish": "tr",
31
- "Polish": "pl",
32
- "Catalan": "ca",
33
- "Dutch": "nl",
34
- "Arabic": "ar",
35
- "Swedish": "sv",
36
- "Italian": "it",
37
- "Indonesian": "id",
38
- "Hindi": "hi",
39
- "Finnish": "fi",
40
- "Vietnamese": "vi",
41
- "Hebrew": "he",
42
- "Ukrainian": "uk",
43
- "Greek": "el",
44
- "Malay": "ms",
45
- "Czech": "cs",
46
- "Romanian": "ro",
47
- "Danish": "da",
48
- "Hungarian": "hu",
49
- "Tamil": "ta",
50
- "Norwegian": "no",
51
- "Thai": "th",
52
- "Urdu": "ur",
53
- "Croatian": "hr",
54
- "Bulgarian": "bg",
55
- "Lithuanian": "lt",
56
- "Latin": "la",
57
- "Maori": "mi",
58
- "Malayalam": "ml",
59
- "Welsh": "cy",
60
- "Slovak": "sk",
61
- "Telugu": "te",
62
- "Persian": "fa",
63
- "Latvian": "lv",
64
- "Bengali": "bn",
65
- "Serbian": "sr",
66
- "Azerbaijani": "az",
67
- "Slovenian": "sl",
68
- "Kannada": "kn",
69
- "Estonian": "et",
70
- "Macedonian": "mk",
71
- "Breton": "br",
72
- "Basque": "eu",
73
- "Icelandic": "is",
74
- "Armenian": "hy",
75
- "Nepali": "ne",
76
- "Mongolian": "mn",
77
- "Bosnian": "bs",
78
- "Kazakh": "kk",
79
- "Albanian": "sq",
80
- "Swahili": "sw",
81
- "Galician": "gl",
82
- "Marathi": "mr",
83
- "Punjabi": "pa",
84
- "Sinhala": "si",
85
- "Khmer": "km",
86
- "Shona": "sn",
87
- "Yoruba": "yo",
88
- "Somali": "so",
89
- "Afrikaans": "af",
90
- "Occitan": "oc",
91
- "Georgian": "ka",
92
- "Belarusian": "be",
93
- "Tajik": "tg",
94
- "Sindhi": "sd",
95
- "Gujarati": "gu",
96
- "Amharic": "am",
97
- "Yiddish": "yi",
98
- "Lao": "lo",
99
- "Uzbek": "uz",
100
- "Faroese": "fo",
101
- "Haitian Creole": "ht",
102
- "Pashto": "ps",
103
- "Turkmen": "tk",
104
- "Nynorsk": "nn",
105
- "Maltese": "mt",
106
- "Sanskrit": "sa",
107
- "Luxembourgish": "lb",
108
- "Burmese": "my",
109
- "Tibetan": "bo",
110
- "Tagalog": "tl",
111
- "Malagasy": "mg",
112
- "Assamese": "as",
113
- "Tatar": "tt",
114
- "Hawaiian": "haw",
115
- "Lingala": "ln",
116
- "Hausa": "ha",
117
- "Bashkir": "ba",
118
- "Javanese": "jw",
119
- "Sundanese": "su",
120
- }
121
-
122
- def detect_language(audio_file):
123
- """Detect the language of the audio file."""
124
- # Load the Whisper model (use "base" for faster detection)
125
- model = whisper.load_model("base")
126
-
127
- # Convert audio to 16kHz mono for better compatibility with Whisper
128
- audio = AudioSegment.from_file(audio_file)
129
- audio = audio.set_frame_rate(16000).set_channels(1)
130
- processed_audio_path = "processed_audio.wav"
131
- audio.export(processed_audio_path, format="wav")
132
-
133
- # Detect the language
134
- result = model.transcribe(processed_audio_path, task="detect_language", fp16=False)
135
- detected_language = result.get("language", "unknown")
136
-
137
- # Clean up processed audio file
138
- os.remove(processed_audio_path)
139
-
140
- return f"Detected Language: {detected_language}"
141
-
142
- def transcribe_audio(audio_file, language="Auto Detect", model_size="Base (Faster)"):
143
- """Transcribe the audio file."""
144
- # Map language to fine-tuned model
145
- language_to_model = {
146
- "Hindi": "yash-04/whisper-base-hindi",
147
- "Tamil": "mahimairaja/whisper-base-tamil",
148
- # Add more mappings as needed
149
- }
150
-
151
- # Load the selected Whisper model
152
- if language in language_to_model:
153
- model_name = language_to_model[language]
154
- model = WhisperForConditionalGeneration.from_pretrained(model_name)
155
- processor = WhisperProcessor.from_pretrained(model_name)
156
- else:
157
- model = whisper.load_model(MODELS[model_size])
158
- processor = None # Use default Whisper processor
159
-
160
- # Convert audio to 16kHz mono for better compatibility with Whisper
161
- audio = AudioSegment.from_file(audio_file)
162
- audio = audio.set_frame_rate(16000).set_channels(1)
163
- processed_audio_path = "processed_audio.wav"
164
- audio.export(processed_audio_path, format="wav")
165
-
166
- # Transcribe the audio
167
- if language == "Auto Detect":
168
- if processor:
169
- inputs = processor(processed_audio_path, return_tensors="pt", sampling_rate=16000)
170
- result = model.generate(inputs.input_features)
171
- transcription = processor.batch_decode(result, skip_special_tokens=True)[0]
172
- else:
173
- result = model.transcribe(processed_audio_path, fp16=False)
174
- transcription = result["text"]
175
- detected_language = result.get("language", "unknown")
176
- else:
177
- language_code = LANGUAGE_NAME_TO_CODE.get(language, "en") # Default to English if not found
178
- if processor:
179
- inputs = processor(processed_audio_path, return_tensors="pt", sampling_rate=16000)
180
- result = model.generate(inputs.input_features, language=language_code)
181
- transcription = processor.batch_decode(result, skip_special_tokens=True)[0]
182
- else:
183
- result = model.transcribe(processed_audio_path, language=language_code, fp16=False)
184
- transcription = result["text"]
185
- detected_language = language_code
186
-
187
- # Clean up processed audio file
188
- os.remove(processed_audio_path)
189
-
190
- # Return transcription and detected language
191
- return f"Detected Language: {detected_language}\n\nTranscription:\n{transcription}"
192
-
193
- # Define the Gradio interface
194
- with gr.Blocks() as demo:
195
- gr.Markdown("# Audio Transcription and Language Detection")
196
-
197
- with gr.Tab("Detect Language"):
198
- gr.Markdown("Upload an audio file to detect its language.")
199
- detect_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
200
- detect_language_output = gr.Textbox(label="Detected Language")
201
- detect_button = gr.Button("Detect Language")
202
-
203
- with gr.Tab("Transcribe Audio"):
204
- gr.Markdown("Upload an audio file, select a language (or choose 'Auto Detect'), and choose a model for transcription.")
205
- transcribe_audio_input = gr.Audio(type="filepath", label="Upload Audio File")
206
- language_dropdown = gr.Dropdown(
207
- choices=list(LANGUAGE_NAME_TO_CODE.keys()), # Full language names
208
- label="Select Language",
209
- value="Auto Detect"
210
- )
211
- model_dropdown = gr.Dropdown(
212
- choices=list(MODELS.keys()), # Model options
213
- label="Select Model",
214
- value="Base (Faster)" # Default to "Base" model
215
- )
216
- transcribe_output = gr.Textbox(label="Transcription and Detected Language")
217
- transcribe_button = gr.Button("Transcribe Audio")
218
-
219
- # Link buttons to functions
220
- detect_button.click(detect_language, inputs=detect_audio_input, outputs=detect_language_output)
221
- transcribe_button.click(transcribe_audio, inputs=[transcribe_audio_input, language_dropdown, model_dropdown], outputs=transcribe_output)
222
-
223
- # Launch the Gradio interface
224
- demo.launch()
 
1
  import gradio as gr
2
+ from transformers import pipeline
3
+
4
# Hugging Face Hub id of the Whisper-small checkpoint fine-tuned for Sinhala.
model_name = "Subhaka/whisper-small-Sinhala-Fine_Tune"

# Build the speech-recognition pipeline once at import time so every request
# reuses the already-loaded model instead of reloading it per call.
# Whisper operates on 30-second windows; chunk_length_s=30 enables the
# pipeline's chunking so that longer uploads are split into windows and
# transcribed in full rather than being limited to a single window.
transcriber = pipeline(
    "automatic-speech-recognition",
    model=model_name,
    chunk_length_s=30,
)
7
+
8
def transcribe_audio(audio_file):
    """Transcribe an audio file with the module-level ``transcriber`` pipeline.

    Args:
        audio_file: Path of the audio file supplied by the Gradio audio
            input. A falsy value (``None`` or an empty string) is treated
            as "nothing was uploaded".

    Returns:
        The transcribed text on success. On any failure, a message that
        starts with ``"Error: "`` is returned instead; the function never
        raises, so the message ends up in the output textbox.
    """
    # Nothing to transcribe: answer with a clear message instead of handing
    # a missing path to the pipeline and surfacing an opaque library error.
    if not audio_file:
        return "Error: no audio file was provided."

    try:
        return transcriber(audio_file)["text"]
    except Exception as exc:
        # Deliberately broad: this is the UI boundary, and every failure
        # (decoding, model, missing key) is reported to the user as text.
        return f"Error: {exc}"
15
+
16
# Single-function Gradio UI: one audio upload in, one text box out.
interface = gr.Interface(
    fn=transcribe_audio,
    # Gradio 4 replaced the `source="upload"` string with a `sources` list;
    # the old keyword raises TypeError on current Gradio releases.
    # NOTE(review): this requires Gradio >= 4 - confirm the pinned version.
    # type="filepath" hands the callback a path on disk rather than raw samples.
    inputs=gr.Audio(sources=["upload"], type="filepath", label="Upload Audio"),
    outputs=gr.Textbox(label="Transcription"),
    title="Sinhala Audio-to-Text Transcription",
    description="Upload an audio file and get the transcription in Sinhala using the Whisper model fine-tuned by Subhaka.",
    # Turn off the flagging feature for this demo.
    allow_flagging="never",
)
25
+
26
# Start the web server only when this file is executed as a script.
if __name__ == "__main__":
    # Bind to all network interfaces on port 7860.
    # NOTE(review): share=True asks Gradio for a public share link - confirm
    # that exposing the app this way is intended for every environment.
    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,
        "share": True,
    }
    interface.launch(**launch_options)