whisper-webui3

Paused

App Files Files Community

aadnk committed on Sep 27, 2022

Commit

fdd892b

1 Parent(s): 6a308c6

Limit video length before downloading from YouTube

Browse files

Files changed (2) hide show

app.py +53 -50
download.py +16 -2

app.py CHANGED Viewed

@@ -12,7 +12,7 @@ import ffmpeg
 # UI
 import gradio as gr
-from download import downloadUrl
 from utils import slugify, write_srt, write_vtt
@@ -52,54 +52,70 @@ class UI:
         self.inputAudioMaxDuration = inputAudioMaxDuration
     def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
-        source, sourceName = getSource(urlData, uploadFile, microphoneData)
         try:
-            selectedLanguage = languageName.lower() if len(languageName) > 0 else None
-            selectedModel = modelName if modelName is not None else "base"
-            if self.inputAudioMaxDuration > 0:
-                # Calculate audio length
-                audioDuration = ffmpeg.probe(source)["format"]["duration"]
-                if float(audioDuration) > self.inputAudioMaxDuration:
-                    return ("[ERROR]: Maximum audio file length is " + str(self.inputAudioMaxDuration) + "s, file was " + str(audioDuration) + "s"), "[ERROR]"
-            model = model_cache.get(selectedModel, None)
-            if not model:
-                model = whisper.load_model(selectedModel)
-                model_cache[selectedModel] = model
-            # The results
-            result = model.transcribe(source, language=selectedLanguage, task=task)
-            text = result["text"]
-            language = result["language"]
-            languageMaxLineWidth = getMaxLineWidth(language)
-            print("Max line width " + str(languageMaxLineWidth))
-            vtt = getSubs(result["segments"], "vtt", languageMaxLineWidth)
-            srt = getSubs(result["segments"], "srt", languageMaxLineWidth)
-            # Files that can be downloaded
-            downloadDirectory = tempfile.mkdtemp()
-            filePrefix = slugify(sourceName, allow_unicode=True)
-            download = []
-            download.append(createFile(srt, downloadDirectory, filePrefix + "-subs.srt"));
-            download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
-            download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
-            return download, text, vtt
-        finally:
-            # Cleanup source
-            if DELETE_UPLOADED_FILES:
-                print("Deleting source file " + source)
-                os.remove(source)
 def getMaxLineWidth(language: str) -> int:
     if (language == "ja" or language == "zh"):
@@ -110,19 +126,6 @@ def getMaxLineWidth(language: str) -> int:
         # 80 latin characters should fit on a 1080p/720p screen
         return 80
-def getSource(urlData, uploadFile, microphoneData):
-    if urlData:
-        # Download from YouTube
-        source = downloadUrl(urlData)
-    else:
-        # File input
-        source = uploadFile if uploadFile is not None else microphoneData
-    file_path = pathlib.Path(source)
-    sourceName = file_path.stem[:MAX_FILE_PREFIX_LENGTH] + file_path.suffix
-    return source, sourceName
 def createFile(text: str, directory: str, fileName: str) -> str:
     # Write the text to a file
     with open(os.path.join(directory, fileName), 'w+', encoding="utf-8") as file:

 # UI
 import gradio as gr
+from download import ExceededMaximumDuration, downloadUrl
 from utils import slugify, write_srt, write_vtt
         self.inputAudioMaxDuration = inputAudioMaxDuration
     def transcribeFile(self, modelName, languageName, urlData, uploadFile, microphoneData, task):
+        """Transcribe one audio source and create the subtitle/transcript files for it.
+
+        Args:
+            modelName: Name handed to whisper.load_model; "base" is used when None.
+            languageName: Lower-cased and passed as ``language`` to model.transcribe;
+                an empty string is replaced by None.
+            urlData, uploadFile, microphoneData: Forwarded to self.getSource, which
+                picks the actual input.
+            task: Passed through unchanged to model.transcribe.
+
+        Returns:
+            A 3-tuple. On success: (download, text, vtt), where ``download`` holds the
+            createFile results for the .srt, .vtt and .txt outputs (presumably their
+            paths - confirm against createFile). When a duration limit is hit:
+            ([], "[ERROR]: ...", "[ERROR]").
+        """
         try:
+            # May raise ExceededMaximumDuration, handled by the outer except below
+            source, sourceName = self.getSource(urlData, uploadFile, microphoneData)
+            # From here on a source exists, so the finally below is responsible for cleaning it up
+            try:
+                selectedLanguage = languageName.lower() if len(languageName) > 0 else None
+                selectedModel = modelName if modelName is not None else "base"
+                # A limit of 0 or less disables the length check
+                if self.inputAudioMaxDuration > 0:
+                    # Calculate audio length
+                    audioDuration = ffmpeg.probe(source)["format"]["duration"]
+                    if float(audioDuration) > self.inputAudioMaxDuration:
+                        return [], ("[ERROR]: Maximum audio file length is " + str(self.inputAudioMaxDuration) + "s, file was " + str(audioDuration) + "s"), "[ERROR]"
+                # Reuse a model that was loaded before; otherwise load it and remember it
+                model = model_cache.get(selectedModel, None)
+                if not model:
+                    model = whisper.load_model(selectedModel)
+                    model_cache[selectedModel] = model
+                # The results
+                result = model.transcribe(source, language=selectedLanguage, task=task)
+                text = result["text"]
+                language = result["language"]
+                # Line width is chosen from the language reported in the result
+                languageMaxLineWidth = getMaxLineWidth(language)
+                print("Max line width " + str(languageMaxLineWidth))
+                vtt = getSubs(result["segments"], "vtt", languageMaxLineWidth)
+                srt = getSubs(result["segments"], "srt", languageMaxLineWidth)
+                # Files that can be downloaded
+                downloadDirectory = tempfile.mkdtemp()
+                filePrefix = slugify(sourceName, allow_unicode=True)
+                download = []
+                download.append(createFile(srt, downloadDirectory, filePrefix + "-subs.srt"));
+                download.append(createFile(vtt, downloadDirectory, filePrefix + "-subs.vtt"));
+                download.append(createFile(text, downloadDirectory, filePrefix + "-transcript.txt"));
+                return download, text, vtt
+            finally:
+                # Runs on both return paths above and on any exception raised inside the inner try
+                # Cleanup source
+                if DELETE_UPLOADED_FILES:
+                    print("Deleting source file " + source)
+                    os.remove(source)
+        # Raised while resolving the source, i.e. outside the inner try, so there is no source to delete
+        except ExceededMaximumDuration as e:
+            return [], ("[ERROR]: Maximum remote video length is " + str(e.maxDuration) + "s, file was " + str(e.videoDuration) + "s"), "[ERROR]"
+    def getSource(self, urlData, uploadFile, microphoneData):
+        """Resolve the audio source to transcribe and derive a display name for it.
+
+        Args:
+            urlData: URL of a remote video. When truthy it takes priority over the
+                other inputs and the video is fetched with downloadUrl.
+            uploadFile: Uploaded file to use when no URL is given, or None.
+            microphoneData: Microphone recording, used when uploadFile is None.
+
+        Returns:
+            A (source, sourceName) tuple: the path of the audio file, and its file
+            name with the stem cut to MAX_FILE_PREFIX_LENGTH characters and the
+            original suffix kept.
+
+        Raises:
+            ExceededMaximumDuration: Propagated from downloadUrl when the remote
+                video's duration reaches or exceeds the configured limit.
+        """
+        if urlData:
+            # transcribeFile treats a limit of 0 or less as "no limit", but downloadUrl
+            # enforces any truthy value, so a negative limit would reject every video.
+            # Forward the limit only when it is positive; None is downloadUrl's default.
+            maxDuration = self.inputAudioMaxDuration if self.inputAudioMaxDuration > 0 else None
+            # Download from YouTube
+            source = downloadUrl(urlData, maxDuration)
+        else:
+            # File input
+            source = uploadFile if uploadFile is not None else microphoneData
+        file_path = pathlib.Path(source)
+        sourceName = file_path.stem[:MAX_FILE_PREFIX_LENGTH] + file_path.suffix
+        return source, sourceName
 def getMaxLineWidth(language: str) -> int:
     if (language == "ja" or language == "zh"):
         # 80 latin characters should fit on a 1080p/720p screen
         return 80
 def createFile(text: str, directory: str, fileName: str) -> str:
     # Write the text to a file
     with open(os.path.join(directory, fileName), 'w+', encoding="utf-8") as file:

download.py CHANGED Viewed

@@ -13,7 +13,7 @@ class FilenameCollectorPP(PostProcessor):
         self.filenames.append(information["filepath"])
         return [], information
-def downloadUrl(url: str):
     destinationDirectory = mkdtemp()
     ydl_opts = {
@@ -26,6 +26,13 @@ def downloadUrl(url: str):
     filename_collector = FilenameCollectorPP()
     with YoutubeDL(ydl_opts) as ydl:
         ydl.add_post_processor(filename_collector)
         ydl.download([url])
@@ -35,4 +42,11 @@ def downloadUrl(url: str):
     result = filename_collector.filenames[0]
     print("Downloaded " + result)
-    return result

         self.filenames.append(information["filepath"])
         return [], information
+def downloadUrl(url: str, maxDuration: int = None):
     destinationDirectory = mkdtemp()
     ydl_opts = {
     filename_collector = FilenameCollectorPP()
     with YoutubeDL(ydl_opts) as ydl:
+        if maxDuration:
+            info = ydl.extract_info(url, download=False)
+            duration = info['duration']
+            if duration >= maxDuration:
+                raise ExceededMaximumDuration(videoDuration=duration, maxDuration=maxDuration, message="Video is too long")
         ydl.add_post_processor(filename_collector)
         ydl.download([url])
     result = filename_collector.filenames[0]
     print("Downloaded " + result)
+    return result
+class ExceededMaximumDuration(Exception):
+    """Signals that a remote video is too long to be downloaded.
+
+    Raised by downloadUrl when the duration reported for the video reaches or
+    exceeds the requested maximum; the check runs before the download starts.
+    """
+    def __init__(self, videoDuration, maxDuration, message):
+        # Both values are kept on the exception so the caller can report them
+        # (app.py prints them with an "s" suffix, so presumably seconds - confirm)
+        self.videoDuration = videoDuration
+        self.maxDuration = maxDuration
+        super().__init__(message)