Spaces:
Build error
Build error
Merge branch 'main' of https://huggingface.co/spaces/aadnk/whisper-webui
Browse files
- app.py +6 -2
- requirements-fasterWhisper.txt +1 -0
- requirements-whisper.txt +1 -0
- requirements.txt +1 -0
- src/utils.py +12 -8
app.py
CHANGED
|
@@ -501,16 +501,20 @@ class WhisperTranscriber:
|
|
| 501 |
language = result["language"] if "language" in result else None
|
| 502 |
languageMaxLineWidth = self.__get_max_line_width(language)
|
| 503 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 504 |
print("Max line width " + str(languageMaxLineWidth))
|
| 505 |
vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
|
| 506 |
srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
|
| 507 |
-
json_result = json.dumps(result, indent=4, ensure_ascii=False)
|
| 508 |
|
| 509 |
output_files = []
|
| 510 |
output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
|
| 511 |
output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
|
| 512 |
output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
|
| 513 |
-
output_files.append(
|
| 514 |
|
| 515 |
return output_files, text, vtt
|
| 516 |
|
|
|
|
| 501 |
language = result["language"] if "language" in result else None
|
| 502 |
languageMaxLineWidth = self.__get_max_line_width(language)
|
| 503 |
|
| 504 |
+
# We always create the JSON file for debugging purposes
|
| 505 |
+
json_result = json.dumps(result, indent=4, ensure_ascii=False)
|
| 506 |
+
json_file = self.__create_file(json_result, output_dir, source_name + "-result.json")
|
| 507 |
+
print("Created JSON file " + json_file)
|
| 508 |
+
|
| 509 |
print("Max line width " + str(languageMaxLineWidth))
|
| 510 |
vtt = self.__get_subs(result["segments"], "vtt", languageMaxLineWidth, highlight_words=highlight_words)
|
| 511 |
srt = self.__get_subs(result["segments"], "srt", languageMaxLineWidth, highlight_words=highlight_words)
|
|
|
|
| 512 |
|
| 513 |
output_files = []
|
| 514 |
output_files.append(self.__create_file(srt, output_dir, source_name + "-subs.srt"));
|
| 515 |
output_files.append(self.__create_file(vtt, output_dir, source_name + "-subs.vtt"));
|
| 516 |
output_files.append(self.__create_file(text, output_dir, source_name + "-transcript.txt"));
|
| 517 |
+
output_files.append(json_file)
|
| 518 |
|
| 519 |
return output_files, text, vtt
|
| 520 |
|
requirements-fasterWhisper.txt
CHANGED
|
@@ -2,6 +2,7 @@ ctranslate2
|
|
| 2 |
faster-whisper
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
gradio==3.38.0
|
|
|
|
| 5 |
yt-dlp
|
| 6 |
json5
|
| 7 |
torch
|
|
|
|
| 2 |
faster-whisper
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
gradio==3.38.0
|
| 5 |
+
gradio-client==0.8.1
|
| 6 |
yt-dlp
|
| 7 |
json5
|
| 8 |
torch
|
requirements-whisper.txt
CHANGED
|
@@ -3,6 +3,7 @@ git+https://github.com/openai/whisper.git
|
|
| 3 |
transformers
|
| 4 |
ffmpeg-python==0.2.0
|
| 5 |
gradio==3.38.0
|
|
|
|
| 6 |
yt-dlp
|
| 7 |
torchaudio
|
| 8 |
altair
|
|
|
|
| 3 |
transformers
|
| 4 |
ffmpeg-python==0.2.0
|
| 5 |
gradio==3.38.0
|
| 6 |
+
gradio-client==0.8.1
|
| 7 |
yt-dlp
|
| 8 |
torchaudio
|
| 9 |
altair
|
requirements.txt
CHANGED
|
@@ -2,6 +2,7 @@ ctranslate2
|
|
| 2 |
faster-whisper
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
gradio==3.38.0
|
|
|
|
| 5 |
yt-dlp
|
| 6 |
json5
|
| 7 |
torch
|
|
|
|
| 2 |
faster-whisper
|
| 3 |
ffmpeg-python==0.2.0
|
| 4 |
gradio==3.38.0
|
| 5 |
+
gradio-client==0.8.1
|
| 6 |
yt-dlp
|
| 7 |
json5
|
| 8 |
torch
|
src/utils.py
CHANGED
|
@@ -63,14 +63,18 @@ def write_vtt(transcript: Iterator[dict], file: TextIO,
|
|
| 63 |
print("WEBVTT\n", file=file)
|
| 64 |
|
| 65 |
for segment in iterator:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 74 |
|
| 75 |
def write_srt(transcript: Iterator[dict], file: TextIO,
|
| 76 |
maxLineWidth=None, highlight_words: bool = False):
|
|
|
|
| 63 |
print("WEBVTT\n", file=file)
|
| 64 |
|
| 65 |
for segment in iterator:
|
| 66 |
+
try:
|
| 67 |
+
text = segment['text'].replace('-->', '->')
|
| 68 |
+
|
| 69 |
+
print(
|
| 70 |
+
f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
|
| 71 |
+
f"{text}\n",
|
| 72 |
+
file=file,
|
| 73 |
+
flush=True,
|
| 74 |
+
)
|
| 75 |
+
except Exception as e:
|
| 76 |
+
print(f"Error writing segment {segment}: {e}")
|
| 77 |
+
raise
|
| 78 |
|
| 79 |
def write_srt(transcript: Iterator[dict], file: TextIO,
|
| 80 |
maxLineWidth=None, highlight_words: bool = False):
|