Spaces:
Runtime error
Runtime error
Wrap lines in SRT and VTT files that exceed 47 characters
Browse files
utils.py
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
|
|
| 1 |
import unicodedata
|
| 2 |
import re
|
| 3 |
|
|
@@ -55,9 +56,11 @@ def write_txt(transcript: Iterator[dict], file: TextIO):
|
|
| 55 |
def write_vtt(transcript: Iterator[dict], file: TextIO):
|
| 56 |
print("WEBVTT\n", file=file)
|
| 57 |
for segment in transcript:
|
|
|
|
|
|
|
| 58 |
print(
|
| 59 |
f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
|
| 60 |
-
f"{
|
| 61 |
file=file,
|
| 62 |
flush=True,
|
| 63 |
)
|
|
@@ -76,16 +79,22 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
|
|
| 76 |
write_srt(result["segments"], file=srt)
|
| 77 |
"""
|
| 78 |
for i, segment in enumerate(transcript, start=1):
|
|
|
|
|
|
|
| 79 |
# write srt lines
|
| 80 |
print(
|
| 81 |
f"{i}\n"
|
| 82 |
f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
|
| 83 |
f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
|
| 84 |
-
f"{
|
| 85 |
file=file,
|
| 86 |
flush=True,
|
| 87 |
)
|
| 88 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
def slugify(value, allow_unicode=False):
|
| 90 |
"""
|
| 91 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|
|
|
|
| 1 |
+
import textwrap
|
| 2 |
import unicodedata
|
| 3 |
import re
|
| 4 |
|
|
|
|
| 56 |
def write_vtt(transcript: Iterator[dict], file: TextIO):
    """Write transcript segments to *file* in WebVTT format.

    Emits the ``WEBVTT`` header followed by one cue per segment: a timing
    line (``start --> end``) and the segment text wrapped by ``processText``.

    Args:
        transcript: Iterable of segment dicts; each is read for the keys
            ``'start'``, ``'end'`` and ``'text'``.
        file: Writable text stream that receives the output.
    """
    print("WEBVTT\n", file=file)
    for segment in transcript:
        # Strip before wrapping, exactly as write_srt does: textwrap.wrap keeps
        # whitespace at the start of a paragraph, so an unstripped segment
        # would yield a cue whose first line begins with a space.
        # '-->' is the cue timing separator, so it is neutralised in cue text.
        text = processText(segment['text'].strip()).replace('-->', '->')

        print(
            f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n"
            f"{text}\n",
            file=file,
            flush=True,
        )
|
|
|
|
| 79 |
write_srt(result["segments"], file=srt)
|
| 80 |
"""
|
| 81 |
for i, segment in enumerate(transcript, start=1):
|
| 82 |
+
text = processText(segment['text'].strip()).replace('-->', '->')
|
| 83 |
+
|
| 84 |
# write srt lines
|
| 85 |
print(
|
| 86 |
f"{i}\n"
|
| 87 |
f"{format_timestamp(segment['start'], always_include_hours=True, fractionalSeperator=',')} --> "
|
| 88 |
f"{format_timestamp(segment['end'], always_include_hours=True, fractionalSeperator=',')}\n"
|
| 89 |
+
f"{text}\n",
|
| 90 |
file=file,
|
| 91 |
flush=True,
|
| 92 |
)
|
| 93 |
|
| 94 |
+
def processText(text: str, max_line_width: int = 47) -> str:
    """Wrap *text* so that no output line exceeds *max_line_width* characters.

    Wrapping is delegated to ``textwrap.wrap`` with ``tabsize=4``; the
    resulting lines are joined with ``'\\n'``. Empty or whitespace-only
    input yields an empty string.

    Note that ``textwrap.wrap`` preserves whitespace at the very start of
    the text when non-whitespace follows it, so callers that do not want a
    leading space on the first line should strip the text first.

    Args:
        text: The subtitle text to wrap.
        max_line_width: Maximum length of each wrapped line. Defaults to 47,
            the limit this module previously hard-coded.

    Returns:
        The wrapped text with lines separated by newlines.

    Raises:
        ValueError: If *max_line_width* is not positive (raised by
            ``textwrap.wrap``).
    """
    lines = textwrap.wrap(text, width=max_line_width, tabsize=4)
    return '\n'.join(lines)
|
| 97 |
+
|
| 98 |
def slugify(value, allow_unicode=False):
|
| 99 |
"""
|
| 100 |
Taken from https://github.com/django/django/blob/master/django/utils/text.py
|