Commit
·
810585d
1
Parent(s):
092cfe0
Add JSON output for word highlighting
Browse files
- .gitignore +4 -1
- app.py +9 -2
- src/transcriber.py +28 -4
.gitignore
CHANGED
|
@@ -2,4 +2,7 @@
|
|
| 2 |
.python-version
|
| 3 |
pyproject.toml
|
| 4 |
uv.lock
|
| 5 |
-
*/__pycache__/
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
.python-version
|
| 3 |
pyproject.toml
|
| 4 |
uv.lock
|
| 5 |
+
*/__pycache__/
|
| 6 |
+
Dockerfile
|
| 7 |
+
docker-compose.yml
|
| 8 |
+
.gradio
|
app.py
CHANGED
|
@@ -1,5 +1,10 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
from src.transcriber import transcriber
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
def main():
|
| 5 |
with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
|
|
@@ -18,10 +23,11 @@ def main():
|
|
| 18 |
text_output = gr.Textbox(label="SRT Text transcription")
|
| 19 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
| 20 |
text_clean_output = gr.Textbox(label="Text transcription")
|
|
|
|
| 21 |
gr.Interface(
|
| 22 |
fn=transcriber,
|
| 23 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
| 24 |
-
outputs=[text_output, srt_file, text_clean_output],
|
| 25 |
allow_flagging="never"
|
| 26 |
)
|
| 27 |
with gr.Tab("Audio", id = "audio"):
|
|
@@ -36,10 +42,11 @@ def main():
|
|
| 36 |
text_output = gr.Textbox(label="SRT Text transcription")
|
| 37 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
| 38 |
text_clean_output = gr.Textbox(label="Text transcription")
|
|
|
|
| 39 |
gr.Interface(
|
| 40 |
fn=transcriber,
|
| 41 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
| 42 |
-
outputs=[text_output, srt_file, text_clean_output],
|
| 43 |
allow_flagging="never"
|
| 44 |
)
|
| 45 |
demo.launch()
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
from src.transcriber import transcriber
|
| 3 |
+
import os
|
| 4 |
+
|
| 5 |
+
# os.environ["GRADIO_TEMP_DIR"] = os.path.join(os.getcwd(), "tmp")
|
| 6 |
+
# os.environ["HF_HOME"] = os.path.join(os.getcwd(), "hf_tmp")
|
| 7 |
+
|
| 8 |
|
| 9 |
def main():
|
| 10 |
with gr.Blocks(title='multilang-asr-transcriber', delete_cache=(86400, 86400), theme=gr.themes.Base()) as demo:
|
|
|
|
| 23 |
text_output = gr.Textbox(label="SRT Text transcription")
|
| 24 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
| 25 |
text_clean_output = gr.Textbox(label="Text transcription")
|
| 26 |
+
json_output = gr.JSON("JSON Transcription")
|
| 27 |
gr.Interface(
|
| 28 |
fn=transcriber,
|
| 29 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
| 30 |
+
outputs=[text_output, srt_file, text_clean_output, json_output],
|
| 31 |
allow_flagging="never"
|
| 32 |
)
|
| 33 |
with gr.Tab("Audio", id = "audio"):
|
|
|
|
| 42 |
text_output = gr.Textbox(label="SRT Text transcription")
|
| 43 |
srt_file = gr.File(file_count="single", type="filepath", file_types=[".srt"], label="SRT file")
|
| 44 |
text_clean_output = gr.Textbox(label="Text transcription")
|
| 45 |
+
json_output = gr.JSON("JSON Transcription")
|
| 46 |
gr.Interface(
|
| 47 |
fn=transcriber,
|
| 48 |
inputs=[file, file_type, max_words_per_line, task, model_version],
|
| 49 |
+
outputs=[text_output, srt_file, text_clean_output, json_output],
|
| 50 |
allow_flagging="never"
|
| 51 |
)
|
| 52 |
demo.launch()
|
src/transcriber.py
CHANGED
|
@@ -24,24 +24,48 @@ def write_srt(segments, max_words_per_line, srt_path):
|
|
| 24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
| 25 |
result = ''
|
| 26 |
result_clean = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
line_counter = 1
|
| 28 |
for _, segment in enumerate(segments):
|
| 29 |
words_in_line = []
|
| 30 |
for w, word in enumerate(segment.words):
|
| 31 |
words_in_line.append(word)
|
|
|
|
| 32 |
# Write the line if max words limit reached or it's the last word in the segment
|
| 33 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
| 34 |
-
if words_in_line:
|
| 35 |
start_time = convert_seconds_to_time(words_in_line[0].start)
|
| 36 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
| 37 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
|
|
|
|
|
|
| 38 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
| 39 |
result_clean += [line_text]
|
| 40 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
line_counter += 1
|
| 42 |
-
words_in_line = []
|
|
|
|
| 43 |
file.write(result)
|
| 44 |
-
return result, srt_path, " ".join(result_clean)
|
|
|
|
| 45 |
|
| 46 |
def transcriber(file_input:gr.File,
|
| 47 |
file_type: str,
|
|
|
|
| 24 |
with open(srt_path, "w", encoding='utf-8') as file:
|
| 25 |
result = ''
|
| 26 |
result_clean = []
|
| 27 |
+
json_output = {
|
| 28 |
+
"lines": []
|
| 29 |
+
}
|
| 30 |
+
|
| 31 |
line_counter = 1
|
| 32 |
for _, segment in enumerate(segments):
|
| 33 |
words_in_line = []
|
| 34 |
for w, word in enumerate(segment.words):
|
| 35 |
words_in_line.append(word)
|
| 36 |
+
|
| 37 |
# Write the line if max words limit reached or it's the last word in the segment
|
| 38 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
| 39 |
+
if words_in_line:
|
| 40 |
start_time = convert_seconds_to_time(words_in_line[0].start)
|
| 41 |
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
| 42 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
| 43 |
+
|
| 44 |
+
# SRT format
|
| 45 |
result += f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n"
|
| 46 |
result_clean += [line_text]
|
| 47 |
+
|
| 48 |
+
# JSON format
|
| 49 |
+
json_output["lines"].append({
|
| 50 |
+
"line_index": line_counter,
|
| 51 |
+
"start": words_in_line[0].start,
|
| 52 |
+
"end": words_in_line[-1].end,
|
| 53 |
+
"text": line_text,
|
| 54 |
+
"words": [
|
| 55 |
+
{
|
| 56 |
+
"word": w.word.strip(),
|
| 57 |
+
"start": w.start,
|
| 58 |
+
"end": w.end
|
| 59 |
+
} for w in words_in_line
|
| 60 |
+
]
|
| 61 |
+
})
|
| 62 |
+
|
| 63 |
line_counter += 1
|
| 64 |
+
words_in_line = []
|
| 65 |
+
|
| 66 |
file.write(result)
|
| 67 |
+
return result, srt_path, " ".join(result_clean), json_output
|
| 68 |
+
|
| 69 |
|
| 70 |
def transcriber(file_input:gr.File,
|
| 71 |
file_type: str,
|