stashface / tests /test_vtt_parser.py
cc1234
init
244b0b6
import pytest
from utils.vtt_parser import parse_vtt_offsets
def test_parse_simple_vtt():
    """A single cue with one ``xywh=`` line produces exactly one entry.

    The entry unpacks into four integer coordinates followed by a time
    value; the expected time equals the cue's start timestamp in seconds.
    """
    payload = (
        "WEBVTT\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "xywh=100,200,300,400\n"
    ).encode('utf-8')

    entries = list(parse_vtt_offsets(payload))

    assert len(entries) == 1
    x0, y0, x1, y1, seconds = entries[0]
    assert (x0, y0, x1, y1) == (100, 200, 300, 400)
    assert seconds == 5.0
def test_parse_multiple_entries():
    """Two consecutive cues produce two entries, in document order."""
    payload = (
        "WEBVTT\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "xywh=100,200,300,400\n"
        "00:01:30.500 --> 00:01:35.000\n"
        "xywh=150,250,350,450\n"
    ).encode('utf-8')

    # (coordinates, time) expected for each cue, first to last.
    expected = [
        ((100, 200, 300, 400), 5.0),
        ((150, 250, 350, 450), 90.5),  # 1 minute 30.5 seconds
    ]

    entries = list(parse_vtt_offsets(payload))

    assert len(entries) == 2
    for entry, (box, seconds) in zip(entries, expected):
        left, top, right, bottom, time = entry
        assert (left, top, right, bottom) == box
        assert time == seconds
def test_parse_empty_vtt():
    """A file containing only the ``WEBVTT`` header produces no entries."""
    header_only = "WEBVTT\n".encode('utf-8')

    entries = list(parse_vtt_offsets(header_only))

    assert not entries
def test_parse_invalid_format():
    """A cue whose payload line is not an ``xywh=`` line produces no entries."""
    payload = (
        "WEBVTT\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "invalid_line\n"
    ).encode('utf-8')

    entries = list(parse_vtt_offsets(payload))

    assert not entries
def test_parse_hour_timestamp():
    """A non-zero hours field is included in the reported time."""
    payload = (
        "WEBVTT\n"
        "01:30:05.000 --> 01:30:10.000\n"
        "xywh=100,200,300,400\n"
    ).encode('utf-8')

    entries = list(parse_vtt_offsets(payload))

    assert len(entries) == 1
    # Only the time matters here; the strict 5-way unpack still checks
    # the shape of the entry.
    _, _, _, _, seconds = entries[0]
    assert seconds == 5405.0  # 1 hour + 30 minutes + 5 seconds
def test_parse_missing_coordinates():
    """A cue without an ``xywh=`` line is skipped; the next valid cue is kept."""
    payload = (
        "WEBVTT\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "Some text content\n"
        "00:00:10.000 --> 00:00:15.000\n"
        "xywh=100,200,300,400\n"
    ).encode('utf-8')

    entries = list(parse_vtt_offsets(payload))

    # Only the second cue carries coordinates, so it is the sole entry.
    assert len(entries) == 1
    x0, y0, x1, y1, seconds = entries[0]
    assert seconds == 10.0
    assert (x0, y0, x1, y1) == (100, 200, 300, 400)