import pytest

from utils.vtt_parser import parse_vtt_offsets

# NOTE(review): the line breaks inside the VTT fixtures below were
# reconstructed using the usual WebVTT layout (header, blank line, timing
# line, payload line). Confirm they match what parse_vtt_offsets expects.


def _parse(vtt_text):
    """Encode *vtt_text* as UTF-8 and return everything the parser yields.

    parse_vtt_offsets is fed bytes and its output is materialised into a
    list so the tests can check both the length and individual entries.
    """
    return list(parse_vtt_offsets(vtt_text.encode('utf-8')))


def test_parse_simple_vtt():
    """A single cue yields one (left, top, right, bottom, time) entry."""
    vtt_content = (
        "WEBVTT\n"
        "\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "xywh=100,200,300,400\n"
    )

    entries = _parse(vtt_content)

    assert len(entries) == 1
    left, top, right, bottom, seconds = entries[0]
    assert left == 100
    assert top == 200
    assert right == 300
    assert bottom == 400
    assert seconds == 5.0


def test_parse_multiple_entries():
    """Two cues yield two entries, in the order they appear."""
    vtt_content = (
        "WEBVTT\n"
        "\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "xywh=100,200,300,400\n"
        "\n"
        "00:01:30.500 --> 00:01:35.000\n"
        "xywh=150,250,350,450\n"
    )

    entries = _parse(vtt_content)

    assert len(entries) == 2

    # First entry
    *box, seconds = entries[0]
    assert len(box) == 4
    assert tuple(box) == (100, 200, 300, 400)
    assert seconds == 5.0

    # Second entry
    *box, seconds = entries[1]
    assert len(box) == 4
    assert tuple(box) == (150, 250, 350, 450)
    assert seconds == 90.5  # 1 minute 30.5 seconds


def test_parse_empty_vtt():
    """A file containing only the WEBVTT header yields nothing."""
    vtt_content = "WEBVTT\n"

    entries = _parse(vtt_content)

    assert len(entries) == 0


def test_parse_invalid_format():
    """A cue whose payload is not an xywh line yields nothing."""
    vtt_content = (
        "WEBVTT\n"
        "\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "invalid_line\n"
    )

    entries = _parse(vtt_content)

    assert len(entries) == 0


def test_parse_hour_timestamp():
    """The hours field of the start timestamp is included in the time."""
    vtt_content = (
        "WEBVTT\n"
        "\n"
        "01:30:05.000 --> 01:30:10.000\n"
        "xywh=100,200,300,400\n"
    )

    entries = _parse(vtt_content)

    assert len(entries) == 1
    # Only the time matters here; the full unpack still checks the arity.
    _left, _top, _right, _bottom, seconds = entries[0]
    assert seconds == 5405.0  # 1 hour + 30 minutes + 5 seconds


def test_parse_missing_coordinates():
    """Cues without coordinates are skipped; later valid cues still parse."""
    vtt_content = (
        "WEBVTT\n"
        "\n"
        "00:00:05.000 --> 00:00:10.000\n"
        "Some text content\n"
        "\n"
        "00:00:10.000 --> 00:00:15.000\n"
        "xywh=100,200,300,400\n"
    )

    entries = _parse(vtt_content)

    assert len(entries) == 1
    left, top, right, bottom, seconds = entries[0]
    assert seconds == 10.0
    assert (left, top, right, bottom) == (100, 200, 300, 400)