From 8445751f534384c15d158c67bd3746b89a338ddb Mon Sep 17 00:00:00 2001
From: Joshua
Date: Mon, 1 Jun 2026 13:39:43 -0700
Subject: [PATCH] Fix subtitle UTF-8 handling

---
Reviewer note (free text placed after the three-dash line, so it is not
part of the commit message):

* av/subtitles/subtitle.py: the only change is the C type passed to
  cython.declare for `char` / `next_char` in `dialogue`, from cython.char
  to cython.uchar. Presumably byte values >= 0x80 (every byte of a
  multi-byte UTF-8 sequence) did not survive the signed type; the loop
  that uses these variables is outside this hunk, so confirm there.
* tests/test_subtitles.py: the new test builds an ASS event line whose
  text field contains U+266A and asserts that `dialogue` equals that text
  field encoded as UTF-8.
* NOTE(review): the second hunk header (+119,16) and the diffstat both
  imply 10 added lines, but only 8 "+" lines are present in this copy.
  Blank added lines appear to have been lost, and indentation inside the
  hunks was reconstructed. Verify against the original commit before
  applying.

 av/subtitles/subtitle.py |  2 +-
 tests/test_subtitles.py  | 10 ++++++++++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/av/subtitles/subtitle.py b/av/subtitles/subtitle.py
index 24bf8cbe5..5c58647a9 100644
--- a/av/subtitles/subtitle.py
+++ b/av/subtitles/subtitle.py
@@ -297,7 +297,7 @@ def dialogue(self):
         i: cython.Py_ssize_t = 0
         state: cython.bint = False
         ass_text: bytes = self.ass
-        char, next_char = cython.declare(cython.char)
+        char, next_char = cython.declare(cython.uchar)
         result: bytearray = bytearray()
         text_len: cython.Py_ssize_t = len(ass_text)
 
diff --git a/tests/test_subtitles.py b/tests/test_subtitles.py
index 321b29d1e..dd13d7756 100644
--- a/tests/test_subtitles.py
+++ b/tests/test_subtitles.py
@@ -119,6 +119,16 @@ def test_subtitle_set_create(self) -> None:
         assert len(subtitle) == 1
         assert cast(AssSubtitle, subtitle[0]).ass == text
 
+    def test_subtitle_dialogue_extended_chars(self) -> None:
+        """Test handling of extended UTF-8 characters in subtitle dialogue."""
+        from av.subtitles.subtitle import SubtitleSet
+        text = "0,0,Default,,0,0,0,,♪ Hey, hey, hey ♪".encode("utf-8")
+        subtitle = SubtitleSet.create(text=text, start=0, end=2000, pts=0)
+        sub = cast(AssSubtitle, subtitle[0])
+        assert sub.dialogue == "♪ Hey, hey, hey ♪".encode("utf-8")
+
     def test_subtitle_encode_mp4(self) -> None:
         """Test encoding subtitles to MP4 container."""
         from av.subtitles.subtitle import SubtitleSet