diff --git a/docs/api/codec.rst b/docs/api/codec.rst index 7e06ff8cc..43bb235c2 100644 --- a/docs/api/codec.rst +++ b/docs/api/codec.rst @@ -15,7 +15,6 @@ Descriptors .. autoattribute:: Codec.is_decoder .. autoattribute:: Codec.is_encoder -.. autoattribute:: Codec.descriptor .. autoattribute:: Codec.name .. autoattribute:: Codec.long_name .. autoattribute:: Codec.type diff --git a/docs/api/container.rst b/docs/api/container.rst index f4c9732c9..71739fe3f 100644 --- a/docs/api/container.rst +++ b/docs/api/container.rst @@ -60,7 +60,6 @@ Formats .. autoattribute:: ContainerFormat.name .. autoattribute:: ContainerFormat.long_name -.. autoattribute:: ContainerFormat.options .. autoattribute:: ContainerFormat.input .. autoattribute:: ContainerFormat.output .. autoattribute:: ContainerFormat.is_input diff --git a/examples/basics/remux.py b/examples/basics/remux.py index 64ac00c99..13247c41a 100644 --- a/examples/basics/remux.py +++ b/examples/basics/remux.py @@ -14,8 +14,11 @@ for packet in input_.demux(in_stream): print(packet) - # We need to skip the "flushing" packets that `demux` generates. - if packet.dts is None: + # We need to skip the empty "flushing" packet that `demux` generates at the + # end. Don't test `packet.dts is None` here: a valid keyframe can legitimately + # have no DTS (e.g. the leading reordered packets of a B-frame stream in MKV), + # and skipping it would drop the keyframe and corrupt the output. + if packet.size == 0: continue # We need to assign the packet to the new stream. diff --git a/examples/subtitles/remux.py b/examples/subtitles/remux.py index 32ee7d6d3..4fdc71f1d 100644 --- a/examples/subtitles/remux.py +++ b/examples/subtitles/remux.py @@ -9,7 +9,7 @@ out_stream = output.add_stream_from_template(in_stream) for packet in input_.demux(in_stream): - if packet.dts is None: + if packet.size == 0: continue packet.stream = out_stream output.mux(packet) diff --git a/tests/test_encode.py b/tests/test_encode.py index d64cadc20..240c67872 100644 --- a/tests/test_encode.py +++ b/tests/test_encode.py @@ -233,7 +233,7 @@ def test_subtitle_muxing(self) -> None: out_stream = output.add_stream_from_template(in_stream) for packet in input_.demux(in_stream): - if packet.dts is None: + if packet.size == 0: continue packet.stream = out_stream output.mux(packet) diff --git a/tests/test_packet.py b/tests/test_packet.py index f1c862d0b..c4503caad 100644 --- a/tests/test_packet.py +++ b/tests/test_packet.py @@ -197,7 +197,7 @@ def test_skip_samples_remux(self) -> None: with av.open(output_path, "w") as out: out_stream = out.add_stream_from_template(audio_stream) for pkt in inp.demux(audio_stream): - if pkt.dts is None: + if pkt.size == 0: continue if pkt.has_sidedata("skip_samples"): sdata = pkt.get_sidedata("skip_samples") diff --git a/tests/test_remux.py b/tests/test_remux.py index 72247c45f..85f00563b 100644 --- a/tests/test_remux.py +++ b/tests/test_remux.py @@ -1,5 +1,8 @@ import io +import numpy as np +import pytest + import av import av.datasets @@ -16,7 +19,7 @@ def test_video_remux() -> None: out_stream = output.add_stream_from_template(in_stream) for packet in input_.demux(in_stream): - if packet.dts is None: + if packet.size == 0: # skip the flushing packet, not keyframes with no DTS continue packet.stream = out_stream @@ -58,7 +61,7 @@ def test_add_mux_stream_video() -> None: out_stream.time_base = in_stream.time_base for packet in input_.demux(in_stream): - if packet.dts is None: + if packet.size == 0: continue packet.stream = out_stream output.mux(packet) @@ -109,3 +112,79 @@ def test_add_stream_from_template_copies_time_base() -> None: out_audio = output.add_stream_from_template(in_audio) assert out_audio.time_base is not None assert out_audio.time_base == in_audio.time_base + + +def _make_b_frame_mkv(n: int = 48) -> io.BytesIO: + """Encode `n` frames with B-frames into an in-memory MKV. + + Matroska stores only presentation timestamps, so when this is demuxed again + libavformat cannot reconstruct a DTS for the leading reordered packets and + leaves it as None -- including on the very first packet, which is the + keyframe. That is exactly the layout that broke the remux example in #1917. + """ + buf = io.BytesIO() + with av.open(buf, "w", format="matroska") as out: + stream = out.add_stream("h264", rate=30) + stream.width, stream.height, stream.pix_fmt = 160, 120, "yuv420p" + stream.options = {"bf": "3", "g": "30"} + for i in range(n): + img = np.full((120, 160, 3), (i * 5) % 256, dtype="uint8") + frame = av.VideoFrame.from_ndarray(img, format="rgb24") + for packet in stream.encode(frame): + out.mux(packet) + for packet in stream.encode(None): + out.mux(packet) + buf.seek(0) + return buf + + +def _decoded_frame_count(buf: io.BytesIO) -> int: + buf.seek(0) + with av.open(buf, "r") as container: + return sum(1 for _ in container.decode(video=0)) + + +def test_remux_keeps_keyframe_with_none_dts() -> None: + """Regression test for #1917. + + A keyframe can legitimately demux with ``dts is None`` (B-frame stream in a + PTS-only container like MKV). The remux loop must skip only the empty + flushing packet (``size == 0``), not every ``dts is None`` packet, otherwise + the keyframe is dropped and the output is undecodable. + """ + if av.codec.Codec("h264", "w").name != "libx264": + pytest.skip("requires libx264") + + source = _make_b_frame_mkv() + expected_frames = _decoded_frame_count(source) + assert expected_frames > 0 + + # Precondition: the first packet really is a keyframe without a DTS, which is + # what the old `dts is None` filter would have wrongly discarded. + source.seek(0) + with av.open(source, "r") as input_: + first = next(p for p in input_.demux(input_.streams.video[0]) if p.size) + assert first.is_keyframe + assert first.dts is None + + source.seek(0) + output = io.BytesIO() + with ( + av.open(source, "r") as input_, + av.open(output, "w", format="matroska") as out, + ): + in_video = input_.streams.video[0] + out_video = out.add_stream_from_template(in_video) + for packet in input_.demux(in_video): + if packet.size == 0: # the flushing packet, not a keyframe with no DTS + continue + packet.stream = out_video + out.mux(packet) + + # The keyframe survived: every frame still decodes and the first packet of + # the remuxed stream is a keyframe. + assert _decoded_frame_count(output) == expected_frames + output.seek(0) + with av.open(output, "r") as container: + first_out = next(p for p in container.demux(video=0) if p.size) + assert first_out.is_keyframe diff --git a/tests/test_streams.py b/tests/test_streams.py index a78952013..9e4941c3a 100644 --- a/tests/test_streams.py +++ b/tests/test_streams.py @@ -360,7 +360,7 @@ def test_attachment_stream(self) -> None: out_v = out1.add_stream_from_template(in_v) for packet in input_.demux(in_v): - if packet.dts is None: + if packet.size == 0: continue packet.stream = out_v out1.mux(packet) @@ -382,7 +382,7 @@ def test_attachment_stream(self) -> None: stream_map[s.index] = oc.add_stream_from_template(s) for packet in ic.demux(ic.streams.video): - if packet.dts is None: + if packet.size == 0: continue updated_stream = stream_map.get(packet.stream.index) if isinstance(updated_stream, av.video.stream.VideoStream):