diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 068fbd80d..5f61756df 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -32,6 +32,7 @@ Features: - Use FFmpeg 8.1.1 in the binary wheels. - Expose ``AVCodecContext.global_quality`` by :gh-user:`WyattBlue` in (:pr:`2246`). - Expose ``Stream.discard`` so demuxing and seeking can skip unwanted streams (:issue:`2272`). +- Add ``VideoStream.set_display_matrix()`` and ``VideoStream.set_display_rotation()`` to write the container display (rotation) matrix on output streams by :gh-user:`hmaarrfk` in (:pr:`2287`). Fixes: - Add ``cython.final`` to leaf classes, ensuring that they are not subclassed. diff --git a/av/video/stream.pxd b/av/video/stream.pxd index f0dcfb9b2..1a553f34c 100644 --- a/av/video/stream.pxd +++ b/av/video/stream.pxd @@ -1,3 +1,5 @@ +from libc.stdint cimport int32_t, uint8_t + from av.packet cimport Packet from av.stream cimport Stream @@ -5,5 +7,12 @@ from .frame cimport VideoFrame cdef class VideoStream(Stream): + # Display matrix (9 int32, native-endian) written as AV_PKT_DATA_DISPLAYMATRIX + # coded side data at mux time, applied only when _has_display_matrix is set. + cdef int32_t _display_matrix[9] + cdef uint8_t _has_display_matrix + + cdef _apply_display_matrix(self) + cpdef encode(self, VideoFrame frame=?) cpdef decode(self, Packet packet=?)
diff --git a/av/video/stream.py b/av/video/stream.py index c37f45a3e..bb8b06fc0 100644 --- a/av/video/stream.py +++ b/av/video/stream.py @@ -1,8 +1,11 @@ import cython from cython.cimports import libav as lib from cython.cimports.av.packet import Packet +from cython.cimports.av.stream import Stream from cython.cimports.av.utils import avrational_to_fraction from cython.cimports.av.video.frame import VideoFrame +from cython.cimports.libc.stdint import int32_t +from cython.cimports.libc.string import memcpy @cython.final @@ -56,6 +59,62 @@ def decode(self, packet: Packet | None = None): """ return self.codec_context.decode(packet) + @cython.cfunc + def _finalize_for_output(self): + Stream._finalize_for_output(self) + # avcodec_parameters_from_context() overwrites codecpar.coded_side_data, + # so inject the display matrix after it, before avformat_write_header(). + if self.codec_context is not None and self._has_display_matrix: + self._apply_display_matrix() + + @cython.cfunc + def _apply_display_matrix(self): + n: cython.int = 9 * cython.sizeof(int32_t) + sd: cython.pointer[lib.AVPacketSideData] = lib.av_packet_side_data_new( + cython.address(self.ptr.codecpar.coded_side_data), + cython.address(self.ptr.codecpar.nb_coded_side_data), + lib.AV_PKT_DATA_DISPLAYMATRIX, + n, + 0, + ) + if sd == cython.NULL: + raise MemoryError("could not allocate display matrix side data") + + memcpy(sd.data, self._display_matrix, n) + + def set_display_matrix(self, matrix): + """Set the display matrix written to the container as coded side data. + + ``matrix`` is a sequence of 9 integers in FFmpeg's ``AV_PKT_DATA_DISPLAYMATRIX`` + layout, or ``None`` to clear. Must be called before the first frame is + encoded. See :meth:`set_display_rotation` for a higher-level helper. 
+ """ + if matrix is None: + self._has_display_matrix = False + return + + vals = [int(v) for v in matrix] + if len(vals) != 9: + raise ValueError("display matrix must have exactly 9 elements") + i: cython.int + for i in range(9): + self._display_matrix[i] = vals[i] + self._has_display_matrix = True + + def set_display_rotation(self, degrees, hflip=False, vflip=False): + """Set the container display matrix from a rotation and optional flips. + + ``degrees`` is a counter-clockwise rotation (matching the value read back + from :attr:`VideoFrame.rotation`); ``hflip`` / ``vflip`` mirror after it. + Together these express all eight EXIF orientations. Must be called before + the first frame is encoded. + """ + # av_display_rotation_set() takes a clockwise angle; negate so our public + # `degrees` is counter-clockwise, matching VideoFrame.rotation on read. + lib.av_display_rotation_set(self._display_matrix, -float(degrees)) + lib.av_display_matrix_flip(self._display_matrix, bool(hflip), bool(vflip)) + self._has_display_matrix = True + @property def average_rate(self): """ diff --git a/av/video/stream.pyi b/av/video/stream.pyi index dd670d3cf..4e2a61e46 100644 --- a/av/video/stream.pyi +++ b/av/video/stream.pyi @@ -1,3 +1,4 @@ +from collections.abc import Sequence from fractions import Fraction from typing import Iterator, Literal @@ -20,6 +21,10 @@ class VideoStream(Stream): def encode(self, frame: VideoFrame | None = None) -> list[Packet]: ... def encode_lazy(self, frame: VideoFrame | None = None) -> Iterator[Packet]: ... def decode(self, packet: Packet | None = None) -> list[VideoFrame]: ... + def set_display_matrix(self, matrix: Sequence[int] | None) -> None: ... + def set_display_rotation( + self, degrees: float, hflip: bool = ..., vflip: bool = ... + ) -> None: ... 
# from codec context format: VideoFormat diff --git a/include/avcodec.pxd b/include/avcodec.pxd index f2123ce0d..5d13abed1 100644 --- a/include/avcodec.pxd +++ b/include/avcodec.pxd @@ -298,7 +298,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: cdef char* avcodec_get_name(AVCodecID id) cdef int avcodec_open2(AVCodecContext *ctx, const AVCodec *codec, AVDictionary **options) cdef enum AVPacketSideDataType: - pass + AV_PKT_DATA_DISPLAYMATRIX cdef struct AVPacketSideData: uint8_t *data size_t size @@ -476,6 +476,8 @@ cdef extern from "libavcodec/avcodec.h" nogil: int width int height int sample_rate + AVPacketSideData *coded_side_data + int nb_coded_side_data cdef int avcodec_parameters_copy( AVCodecParameters *dst, const AVCodecParameters *src @@ -513,6 +515,10 @@ cdef extern from "libavcodec/packet.h" nogil: const AVPacketSideData *av_packet_side_data_get( const AVPacketSideData *sd, int nb_sd, AVPacketSideDataType type ) + AVPacketSideData *av_packet_side_data_new( + AVPacketSideData **psd, int *pnb_sd, + AVPacketSideDataType type, size_t size, int flags + ) uint8_t* av_packet_get_side_data( const AVPacket *pkt, AVPacketSideDataType type, size_t *size ) diff --git a/include/avutil.pxd b/include/avutil.pxd index ede8f6fbe..7b0a9e311 100644 --- a/include/avutil.pxd +++ b/include/avutil.pxd @@ -147,6 +147,8 @@ cdef extern from "libavutil/dict.h" nogil: cdef extern from "libavutil/display.h" nogil: cdef double av_display_rotation_get(const int32_t matrix[9]) + cdef void av_display_rotation_set(int32_t matrix[9], double angle) + cdef void av_display_matrix_flip(int32_t matrix[9], int hflip, int vflip) cdef extern from "libavutil/error.h" nogil: cdef int AVERROR_BSF_NOT_FOUND diff --git a/tests/test_display_matrix.py b/tests/test_display_matrix.py new file mode 100644 index 000000000..25c02af6c --- /dev/null +++ b/tests/test_display_matrix.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import io +import struct +from typing import cast + +import numpy as 
np +import pytest + +import av +from av.sidedata.sidedata import SideData +from av.video.stream import VideoStream + +WIDTH = 320 +HEIGHT = 240 +DURATION = 10 + +# The 8 EXIF orientations as 3x3 transformation matrices, built the same way as +# the application code: a 90 deg rotation generator (R) and a horizontal-flip +# generator (F). Orientations 2, 4, 5, 7 are reflections, which a scalar +# rotation cannot represent -- so these are verified by comparing the full +# matrix that round-trips through the container. +_R = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]], dtype=float) # exif 8 +_F = np.asarray([[-1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=float) # exif 2 + +EXIF_MATRICES = { + 1: np.eye(3), + 2: _F, + 3: _R @ _R, + 4: _F @ _R @ _R, + 5: _F @ _R @ _R @ _R, + 6: _R @ _R @ _R, + 7: _F @ _R, + 8: _R, +} + +# Pure-rotation orientations also have a well-defined scalar rotation, reported +# by av_display_rotation_get() (counter-clockwise, range [-180, 180]). +EXPECTED_ROTATION = {1: 0, 3: 180, 6: -90, 8: 90} + +# Each EXIF orientation expressed through the convenience API as +# (degrees_ccw, hflip, vflip). Verified to reproduce EXIF_MATRICES exactly. +EXIF_VIA_ROTATION = { + 1: (0, False, False), + 2: (0, True, False), + 3: (0, True, True), + 4: (0, False, True), + 5: (90, True, False), + 6: (90, True, True), + 7: (90, False, True), + 8: (90, False, False), +} + +# One encoder per codec family we care about, plus the near-universal mpeg4 +# baseline. Unavailable encoders are skipped at runtime so the suite stays +# portable across FFmpeg builds. +CODECS = ["mpeg4", "libx264", "libopenh264", "libx265", "libsvtav1", "libaom-av1"] + + +def matrix_to_ints(matrix: np.ndarray) -> list[int]: + """Convert a 3x3 matrix to FFmpeg's AV_PKT_DATA_DISPLAYMATRIX integers. + + Layout (a, b, u, c, d, v, x, y, w): 16.16 fixed point everywhere except + u, v, w (indices 2, 5, 8) which are 2.30. 
+ """ + flat = [float(v) for v in matrix.reshape(-1)] + return [ + int(round(v * (1 << 30))) if i in (2, 5, 8) else int(round(v * (1 << 16))) + for i, v in enumerate(flat) + ] + + +def _has_encoder(name: str) -> bool: + try: + av.codec.Codec(name, "w") + except Exception: + return False + return True + + +def _encode(codec_name: str, matrix: list[int] | None) -> io.BytesIO: + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = cast(VideoStream, container.add_stream(codec_name, rate=24)) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + + if matrix is not None: + stream.set_display_matrix(matrix) + + for i in range(DURATION): + img = np.full((HEIGHT, WIDTH, 3), (i * 8) % 256, dtype=np.uint8) + frame = av.VideoFrame.from_ndarray(img, format="rgb24") + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + return buf + + +def _read_frame(buf: io.BytesIO) -> av.VideoFrame: + with av.open(buf, "r", format="mp4") as container: + return next(container.decode(video=0)) + + +def _read_matrix(frame: av.VideoFrame) -> list[int] | None: + sd = frame.side_data.get("DISPLAYMATRIX") + if sd is None: + return None + return list(struct.unpack("=9i", bytes(cast(SideData, sd)))) + + +@pytest.mark.parametrize("codec_name", CODECS) +@pytest.mark.parametrize("orientation", sorted(EXIF_MATRICES)) +def test_exif_orientation_roundtrip(codec_name: str, orientation: int) -> None: + if not _has_encoder(codec_name): + pytest.skip(f"encoder {codec_name} not available") + + expected = matrix_to_ints(EXIF_MATRICES[orientation]) + frame = _read_frame(_encode(codec_name, expected)) + got = _read_matrix(frame) + + identity = matrix_to_ints(np.eye(3)) + if expected == identity: + # Identity is the container default; demuxers emit no side data for it. 
+ assert got is None + assert frame.rotation == 0 + else: + assert got == expected, f"exif {orientation}: wrote {expected}, read {got}" + + if orientation in EXPECTED_ROTATION: + rotation = frame.rotation + # 180 may come back negated; rotations are exact otherwise. + if abs(EXPECTED_ROTATION[orientation]) == 180: + assert abs(rotation) == 180 + else: + assert rotation == EXPECTED_ROTATION[orientation] + + +@pytest.mark.parametrize("degrees,expected", [(0, 0), (90, 90), (180, 180), (270, -90)]) +def test_set_display_rotation_roundtrip(degrees: int, expected: int) -> None: + # The public angle is counter-clockwise, matching VideoFrame.rotation. + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(degrees) + for i in range(DURATION): + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + rotation = _read_frame(buf).rotation + if abs(expected) == 180: + assert abs(rotation) == 180 + else: + assert rotation == expected + + +@pytest.mark.parametrize("orientation", sorted(EXIF_VIA_ROTATION)) +def test_convenience_reaches_all_exif_orientations(orientation: int) -> None: + # set_display_rotation(degrees, hflip, vflip) must reproduce the exact same + # matrix as the explicit EXIF table for every one of the 8 orientations. 
+ degrees, hflip, vflip = EXIF_VIA_ROTATION[orientation] + expected = matrix_to_ints(EXIF_MATRICES[orientation]) + + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(degrees, hflip=hflip, vflip=vflip) + for i in range(DURATION): + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + got = _read_matrix(_read_frame(buf)) + if expected == matrix_to_ints(np.eye(3)): + assert got is None # identity emits no side data + else: + assert got == expected, f"exif {orientation}: wrote {expected}, read {got}" + + +def test_matrix_and_rotation_setters_are_mutually_exclusive() -> None: + # Both setters share one stored matrix, so clearing it must drop an earlier rotation.
+ buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(90) + stream.set_display_matrix(None) # clears the rotation set above + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + + buf.seek(0) + assert _read_matrix(_read_frame(buf)) is None + + +def test_set_display_matrix_validates_length() -> None: + buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + with pytest.raises(ValueError): + stream.set_display_matrix([0, 1, 2]) + + +def test_set_display_matrix_none_clears() -> None: + buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + stream.set_display_matrix(matrix_to_ints(EXIF_MATRICES[6])) + stream.set_display_matrix(None) # clear before encoding + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + + buf.seek(0) + assert _read_matrix(_read_frame(buf)) is None