From 508a752ed5c04c449a07844b5084055d9570e9a0 Mon Sep 17 00:00:00 2001 From: Mark Harfouche Date: Sat, 6 Jun 2026 14:34:44 -0400 Subject: [PATCH 1/2] Allow setting the display (rotation) matrix on output streams Expose two methods on Stream: - set_display_matrix(matrix): write a raw 9-integer AV_PKT_DATA_DISPLAYMATRIX matrix (16.16 fixed point, 2.30 for the third column). - set_display_rotation(degrees, hflip=False, vflip=False): build the matrix with av_display_rotation_set() / av_display_matrix_flip(); the angle is counter-clockwise, matching VideoFrame.rotation on read. The matrix is written as coded side data on the output stream's codecpar inside _finalize_for_output() (after avcodec_parameters_from_context, which would otherwise overwrite it), so muxers record it in the container -- e.g. the MP4/MOV tkhd transformation matrix. Adds the required libav bindings (AVCodecParameters.coded_side_data, AV_PKT_DATA_DISPLAYMATRIX, av_packet_side_data_new, av_display_rotation_set, av_display_matrix_flip), .pyi stubs, a CHANGELOG entry, and tests covering all 8 EXIF orientations across several codecs. Addresses #1045 and #1012. Co-Authored-By: Claude Opus 4.8 (1M context) --- CHANGELOG.rst | 1 + av/stream.pxd | 8 ++ av/stream.py | 76 +++++++++++ av/stream.pyi | 6 + include/avcodec.pxd | 8 +- include/avutil.pxd | 2 + tests/test_display_matrix.py | 252 +++++++++++++++++++++++++++++++++++ 7 files changed, 352 insertions(+), 1 deletion(-) create mode 100644 tests/test_display_matrix.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 068fbd80d..5f61756df 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -32,6 +32,7 @@ Features: - Use FFmpeg 8.1.1 in the binary wheels. - Expose ``AVCodecContext.global_quality`` by :gh-user:`WyattBlue` in (:pr:`2246`). - Expose ``Stream.discard`` so demuxing and seeking can skip unwanted streams (:issue:`2272`). 
+- Add ``Stream.set_display_matrix()`` and ``Stream.set_display_rotation()`` to write the container display (rotation) matrix on output streams by :gh-user:`hmaarrfk` in (:pr:`2287`). Fixes: - Add ``cython.final`` to leaf classes, ensuring that they are not subclassed. diff --git a/av/stream.pxd b/av/stream.pxd index 89d41e559..8eeb321fe 100644 --- a/av/stream.pxd +++ b/av/stream.pxd @@ -19,9 +19,17 @@ cdef class Stream: cdef readonly IndexEntries index_entries + # Display (rotation) matrix to write as AV_PKT_DATA_DISPLAYMATRIX coded + # side data at mux time. Exactly one of these is set at a time (or neither): + # _display_matrix: native-endian packed bytes (9 int32), raw form. + # _display_rotation: (degrees, hflip, vflip), built via FFmpeg helpers. + cdef bytes _display_matrix + cdef object _display_rotation + # Private API. cdef _init(self, Container, lib.AVStream*, CodecContext) cdef _finalize_for_output(self) + cdef _apply_display_matrix(self) cdef _set_id(self, value) diff --git a/av/stream.py b/av/stream.py index f96030e26..ce47528ad 100644 --- a/av/stream.py +++ b/av/stream.py @@ -1,3 +1,4 @@ +import struct from enum import IntEnum, IntFlag import cython @@ -10,6 +11,7 @@ dict_to_avdict, to_avrational, ) +from cython.cimports.libc.stdint import int32_t class Disposition(IntFlag): @@ -175,6 +177,80 @@ def _finalize_for_output(self): ) ) + # avcodec_parameters_from_context() frees and overwrites + # codecpar.coded_side_data, so the display matrix must be injected + # *after* it, right before avformat_write_header() consumes it. 
+ if self._display_matrix is not None or self._display_rotation is not None: + self._apply_display_matrix() + + @cython.cfunc + def _apply_display_matrix(self): + sd: cython.pointer[lib.AVPacketSideData] = lib.av_packet_side_data_new( + cython.address(self.ptr.codecpar.coded_side_data), + cython.address(self.ptr.codecpar.nb_coded_side_data), + lib.AV_PKT_DATA_DISPLAYMATRIX, + 36, + 0, + ) + if sd == cython.NULL: + raise MemoryError("could not allocate display matrix side data") + + if self._display_matrix is not None: + data: cython.pointer[cython.uchar] = sd.data + i: cython.int + for i in range(36): + data[i] = self._display_matrix[i] + return + + # Convenience path: build the matrix in place with FFmpeg's helpers. + angle: cython.double = self._display_rotation[0] + hflip: cython.int = self._display_rotation[1] + vflip: cython.int = self._display_rotation[2] + matrix: cython.pointer[int32_t] = cython.cast(cython.pointer[int32_t], sd.data) + # av_display_rotation_set() takes a clockwise angle; negate so our public + # `degrees` is counter-clockwise, matching VideoFrame.rotation on read. + lib.av_display_rotation_set(matrix, -angle) + lib.av_display_matrix_flip(matrix, hflip, vflip) + + def set_display_matrix(self, matrix): + """Set the display (rotation) matrix written to the container. + + ``matrix`` is a sequence of 9 integers in FFmpeg's display-matrix + layout (16.16 fixed point for entries 0,1,3,4,6,7 and 2.30 fixed point + for entries 2,5,8), matching ``AV_PKT_DATA_DISPLAYMATRIX``. The values + are written, native-endian, as coded side data on the output stream so + the muxer records them in the container (e.g. the MP4/MOV ``tkhd`` + transformation matrix). Pass ``None`` to clear. + + Must be called before the first frame is encoded / the header is + written. See :meth:`set_display_rotation` for a higher-level helper. 
+ """ + self._display_rotation = None + if matrix is None: + self._display_matrix = None + return + + vals = [int(v) for v in matrix] + if len(vals) != 9: + raise ValueError("display matrix must have exactly 9 elements") + self._display_matrix = struct.pack("=9i", *vals) + + def set_display_rotation(self, degrees, hflip=False, vflip=False): + """Set the container display matrix from a rotation and optional flips. + + ``degrees`` is a counter-clockwise rotation (matching the value read + back from :attr:`VideoFrame.rotation`); ``hflip`` / ``vflip`` apply a + horizontal / vertical mirror after the rotation. Together these express + all eight EXIF orientations. The matrix is built with FFmpeg's + ``av_display_rotation_set`` / ``av_display_matrix_flip`` and written as + coded side data on the output stream (e.g. the MP4/MOV ``tkhd`` matrix). + + This is a convenience wrapper over :meth:`set_display_matrix`; it must + likewise be called before the first frame is encoded. + """ + self._display_matrix = None + self._display_rotation = (float(degrees), bool(hflip), bool(vflip)) + @property def id(self): """ diff --git a/av/stream.pyi b/av/stream.pyi index f9148021f..4b9c61250 100644 --- a/av/stream.pyi +++ b/av/stream.pyi @@ -1,3 +1,4 @@ +from collections.abc import Sequence from enum import IntEnum, IntFlag from fractions import Fraction from typing import Literal, cast @@ -63,6 +64,11 @@ class Stream: # From context codec_tag: str + def set_display_matrix(self, matrix: Sequence[int] | None) -> None: ... + def set_display_rotation( + self, degrees: float, hflip: bool = ..., vflip: bool = ... + ) -> None: ... 
+ class DataStream(Stream): type: Literal["data"] name: str | None diff --git a/include/avcodec.pxd b/include/avcodec.pxd index f2123ce0d..5d13abed1 100644 --- a/include/avcodec.pxd +++ b/include/avcodec.pxd @@ -298,7 +298,7 @@ cdef extern from "libavcodec/avcodec.h" nogil: cdef char* avcodec_get_name(AVCodecID id) cdef int avcodec_open2(AVCodecContext *ctx, const AVCodec *codec, AVDictionary **options) cdef enum AVPacketSideDataType: - pass + AV_PKT_DATA_DISPLAYMATRIX cdef struct AVPacketSideData: uint8_t *data size_t size @@ -476,6 +476,8 @@ cdef extern from "libavcodec/avcodec.h" nogil: int width int height int sample_rate + AVPacketSideData *coded_side_data + int nb_coded_side_data cdef int avcodec_parameters_copy( AVCodecParameters *dst, const AVCodecParameters *src @@ -513,6 +515,10 @@ cdef extern from "libavcodec/packet.h" nogil: const AVPacketSideData *av_packet_side_data_get( const AVPacketSideData *sd, int nb_sd, AVPacketSideDataType type ) + AVPacketSideData *av_packet_side_data_new( + AVPacketSideData **psd, int *pnb_sd, + AVPacketSideDataType type, size_t size, int flags + ) uint8_t* av_packet_get_side_data( const AVPacket *pkt, AVPacketSideDataType type, size_t *size ) diff --git a/include/avutil.pxd b/include/avutil.pxd index ede8f6fbe..7b0a9e311 100644 --- a/include/avutil.pxd +++ b/include/avutil.pxd @@ -147,6 +147,8 @@ cdef extern from "libavutil/dict.h" nogil: cdef extern from "libavutil/display.h" nogil: cdef double av_display_rotation_get(const int32_t matrix[9]) + cdef void av_display_rotation_set(int32_t matrix[9], double angle) + cdef void av_display_matrix_flip(int32_t matrix[9], int hflip, int vflip) cdef extern from "libavutil/error.h" nogil: cdef int AVERROR_BSF_NOT_FOUND diff --git a/tests/test_display_matrix.py b/tests/test_display_matrix.py new file mode 100644 index 000000000..25c02af6c --- /dev/null +++ b/tests/test_display_matrix.py @@ -0,0 +1,252 @@ +from __future__ import annotations + +import io +import struct +from typing 
import cast + +import numpy as np +import pytest + +import av +from av.sidedata.sidedata import SideData +from av.video.stream import VideoStream + +WIDTH = 320 +HEIGHT = 240 +DURATION = 10 + +# The 8 EXIF orientations as 3x3 transformation matrices, built the same way as +# the application code: a 90 deg rotation generator (R) and a horizontal-flip +# generator (F). Orientations 2, 4, 5, 7 are reflections, which a scalar +# rotation cannot represent -- so these are verified by comparing the full +# matrix that round-trips through the container. +_R = np.asarray([[0, -1, 0], [1, 0, 0], [0, 0, 1]], dtype=float) # exif 8 +_F = np.asarray([[-1, 0, 0], [0, 1, 0], [0, 0, 1]], dtype=float) # exif 2 + +EXIF_MATRICES = { + 1: np.eye(3), + 2: _F, + 3: _R @ _R, + 4: _F @ _R @ _R, + 5: _F @ _R @ _R @ _R, + 6: _R @ _R @ _R, + 7: _F @ _R, + 8: _R, +} + +# Pure-rotation orientations also have a well-defined scalar rotation, reported +# by av_display_rotation_get() (counter-clockwise, range [-180, 180]). +EXPECTED_ROTATION = {1: 0, 3: 180, 6: -90, 8: 90} + +# Each EXIF orientation expressed through the convenience API as +# (degrees_ccw, hflip, vflip). Verified to reproduce EXIF_MATRICES exactly. +EXIF_VIA_ROTATION = { + 1: (0, False, False), + 2: (0, True, False), + 3: (0, True, True), + 4: (0, False, True), + 5: (90, True, False), + 6: (90, True, True), + 7: (90, False, True), + 8: (90, False, False), +} + +# One encoder per codec family we care about, plus the near-universal mpeg4 +# baseline. Unavailable encoders are skipped at runtime so the suite stays +# portable across FFmpeg builds. +CODECS = ["mpeg4", "libx264", "libopenh264", "libx265", "libsvtav1", "libaom-av1"] + + +def matrix_to_ints(matrix: np.ndarray) -> list[int]: + """Convert a 3x3 matrix to FFmpeg's AV_PKT_DATA_DISPLAYMATRIX integers. + + Layout (a, b, u, c, d, v, x, y, w): 16.16 fixed point everywhere except + u, v, w (indices 2, 5, 8) which are 2.30. 
+ """ + flat = [float(v) for v in matrix.reshape(-1)] + return [ + int(round(v * (1 << 30))) if i in (2, 5, 8) else int(round(v * (1 << 16))) + for i, v in enumerate(flat) + ] + + +def _has_encoder(name: str) -> bool: + try: + av.codec.Codec(name, "w") + except Exception: + return False + return True + + +def _encode(codec_name: str, matrix: list[int] | None) -> io.BytesIO: + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = cast(VideoStream, container.add_stream(codec_name, rate=24)) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + + if matrix is not None: + stream.set_display_matrix(matrix) + + for i in range(DURATION): + img = np.full((HEIGHT, WIDTH, 3), (i * 8) % 256, dtype=np.uint8) + frame = av.VideoFrame.from_ndarray(img, format="rgb24") + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + return buf + + +def _read_frame(buf: io.BytesIO) -> av.VideoFrame: + with av.open(buf, "r", format="mp4") as container: + return next(container.decode(video=0)) + + +def _read_matrix(frame: av.VideoFrame) -> list[int] | None: + sd = frame.side_data.get("DISPLAYMATRIX") + if sd is None: + return None + return list(struct.unpack("=9i", bytes(cast(SideData, sd)))) + + +@pytest.mark.parametrize("codec_name", CODECS) +@pytest.mark.parametrize("orientation", sorted(EXIF_MATRICES)) +def test_exif_orientation_roundtrip(codec_name: str, orientation: int) -> None: + if not _has_encoder(codec_name): + pytest.skip(f"encoder {codec_name} not available") + + expected = matrix_to_ints(EXIF_MATRICES[orientation]) + frame = _read_frame(_encode(codec_name, expected)) + got = _read_matrix(frame) + + identity = matrix_to_ints(np.eye(3)) + if expected == identity: + # Identity is the container default; demuxers emit no side data for it. 
+ assert got is None + assert frame.rotation == 0 + else: + assert got == expected, f"exif {orientation}: wrote {expected}, read {got}" + + if orientation in EXPECTED_ROTATION: + rotation = frame.rotation + # 180 may come back negated; rotations are exact otherwise. + if abs(EXPECTED_ROTATION[orientation]) == 180: + assert abs(rotation) == 180 + else: + assert rotation == EXPECTED_ROTATION[orientation] + + +@pytest.mark.parametrize("degrees,expected", [(0, 0), (90, 90), (180, 180), (270, -90)]) +def test_set_display_rotation_roundtrip(degrees: int, expected: int) -> None: + # The public angle is counter-clockwise, matching VideoFrame.rotation. + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(degrees) + for i in range(DURATION): + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + rotation = _read_frame(buf).rotation + if abs(expected) == 180: + assert abs(rotation) == 180 + else: + assert rotation == expected + + +@pytest.mark.parametrize("orientation", sorted(EXIF_VIA_ROTATION)) +def test_convenience_reaches_all_exif_orientations(orientation: int) -> None: + # set_display_rotation(degrees, hflip, vflip) must reproduce the exact same + # matrix as the explicit EXIF table for every one of the 8 orientations. 
+ degrees, hflip, vflip = EXIF_VIA_ROTATION[orientation] + expected = matrix_to_ints(EXIF_MATRICES[orientation]) + + buf = io.BytesIO() + container = av.open(buf, "w", format="mp4") + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(degrees, hflip=hflip, vflip=vflip) + for i in range(DURATION): + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + container.close() + + buf.seek(0) + got = _read_matrix(_read_frame(buf)) + if expected == matrix_to_ints(np.eye(3)): + assert got is None # identity emits no side data + else: + assert got == expected, f"exif {orientation}: wrote {expected}, read {got}" + + +def test_matrix_and_rotation_setters_are_mutually_exclusive() -> None: + # Setting one path must clear the other so they don't both apply. 
+ buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + stream.set_display_rotation(90) + stream.set_display_matrix(None) # clears both paths + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + + buf.seek(0) + assert _read_matrix(_read_frame(buf)) is None + + +def test_set_display_matrix_validates_length() -> None: + buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + with pytest.raises(ValueError): + stream.set_display_matrix([0, 1, 2]) + + +def test_set_display_matrix_none_clears() -> None: + buf = io.BytesIO() + with av.open(buf, "w", format="mp4") as container: + stream = container.add_stream("mpeg4", rate=24) + stream.set_display_matrix(matrix_to_ints(EXIF_MATRICES[6])) + stream.set_display_matrix(None) # clear before encoding + stream.width = WIDTH + stream.height = HEIGHT + stream.pix_fmt = "yuv420p" + frame = av.VideoFrame.from_ndarray( + np.zeros((HEIGHT, WIDTH, 3), dtype=np.uint8), format="rgb24" + ) + for packet in stream.encode(frame): + container.mux(packet) + for packet in stream.encode(): + container.mux(packet) + + buf.seek(0) + assert _read_matrix(_read_frame(buf)) is None From c2b88dd69fa4d5d5762f9df488d70304f942058e Mon Sep 17 00:00:00 2001 From: WyattBlue Date: Sat, 6 Jun 2026 19:49:10 -0400 Subject: [PATCH 2/2] Simplify display matrix API and move it to VideoStream Collapse the two stream fields into a single fixed-size int32[9] matrix (plus a presence flag) by building the rotation matrix eagerly in set_display_rotation(), and move the field and both setters from Stream to VideoStream so audio/subtitle streams no longer expose them. 
--- av/stream.pxd | 8 ----- av/stream.py | 76 --------------------------------------------- av/stream.pyi | 6 ---- av/video/stream.pxd | 9 ++++++ av/video/stream.py | 59 +++++++++++++++++++++++++++++++++++ av/video/stream.pyi | 5 +++ 6 files changed, 73 insertions(+), 90 deletions(-) diff --git a/av/stream.pxd b/av/stream.pxd index 8eeb321fe..89d41e559 100644 --- a/av/stream.pxd +++ b/av/stream.pxd @@ -19,17 +19,9 @@ cdef class Stream: cdef readonly IndexEntries index_entries - # Display (rotation) matrix to write as AV_PKT_DATA_DISPLAYMATRIX coded - # side data at mux time. Exactly one of these is set at a time (or neither): - # _display_matrix: native-endian packed bytes (9 int32), raw form. - # _display_rotation: (degrees, hflip, vflip), built via FFmpeg helpers. - cdef bytes _display_matrix - cdef object _display_rotation - # Private API. cdef _init(self, Container, lib.AVStream*, CodecContext) cdef _finalize_for_output(self) - cdef _apply_display_matrix(self) cdef _set_id(self, value) diff --git a/av/stream.py b/av/stream.py index ce47528ad..f96030e26 100644 --- a/av/stream.py +++ b/av/stream.py @@ -1,4 +1,3 @@ -import struct from enum import IntEnum, IntFlag import cython @@ -11,7 +10,6 @@ dict_to_avdict, to_avrational, ) -from cython.cimports.libc.stdint import int32_t class Disposition(IntFlag): @@ -177,80 +175,6 @@ def _finalize_for_output(self): ) ) - # avcodec_parameters_from_context() frees and overwrites - # codecpar.coded_side_data, so the display matrix must be injected - # *after* it, right before avformat_write_header() consumes it. 
- if self._display_matrix is not None or self._display_rotation is not None: - self._apply_display_matrix() - - @cython.cfunc - def _apply_display_matrix(self): - sd: cython.pointer[lib.AVPacketSideData] = lib.av_packet_side_data_new( - cython.address(self.ptr.codecpar.coded_side_data), - cython.address(self.ptr.codecpar.nb_coded_side_data), - lib.AV_PKT_DATA_DISPLAYMATRIX, - 36, - 0, - ) - if sd == cython.NULL: - raise MemoryError("could not allocate display matrix side data") - - if self._display_matrix is not None: - data: cython.pointer[cython.uchar] = sd.data - i: cython.int - for i in range(36): - data[i] = self._display_matrix[i] - return - - # Convenience path: build the matrix in place with FFmpeg's helpers. - angle: cython.double = self._display_rotation[0] - hflip: cython.int = self._display_rotation[1] - vflip: cython.int = self._display_rotation[2] - matrix: cython.pointer[int32_t] = cython.cast(cython.pointer[int32_t], sd.data) - # av_display_rotation_set() takes a clockwise angle; negate so our public - # `degrees` is counter-clockwise, matching VideoFrame.rotation on read. - lib.av_display_rotation_set(matrix, -angle) - lib.av_display_matrix_flip(matrix, hflip, vflip) - - def set_display_matrix(self, matrix): - """Set the display (rotation) matrix written to the container. - - ``matrix`` is a sequence of 9 integers in FFmpeg's display-matrix - layout (16.16 fixed point for entries 0,1,3,4,6,7 and 2.30 fixed point - for entries 2,5,8), matching ``AV_PKT_DATA_DISPLAYMATRIX``. The values - are written, native-endian, as coded side data on the output stream so - the muxer records them in the container (e.g. the MP4/MOV ``tkhd`` - transformation matrix). Pass ``None`` to clear. - - Must be called before the first frame is encoded / the header is - written. See :meth:`set_display_rotation` for a higher-level helper. 
- """ - self._display_rotation = None - if matrix is None: - self._display_matrix = None - return - - vals = [int(v) for v in matrix] - if len(vals) != 9: - raise ValueError("display matrix must have exactly 9 elements") - self._display_matrix = struct.pack("=9i", *vals) - - def set_display_rotation(self, degrees, hflip=False, vflip=False): - """Set the container display matrix from a rotation and optional flips. - - ``degrees`` is a counter-clockwise rotation (matching the value read - back from :attr:`VideoFrame.rotation`); ``hflip`` / ``vflip`` apply a - horizontal / vertical mirror after the rotation. Together these express - all eight EXIF orientations. The matrix is built with FFmpeg's - ``av_display_rotation_set`` / ``av_display_matrix_flip`` and written as - coded side data on the output stream (e.g. the MP4/MOV ``tkhd`` matrix). - - This is a convenience wrapper over :meth:`set_display_matrix`; it must - likewise be called before the first frame is encoded. - """ - self._display_matrix = None - self._display_rotation = (float(degrees), bool(hflip), bool(vflip)) - @property def id(self): """ diff --git a/av/stream.pyi b/av/stream.pyi index 4b9c61250..f9148021f 100644 --- a/av/stream.pyi +++ b/av/stream.pyi @@ -1,4 +1,3 @@ -from collections.abc import Sequence from enum import IntEnum, IntFlag from fractions import Fraction from typing import Literal, cast @@ -64,11 +63,6 @@ class Stream: # From context codec_tag: str - def set_display_matrix(self, matrix: Sequence[int] | None) -> None: ... - def set_display_rotation( - self, degrees: float, hflip: bool = ..., vflip: bool = ... - ) -> None: ... 
- class DataStream(Stream): type: Literal["data"] name: str | None diff --git a/av/video/stream.pxd b/av/video/stream.pxd index f0dcfb9b2..1a553f34c 100644 --- a/av/video/stream.pxd +++ b/av/video/stream.pxd @@ -1,3 +1,5 @@ +from libc.stdint cimport int32_t, uint8_t + from av.packet cimport Packet from av.stream cimport Stream @@ -5,5 +7,12 @@ from .frame cimport VideoFrame cdef class VideoStream(Stream): + # Display matrix (9 int32, native-endian) written as AV_PKT_DATA_DISPLAYMATRIX + # coded side data at mux time, applied only when _has_display_matrix is set. + cdef int32_t _display_matrix[9] + cdef uint8_t _has_display_matrix + + cdef _apply_display_matrix(self) + cpdef encode(self, VideoFrame frame=?) cpdef decode(self, Packet packet=?) diff --git a/av/video/stream.py b/av/video/stream.py index c37f45a3e..bb8b06fc0 100644 --- a/av/video/stream.py +++ b/av/video/stream.py @@ -1,8 +1,11 @@ import cython from cython.cimports import libav as lib from cython.cimports.av.packet import Packet +from cython.cimports.av.stream import Stream from cython.cimports.av.utils import avrational_to_fraction from cython.cimports.av.video.frame import VideoFrame +from cython.cimports.libc.stdint import int32_t +from cython.cimports.libc.string import memcpy @cython.final @@ -56,6 +59,62 @@ def decode(self, packet: Packet | None = None): """ return self.codec_context.decode(packet) + @cython.cfunc + def _finalize_for_output(self): + Stream._finalize_for_output(self) + # avcodec_parameters_from_context() overwrites codecpar.coded_side_data, + # so inject the display matrix after it, before avformat_write_header(). 
+ if self.codec_context is not None and self._has_display_matrix: + self._apply_display_matrix() + + @cython.cfunc + def _apply_display_matrix(self): + n: cython.int = 9 * cython.sizeof(int32_t) + sd: cython.pointer[lib.AVPacketSideData] = lib.av_packet_side_data_new( + cython.address(self.ptr.codecpar.coded_side_data), + cython.address(self.ptr.codecpar.nb_coded_side_data), + lib.AV_PKT_DATA_DISPLAYMATRIX, + n, + 0, + ) + if sd == cython.NULL: + raise MemoryError("could not allocate display matrix side data") + + memcpy(sd.data, self._display_matrix, n) + + def set_display_matrix(self, matrix): + """Set the display matrix written to the container as coded side data. + + ``matrix`` is a sequence of 9 integers in FFmpeg's ``AV_PKT_DATA_DISPLAYMATRIX`` + layout, or ``None`` to clear. Must be called before the first frame is + encoded. See :meth:`set_display_rotation` for a higher-level helper. + """ + if matrix is None: + self._has_display_matrix = False + return + + vals = [int(v) for v in matrix] + if len(vals) != 9: + raise ValueError("display matrix must have exactly 9 elements") + i: cython.int + for i in range(9): + self._display_matrix[i] = vals[i] + self._has_display_matrix = True + + def set_display_rotation(self, degrees, hflip=False, vflip=False): + """Set the container display matrix from a rotation and optional flips. + + ``degrees`` is a counter-clockwise rotation (matching the value read back + from :attr:`VideoFrame.rotation`); ``hflip`` / ``vflip`` mirror after it. + Together these express all eight EXIF orientations. Must be called before + the first frame is encoded. + """ + # av_display_rotation_set() takes a clockwise angle; negate so our public + # `degrees` is counter-clockwise, matching VideoFrame.rotation on read. 
+ lib.av_display_rotation_set(self._display_matrix, -float(degrees)) + lib.av_display_matrix_flip(self._display_matrix, bool(hflip), bool(vflip)) + self._has_display_matrix = True + @property def average_rate(self): """ diff --git a/av/video/stream.pyi b/av/video/stream.pyi index dd670d3cf..4e2a61e46 100644 --- a/av/video/stream.pyi +++ b/av/video/stream.pyi @@ -1,3 +1,4 @@ +from collections.abc import Sequence from fractions import Fraction from typing import Iterator, Literal @@ -20,6 +21,10 @@ class VideoStream(Stream): def encode(self, frame: VideoFrame | None = None) -> list[Packet]: ... def encode_lazy(self, frame: VideoFrame | None = None) -> Iterator[Packet]: ... def decode(self, packet: Packet | None = None) -> list[VideoFrame]: ... + def set_display_matrix(self, matrix: Sequence[int] | None) -> None: ... + def set_display_rotation( + self, degrees: float, hflip: bool = ..., vflip: bool = ... + ) -> None: ... # from codec context format: VideoFormat