Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,14 @@ v17.1.0 (next)
Features:
- Use FFmpeg 8.1.1 in the binary wheels.
- Expose ``AVCodecContext.global_quality`` by :gh-user:`WyattBlue` in (:pr:`2246`).
- Expose ``Stream.discard`` so demuxing and seeking can skip unwanted streams (:issue:`2272`).

Fixes:
- Add ``cython.final`` to leaf classes, ensuring that they are not subclassed.
- Warn that ``CodecContext.decode()`` is not memory safe in some cases.
- Fix ``enumerate_input_devices`` and ``enumerate_output_devices`` raising ``AttributeError`` (:issue:`2264`).
- Map HTTP 429 to ``HTTPTooManyRequestsError`` instead of ``UndefinedError`` (:issue:`2267`).
- Fix crash in ``VideoFrame.to_ndarray()`` and ``VideoFrame.to_image()`` on bottom-up frames with a negative ``line_size`` (:issue:`2213`).

v17.0.1
-------
Expand Down
28 changes: 27 additions & 1 deletion av/stream.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from enum import IntFlag
from enum import IntEnum, IntFlag

import cython
from cython.cimports import libav as lib
Expand Down Expand Up @@ -34,6 +34,16 @@ class Disposition(IntFlag):
multilayer = 1 << 21


class Discard(IntEnum):
    """Discard levels mirroring FFmpeg's ``AVDiscard`` enum.

    Assigned to :attr:`Stream.discard` to tell the demuxer which packets of
    a stream may be dropped.
    """

    # Per-level notes paraphrase FFmpeg's ``AVDiscard`` documentation.
    none = lib.AVDISCARD_NONE  # discard nothing
    default = lib.AVDISCARD_DEFAULT  # discard useless packets (e.g. zero-size)
    nonref = lib.AVDISCARD_NONREF  # discard all non-reference frames
    bidir = lib.AVDISCARD_BIDIR  # discard all bidirectional frames
    nonintra = lib.AVDISCARD_NONINTRA  # discard all non-intra frames
    nonkey = lib.AVDISCARD_NONKEY  # discard everything except keyframes
    all = lib.AVDISCARD_ALL  # discard everything


_cinit_bypass_sentinel = cython.declare(object, object())


Expand Down Expand Up @@ -132,6 +142,9 @@ def __setattr__(self, name, value):
if name == "disposition":
self.ptr.disposition = value
return
if name == "discard":
self.ptr.discard = Discard(value).value
return
if name == "time_base":
to_avrational(value, cython.address(self.ptr.time_base))
return
Expand Down Expand Up @@ -268,6 +281,19 @@ def language(self):
def disposition(self):
return Disposition(self.ptr.disposition)

@property
def discard(self):
    """
    The discard level the demuxer applies to this stream's packets.

    Raising the level (e.g. to :attr:`Discard.all`) on streams you never
    read lets :meth:`.Container.demux` and :meth:`.Container.seek` skip
    them, which avoids the cost of synchronizing those streams.

    :type: Discard
    """
    return Discard(self.ptr.discard)

@property
def type(self):
"""
Expand Down
12 changes: 11 additions & 1 deletion av/stream.pyi
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from enum import IntFlag
from enum import IntEnum, IntFlag
from fractions import Fraction
from typing import Literal, cast

Expand Down Expand Up @@ -27,6 +27,15 @@ class Disposition(IntFlag):
still_image = cast(int, ...)
multilayer = cast(int, ...)

class Discard(IntEnum):
    """Discard levels accepted and returned by ``Stream.discard``."""

    none = cast(int, ...)
    default = cast(int, ...)
    nonref = cast(int, ...)
    bidir = cast(int, ...)
    nonintra = cast(int, ...)
    nonkey = cast(int, ...)
    all = cast(int, ...)

class Stream:
name: str | None
container: Container
Expand All @@ -46,6 +55,7 @@ class Stream:
start_time: int | None
duration: int | None
disposition: Disposition
discard: Discard
frames: int
language: str | None
type: Literal["video", "audio", "data", "subtitle", "attachment", "unknown"]
Expand Down
23 changes: 19 additions & 4 deletions av/video/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,8 @@ def useful_array(
import numpy as np

dtype_obj = np.dtype(dtype)
total_line_size = abs(plane.frame.ptr.linesize[plane.index])
line_size = plane.frame.ptr.linesize[plane.index]
total_line_size = abs(line_size)
itemsize = dtype_obj.itemsize
channels = bytes_per_pixel // itemsize

Expand All @@ -458,6 +459,13 @@ def useful_array(
shape = (plane.height, plane.width, channels)
strides = (total_line_size, bytes_per_pixel, itemsize)

if line_size < 0:
offset = (plane.height - 1) * total_line_size
strides = (-total_line_size, *strides[1:])
return np.ndarray(
shape, dtype=dtype_obj, buffer=plane, offset=offset, strides=strides
)

return np.ndarray(shape, dtype=dtype_obj, buffer=plane, strides=strides)


Expand Down Expand Up @@ -704,17 +712,24 @@ def to_image(self, **kwargs):
plane: VideoPlane = self.reformat(format="rgb24", **kwargs).planes[0]

i_buf: cython.const[uint8_t][:] = plane
i_pos: cython.size_t = 0
i_stride: cython.size_t = plane.line_size
line_size: cython.int = plane.line_size
i_stride: cython.size_t = abs(line_size)

o_pos: cython.size_t = 0
o_stride: cython.size_t = plane.width * 3
o_size: cython.size_t = plane.height * o_stride
o_buf: bytearray = bytearray(o_size)

# For bottom-up frames (negative line_size) the buffer protocol exposes
# rows from the lowest address, so the top display row is at the far end.
i_pos: cython.size_t = (plane.height - 1) * i_stride if line_size < 0 else 0

while o_pos < o_size:
o_buf[o_pos : o_pos + o_stride] = i_buf[i_pos : i_pos + o_stride]
i_pos += i_stride
if line_size < 0:
i_pos -= i_stride
else:
i_pos += i_stride
o_pos += o_stride

return Image.frombytes(
Expand Down
9 changes: 8 additions & 1 deletion av/video/plane.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,14 @@ def __getbuffer__(self, view: cython.pointer[Py_buffer], flags: cython.int):
)
if flags & PyBUF_WRITABLE and not self._buffer_writable():
raise ValueError("buffer is not writable")
PyBuffer_FillInfo(view, self, self._buffer_ptr(), self._buffer_size(), 0, flags)

ptr: cython.p_void = self._buffer_ptr()
line_size: cython.int = self.frame.ptr.linesize[self.index]
if line_size < 0:
height: cython.int = self.height
ptr = cython.cast(cython.p_char, ptr) + (height - 1) * line_size

PyBuffer_FillInfo(view, self, ptr, self._buffer_size(), 0, flags)

def __dlpack_device__(self):
if self.frame.ptr.hw_frames_ctx:
Expand Down
2 changes: 2 additions & 0 deletions docs/api/stream.rst
Original file line number Diff line number Diff line change
Expand Up @@ -92,5 +92,7 @@ Others

.. autoattribute:: Stream.language

.. autoattribute:: Stream.discard



1 change: 1 addition & 0 deletions include/avformat.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ cdef extern from "libavformat/avformat.h" nogil:
int index
int id
int disposition
AVDiscard discard
AVCodecParameters *codecpar
AVRational time_base
int64_t start_time
Expand Down
39 changes: 39 additions & 0 deletions tests/test_streams.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,45 @@ def test_selection(self) -> None:
data = container.streams.data[0]
assert data == container.streams.best("data")

def test_discard(self) -> None:
    """Check that ``Stream.discard`` round-trips and influences demuxing.

    Containers are opened with ``with`` so they are closed even when an
    assertion fails or ``demux()`` raises part-way through.
    """
    from av.stream import Discard

    path = fate_suite(
        "amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv"
    )

    with av.open(path) as container:
        audio = container.streams.audio[0]

        # Default discard policy.
        assert audio.discard == Discard.default

        # Setter accepts the enum and round-trips.
        audio.discard = Discard.all
        assert audio.discard == Discard.all

        audio.discard = Discard.nonkey
        assert audio.discard == Discard.nonkey

    # Discarding a stream makes demux skip (almost) all of its packets.
    def audio_packets(discard: Discard | None) -> int:
        """Count audio packets with a dts, optionally under a discard level."""
        with av.open(path) as c:
            if discard is not None:
                c.streams.audio[0].discard = discard
            return sum(
                1
                for p in c.demux()
                if p.dts is not None and p.stream.type == "audio"
            )

    baseline = audio_packets(None)
    discarded = audio_packets(Discard.all)
    assert baseline > 0
    assert discarded < baseline

def test_printing_video_stream(self) -> None:
input_ = av.open(
fate_suite("amv/MTV_high_res_320x240_sample_Penguin_Joke_MTV_from_WMV.amv")
Expand Down
43 changes: 43 additions & 0 deletions tests/test_videoframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,49 @@ def test_basic_to_ndarray() -> None:
assert array.shape == (480, 640, 3)


def _vflip(frame: VideoFrame) -> VideoFrame:
    """Vertically flip *frame* by running it through a filter graph.

    The graph is ``buffer -> vflip -> buffersink``. The flipped output is a
    bottom-up frame with a negative ``line_size`` (the same layout DirectShow
    produces, see GH-2213).
    """
    graph = av.filter.Graph()
    source = graph.add_buffer(
        template=None,
        width=frame.width,
        height=frame.height,
        format=frame.format,
        time_base=Fraction(1, 1000),
    )
    flip = graph.add("vflip")
    sink = graph.add("buffersink")
    source.link_to(flip)
    flip.link_to(sink)
    graph.configure()

    graph.push(frame)
    flipped = graph.pull()
    assert isinstance(flipped, VideoFrame)
    return flipped


@pytest.mark.parametrize("format", ["rgb24", "bgr24", "gray"])
def test_negative_linesize_to_ndarray(format: str) -> None:
    """Read a bottom-up packed frame back through ``to_ndarray()``.

    Such frames carry a negative ``line_size``; the conversion must not crash
    (GH-2213) and must return the rows in the correct top-down order.
    """
    height, width = 6, 4
    if format == "gray":
        pixels = numpy.arange(height * width, dtype=numpy.uint8)
        source = pixels.reshape(height, width)
    else:
        # Every pixel of row ``r`` holds ``r * 10`` so rows are distinguishable.
        source = numpy.zeros((height, width, 3), dtype=numpy.uint8)
        for row in range(height):
            source[row, :, :] = row * 10

    flipped = _vflip(VideoFrame.from_ndarray(source, format=format))
    assert flipped.planes[0].line_size < 0

    result = flipped.to_ndarray(format=format)
    assertNdarraysEqual(result, source[::-1])
    # Fully materializing the array used to segfault on a bottom-up frame.
    assert result.copy().sum() == int(source.sum())


def test_ndarray_gray() -> None:
array = numpy.random.randint(0, 256, size=(480, 640), dtype=numpy.uint8)
for format in ("gray", "gray8"):
Expand Down
Loading