diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 202cad8..3c21934 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -1,7 +1,7 @@
 // For format details, see https://aka.ms/devcontainer.json. For config options, see the
 // README at: https://github.com/devcontainers/templates/tree/main/src/rust
 {
-  "name": "c++",
+  "name": "pixelflux (rust)",
   // Or use a Dockerfile or Docker Compose file. More info: https://containers.dev/guide/dockerfile
   "image": "mcr.microsoft.com/devcontainers/base:ubuntu-24.04",
   "features": {
@@ -13,11 +13,8 @@
       "vncPort": "5901"
     }
   },
-  
-  "runArgs": ["--env-file", ".devcontainer/devcontainer.env"],
 
-  // Features to add to the dev container. More info: https://containers.dev/features.
-  // "features": {},
+  "runArgs": ["--env-file", ".devcontainer/devcontainer.env"],
 
   // Use 'forwardPorts' to make a list of ports inside the container available locally.
   "forwardPorts": [6080, 5901],
@@ -29,10 +26,9 @@
   "customizations": {
     "vscode": {
       "extensions": [
-        // C++
-        "ms-vscode.cpptools",
-        "ms-vscode.cpptools-extension-pack",
-        "ms-vscode.cpptools-themes"
+        "rust-lang.rust-analyzer",
+        "tamasfe.even-better-toml",
+        "vadimcn.vscode-lldb"
       ]
     }
   }
diff --git a/.devcontainer/install-dependencies.sh b/.devcontainer/install-dependencies.sh
index 5e5022f..c26b12d 100644
--- a/.devcontainer/install-dependencies.sh
+++ b/.devcontainer/install-dependencies.sh
@@ -1,21 +1,42 @@
 #!/bin/bash
+# Dev setup for the pure-Rust pixelflux PyO3 extension (replaces the old C++ setup.py build).
+set -euxo pipefail
 
-sudo apt update
-# dependencies
+sudo apt-get update
+# System C libraries the crate links against (x264-sys -> libx264, turbojpeg ->
+# libjpeg-turbo, x11rb -> libxcb + shm + xfixes, VA-API, GBM/DRM, Wayland/xkb) plus
+# the build toolchain (nasm is needed to build the vendored OpenH264 source).
 sudo apt-get install -y \
-  g++ \
-  libjpeg-turbo8-dev \
-  libx11-dev \
-  libxfixes-dev \
-  libxext-dev \
-  libx264-dev \
-  python3-dev \
-  python3-pip \
-  python3-websockets
+  build-essential pkg-config nasm clang libclang-dev curl ca-certificates \
+  libjpeg-turbo8-dev libx264-dev \
+  libva-dev libdrm-dev libgbm-dev \
+  libwayland-dev libxkbcommon-dev \
+  libxcb1-dev libxcb-shm0-dev libxcb-xfixes0-dev \
+  python3-dev python3-pip
 
-# firefox-esr
+# firefox-esr (for end-to-end testing the stream in a browser)
 sudo apt install -y software-properties-common && sudo add-apt-repository ppa:mozillateam/ppa -y && sudo apt install -y firefox-esr
 
-# setup
-pip3 install setuptools
-sudo python3 setup.py install
\ No newline at end of file
+# Rust toolchain (the extension is built via setuptools-rust / cargo).
+if ! command -v cargo >/dev/null 2>&1; then
+  curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
+  # shellcheck source=/dev/null
+  source "$HOME/.cargo/env"
+fi
+
+# FFmpeg 8.1 is REQUIRED by ffmpeg-sys-next =8.1.0 (the VA-API encoder path) and is not
+# in the Ubuntu archive, so pull it from conda-forge (Miniforge), matching the repo's
+# build environment, and point pkg-config at it for the build.
+if [ ! -d "$HOME/miniforge3" ]; then
+  curl -L -o /tmp/miniforge.sh \
+    "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-$(uname)-$(uname -m).sh"
+  bash /tmp/miniforge.sh -b -p "$HOME/miniforge3"
+fi
+source "$HOME/miniforge3/etc/profile.d/conda.sh"
+conda create -y -n pixelflux -c conda-forge "ffmpeg=8.1"
+conda activate pixelflux
+export PKG_CONFIG_PATH="$CONDA_PREFIX/lib/pkgconfig:${PKG_CONFIG_PATH:-}"
+
+# Build and install from source. The conda env holds only FFmpeg, so pip3 is Ubuntu 24.04's system pip, which is PEP 668 "externally managed" and rejects plain installs (fatal under `set -e`); opt out explicitly.
+pip3 install --break-system-packages --upgrade pip setuptools-rust
+pip3 install --break-system-packages .
diff --git a/MANIFEST.in b/MANIFEST.in
index 6059c15..a606bf7 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,5 +1,10 @@
-graft pixelflux
-include pixelflux_wayland/Cargo.toml
-recursive-include pixelflux_wayland/src *
+include pixelflux/Cargo.toml
+include pixelflux/Cargo.lock
+recursive-include pixelflux/src *
+recursive-include pixelflux/nvcodec-sys *
+recursive-exclude pixelflux *.so *.pyc
+prune pixelflux/target
+prune pixelflux/nvcodec-sys/target
 include pyproject.toml
 include README.md
+include setup.py
diff --git a/README.md b/README.md
index 5f5b55a..16c3367 100644
--- a/README.md
+++ b/README.md
@@ -7,57 +7,43 @@
 
 This module provides a Python interface to a high-performance capture library supporting both **X11** and **Wayland** environments. It captures pixel data, detects changes, and encodes modified stripes into JPEG or H.264.
 
-It supports CPU-based encoding (libx264, libjpeg-turbo) as well as hardware-accelerated H.264 encoding via NVIDIA's NVENC and VA-API for Intel/AMD GPUs. The Wayland backend features a **zero-copy pipeline**, passing GPU buffers directly to the encoder to minimize latency and CPU usage.
+It supports CPU-based encoding (x264, JPEG) as well as hardware-accelerated H.264 encoding via NVIDIA's NVENC and VA-API for Intel/AMD GPUs. Both backends share a **zero-copy pipeline** that minimizes copies and latency end to end.
 
 ## Installation
 
-This module relies on native C++ (X11) and Rust (Wayland) extensions that are compiled during installation.
+pixelflux is a single self-contained **Rust** extension (no C/C++ sources) compiled during installation. Both the X11 and Wayland backends, all encoders, and the Python API live in it.
 
 ### 1. Prerequisites
 
-Ensure you have a C++ compiler (`g++`), the Rust toolchain (`cargo`), and development files for Python and the underlying graphics libraries.
+Ensure you have the Rust toolchain (`cargo`), a C/C++ compiler (some dependency crates build vendored native code), Python development files, and the development libraries below.
 
-**Base Dependencies (Debian/Ubuntu):**
 ```bash
+# Install Rust
+curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
+
+# Build dependencies (Debian/Ubuntu)
 sudo apt-get update && \
 sudo apt-get install -y \
-  g++ \
   git \
   curl \
   python3-dev \
+  build-essential pkg-config cmake \
+  nasm \
+  libclang-dev \
   libavcodec-dev \
   libavutil-dev \
-  libjpeg-turbo8-dev \
   libx264-dev \
-  libyuv-dev
-```
-
-**X11 Backend Dependencies:**
-```bash
-sudo apt-get install -y \
-  libx11-dev \
-  libxext-dev \
-  libxfixes-dev
-```
-
-**Wayland Backend Dependencies:**
-To build the Rust-based Wayland backend, you need the Rust toolchain and Wayland/DRM libraries:
-
-```bash
-# Install Rust
-curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh
-
-# Install Libraries
-sudo apt-get install -y \
+  libturbojpeg0-dev \
   libgbm-dev \
   libdrm-dev \
   libwayland-dev \
   libinput-dev \
   libxkbcommon-dev \
-  libva-dev \
-  libclang-dev
+  libva-dev
 ```
 
+> **Notes:** the FFmpeg bindings (`ffmpeg-next` 8.1) require **FFmpeg 8.1**; on distros shipping an older FFmpeg, install a newer build and point `PKG_CONFIG_PATH` at it. X11 capture uses pure-Rust XCB (no `libX11`/`libxcb`/`Xfixes` dev packages needed); colorspace conversion is pure-Rust and the NVENC/CUDA libraries are loaded at runtime (no compile-time NVIDIA packages).
+
 ### 2. Hardware Acceleration (Optional but Recommended)
 *   **NVIDIA (NVENC):** The library detects the NVIDIA driver at runtime. No extra compile-time packages are needed.
 *   **Intel/AMD (VA-API):** Ensure `libva-dev` and `libdrm-dev` are installed. You must also have the correct drivers (e.g., `intel-media-va-driver-non-free` or `mesa-va-drivers`).
@@ -91,6 +77,21 @@ To test launching programs into this backend simply add `WAYLAND_DISPLAY=wayland
 WAYLAND_DISPLAY=wayland-1 glmark2-es2-wayland -s 1920x1080
 ```
 
+### Automatic GPU Selection
+
+Set `SELKIES_AUTO_GPU=true` (preferred, or the legacy `AUTO_GPU=true`) to let pixelflux pick a
+render node automatically instead of supplying one. It enumerates `/sys/class/drm`, pairs each
+`cardN` with its `renderD*` node by PCI device, and skips non-GPU cards (IPMI/VGA). Selection is
+**driver-aware**: NVIDIA nodes are routed to NVENC, while Intel (`i915`) and AMD (`amdgpu`) nodes
+take the VA-API path. Both the X11 and Wayland backends honor this.
+
+```bash
+export SELKIES_AUTO_GPU=true
+```
+
+When auto-selection is off and no node is supplied, an operator-set `DRINODE` (e.g.
+`/dev/dri/renderD128`) is honored before falling back to the software renderer.
+
 ### Capture Settings
 
 The `CaptureSettings` class configures both backends.
@@ -126,7 +127,7 @@ settings.paint_over_jpeg_quality = 90   # Quality for static "paint-over" stripe
 settings.h264_crf = 25                            # CRF value (0-51, lower is better quality/higher bitrate)
 settings.h264_paintover_crf = 18                  # CRF for H.264 paintover on static content. Must be lower than h264_crf to activate.
 settings.h264_paintover_burst_frames = 5          # Number of high-quality frames to send in a burst when a paintover is triggered.
-settings.h264_fullcolor = False                   # Use I444 (full color) instead of I420 for software encoding
+settings.h264_fullcolor = False                   # Use I444/full color (High 4:4:4) instead of I420. Supported by software encoding and NVENC.
 settings.h264_fullframe = True                    # Encode full frames (required for HW accel) instead of just changed stripes
 settings.h264_streaming_mode = False              # Bypass all VNC logic and work like a normal video encoder, higher constant CPU usage for fullscreen gaming/videos
 settings.h264_cbr_mode = False                    # Switches to CBR mode and ignores CRF value. Used in conjunction with h264_bitrate_kbps.
@@ -138,6 +139,18 @@ settings.auto_adjust_screen_capture_size = True   # Allow pixelflux to adjust it
 # >= 0: Enable GPU Encoding on /dev/dri/renderD(128 + index)
 # -1: Disable GPU Encoding (System will try NVENC if available when using the x11 backend, Wayland needs this set to a render node)
 settings.vaapi_render_node_index = -1
+# Explicit render node path (X11). Takes precedence over the positional index above and
+# avoids the index ambiguity. Must be a bytes object, e.g. b"/dev/dri/renderD128".
+settings.vaapi_render_node_path = None
+
+# --- Wire Format / Zero-Copy (X11) ---
+# False (default): prepend the per-stripe header to each packet (the WebSocket path).
+# True: emit the raw encoded payload with no header (for a WebRTC path that frames itself).
+settings.omit_stripe_headers = False
+# Deprecated/ignored: the native frame handed to your callback always owns its buffer
+# (zero-copy on every Python version, see below), so this flag no longer has any effect.
+# Kept only for backward compatibility.
+settings.deferred_free = False
 
 # --- Change Detection & Optimization ---
 settings.use_paint_over_quality = True  # Enable paint-over/IDR requests for static regions
@@ -178,23 +191,40 @@ capture.inject_key(scancode=17, state=1)
 
 ### Stripe Callback
 
-Your callback receives a `ctypes.POINTER(StripeEncodeResult)`.
+Your callback receives a single **frame object** (`StripeFrame` on X11, `WaylandFrame` on
+Wayland). Both support the buffer protocol — `bytes(frame)` / `memoryview(frame)` / `len(frame)`
+— and expose the stripe metadata as attributes:
+
+```python
+def my_callback(frame):
+    # frame.data_type      (0=Unknown, 1=JPEG, 2=H.264)
+    # frame.frame_id
+    # frame.stripe_y_start
+    # frame.stripe_height
+    encoded_data = bytes(frame)          # copy out, or use memoryview(frame) zero-copy (below)
+    # Send encoded_data to the client...
+```
+
+### Zero-Copy Frames
+
+`memoryview(frame)` aliases the native encoder buffer with **no copy**, on **every supported
+Python version (3.9–3.14)**. The frame object owns its buffer and keeps it alive until every
+consumer — including a transport that retained a slice during a partial write — has released its
+view, so the hand-off is memory-safe. (The old `deferred_free` / `OwnedFrame` / PEP 688 /
+Python-3.12-only path is gone; the native buffer protocol does this on all versions.) Hand the
+view straight to an async socket; keep the frame referenced for the duration of the send.
 
 ```python
-def my_callback(result_ptr, user_data):
-    result = result_ptr.contents
-    
-    # Access data
-    # result.type (0=H264, 1=JPEG)
-    # result.frame_id
-    # result.stripe_y_start
-    
-    # Copy data to Python bytes
-    encoded_data = ctypes.string_at(result.data, result.size)
-    
-    # Send encoded_data to client...
+def my_callback(frame):
+    if frame.data_type == 0 or len(frame) == 0:   # nothing to send
+        return
+    # Hand BOTH the view and the frame to your sender (e.g. an asyncio.Queue) so the buffer
+    # outlives the send: the view pins the frame, which frees the buffer once the view drops.
+    queue.put_nowait({"data": memoryview(frame), "owner": frame})
 ```
 
+See `example/screen_to_browser.py` for a complete queue-based usage.
+
 ## Zero-Copy Pipeline (Wayland)
 
 The Wayland backend implements a **Zero-Copy** architecture for hardware encoding.
@@ -203,7 +233,7 @@ The Wayland backend implements a **Zero-Copy** architecture for hardware encodin
 2.  **Export:** This buffer is exported as a `Dmabuf` (file descriptor).
 3.  **Encoding:** The `Dmabuf` is imported directly into the encoder context (NVENC or VA-API) without ever copying pixel data to system RAM (CPU).
 
-**Performance Note:** Enabling **watermarking** or utilizing a render node different from the encoding node will force a "Readback" fallback, copying pixels to the CPU and breaking the zero-copy chain. This increases latency and CPU load.
+**Performance Note:** Software (Pixman) rendering, the absence of a hardware encoder, or utilizing a render node different from the encoding node will force a "Readback" fallback, copying pixels to the CPU and breaking the zero-copy chain (higher latency and CPU load). A watermark does **not** force readback — on the GPU path it is composited into the frame before encoding.
 
 ## Recording Sink (Wayland)
 
@@ -226,18 +256,39 @@ ffmpeg -f h264 -i unix:///tmp/pixelflux_record -c:v copy test.h264
 ffmpeg -f h264 -framerate 60 -i unix:///tmp/pixelflux_record -c:v libx264 -preset fast -crf 23 -pix_fmt yuv420p test.mp4
 ```
 
+## NVIDIA NVENC (X11)
+
+*   **Multi-GPU containers:** When several GPUs are exposed to a container, NVENC is filtered
+    in-process to the GPU you selected (no separate `LD_PRELOAD` shim is required). Verified on
+    NVIDIA drivers 570–595.
+*   **4:4:4 (High 4:4:4):** Set `h264_fullcolor = True` to encode full-chroma H.264 via NVENC;
+    the same `h264_fullcolor` setting also enables 4:4:4 on the software path.
+*   **Force a keyframe on demand:** `capture.request_idr_frame()` forces an IDR frame, e.g. when
+    a client reconnects or its decoder is reset. It routes to whichever encoder is active
+    (NVENC, VA-API, or software) and is a no-op while no capture is running.
+
+### NVENC color conversion
+
+NVENC encodes the captured ARGB directly (the driver's hardware does the ARGB→NV12 colorspace
+conversion in BT.709), so there is **no CUDA Toolkit / NVRTC requirement** — only the NVIDIA
+driver runtime (`libnvidia-encode`, `libcuda`), which is loaded at runtime. Nothing extra to
+install at build or runtime beyond the driver.
+
 ## Features
 
-*   **Hybrid Backend:**
-    *   **X11 (C++):** Legacy support using XShm.
-    *   **Wayland (Rust):** Modern, secure, headless compositor based on [Smithay](https://github.com/Smithay/smithay).
+*   **Dual Backend (one Rust extension):**
+    *   **X11:** XShm capture via pure-Rust XCB, with XFixes cursor and watermark compositing.
+    *   **Wayland:** Modern, secure, headless compositor based on [Smithay](https://github.com/Smithay/smithay).
 *   **Flexible Encoding:**
-    *   **Software:** libx264 (H.264) and libjpeg-turbo (JPEG) with multi-threaded striping.
-    *   **Hardware:** NVIDIA NVENC and VA-API (Intel/AMD) with Zero-Copy support.
+    *   **Software:** x264 (H.264, incl. 4:4:4) and JPEG with multi-threaded striping.
+    *   **Hardware:** NVIDIA NVENC (incl. High 4:4:4, ARGB-direct BT.709, multi-GPU containers, API-version negotiation) and VA-API (Intel/AMD, VA-VPP convert) with Zero-Copy support.
+    *   **Driver-aware GPU auto-selection** via `SELKIES_AUTO_GPU`.
+*   **Zero-Copy Frames (X11 & Wayland):** the native frame object (buffer protocol) hands the encoded buffer to Python with no copy, on every supported Python version (3.9–3.14).
 *   **Smart Bandwidth Management:**
     *   **Change Detection:** Encodes only changed stripes (Software/JPEG mode).
     *   **Paint-Over:** Automatically improves quality for static regions.
     *   **Damage Throttling:** Limits processing during high-motion scenes.
+    *   **On-demand keyframes:** `request_idr_frame()` forces an IDR for reconnecting clients.
 *   **Input Handling:** Built-in input injection for mouse and keyboard (Wayland).
 *   **Cursor Compositing:** Hardware cursor planes or software rendering options.
 *   **Dynamic Watermarking:** Overlay PNGs with static positioning or DVD-screensaver style animation.
diff --git a/example/screen_to_browser.py b/example/screen_to_browser.py
index 1287a04..44417e1 100644
--- a/example/screen_to_browser.py
+++ b/example/screen_to_browser.py
@@ -16,7 +16,7 @@
 import threading
 
 # Third-party library imports
-from pixelflux import CaptureSettings, ScreenCapture, StripeCallback
+from pixelflux import CaptureSettings, ScreenCapture
 
 # ==============================================================================
 # --- BASE CONFIGURATION SETTINGS ---
@@ -47,6 +47,14 @@
 # Force CPU encoding and ignore hardware encoders
 base_capture_settings.use_cpu = False
 
+# --- Zero-copy frames ---
+# The native callback delivers a StripeFrame (buffer-protocol object owning the C
+# buffer). We send memoryview(frame) with no copy on the H.264 path; the JPEG path
+# re-frames with a 2-byte prefix and needs a fresh bytes object. The StripeFrame's
+# buffer protocol pins the buffer alive across every Python version (no PEP 688).
+ZERO_COPY = base_capture_settings.output_mode != 0
+base_capture_settings.deferred_free = True  # ignored by the C-API; kept for parity
+
 # --- H.264 Quality Settings ---
 # Constant Rate Factor (0-51, lower is better quality & higher bitrate).
 # Good values are typically 18-28.
@@ -125,9 +133,14 @@ async def send_stripes_task(websocket, queue):
         # This loop will run until the connection is closed,
         # which will raise a ConnectionClosed exception.
         while True:
-            data_to_send = await queue.get()
-            await websocket.send(data_to_send)
-            queue.task_done()
+            item = await queue.get()
+            try:
+                # item == {'data': <memoryview|bytes>, 'owner': <StripeFrame|None>}. Keeping
+                # `item` (hence the StripeFrame) referenced for the whole send keeps the C
+                # buffer alive until the send releases the view, so zero-copy is safe.
+                await websocket.send(item['data'])
+            finally:
+                queue.task_done()
 
     except websockets.exceptions.ConnectionClosed:
         # This is the expected, clean way to exit the loop when a client disconnects.
@@ -154,8 +167,8 @@ async def websocket_handler(websocket):
 
     client_module = None
     send_task = None
-    # Keep a reference to the callback object to prevent it from being garbage collected
-    c_callback = None
+    # Keep a reference to the callback to prevent it from being garbage collected
+    on_stripe = None
 
     try:
         # --- 1. Configure Capture for this Specific Client ---
@@ -175,23 +188,27 @@ async def websocket_handler(websocket):
         # --- 3. Create a unique callback (closure) for this client ---
         # This function "closes over" client_queue and g_loop, giving it access
         # without needing global lookups or user_data.
-        def client_specific_callback(result_ptr, user_data_ptr):
-            """Callback invoked by pixelflux when a new video stripe is ready."""
-            if result_ptr:
-                result = result_ptr.contents
-                if result.size > 0 and g_loop and not g_loop.is_closed():
-                    raw_data_from_cpp = bytes(result.data[:result.size])
-                    final_payload = raw_data_from_cpp
-                    
-                    if client_settings.output_mode == 0:
-                        final_payload = b"\x03\x00" + raw_data_from_cpp
-                    
-                    asyncio.run_coroutine_threadsafe(
-                        client_queue.put(final_payload), g_loop
-                    )
-        
-        # Convert the Python closure into a C-compatible function pointer
-        c_callback = StripeCallback(client_specific_callback)
+        def on_stripe(frame):
+            """Queue one encoded stripe from pixelflux for this client's sender task.
+
+            Non-JPEG output is queued as a zero-copy memoryview of the frame;
+            JPEG output (output_mode 0) is copied and given its 2-byte prefix.
+            """
+            if not (len(frame) > 0 and g_loop and not g_loop.is_closed()):
+                return
+
+            # Branch on THIS client's mode rather than the module-level ZERO_COPY
+            # (derived from the base settings), so the JPEG prefix is never skipped.
+            if client_settings.output_mode != 0:
+                # The memoryview aliases the native buffer; queueing the frame
+                # alongside it keeps that buffer referenced until the send is done.
+                item_to_queue = {'data': memoryview(frame), 'owner': frame}
+            else:
+                item_to_queue = {'data': b"\x03\x00" + bytes(frame), 'owner': None}
+
+            asyncio.run_coroutine_threadsafe(
+                client_queue.put(item_to_queue), g_loop
+            )
 
         # --- 4. Register and Start Resources for this Client ---
         send_task = asyncio.create_task(send_stripes_task(websocket, client_queue))
@@ -199,13 +216,13 @@ def client_specific_callback(result_ptr, user_data_ptr):
             "module": client_module,
             "queue": client_queue,
             "task": send_task,
-            "callback": c_callback # Store reference to prevent GC
+            "callback": on_stripe # Store reference to prevent GC
         }
 
-        # --- 5. Start the Capture with the correct 3 arguments ---
+        # --- 5. Start the Capture with the settings and callback ---
         loop = asyncio.get_running_loop()
         await loop.run_in_executor(
-            None, client_module.start_capture, client_settings, c_callback
+            None, client_module.start_capture, client_settings, on_stripe
         )
         print(f"Capture started for client {client_id}.")
 
diff --git a/pixelflux/.gitignore b/pixelflux/.gitignore
new file mode 100644
index 0000000..2f7896d
--- /dev/null
+++ b/pixelflux/.gitignore
@@ -0,0 +1 @@
+target/
diff --git a/pixelflux/Cargo.lock b/pixelflux/Cargo.lock
new file mode 100644
index 0000000..328b927
--- /dev/null
+++ b/pixelflux/Cargo.lock
@@ -0,0 +1,2729 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "adler2"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa"
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "aligned"
+version = "0.4.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee4508988c62edf04abd8d92897fca0c2995d907ce1dfeaf369dac3716a40685"
+dependencies = [
+ "as-slice",
+]
+
+[[package]]
+name = "aligned-vec"
+version = "0.6.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b"
+dependencies = [
+ "equator",
+]
+
+[[package]]
+name = "anyhow"
+version = "1.0.102"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
+
+[[package]]
+name = "appendlist"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e149dc73cd30538307e7ffa2acd3d2221148eaeed4871f246657b1c3eaa1cbd2"
+
+[[package]]
+name = "approx"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f2a05fd1bd10b2527e20a2cd32d8873d115b8b39fe219ee25f42a8aca6ba278"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "arbitrary"
+version = "1.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c3d036a3c4ab069c7b410a2ce876bd74808d2d0888a82667669f8e783a898bf1"
+
+[[package]]
+name = "arg_enum_proc_macro"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ae92a5119aa49cdbcf6b9f893fe4e1d98b04ccbf82ee0584ad948a44a734dea"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "arrayvec"
+version = "0.7.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50"
+
+[[package]]
+name = "as-slice"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "516b6b4f0e40d50dcda9365d53964ec74560ad4284da2e7fc97122cd83174516"
+dependencies = [
+ "stable_deref_trait",
+]
+
+[[package]]
+name = "atomic_float"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "628d228f918ac3b82fe590352cc719d30664a0c13ca3a60266fe02c7132d480a"
+
+[[package]]
+name = "autocfg"
+version = "1.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2032f911046de80f0a198e0901378627c33f59ea0ac00e363d481118bd70a53"
+
+[[package]]
+name = "av-scenechange"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f321d77c20e19b92c39e7471cf986812cbb46659d2af674adc4331ef3f18394"
+dependencies = [
+ "aligned",
+ "anyhow",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "log",
+ "num-rational",
+ "num-traits",
+ "pastey",
+ "rayon",
+ "thiserror 2.0.18",
+ "v_frame",
+ "y4m",
+]
+
+[[package]]
+name = "av1-grain"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8cfddb07216410377231960af4fcab838eaa12e013417781b78bd95ee22077f8"
+dependencies = [
+ "anyhow",
+ "arrayvec",
+ "log",
+ "nom 8.0.0",
+ "num-rational",
+ "v_frame",
+]
+
+[[package]]
+name = "avif-serialize"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e7178fe5f7d460b13895ebb9dcb28a3a6216d2df2574a0806cb51b555d297f38"
+dependencies = [
+ "arrayvec",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.72.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "993776b509cfb49c750f11b8f07a46fa23e0a1386ffc01fb1e7d343efc387895"
+dependencies = [
+ "bitflags 2.13.0",
+ "cexpr",
+ "clang-sys",
+ "itertools 0.13.0",
+ "log",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex 1.3.0",
+ "syn",
+]
+
+[[package]]
+name = "bit_field"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e4b40c7323adcfc0a41c4b88143ed58346ff65a288fc144329c5c45e05d70c6"
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "bitflags"
+version = "2.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b4388bee8683e3d04af747c73422af53102d2bd24d9eadb6cbc100baef4b43f8"
+
+[[package]]
+name = "bitstream-io"
+version = "4.10.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7eff00be299a18769011411c9def0d827e8f2d7bf0c3dbf53633147a8867fd1f"
+dependencies = [
+ "no_std_io2",
+]
+
+[[package]]
+name = "block-buffer"
+version = "0.10.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71"
+dependencies = [
+ "generic-array",
+]
+
+[[package]]
+name = "built"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5c0e531d93d39c34eef561e929e8a7f86d77a5af08aac4f6d6e39976c51858e9"
+
+[[package]]
+name = "bumpalo"
+version = "3.20.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72f5acc6cb2ba439de613abc23857ec3d78374d8ed5ac84e9d11336e87da8649"
+
+[[package]]
+name = "bytemuck"
+version = "1.25.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec"
+dependencies = [
+ "bytemuck_derive",
+]
+
+[[package]]
+name = "bytemuck_derive"
+version = "1.10.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "byteorder-lite"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495"
+
+[[package]]
+name = "calloop"
+version = "0.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fba7adb4dd5aa98e5553510223000e7148f621165ec5f9acd7113f6ca4995298"
+dependencies = [
+ "bitflags 2.13.0",
+ "log",
+ "polling",
+ "rustix 0.38.44",
+ "slab",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "calloop"
+version = "0.14.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4dbf9978365bac10f54d1d4b04f7ce4427e51f71d61f2fe15e3fed5166474df7"
+dependencies = [
+ "bitflags 2.13.0",
+ "polling",
+ "rustix 1.1.4",
+ "slab",
+ "tracing",
+]
+
+[[package]]
+name = "cc"
+version = "1.2.64"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dad887fd958be91b5098c0248def011f4523ab786cd411be668777e55063501f"
+dependencies = [
+ "find-msvc-tools",
+ "jobserver",
+ "libc",
+ "shlex 2.0.1",
+]
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom 7.1.3",
+]
+
+[[package]]
+name = "cfg-expr"
+version = "0.20.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb693542bcafa528e198be0ebd9d3632ca5b7c93dbe7237460e199910835997c"
+dependencies = [
+ "smallvec",
+ "target-lexicon",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "cgmath"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a98d30140e3296250832bbaaff83b27dcd6fa3cc70fb6f1f3e5c9c0023b5317"
+dependencies = [
+ "approx",
+ "num-traits",
+]
+
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "cmake"
+version = "0.1.58"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c0f78a02292a74a88ac736019ab962ece0bc380e3f977bf72e376c5d78ff0678"
+dependencies = [
+ "cc",
+]
+
+[[package]]
+name = "color_quant"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d7b894f5411737b7867f4827955924d7c254fc9f4d91a6aad6b097804b1018b"
+
+[[package]]
+name = "concurrent-queue"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "cpufeatures"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "59ed5838eebb26a2bb2e58f6d5b5316989ae9d08bab10e0e6d103e656d1b0280"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "crc32fast"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511"
+dependencies = [
+ "cfg-if",
+]
+
+[[package]]
+name = "crossbeam-channel"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-deque"
+version = "0.8.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
+dependencies = [
+ "crossbeam-epoch",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-epoch"
+version = "0.9.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
+dependencies = [
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "crossbeam-utils"
+version = "0.8.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
+
+[[package]]
+name = "crunchy"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5"
+
+[[package]]
+name = "crypto-common"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a"
+dependencies = [
+ "generic-array",
+ "typenum",
+]
+
+[[package]]
+name = "cursor-icon"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f27ae1dd37df86211c42e150270f82743308803d90a6f6e6651cd730d5e1732f"
+
+[[package]]
+name = "digest"
+version = "0.10.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292"
+dependencies = [
+ "block-buffer",
+ "crypto-common",
+]
+
+[[package]]
+name = "dlib"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab8ecd87370524b461f8557c119c405552c396ed91fc0a8eec68679eab26f94a"
+dependencies = [
+ "libloading",
+]
+
+[[package]]
+name = "downcast-rs"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
+
+[[package]]
+name = "drm"
+version = "0.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a0f8a69e60d75ae7dab4ef26a59ca99f2a89d4c142089b537775ae0c198bdcde"
+dependencies = [
+ "bitflags 2.13.0",
+ "bytemuck",
+ "drm-ffi 0.7.1",
+ "drm-fourcc",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "drm"
+version = "0.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "80bc8c5c6c2941f70a55c15f8d9f00f9710ebda3ffda98075f996a0e6c92756f"
+dependencies = [
+ "bitflags 2.13.0",
+ "bytemuck",
+ "drm-ffi 0.9.1",
+ "drm-fourcc",
+ "libc",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "drm-ffi"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41334f8405792483e32ad05fbb9c5680ff4e84491883d2947a4757dc54cb2ac6"
+dependencies = [
+ "drm-sys 0.6.1",
+ "rustix 0.38.44",
+]
+
+[[package]]
+name = "drm-ffi"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "51a91c9b32ac4e8105dec255e849e0d66e27d7c34d184364fb93e469db08f690"
+dependencies = [
+ "drm-sys 0.8.1",
+ "rustix 1.1.4",
+]
+
+[[package]]
+name = "drm-fourcc"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0aafbcdb8afc29c1a7ee5fbe53b5d62f4565b35a042a662ca9fecd0b54dae6f4"
+
+[[package]]
+name = "drm-sys"
+version = "0.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2d09ff881f92f118b11105ba5e34ff8f4adf27b30dae8f12e28c193af1c83176"
+dependencies = [
+ "libc",
+ "linux-raw-sys 0.6.5",
+]
+
+[[package]]
+name = "drm-sys"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8e1361066d91f5ffccff060a3c3be9c3ecde15be2959c1937595f7a82a9f8"
+dependencies = [
+ "libc",
+ "linux-raw-sys 0.9.4",
+]
+
+[[package]]
+name = "either"
+version = "1.16.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91622ff5e7162018101f2fea40d6ebf4a78bbe5a49736a2020649edf9693679e"
+
+[[package]]
+name = "equator"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc"
+dependencies = [
+ "equator-macro",
+]
+
+[[package]]
+name = "equator-macro"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "equivalent"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
+
+[[package]]
+name = "errno"
+version = "0.3.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "exr"
+version = "1.74.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4300e043a56aa2cb633c01af81ca8f699a321879a7854d3896a0ba89056363be"
+dependencies = [
+ "bit_field",
+ "half",
+ "lebe",
+ "miniz_oxide",
+ "rayon-core",
+ "smallvec",
+ "zune-inflate",
+]
+
+[[package]]
+name = "fastrand"
+version = "2.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6"
+
+[[package]]
+name = "fax"
+version = "0.2.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "caf1079563223d5d59d83c85886a56e586cfd5c1a26292e971a0fa266531ac5a"
+
+[[package]]
+name = "fdeflate"
+version = "0.3.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e6853b52649d4ac5c0bd02320cddc5ba956bdb407c4b75a2c6b75bf51500f8c"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "ffmpeg-sys-next"
+version = "8.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a314bc0e022a33a99567ed4bd2576bd58ffd8fcff7891c29194cfecc26a62547"
+dependencies = [
+ "bindgen",
+ "cc",
+ "libc",
+ "num_cpus",
+ "pkg-config",
+ "vcpkg",
+]
+
+[[package]]
+name = "find-msvc-tools"
+version = "0.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
+
+[[package]]
+name = "flate2"
+version = "1.1.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c"
+dependencies = [
+ "crc32fast",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "foldhash"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
+
+[[package]]
+name = "gbm"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "313702b30cdeb83ddc72bc14dcee67803cd0ae2d12282ea06e368c25a900c844"
+dependencies = [
+ "bitflags 1.3.2",
+ "drm 0.11.1",
+ "drm-fourcc",
+ "gbm-sys 0.3.1",
+ "libc",
+ "wayland-backend",
+ "wayland-server",
+]
+
+[[package]]
+name = "gbm"
+version = "0.18.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce852e998d3ca5e4a97014fb31c940dc5ef344ec7d364984525fd11e8a547e6a"
+dependencies = [
+ "bitflags 2.13.0",
+ "drm 0.14.1",
+ "drm-fourcc",
+ "gbm-sys 0.4.0",
+ "libc",
+ "wayland-backend",
+ "wayland-server",
+]
+
+[[package]]
+name = "gbm-sys"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9cc2f64de9fa707b5c6b2d2f10d7a7e49e845018a9f5685891eb40d3bab2538"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "gbm-sys"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c13a5f2acc785d8fb6bf6b7ab6bfb0ef5dad4f4d97e8e70bb8e470722312f76f"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "gcd"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1d758ba1b47b00caf47f24925c0074ecb20d6dfcffe7f6d53395c0465674841a"
+
+[[package]]
+name = "generic-array"
+version = "0.14.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a"
+dependencies = [
+ "typenum",
+ "version_check",
+]
+
+[[package]]
+name = "gethostname"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1bd49230192a3797a9a4d6abe9b3eed6f7fa4c8a8a4947977c6f80025f92cbd8"
+dependencies = [
+ "rustix 1.1.4",
+ "windows-link",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi 5.3.0",
+ "wasip2",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "r-efi 6.0.0",
+ "wasip2",
+ "wasip3",
+]
+
+[[package]]
+name = "gif"
+version = "0.14.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ee8cfcc411d9adbbaba82fb72661cc1bcca13e8bba98b364e62b2dba8f960159"
+dependencies = [
+ "color_quant",
+ "weezl",
+]
+
+[[package]]
+name = "gl_generator"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1a95dfc23a2b4a9a2f5ab41d194f8bfda3cabec42af4e39f08c339eb2a0c124d"
+dependencies = [
+ "khronos_api",
+ "log",
+ "xml-rs",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
+[[package]]
+name = "half"
+version = "2.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b"
+dependencies = [
+ "cfg-if",
+ "crunchy",
+ "zerocopy",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.15.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1"
+dependencies = [
+ "foldhash",
+]
+
+[[package]]
+name = "hashbrown"
+version = "0.17.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a"
+
+[[package]]
+name = "heck"
+version = "0.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
+
+[[package]]
+name = "hermit-abi"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024"
+
+[[package]]
+name = "hermit-abi"
+version = "0.5.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c"
+
+[[package]]
+name = "id-arena"
+version = "2.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
+
+[[package]]
+name = "image"
+version = "0.25.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6506c6c10786659413faa717ceebcb8f70731c0a60cbae39795fdf114519c1a"
+dependencies = [
+ "bytemuck",
+ "byteorder-lite",
+ "color_quant",
+ "exr",
+ "gif",
+ "image-webp",
+ "moxcms",
+ "num-traits",
+ "png",
+ "qoi",
+ "ravif",
+ "rayon",
+ "rgb",
+ "tiff",
+ "zune-core 0.5.1",
+ "zune-jpeg 0.5.15",
+]
+
+[[package]]
+name = "image-webp"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "525e9ff3e1a4be2fbea1fdf0e98686a6d98b4d8f937e1bf7402245af1909e8c3"
+dependencies = [
+ "byteorder-lite",
+ "quick-error",
+]
+
+[[package]]
+name = "imgref"
+version = "1.12.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "89194689a993ab15268672e99e7b0e19da2da3268ac682e8f02d29d4d1434cd7"
+
+[[package]]
+name = "indexmap"
+version = "2.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9"
+dependencies = [
+ "equivalent",
+ "hashbrown 0.17.1",
+ "serde",
+ "serde_core",
+]
+
+[[package]]
+name = "indoc"
+version = "2.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "79cf5c93f93228cf8efb3ba362535fb11199ac548a09ce117c9b1adc3030d706"
+dependencies = [
+ "rustversion",
+]
+
+[[package]]
+name = "input"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fbdc09524a91f9cacd26f16734ff63d7dc650daffadd2b6f84d17a285bd875a9"
+dependencies = [
+ "bitflags 2.13.0",
+ "input-sys",
+ "libc",
+ "udev",
+]
+
+[[package]]
+name = "input-sys"
+version = "1.19.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "36eee07d8e02bd95bf52b2e642cf13d33701b94c6e4b04fbf1d1fb07e9cb19e7"
+
+[[package]]
+name = "interpolate_name"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c34819042dc3d3971c46c2190835914dfbe0c3c13f61449b2997f4e9722dfa60"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "io-lifetimes"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
+dependencies = [
+ "hermit-abi 0.3.9",
+ "libc",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "itertools"
+version = "0.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itertools"
+version = "0.14.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285"
+dependencies = [
+ "either",
+]
+
+[[package]]
+name = "itoa"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
+
+[[package]]
+name = "jobserver"
+version = "0.1.34"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33"
+dependencies = [
+ "getrandom 0.3.4",
+ "libc",
+]
+
+[[package]]
+name = "khronos_api"
+version = "3.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc"
+
+[[package]]
+name = "leb128fmt"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2"
+
+[[package]]
+name = "lebe"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a79a3332a6609480d7d0c9eab957bca6b455b91bb84e66d19f5ff66294b85b8"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "libfuzzer-sys"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9fd2f41a1cba099f79a0b6b6c35656cf7c03351a7bae8ff0f28f25270f929d2"
+dependencies = [
+ "arbitrary",
+ "cc",
+]
+
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link",
+]
+
+[[package]]
+name = "libudev-sys"
+version = "0.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3c8469b4a23b962c1396b9b451dda50ef5b283e8dd309d69033475fa9b334324"
+dependencies = [
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.4.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.6.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2a385b1be4e5c3e362ad2ffa73c392e53f031eaa5b7d648e64cd87f27f6063d7"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cd945864f07fe9f5371a27ad7b52a172b4b499999f1d97574c9fa68373937e12"
+
+[[package]]
+name = "linux-raw-sys"
+version = "0.12.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
+
+[[package]]
+name = "log"
+version = "0.4.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "953f07c43838f8e6f9758cab68bf5bed85465e7587ebe0b823f1bcd81978ad3a"
+
+[[package]]
+name = "loop9"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fae87c125b03c1d2c0150c90365d7d6bcc53fb73a9acaef207d2d065860f062"
+dependencies = [
+ "imgref",
+]
+
+[[package]]
+name = "maybe-rayon"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ea1f30cedd69f0a2954655f7188c6a834246d2bcf1e315e2ac40c4b24dc9519"
+dependencies = [
+ "cfg-if",
+ "rayon",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "memmap2"
+version = "0.9.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3"
+dependencies = [
+ "libc",
+]
+
+[[package]]
+name = "memoffset"
+version = "0.9.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "miniz_oxide"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316"
+dependencies = [
+ "adler2",
+ "simd-adler32",
+]
+
+[[package]]
+name = "moxcms"
+version = "0.7.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ac9557c559cd6fc9867e122e20d2cbefc9ca29d80d027a8e39310920ed2f0a97"
+dependencies = [
+ "num-traits",
+ "pxfm",
+]
+
+[[package]]
+name = "nasm-rs"
+version = "0.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "706bf8a5e8c8ddb99128c3291d31bd21f4bcde17f0f4c20ec678d85c74faa149"
+dependencies = [
+ "log",
+]
+
+[[package]]
+name = "new_debug_unreachable"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086"
+
+[[package]]
+name = "no_std_io2"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "418abd1b6d34fbf6cae440dc874771b0525a604428704c76e48b29a5e67b8003"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nom"
+version = "8.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "noop_proc_macro"
+version = "0.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0676bb32a98c1a483ce53e500a81ad9c3d5b3f7c920c28c24e9cb0980d0b5bc8"
+
+[[package]]
+name = "num-bigint"
+version = "0.4.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9"
+dependencies = [
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-derive"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "num-integer"
+version = "0.1.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "num-rational"
+version = "0.4.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824"
+dependencies = [
+ "num-bigint",
+ "num-integer",
+ "num-traits",
+]
+
+[[package]]
+name = "num-traits"
+version = "0.2.19"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841"
+dependencies = [
+ "autocfg",
+]
+
+[[package]]
+name = "num_cpus"
+version = "1.17.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b"
+dependencies = [
+ "hermit-abi 0.5.2",
+ "libc",
+]
+
+[[package]]
+name = "nvcodec-sys"
+version = "0.1.0"
+
+[[package]]
+name = "once_cell"
+version = "1.21.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50"
+
+[[package]]
+name = "openh264"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fef0655e143954965073374f5390411131590d0bc793208aabf7c6785430fa00"
+dependencies = [
+ "openh264-sys2",
+ "wide",
+]
+
+[[package]]
+name = "openh264-sys2"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ad97e73d98000c46623ec4719e4fd2d7f79076a75350af8ae3878abf682c071d"
+dependencies = [
+ "cc",
+ "nasm-rs",
+ "walkdir",
+]
+
+[[package]]
+name = "paste"
+version = "1.0.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
+
+[[package]]
+name = "pastey"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "35fb2e5f958ec131621fdd531e9fc186ed768cbe395337403ae56c17a74c68ec"
+
+[[package]]
+name = "pin-project-lite"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
+
+[[package]]
+name = "pixelflux"
+version = "1.6.4"
+dependencies = [
+ "calloop 0.12.4",
+ "crossbeam-channel",
+ "ffmpeg-sys-next",
+ "gbm 0.14.2",
+ "image",
+ "libc",
+ "libloading",
+ "nvcodec-sys",
+ "openh264",
+ "openh264-sys2",
+ "pyo3",
+ "rayon",
+ "smithay",
+ "turbojpeg",
+ "wayland-protocols 0.31.2",
+ "wayland-server",
+ "x11rb",
+ "x264-sys",
+ "xcursor",
+ "yuv",
+]
+
+[[package]]
+name = "pixman"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cea217d496c19ac0a8e502b37078e1f683d16344adee9eb247a5d57c165e1edf"
+dependencies = [
+ "drm-fourcc",
+ "paste",
+ "pixman-sys",
+ "thiserror 1.0.69",
+]
+
+[[package]]
+name = "pixman-sys"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1a0483e89e81d7915defe83c51f23f6800594d64f6f4a21253ce87fd8444ada"
+
+[[package]]
+name = "pkg-config"
+version = "0.3.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e"
+
+[[package]]
+name = "png"
+version = "0.18.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61"
+dependencies = [
+ "bitflags 2.13.0",
+ "crc32fast",
+ "fdeflate",
+ "flate2",
+ "miniz_oxide",
+]
+
+[[package]]
+name = "polling"
+version = "3.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d0e4f59085d47d8241c88ead0f274e8a0cb551f3625263c05eb8dd897c34218"
+dependencies = [
+ "cfg-if",
+ "concurrent-queue",
+ "hermit-abi 0.5.2",
+ "pin-project-lite",
+ "rustix 1.1.4",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "portable-atomic"
+version = "1.13.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49"
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
+[[package]]
+name = "prettyplease"
+version = "0.2.37"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b"
+dependencies = [
+ "proc-macro2",
+ "syn",
+]
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "profiling"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3d595e54a326bc53c1c197b32d295e14b169e3cfeaa8dc82b529f947fba6bcf5"
+dependencies = [
+ "profiling-procmacros",
+]
+
+[[package]]
+name = "profiling-procmacros"
+version = "1.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4488a4a36b9a4ba6b9334a32a39971f77c1436ec82c38707bce707699cc3bbcb"
+dependencies = [
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pxfm"
+version = "0.1.29"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f"
+
+[[package]]
+name = "pyo3"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ab53c047fcd1a1d2a8820fe84f05d6be69e9526be40cb03b73f86b6b03e6d87d"
+dependencies = [
+ "indoc",
+ "libc",
+ "memoffset",
+ "once_cell",
+ "portable-atomic",
+ "pyo3-build-config",
+ "pyo3-ffi",
+ "pyo3-macros",
+ "unindent",
+]
+
+[[package]]
+name = "pyo3-build-config"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b455933107de8642b4487ed26d912c2d899dec6114884214a0b3bb3be9261ea6"
+dependencies = [
+ "target-lexicon",
+]
+
+[[package]]
+name = "pyo3-ffi"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1c85c9cbfaddf651b1221594209aed57e9e5cff63c4d11d1feead529b872a089"
+dependencies = [
+ "libc",
+ "pyo3-build-config",
+]
+
+[[package]]
+name = "pyo3-macros"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0a5b10c9bf9888125d917fb4d2ca2d25c8df94c7ab5a52e13313a07e050a3b02"
+dependencies = [
+ "proc-macro2",
+ "pyo3-macros-backend",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "pyo3-macros-backend"
+version = "0.27.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03b51720d314836e53327f5871d4c0cfb4fb37cc2c4a11cc71907a86342c40f9"
+dependencies = [
+ "heck",
+ "proc-macro2",
+ "pyo3-build-config",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "qoi"
+version = "0.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f6d64c71eb498fe9eae14ce4ec935c555749aef511cca85b5568910d6e48001"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "quick-error"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a993555f31e5a609f617c12db6250dedcac1b0a85076912c436e6fc9b2c8e6a3"
+
+[[package]]
+name = "quick-xml"
+version = "0.39.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cdcc8dd4e2f670d309a5f0e83fe36dfdc05af317008fea29144da1a2ac858e5e"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
+[[package]]
+name = "r-efi"
+version = "6.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
+
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "rav1e"
+version = "0.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "43b6dd56e85d9483277cde964fd1bdb0428de4fec5ebba7540995639a21cb32b"
+dependencies = [
+ "aligned-vec",
+ "arbitrary",
+ "arg_enum_proc_macro",
+ "arrayvec",
+ "av-scenechange",
+ "av1-grain",
+ "bitstream-io",
+ "built",
+ "cfg-if",
+ "interpolate_name",
+ "itertools 0.14.0",
+ "libc",
+ "libfuzzer-sys",
+ "log",
+ "maybe-rayon",
+ "new_debug_unreachable",
+ "noop_proc_macro",
+ "num-derive",
+ "num-traits",
+ "paste",
+ "profiling",
+ "rand",
+ "rand_chacha",
+ "simd_helpers",
+ "thiserror 2.0.18",
+ "v_frame",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "ravif"
+version = "0.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef69c1990ceef18a116855938e74793a5f7496ee907562bd0857b6ac734ab285"
+dependencies = [
+ "avif-serialize",
+ "imgref",
+ "loop9",
+ "quick-error",
+ "rav1e",
+ "rayon",
+ "rgb",
+]
+
+[[package]]
+name = "rayon"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d"
+dependencies = [
+ "either",
+ "rayon-core",
+]
+
+[[package]]
+name = "rayon-core"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91"
+dependencies = [
+ "crossbeam-deque",
+ "crossbeam-utils",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
+
+[[package]]
+name = "rgb"
+version = "0.8.53"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b34b781b31e5d73e9fbc8689c70551fd1ade9a19e3e28cfec8580a79290cc4"
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
+[[package]]
+name = "rustix"
+version = "0.38.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154"
+dependencies = [
+ "bitflags 2.13.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.15",
+ "windows-sys 0.59.0",
+]
+
+[[package]]
+name = "rustix"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190"
+dependencies = [
+ "bitflags 2.13.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.12.1",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "safe_arch"
+version = "0.7.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96b02de82ddbe1b636e6170c21be622223aea188ef2e139be0a5b219ec215323"
+dependencies = [
+ "bytemuck",
+]
+
+[[package]]
+name = "same-file"
+version = "1.0.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
+dependencies = [
+ "winapi-util",
+]
+
+[[package]]
+name = "scoped-tls"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
+
+[[package]]
+name = "semver"
+version = "1.0.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd"
+
+[[package]]
+name = "serde"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "serde_core"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad"
+dependencies = [
+ "serde_derive",
+]
+
+[[package]]
+name = "serde_derive"
+version = "1.0.228"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "serde_json"
+version = "1.0.150"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e8014e44b4736ed0538adeecded0fce2a272f22dc9578a7eb6b2d9993c74cfb9"
+dependencies = [
+ "itoa",
+ "memchr",
+ "serde",
+ "serde_core",
+ "zmij",
+]
+
+[[package]]
+name = "serde_spanned"
+version = "1.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6662b5879511e06e8999a8a235d848113e942c9124f211511b16466ee2995f26"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "sha2"
+version = "0.10.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283"
+dependencies = [
+ "cfg-if",
+ "cpufeatures",
+ "digest",
+]
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "shlex"
+version = "2.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f8fadd59c855ef2080decdef8ff161eb6661b86933c9d82e5ba29dc602a55aba"
+
+[[package]]
+name = "simd-adler32"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214"
+
+[[package]]
+name = "simd_helpers"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "95890f873bec569a0362c235787f3aca6e1e887302ba4840839bcc6459c42da6"
+dependencies = [
+ "quote",
+]
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
+[[package]]
+name = "smallvec"
+version = "1.15.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ed6a63f02c8539c91a8685a86f4099661ba3da017932f6ebbea6de3f0fa7c90"
+
+[[package]]
+name = "smithay"
+version = "0.7.0"
+source = "git+https://github.com/Smithay/smithay?rev=ca932e042fa9ad150605c150a86275b85f9ad5b3#ca932e042fa9ad150605c150a86275b85f9ad5b3"
+dependencies = [
+ "appendlist",
+ "atomic_float",
+ "bitflags 2.13.0",
+ "calloop 0.14.4",
+ "cc",
+ "cgmath",
+ "cursor-icon",
+ "downcast-rs",
+ "drm 0.14.1",
+ "drm-ffi 0.9.1",
+ "drm-fourcc",
+ "errno",
+ "gbm 0.18.0",
+ "gl_generator",
+ "indexmap",
+ "input",
+ "libc",
+ "libloading",
+ "pixman",
+ "pkg-config",
+ "profiling",
+ "rand",
+ "rustix 1.1.4",
+ "sha2",
+ "smallvec",
+ "tempfile",
+ "thiserror 2.0.18",
+ "tracing",
+ "udev",
+ "wayland-backend",
+ "wayland-protocols 0.32.12",
+ "wayland-protocols-misc",
+ "wayland-protocols-wlr",
+ "wayland-server",
+ "wayland-sys",
+ "xkbcommon",
+]
+
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
+[[package]]
+name = "syn"
+version = "2.0.117"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
+[[package]]
+name = "system-deps"
+version = "7.0.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "396a35feb67335377e0251fcbc1092fc85c484bd4e3a7a54319399da127796e7"
+dependencies = [
+ "cfg-expr",
+ "heck",
+ "pkg-config",
+ "toml",
+ "version-compare",
+]
+
+[[package]]
+name = "target-lexicon"
+version = "0.13.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "adb6935a6f5c20170eeceb1a3835a49e12e19d792f6dd344ccc76a985ca5a6ca"
+
+[[package]]
+name = "tempfile"
+version = "3.27.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
+dependencies = [
+ "fastrand",
+ "getrandom 0.4.2",
+ "once_cell",
+ "rustix 1.1.4",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "thiserror"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52"
+dependencies = [
+ "thiserror-impl 1.0.69",
+]
+
+[[package]]
+name = "thiserror"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
+dependencies = [
+ "thiserror-impl 2.0.18",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "1.0.69"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "thiserror-impl"
+version = "2.0.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tiff"
+version = "0.10.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af9605de7fee8d9551863fd692cce7637f548dbd9db9180fcc07ccc6d26c336f"
+dependencies = [
+ "fax",
+ "flate2",
+ "half",
+ "quick-error",
+ "weezl",
+ "zune-jpeg 0.4.21",
+]
+
+[[package]]
+name = "toml"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "81f3d15e84cbcd896376e6730314d59fb5a87f31e4b038454184435cd57defee"
+dependencies = [
+ "indexmap",
+ "serde_core",
+ "serde_spanned",
+ "toml_datetime",
+ "toml_parser",
+ "toml_writer",
+ "winnow",
+]
+
+[[package]]
+name = "toml_datetime"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3165f65f62e28e0115a00b2ebdd37eb6f3b641855f9d636d3cd4103767159ad7"
+dependencies = [
+ "serde_core",
+]
+
+[[package]]
+name = "toml_parser"
+version = "1.1.2+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a2abe9b86193656635d2411dc43050282ca48aa31c2451210f4202550afb7526"
+dependencies = [
+ "winnow",
+]
+
+[[package]]
+name = "toml_writer"
+version = "1.1.1+spec-1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "756daf9b1013ebe47a8776667b466417e2d4c5679d441c26230efd9ef78692db"
+
+[[package]]
+name = "tracing"
+version = "0.1.44"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100"
+dependencies = [
+ "log",
+ "pin-project-lite",
+ "tracing-attributes",
+ "tracing-core",
+]
+
+[[package]]
+name = "tracing-attributes"
+version = "0.1.31"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "tracing-core"
+version = "0.1.36"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a"
+dependencies = [
+ "once_cell",
+]
+
+[[package]]
+name = "turbojpeg"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b12ab1b96184a3879b4b16a74ee67436a5ef8833c0e6954a8cbf47ec9036559"
+dependencies = [
+ "gcd",
+ "libc",
+ "thiserror 1.0.69",
+ "turbojpeg-sys",
+]
+
+[[package]]
+name = "turbojpeg-sys"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49b5a974bc59382d35927e3a07eef2ba9a24fe7227cefe52ec44b508e1b90f86"
+dependencies = [
+ "anyhow",
+ "cmake",
+ "libc",
+ "pkg-config",
+]
+
+[[package]]
+name = "typenum"
+version = "1.20.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6f5e870be6c3b371b77fe0ee0bafb859fa4964b4404c27de1d380043c4dda20"
+
+[[package]]
+name = "udev"
+version = "0.9.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af4e37e9ea4401fc841ff54b9ddfc9be1079b1e89434c1a6a865dd68980f7e9f"
+dependencies = [
+ "io-lifetimes",
+ "libc",
+ "libudev-sys",
+ "pkg-config",
+]
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "unicode-xid"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
+
+[[package]]
+name = "unindent"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7264e107f553ccae879d21fbea1d6724ac785e8c3bfc762137959b5802826ef3"
+
+[[package]]
+name = "v_frame"
+version = "0.3.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "666b7727c8875d6ab5db9533418d7c764233ac9c0cff1d469aec8fa127597be2"
+dependencies = [
+ "aligned-vec",
+ "num-traits",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "vcpkg"
+version = "0.2.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
+
+[[package]]
+name = "version-compare"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "03c2856837ef78f57382f06b2b8563a2f512f7185d732608fd9176cb3b8edf0e"
+
+[[package]]
+name = "version_check"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
+
+[[package]]
+name = "walkdir"
+version = "2.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
+dependencies = [
+ "same-file",
+ "winapi-util",
+]
+
+[[package]]
+name = "wasip2"
+version = "1.0.4+wasi-0.2.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b67efb37e106e55ce722a510d6b5f9c17f083e5fc79afc2badeb12cc313d9487"
+dependencies = [
+ "wit-bindgen 0.57.1",
+]
+
+[[package]]
+name = "wasip3"
+version = "0.4.0+wasi-0.3.0-rc-2026-01-06"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5"
+dependencies = [
+ "wit-bindgen 0.51.0",
+]
+
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.125"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ddb3f79143bced6de84270411622a2699cee572fc0875aeaf1e7867cf9fca1a"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.125"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4e21a184b13fb19e157296e2c46056aec9092264fab83e4ba59e68c61b323c3d"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.125"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fecefd9c35bd935a20fc3fc344b5f29138961e4f47fb03297d88f2587afb5ebd"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.125"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "23939e44bb9a5d7576fa2b563dc2e136628f1224e88a8deed09e04858b77871f"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "wasm-encoder"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319"
+dependencies = [
+ "leb128fmt",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasm-metadata"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909"
+dependencies = [
+ "anyhow",
+ "indexmap",
+ "wasm-encoder",
+ "wasmparser",
+]
+
+[[package]]
+name = "wasmparser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe"
+dependencies = [
+ "bitflags 2.13.0",
+ "hashbrown 0.15.5",
+ "indexmap",
+ "semver",
+]
+
+[[package]]
+name = "wayland-backend"
+version = "0.3.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2857dd20b54e916ec7253b3d6b4d5c4d7d4ca2c33c2e11c6c76a99bd8744755d"
+dependencies = [
+ "cc",
+ "downcast-rs",
+ "rustix 1.1.4",
+ "scoped-tls",
+ "smallvec",
+ "wayland-sys",
+]
+
+[[package]]
+name = "wayland-protocols"
+version = "0.31.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f81f365b8b4a97f422ac0e8737c438024b5951734506b0e1d775c73030561f4"
+dependencies = [
+ "bitflags 2.13.0",
+ "wayland-backend",
+ "wayland-scanner",
+ "wayland-server",
+]
+
+[[package]]
+name = "wayland-protocols"
+version = "0.32.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "563a85523cade2429938e790815fd7319062103b9f4a2dc806e9b53b95982d8f"
+dependencies = [
+ "bitflags 2.13.0",
+ "wayland-backend",
+ "wayland-scanner",
+ "wayland-server",
+]
+
+[[package]]
+name = "wayland-protocols-misc"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e9567599ef23e09b8dad6e429e5738d4509dfc46b3b21f32841a304d16b29c8"
+dependencies = [
+ "bitflags 2.13.0",
+ "wayland-backend",
+ "wayland-protocols 0.32.12",
+ "wayland-scanner",
+ "wayland-server",
+]
+
+[[package]]
+name = "wayland-protocols-wlr"
+version = "0.3.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eb04e52f7836d7c7976c78ca0250d61e33873c34156a2a1fc9474828ec268234"
+dependencies = [
+ "bitflags 2.13.0",
+ "wayland-backend",
+ "wayland-protocols 0.32.12",
+ "wayland-scanner",
+ "wayland-server",
+]
+
+[[package]]
+name = "wayland-scanner"
+version = "0.31.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c324a910fd86ebdc364a3e61ec1f11737d3b1d6c273c0239ee8ff4bc0d24b4a"
+dependencies = [
+ "proc-macro2",
+ "quick-xml",
+ "quote",
+]
+
+[[package]]
+name = "wayland-server"
+version = "0.31.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cc1846eb04c49182e04f4a099e2a830a2b745610bbc1d61246e206f29c7000a0"
+dependencies = [
+ "bitflags 2.13.0",
+ "downcast-rs",
+ "rustix 1.1.4",
+ "wayland-backend",
+ "wayland-scanner",
+]
+
+[[package]]
+name = "wayland-sys"
+version = "0.31.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d8eab23fefc9e41f8e841df4a9c707e8a8c4ed26e944ef69297184de2785e3be"
+dependencies = [
+ "dlib",
+ "libc",
+ "log",
+ "memoffset",
+ "pkg-config",
+]
+
+[[package]]
+name = "weezl"
+version = "0.1.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a28ac98ddc8b9274cb41bb4d9d4d5c425b6020c50c46f25559911905610b4a88"
+
+[[package]]
+name = "wide"
+version = "0.7.33"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ce5da8ecb62bcd8ec8b7ea19f69a51275e91299be594ea5cc6ef7819e16cd03"
+dependencies = [
+ "bytemuck",
+ "safe_arch",
+]
+
+[[package]]
+name = "winapi-util"
+version = "0.1.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
+
+[[package]]
+name = "windows-sys"
+version = "0.48.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
+dependencies = [
+ "windows-targets 0.48.5",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.59.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.61.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
+dependencies = [
+ "windows-link",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c"
+dependencies = [
+ "windows_aarch64_gnullvm 0.48.5",
+ "windows_aarch64_msvc 0.48.5",
+ "windows_i686_gnu 0.48.5",
+ "windows_i686_msvc 0.48.5",
+ "windows_x86_64_gnu 0.48.5",
+ "windows_x86_64_gnullvm 0.48.5",
+ "windows_x86_64_msvc 0.48.5",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.48.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "winnow"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0592e1c9d151f854e6fd382574c3a0855250e1d9b2f99d9281c6e6391af352f1"
+
+[[package]]
+name = "wit-bindgen"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5"
+dependencies = [
+ "wit-bindgen-rust-macro",
+]
+
+[[package]]
+name = "wit-bindgen"
+version = "0.57.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e"
+
+[[package]]
+name = "wit-bindgen-core"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc"
+dependencies = [
+ "anyhow",
+ "heck",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-bindgen-rust"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21"
+dependencies = [
+ "anyhow",
+ "heck",
+ "indexmap",
+ "prettyplease",
+ "syn",
+ "wasm-metadata",
+ "wit-bindgen-core",
+ "wit-component",
+]
+
+[[package]]
+name = "wit-bindgen-rust-macro"
+version = "0.51.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a"
+dependencies = [
+ "anyhow",
+ "prettyplease",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wit-bindgen-core",
+ "wit-bindgen-rust",
+]
+
+[[package]]
+name = "wit-component"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2"
+dependencies = [
+ "anyhow",
+ "bitflags 2.13.0",
+ "indexmap",
+ "log",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "wasm-encoder",
+ "wasm-metadata",
+ "wasmparser",
+ "wit-parser",
+]
+
+[[package]]
+name = "wit-parser"
+version = "0.244.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736"
+dependencies = [
+ "anyhow",
+ "id-arena",
+ "indexmap",
+ "log",
+ "semver",
+ "serde",
+ "serde_derive",
+ "serde_json",
+ "unicode-xid",
+ "wasmparser",
+]
+
+[[package]]
+name = "x11rb"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9993aa5be5a26815fe2c3eacfc1fde061fc1a1f094bf1ad2a18bf9c495dd7414"
+dependencies = [
+ "gethostname",
+ "rustix 1.1.4",
+ "x11rb-protocol",
+]
+
+[[package]]
+name = "x11rb-protocol"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ea6fc2961e4ef194dcbfe56bb845534d0dc8098940c7e5c012a258bfec6701bd"
+
+[[package]]
+name = "x264-sys"
+version = "0.2.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b694bc460acd21d48a05977f57025a93e2a4d4a10feffde9f0c66925d6e72ff4"
+dependencies = [
+ "bindgen",
+ "system-deps",
+]
+
+[[package]]
+name = "xcursor"
+version = "0.3.10"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bec9e4a500ca8864c5b47b8b482a73d62e4237670e5b5f1d6b9e3cae50f28f2b"
+
+[[package]]
+name = "xkbcommon"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a7a974f48060a14e95705c01f24ad9c3345022f4d97441b8a36beb7ed5c4a02d"
+dependencies = [
+ "libc",
+ "memmap2",
+ "xkeysym",
+]
+
+[[package]]
+name = "xkeysym"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9cc00251562a284751c9973bace760d86c0276c471b4be569fe6b068ee97a56"
+
+[[package]]
+name = "xml-rs"
+version = "0.8.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3ae8337f8a065cfc972643663ea4279e04e7256de865aa66fe25cec5fb912d3f"
+
+[[package]]
+name = "y4m"
+version = "0.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7a5a4b21e1a62b67a2970e6831bc091d7b87e119e7f9791aef9702e3bef04448"
+
+[[package]]
+name = "yuv"
+version = "0.8.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dba772cc0fb636dfce23c6907e7ca49c35a4b2ac687ea5df86a1a8879e8ea219"
+dependencies = [
+ "num-traits",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ce1022995ff5ff5d841ad7d994facc23098cd40152f2c1d11cd607c6f530653f"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.52"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ae7f38b72ec2a254e2b87ef277cf2cd4fb97cbebf944faa6f33354da0867930"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zmij"
+version = "1.0.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa"
+
+[[package]]
+name = "zune-core"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a"
+
+[[package]]
+name = "zune-core"
+version = "0.5.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9"
+
+[[package]]
+name = "zune-inflate"
+version = "0.2.54"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "73ab332fe2f6680068f3582b16a24f90ad7096d5d39b974d1c0aff0125116f02"
+dependencies = [
+ "simd-adler32",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.4.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713"
+dependencies = [
+ "zune-core 0.4.12",
+]
+
+[[package]]
+name = "zune-jpeg"
+version = "0.5.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296"
+dependencies = [
+ "zune-core 0.5.1",
+]
diff --git a/pixelflux/Cargo.toml b/pixelflux/Cargo.toml
new file mode 100644
index 0000000..e70fcc3
--- /dev/null
+++ b/pixelflux/Cargo.toml
@@ -0,0 +1,68 @@
+[package]
+name = "pixelflux"
+version = "1.6.4"
+edition = "2021"
+
+[lib]
+name = "pixelflux"
+crate-type = ["cdylib"]
+
+[dependencies]
+pyo3 = { version = "0.27.2", features = ["extension-module"] }
+wayland-server = { version = "0.31.10", features = ["libwayland_1_23"] }
+wayland-protocols = { version = "0.31", features = ["server"] }
+crossbeam-channel = "0.5"
+calloop = "0.12"
+turbojpeg = "1.3" 
+rayon = "1.10"
+x264-sys = "0.2.2"
+# BSD-licensed Cisco OpenH264 as an alternative software H.264 encoder; builds the
+# vendored C++ source (needs a C++ toolchain + nasm), no runtime download. openh264-sys2
+# is used for the raw set_option live-bitrate path the safe wrapper doesn't expose.
+openh264 = "0.8"
+openh264-sys2 = "0.8"
+libc = "0.2"
+# fast_mode enables YuvConversionMode::Fast: empirically 1.23x faster than libyuv ARGBToI420
+# on x86-64-v3 (AVX2) at <=1 LSB error vs a float BT.709 reference (accuracy preserved).
+yuv = { version = "0.8.9", features = ["fast_mode"] }
+gbm = "0.14" 
+libloading = "0.8"
+xcursor = "0.3.1"
+# X11 host capture (replaces the C++ XShm/XFixes path): shm = XShm zero-copy grab,
+# xfixes = hardware cursor image. Pure-Rust XCB; links the system libxcb.
+x11rb = { version = "0.13", features = ["shm", "xfixes"] }
+image = "=0.25.9"
+# Pinned EXACT: a caret requirement ("8.0") would float to any later 8.x (e.g. a
+# future 8.2) and may break. Each ffmpeg-sys-next minor tracks ONE FFmpeg release:
+# 8.0.0 needs FFmpeg 7.x (FF_PROFILE_* names), 8.1.0 needs FFmpeg 8.1 (AV_PROFILE_*,
+# JPEGXS, AV_PKT_DATA_EXIF). The build REQUIRES FFmpeg 8.1: point PKG_CONFIG_PATH at
+# an env providing it (conda `flux` has 8.1.2; the cibuildwheel before-all must
+# build n8.1, not n8.0). Verified: 8.1.0 compiles clean against FFmpeg 8.1.2.
+ffmpeg-sys-next = "=8.1.0"
+
+[dependencies.nvcodec-sys]
+path = "nvcodec-sys"
+features = ["cuda"]
+
+[dependencies.smithay]
+git = "https://github.com/Smithay/smithay"
+rev = "ca932e042fa9ad150605c150a86275b85f9ad5b3"
+default-features = false
+features = [
+    "backend_drm",
+    "backend_egl",
+    "backend_gbm",
+    "backend_libinput",
+    "backend_udev",
+    "renderer_gl", 
+    "renderer_pixman",
+    "use_system_lib",
+    "desktop",
+    "wayland_frontend",
+]
+
+[profile.release]
+opt-level = 3
+lto = "fat"
+codegen-units = 1
+strip = true
diff --git a/pixelflux/__init__.py b/pixelflux/__init__.py
deleted file mode 100644
index 99076ad..0000000
--- a/pixelflux/__init__.py
+++ /dev/null
@@ -1,249 +0,0 @@
-import ctypes
-import os
-import threading
-import sys
-
-class CaptureSettings(ctypes.Structure):
-    _fields_ = [
-        ("capture_width", ctypes.c_int),
-        ("capture_height", ctypes.c_int),
-        ("scale", ctypes.c_double),
-        ("capture_x", ctypes.c_int),
-        ("capture_y", ctypes.c_int),
-        ("target_fps", ctypes.c_double),
-        ("jpeg_quality", ctypes.c_int),
-        ("paint_over_jpeg_quality", ctypes.c_int),
-        ("use_paint_over_quality", ctypes.c_bool),
-        ("paint_over_trigger_frames", ctypes.c_int),
-        ("damage_block_threshold", ctypes.c_int),
-        ("damage_block_duration", ctypes.c_int),
-        ("output_mode", ctypes.c_int),
-        ("h264_crf", ctypes.c_int),
-        ("h264_paintover_crf", ctypes.c_int),
-        ("h264_paintover_burst_frames", ctypes.c_int),
-        ("h264_fullcolor", ctypes.c_bool),
-        ("h264_fullframe", ctypes.c_bool),
-        ("h264_streaming_mode", ctypes.c_bool),
-        ("capture_cursor", ctypes.c_bool),
-        ("watermark_path", ctypes.c_char_p),
-        ("watermark_location_enum", ctypes.c_int),
-        ("vaapi_render_node_index", ctypes.c_int),
-        ("use_cpu", ctypes.c_bool),
-        ("debug_logging", ctypes.c_bool),
-        ("h264_cbr_mode", ctypes.c_bool),
-        ("h264_bitrate_kbps", ctypes.c_int),
-        ("h264_vbv_buffer_size_kb", ctypes.c_int),
-        ("auto_adjust_screen_capture_size", ctypes.c_bool),
-    ]
-
-class StripeEncodeResult(ctypes.Structure):
-    _fields_ = [
-        ("type", ctypes.c_int),
-        ("stripe_y_start", ctypes.c_int),
-        ("stripe_height", ctypes.c_int),
-        ("size", ctypes.c_int),
-        ("data", ctypes.POINTER(ctypes.c_ubyte)),
-        ("frame_id", ctypes.c_int),
-    ]
-
-StripeCallback = ctypes.CFUNCTYPE(
-    None, ctypes.POINTER(StripeEncodeResult), ctypes.c_void_p
-)
-
-lib_dir = os.path.dirname(__file__)
-lib_path = os.path.join(lib_dir, 'screen_capture_module.so')
-
-_legacy_lib = None
-try:
-    if os.path.exists(lib_path):
-        _legacy_lib = ctypes.CDLL(lib_path)
-    else:
-        _legacy_lib = ctypes.CDLL('screen_capture_module.so')
-except OSError:
-    pass
-
-if _legacy_lib:
-    create_module = _legacy_lib.create_screen_capture_module
-    create_module.restype = ctypes.c_void_p
-    destroy_module = _legacy_lib.destroy_screen_capture_module
-    destroy_module.argtypes = [ctypes.c_void_p]
-    start_capture_c = _legacy_lib.start_screen_capture
-    start_capture_c.argtypes = [ctypes.c_void_p, CaptureSettings, StripeCallback, ctypes.c_void_p]
-    stop_capture_c = _legacy_lib.stop_screen_capture
-    stop_capture_c.argtypes = [ctypes.c_void_p]
-    free_stripe_encode_result_data = _legacy_lib.free_stripe_encode_result_data
-    free_stripe_encode_result_data.argtypes = [ctypes.POINTER(StripeEncodeResult)]
-    request_idr = _legacy_lib.request_idr
-    request_idr.argtypes = [ctypes.c_void_p]
-    update_video_bitrate_c = _legacy_lib.update_video_bitrate
-    update_video_bitrate_c.argtypes = [ctypes.c_void_p, ctypes.c_int]
-    update_framerate_c = _legacy_lib.update_framerate
-    update_framerate_c.argtypes = [ctypes.c_void_p, ctypes.c_double]
-    update_vbv_buffer_size_c = _legacy_lib.update_vbv_buffer_size
-    update_vbv_buffer_size_c.argtypes = [ctypes.c_void_p, ctypes.c_int]
- 
-_GLOBAL_WAYLAND_BACKEND = None
-if os.environ.get("PIXELFLUX_WAYLAND") == "true":
-    try:
-        from . import pixelflux_wayland
-        _GLOBAL_WAYLAND_BACKEND = pixelflux_wayland.WaylandBackend()
-        print(">> [PixelFlux] Rust Wayland Backend Initialized Globally.")
-    except ImportError as e:
-        print(f">> [PixelFlux] Failed to load Wayland backend: {e}")
-        pass
-
-class ScreenCapture:
-    """Python wrapper for screen capture module using ctypes."""
-
-    def __init__(self):
-        if _legacy_lib:
-            self._module = create_module()
-        else:
-            self._module = None
-        
-        self._is_capturing = False
-        self._python_stripe_callback = None
-        self._c_callback = None
-
-    def __del__(self):
-        if hasattr(self, '_module') and self._module:
-            try:
-                self.stop_capture()
-                destroy_module(self._module)
-            except:
-                pass
-            self._module = None
-
-    def start_capture(self, settings: CaptureSettings, stripe_callback):
-        if self._is_capturing:
-            raise ValueError("Capture already started.")
-
-        self._python_stripe_callback = stripe_callback
-        mode = getattr(settings, 'mode', 'x11')
-
-    def start_capture(self, settings: CaptureSettings, stripe_callback):
-        if self._is_capturing:
-            raise ValueError("Capture already started.")
-
-        self._python_stripe_callback = stripe_callback
-        
-        if _GLOBAL_WAYLAND_BACKEND:
-            if settings.scale < 0.1:
-                if settings.debug_logging:
-                    print(f">> [PixelFlux] Warning: Scale {settings.scale} is invalid. Defaulting to 1.0")
-                settings.scale = 1.0
-
-            if settings.debug_logging:
-                print(f">> [PixelFlux] Connecting to Rust Wayland Backend (Scale: {settings.scale})...")
-            
-            is_h264 = (settings.output_mode == 1)
-
-            def rust_bridge_callback(data_bytes): 
-                if not self._python_stripe_callback:
-                    return
-                size = len(data_bytes)
-                c_buffer = (ctypes.c_ubyte * size).from_buffer_copy(data_bytes)
-                result_struct = StripeEncodeResult()
-                result_struct.size = size
-                result_struct.data = ctypes.cast(c_buffer, ctypes.POINTER(ctypes.c_ubyte))
-                if is_h264:
-                    result_struct.type = 0
-                    if size >= 4:
-                        result_struct.frame_id = int.from_bytes(data_bytes[2:4], 'big')
-                    else:
-                        result_struct.frame_id = 0
-                    if size >= 6:
-                         result_struct.stripe_y_start = int.from_bytes(data_bytes[4:6], 'big')
-                    else:
-                         result_struct.stripe_y_start = 0
-                    result_struct.stripe_height = settings.capture_height
-                else:
-                    result_struct.type = 1
-                    if size >= 2:
-                        result_struct.frame_id = int.from_bytes(data_bytes[0:2], 'big')
-                    else:
-                        result_struct.frame_id = 0
-                    if size >= 4:
-                        result_struct.stripe_y_start = int.from_bytes(data_bytes[2:4], 'big')
-                    else:
-                        result_struct.stripe_y_start = 0
-                    result_struct.stripe_height = 0
-                self._python_stripe_callback(ctypes.byref(result_struct), None)
-            _GLOBAL_WAYLAND_BACKEND.start_capture(rust_bridge_callback, settings)
-            self._is_capturing = True
-            return 
-
-        if not self._module:
-             raise OSError("Legacy screen_capture_module.so not found.")
-
-        if not callable(stripe_callback):
-            raise TypeError("stripe_callback must be callable.")
-        
-        self._c_callback = StripeCallback(self._internal_c_callback)
-        start_capture_c(self._module, settings, self._c_callback, None)
-        self._is_capturing = True
-
-    def stop_capture(self):
-        if not self._is_capturing:
-            return
-        
-        if self._module and self._c_callback:
-            stop_capture_c(self._module)
-            self._c_callback = None
-        
-        if _GLOBAL_WAYLAND_BACKEND:
-             _GLOBAL_WAYLAND_BACKEND.stop_capture()
-            
-        self._is_capturing = False
-        self._python_stripe_callback = None
-
-    def _internal_c_callback(self, result_ptr, user_data):
-        if self._is_capturing and self._python_stripe_callback:
-            try:
-                self._python_stripe_callback(result_ptr, user_data)
-            finally:
-                free_stripe_encode_result_data(result_ptr)
-
-    def inject_key(self, scancode, state):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.inject_key(scancode, state)
-
-    def inject_mouse_move(self, x, y):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.inject_mouse_move(float(x), float(y))
-
-    def inject_relative_mouse_move(self, dx, dy):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.inject_relative_mouse_move(float(dx), float(dy))
-
-    def inject_mouse_button(self, btn, state):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.inject_mouse_button(btn, state)
-
-    def inject_mouse_scroll(self, x, y):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.inject_mouse_scroll(float(x), float(y))
-
-    def set_cursor_rendering(self, enabled):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.set_cursor_rendering(bool(enabled))
-
-    def set_cursor_callback(self, callback):
-        if _GLOBAL_WAYLAND_BACKEND:
-            _GLOBAL_WAYLAND_BACKEND.set_cursor_callback(callback)
-
-    def request_idr_frame(self):
-        if self._is_capturing and self._module:
-            request_idr(self._module)
-
-    def update_video_bitrate(self, bitrate):
-        if self._is_capturing and self._module:
-            update_video_bitrate_c(self._module, bitrate)
-    
-    def update_framerate(self, fps):
-        if self._is_capturing and self._module:
-            update_framerate_c(self._module, ctypes.c_double(fps))
-    
-    def update_vbv_buf_size(self, buffer_size):
-        if self._is_capturing and self._module:
-            update_vbv_buffer_size_c(self._module, buffer_size)
diff --git a/pixelflux/include/cuda.h b/pixelflux/include/cuda.h
deleted file mode 100644
index fa6118b..0000000
--- a/pixelflux/include/cuda.h
+++ /dev/null
@@ -1,599 +0,0 @@
-/* CUDA stub header
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Library General Public
- * License as published by the Free Software Foundation; either
- * version 2 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Library General Public License for more details.
- *
- * You should have received a copy of the GNU Library General Public
- * License along with this library; if not, write to the
- * Free Software Foundation, Inc., 51 Franklin St, Fifth Floor,
- * Boston, MA 02110-1301, USA.
- */
-
-#pragma once
-
-#include <glib.h>
-
-G_BEGIN_DECLS
-
-typedef gpointer CUcontext;
-typedef gpointer CUgraphicsResource;
-typedef gpointer CUstream;
-typedef gpointer CUarray;
-typedef gpointer CUmodule;
-typedef gpointer CUfunction;
-typedef gpointer CUmipmappedArray;
-typedef gpointer CUevent;
-typedef gpointer CUmemoryPool;
-typedef gpointer CUexternalMemory;
-typedef gpointer CUexternalSemaphore;
-
-typedef guint64  CUtexObject;
-typedef guintptr CUdeviceptr;
-typedef gint CUdevice;
-
-typedef enum
-{
-  CUDA_SUCCESS                              = 0,
-  CUDA_ERROR_INVALID_VALUE                  = 1,
-  CUDA_ERROR_OUT_OF_MEMORY                  = 2,
-  CUDA_ERROR_NOT_INITIALIZED                = 3,
-  CUDA_ERROR_DEINITIALIZED                  = 4,
-  CUDA_ERROR_PROFILER_DISABLED              = 5,
-  CUDA_ERROR_PROFILER_NOT_INITIALIZED       = 6,
-  CUDA_ERROR_PROFILER_ALREADY_STARTED       = 7,
-  CUDA_ERROR_PROFILER_ALREADY_STOPPED       = 8,
-  CUDA_ERROR_STUB_LIBRARY                   = 34,
-  CUDA_ERROR_DEVICE_UNAVAILABLE             = 46,
-  CUDA_ERROR_NO_DEVICE                      = 100,
-  CUDA_ERROR_INVALID_DEVICE                 = 101,
-  CUDA_ERROR_DEVICE_NOT_LICENSED            = 102,
-  CUDA_ERROR_INVALID_IMAGE                  = 200,
-  CUDA_ERROR_INVALID_CONTEXT                = 201,
-  CUDA_ERROR_CONTEXT_ALREADY_CURRENT        = 202,
-  CUDA_ERROR_MAP_FAILED                     = 205,
-  CUDA_ERROR_UNMAP_FAILED                   = 206,
-  CUDA_ERROR_ARRAY_IS_MAPPED                = 207,
-  CUDA_ERROR_ALREADY_MAPPED                 = 208,
-  CUDA_ERROR_NO_BINARY_FOR_GPU              = 209,
-  CUDA_ERROR_ALREADY_ACQUIRED               = 210,
-  CUDA_ERROR_NOT_MAPPED                     = 211,
-  CUDA_ERROR_NOT_MAPPED_AS_ARRAY            = 212,
-  CUDA_ERROR_NOT_MAPPED_AS_POINTER          = 213,
-  CUDA_ERROR_ECC_UNCORRECTABLE              = 214,
-  CUDA_ERROR_UNSUPPORTED_LIMIT              = 215,
-  CUDA_ERROR_CONTEXT_ALREADY_IN_USE         = 216,
-  CUDA_ERROR_PEER_ACCESS_UNSUPPORTED        = 217,
-  CUDA_ERROR_INVALID_PTX                    = 218,
-  CUDA_ERROR_INVALID_GRAPHICS_CONTEXT       = 219,
-  CUDA_ERROR_NVLINK_UNCORRECTABLE           = 220,
-  CUDA_ERROR_JIT_COMPILER_NOT_FOUND         = 221,
-  CUDA_ERROR_UNSUPPORTED_PTX_VERSION        = 222,
-  CUDA_ERROR_JIT_COMPILATION_DISABLED       = 223,
-  CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY      = 224,
-  CUDA_ERROR_UNSUPPORTED_DEVSIDE_SYNC       = 225,
-  CUDA_ERROR_INVALID_SOURCE                 = 300,
-  CUDA_ERROR_FILE_NOT_FOUND                 = 301,
-  CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND = 302,
-  CUDA_ERROR_SHARED_OBJECT_INIT_FAILED      = 303,
-  CUDA_ERROR_OPERATING_SYSTEM               = 304,
-  CUDA_ERROR_INVALID_HANDLE                 = 400,
-  CUDA_ERROR_ILLEGAL_STATE                  = 401,
-  CUDA_ERROR_LOSSY_QUERY                    = 402,
-  CUDA_ERROR_NOT_FOUND                      = 500,
-  CUDA_ERROR_NOT_READY                      = 600,
-  CUDA_ERROR_ILLEGAL_ADDRESS                = 700,
-  CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES        = 701,
-  CUDA_ERROR_LAUNCH_TIMEOUT                 = 702,
-  CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING  = 703,
-  CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED    = 704,
-  CUDA_ERROR_PEER_ACCESS_NOT_ENABLED        = 705,
-  CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE         = 708,
-  CUDA_ERROR_CONTEXT_IS_DESTROYED           = 709,
-  CUDA_ERROR_ASSERT                         = 710,
-  CUDA_ERROR_TOO_MANY_PEERS                 = 711,
-  CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED = 712,
-  CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED     = 713,
-  CUDA_ERROR_HARDWARE_STACK_ERROR           = 714,
-  CUDA_ERROR_ILLEGAL_INSTRUCTION            = 715,
-  CUDA_ERROR_MISALIGNED_ADDRESS             = 716,
-  CUDA_ERROR_INVALID_ADDRESS_SPACE          = 717,
-  CUDA_ERROR_INVALID_PC                     = 718,
-  CUDA_ERROR_LAUNCH_FAILED                  = 719,
-  CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE   = 720,
-  CUDA_ERROR_NOT_PERMITTED                  = 800,
-  CUDA_ERROR_NOT_SUPPORTED                  = 801,
-  CUDA_ERROR_SYSTEM_NOT_READY               = 802,
-  CUDA_ERROR_SYSTEM_DRIVER_MISMATCH         = 803,
-  CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE = 804,
-  CUDA_ERROR_MPS_CONNECTION_FAILED          = 805,
-  CUDA_ERROR_MPS_RPC_FAILURE                = 806,
-  CUDA_ERROR_MPS_SERVER_NOT_READY           = 807,
-  CUDA_ERROR_MPS_MAX_CLIENTS_REACHED        = 808,
-  CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED    = 809,
-  CUDA_ERROR_MPS_CLIENT_TERMINATED          = 810,
-  CUDA_ERROR_CDP_NOT_SUPPORTED              = 811,
-  CUDA_ERROR_CDP_VERSION_MISMATCH           = 812,
-  CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED     = 900,
-  CUDA_ERROR_STREAM_CAPTURE_INVALIDATED     = 901,
-  CUDA_ERROR_STREAM_CAPTURE_MERGE           = 902,
-  CUDA_ERROR_STREAM_CAPTURE_UNMATCHED       = 903,
-  CUDA_ERROR_STREAM_CAPTURE_UNJOINED        = 904,
-  CUDA_ERROR_STREAM_CAPTURE_ISOLATION       = 905,
-  CUDA_ERROR_STREAM_CAPTURE_IMPLICIT        = 906,
-  CUDA_ERROR_CAPTURED_EVENT                 = 907,
-  CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD    = 908,
-  CUDA_ERROR_TIMEOUT                        = 909,
-  CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE      = 910,
-  CUDA_ERROR_EXTERNAL_DEVICE                = 911,
-  CUDA_ERROR_INVALID_CLUSTER_SIZE           = 912,
-  CUDA_ERROR_FUNCTION_NOT_LOADED            = 913,
-  CUDA_ERROR_INVALID_RESOURCE_TYPE          = 914,
-  CUDA_ERROR_INVALID_RESOURCE_CONFIGURATION = 915,
-  CUDA_ERROR_UNKNOWN                        = 999
-} CUresult;
-
-typedef enum
-{
-  CU_MEMORYTYPE_HOST = 1,
-  CU_MEMORYTYPE_DEVICE = 2,
-  CU_MEMORYTYPE_ARRAY = 3,
-  CU_MEMORYTYPE_UNIFIED = 4,
-} CUmemorytype;
-
-typedef enum
-{
-  CU_DEVICE_ATTRIBUTE_TEXTURE_ALIGNMENT = 14,
-  CU_DEVICE_ATTRIBUTE_UNIFIED_ADDRESSING = 41,
-  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
-  CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76,
-  CU_DEVICE_ATTRIBUTE_VIRTUAL_MEMORY_MANAGEMENT_SUPPORTED = 102,
-  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR_SUPPORTED = 103,
-  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_HANDLE_SUPPORTED = 104,
-  CU_DEVICE_ATTRIBUTE_HANDLE_TYPE_WIN32_KMT_HANDLE_SUPPORTED = 105,
-  CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED = 115,
-} CUdevice_attribute;
-
-typedef enum
-{
-  CU_GRAPHICS_REGISTER_FLAGS_NONE = 0x00,
-  CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY = 0x01,
-  CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD = 0x02,
-  CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LOAD_STORE = 0x04,
-  CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER = 0x08,
-} CUgraphicsRegisterFlags;
-
-typedef enum
-{
-  CU_GRAPHICS_MAP_RESOURCE_FLAGS_NONE = 0x00,
-  CU_GRAPHICS_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01,
-  CU_GRAPHICS_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02,
-} CUgraphicsMapResourceFlags;
-
-typedef enum
-{
-  CU_STREAM_DEFAULT = 0x0,
-  CU_STREAM_NON_BLOCKING = 0x1
-} CUstream_flags;
-
-typedef enum
-{
-  CU_TR_FILTER_MODE_POINT = 0,
-  CU_TR_FILTER_MODE_LINEAR = 1
-} CUfilter_mode;
-
-typedef enum
-{
-  CU_TR_ADDRESS_MODE_WRAP = 0,
-  CU_TR_ADDRESS_MODE_CLAMP = 1,
-  CU_TR_ADDRESS_MODE_MIRROR = 2,
-  CU_TR_ADDRESS_MODE_BORDER = 3
-} CUaddress_mode;
-
-typedef enum
-{
-  CU_RESOURCE_TYPE_ARRAY = 0,
-  CU_RESOURCE_TYPE_MIPMAPPED_ARRAY = 1,
-  CU_RESOURCE_TYPE_LINEAR = 2,
-  CU_RESOURCE_TYPE_PITCH2D = 3
-} CUresourcetype;
-
-typedef enum
-{
-  CU_AD_FORMAT_UNSIGNED_INT8  = 1,
-  CU_AD_FORMAT_UNSIGNED_INT16 = 2,
-} CUarray_format;
-
-typedef enum
-{
-  CU_RES_VIEW_FORMAT_NONE = 0,
-} CUresourceViewFormat;
-
-typedef enum
-{
-  CU_EVENT_DEFAULT = 0x0,
-  CU_EVENT_BLOCKING_SYNC = 0x1,
-  CU_EVENT_DISABLE_TIMING = 0x2,
-  CU_EVENT_INTERPROCESS = 0x4,
-} CUevent_flags;
-
-typedef enum
-{
-  CU_LIMIT_STACK_SIZE = 0x0,
-  CU_LIMIT_PRINTF_FIFO_SIZE = 0x1,
-  CU_LIMIT_MALLOC_HEAP_SIZE = 0x2,
-  CU_LIMIT_DEV_RUNTIME_SYNC_DEPTH = 0x3,
-  CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 0x4,
-  CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 0x5,
-  CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 0x6,
-  CU_LIMIT_SHMEM_SIZE = 0x7,
-  CU_LIMIT_CIG_ENABLED = 0x8,
-  CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 0x9,
-} CUlimit;
-
-typedef struct
-{
-  gsize srcXInBytes;
-  gsize srcY;
-  CUmemorytype srcMemoryType;
-  gconstpointer srcHost;
-  CUdeviceptr srcDevice;
-  CUarray srcArray;
-  gsize srcPitch;
-
-  gsize dstXInBytes;
-  gsize dstY;
-  CUmemorytype dstMemoryType;
-  gpointer dstHost;
-  CUdeviceptr dstDevice;
-  CUarray dstArray;
-  gsize dstPitch;
-
-  gsize WidthInBytes;
-  gsize Height;
-} CUDA_MEMCPY2D;
-
-typedef struct
-{
-  CUaddress_mode addressMode[3];
-  CUfilter_mode filterMode;
-  guint flags;
-  guint maxAnisotropy;
-  CUfilter_mode mipmapFilterMode;
-  gfloat mipmapLevelBias;
-  gfloat minMipmapLevelClamp;
-  gfloat maxMipmapLevelClamp;
-  gfloat borderColor[4];
-  gint reserved[12];
-} CUDA_TEXTURE_DESC;
-
-typedef struct
-{
-  CUresourcetype resType;
-
-  union {
-    struct {
-      CUarray hArray;
-    } array;
-    struct {
-      CUmipmappedArray hMipmappedArray;
-    } mipmap;
-    struct {
-      CUdeviceptr devPtr;
-      CUarray_format format;
-      guint numChannels;
-      gsize sizeInBytes;
-    } linear;
-    struct {
-      CUdeviceptr devPtr;
-      CUarray_format format;
-      guint numChannels;
-      gsize width;
-      gsize height;
-      gsize pitchInBytes;
-    } pitch2D;
-    struct {
-      gint reserved[32];
-    } reserved;
-  } res;
-
-  guint flags;
-} CUDA_RESOURCE_DESC;
-
-typedef struct
-{
-  CUresourceViewFormat format;
-  gsize width;
-  gsize height;
-  gsize depth;
-  guint firstMipmapLevel;
-  guint lastMipmapLevel;
-  guint firstLayer;
-  guint lastLayer;
-  guint reserved[16];
-} CUDA_RESOURCE_VIEW_DESC;
-
-typedef enum
-{
-  CU_IPC_MEM_LAZY_ENABLE_PEER_ACCESS = 0x1
-} CUipcMem_flags;
-
-#define CU_IPC_HANDLE_SIZE 64
-typedef struct
-{
-  char reserved[CU_IPC_HANDLE_SIZE];
-} CUipcMemHandle;
-
-typedef struct
-{
-  char reserved[CU_IPC_HANDLE_SIZE];
-} CUipcEventHandle;
-
-typedef unsigned long long CUmemGenericAllocationHandle;
-
-typedef enum
-{
-  CU_MEM_HANDLE_TYPE_NONE = 0x0,
-  CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR = 0x1,
-  CU_MEM_HANDLE_TYPE_WIN32 = 0x2,
-  CU_MEM_HANDLE_TYPE_WIN32_KMT = 0x4,
-  CU_MEM_HANDLE_TYPE_MAX = 0x7FFFFFFF
-} CUmemAllocationHandleType;
-
-typedef enum
-{
-  CU_MEM_ACCESS_FLAGS_PROT_NONE = 0x0,
-  CU_MEM_ACCESS_FLAGS_PROT_READ  = 0x1,
-  CU_MEM_ACCESS_FLAGS_PROT_READWRITE = 0x3,
-  CU_MEM_ACCESS_FLAGS_PROT_MAX = 0x7FFFFFFF
-} CUmemAccess_flags;
-
-typedef enum
-{
-  CU_MEM_LOCATION_TYPE_INVALID = 0x0,
-  CU_MEM_LOCATION_TYPE_DEVICE = 0x1,
-  CU_MEM_LOCATION_TYPE_MAX = 0x7FFFFFFF
-} CUmemLocationType;
-
-typedef enum CUmemAllocationType_enum {
-  CU_MEM_ALLOCATION_TYPE_INVALID = 0x0,
-  CU_MEM_ALLOCATION_TYPE_PINNED = 0x1,
-  CU_MEM_ALLOCATION_TYPE_MAX = 0x7FFFFFFF
-} CUmemAllocationType;
-
-typedef enum
-{
-  CU_MEM_ALLOC_GRANULARITY_MINIMUM = 0x0,
-  CU_MEM_ALLOC_GRANULARITY_RECOMMENDED = 0x1
-} CUmemAllocationGranularity_flags;
-
-typedef struct
-{
-  CUmemLocationType type;
-  int id;
-} CUmemLocation;
-
-typedef struct
-{
-  unsigned char compressionType;
-  unsigned char gpuDirectRDMACapable;
-  unsigned short usage;
-  unsigned char reserved[4];
-} CUmemAllocationPropAllocFlags;
-
-typedef struct
-{
-  CUmemAllocationType type;
-  CUmemAllocationHandleType requestedHandleTypes;
-  CUmemLocation location;
-  void *win32HandleMetaData;
-  CUmemAllocationPropAllocFlags allocFlags;
-} CUmemAllocationProp;
-
-typedef struct
-{
-  CUmemLocation location;
-  CUmemAccess_flags flags;
-} CUmemAccessDesc;
-
-typedef struct
-{
-  CUmemAllocationType allocType;
-  CUmemAllocationHandleType handleTypes;
-  CUmemLocation location;
-  void *win32SecurityAttributes;
-  size_t maxSize;
-  unsigned char reserved[56];
-} CUmemPoolProps;
-
-typedef enum
-{
-  CU_MEMPOOL_ATTR_REUSE_FOLLOW_EVENT_DEPENDENCIES = 1,
-  CU_MEMPOOL_ATTR_REUSE_ALLOW_OPPORTUNISTIC,
-  CU_MEMPOOL_ATTR_REUSE_ALLOW_INTERNAL_DEPENDENCIES,
-  CU_MEMPOOL_ATTR_RELEASE_THRESHOLD,
-  CU_MEMPOOL_ATTR_RESERVED_MEM_CURRENT,
-  CU_MEMPOOL_ATTR_RESERVED_MEM_HIGH,
-  CU_MEMPOOL_ATTR_USED_MEM_CURRENT,
-  CU_MEMPOOL_ATTR_USED_MEM_HIGH,
-} CUmemPool_attribute;
-
-typedef struct
-{
-  unsigned long long offset;
-  unsigned long long size;
-  unsigned int flags;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_MEMORY_BUFFER_DESC;
-
-typedef enum
-{
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
-  CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8
-} CUexternalMemoryHandleType;
-
-/**
- * CUDA_EXTERNAL_MEMORY_HANDLE_DESC: (skip) (attributes doc.skip=true)
- */
-typedef struct
-{
-  CUexternalMemoryHandleType type;
-  union {
-    int fd;
-    struct {
-      void *handle;
-      const void *name;
-    } win32;
-    const void *nvSciBufObject;
-  } handle;
-  unsigned long long size;
-  unsigned int flags;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_MEMORY_HANDLE_DESC;
-
-typedef enum
-{
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
-  CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10
-} CUexternalSemaphoreHandleType;
-
-/**
- * CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC: (skip) (attributes doc.skip=true)
- */
-typedef struct
-{
-  CUexternalSemaphoreHandleType type;
-  union {
-    int fd;
-    struct {
-      void *handle;
-      const void *name;
-    } win32;
-    const void* nvSciSyncObj;
-  } handle;
-  unsigned int flags;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC;
-
-/**
- * CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS: (skip) (attributes doc.skip=true)
- */
-typedef struct
-{
-  struct {
-    struct {
-      unsigned long long value;
-    } fence;
-    union {
-      void *fence;
-      unsigned long long reserved;
-    } nvSciSync;
-    struct {
-      unsigned long long key;
-    } keyedMutex;
-    unsigned int reserved[12];
-  } params;
-  unsigned int flags;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS;
-
-/**
- * CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS: (skip) (attributes doc.skip=true)
- */
-typedef struct
-{
-  struct {
-    struct {
-      unsigned long long value;
-    } fence;
-    union {
-      void *fence;
-      unsigned long long reserved;
-    } nvSciSync;
-    struct {
-      unsigned long long key;
-      unsigned int timeoutMs;
-    } keyedMutex;
-    unsigned int reserved[10];
-  } params;
-  unsigned int flags;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS;
-
-typedef struct
-{
-  size_t Width;
-  size_t Height;
-  size_t Depth;
-  CUarray_format Format;
-  unsigned int NumChannels;
-  unsigned int Flags;
-} CUDA_ARRAY3D_DESCRIPTOR;
-
-typedef struct
-{
-  unsigned long long offset;
-  CUDA_ARRAY3D_DESCRIPTOR arrayDesc;
-  unsigned int numLevels;
-  unsigned int reserved[16];
-} CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC;
-
-#define CUDA_VERSION 10000
-
-#ifdef _WIN32
-#define CUDAAPI __stdcall
-#else
-#define CUDAAPI
-#endif
-
-#define cuCtxCreate cuCtxCreate_v2
-#define cuCtxDestroy cuCtxDestroy_v2
-#define cuCtxPopCurrent cuCtxPopCurrent_v2
-#define cuCtxPushCurrent cuCtxPushCurrent_v2
-#define cuGraphicsResourceGetMappedPointer cuGraphicsResourceGetMappedPointer_v2
-#define cuGraphicsResourceSetMapFlags cuGraphicsResourceSetMapFlags_v2
-
-#define cuStreamDestroy cuStreamDestroy_v2
-
-#define cuMemAlloc cuMemAlloc_v2
-#define cuMemAllocPitch cuMemAllocPitch_v2
-#define cuMemAllocHost  cuMemAllocHost_v2
-#define cuMemcpy2D cuMemcpy2D_v2
-#define cuMemcpy2DAsync cuMemcpy2DAsync_v2
-#define cuMemcpyDtoD cuMemcpyDtoD_v2
-#define cuMemcpyDtoDAsync cuMemcpyDtoDAsync_v2
-#define cuMemcpyDtoH cuMemcpyDtoH_v2
-#define cuMemcpyDtoHAsync cuMemcpyDtoHAsync_v2
-#define cuMemcpyHtoD cuMemcpyHtoD_v2
-#define cuMemcpyHtoDAsync cuMemcpyHtoDAsync_v2
-#define cuMemFree cuMemFree_v2
-#define cuMemsetD2D8 cuMemsetD2D8_v2
-#define cuMemsetD2D16 cuMemsetD2D16_v2
-#define cuMemsetD2D32 cuMemsetD2D32_v2
-
-#define cuEventDestroy cuEventDestroy_v2
-
-#define CU_TRSF_READ_AS_INTEGER 1
-
-G_END_DECLS
-
diff --git a/pixelflux/include/stb_image.h b/pixelflux/include/stb_image.h
deleted file mode 100644
index 9eedabe..0000000
--- a/pixelflux/include/stb_image.h
+++ /dev/null
@@ -1,7988 +0,0 @@
-/* stb_image - v2.30 - public domain image loader - http://nothings.org/stb
-                                  no warranty implied; use at your own risk
-
-   Do this:
-      #define STB_IMAGE_IMPLEMENTATION
-   before you include this file in *one* C or C++ file to create the implementation.
-
-   // i.e. it should look like this:
-   #include ...
-   #include ...
-   #include ...
-   #define STB_IMAGE_IMPLEMENTATION
-   #include "stb_image.h"
-
-   You can #define STBI_ASSERT(x) before the #include to avoid using assert.h.
-   And #define STBI_MALLOC, STBI_REALLOC, and STBI_FREE to avoid using malloc,realloc,free
-
-
-   QUICK NOTES:
-      Primarily of interest to game developers and other people who can
-          avoid problematic images and only need the trivial interface
-
-      JPEG baseline & progressive (12 bpc/arithmetic not supported, same as stock IJG lib)
-      PNG 1/2/4/8/16-bit-per-channel
-
-      TGA (not sure what subset, if a subset)
-      BMP non-1bpp, non-RLE
-      PSD (composited view only, no extra channels, 8/16 bit-per-channel)
-
-      GIF (*comp always reports as 4-channel)
-      HDR (radiance rgbE format)
-      PIC (Softimage PIC)
-      PNM (PPM and PGM binary only)
-
-      Animated GIF still needs a proper API, but here's one way to do it:
-          http://gist.github.com/urraka/685d9a6340b26b830d49
-
-      - decode from memory or through FILE (define STBI_NO_STDIO to remove code)
-      - decode from arbitrary I/O callbacks
-      - SIMD acceleration on x86/x64 (SSE2) and ARM (NEON)
-
-   Full documentation under "DOCUMENTATION" below.
-
-
-LICENSE
-
-  See end of file for license information.
-
-RECENT REVISION HISTORY:
-
-      2.30  (2024-05-31) avoid erroneous gcc warning
-      2.29  (2023-05-xx) optimizations
-      2.28  (2023-01-29) many error fixes, security errors, just tons of stuff
-      2.27  (2021-07-11) document stbi_info better, 16-bit PNM support, bug fixes
-      2.26  (2020-07-13) many minor fixes
-      2.25  (2020-02-02) fix warnings
-      2.24  (2020-02-02) fix warnings; thread-local failure_reason and flip_vertically
-      2.23  (2019-08-11) fix clang static analysis warning
-      2.22  (2019-03-04) gif fixes, fix warnings
-      2.21  (2019-02-25) fix typo in comment
-      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
-      2.19  (2018-02-11) fix warning
-      2.18  (2018-01-30) fix warnings
-      2.17  (2018-01-29) bugfix, 1-bit BMP, 16-bitness query, fix warnings
-      2.16  (2017-07-23) all functions have 16-bit variants; optimizations; bugfixes
-      2.15  (2017-03-18) fix png-1,2,4; all Imagenet JPGs; no runtime SSE detection on GCC
-      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
-      2.13  (2016-12-04) experimental 16-bit API, only for PNG so far; fixes
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) 16-bit PNGS; enable SSE2 in non-gcc x64
-                         RGB-format JPEG; remove white matting in PSD;
-                         allocate large structures on the stack;
-                         correct channel count for PNG & BMP
-      2.10  (2016-01-22) avoid warning introduced in 2.09
-      2.09  (2016-01-16) 16-bit TGA; comments in PNM files; STBI_REALLOC_SIZED
-
-   See end of file for full revision history.
-
-
- ============================    Contributors    =========================
-
- Image formats                          Extensions, features
-    Sean Barrett (jpeg, png, bmp)          Jetro Lauha (stbi_info)
-    Nicolas Schulz (hdr, psd)              Martin "SpartanJ" Golini (stbi_info)
-    Jonathan Dummer (tga)                  James "moose2000" Brown (iPhone PNG)
-    Jean-Marc Lienher (gif)                Ben "Disch" Wenger (io callbacks)
-    Tom Seddon (pic)                       Omar Cornut (1/2/4-bit PNG)
-    Thatcher Ulrich (psd)                  Nicolas Guillemot (vertical flip)
-    Ken Miller (pgm, ppm)                  Richard Mitton (16-bit PSD)
-    github:urraka (animated gif)           Junggon Kim (PNM comments)
-    Christopher Forseth (animated gif)     Daniel Gibson (16-bit TGA)
-                                           socks-the-fox (16-bit PNG)
-                                           Jeremy Sawicki (handle all ImageNet JPGs)
- Optimizations & bugfixes                  Mikhail Morozov (1-bit BMP)
-    Fabian "ryg" Giesen                    Anael Seghezzi (is-16-bit query)
-    Arseny Kapoulkine                      Simon Breuss (16-bit PNM)
-    John-Mark Allen
-    Carmelo J Fdez-Aguera
-
- Bug & warning fixes
-    Marc LeBlanc            David Woo          Guillaume George     Martins Mozeiko
-    Christpher Lloyd        Jerry Jansson      Joseph Thomson       Blazej Dariusz Roszkowski
-    Phil Jordan                                Dave Moore           Roy Eltham
-    Hayaki Saito            Nathan Reed        Won Chun
-    Luke Graham             Johan Duparc       Nick Verigakis       the Horde3D community
-    Thomas Ruf              Ronny Chevalier                         github:rlyeh
-    Janez Zemva             John Bartholomew   Michal Cichon        github:romigrou
-    Jonathan Blow           Ken Hamada         Tero Hanninen        github:svdijk
-    Eugene Golushkov        Laurent Gomila     Cort Stratton        github:snagar
-    Aruelien Pocheville     Sergio Gonzalez    Thibault Reuille     github:Zelex
-    Cass Everitt            Ryamond Barbiero                        github:grim210
-    Paul Du Bois            Engin Manap        Aldo Culquicondor    github:sammyhw
-    Philipp Wiesemann       Dale Weiler        Oriol Ferrer Mesia   github:phprus
-    Josh Tobin              Neil Bickford      Matthew Gregan       github:poppolopoppo
-    Julian Raschke          Gregory Mullen     Christian Floisand   github:darealshinji
-    Baldur Karlsson         Kevin Schmidt      JR Smith             github:Michaelangel007
-                            Brad Weinberger    Matvey Cherevko      github:mosra
-    Luca Sas                Alexander Veselov  Zack Middleton       [reserved]
-    Ryan C. Gordon          [reserved]                              [reserved]
-                     DO NOT ADD YOUR NAME HERE
-
-                     Jacko Dirks
-
-  To add your name to the credits, pick a random blank space in the middle and fill it.
-  80% of merge conflicts on stb PRs are due to people adding their name at the end
-  of the credits.
-*/
-
-#ifndef STBI_INCLUDE_STB_IMAGE_H
-#define STBI_INCLUDE_STB_IMAGE_H
-
-// DOCUMENTATION
-//
-// Limitations:
-//    - no 12-bit-per-channel JPEG
-//    - no JPEGs with arithmetic coding
-//    - GIF always returns *comp=4
-//
-// Basic usage (see HDR discussion below for HDR usage):
-//    int x,y,n;
-//    unsigned char *data = stbi_load(filename, &x, &y, &n, 0);
-//    // ... process data if not NULL ...
-//    // ... x = width, y = height, n = # 8-bit components per pixel ...
-//    // ... replace '0' with '1'..'4' to force that many components per pixel
-//    // ... but 'n' will always be the number that it would have been if you said 0
-//    stbi_image_free(data);
-//
-// Standard parameters:
-//    int *x                 -- outputs image width in pixels
-//    int *y                 -- outputs image height in pixels
-//    int *channels_in_file  -- outputs # of image components in image file
-//    int desired_channels   -- if non-zero, # of image components requested in result
-//
-// The return value from an image loader is an 'unsigned char *' which points
-// to the pixel data, or NULL on an allocation failure or if the image is
-// corrupt or invalid. The pixel data consists of *y scanlines of *x pixels,
-// with each pixel consisting of N interleaved 8-bit components; the first
-// pixel pointed to is top-left-most in the image. There is no padding between
-// image scanlines or between pixels, regardless of format. The number of
-// components N is 'desired_channels' if desired_channels is non-zero, or
-// *channels_in_file otherwise. If desired_channels is non-zero,
-// *channels_in_file has the number of components that _would_ have been
-// output otherwise. E.g. if you set desired_channels to 4, you will always
-// get RGBA output, but you can check *channels_in_file to see if it's trivially
-// opaque because e.g. there were only 3 channels in the source image.
-//
-// An output image with N components has the following components interleaved
-// in this order in each pixel:
-//
-//     N=#comp     components
-//       1           grey
-//       2           grey, alpha
-//       3           red, green, blue
-//       4           red, green, blue, alpha
-//
-// If image loading fails for any reason, the return value will be NULL,
-// and *x, *y, *channels_in_file will be unchanged. The function
-// stbi_failure_reason() can be queried for an extremely brief, end-user
-// unfriendly explanation of why the load failed. Define STBI_NO_FAILURE_STRINGS
-// to avoid compiling these strings at all, and STBI_FAILURE_USERMSG to get slightly
-// more user-friendly ones.
-//
-// Paletted PNG, BMP, GIF, and PIC images are automatically depalettized.
-//
-// To query the width, height and component count of an image without having to
-// decode the full file, you can use the stbi_info family of functions:
-//
-//   int x,y,n,ok;
-//   ok = stbi_info(filename, &x, &y, &n);
-//   // returns ok=1 and sets x, y, n if image is a supported format,
-//   // 0 otherwise.
-//
-// Note that stb_image pervasively uses ints in its public API for sizes,
-// including sizes of memory buffers. This is now part of the API and thus
-// hard to change without causing breakage. As a result, the various image
-// loaders all have certain limits on image size; these differ somewhat
-// by format but generally boil down to either just under 2GB or just under
-// 1GB. When the decoded image would be larger than this, stb_image decoding
-// will fail.
-//
-// Additionally, stb_image will reject image files that have any of their
-// dimensions set to a larger value than the configurable STBI_MAX_DIMENSIONS,
-// which defaults to 2**24 = 16777216 pixels. Due to the above memory limit,
-// the only way to have an image with such dimensions load correctly
-// is for it to have a rather extreme aspect ratio. Either way, the
-// assumption here is that such larger images are likely to be malformed
-// or malicious. If you do need to load an image with individual dimensions
-// larger than that, and it still fits in the overall size limit, you can
-// #define STBI_MAX_DIMENSIONS on your own to be something larger.
-//
-// ===========================================================================
-//
-// UNICODE:
-//
-//   If compiling for Windows and you wish to use Unicode filenames, compile
-//   with
-//       #define STBI_WINDOWS_UTF8
-//   and pass utf8-encoded filenames. Call stbi_convert_wchar_to_utf8 to convert
-//   Windows wchar_t filenames to utf8.
-//
-// ===========================================================================
-//
-// Philosophy
-//
-// stb libraries are designed with the following priorities:
-//
-//    1. easy to use
-//    2. easy to maintain
-//    3. good performance
-//
-// Sometimes I let "good performance" creep up in priority over "easy to maintain",
-// and for best performance I may provide less-easy-to-use APIs that give higher
-// performance, in addition to the easy-to-use ones. Nevertheless, it's important
-// to keep in mind that from the standpoint of you, a client of this library,
-// all you care about is #1 and #3, and stb libraries DO NOT emphasize #3 above all.
-//
-// Some secondary priorities arise directly from the first two, some of which
-// provide more explicit reasons why performance can't be emphasized.
-//
-//    - Portable ("ease of use")
-//    - Small source code footprint ("easy to maintain")
-//    - No dependencies ("ease of use")
-//
-// ===========================================================================
-//
-// I/O callbacks
-//
-// I/O callbacks allow you to read from arbitrary sources, like packaged
-// files or some other source. Data read from callbacks are processed
-// through a small internal buffer (currently 128 bytes) to try to reduce
-// overhead.
-//
-// The three functions you must define are "read" (reads some bytes of data),
-// "skip" (skips some bytes of data), "eof" (reports if the stream is at the end).
-//
-// ===========================================================================
-//
-// SIMD support
-//
-// The JPEG decoder will try to automatically use SIMD kernels on x86 when
-// supported by the compiler. For ARM Neon support, you must explicitly
-// request it.
-//
-// (The old do-it-yourself SIMD API is no longer supported in the current
-// code.)
-//
-// On x86, SSE2 will automatically be used when available based on a run-time
-// test; if not, the generic C versions are used as a fall-back. On ARM targets,
-// the typical path is to have separate builds for NEON and non-NEON devices
-// (at least this is true for iOS and Android). Therefore, the NEON support is
-// toggled by a build flag: define STBI_NEON to get NEON loops.
-//
-// If for some reason you do not want to use any of SIMD code, or if
-// you have issues compiling it, you can disable it entirely by
-// defining STBI_NO_SIMD.
-//
-// ===========================================================================
-//
-// HDR image support   (disable by defining STBI_NO_HDR)
-//
-// stb_image supports loading HDR images in general, and currently the Radiance
-// .HDR file format specifically. You can still load any file through the existing
-// interface; if you attempt to load an HDR file, it will be automatically remapped
-// to LDR, assuming gamma 2.2 and an arbitrary scale factor defaulting to 1;
-// both of these constants can be reconfigured through this interface:
-//
-//     stbi_hdr_to_ldr_gamma(2.2f);
-//     stbi_hdr_to_ldr_scale(1.0f);
-//
-// (note, do not use _inverse_ constants; stbi_image will invert them
-// appropriately).
-//
-// Additionally, there is a new, parallel interface for loading files as
-// (linear) floats to preserve the full dynamic range:
-//
-//    float *data = stbi_loadf(filename, &x, &y, &n, 0);
-//
-// If you load LDR images through this interface, those images will
-// be promoted to floating point values, run through the inverse of
-// constants corresponding to the above:
-//
-//     stbi_ldr_to_hdr_scale(1.0f);
-//     stbi_ldr_to_hdr_gamma(2.2f);
-//
-// Finally, given a filename (or an open file or memory block--see header
-// file for details) containing image data, you can query for the "most
-// appropriate" interface to use (that is, whether the image is HDR or
-// not), using:
-//
-//     stbi_is_hdr(char *filename);
-//
-// ===========================================================================
-//
-// iPhone PNG support:
-//
-// We optionally support converting iPhone-formatted PNGs (which store
-// premultiplied BGRA) back to RGB, even though they're internally encoded
-// differently. To enable this conversion, call
-// stbi_convert_iphone_png_to_rgb(1).
-//
-// Call stbi_set_unpremultiply_on_load(1) as well to force a divide per
-// pixel to remove any premultiplied alpha *only* if the image file explicitly
-// says there's premultiplied data (currently only happens in iPhone images,
-// and only if iPhone convert-to-rgb processing is on).
-//
-// ===========================================================================
-//
-// ADDITIONAL CONFIGURATION
-//
-//  - You can suppress implementation of any of the decoders to reduce
-//    your code footprint by #defining one or more of the following
-//    symbols before creating the implementation.
-//
-//        STBI_NO_JPEG
-//        STBI_NO_PNG
-//        STBI_NO_BMP
-//        STBI_NO_PSD
-//        STBI_NO_TGA
-//        STBI_NO_GIF
-//        STBI_NO_HDR
-//        STBI_NO_PIC
-//        STBI_NO_PNM   (.ppm and .pgm)
-//
-//  - You can request *only* certain decoders and suppress all other ones
-//    (this will be more forward-compatible, as addition of new decoders
-//    doesn't require you to disable them explicitly):
-//
-//        STBI_ONLY_JPEG
-//        STBI_ONLY_PNG
-//        STBI_ONLY_BMP
-//        STBI_ONLY_PSD
-//        STBI_ONLY_TGA
-//        STBI_ONLY_GIF
-//        STBI_ONLY_HDR
-//        STBI_ONLY_PIC
-//        STBI_ONLY_PNM   (.ppm and .pgm)
-//
-//   - If you use STBI_NO_PNG (or _ONLY_ without PNG), and you still
-//     want the zlib decoder to be available, #define STBI_SUPPORT_ZLIB
-//
-//  - If you define STBI_MAX_DIMENSIONS, stb_image will reject images greater
-//    than that size (in either width or height) without further processing.
-//    This is to let programs in the wild set an upper bound to prevent
-//    denial-of-service attacks on untrusted data, as one could generate a
-//    valid image of gigantic dimensions and force stb_image to allocate a
-//    huge block of memory and spend disproportionate time decoding it. By
-//    default this is set to (1 << 24), which is 16777216, but that's still
-//    very big.
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif // STBI_NO_STDIO
-
-#define STBI_VERSION 1
-
-enum
-{
-   STBI_default = 0, // only used for desired_channels
-
-   STBI_grey       = 1,
-   STBI_grey_alpha = 2,
-   STBI_rgb        = 3,
-   STBI_rgb_alpha  = 4
-};
-
-#include <stdlib.h>
-typedef unsigned char stbi_uc;
-typedef unsigned short stbi_us;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-#ifndef STBIDEF
-#ifdef STB_IMAGE_STATIC
-#define STBIDEF static
-#else
-#define STBIDEF extern
-#endif
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// PRIMARY API - works on images of any type
-//
-
-//
-// load image by filename, open file, or memory buffer
-//
-
-typedef struct
-{
-   int      (*read)  (void *user,char *data,int size);   // fill 'data' with 'size' bytes.  return number of bytes actually read
-   void     (*skip)  (void *user,int n);                 // skip the next 'n' bytes, or 'unget' the last -n bytes if negative
-   int      (*eof)   (void *user);                       // returns nonzero if we are at end of file/data
-} stbi_io_callbacks;
-
-////////////////////////////////////
-//
-// 8-bits-per-channel interface
-//
-
-STBIDEF stbi_uc *stbi_load_from_memory   (stbi_uc           const *buffer, int len   , int *x, int *y, int *channels_in_file, int desired_channels);
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk  , void *user, int *x, int *y, int *channels_in_file, int desired_channels);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_uc *stbi_load            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
-STBIDEF stbi_uc *stbi_load_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
-// for stbi_load_from_file, file pointer is left pointing immediately after image
-#endif
-
-#ifndef STBI_NO_GIF
-STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
-#endif
-
-#ifdef STBI_WINDOWS_UTF8
-STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input);
-#endif
-
-////////////////////////////////////
-//
-// 16-bits-per-channel interface
-//
-
-STBIDEF stbi_us *stbi_load_16_from_memory   (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
-STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels);
-
-#ifndef STBI_NO_STDIO
-STBIDEF stbi_us *stbi_load_16          (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
-STBIDEF stbi_us *stbi_load_from_file_16(FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
-#endif
-
-////////////////////////////////////
-//
-// float-per-channel interface
-//
-#ifndef STBI_NO_LINEAR
-   STBIDEF float *stbi_loadf_from_memory     (stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels);
-   STBIDEF float *stbi_loadf_from_callbacks  (stbi_io_callbacks const *clbk, void *user, int *x, int *y,  int *channels_in_file, int desired_channels);
-
-   #ifndef STBI_NO_STDIO
-   STBIDEF float *stbi_loadf            (char const *filename, int *x, int *y, int *channels_in_file, int desired_channels);
-   STBIDEF float *stbi_loadf_from_file  (FILE *f, int *x, int *y, int *channels_in_file, int desired_channels);
-   #endif
-#endif
-
-#ifndef STBI_NO_HDR
-   STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma);
-   STBIDEF void   stbi_hdr_to_ldr_scale(float scale);
-#endif // STBI_NO_HDR
-
-#ifndef STBI_NO_LINEAR
-   STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma);
-   STBIDEF void   stbi_ldr_to_hdr_scale(float scale);
-#endif // STBI_NO_LINEAR
-
-// stbi_is_hdr is always defined, but always returns false if STBI_NO_HDR
-STBIDEF int    stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user);
-STBIDEF int    stbi_is_hdr_from_memory(stbi_uc const *buffer, int len);
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_is_hdr          (char const *filename);
-STBIDEF int      stbi_is_hdr_from_file(FILE *f);
-#endif // STBI_NO_STDIO
-
-
-// get a VERY brief reason for failure
-// on most compilers (and ALL modern mainstream compilers) this is threadsafe
-STBIDEF const char *stbi_failure_reason  (void);
-
-// free the loaded image -- this is just free()
-STBIDEF void     stbi_image_free      (void *retval_from_stbi_load);
-
-// get image dimensions & components without fully decoding
-STBIDEF int      stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp);
-STBIDEF int      stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len);
-STBIDEF int      stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *clbk, void *user);
-
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_info               (char const *filename,     int *x, int *y, int *comp);
-STBIDEF int      stbi_info_from_file     (FILE *f,                  int *x, int *y, int *comp);
-STBIDEF int      stbi_is_16_bit          (char const *filename);
-STBIDEF int      stbi_is_16_bit_from_file(FILE *f);
-#endif
-
-
-
-// for image formats that explicitly notate that they have premultiplied alpha,
-// we just return the colors as stored in the file. set this flag to force
-// unpremultiplication. results are undefined if the unpremultiply overflow.
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply);
-
-// indicate whether we should process iphone images back to canonical format,
-// or just pass them through "as-is"
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert);
-
-// flip the image vertically, so the first pixel in the output array is the bottom left
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip);
-
-// as above, but only applies to images loaded on the thread that calls the function
-// this function is only available if your compiler supports thread-local variables;
-// calling it will fail to link if your compiler doesn't
-STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply);
-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert);
-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip);
-
-// ZLIB client - used by PNG, available for other purposes
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen);
-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header);
-STBIDEF char *stbi_zlib_decode_malloc(const char *buffer, int len, int *outlen);
-STBIDEF int   stbi_zlib_decode_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
-
-STBIDEF char *stbi_zlib_decode_noheader_malloc(const char *buffer, int len, int *outlen);
-STBIDEF int   stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen);
-
-
-#ifdef __cplusplus
-}
-#endif
-
-//
-//
-////   end header file   /////////////////////////////////////////////////////
-#endif // STBI_INCLUDE_STB_IMAGE_H
-
-#ifdef STB_IMAGE_IMPLEMENTATION
-
-#if defined(STBI_ONLY_JPEG) || defined(STBI_ONLY_PNG) || defined(STBI_ONLY_BMP) \
-  || defined(STBI_ONLY_TGA) || defined(STBI_ONLY_GIF) || defined(STBI_ONLY_PSD) \
-  || defined(STBI_ONLY_HDR) || defined(STBI_ONLY_PIC) || defined(STBI_ONLY_PNM) \
-  || defined(STBI_ONLY_ZLIB)
-   #ifndef STBI_ONLY_JPEG
-   #define STBI_NO_JPEG
-   #endif
-   #ifndef STBI_ONLY_PNG
-   #define STBI_NO_PNG
-   #endif
-   #ifndef STBI_ONLY_BMP
-   #define STBI_NO_BMP
-   #endif
-   #ifndef STBI_ONLY_PSD
-   #define STBI_NO_PSD
-   #endif
-   #ifndef STBI_ONLY_TGA
-   #define STBI_NO_TGA
-   #endif
-   #ifndef STBI_ONLY_GIF
-   #define STBI_NO_GIF
-   #endif
-   #ifndef STBI_ONLY_HDR
-   #define STBI_NO_HDR
-   #endif
-   #ifndef STBI_ONLY_PIC
-   #define STBI_NO_PIC
-   #endif
-   #ifndef STBI_ONLY_PNM
-   #define STBI_NO_PNM
-   #endif
-#endif
-
-#if defined(STBI_NO_PNG) && !defined(STBI_SUPPORT_ZLIB) && !defined(STBI_NO_ZLIB)
-#define STBI_NO_ZLIB
-#endif
-
-
-#include <stdarg.h>
-#include <stddef.h> // ptrdiff_t on osx
-#include <stdlib.h>
-#include <string.h>
-#include <limits.h>
-
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR)
-#include <math.h>  // ldexp, pow
-#endif
-
-#ifndef STBI_NO_STDIO
-#include <stdio.h>
-#endif
-
-#ifndef STBI_ASSERT
-#include <assert.h>
-#define STBI_ASSERT(x) assert(x)
-#endif
-
-#ifdef __cplusplus
-#define STBI_EXTERN extern "C"
-#else
-#define STBI_EXTERN extern
-#endif
-
-
-#ifndef _MSC_VER
-   #ifdef __cplusplus
-   #define stbi_inline inline
-   #else
-   #define stbi_inline
-   #endif
-#else
-   #define stbi_inline __forceinline
-#endif
-
-#ifndef STBI_NO_THREAD_LOCALS
-   #if defined(__cplusplus) &&  __cplusplus >= 201103L
-      #define STBI_THREAD_LOCAL       thread_local
-   #elif defined(__GNUC__) && __GNUC__ < 5
-      #define STBI_THREAD_LOCAL       __thread
-   #elif defined(_MSC_VER)
-      #define STBI_THREAD_LOCAL       __declspec(thread)
-   #elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__)
-      #define STBI_THREAD_LOCAL       _Thread_local
-   #endif
-
-   #ifndef STBI_THREAD_LOCAL
-      #if defined(__GNUC__)
-        #define STBI_THREAD_LOCAL       __thread
-      #endif
-   #endif
-#endif
-
-#if defined(_MSC_VER) || defined(__SYMBIAN32__)
-typedef unsigned short stbi__uint16;
-typedef   signed short stbi__int16;
-typedef unsigned int   stbi__uint32;
-typedef   signed int   stbi__int32;
-#else
-#include <stdint.h>
-typedef uint16_t stbi__uint16;
-typedef int16_t  stbi__int16;
-typedef uint32_t stbi__uint32;
-typedef int32_t  stbi__int32;
-#endif
-
-// should produce compiler error if size is wrong
-typedef unsigned char validate_uint32[sizeof(stbi__uint32)==4 ? 1 : -1];
-
-#ifdef _MSC_VER
-#define STBI_NOTUSED(v)  (void)(v)
-#else
-#define STBI_NOTUSED(v)  (void)sizeof(v)
-#endif
-
-#ifdef _MSC_VER
-#define STBI_HAS_LROTL
-#endif
-
-#ifdef STBI_HAS_LROTL
-   #define stbi_lrot(x,y)  _lrotl(x,y)
-#else
-   #define stbi_lrot(x,y)  (((x) << (y)) | ((x) >> (-(y) & 31)))
-#endif
-
-#if defined(STBI_MALLOC) && defined(STBI_FREE) && (defined(STBI_REALLOC) || defined(STBI_REALLOC_SIZED))
-// ok
-#elif !defined(STBI_MALLOC) && !defined(STBI_FREE) && !defined(STBI_REALLOC) && !defined(STBI_REALLOC_SIZED)
-// ok
-#else
-#error "Must define all or none of STBI_MALLOC, STBI_FREE, and STBI_REALLOC (or STBI_REALLOC_SIZED)."
-#endif
-
-#ifndef STBI_MALLOC
-#define STBI_MALLOC(sz)           malloc(sz)
-#define STBI_REALLOC(p,newsz)     realloc(p,newsz)
-#define STBI_FREE(p)              free(p)
-#endif
-
-#ifndef STBI_REALLOC_SIZED
-#define STBI_REALLOC_SIZED(p,oldsz,newsz) STBI_REALLOC(p,newsz)
-#endif
-
-// x86/x64 detection
-#if defined(__x86_64__) || defined(_M_X64)
-#define STBI__X64_TARGET
-#elif defined(__i386) || defined(_M_IX86)
-#define STBI__X86_TARGET
-#endif
-
-#if defined(__GNUC__) && defined(STBI__X86_TARGET) && !defined(__SSE2__) && !defined(STBI_NO_SIMD)
-// gcc doesn't support sse2 intrinsics unless you compile with -msse2,
-// which in turn means it gets to use SSE2 everywhere. This is unfortunate,
-// but previous attempts to provide the SSE2 functions with runtime
-// detection caused numerous issues. The way architecture extensions are
-// exposed in GCC/Clang is, sadly, not really suited for one-file libs.
-// New behavior: if compiled with -msse2, we use SSE2 without any
-// detection; if not, we don't use it at all.
-#define STBI_NO_SIMD
-#endif
-
-#if defined(__MINGW32__) && defined(STBI__X86_TARGET) && !defined(STBI_MINGW_ENABLE_SSE2) && !defined(STBI_NO_SIMD)
-// Note that __MINGW32__ doesn't actually mean 32-bit, so we have to avoid STBI__X64_TARGET
-//
-// 32-bit MinGW wants ESP to be 16-byte aligned, but this is not in the
-// Windows ABI and VC++ as well as Windows DLLs don't maintain that invariant.
-// As a result, enabling SSE2 on 32-bit MinGW is dangerous when not
-// simultaneously enabling "-mstackrealign".
-//
-// See https://github.com/nothings/stb/issues/81 for more information.
-//
-// So default to no SSE2 on 32-bit MinGW. If you've read this far and added
-// -mstackrealign to your build settings, feel free to #define STBI_MINGW_ENABLE_SSE2.
-#define STBI_NO_SIMD
-#endif
-
-#if !defined(STBI_NO_SIMD) && (defined(STBI__X86_TARGET) || defined(STBI__X64_TARGET))
-#define STBI_SSE2
-#include <emmintrin.h>
-
-#ifdef _MSC_VER
-
-#if _MSC_VER >= 1400  // not VC6
-#include <intrin.h> // __cpuid
-static int stbi__cpuid3(void)
-{
-   int info[4];
-   __cpuid(info,1);
-   return info[3];
-}
-#else
-static int stbi__cpuid3(void)
-{
-   int res;
-   __asm {
-      mov  eax,1
-      cpuid
-      mov  res,edx
-   }
-   return res;
-}
-#endif
-
-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-
-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
-static int stbi__sse2_available(void)
-{
-   int info3 = stbi__cpuid3();
-   return ((info3 >> 26) & 1) != 0;
-}
-#endif
-
-#else // assume GCC-style if not VC++
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-
-#if !defined(STBI_NO_JPEG) && defined(STBI_SSE2)
-static int stbi__sse2_available(void)
-{
-   // If we're even attempting to compile this on GCC/Clang, that means
-   // -msse2 is on, which means the compiler is allowed to use SSE2
-   // instructions at will, and so are we.
-   return 1;
-}
-#endif
-
-#endif
-#endif
-
-// ARM NEON
-#if defined(STBI_NO_SIMD) && defined(STBI_NEON)
-#undef STBI_NEON
-#endif
-
-#ifdef STBI_NEON
-#include <arm_neon.h>
-#ifdef _MSC_VER
-#define STBI_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-#else
-#define STBI_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-#endif
-#endif
-
-#ifndef STBI_SIMD_ALIGN
-#define STBI_SIMD_ALIGN(type, name) type name
-#endif
-
-#ifndef STBI_MAX_DIMENSIONS
-#define STBI_MAX_DIMENSIONS (1 << 24)
-#endif
-
-///////////////////////////////////////////////
-//
-//  stbi__context struct and start_xxx functions
-
-// stbi__context structure is our basic context used by all images, so it
-// contains all the IO context, plus some basic image information
-typedef struct
-{
-   stbi__uint32 img_x, img_y;
-   int img_n, img_out_n;
-
-   stbi_io_callbacks io;
-   void *io_user_data;
-
-   int read_from_callbacks;
-   int buflen;
-   stbi_uc buffer_start[128];
-   int callback_already_read;
-
-   stbi_uc *img_buffer, *img_buffer_end;
-   stbi_uc *img_buffer_original, *img_buffer_original_end;
-} stbi__context;
-
-
-static void stbi__refill_buffer(stbi__context *s);
-
-// initialize a memory-decode context
-static void stbi__start_mem(stbi__context *s, stbi_uc const *buffer, int len)
-{
-   s->io.read = NULL;
-   s->read_from_callbacks = 0;
-   s->callback_already_read = 0;
-   s->img_buffer = s->img_buffer_original = (stbi_uc *) buffer;
-   s->img_buffer_end = s->img_buffer_original_end = (stbi_uc *) buffer+len;
-}
-
-// initialize a callback-based context
-static void stbi__start_callbacks(stbi__context *s, stbi_io_callbacks *c, void *user)
-{
-   s->io = *c;
-   s->io_user_data = user;
-   s->buflen = sizeof(s->buffer_start);
-   s->read_from_callbacks = 1;
-   s->callback_already_read = 0;
-   s->img_buffer = s->img_buffer_original = s->buffer_start;
-   stbi__refill_buffer(s);
-   s->img_buffer_original_end = s->img_buffer_end;
-}
-
-#ifndef STBI_NO_STDIO
-
-static int stbi__stdio_read(void *user, char *data, int size)
-{
-   return (int) fread(data,1,size,(FILE*) user);
-}
-
-static void stbi__stdio_skip(void *user, int n)
-{
-   int ch;
-   fseek((FILE*) user, n, SEEK_CUR);
-   ch = fgetc((FILE*) user);  /* have to read a byte to reset feof()'s flag */
-   if (ch != EOF) {
-      ungetc(ch, (FILE *) user);  /* push byte back onto stream if valid. */
-   }
-}
-
-static int stbi__stdio_eof(void *user)
-{
-   return feof((FILE*) user) || ferror((FILE *) user);
-}
-
-static stbi_io_callbacks stbi__stdio_callbacks =
-{
-   stbi__stdio_read,
-   stbi__stdio_skip,
-   stbi__stdio_eof,
-};
-
-static void stbi__start_file(stbi__context *s, FILE *f)
-{
-   stbi__start_callbacks(s, &stbi__stdio_callbacks, (void *) f);
-}
-
-//static void stop_file(stbi__context *s) { }
-
-#endif // !STBI_NO_STDIO
-
-static void stbi__rewind(stbi__context *s)
-{
-   // conceptually rewind SHOULD rewind to the beginning of the stream,
-   // but we just rewind to the beginning of the initial buffer, because
-   // we only use it after doing 'test', which only ever looks at at most 92 bytes
-   s->img_buffer = s->img_buffer_original;
-   s->img_buffer_end = s->img_buffer_original_end;
-}
-
-enum
-{
-   STBI_ORDER_RGB,
-   STBI_ORDER_BGR
-};
-
-typedef struct
-{
-   int bits_per_channel;
-   int num_channels;
-   int channel_order;
-} stbi__result_info;
-
-#ifndef STBI_NO_JPEG
-static int      stbi__jpeg_test(stbi__context *s);
-static void    *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PNG
-static int      stbi__png_test(stbi__context *s);
-static void    *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__png_info(stbi__context *s, int *x, int *y, int *comp);
-static int      stbi__png_is16(stbi__context *s);
-#endif
-
-#ifndef STBI_NO_BMP
-static int      stbi__bmp_test(stbi__context *s);
-static void    *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_TGA
-static int      stbi__tga_test(stbi__context *s);
-static void    *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__tga_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PSD
-static int      stbi__psd_test(stbi__context *s);
-static void    *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc);
-static int      stbi__psd_info(stbi__context *s, int *x, int *y, int *comp);
-static int      stbi__psd_is16(stbi__context *s);
-#endif
-
-#ifndef STBI_NO_HDR
-static int      stbi__hdr_test(stbi__context *s);
-static float   *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PIC
-static int      stbi__pic_test(stbi__context *s);
-static void    *stbi__pic_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__pic_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_GIF
-static int      stbi__gif_test(stbi__context *s);
-static void    *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static void    *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp);
-static int      stbi__gif_info(stbi__context *s, int *x, int *y, int *comp);
-#endif
-
-#ifndef STBI_NO_PNM
-static int      stbi__pnm_test(stbi__context *s);
-static void    *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri);
-static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp);
-static int      stbi__pnm_is16(stbi__context *s);
-#endif
-
-static
-#ifdef STBI_THREAD_LOCAL
-STBI_THREAD_LOCAL
-#endif
-const char *stbi__g_failure_reason;
-
-STBIDEF const char *stbi_failure_reason(void)
-{
-   return stbi__g_failure_reason;
-}
-
-#ifndef STBI_NO_FAILURE_STRINGS
-static int stbi__err(const char *str)
-{
-   stbi__g_failure_reason = str;
-   return 0;
-}
-#endif
-
-static void *stbi__malloc(size_t size)
-{
-    return STBI_MALLOC(size);
-}
-
-// stb_image uses ints pervasively, including for offset calculations.
-// therefore the largest decoded image size we can support with the
-// current code, even on 64-bit targets, is INT_MAX. this is not a
-// significant limitation for the intended use case.
-//
-// we do, however, need to make sure our size calculations don't
-// overflow. hence a few helper functions for size calculations that
-// multiply integers together, making sure that they're non-negative
-// and no overflow occurs.
-
-// return 1 if the sum is valid, 0 on overflow.
-// negative terms are considered invalid.
-static int stbi__addsizes_valid(int a, int b)
-{
-   if (b < 0) return 0;
-   // now 0 <= b <= INT_MAX, hence also
-   // 0 <= INT_MAX - b <= INTMAX.
-   // And "a + b <= INT_MAX" (which might overflow) is the
-   // same as a <= INT_MAX - b (no overflow)
-   return a <= INT_MAX - b;
-}
-
-// returns 1 if the product is valid, 0 on overflow.
-// negative factors are considered invalid.
-static int stbi__mul2sizes_valid(int a, int b)
-{
-   if (a < 0 || b < 0) return 0;
-   if (b == 0) return 1; // mul-by-0 is always safe
-   // portable way to check for no overflows in a*b
-   return a <= INT_MAX/b;
-}
-
-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
-// returns 1 if "a*b + add" has no negative terms/factors and doesn't overflow
-static int stbi__mad2sizes_valid(int a, int b, int add)
-{
-   return stbi__mul2sizes_valid(a, b) && stbi__addsizes_valid(a*b, add);
-}
-#endif
-
-// returns 1 if "a*b*c + add" has no negative terms/factors and doesn't overflow
-static int stbi__mad3sizes_valid(int a, int b, int c, int add)
-{
-   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
-      stbi__addsizes_valid(a*b*c, add);
-}
-
-// returns 1 if "a*b*c*d + add" has no negative terms/factors and doesn't overflow
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
-static int stbi__mad4sizes_valid(int a, int b, int c, int d, int add)
-{
-   return stbi__mul2sizes_valid(a, b) && stbi__mul2sizes_valid(a*b, c) &&
-      stbi__mul2sizes_valid(a*b*c, d) && stbi__addsizes_valid(a*b*c*d, add);
-}
-#endif
-
-#if !defined(STBI_NO_JPEG) || !defined(STBI_NO_PNG) || !defined(STBI_NO_TGA) || !defined(STBI_NO_HDR)
-// mallocs with size overflow checking
-static void *stbi__malloc_mad2(int a, int b, int add)
-{
-   if (!stbi__mad2sizes_valid(a, b, add)) return NULL;
-   return stbi__malloc(a*b + add);
-}
-#endif
-
-static void *stbi__malloc_mad3(int a, int b, int c, int add)
-{
-   if (!stbi__mad3sizes_valid(a, b, c, add)) return NULL;
-   return stbi__malloc(a*b*c + add);
-}
-
-#if !defined(STBI_NO_LINEAR) || !defined(STBI_NO_HDR) || !defined(STBI_NO_PNM)
-static void *stbi__malloc_mad4(int a, int b, int c, int d, int add)
-{
-   if (!stbi__mad4sizes_valid(a, b, c, d, add)) return NULL;
-   return stbi__malloc(a*b*c*d + add);
-}
-#endif
-
-// returns 1 if the sum of two signed ints is valid (between -2^31 and 2^31-1 inclusive), 0 on overflow.
-static int stbi__addints_valid(int a, int b)
-{
-   if ((a >= 0) != (b >= 0)) return 1; // a and b have different signs, so no overflow
-   if (a < 0 && b < 0) return a >= INT_MIN - b; // same as a + b >= INT_MIN; INT_MIN - b cannot overflow since b < 0.
-   return a <= INT_MAX - b;
-}
-
-// returns 1 if the product of two ints fits in a signed short, 0 on overflow.
-static int stbi__mul2shorts_valid(int a, int b)
-{
-   if (b == 0 || b == -1) return 1; // multiplication by 0 is always 0; check for -1 so SHRT_MIN/b doesn't overflow
-   if ((a >= 0) == (b >= 0)) return a <= SHRT_MAX/b; // product is positive, so similar to mul2sizes_valid
-   if (b < 0) return a <= SHRT_MIN / b; // same as a * b >= SHRT_MIN
-   return a >= SHRT_MIN / b;
-}
-
-// stbi__err - error
-// stbi__errpf - error returning pointer to float
-// stbi__errpuc - error returning pointer to unsigned char
-
-#ifdef STBI_NO_FAILURE_STRINGS
-   #define stbi__err(x,y)  0
-#elif defined(STBI_FAILURE_USERMSG)
-   #define stbi__err(x,y)  stbi__err(y)
-#else
-   #define stbi__err(x,y)  stbi__err(x)
-#endif
-
-#define stbi__errpf(x,y)   ((float *)(size_t) (stbi__err(x,y)?NULL:NULL))
-#define stbi__errpuc(x,y)  ((unsigned char *)(size_t) (stbi__err(x,y)?NULL:NULL))
-
-STBIDEF void stbi_image_free(void *retval_from_stbi_load)
-{
-   STBI_FREE(retval_from_stbi_load);
-}
-
-#ifndef STBI_NO_LINEAR
-static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp);
-#endif
-
-#ifndef STBI_NO_HDR
-static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp);
-#endif
-
-static int stbi__vertically_flip_on_load_global = 0;
-
-STBIDEF void stbi_set_flip_vertically_on_load(int flag_true_if_should_flip)
-{
-   stbi__vertically_flip_on_load_global = flag_true_if_should_flip;
-}
-
-#ifndef STBI_THREAD_LOCAL
-#define stbi__vertically_flip_on_load  stbi__vertically_flip_on_load_global
-#else
-static STBI_THREAD_LOCAL int stbi__vertically_flip_on_load_local, stbi__vertically_flip_on_load_set;
-
-STBIDEF void stbi_set_flip_vertically_on_load_thread(int flag_true_if_should_flip)
-{
-   stbi__vertically_flip_on_load_local = flag_true_if_should_flip;
-   stbi__vertically_flip_on_load_set = 1;
-}
-
-#define stbi__vertically_flip_on_load  (stbi__vertically_flip_on_load_set       \
-                                         ? stbi__vertically_flip_on_load_local  \
-                                         : stbi__vertically_flip_on_load_global)
-#endif // STBI_THREAD_LOCAL
-
-static void *stbi__load_main(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
-{
-   memset(ri, 0, sizeof(*ri)); // make sure it's initialized if we add new fields
-   ri->bits_per_channel = 8; // default is 8 so most paths don't have to be changed
-   ri->channel_order = STBI_ORDER_RGB; // all current input & output are this, but this is here so we can add BGR order
-   ri->num_channels = 0;
-
-   // test the formats with a very explicit header first (at least a FOURCC
-   // or distinctive magic number first)
-   #ifndef STBI_NO_PNG
-   if (stbi__png_test(s))  return stbi__png_load(s,x,y,comp,req_comp, ri);
-   #endif
-   #ifndef STBI_NO_BMP
-   if (stbi__bmp_test(s))  return stbi__bmp_load(s,x,y,comp,req_comp, ri);
-   #endif
-   #ifndef STBI_NO_GIF
-   if (stbi__gif_test(s))  return stbi__gif_load(s,x,y,comp,req_comp, ri);
-   #endif
-   #ifndef STBI_NO_PSD
-   if (stbi__psd_test(s))  return stbi__psd_load(s,x,y,comp,req_comp, ri, bpc);
-   #else
-   STBI_NOTUSED(bpc);
-   #endif
-   #ifndef STBI_NO_PIC
-   if (stbi__pic_test(s))  return stbi__pic_load(s,x,y,comp,req_comp, ri);
-   #endif
-
-   // then the formats that can end up attempting to load with just 1 or 2
-   // bytes matching expectations; these are prone to false positives, so
-   // try them later
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_test(s)) return stbi__jpeg_load(s,x,y,comp,req_comp, ri);
-   #endif
-   #ifndef STBI_NO_PNM
-   if (stbi__pnm_test(s))  return stbi__pnm_load(s,x,y,comp,req_comp, ri);
-   #endif
-
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_test(s)) {
-      float *hdr = stbi__hdr_load(s, x,y,comp,req_comp, ri);
-      return stbi__hdr_to_ldr(hdr, *x, *y, req_comp ? req_comp : *comp);
-   }
-   #endif
-
-   #ifndef STBI_NO_TGA
-   // test tga last because it's a crappy test!
-   if (stbi__tga_test(s))
-      return stbi__tga_load(s,x,y,comp,req_comp, ri);
-   #endif
-
-   return stbi__errpuc("unknown image type", "Image not of any known type, or corrupt");
-}
-
-static stbi_uc *stbi__convert_16_to_8(stbi__uint16 *orig, int w, int h, int channels)
-{
-   int i;
-   int img_len = w * h * channels;
-   stbi_uc *reduced;
-
-   reduced = (stbi_uc *) stbi__malloc(img_len);
-   if (reduced == NULL) return stbi__errpuc("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i)
-      reduced[i] = (stbi_uc)((orig[i] >> 8) & 0xFF); // top half of each byte is sufficient approx of 16->8 bit scaling
-
-   STBI_FREE(orig);
-   return reduced;
-}
-
-static stbi__uint16 *stbi__convert_8_to_16(stbi_uc *orig, int w, int h, int channels)
-{
-   int i;
-   int img_len = w * h * channels;
-   stbi__uint16 *enlarged;
-
-   enlarged = (stbi__uint16 *) stbi__malloc(img_len*2);
-   if (enlarged == NULL) return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
-
-   for (i = 0; i < img_len; ++i)
-      enlarged[i] = (stbi__uint16)((orig[i] << 8) + orig[i]); // replicate to high and low byte, maps 0->0, 255->0xffff
-
-   STBI_FREE(orig);
-   return enlarged;
-}
-
-static void stbi__vertical_flip(void *image, int w, int h, int bytes_per_pixel)
-{
-   int row;
-   size_t bytes_per_row = (size_t)w * bytes_per_pixel;
-   stbi_uc temp[2048];
-   stbi_uc *bytes = (stbi_uc *)image;
-
-   for (row = 0; row < (h>>1); row++) {
-      stbi_uc *row0 = bytes + row*bytes_per_row;
-      stbi_uc *row1 = bytes + (h - row - 1)*bytes_per_row;
-      // swap row0 with row1
-      size_t bytes_left = bytes_per_row;
-      while (bytes_left) {
-         size_t bytes_copy = (bytes_left < sizeof(temp)) ? bytes_left : sizeof(temp);
-         memcpy(temp, row0, bytes_copy);
-         memcpy(row0, row1, bytes_copy);
-         memcpy(row1, temp, bytes_copy);
-         row0 += bytes_copy;
-         row1 += bytes_copy;
-         bytes_left -= bytes_copy;
-      }
-   }
-}
-
-#ifndef STBI_NO_GIF
-static void stbi__vertical_flip_slices(void *image, int w, int h, int z, int bytes_per_pixel)
-{
-   int slice;
-   int slice_size = w * h * bytes_per_pixel;
-
-   stbi_uc *bytes = (stbi_uc *)image;
-   for (slice = 0; slice < z; ++slice) {
-      stbi__vertical_flip(bytes, w, h, bytes_per_pixel);
-      bytes += slice_size;
-   }
-}
-#endif
-
-static unsigned char *stbi__load_and_postprocess_8bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__result_info ri;
-   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 8);
-
-   if (result == NULL)
-      return NULL;
-
-   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
-   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
-
-   if (ri.bits_per_channel != 8) {
-      result = stbi__convert_16_to_8((stbi__uint16 *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
-      ri.bits_per_channel = 8;
-   }
-
-   // @TODO: move stbi__convert_format to here
-
-   if (stbi__vertically_flip_on_load) {
-      int channels = req_comp ? req_comp : *comp;
-      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi_uc));
-   }
-
-   return (unsigned char *) result;
-}
-
-static stbi__uint16 *stbi__load_and_postprocess_16bit(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__result_info ri;
-   void *result = stbi__load_main(s, x, y, comp, req_comp, &ri, 16);
-
-   if (result == NULL)
-      return NULL;
-
-   // it is the responsibility of the loaders to make sure we get either 8 or 16 bit.
-   STBI_ASSERT(ri.bits_per_channel == 8 || ri.bits_per_channel == 16);
-
-   if (ri.bits_per_channel != 16) {
-      result = stbi__convert_8_to_16((stbi_uc *) result, *x, *y, req_comp == 0 ? *comp : req_comp);
-      ri.bits_per_channel = 16;
-   }
-
-   // @TODO: move stbi__convert_format16 to here
-   // @TODO: special case RGB-to-Y (and RGBA-to-YA) for 8-bit-to-16-bit case to keep more precision
-
-   if (stbi__vertically_flip_on_load) {
-      int channels = req_comp ? req_comp : *comp;
-      stbi__vertical_flip(result, *x, *y, channels * sizeof(stbi__uint16));
-   }
-
-   return (stbi__uint16 *) result;
-}
-
-#if !defined(STBI_NO_HDR) && !defined(STBI_NO_LINEAR)
-static void stbi__float_postprocess(float *result, int *x, int *y, int *comp, int req_comp)
-{
-   if (stbi__vertically_flip_on_load && result != NULL) {
-      int channels = req_comp ? req_comp : *comp;
-      stbi__vertical_flip(result, *x, *y, channels * sizeof(float));
-   }
-}
-#endif
-
-#ifndef STBI_NO_STDIO
-
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-STBI_EXTERN __declspec(dllimport) int __stdcall MultiByteToWideChar(unsigned int cp, unsigned long flags, const char *str, int cbmb, wchar_t *widestr, int cchwide);
-STBI_EXTERN __declspec(dllimport) int __stdcall WideCharToMultiByte(unsigned int cp, unsigned long flags, const wchar_t *widestr, int cchwide, char *str, int cbmb, const char *defchar, int *used_default);
-#endif
-
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-STBIDEF int stbi_convert_wchar_to_utf8(char *buffer, size_t bufferlen, const wchar_t* input)
-{
-	return WideCharToMultiByte(65001 /* UTF8 */, 0, input, -1, buffer, (int) bufferlen, NULL, NULL);
-}
-#endif
-
-static FILE *stbi__fopen(char const *filename, char const *mode)
-{
-   FILE *f;
-#if defined(_WIN32) && defined(STBI_WINDOWS_UTF8)
-   wchar_t wMode[64];
-   wchar_t wFilename[1024];
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, filename, -1, wFilename, sizeof(wFilename)/sizeof(*wFilename)))
-      return 0;
-
-	if (0 == MultiByteToWideChar(65001 /* UTF8 */, 0, mode, -1, wMode, sizeof(wMode)/sizeof(*wMode)))
-      return 0;
-
-#if defined(_MSC_VER) && _MSC_VER >= 1400
-	if (0 != _wfopen_s(&f, wFilename, wMode))
-		f = 0;
-#else
-   f = _wfopen(wFilename, wMode);
-#endif
-
-#elif defined(_MSC_VER) && _MSC_VER >= 1400
-   if (0 != fopen_s(&f, filename, mode))
-      f=0;
-#else
-   f = fopen(filename, mode);
-#endif
-   return f;
-}
-
-
-STBIDEF stbi_uc *stbi_load(char const *filename, int *x, int *y, int *comp, int req_comp)
-{
-   FILE *f = stbi__fopen(filename, "rb");
-   unsigned char *result;
-   if (!f) return stbi__errpuc("can't fopen", "Unable to open file");
-   result = stbi_load_from_file(f,x,y,comp,req_comp);
-   fclose(f);
-   return result;
-}
-
-STBIDEF stbi_uc *stbi_load_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char *result;
-   stbi__context s;
-   stbi__start_file(&s,f);
-   result = stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
-   if (result) {
-      // need to 'unget' all the characters in the IO buffer
-      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
-   }
-   return result;
-}
-
-STBIDEF stbi__uint16 *stbi_load_from_file_16(FILE *f, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__uint16 *result;
-   stbi__context s;
-   stbi__start_file(&s,f);
-   result = stbi__load_and_postprocess_16bit(&s,x,y,comp,req_comp);
-   if (result) {
-      // need to 'unget' all the characters in the IO buffer
-      fseek(f, - (int) (s.img_buffer_end - s.img_buffer), SEEK_CUR);
-   }
-   return result;
-}
-
-STBIDEF stbi_us *stbi_load_16(char const *filename, int *x, int *y, int *comp, int req_comp)
-{
-   FILE *f = stbi__fopen(filename, "rb");
-   stbi__uint16 *result;
-   if (!f) return (stbi_us *) stbi__errpuc("can't fopen", "Unable to open file");
-   result = stbi_load_from_file_16(f,x,y,comp,req_comp);
-   fclose(f);
-   return result;
-}
-
-
-#endif //!STBI_NO_STDIO
-
-STBIDEF stbi_us *stbi_load_16_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *channels_in_file, int desired_channels)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
-}
-
-STBIDEF stbi_us *stbi_load_16_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *channels_in_file, int desired_channels)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *)clbk, user);
-   return stbi__load_and_postprocess_16bit(&s,x,y,channels_in_file,desired_channels);
-}
-
-STBIDEF stbi_uc *stbi_load_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
-}
-
-STBIDEF stbi_uc *stbi_load_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__load_and_postprocess_8bit(&s,x,y,comp,req_comp);
-}
-
-#ifndef STBI_NO_GIF
-STBIDEF stbi_uc *stbi_load_gif_from_memory(stbi_uc const *buffer, int len, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
-{
-   unsigned char *result;
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-
-   result = (unsigned char*) stbi__load_gif_main(&s, delays, x, y, z, comp, req_comp);
-   if (stbi__vertically_flip_on_load) {
-      stbi__vertical_flip_slices( result, *x, *y, *z, *comp );
-   }
-
-   return result;
-}
-#endif
-
-#ifndef STBI_NO_LINEAR
-static float *stbi__loadf_main(stbi__context *s, int *x, int *y, int *comp, int req_comp)
-{
-   unsigned char *data;
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_test(s)) {
-      stbi__result_info ri;
-      float *hdr_data = stbi__hdr_load(s,x,y,comp,req_comp, &ri);
-      if (hdr_data)
-         stbi__float_postprocess(hdr_data,x,y,comp,req_comp);
-      return hdr_data;
-   }
-   #endif
-   data = stbi__load_and_postprocess_8bit(s, x, y, comp, req_comp);
-   if (data)
-      return stbi__ldr_to_hdr(data, *x, *y, req_comp ? req_comp : *comp);
-   return stbi__errpf("unknown image type", "Image not of any known type, or corrupt");
-}
-
-STBIDEF float *stbi_loadf_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-
-STBIDEF float *stbi_loadf_from_callbacks(stbi_io_callbacks const *clbk, void *user, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF float *stbi_loadf(char const *filename, int *x, int *y, int *comp, int req_comp)
-{
-   float *result;
-   FILE *f = stbi__fopen(filename, "rb");
-   if (!f) return stbi__errpf("can't fopen", "Unable to open file");
-   result = stbi_loadf_from_file(f,x,y,comp,req_comp);
-   fclose(f);
-   return result;
-}
-
-STBIDEF float *stbi_loadf_from_file(FILE *f, int *x, int *y, int *comp, int req_comp)
-{
-   stbi__context s;
-   stbi__start_file(&s,f);
-   return stbi__loadf_main(&s,x,y,comp,req_comp);
-}
-#endif // !STBI_NO_STDIO
-
-#endif // !STBI_NO_LINEAR
-
-// these is-hdr-or-not is defined independent of whether STBI_NO_LINEAR is
-// defined, for API simplicity; if STBI_NO_LINEAR is defined, it always
-// reports false!
-
-STBIDEF int stbi_is_hdr_from_memory(stbi_uc const *buffer, int len)
-{
-   #ifndef STBI_NO_HDR
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__hdr_test(&s);
-   #else
-   STBI_NOTUSED(buffer);
-   STBI_NOTUSED(len);
-   return 0;
-   #endif
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int      stbi_is_hdr          (char const *filename)
-{
-   FILE *f = stbi__fopen(filename, "rb");
-   int result=0;
-   if (f) {
-      result = stbi_is_hdr_from_file(f);
-      fclose(f);
-   }
-   return result;
-}
-
-STBIDEF int stbi_is_hdr_from_file(FILE *f)
-{
-   #ifndef STBI_NO_HDR
-   long pos = ftell(f);
-   int res;
-   stbi__context s;
-   stbi__start_file(&s,f);
-   res = stbi__hdr_test(&s);
-   fseek(f, pos, SEEK_SET);
-   return res;
-   #else
-   STBI_NOTUSED(f);
-   return 0;
-   #endif
-}
-#endif // !STBI_NO_STDIO
-
-STBIDEF int      stbi_is_hdr_from_callbacks(stbi_io_callbacks const *clbk, void *user)
-{
-   #ifndef STBI_NO_HDR
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) clbk, user);
-   return stbi__hdr_test(&s);
-   #else
-   STBI_NOTUSED(clbk);
-   STBI_NOTUSED(user);
-   return 0;
-   #endif
-}
-
-#ifndef STBI_NO_LINEAR
-static float stbi__l2h_gamma=2.2f, stbi__l2h_scale=1.0f;
-
-STBIDEF void   stbi_ldr_to_hdr_gamma(float gamma) { stbi__l2h_gamma = gamma; }
-STBIDEF void   stbi_ldr_to_hdr_scale(float scale) { stbi__l2h_scale = scale; }
-#endif
-
-static float stbi__h2l_gamma_i=1.0f/2.2f, stbi__h2l_scale_i=1.0f;
-
-STBIDEF void   stbi_hdr_to_ldr_gamma(float gamma) { stbi__h2l_gamma_i = 1/gamma; }
-STBIDEF void   stbi_hdr_to_ldr_scale(float scale) { stbi__h2l_scale_i = 1/scale; }
-
-
-//////////////////////////////////////////////////////////////////////////////
-//
-// Common code used by all image loaders
-//
-
-enum
-{
-   STBI__SCAN_load=0,
-   STBI__SCAN_type,
-   STBI__SCAN_header
-};
-
-static void stbi__refill_buffer(stbi__context *s)
-{
-   int n = (s->io.read)(s->io_user_data,(char*)s->buffer_start,s->buflen);
-   s->callback_already_read += (int) (s->img_buffer - s->img_buffer_original);
-   if (n == 0) {
-      // at end of file, treat same as if from memory, but need to handle case
-      // where s->img_buffer isn't pointing to safe memory, e.g. 0-byte file
-      s->read_from_callbacks = 0;
-      s->img_buffer = s->buffer_start;
-      s->img_buffer_end = s->buffer_start+1;
-      *s->img_buffer = 0;
-   } else {
-      s->img_buffer = s->buffer_start;
-      s->img_buffer_end = s->buffer_start + n;
-   }
-}
-
-stbi_inline static stbi_uc stbi__get8(stbi__context *s)
-{
-   if (s->img_buffer < s->img_buffer_end)
-      return *s->img_buffer++;
-   if (s->read_from_callbacks) {
-      stbi__refill_buffer(s);
-      return *s->img_buffer++;
-   }
-   return 0;
-}
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_HDR) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-stbi_inline static int stbi__at_eof(stbi__context *s)
-{
-   if (s->io.read) {
-      if (!(s->io.eof)(s->io_user_data)) return 0;
-      // if feof() is true, check if buffer = end
-      // special case: we've only got the special 0 character at the end
-      if (s->read_from_callbacks == 0) return 1;
-   }
-
-   return s->img_buffer >= s->img_buffer_end;
-}
-#endif
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC)
-// nothing
-#else
-static void stbi__skip(stbi__context *s, int n)
-{
-   if (n == 0) return;  // already there!
-   if (n < 0) {
-      s->img_buffer = s->img_buffer_end;
-      return;
-   }
-   if (s->io.read) {
-      int blen = (int) (s->img_buffer_end - s->img_buffer);
-      if (blen < n) {
-         s->img_buffer = s->img_buffer_end;
-         (s->io.skip)(s->io_user_data, n - blen);
-         return;
-      }
-   }
-   s->img_buffer += n;
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_TGA) && defined(STBI_NO_HDR) && defined(STBI_NO_PNM)
-// nothing
-#else
-static int stbi__getn(stbi__context *s, stbi_uc *buffer, int n)
-{
-   if (s->io.read) {
-      int blen = (int) (s->img_buffer_end - s->img_buffer);
-      if (blen < n) {
-         int res, count;
-
-         memcpy(buffer, s->img_buffer, blen);
-
-         count = (s->io.read)(s->io_user_data, (char*) buffer + blen, n - blen);
-         res = (count == (n-blen));
-         s->img_buffer = s->img_buffer_end;
-         return res;
-      }
-   }
-
-   if (s->img_buffer+n <= s->img_buffer_end) {
-      memcpy(buffer, s->img_buffer, n);
-      s->img_buffer += n;
-      return 1;
-   } else
-      return 0;
-}
-#endif
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
-// nothing
-#else
-static int stbi__get16be(stbi__context *s)
-{
-   int z = stbi__get8(s);
-   return (z << 8) + stbi__get8(s);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD) && defined(STBI_NO_PIC)
-// nothing
-#else
-static stbi__uint32 stbi__get32be(stbi__context *s)
-{
-   stbi__uint32 z = stbi__get16be(s);
-   return (z << 16) + stbi__get16be(s);
-}
-#endif
-
-#if defined(STBI_NO_BMP) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF)
-// nothing
-#else
-static int stbi__get16le(stbi__context *s)
-{
-   int z = stbi__get8(s);
-   return z + (stbi__get8(s) << 8);
-}
-#endif
-
-#ifndef STBI_NO_BMP
-static stbi__uint32 stbi__get32le(stbi__context *s)
-{
-   stbi__uint32 z = stbi__get16le(s);
-   z += (stbi__uint32)stbi__get16le(s) << 16;
-   return z;
-}
-#endif
-
-#define STBI__BYTECAST(x)  ((stbi_uc) ((x) & 255))  // truncate int to byte without warnings
-
-#if defined(STBI_NO_JPEG) && defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-//////////////////////////////////////////////////////////////////////////////
-//
-//  generic converter from built-in img_n to req_comp
-//    individual types do this automatically as much as possible (e.g. jpeg
-//    does all cases internally since it needs to colorspace convert anyway,
-//    and it never has alpha, so very few cases ). png can automatically
-//    interleave an alpha=255 channel, but falls back to this for other cases
-//
-//  assume data buffer is malloced, so malloc a new one and free that one
-//  only failure mode is malloc failing
-
-static stbi_uc stbi__compute_y(int r, int g, int b)
-{
-   return (stbi_uc) (((r*77) + (g*150) +  (29*b)) >> 8);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_BMP) && defined(STBI_NO_PSD) && defined(STBI_NO_TGA) && defined(STBI_NO_GIF) && defined(STBI_NO_PIC) && defined(STBI_NO_PNM)
-// nothing
-#else
-static unsigned char *stbi__convert_format(unsigned char *data, int img_n, int req_comp, unsigned int x, unsigned int y)
-{
-   int i,j;
-   unsigned char *good;
-
-   if (req_comp == img_n) return data;
-   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
-
-   good = (unsigned char *) stbi__malloc_mad3(req_comp, x, y, 0);
-   if (good == NULL) {
-      STBI_FREE(data);
-      return stbi__errpuc("outofmem", "Out of memory");
-   }
-
-   for (j=0; j < (int) y; ++j) {
-      unsigned char *src  = data + j * x * img_n   ;
-      unsigned char *dest = good + j * x * req_comp;
-
-      #define STBI__COMBO(a,b)  ((a)*8+(b))
-      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
-      // convert source image with img_n components to one with req_comp components;
-      // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (STBI__COMBO(img_n, req_comp)) {
-         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=255;                                     } break;
-         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
-         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=255;                     } break;
-         STBI__CASE(2,1) { dest[0]=src[0];                                                  } break;
-         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                  } break;
-         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                  } break;
-         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=255;        } break;
-         STBI__CASE(3,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
-         STBI__CASE(3,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = 255;    } break;
-         STBI__CASE(4,1) { dest[0]=stbi__compute_y(src[0],src[1],src[2]);                   } break;
-         STBI__CASE(4,2) { dest[0]=stbi__compute_y(src[0],src[1],src[2]); dest[1] = src[3]; } break;
-         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                    } break;
-         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return stbi__errpuc("unsupported", "Unsupported format conversion");
-      }
-      #undef STBI__CASE
-   }
-
-   STBI_FREE(data);
-   return good;
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
-// nothing
-#else
-static stbi__uint16 stbi__compute_y_16(int r, int g, int b)
-{
-   return (stbi__uint16) (((r*77) + (g*150) +  (29*b)) >> 8);
-}
-#endif
-
-#if defined(STBI_NO_PNG) && defined(STBI_NO_PSD)
-// nothing
-#else
-static stbi__uint16 *stbi__convert_format16(stbi__uint16 *data, int img_n, int req_comp, unsigned int x, unsigned int y)
-{
-   int i,j;
-   stbi__uint16 *good;
-
-   if (req_comp == img_n) return data;
-   STBI_ASSERT(req_comp >= 1 && req_comp <= 4);
-
-   good = (stbi__uint16 *) stbi__malloc(req_comp * x * y * 2);
-   if (good == NULL) {
-      STBI_FREE(data);
-      return (stbi__uint16 *) stbi__errpuc("outofmem", "Out of memory");
-   }
-
-   for (j=0; j < (int) y; ++j) {
-      stbi__uint16 *src  = data + j * x * img_n   ;
-      stbi__uint16 *dest = good + j * x * req_comp;
-
-      #define STBI__COMBO(a,b)  ((a)*8+(b))
-      #define STBI__CASE(a,b)   case STBI__COMBO(a,b): for(i=x-1; i >= 0; --i, src += a, dest += b)
-      // convert source image with img_n components to one with req_comp components;
-      // avoid switch per pixel, so use switch per scanline and massive macros
-      switch (STBI__COMBO(img_n, req_comp)) {
-         STBI__CASE(1,2) { dest[0]=src[0]; dest[1]=0xffff;                                     } break;
-         STBI__CASE(1,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
-         STBI__CASE(1,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=0xffff;                     } break;
-         STBI__CASE(2,1) { dest[0]=src[0];                                                     } break;
-         STBI__CASE(2,3) { dest[0]=dest[1]=dest[2]=src[0];                                     } break;
-         STBI__CASE(2,4) { dest[0]=dest[1]=dest[2]=src[0]; dest[3]=src[1];                     } break;
-         STBI__CASE(3,4) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];dest[3]=0xffff;        } break;
-         STBI__CASE(3,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
-         STBI__CASE(3,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = 0xffff; } break;
-         STBI__CASE(4,1) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]);                   } break;
-         STBI__CASE(4,2) { dest[0]=stbi__compute_y_16(src[0],src[1],src[2]); dest[1] = src[3]; } break;
-         STBI__CASE(4,3) { dest[0]=src[0];dest[1]=src[1];dest[2]=src[2];                       } break;
-         default: STBI_ASSERT(0); STBI_FREE(data); STBI_FREE(good); return (stbi__uint16*) stbi__errpuc("unsupported", "Unsupported format conversion");
-      }
-      #undef STBI__CASE
-   }
-
-   STBI_FREE(data);
-   return good;
-}
-#endif
-
-#ifndef STBI_NO_LINEAR
-static float   *stbi__ldr_to_hdr(stbi_uc *data, int x, int y, int comp)
-{
-   int i,k,n;
-   float *output;
-   if (!data) return NULL;
-   output = (float *) stbi__malloc_mad4(x, y, comp, sizeof(float), 0);
-   if (output == NULL) { STBI_FREE(data); return stbi__errpf("outofmem", "Out of memory"); }
-   // compute number of non-alpha components
-   if (comp & 1) n = comp; else n = comp-1;
-   for (i=0; i < x*y; ++i) {
-      for (k=0; k < n; ++k) {
-         output[i*comp + k] = (float) (pow(data[i*comp+k]/255.0f, stbi__l2h_gamma) * stbi__l2h_scale);
-      }
-   }
-   if (n < comp) {
-      for (i=0; i < x*y; ++i) {
-         output[i*comp + n] = data[i*comp + n]/255.0f;
-      }
-   }
-   STBI_FREE(data);
-   return output;
-}
-#endif
-
-#ifndef STBI_NO_HDR
-#define stbi__float2int(x)   ((int) (x))
-static stbi_uc *stbi__hdr_to_ldr(float   *data, int x, int y, int comp)
-{
-   int i,k,n;
-   stbi_uc *output;
-   if (!data) return NULL;
-   output = (stbi_uc *) stbi__malloc_mad3(x, y, comp, 0);
-   if (output == NULL) { STBI_FREE(data); return stbi__errpuc("outofmem", "Out of memory"); }
-   // compute number of non-alpha components
-   if (comp & 1) n = comp; else n = comp-1;
-   for (i=0; i < x*y; ++i) {
-      for (k=0; k < n; ++k) {
-         float z = (float) pow(data[i*comp+k]*stbi__h2l_scale_i, stbi__h2l_gamma_i) * 255 + 0.5f;
-         if (z < 0) z = 0;
-         if (z > 255) z = 255;
-         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
-      }
-      if (k < comp) {
-         float z = data[i*comp+k] * 255 + 0.5f;
-         if (z < 0) z = 0;
-         if (z > 255) z = 255;
-         output[i*comp + k] = (stbi_uc) stbi__float2int(z);
-      }
-   }
-   STBI_FREE(data);
-   return output;
-}
-#endif
-
-//////////////////////////////////////////////////////////////////////////////
-//
-//  "baseline" JPEG/JFIF decoder
-//
-//    simple implementation
-//      - doesn't support delayed output of y-dimension
-//      - simple interface (only one output format: 8-bit interleaved RGB)
-//      - doesn't try to recover corrupt jpegs
-//      - doesn't allow partial loading, loading multiple at once
-//      - still fast on x86 (copying globals into locals doesn't help x86)
-//      - allocates lots of intermediate memory (full size of all components)
-//        - non-interleaved case requires this anyway
-//        - allows good upsampling (see next)
-//    high-quality
-//      - upsampled channels are bilinearly interpolated, even across blocks
-//      - quality integer IDCT derived from IJG's 'slow'
-//    performance
-//      - fast huffman; reasonable integer IDCT
-//      - some SIMD kernels for common paths on targets with SSE2/NEON
-//      - uses a lot of intermediate memory, could cache poorly
-
-#ifndef STBI_NO_JPEG
-
-// huffman decoding acceleration
-#define FAST_BITS   9  // larger handles more cases; smaller stomps less cache
-
-typedef struct
-{
-   stbi_uc  fast[1 << FAST_BITS];
-   // weirdly, repacking this into AoS is a 10% speed loss, instead of a win
-   stbi__uint16 code[256];
-   stbi_uc  values[256];
-   stbi_uc  size[257];
-   unsigned int maxcode[18];
-   int    delta[17];   // old 'firstsymbol' - old 'firstcode'
-} stbi__huffman;
-
-typedef struct
-{
-   stbi__context *s;
-   stbi__huffman huff_dc[4];
-   stbi__huffman huff_ac[4];
-   stbi__uint16 dequant[4][64];
-   stbi__int16 fast_ac[4][1 << FAST_BITS];
-
-// sizes for components, interleaved MCUs
-   int img_h_max, img_v_max;
-   int img_mcu_x, img_mcu_y;
-   int img_mcu_w, img_mcu_h;
-
-// definition of jpeg image component
-   struct
-   {
-      int id;
-      int h,v;
-      int tq;
-      int hd,ha;
-      int dc_pred;
-
-      int x,y,w2,h2;
-      stbi_uc *data;
-      void *raw_data, *raw_coeff;
-      stbi_uc *linebuf;
-      short   *coeff;   // progressive only
-      int      coeff_w, coeff_h; // number of 8x8 coefficient blocks
-   } img_comp[4];
-
-   stbi__uint32   code_buffer; // jpeg entropy-coded buffer
-   int            code_bits;   // number of valid bits
-   unsigned char  marker;      // marker seen while filling entropy buffer
-   int            nomore;      // flag if we saw a marker so must stop
-
-   int            progressive;
-   int            spec_start;
-   int            spec_end;
-   int            succ_high;
-   int            succ_low;
-   int            eob_run;
-   int            jfif;
-   int            app14_color_transform; // Adobe APP14 tag
-   int            rgb;
-
-   int scan_n, order[4];
-   int restart_interval, todo;
-
-// kernels
-   void (*idct_block_kernel)(stbi_uc *out, int out_stride, short data[64]);
-   void (*YCbCr_to_RGB_kernel)(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step);
-   stbi_uc *(*resample_row_hv_2_kernel)(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs);
-} stbi__jpeg;
-
-static int stbi__build_huffman(stbi__huffman *h, int *count)
-{
-   int i,j,k=0;
-   unsigned int code;
-   // build size list for each symbol (from JPEG spec)
-   for (i=0; i < 16; ++i) {
-      for (j=0; j < count[i]; ++j) {
-         h->size[k++] = (stbi_uc) (i+1);
-         if(k >= 257) return stbi__err("bad size list","Corrupt JPEG");
-      }
-   }
-   h->size[k] = 0;
-
-   // compute actual symbols (from jpeg spec)
-   code = 0;
-   k = 0;
-   for(j=1; j <= 16; ++j) {
-      // compute delta to add to code to compute symbol id
-      h->delta[j] = k - code;
-      if (h->size[k] == j) {
-         while (h->size[k] == j)
-            h->code[k++] = (stbi__uint16) (code++);
-         if (code-1 >= (1u << j)) return stbi__err("bad code lengths","Corrupt JPEG");
-      }
-      // compute largest code + 1 for this size, preshifted as needed later
-      h->maxcode[j] = code << (16-j);
-      code <<= 1;
-   }
-   h->maxcode[j] = 0xffffffff;
-
-   // build non-spec acceleration table; 255 is flag for not-accelerated
-   memset(h->fast, 255, 1 << FAST_BITS);
-   for (i=0; i < k; ++i) {
-      int s = h->size[i];
-      if (s <= FAST_BITS) {
-         int c = h->code[i] << (FAST_BITS-s);
-         int m = 1 << (FAST_BITS-s);
-         for (j=0; j < m; ++j) {
-            h->fast[c+j] = (stbi_uc) i;
-         }
-      }
-   }
-   return 1;
-}
-
-// build a table that decodes both magnitude and value of small ACs in
-// one go.
-static void stbi__build_fast_ac(stbi__int16 *fast_ac, stbi__huffman *h)
-{
-   int i;
-   for (i=0; i < (1 << FAST_BITS); ++i) {
-      stbi_uc fast = h->fast[i];
-      fast_ac[i] = 0;
-      if (fast < 255) {
-         int rs = h->values[fast];
-         int run = (rs >> 4) & 15;
-         int magbits = rs & 15;
-         int len = h->size[fast];
-
-         if (magbits && len + magbits <= FAST_BITS) {
-            // magnitude code followed by receive_extend code
-            int k = ((i << len) & ((1 << FAST_BITS) - 1)) >> (FAST_BITS - magbits);
-            int m = 1 << (magbits - 1);
-            if (k < m) k += (~0U << magbits) + 1;
-            // if the result is small enough, we can fit it in fast_ac table
-            if (k >= -128 && k <= 127)
-               fast_ac[i] = (stbi__int16) ((k * 256) + (run * 16) + (len + magbits));
-         }
-      }
-   }
-}
-
-static void stbi__grow_buffer_unsafe(stbi__jpeg *j)
-{
-   do {
-      unsigned int b = j->nomore ? 0 : stbi__get8(j->s);
-      if (b == 0xff) {
-         int c = stbi__get8(j->s);
-         while (c == 0xff) c = stbi__get8(j->s); // consume fill bytes
-         if (c != 0) {
-            j->marker = (unsigned char) c;
-            j->nomore = 1;
-            return;
-         }
-      }
-      j->code_buffer |= b << (24 - j->code_bits);
-      j->code_bits += 8;
-   } while (j->code_bits <= 24);
-}
-
-// (1 << n) - 1
-static const stbi__uint32 stbi__bmask[17]={0,1,3,7,15,31,63,127,255,511,1023,2047,4095,8191,16383,32767,65535};
-
-// decode a jpeg huffman value from the bitstream
-stbi_inline static int stbi__jpeg_huff_decode(stbi__jpeg *j, stbi__huffman *h)
-{
-   unsigned int temp;
-   int c,k;
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-
-   // look at the top FAST_BITS and determine what symbol ID it is,
-   // if the code is <= FAST_BITS
-   c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-   k = h->fast[c];
-   if (k < 255) {
-      int s = h->size[k];
-      if (s > j->code_bits)
-         return -1;
-      j->code_buffer <<= s;
-      j->code_bits -= s;
-      return h->values[k];
-   }
-
-   // naive test is to shift the code_buffer down so k bits are
-   // valid, then test against maxcode. To speed this up, we've
-   // preshifted maxcode left so that it has (16-k) 0s at the
-   // end; in other words, regardless of the number of bits, it
-   // wants to be compared against something shifted to have 16;
-   // that way we don't need to shift inside the loop.
-   temp = j->code_buffer >> 16;
-   for (k=FAST_BITS+1 ; ; ++k)
-      if (temp < h->maxcode[k])
-         break;
-   if (k == 17) {
-      // error! code not found
-      j->code_bits -= 16;
-      return -1;
-   }
-
-   if (k > j->code_bits)
-      return -1;
-
-   // convert the huffman code to the symbol id
-   c = ((j->code_buffer >> (32 - k)) & stbi__bmask[k]) + h->delta[k];
-   if(c < 0 || c >= 256) // symbol id out of bounds!
-       return -1;
-   STBI_ASSERT((((j->code_buffer) >> (32 - h->size[c])) & stbi__bmask[h->size[c]]) == h->code[c]);
-
-   // convert the id to a symbol
-   j->code_bits -= k;
-   j->code_buffer <<= k;
-   return h->values[c];
-}
-
-// bias[n] = (-1<<n) + 1
-static const int stbi__jbias[16] = {0,-1,-3,-7,-15,-31,-63,-127,-255,-511,-1023,-2047,-4095,-8191,-16383,-32767};
-
-// combined JPEG 'receive' and JPEG 'extend', since baseline
-// always extends everything it receives.
-stbi_inline static int stbi__extend_receive(stbi__jpeg *j, int n)
-{
-   unsigned int k;
-   int sgn;
-   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
-   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
-
-   sgn = j->code_buffer >> 31; // sign bit always in MSB; 0 if MSB clear (positive), 1 if MSB set (negative)
-   k = stbi_lrot(j->code_buffer, n);
-   j->code_buffer = k & ~stbi__bmask[n];
-   k &= stbi__bmask[n];
-   j->code_bits -= n;
-   return k + (stbi__jbias[n] & (sgn - 1));
-}
-
-// get some unsigned bits
-stbi_inline static int stbi__jpeg_get_bits(stbi__jpeg *j, int n)
-{
-   unsigned int k;
-   if (j->code_bits < n) stbi__grow_buffer_unsafe(j);
-   if (j->code_bits < n) return 0; // ran out of bits from stream, return 0s intead of continuing
-   k = stbi_lrot(j->code_buffer, n);
-   j->code_buffer = k & ~stbi__bmask[n];
-   k &= stbi__bmask[n];
-   j->code_bits -= n;
-   return k;
-}
-
-stbi_inline static int stbi__jpeg_get_bit(stbi__jpeg *j)
-{
-   unsigned int k;
-   if (j->code_bits < 1) stbi__grow_buffer_unsafe(j);
-   if (j->code_bits < 1) return 0; // ran out of bits from stream, return 0s intead of continuing
-   k = j->code_buffer;
-   j->code_buffer <<= 1;
-   --j->code_bits;
-   return k & 0x80000000;
-}
-
-// given a value that's at position X in the zigzag stream,
-// where does it appear in the 8x8 matrix coded as row-major?
-static const stbi_uc stbi__jpeg_dezigzag[64+15] =
-{
-    0,  1,  8, 16,  9,  2,  3, 10,
-   17, 24, 32, 25, 18, 11,  4,  5,
-   12, 19, 26, 33, 40, 48, 41, 34,
-   27, 20, 13,  6,  7, 14, 21, 28,
-   35, 42, 49, 56, 57, 50, 43, 36,
-   29, 22, 15, 23, 30, 37, 44, 51,
-   58, 59, 52, 45, 38, 31, 39, 46,
-   53, 60, 61, 54, 47, 55, 62, 63,
-   // let corrupt input sample past end
-   63, 63, 63, 63, 63, 63, 63, 63,
-   63, 63, 63, 63, 63, 63, 63
-};
-
-// decode one 64-entry block--
-static int stbi__jpeg_decode_block(stbi__jpeg *j, short data[64], stbi__huffman *hdc, stbi__huffman *hac, stbi__int16 *fac, int b, stbi__uint16 *dequant)
-{
-   int diff,dc,k;
-   int t;
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-   t = stbi__jpeg_huff_decode(j, hdc);
-   if (t < 0 || t > 15) return stbi__err("bad huffman code","Corrupt JPEG");
-
-   // 0 all the ac values now so we can do it 32-bits at a time
-   memset(data,0,64*sizeof(data[0]));
-
-   diff = t ? stbi__extend_receive(j, t) : 0;
-   if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta","Corrupt JPEG");
-   dc = j->img_comp[b].dc_pred + diff;
-   j->img_comp[b].dc_pred = dc;
-   if (!stbi__mul2shorts_valid(dc, dequant[0])) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-   data[0] = (short) (dc * dequant[0]);
-
-   // decode AC components, see JPEG spec
-   k = 1;
-   do {
-      unsigned int zig;
-      int c,r,s;
-      if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-      c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-      r = fac[c];
-      if (r) { // fast-AC path
-         k += (r >> 4) & 15; // run
-         s = r & 15; // combined length
-         if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
-         j->code_buffer <<= s;
-         j->code_bits -= s;
-         // decode into unzigzag'd location
-         zig = stbi__jpeg_dezigzag[k++];
-         data[zig] = (short) ((r >> 8) * dequant[zig]);
-      } else {
-         int rs = stbi__jpeg_huff_decode(j, hac);
-         if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-         s = rs & 15;
-         r = rs >> 4;
-         if (s == 0) {
-            if (rs != 0xf0) break; // end block
-            k += 16;
-         } else {
-            k += r;
-            // decode into unzigzag'd location
-            zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) (stbi__extend_receive(j,s) * dequant[zig]);
-         }
-      }
-   } while (k < 64);
-   return 1;
-}
-
-static int stbi__jpeg_decode_block_prog_dc(stbi__jpeg *j, short data[64], stbi__huffman *hdc, int b)
-{
-   int diff,dc;
-   int t;
-   if (j->spec_end != 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-   if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-
-   if (j->succ_high == 0) {
-      // first scan for DC coefficient, must be first
-      memset(data,0,64*sizeof(data[0])); // 0 all the ac values now
-      t = stbi__jpeg_huff_decode(j, hdc);
-      if (t < 0 || t > 15) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-      diff = t ? stbi__extend_receive(j, t) : 0;
-
-      if (!stbi__addints_valid(j->img_comp[b].dc_pred, diff)) return stbi__err("bad delta", "Corrupt JPEG");
-      dc = j->img_comp[b].dc_pred + diff;
-      j->img_comp[b].dc_pred = dc;
-      if (!stbi__mul2shorts_valid(dc, 1 << j->succ_low)) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-      data[0] = (short) (dc * (1 << j->succ_low));
-   } else {
-      // refinement scan for DC coefficient
-      if (stbi__jpeg_get_bit(j))
-         data[0] += (short) (1 << j->succ_low);
-   }
-   return 1;
-}
-
-// @OPTIMIZE: store non-zigzagged during the decode passes,
-// and only de-zigzag when dequantizing
-static int stbi__jpeg_decode_block_prog_ac(stbi__jpeg *j, short data[64], stbi__huffman *hac, stbi__int16 *fac)
-{
-   int k;
-   if (j->spec_start == 0) return stbi__err("can't merge dc and ac", "Corrupt JPEG");
-
-   if (j->succ_high == 0) {
-      int shift = j->succ_low;
-
-      if (j->eob_run) {
-         --j->eob_run;
-         return 1;
-      }
-
-      k = j->spec_start;
-      do {
-         unsigned int zig;
-         int c,r,s;
-         if (j->code_bits < 16) stbi__grow_buffer_unsafe(j);
-         c = (j->code_buffer >> (32 - FAST_BITS)) & ((1 << FAST_BITS)-1);
-         r = fac[c];
-         if (r) { // fast-AC path
-            k += (r >> 4) & 15; // run
-            s = r & 15; // combined length
-            if (s > j->code_bits) return stbi__err("bad huffman code", "Combined length longer than code bits available");
-            j->code_buffer <<= s;
-            j->code_bits -= s;
-            zig = stbi__jpeg_dezigzag[k++];
-            data[zig] = (short) ((r >> 8) * (1 << shift));
-         } else {
-            int rs = stbi__jpeg_huff_decode(j, hac);
-            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-            s = rs & 15;
-            r = rs >> 4;
-            if (s == 0) {
-               if (r < 15) {
-                  j->eob_run = (1 << r);
-                  if (r)
-                     j->eob_run += stbi__jpeg_get_bits(j, r);
-                  --j->eob_run;
-                  break;
-               }
-               k += 16;
-            } else {
-               k += r;
-               zig = stbi__jpeg_dezigzag[k++];
-               data[zig] = (short) (stbi__extend_receive(j,s) * (1 << shift));
-            }
-         }
-      } while (k <= j->spec_end);
-   } else {
-      // refinement scan for these AC coefficients
-
-      short bit = (short) (1 << j->succ_low);
-
-      if (j->eob_run) {
-         --j->eob_run;
-         for (k = j->spec_start; k <= j->spec_end; ++k) {
-            short *p = &data[stbi__jpeg_dezigzag[k]];
-            if (*p != 0)
-               if (stbi__jpeg_get_bit(j))
-                  if ((*p & bit)==0) {
-                     if (*p > 0)
-                        *p += bit;
-                     else
-                        *p -= bit;
-                  }
-         }
-      } else {
-         k = j->spec_start;
-         do {
-            int r,s;
-            int rs = stbi__jpeg_huff_decode(j, hac); // @OPTIMIZE see if we can use the fast path here, advance-by-r is so slow, eh
-            if (rs < 0) return stbi__err("bad huffman code","Corrupt JPEG");
-            s = rs & 15;
-            r = rs >> 4;
-            if (s == 0) {
-               if (r < 15) {
-                  j->eob_run = (1 << r) - 1;
-                  if (r)
-                     j->eob_run += stbi__jpeg_get_bits(j, r);
-                  r = 64; // force end of block
-               } else {
-                  // r=15 s=0 should write 16 0s, so we just do
-                  // a run of 15 0s and then write s (which is 0),
-                  // so we don't have to do anything special here
-               }
-            } else {
-               if (s != 1) return stbi__err("bad huffman code", "Corrupt JPEG");
-               // sign bit
-               if (stbi__jpeg_get_bit(j))
-                  s = bit;
-               else
-                  s = -bit;
-            }
-
-            // advance by r
-            while (k <= j->spec_end) {
-               short *p = &data[stbi__jpeg_dezigzag[k++]];
-               if (*p != 0) {
-                  if (stbi__jpeg_get_bit(j))
-                     if ((*p & bit)==0) {
-                        if (*p > 0)
-                           *p += bit;
-                        else
-                           *p -= bit;
-                     }
-               } else {
-                  if (r == 0) {
-                     *p = (short) s;
-                     break;
-                  }
-                  --r;
-               }
-            }
-         } while (k <= j->spec_end);
-      }
-   }
-   return 1;
-}
-
-// take a -128..127 value and stbi__clamp it and convert to 0..255
-stbi_inline static stbi_uc stbi__clamp(int x)
-{
-   // trick to use a single test to catch both cases
-   if ((unsigned int) x > 255) {
-      if (x < 0) return 0;
-      if (x > 255) return 255;
-   }
-   return (stbi_uc) x;
-}
-
-#define stbi__f2f(x)  ((int) (((x) * 4096 + 0.5)))
-#define stbi__fsh(x)  ((x) * 4096)
-
-// derived from jidctint -- DCT_ISLOW
-#define STBI__IDCT_1D(s0,s1,s2,s3,s4,s5,s6,s7) \
-   int t0,t1,t2,t3,p1,p2,p3,p4,p5,x0,x1,x2,x3; \
-   p2 = s2;                                    \
-   p3 = s6;                                    \
-   p1 = (p2+p3) * stbi__f2f(0.5411961f);       \
-   t2 = p1 + p3*stbi__f2f(-1.847759065f);      \
-   t3 = p1 + p2*stbi__f2f( 0.765366865f);      \
-   p2 = s0;                                    \
-   p3 = s4;                                    \
-   t0 = stbi__fsh(p2+p3);                      \
-   t1 = stbi__fsh(p2-p3);                      \
-   x0 = t0+t3;                                 \
-   x3 = t0-t3;                                 \
-   x1 = t1+t2;                                 \
-   x2 = t1-t2;                                 \
-   t0 = s7;                                    \
-   t1 = s5;                                    \
-   t2 = s3;                                    \
-   t3 = s1;                                    \
-   p3 = t0+t2;                                 \
-   p4 = t1+t3;                                 \
-   p1 = t0+t3;                                 \
-   p2 = t1+t2;                                 \
-   p5 = (p3+p4)*stbi__f2f( 1.175875602f);      \
-   t0 = t0*stbi__f2f( 0.298631336f);           \
-   t1 = t1*stbi__f2f( 2.053119869f);           \
-   t2 = t2*stbi__f2f( 3.072711026f);           \
-   t3 = t3*stbi__f2f( 1.501321110f);           \
-   p1 = p5 + p1*stbi__f2f(-0.899976223f);      \
-   p2 = p5 + p2*stbi__f2f(-2.562915447f);      \
-   p3 = p3*stbi__f2f(-1.961570560f);           \
-   p4 = p4*stbi__f2f(-0.390180644f);           \
-   t3 += p1+p4;                                \
-   t2 += p2+p3;                                \
-   t1 += p2+p4;                                \
-   t0 += p1+p3;
-
-static void stbi__idct_block(stbi_uc *out, int out_stride, short data[64])
-{
-   int i,val[64],*v=val;
-   stbi_uc *o;
-   short *d = data;
-
-   // columns
-   for (i=0; i < 8; ++i,++d, ++v) {
-      // if all zeroes, shortcut -- this avoids dequantizing 0s and IDCTing
-      if (d[ 8]==0 && d[16]==0 && d[24]==0 && d[32]==0
-           && d[40]==0 && d[48]==0 && d[56]==0) {
-         //    no shortcut                 0     seconds
-         //    (1|2|3|4|5|6|7)==0          0     seconds
-         //    all separate               -0.047 seconds
-         //    1 && 2|3 && 4|5 && 6|7:    -0.047 seconds
-         int dcterm = d[0]*4;
-         v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = dcterm;
-      } else {
-         STBI__IDCT_1D(d[ 0],d[ 8],d[16],d[24],d[32],d[40],d[48],d[56])
-         // constants scaled things up by 1<<12; let's bring them back
-         // down, but keep 2 extra bits of precision
-         x0 += 512; x1 += 512; x2 += 512; x3 += 512;
-         v[ 0] = (x0+t3) >> 10;
-         v[56] = (x0-t3) >> 10;
-         v[ 8] = (x1+t2) >> 10;
-         v[48] = (x1-t2) >> 10;
-         v[16] = (x2+t1) >> 10;
-         v[40] = (x2-t1) >> 10;
-         v[24] = (x3+t0) >> 10;
-         v[32] = (x3-t0) >> 10;
-      }
-   }
-
-   for (i=0, v=val, o=out; i < 8; ++i,v+=8,o+=out_stride) {
-      // no fast case since the first 1D IDCT spread components out
-      STBI__IDCT_1D(v[0],v[1],v[2],v[3],v[4],v[5],v[6],v[7])
-      // constants scaled things up by 1<<12, plus we had 1<<2 from first
-      // loop, plus horizontal and vertical each scale by sqrt(8) so together
-      // we've got an extra 1<<3, so 1<<17 total we need to remove.
-      // so we want to round that, which means adding 0.5 * 1<<17,
-      // aka 65536. Also, we'll end up with -128 to 127 that we want
-      // to encode as 0..255 by adding 128, so we'll add that before the shift
-      x0 += 65536 + (128<<17);
-      x1 += 65536 + (128<<17);
-      x2 += 65536 + (128<<17);
-      x3 += 65536 + (128<<17);
-      // tried computing the shifts into temps, or'ing the temps to see
-      // if any were out of range, but that was slower
-      o[0] = stbi__clamp((x0+t3) >> 17);
-      o[7] = stbi__clamp((x0-t3) >> 17);
-      o[1] = stbi__clamp((x1+t2) >> 17);
-      o[6] = stbi__clamp((x1-t2) >> 17);
-      o[2] = stbi__clamp((x2+t1) >> 17);
-      o[5] = stbi__clamp((x2-t1) >> 17);
-      o[3] = stbi__clamp((x3+t0) >> 17);
-      o[4] = stbi__clamp((x3-t0) >> 17);
-   }
-}
-
-#ifdef STBI_SSE2
-// sse2 integer IDCT. not the fastest possible implementation but it
-// produces bit-identical results to the generic C version so it's
-// fully "transparent".
-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
-{
-   // This is constructed to match our regular (generic) integer IDCT exactly.
-   __m128i row0, row1, row2, row3, row4, row5, row6, row7;
-   __m128i tmp;
-
-   // dot product constant: even elems=x, odd elems=y
-   #define dct_const(x,y)  _mm_setr_epi16((x),(y),(x),(y),(x),(y),(x),(y))
-
-   // out(0) = c0[even]*x + c0[odd]*y   (c0, x, y 16-bit, out 32-bit)
-   // out(1) = c1[even]*x + c1[odd]*y
-   #define dct_rot(out0,out1, x,y,c0,c1) \
-      __m128i c0##lo = _mm_unpacklo_epi16((x),(y)); \
-      __m128i c0##hi = _mm_unpackhi_epi16((x),(y)); \
-      __m128i out0##_l = _mm_madd_epi16(c0##lo, c0); \
-      __m128i out0##_h = _mm_madd_epi16(c0##hi, c0); \
-      __m128i out1##_l = _mm_madd_epi16(c0##lo, c1); \
-      __m128i out1##_h = _mm_madd_epi16(c0##hi, c1)
-
-   // out = in << 12  (in 16-bit, out 32-bit)
-   #define dct_widen(out, in) \
-      __m128i out##_l = _mm_srai_epi32(_mm_unpacklo_epi16(_mm_setzero_si128(), (in)), 4); \
-      __m128i out##_h = _mm_srai_epi32(_mm_unpackhi_epi16(_mm_setzero_si128(), (in)), 4)
-
-   // wide add
-   #define dct_wadd(out, a, b) \
-      __m128i out##_l = _mm_add_epi32(a##_l, b##_l); \
-      __m128i out##_h = _mm_add_epi32(a##_h, b##_h)
-
-   // wide sub
-   #define dct_wsub(out, a, b) \
-      __m128i out##_l = _mm_sub_epi32(a##_l, b##_l); \
-      __m128i out##_h = _mm_sub_epi32(a##_h, b##_h)
-
-   // butterfly a/b, add bias, then shift by "s" and pack
-   #define dct_bfly32o(out0, out1, a,b,bias,s) \
-      { \
-         __m128i abiased_l = _mm_add_epi32(a##_l, bias); \
-         __m128i abiased_h = _mm_add_epi32(a##_h, bias); \
-         dct_wadd(sum, abiased, b); \
-         dct_wsub(dif, abiased, b); \
-         out0 = _mm_packs_epi32(_mm_srai_epi32(sum_l, s), _mm_srai_epi32(sum_h, s)); \
-         out1 = _mm_packs_epi32(_mm_srai_epi32(dif_l, s), _mm_srai_epi32(dif_h, s)); \
-      }
-
-   // 8-bit interleave step (for transposes)
-   #define dct_interleave8(a, b) \
-      tmp = a; \
-      a = _mm_unpacklo_epi8(a, b); \
-      b = _mm_unpackhi_epi8(tmp, b)
-
-   // 16-bit interleave step (for transposes)
-   #define dct_interleave16(a, b) \
-      tmp = a; \
-      a = _mm_unpacklo_epi16(a, b); \
-      b = _mm_unpackhi_epi16(tmp, b)
-
-   #define dct_pass(bias,shift) \
-      { \
-         /* even part */ \
-         dct_rot(t2e,t3e, row2,row6, rot0_0,rot0_1); \
-         __m128i sum04 = _mm_add_epi16(row0, row4); \
-         __m128i dif04 = _mm_sub_epi16(row0, row4); \
-         dct_widen(t0e, sum04); \
-         dct_widen(t1e, dif04); \
-         dct_wadd(x0, t0e, t3e); \
-         dct_wsub(x3, t0e, t3e); \
-         dct_wadd(x1, t1e, t2e); \
-         dct_wsub(x2, t1e, t2e); \
-         /* odd part */ \
-         dct_rot(y0o,y2o, row7,row3, rot2_0,rot2_1); \
-         dct_rot(y1o,y3o, row5,row1, rot3_0,rot3_1); \
-         __m128i sum17 = _mm_add_epi16(row1, row7); \
-         __m128i sum35 = _mm_add_epi16(row3, row5); \
-         dct_rot(y4o,y5o, sum17,sum35, rot1_0,rot1_1); \
-         dct_wadd(x4, y0o, y4o); \
-         dct_wadd(x5, y1o, y5o); \
-         dct_wadd(x6, y2o, y5o); \
-         dct_wadd(x7, y3o, y4o); \
-         dct_bfly32o(row0,row7, x0,x7,bias,shift); \
-         dct_bfly32o(row1,row6, x1,x6,bias,shift); \
-         dct_bfly32o(row2,row5, x2,x5,bias,shift); \
-         dct_bfly32o(row3,row4, x3,x4,bias,shift); \
-      }
-
-   __m128i rot0_0 = dct_const(stbi__f2f(0.5411961f), stbi__f2f(0.5411961f) + stbi__f2f(-1.847759065f));
-   __m128i rot0_1 = dct_const(stbi__f2f(0.5411961f) + stbi__f2f( 0.765366865f), stbi__f2f(0.5411961f));
-   __m128i rot1_0 = dct_const(stbi__f2f(1.175875602f) + stbi__f2f(-0.899976223f), stbi__f2f(1.175875602f));
-   __m128i rot1_1 = dct_const(stbi__f2f(1.175875602f), stbi__f2f(1.175875602f) + stbi__f2f(-2.562915447f));
-   __m128i rot2_0 = dct_const(stbi__f2f(-1.961570560f) + stbi__f2f( 0.298631336f), stbi__f2f(-1.961570560f));
-   __m128i rot2_1 = dct_const(stbi__f2f(-1.961570560f), stbi__f2f(-1.961570560f) + stbi__f2f( 3.072711026f));
-   __m128i rot3_0 = dct_const(stbi__f2f(-0.390180644f) + stbi__f2f( 2.053119869f), stbi__f2f(-0.390180644f));
-   __m128i rot3_1 = dct_const(stbi__f2f(-0.390180644f), stbi__f2f(-0.390180644f) + stbi__f2f( 1.501321110f));
-
-   // rounding biases in column/row passes, see stbi__idct_block for explanation.
-   __m128i bias_0 = _mm_set1_epi32(512);
-   __m128i bias_1 = _mm_set1_epi32(65536 + (128<<17));
-
-   // load
-   row0 = _mm_load_si128((const __m128i *) (data + 0*8));
-   row1 = _mm_load_si128((const __m128i *) (data + 1*8));
-   row2 = _mm_load_si128((const __m128i *) (data + 2*8));
-   row3 = _mm_load_si128((const __m128i *) (data + 3*8));
-   row4 = _mm_load_si128((const __m128i *) (data + 4*8));
-   row5 = _mm_load_si128((const __m128i *) (data + 5*8));
-   row6 = _mm_load_si128((const __m128i *) (data + 6*8));
-   row7 = _mm_load_si128((const __m128i *) (data + 7*8));
-
-   // column pass
-   dct_pass(bias_0, 10);
-
-   {
-      // 16bit 8x8 transpose pass 1
-      dct_interleave16(row0, row4);
-      dct_interleave16(row1, row5);
-      dct_interleave16(row2, row6);
-      dct_interleave16(row3, row7);
-
-      // transpose pass 2
-      dct_interleave16(row0, row2);
-      dct_interleave16(row1, row3);
-      dct_interleave16(row4, row6);
-      dct_interleave16(row5, row7);
-
-      // transpose pass 3
-      dct_interleave16(row0, row1);
-      dct_interleave16(row2, row3);
-      dct_interleave16(row4, row5);
-      dct_interleave16(row6, row7);
-   }
-
-   // row pass
-   dct_pass(bias_1, 17);
-
-   {
-      // pack
-      __m128i p0 = _mm_packus_epi16(row0, row1); // a0a1a2a3...a7b0b1b2b3...b7
-      __m128i p1 = _mm_packus_epi16(row2, row3);
-      __m128i p2 = _mm_packus_epi16(row4, row5);
-      __m128i p3 = _mm_packus_epi16(row6, row7);
-
-      // 8bit 8x8 transpose pass 1
-      dct_interleave8(p0, p2); // a0e0a1e1...
-      dct_interleave8(p1, p3); // c0g0c1g1...
-
-      // transpose pass 2
-      dct_interleave8(p0, p1); // a0c0e0g0...
-      dct_interleave8(p2, p3); // b0d0f0h0...
-
-      // transpose pass 3
-      dct_interleave8(p0, p2); // a0b0c0d0...
-      dct_interleave8(p1, p3); // a4b4c4d4...
-
-      // store
-      _mm_storel_epi64((__m128i *) out, p0); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p0, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p2); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p2, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p1); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p1, 0x4e)); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, p3); out += out_stride;
-      _mm_storel_epi64((__m128i *) out, _mm_shuffle_epi32(p3, 0x4e));
-   }
-
-#undef dct_const
-#undef dct_rot
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_interleave8
-#undef dct_interleave16
-#undef dct_pass
-}
-
-#endif // STBI_SSE2
-
-#ifdef STBI_NEON
-
-// NEON integer IDCT. should produce bit-identical
-// results to the generic C version.
-static void stbi__idct_simd(stbi_uc *out, int out_stride, short data[64])
-{
-   int16x8_t row0, row1, row2, row3, row4, row5, row6, row7;
-
-   int16x4_t rot0_0 = vdup_n_s16(stbi__f2f(0.5411961f));
-   int16x4_t rot0_1 = vdup_n_s16(stbi__f2f(-1.847759065f));
-   int16x4_t rot0_2 = vdup_n_s16(stbi__f2f( 0.765366865f));
-   int16x4_t rot1_0 = vdup_n_s16(stbi__f2f( 1.175875602f));
-   int16x4_t rot1_1 = vdup_n_s16(stbi__f2f(-0.899976223f));
-   int16x4_t rot1_2 = vdup_n_s16(stbi__f2f(-2.562915447f));
-   int16x4_t rot2_0 = vdup_n_s16(stbi__f2f(-1.961570560f));
-   int16x4_t rot2_1 = vdup_n_s16(stbi__f2f(-0.390180644f));
-   int16x4_t rot3_0 = vdup_n_s16(stbi__f2f( 0.298631336f));
-   int16x4_t rot3_1 = vdup_n_s16(stbi__f2f( 2.053119869f));
-   int16x4_t rot3_2 = vdup_n_s16(stbi__f2f( 3.072711026f));
-   int16x4_t rot3_3 = vdup_n_s16(stbi__f2f( 1.501321110f));
-
-#define dct_long_mul(out, inq, coeff) \
-   int32x4_t out##_l = vmull_s16(vget_low_s16(inq), coeff); \
-   int32x4_t out##_h = vmull_s16(vget_high_s16(inq), coeff)
-
-#define dct_long_mac(out, acc, inq, coeff) \
-   int32x4_t out##_l = vmlal_s16(acc##_l, vget_low_s16(inq), coeff); \
-   int32x4_t out##_h = vmlal_s16(acc##_h, vget_high_s16(inq), coeff)
-
-#define dct_widen(out, inq) \
-   int32x4_t out##_l = vshll_n_s16(vget_low_s16(inq), 12); \
-   int32x4_t out##_h = vshll_n_s16(vget_high_s16(inq), 12)
-
-// wide add
-#define dct_wadd(out, a, b) \
-   int32x4_t out##_l = vaddq_s32(a##_l, b##_l); \
-   int32x4_t out##_h = vaddq_s32(a##_h, b##_h)
-
-// wide sub
-#define dct_wsub(out, a, b) \
-   int32x4_t out##_l = vsubq_s32(a##_l, b##_l); \
-   int32x4_t out##_h = vsubq_s32(a##_h, b##_h)
-
-// butterfly a/b, then shift using "shiftop" by "s" and pack
-#define dct_bfly32o(out0,out1, a,b,shiftop,s) \
-   { \
-      dct_wadd(sum, a, b); \
-      dct_wsub(dif, a, b); \
-      out0 = vcombine_s16(shiftop(sum_l, s), shiftop(sum_h, s)); \
-      out1 = vcombine_s16(shiftop(dif_l, s), shiftop(dif_h, s)); \
-   }
-
-#define dct_pass(shiftop, shift) \
-   { \
-      /* even part */ \
-      int16x8_t sum26 = vaddq_s16(row2, row6); \
-      dct_long_mul(p1e, sum26, rot0_0); \
-      dct_long_mac(t2e, p1e, row6, rot0_1); \
-      dct_long_mac(t3e, p1e, row2, rot0_2); \
-      int16x8_t sum04 = vaddq_s16(row0, row4); \
-      int16x8_t dif04 = vsubq_s16(row0, row4); \
-      dct_widen(t0e, sum04); \
-      dct_widen(t1e, dif04); \
-      dct_wadd(x0, t0e, t3e); \
-      dct_wsub(x3, t0e, t3e); \
-      dct_wadd(x1, t1e, t2e); \
-      dct_wsub(x2, t1e, t2e); \
-      /* odd part */ \
-      int16x8_t sum15 = vaddq_s16(row1, row5); \
-      int16x8_t sum17 = vaddq_s16(row1, row7); \
-      int16x8_t sum35 = vaddq_s16(row3, row5); \
-      int16x8_t sum37 = vaddq_s16(row3, row7); \
-      int16x8_t sumodd = vaddq_s16(sum17, sum35); \
-      dct_long_mul(p5o, sumodd, rot1_0); \
-      dct_long_mac(p1o, p5o, sum17, rot1_1); \
-      dct_long_mac(p2o, p5o, sum35, rot1_2); \
-      dct_long_mul(p3o, sum37, rot2_0); \
-      dct_long_mul(p4o, sum15, rot2_1); \
-      dct_wadd(sump13o, p1o, p3o); \
-      dct_wadd(sump24o, p2o, p4o); \
-      dct_wadd(sump23o, p2o, p3o); \
-      dct_wadd(sump14o, p1o, p4o); \
-      dct_long_mac(x4, sump13o, row7, rot3_0); \
-      dct_long_mac(x5, sump24o, row5, rot3_1); \
-      dct_long_mac(x6, sump23o, row3, rot3_2); \
-      dct_long_mac(x7, sump14o, row1, rot3_3); \
-      dct_bfly32o(row0,row7, x0,x7,shiftop,shift); \
-      dct_bfly32o(row1,row6, x1,x6,shiftop,shift); \
-      dct_bfly32o(row2,row5, x2,x5,shiftop,shift); \
-      dct_bfly32o(row3,row4, x3,x4,shiftop,shift); \
-   }
-
-   // load
-   row0 = vld1q_s16(data + 0*8);
-   row1 = vld1q_s16(data + 1*8);
-   row2 = vld1q_s16(data + 2*8);
-   row3 = vld1q_s16(data + 3*8);
-   row4 = vld1q_s16(data + 4*8);
-   row5 = vld1q_s16(data + 5*8);
-   row6 = vld1q_s16(data + 6*8);
-   row7 = vld1q_s16(data + 7*8);
-
-   // add DC bias
-   row0 = vaddq_s16(row0, vsetq_lane_s16(1024, vdupq_n_s16(0), 0));
-
-   // column pass
-   dct_pass(vrshrn_n_s32, 10);
-
-   // 16bit 8x8 transpose
-   {
-// these three map to a single VTRN.16, VTRN.32, and VSWP, respectively.
-// whether compilers actually get this is another story, sadly.
-#define dct_trn16(x, y) { int16x8x2_t t = vtrnq_s16(x, y); x = t.val[0]; y = t.val[1]; }
-#define dct_trn32(x, y) { int32x4x2_t t = vtrnq_s32(vreinterpretq_s32_s16(x), vreinterpretq_s32_s16(y)); x = vreinterpretq_s16_s32(t.val[0]); y = vreinterpretq_s16_s32(t.val[1]); }
-#define dct_trn64(x, y) { int16x8_t x0 = x; int16x8_t y0 = y; x = vcombine_s16(vget_low_s16(x0), vget_low_s16(y0)); y = vcombine_s16(vget_high_s16(x0), vget_high_s16(y0)); }
-
-      // pass 1
-      dct_trn16(row0, row1); // a0b0a2b2a4b4a6b6
-      dct_trn16(row2, row3);
-      dct_trn16(row4, row5);
-      dct_trn16(row6, row7);
-
-      // pass 2
-      dct_trn32(row0, row2); // a0b0c0d0a4b4c4d4
-      dct_trn32(row1, row3);
-      dct_trn32(row4, row6);
-      dct_trn32(row5, row7);
-
-      // pass 3
-      dct_trn64(row0, row4); // a0b0c0d0e0f0g0h0
-      dct_trn64(row1, row5);
-      dct_trn64(row2, row6);
-      dct_trn64(row3, row7);
-
-#undef dct_trn16
-#undef dct_trn32
-#undef dct_trn64
-   }
-
-   // row pass
-   // vrshrn_n_s32 only supports shifts up to 16, we need
-   // 17. so do a non-rounding shift of 16 first then follow
-   // up with a rounding shift by 1.
-   dct_pass(vshrn_n_s32, 16);
-
-   {
-      // pack and round
-      uint8x8_t p0 = vqrshrun_n_s16(row0, 1);
-      uint8x8_t p1 = vqrshrun_n_s16(row1, 1);
-      uint8x8_t p2 = vqrshrun_n_s16(row2, 1);
-      uint8x8_t p3 = vqrshrun_n_s16(row3, 1);
-      uint8x8_t p4 = vqrshrun_n_s16(row4, 1);
-      uint8x8_t p5 = vqrshrun_n_s16(row5, 1);
-      uint8x8_t p6 = vqrshrun_n_s16(row6, 1);
-      uint8x8_t p7 = vqrshrun_n_s16(row7, 1);
-
-      // again, these can translate into one instruction, but often don't.
-#define dct_trn8_8(x, y) { uint8x8x2_t t = vtrn_u8(x, y); x = t.val[0]; y = t.val[1]; }
-#define dct_trn8_16(x, y) { uint16x4x2_t t = vtrn_u16(vreinterpret_u16_u8(x), vreinterpret_u16_u8(y)); x = vreinterpret_u8_u16(t.val[0]); y = vreinterpret_u8_u16(t.val[1]); }
-#define dct_trn8_32(x, y) { uint32x2x2_t t = vtrn_u32(vreinterpret_u32_u8(x), vreinterpret_u32_u8(y)); x = vreinterpret_u8_u32(t.val[0]); y = vreinterpret_u8_u32(t.val[1]); }
-
-      // sadly can't use interleaved stores here since we only write
-      // 8 bytes to each scan line!
-
-      // 8x8 8-bit transpose pass 1
-      dct_trn8_8(p0, p1);
-      dct_trn8_8(p2, p3);
-      dct_trn8_8(p4, p5);
-      dct_trn8_8(p6, p7);
-
-      // pass 2
-      dct_trn8_16(p0, p2);
-      dct_trn8_16(p1, p3);
-      dct_trn8_16(p4, p6);
-      dct_trn8_16(p5, p7);
-
-      // pass 3
-      dct_trn8_32(p0, p4);
-      dct_trn8_32(p1, p5);
-      dct_trn8_32(p2, p6);
-      dct_trn8_32(p3, p7);
-
-      // store
-      vst1_u8(out, p0); out += out_stride;
-      vst1_u8(out, p1); out += out_stride;
-      vst1_u8(out, p2); out += out_stride;
-      vst1_u8(out, p3); out += out_stride;
-      vst1_u8(out, p4); out += out_stride;
-      vst1_u8(out, p5); out += out_stride;
-      vst1_u8(out, p6); out += out_stride;
-      vst1_u8(out, p7);
-
-#undef dct_trn8_8
-#undef dct_trn8_16
-#undef dct_trn8_32
-   }
-
-#undef dct_long_mul
-#undef dct_long_mac
-#undef dct_widen
-#undef dct_wadd
-#undef dct_wsub
-#undef dct_bfly32o
-#undef dct_pass
-}
-
-#endif // STBI_NEON
-
-#define STBI__MARKER_none  0xff
-// if there's a pending marker from the entropy stream, return that
-// otherwise, fetch from the stream and get a marker. if there's no
-// marker, return 0xff, which is never a valid marker value
-static stbi_uc stbi__get_marker(stbi__jpeg *j)
-{
-   stbi_uc x;
-   if (j->marker != STBI__MARKER_none) { x = j->marker; j->marker = STBI__MARKER_none; return x; }
-   x = stbi__get8(j->s);
-   if (x != 0xff) return STBI__MARKER_none;
-   while (x == 0xff)
-      x = stbi__get8(j->s); // consume repeated 0xff fill bytes
-   return x;
-}
-
-// in each scan, we'll have scan_n components, and the order
-// of the components is specified by order[]
-#define STBI__RESTART(x)     ((x) >= 0xd0 && (x) <= 0xd7)
-
-// after a restart interval, stbi__jpeg_reset the entropy decoder and
-// the dc prediction
-static void stbi__jpeg_reset(stbi__jpeg *j)
-{
-   j->code_bits = 0;
-   j->code_buffer = 0;
-   j->nomore = 0;
-   j->img_comp[0].dc_pred = j->img_comp[1].dc_pred = j->img_comp[2].dc_pred = j->img_comp[3].dc_pred = 0;
-   j->marker = STBI__MARKER_none;
-   j->todo = j->restart_interval ? j->restart_interval : 0x7fffffff;
-   j->eob_run = 0;
-   // no more than 1<<31 MCUs if no restart_interal? that's plenty safe,
-   // since we don't even allow 1<<30 pixels
-}
-
-static int stbi__parse_entropy_coded_data(stbi__jpeg *z)
-{
-   stbi__jpeg_reset(z);
-   if (!z->progressive) {
-      if (z->scan_n == 1) {
-         int i,j;
-         STBI_SIMD_ALIGN(short, data[64]);
-         int n = z->order[0];
-         // non-interleaved data, we just need to process one block at a time,
-         // in trivial scanline order
-         // number of blocks to do just depends on how many actual "pixels" this
-         // component has, independent of interleaved MCU blocking and such
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               int ha = z->img_comp[n].ha;
-               if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
-               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
-               // every data block is an MCU, so countdown the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  // if it's NOT a restart, then just bail, so we get corrupt data
-                  // rather than no data
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      } else { // interleaved
-         int i,j,k,x,y;
-         STBI_SIMD_ALIGN(short, data[64]);
-         for (j=0; j < z->img_mcu_y; ++j) {
-            for (i=0; i < z->img_mcu_x; ++i) {
-               // scan an interleaved mcu... process scan_n components in order
-               for (k=0; k < z->scan_n; ++k) {
-                  int n = z->order[k];
-                  // scan out an mcu's worth of this component; that's just determined
-                  // by the basic H and V specified for the component
-                  for (y=0; y < z->img_comp[n].v; ++y) {
-                     for (x=0; x < z->img_comp[n].h; ++x) {
-                        int x2 = (i*z->img_comp[n].h + x)*8;
-                        int y2 = (j*z->img_comp[n].v + y)*8;
-                        int ha = z->img_comp[n].ha;
-                        if (!stbi__jpeg_decode_block(z, data, z->huff_dc+z->img_comp[n].hd, z->huff_ac+ha, z->fast_ac[ha], n, z->dequant[z->img_comp[n].tq])) return 0;
-                        z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*y2+x2, z->img_comp[n].w2, data);
-                     }
-                  }
-               }
-               // after all interleaved components, that's an interleaved MCU,
-               // so now count down the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      }
-   } else {
-      if (z->scan_n == 1) {
-         int i,j;
-         int n = z->order[0];
-         // non-interleaved data, we just need to process one block at a time,
-         // in trivial scanline order
-         // number of blocks to do just depends on how many actual "pixels" this
-         // component has, independent of interleaved MCU blocking and such
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-               if (z->spec_start == 0) {
-                  if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-                     return 0;
-               } else {
-                  int ha = z->img_comp[n].ha;
-                  if (!stbi__jpeg_decode_block_prog_ac(z, data, &z->huff_ac[ha], z->fast_ac[ha]))
-                     return 0;
-               }
-               // every data block is an MCU, so countdown the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      } else { // interleaved
-         int i,j,k,x,y;
-         for (j=0; j < z->img_mcu_y; ++j) {
-            for (i=0; i < z->img_mcu_x; ++i) {
-               // scan an interleaved mcu... process scan_n components in order
-               for (k=0; k < z->scan_n; ++k) {
-                  int n = z->order[k];
-                  // scan out an mcu's worth of this component; that's just determined
-                  // by the basic H and V specified for the component
-                  for (y=0; y < z->img_comp[n].v; ++y) {
-                     for (x=0; x < z->img_comp[n].h; ++x) {
-                        int x2 = (i*z->img_comp[n].h + x);
-                        int y2 = (j*z->img_comp[n].v + y);
-                        short *data = z->img_comp[n].coeff + 64 * (x2 + y2 * z->img_comp[n].coeff_w);
-                        if (!stbi__jpeg_decode_block_prog_dc(z, data, &z->huff_dc[z->img_comp[n].hd], n))
-                           return 0;
-                     }
-                  }
-               }
-               // after all interleaved components, that's an interleaved MCU,
-               // so now count down the restart interval
-               if (--z->todo <= 0) {
-                  if (z->code_bits < 24) stbi__grow_buffer_unsafe(z);
-                  if (!STBI__RESTART(z->marker)) return 1;
-                  stbi__jpeg_reset(z);
-               }
-            }
-         }
-         return 1;
-      }
-   }
-}
-
-static void stbi__jpeg_dequantize(short *data, stbi__uint16 *dequant)
-{
-   int i;
-   for (i=0; i < 64; ++i)
-      data[i] *= dequant[i];
-}
-
-static void stbi__jpeg_finish(stbi__jpeg *z)
-{
-   if (z->progressive) {
-      // dequantize and idct the data
-      int i,j,n;
-      for (n=0; n < z->s->img_n; ++n) {
-         int w = (z->img_comp[n].x+7) >> 3;
-         int h = (z->img_comp[n].y+7) >> 3;
-         for (j=0; j < h; ++j) {
-            for (i=0; i < w; ++i) {
-               short *data = z->img_comp[n].coeff + 64 * (i + j * z->img_comp[n].coeff_w);
-               stbi__jpeg_dequantize(data, z->dequant[z->img_comp[n].tq]);
-               z->idct_block_kernel(z->img_comp[n].data+z->img_comp[n].w2*j*8+i*8, z->img_comp[n].w2, data);
-            }
-         }
-      }
-   }
-}
-
-static int stbi__process_marker(stbi__jpeg *z, int m)
-{
-   int L;
-   switch (m) {
-      case STBI__MARKER_none: // no marker found
-         return stbi__err("expected marker","Corrupt JPEG");
-
-      case 0xDD: // DRI - specify restart interval
-         if (stbi__get16be(z->s) != 4) return stbi__err("bad DRI len","Corrupt JPEG");
-         z->restart_interval = stbi__get16be(z->s);
-         return 1;
-
-      case 0xDB: // DQT - define quantization table
-         L = stbi__get16be(z->s)-2;
-         while (L > 0) {
-            int q = stbi__get8(z->s);
-            int p = q >> 4, sixteen = (p != 0);
-            int t = q & 15,i;
-            if (p != 0 && p != 1) return stbi__err("bad DQT type","Corrupt JPEG");
-            if (t > 3) return stbi__err("bad DQT table","Corrupt JPEG");
-
-            for (i=0; i < 64; ++i)
-               z->dequant[t][stbi__jpeg_dezigzag[i]] = (stbi__uint16)(sixteen ? stbi__get16be(z->s) : stbi__get8(z->s));
-            L -= (sixteen ? 129 : 65);
-         }
-         return L==0;
-
-      case 0xC4: // DHT - define huffman table
-         L = stbi__get16be(z->s)-2;
-         while (L > 0) {
-            stbi_uc *v;
-            int sizes[16],i,n=0;
-            int q = stbi__get8(z->s);
-            int tc = q >> 4;
-            int th = q & 15;
-            if (tc > 1 || th > 3) return stbi__err("bad DHT header","Corrupt JPEG");
-            for (i=0; i < 16; ++i) {
-               sizes[i] = stbi__get8(z->s);
-               n += sizes[i];
-            }
-            if(n > 256) return stbi__err("bad DHT header","Corrupt JPEG"); // Loop over i < n would write past end of values!
-            L -= 17;
-            if (tc == 0) {
-               if (!stbi__build_huffman(z->huff_dc+th, sizes)) return 0;
-               v = z->huff_dc[th].values;
-            } else {
-               if (!stbi__build_huffman(z->huff_ac+th, sizes)) return 0;
-               v = z->huff_ac[th].values;
-            }
-            for (i=0; i < n; ++i)
-               v[i] = stbi__get8(z->s);
-            if (tc != 0)
-               stbi__build_fast_ac(z->fast_ac[th], z->huff_ac + th);
-            L -= n;
-         }
-         return L==0;
-   }
-
-   // check for comment block or APP blocks
-   if ((m >= 0xE0 && m <= 0xEF) || m == 0xFE) {
-      L = stbi__get16be(z->s);
-      if (L < 2) {
-         if (m == 0xFE)
-            return stbi__err("bad COM len","Corrupt JPEG");
-         else
-            return stbi__err("bad APP len","Corrupt JPEG");
-      }
-      L -= 2;
-
-      if (m == 0xE0 && L >= 5) { // JFIF APP0 segment
-         static const unsigned char tag[5] = {'J','F','I','F','\0'};
-         int ok = 1;
-         int i;
-         for (i=0; i < 5; ++i)
-            if (stbi__get8(z->s) != tag[i])
-               ok = 0;
-         L -= 5;
-         if (ok)
-            z->jfif = 1;
-      } else if (m == 0xEE && L >= 12) { // Adobe APP14 segment
-         static const unsigned char tag[6] = {'A','d','o','b','e','\0'};
-         int ok = 1;
-         int i;
-         for (i=0; i < 6; ++i)
-            if (stbi__get8(z->s) != tag[i])
-               ok = 0;
-         L -= 6;
-         if (ok) {
-            stbi__get8(z->s); // version
-            stbi__get16be(z->s); // flags0
-            stbi__get16be(z->s); // flags1
-            z->app14_color_transform = stbi__get8(z->s); // color transform
-            L -= 6;
-         }
-      }
-
-      stbi__skip(z->s, L);
-      return 1;
-   }
-
-   return stbi__err("unknown marker","Corrupt JPEG");
-}
-
-// after we see SOS
-static int stbi__process_scan_header(stbi__jpeg *z)
-{
-   int i;
-   int Ls = stbi__get16be(z->s);
-   z->scan_n = stbi__get8(z->s);
-   if (z->scan_n < 1 || z->scan_n > 4 || z->scan_n > (int) z->s->img_n) return stbi__err("bad SOS component count","Corrupt JPEG");
-   if (Ls != 6+2*z->scan_n) return stbi__err("bad SOS len","Corrupt JPEG");
-   for (i=0; i < z->scan_n; ++i) {
-      int id = stbi__get8(z->s), which;
-      int q = stbi__get8(z->s);
-      for (which = 0; which < z->s->img_n; ++which)
-         if (z->img_comp[which].id == id)
-            break;
-      if (which == z->s->img_n) return 0; // no match
-      z->img_comp[which].hd = q >> 4;   if (z->img_comp[which].hd > 3) return stbi__err("bad DC huff","Corrupt JPEG");
-      z->img_comp[which].ha = q & 15;   if (z->img_comp[which].ha > 3) return stbi__err("bad AC huff","Corrupt JPEG");
-      z->order[i] = which;
-   }
-
-   {
-      int aa;
-      z->spec_start = stbi__get8(z->s);
-      z->spec_end   = stbi__get8(z->s); // should be 63, but might be 0
-      aa = stbi__get8(z->s);
-      z->succ_high = (aa >> 4);
-      z->succ_low  = (aa & 15);
-      if (z->progressive) {
-         if (z->spec_start > 63 || z->spec_end > 63  || z->spec_start > z->spec_end || z->succ_high > 13 || z->succ_low > 13)
-            return stbi__err("bad SOS", "Corrupt JPEG");
-      } else {
-         if (z->spec_start != 0) return stbi__err("bad SOS","Corrupt JPEG");
-         if (z->succ_high != 0 || z->succ_low != 0) return stbi__err("bad SOS","Corrupt JPEG");
-         z->spec_end = 63;
-      }
-   }
-
-   return 1;
-}
-
-static int stbi__free_jpeg_components(stbi__jpeg *z, int ncomp, int why)
-{
-   int i;
-   for (i=0; i < ncomp; ++i) {
-      if (z->img_comp[i].raw_data) {
-         STBI_FREE(z->img_comp[i].raw_data);
-         z->img_comp[i].raw_data = NULL;
-         z->img_comp[i].data = NULL;
-      }
-      if (z->img_comp[i].raw_coeff) {
-         STBI_FREE(z->img_comp[i].raw_coeff);
-         z->img_comp[i].raw_coeff = 0;
-         z->img_comp[i].coeff = 0;
-      }
-      if (z->img_comp[i].linebuf) {
-         STBI_FREE(z->img_comp[i].linebuf);
-         z->img_comp[i].linebuf = NULL;
-      }
-   }
-   return why;
-}
-
-static int stbi__process_frame_header(stbi__jpeg *z, int scan)
-{
-   stbi__context *s = z->s;
-   int Lf,p,i,q, h_max=1,v_max=1,c;
-   Lf = stbi__get16be(s);         if (Lf < 11) return stbi__err("bad SOF len","Corrupt JPEG"); // JPEG
-   p  = stbi__get8(s);            if (p != 8) return stbi__err("only 8-bit","JPEG format not supported: 8-bit only"); // JPEG baseline
-   s->img_y = stbi__get16be(s);   if (s->img_y == 0) return stbi__err("no header height", "JPEG format not supported: delayed height"); // Legal, but we don't handle it--but neither does IJG
-   s->img_x = stbi__get16be(s);   if (s->img_x == 0) return stbi__err("0 width","Corrupt JPEG"); // JPEG requires
-   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-   c = stbi__get8(s);
-   if (c != 3 && c != 1 && c != 4) return stbi__err("bad component count","Corrupt JPEG");
-   s->img_n = c;
-   for (i=0; i < c; ++i) {
-      z->img_comp[i].data = NULL;
-      z->img_comp[i].linebuf = NULL;
-   }
-
-   if (Lf != 8+3*s->img_n) return stbi__err("bad SOF len","Corrupt JPEG");
-
-   z->rgb = 0;
-   for (i=0; i < s->img_n; ++i) {
-      static const unsigned char rgb[3] = { 'R', 'G', 'B' };
-      z->img_comp[i].id = stbi__get8(s);
-      if (s->img_n == 3 && z->img_comp[i].id == rgb[i])
-         ++z->rgb;
-      q = stbi__get8(s);
-      z->img_comp[i].h = (q >> 4);  if (!z->img_comp[i].h || z->img_comp[i].h > 4) return stbi__err("bad H","Corrupt JPEG");
-      z->img_comp[i].v = q & 15;    if (!z->img_comp[i].v || z->img_comp[i].v > 4) return stbi__err("bad V","Corrupt JPEG");
-      z->img_comp[i].tq = stbi__get8(s);  if (z->img_comp[i].tq > 3) return stbi__err("bad TQ","Corrupt JPEG");
-   }
-
-   if (scan != STBI__SCAN_load) return 1;
-
-   if (!stbi__mad3sizes_valid(s->img_x, s->img_y, s->img_n, 0)) return stbi__err("too large", "Image too large to decode");
-
-   for (i=0; i < s->img_n; ++i) {
-      if (z->img_comp[i].h > h_max) h_max = z->img_comp[i].h;
-      if (z->img_comp[i].v > v_max) v_max = z->img_comp[i].v;
-   }
-
-   // check that plane subsampling factors are integer ratios; our resamplers can't deal with fractional ratios
-   // and I've never seen a non-corrupted JPEG file actually use them
-   for (i=0; i < s->img_n; ++i) {
-      if (h_max % z->img_comp[i].h != 0) return stbi__err("bad H","Corrupt JPEG");
-      if (v_max % z->img_comp[i].v != 0) return stbi__err("bad V","Corrupt JPEG");
-   }
-
-   // compute interleaved mcu info
-   z->img_h_max = h_max;
-   z->img_v_max = v_max;
-   z->img_mcu_w = h_max * 8;
-   z->img_mcu_h = v_max * 8;
-   // these sizes can't be more than 17 bits
-   z->img_mcu_x = (s->img_x + z->img_mcu_w-1) / z->img_mcu_w;
-   z->img_mcu_y = (s->img_y + z->img_mcu_h-1) / z->img_mcu_h;
-
-   for (i=0; i < s->img_n; ++i) {
-      // number of effective pixels (e.g. for non-interleaved MCU)
-      z->img_comp[i].x = (s->img_x * z->img_comp[i].h + h_max-1) / h_max;
-      z->img_comp[i].y = (s->img_y * z->img_comp[i].v + v_max-1) / v_max;
-      // to simplify generation, we'll allocate enough memory to decode
-      // the bogus oversized data from using interleaved MCUs and their
-      // big blocks (e.g. a 16x16 iMCU on an image of width 33); we won't
-      // discard the extra data until colorspace conversion
-      //
-      // img_mcu_x, img_mcu_y: <=17 bits; comp[i].h and .v are <=4 (checked earlier)
-      // so these muls can't overflow with 32-bit ints (which we require)
-      z->img_comp[i].w2 = z->img_mcu_x * z->img_comp[i].h * 8;
-      z->img_comp[i].h2 = z->img_mcu_y * z->img_comp[i].v * 8;
-      z->img_comp[i].coeff = 0;
-      z->img_comp[i].raw_coeff = 0;
-      z->img_comp[i].linebuf = NULL;
-      z->img_comp[i].raw_data = stbi__malloc_mad2(z->img_comp[i].w2, z->img_comp[i].h2, 15);
-      if (z->img_comp[i].raw_data == NULL)
-         return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
-      // align blocks for idct using mmx/sse
-      z->img_comp[i].data = (stbi_uc*) (((size_t) z->img_comp[i].raw_data + 15) & ~15);
-      if (z->progressive) {
-         // w2, h2 are multiples of 8 (see above)
-         z->img_comp[i].coeff_w = z->img_comp[i].w2 / 8;
-         z->img_comp[i].coeff_h = z->img_comp[i].h2 / 8;
-         z->img_comp[i].raw_coeff = stbi__malloc_mad3(z->img_comp[i].w2, z->img_comp[i].h2, sizeof(short), 15);
-         if (z->img_comp[i].raw_coeff == NULL)
-            return stbi__free_jpeg_components(z, i+1, stbi__err("outofmem", "Out of memory"));
-         z->img_comp[i].coeff = (short*) (((size_t) z->img_comp[i].raw_coeff + 15) & ~15);
-      }
-   }
-
-   return 1;
-}
-
-// use comparisons since in some cases we handle more than one case (e.g. SOF)
-#define stbi__DNL(x)         ((x) == 0xdc)
-#define stbi__SOI(x)         ((x) == 0xd8)
-#define stbi__EOI(x)         ((x) == 0xd9)
-#define stbi__SOF(x)         ((x) == 0xc0 || (x) == 0xc1 || (x) == 0xc2)
-#define stbi__SOS(x)         ((x) == 0xda)
-
-#define stbi__SOF_progressive(x)   ((x) == 0xc2)
-
-static int stbi__decode_jpeg_header(stbi__jpeg *z, int scan)
-{
-   int m;
-   z->jfif = 0;
-   z->app14_color_transform = -1; // valid values are 0,1,2
-   z->marker = STBI__MARKER_none; // initialize cached marker to empty
-   m = stbi__get_marker(z);
-   if (!stbi__SOI(m)) return stbi__err("no SOI","Corrupt JPEG");
-   if (scan == STBI__SCAN_type) return 1;
-   m = stbi__get_marker(z);
-   while (!stbi__SOF(m)) {
-      if (!stbi__process_marker(z,m)) return 0;
-      m = stbi__get_marker(z);
-      while (m == STBI__MARKER_none) {
-         // some files have extra padding after their blocks, so ok, we'll scan
-         if (stbi__at_eof(z->s)) return stbi__err("no SOF", "Corrupt JPEG");
-         m = stbi__get_marker(z);
-      }
-   }
-   z->progressive = stbi__SOF_progressive(m);
-   if (!stbi__process_frame_header(z, scan)) return 0;
-   return 1;
-}
-
-static stbi_uc stbi__skip_jpeg_junk_at_end(stbi__jpeg *j)
-{
-   // some JPEGs have junk at end, skip over it but if we find what looks
-   // like a valid marker, resume there
-   while (!stbi__at_eof(j->s)) {
-      stbi_uc x = stbi__get8(j->s);
-      while (x == 0xff) { // might be a marker
-         if (stbi__at_eof(j->s)) return STBI__MARKER_none;
-         x = stbi__get8(j->s);
-         if (x != 0x00 && x != 0xff) {
-            // not a stuffed zero or lead-in to another marker, looks
-            // like an actual marker, return it
-            return x;
-         }
-         // stuffed zero has x=0 now which ends the loop, meaning we go
-         // back to regular scan loop.
-         // repeated 0xff keeps trying to read the next byte of the marker.
-      }
-   }
-   return STBI__MARKER_none;
-}
-
-// decode image to YCbCr format
-static int stbi__decode_jpeg_image(stbi__jpeg *j)
-{
-   int m;
-   for (m = 0; m < 4; m++) {
-      j->img_comp[m].raw_data = NULL;
-      j->img_comp[m].raw_coeff = NULL;
-   }
-   j->restart_interval = 0;
-   if (!stbi__decode_jpeg_header(j, STBI__SCAN_load)) return 0;
-   m = stbi__get_marker(j);
-   while (!stbi__EOI(m)) {
-      if (stbi__SOS(m)) {
-         if (!stbi__process_scan_header(j)) return 0;
-         if (!stbi__parse_entropy_coded_data(j)) return 0;
-         if (j->marker == STBI__MARKER_none ) {
-         j->marker = stbi__skip_jpeg_junk_at_end(j);
-            // if we reach eof without hitting a marker, stbi__get_marker() below will fail and we'll eventually return 0
-         }
-         m = stbi__get_marker(j);
-         if (STBI__RESTART(m))
-            m = stbi__get_marker(j);
-      } else if (stbi__DNL(m)) {
-         int Ld = stbi__get16be(j->s);
-         stbi__uint32 NL = stbi__get16be(j->s);
-         if (Ld != 4) return stbi__err("bad DNL len", "Corrupt JPEG");
-         if (NL != j->s->img_y) return stbi__err("bad DNL height", "Corrupt JPEG");
-         m = stbi__get_marker(j);
-      } else {
-         if (!stbi__process_marker(j, m)) return 1;
-         m = stbi__get_marker(j);
-      }
-   }
-   if (j->progressive)
-      stbi__jpeg_finish(j);
-   return 1;
-}
-
-// static jfif-centered resampling (across block boundaries)
-
-typedef stbi_uc *(*resample_row_func)(stbi_uc *out, stbi_uc *in0, stbi_uc *in1,
-                                    int w, int hs);
-
-#define stbi__div4(x) ((stbi_uc) ((x) >> 2))
-
-static stbi_uc *resample_row_1(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   STBI_NOTUSED(out);
-   STBI_NOTUSED(in_far);
-   STBI_NOTUSED(w);
-   STBI_NOTUSED(hs);
-   return in_near;
-}
-
-static stbi_uc* stbi__resample_row_v_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate two samples vertically for every one in input
-   int i;
-   STBI_NOTUSED(hs);
-   for (i=0; i < w; ++i)
-      out[i] = stbi__div4(3*in_near[i] + in_far[i] + 2);
-   return out;
-}
-
-static stbi_uc*  stbi__resample_row_h_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate two samples horizontally for every one in input
-   int i;
-   stbi_uc *input = in_near;
-
-   if (w == 1) {
-      // if only one sample, can't do any interpolation
-      out[0] = out[1] = input[0];
-      return out;
-   }
-
-   out[0] = input[0];
-   out[1] = stbi__div4(input[0]*3 + input[1] + 2);
-   for (i=1; i < w-1; ++i) {
-      int n = 3*input[i]+2;
-      out[i*2+0] = stbi__div4(n+input[i-1]);
-      out[i*2+1] = stbi__div4(n+input[i+1]);
-   }
-   out[i*2+0] = stbi__div4(input[w-2]*3 + input[w-1] + 2);
-   out[i*2+1] = input[w-1];
-
-   STBI_NOTUSED(in_far);
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-
-#define stbi__div16(x) ((stbi_uc) ((x) >> 4))
-
-static stbi_uc *stbi__resample_row_hv_2(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate 2x2 samples for every one in input
-   int i,t0,t1;
-   if (w == 1) {
-      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
-      return out;
-   }
-
-   t1 = 3*in_near[0] + in_far[0];
-   out[0] = stbi__div4(t1+2);
-   for (i=1; i < w; ++i) {
-      t0 = t1;
-      t1 = 3*in_near[i]+in_far[i];
-      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
-      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
-   }
-   out[w*2-1] = stbi__div4(t1+2);
-
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static stbi_uc *stbi__resample_row_hv_2_simd(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // need to generate 2x2 samples for every one in input
-   int i=0,t0,t1;
-
-   if (w == 1) {
-      out[0] = out[1] = stbi__div4(3*in_near[0] + in_far[0] + 2);
-      return out;
-   }
-
-   t1 = 3*in_near[0] + in_far[0];
-   // process groups of 8 pixels for as long as we can.
-   // note we can't handle the last pixel in a row in this loop
-   // because we need to handle the filter boundary conditions.
-   for (; i < ((w-1) & ~7); i += 8) {
-#if defined(STBI_SSE2)
-      // load and perform the vertical filtering pass
-      // this uses 3*x + y = 4*x + (y - x)
-      __m128i zero  = _mm_setzero_si128();
-      __m128i farb  = _mm_loadl_epi64((__m128i *) (in_far + i));
-      __m128i nearb = _mm_loadl_epi64((__m128i *) (in_near + i));
-      __m128i farw  = _mm_unpacklo_epi8(farb, zero);
-      __m128i nearw = _mm_unpacklo_epi8(nearb, zero);
-      __m128i diff  = _mm_sub_epi16(farw, nearw);
-      __m128i nears = _mm_slli_epi16(nearw, 2);
-      __m128i curr  = _mm_add_epi16(nears, diff); // current row
-
-      // horizontal filter works the same based on shifted vers of current
-      // row. "prev" is current row shifted right by 1 pixel; we need to
-      // insert the previous pixel value (from t1).
-      // "next" is current row shifted left by 1 pixel, with first pixel
-      // of next block of 8 pixels added in.
-      __m128i prv0 = _mm_slli_si128(curr, 2);
-      __m128i nxt0 = _mm_srli_si128(curr, 2);
-      __m128i prev = _mm_insert_epi16(prv0, t1, 0);
-      __m128i next = _mm_insert_epi16(nxt0, 3*in_near[i+8] + in_far[i+8], 7);
-
-      // horizontal filter, polyphase implementation since it's convenient:
-      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-      // note the shared term.
-      __m128i bias  = _mm_set1_epi16(8);
-      __m128i curs = _mm_slli_epi16(curr, 2);
-      __m128i prvd = _mm_sub_epi16(prev, curr);
-      __m128i nxtd = _mm_sub_epi16(next, curr);
-      __m128i curb = _mm_add_epi16(curs, bias);
-      __m128i even = _mm_add_epi16(prvd, curb);
-      __m128i odd  = _mm_add_epi16(nxtd, curb);
-
-      // interleave even and odd pixels, then undo scaling.
-      __m128i int0 = _mm_unpacklo_epi16(even, odd);
-      __m128i int1 = _mm_unpackhi_epi16(even, odd);
-      __m128i de0  = _mm_srli_epi16(int0, 4);
-      __m128i de1  = _mm_srli_epi16(int1, 4);
-
-      // pack and write output
-      __m128i outv = _mm_packus_epi16(de0, de1);
-      _mm_storeu_si128((__m128i *) (out + i*2), outv);
-#elif defined(STBI_NEON)
-      // load and perform the vertical filtering pass
-      // this uses 3*x + y = 4*x + (y - x)
-      uint8x8_t farb  = vld1_u8(in_far + i);
-      uint8x8_t nearb = vld1_u8(in_near + i);
-      int16x8_t diff  = vreinterpretq_s16_u16(vsubl_u8(farb, nearb));
-      int16x8_t nears = vreinterpretq_s16_u16(vshll_n_u8(nearb, 2));
-      int16x8_t curr  = vaddq_s16(nears, diff); // current row
-
-      // horizontal filter works the same based on shifted vers of current
-      // row. "prev" is current row shifted right by 1 pixel; we need to
-      // insert the previous pixel value (from t1).
-      // "next" is current row shifted left by 1 pixel, with first pixel
-      // of next block of 8 pixels added in.
-      int16x8_t prv0 = vextq_s16(curr, curr, 7);
-      int16x8_t nxt0 = vextq_s16(curr, curr, 1);
-      int16x8_t prev = vsetq_lane_s16(t1, prv0, 0);
-      int16x8_t next = vsetq_lane_s16(3*in_near[i+8] + in_far[i+8], nxt0, 7);
-
-      // horizontal filter, polyphase implementation since it's convenient:
-      // even pixels = 3*cur + prev = cur*4 + (prev - cur)
-      // odd  pixels = 3*cur + next = cur*4 + (next - cur)
-      // note the shared term.
-      int16x8_t curs = vshlq_n_s16(curr, 2);
-      int16x8_t prvd = vsubq_s16(prev, curr);
-      int16x8_t nxtd = vsubq_s16(next, curr);
-      int16x8_t even = vaddq_s16(curs, prvd);
-      int16x8_t odd  = vaddq_s16(curs, nxtd);
-
-      // undo scaling and round, then store with even/odd phases interleaved
-      uint8x8x2_t o;
-      o.val[0] = vqrshrun_n_s16(even, 4);
-      o.val[1] = vqrshrun_n_s16(odd,  4);
-      vst2_u8(out + i*2, o);
-#endif
-
-      // "previous" value for next iter
-      t1 = 3*in_near[i+7] + in_far[i+7];
-   }
-
-   t0 = t1;
-   t1 = 3*in_near[i] + in_far[i];
-   out[i*2] = stbi__div16(3*t1 + t0 + 8);
-
-   for (++i; i < w; ++i) {
-      t0 = t1;
-      t1 = 3*in_near[i]+in_far[i];
-      out[i*2-1] = stbi__div16(3*t0 + t1 + 8);
-      out[i*2  ] = stbi__div16(3*t1 + t0 + 8);
-   }
-   out[w*2-1] = stbi__div4(t1+2);
-
-   STBI_NOTUSED(hs);
-
-   return out;
-}
-#endif
-
-static stbi_uc *stbi__resample_row_generic(stbi_uc *out, stbi_uc *in_near, stbi_uc *in_far, int w, int hs)
-{
-   // resample with nearest-neighbor
-   int i,j;
-   STBI_NOTUSED(in_far);
-   for (i=0; i < w; ++i)
-      for (j=0; j < hs; ++j)
-         out[i*hs+j] = in_near[i];
-   return out;
-}
-
-// this is a reduced-precision calculation of YCbCr-to-RGB introduced
-// to make sure the code produces the same results in both SIMD and scalar
-#define stbi__float2fixed(x)  (((int) ((x) * 4096.0f + 0.5f)) << 8)
-static void stbi__YCbCr_to_RGB_row(stbi_uc *out, const stbi_uc *y, const stbi_uc *pcb, const stbi_uc *pcr, int count, int step)
-{
-   int i;
-   for (i=0; i < count; ++i) {
-      int y_fixed = (y[i] << 20) + (1<<19); // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed +  cr* stbi__float2fixed(1.40200f);
-      g = y_fixed + (cr*-stbi__float2fixed(0.71414f)) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                                     +   cb* stbi__float2fixed(1.77200f);
-      r >>= 20;
-      g >>= 20;
-      b >>= 20;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-
-#if defined(STBI_SSE2) || defined(STBI_NEON)
-static void stbi__YCbCr_to_RGB_simd(stbi_uc *out, stbi_uc const *y, stbi_uc const *pcb, stbi_uc const *pcr, int count, int step)
-{
-   int i = 0;
-
-#ifdef STBI_SSE2
-   // step == 3 is pretty ugly on the final interleave, and i'm not convinced
-   // it's useful in practice (you wouldn't use it for textures, for example).
-   // so just accelerate step == 4 case.
-   if (step == 4) {
-      // this is a fairly straightforward implementation and not super-optimized.
-      __m128i signflip  = _mm_set1_epi8(-0x80);
-      __m128i cr_const0 = _mm_set1_epi16(   (short) ( 1.40200f*4096.0f+0.5f));
-      __m128i cr_const1 = _mm_set1_epi16( - (short) ( 0.71414f*4096.0f+0.5f));
-      __m128i cb_const0 = _mm_set1_epi16( - (short) ( 0.34414f*4096.0f+0.5f));
-      __m128i cb_const1 = _mm_set1_epi16(   (short) ( 1.77200f*4096.0f+0.5f));
-      __m128i y_bias = _mm_set1_epi8((char) (unsigned char) 128);
-      __m128i xw = _mm_set1_epi16(255); // alpha channel
-
-      for (; i+7 < count; i += 8) {
-         // load
-         __m128i y_bytes = _mm_loadl_epi64((__m128i *) (y+i));
-         __m128i cr_bytes = _mm_loadl_epi64((__m128i *) (pcr+i));
-         __m128i cb_bytes = _mm_loadl_epi64((__m128i *) (pcb+i));
-         __m128i cr_biased = _mm_xor_si128(cr_bytes, signflip); // -128
-         __m128i cb_biased = _mm_xor_si128(cb_bytes, signflip); // -128
-
-         // unpack to short (and left-shift cr, cb by 8)
-         __m128i yw  = _mm_unpacklo_epi8(y_bias, y_bytes);
-         __m128i crw = _mm_unpacklo_epi8(_mm_setzero_si128(), cr_biased);
-         __m128i cbw = _mm_unpacklo_epi8(_mm_setzero_si128(), cb_biased);
-
-         // color transform
-         __m128i yws = _mm_srli_epi16(yw, 4);
-         __m128i cr0 = _mm_mulhi_epi16(cr_const0, crw);
-         __m128i cb0 = _mm_mulhi_epi16(cb_const0, cbw);
-         __m128i cb1 = _mm_mulhi_epi16(cbw, cb_const1);
-         __m128i cr1 = _mm_mulhi_epi16(crw, cr_const1);
-         __m128i rws = _mm_add_epi16(cr0, yws);
-         __m128i gwt = _mm_add_epi16(cb0, yws);
-         __m128i bws = _mm_add_epi16(yws, cb1);
-         __m128i gws = _mm_add_epi16(gwt, cr1);
-
-         // descale
-         __m128i rw = _mm_srai_epi16(rws, 4);
-         __m128i bw = _mm_srai_epi16(bws, 4);
-         __m128i gw = _mm_srai_epi16(gws, 4);
-
-         // back to byte, set up for transpose
-         __m128i brb = _mm_packus_epi16(rw, bw);
-         __m128i gxb = _mm_packus_epi16(gw, xw);
-
-         // transpose to interleave channels
-         __m128i t0 = _mm_unpacklo_epi8(brb, gxb);
-         __m128i t1 = _mm_unpackhi_epi8(brb, gxb);
-         __m128i o0 = _mm_unpacklo_epi16(t0, t1);
-         __m128i o1 = _mm_unpackhi_epi16(t0, t1);
-
-         // store
-         _mm_storeu_si128((__m128i *) (out + 0), o0);
-         _mm_storeu_si128((__m128i *) (out + 16), o1);
-         out += 32;
-      }
-   }
-#endif
-
-#ifdef STBI_NEON
-   // in this version, step=3 support would be easy to add. but is there demand?
-   if (step == 4) {
-      // this is a fairly straightforward implementation and not super-optimized.
-      uint8x8_t signflip = vdup_n_u8(0x80);
-      int16x8_t cr_const0 = vdupq_n_s16(   (short) ( 1.40200f*4096.0f+0.5f));
-      int16x8_t cr_const1 = vdupq_n_s16( - (short) ( 0.71414f*4096.0f+0.5f));
-      int16x8_t cb_const0 = vdupq_n_s16( - (short) ( 0.34414f*4096.0f+0.5f));
-      int16x8_t cb_const1 = vdupq_n_s16(   (short) ( 1.77200f*4096.0f+0.5f));
-
-      for (; i+7 < count; i += 8) {
-         // load
-         uint8x8_t y_bytes  = vld1_u8(y + i);
-         uint8x8_t cr_bytes = vld1_u8(pcr + i);
-         uint8x8_t cb_bytes = vld1_u8(pcb + i);
-         int8x8_t cr_biased = vreinterpret_s8_u8(vsub_u8(cr_bytes, signflip));
-         int8x8_t cb_biased = vreinterpret_s8_u8(vsub_u8(cb_bytes, signflip));
-
-         // expand to s16
-         int16x8_t yws = vreinterpretq_s16_u16(vshll_n_u8(y_bytes, 4));
-         int16x8_t crw = vshll_n_s8(cr_biased, 7);
-         int16x8_t cbw = vshll_n_s8(cb_biased, 7);
-
-         // color transform
-         int16x8_t cr0 = vqdmulhq_s16(crw, cr_const0);
-         int16x8_t cb0 = vqdmulhq_s16(cbw, cb_const0);
-         int16x8_t cr1 = vqdmulhq_s16(crw, cr_const1);
-         int16x8_t cb1 = vqdmulhq_s16(cbw, cb_const1);
-         int16x8_t rws = vaddq_s16(yws, cr0);
-         int16x8_t gws = vaddq_s16(vaddq_s16(yws, cb0), cr1);
-         int16x8_t bws = vaddq_s16(yws, cb1);
-
-         // undo scaling, round, convert to byte
-         uint8x8x4_t o;
-         o.val[0] = vqrshrun_n_s16(rws, 4);
-         o.val[1] = vqrshrun_n_s16(gws, 4);
-         o.val[2] = vqrshrun_n_s16(bws, 4);
-         o.val[3] = vdup_n_u8(255);
-
-         // store, interleaving r/g/b/a
-         vst4_u8(out, o);
-         out += 8*4;
-      }
-   }
-#endif
-
-   for (; i < count; ++i) {
-      int y_fixed = (y[i] << 20) + (1<<19); // rounding
-      int r,g,b;
-      int cr = pcr[i] - 128;
-      int cb = pcb[i] - 128;
-      r = y_fixed + cr* stbi__float2fixed(1.40200f);
-      g = y_fixed + cr*-stbi__float2fixed(0.71414f) + ((cb*-stbi__float2fixed(0.34414f)) & 0xffff0000);
-      b = y_fixed                                   +   cb* stbi__float2fixed(1.77200f);
-      r >>= 20;
-      g >>= 20;
-      b >>= 20;
-      if ((unsigned) r > 255) { if (r < 0) r = 0; else r = 255; }
-      if ((unsigned) g > 255) { if (g < 0) g = 0; else g = 255; }
-      if ((unsigned) b > 255) { if (b < 0) b = 0; else b = 255; }
-      out[0] = (stbi_uc)r;
-      out[1] = (stbi_uc)g;
-      out[2] = (stbi_uc)b;
-      out[3] = 255;
-      out += step;
-   }
-}
-#endif
-
-// set up the kernels
-static void stbi__setup_jpeg(stbi__jpeg *j)
-{
-   j->idct_block_kernel = stbi__idct_block;
-   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_row;
-   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2;
-
-#ifdef STBI_SSE2
-   if (stbi__sse2_available()) {
-      j->idct_block_kernel = stbi__idct_simd;
-      j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-      j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-   }
-#endif
-
-#ifdef STBI_NEON
-   j->idct_block_kernel = stbi__idct_simd;
-   j->YCbCr_to_RGB_kernel = stbi__YCbCr_to_RGB_simd;
-   j->resample_row_hv_2_kernel = stbi__resample_row_hv_2_simd;
-#endif
-}
-
-// clean up the temporary component buffers
-static void stbi__cleanup_jpeg(stbi__jpeg *j)
-{
-   stbi__free_jpeg_components(j, j->s->img_n, 0);
-}
-
-typedef struct
-{
-   resample_row_func resample;
-   stbi_uc *line0,*line1;
-   int hs,vs;   // expansion factor in each axis
-   int w_lores; // horizontal pixels pre-expansion
-   int ystep;   // how far through vertical expansion we are
-   int ypos;    // which pre-expansion row we're on
-} stbi__resample;
-
-// fast 0..255 * 0..255 => 0..255 rounded multiplication
-static stbi_uc stbi__blinn_8x8(stbi_uc x, stbi_uc y)
-{
-   unsigned int t = x*y + 128;
-   return (stbi_uc) ((t + (t >>8)) >> 8);
-}
-
-static stbi_uc *load_jpeg_image(stbi__jpeg *z, int *out_x, int *out_y, int *comp, int req_comp)
-{
-   int n, decode_n, is_rgb;
-   z->s->img_n = 0; // make stbi__cleanup_jpeg safe
-
-   // validate req_comp
-   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
-
-   // load a jpeg image from whichever source, but leave in YCbCr format
-   if (!stbi__decode_jpeg_image(z)) { stbi__cleanup_jpeg(z); return NULL; }
-
-   // determine actual number of components to generate
-   n = req_comp ? req_comp : z->s->img_n >= 3 ? 3 : 1;
-
-   is_rgb = z->s->img_n == 3 && (z->rgb == 3 || (z->app14_color_transform == 0 && !z->jfif));
-
-   if (z->s->img_n == 3 && n < 3 && !is_rgb)
-      decode_n = 1;
-   else
-      decode_n = z->s->img_n;
-
-   // nothing to do if no components requested; check this now to avoid
-   // accessing uninitialized coutput[0] later
-   if (decode_n <= 0) { stbi__cleanup_jpeg(z); return NULL; }
-
-   // resample and color-convert
-   {
-      int k;
-      unsigned int i,j;
-      stbi_uc *output;
-      stbi_uc *coutput[4] = { NULL, NULL, NULL, NULL };
-
-      stbi__resample res_comp[4];
-
-      for (k=0; k < decode_n; ++k) {
-         stbi__resample *r = &res_comp[k];
-
-         // allocate line buffer big enough for upsampling off the edges
-         // with upsample factor of 4
-         z->img_comp[k].linebuf = (stbi_uc *) stbi__malloc(z->s->img_x + 3);
-         if (!z->img_comp[k].linebuf) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
-
-         r->hs      = z->img_h_max / z->img_comp[k].h;
-         r->vs      = z->img_v_max / z->img_comp[k].v;
-         r->ystep   = r->vs >> 1;
-         r->w_lores = (z->s->img_x + r->hs-1) / r->hs;
-         r->ypos    = 0;
-         r->line0   = r->line1 = z->img_comp[k].data;
-
-         if      (r->hs == 1 && r->vs == 1) r->resample = resample_row_1;
-         else if (r->hs == 1 && r->vs == 2) r->resample = stbi__resample_row_v_2;
-         else if (r->hs == 2 && r->vs == 1) r->resample = stbi__resample_row_h_2;
-         else if (r->hs == 2 && r->vs == 2) r->resample = z->resample_row_hv_2_kernel;
-         else                               r->resample = stbi__resample_row_generic;
-      }
-
-      // can't error after this so, this is safe
-      output = (stbi_uc *) stbi__malloc_mad3(n, z->s->img_x, z->s->img_y, 1);
-      if (!output) { stbi__cleanup_jpeg(z); return stbi__errpuc("outofmem", "Out of memory"); }
-
-      // now go ahead and resample
-      for (j=0; j < z->s->img_y; ++j) {
-         stbi_uc *out = output + n * z->s->img_x * j;
-         for (k=0; k < decode_n; ++k) {
-            stbi__resample *r = &res_comp[k];
-            int y_bot = r->ystep >= (r->vs >> 1);
-            coutput[k] = r->resample(z->img_comp[k].linebuf,
-                                     y_bot ? r->line1 : r->line0,
-                                     y_bot ? r->line0 : r->line1,
-                                     r->w_lores, r->hs);
-            if (++r->ystep >= r->vs) {
-               r->ystep = 0;
-               r->line0 = r->line1;
-               if (++r->ypos < z->img_comp[k].y)
-                  r->line1 += z->img_comp[k].w2;
-            }
-         }
-         if (n >= 3) {
-            stbi_uc *y = coutput[0];
-            if (z->s->img_n == 3) {
-               if (is_rgb) {
-                  for (i=0; i < z->s->img_x; ++i) {
-                     out[0] = y[i];
-                     out[1] = coutput[1][i];
-                     out[2] = coutput[2][i];
-                     out[3] = 255;
-                     out += n;
-                  }
-               } else {
-                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-               }
-            } else if (z->s->img_n == 4) {
-               if (z->app14_color_transform == 0) { // CMYK
-                  for (i=0; i < z->s->img_x; ++i) {
-                     stbi_uc m = coutput[3][i];
-                     out[0] = stbi__blinn_8x8(coutput[0][i], m);
-                     out[1] = stbi__blinn_8x8(coutput[1][i], m);
-                     out[2] = stbi__blinn_8x8(coutput[2][i], m);
-                     out[3] = 255;
-                     out += n;
-                  }
-               } else if (z->app14_color_transform == 2) { // YCCK
-                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-                  for (i=0; i < z->s->img_x; ++i) {
-                     stbi_uc m = coutput[3][i];
-                     out[0] = stbi__blinn_8x8(255 - out[0], m);
-                     out[1] = stbi__blinn_8x8(255 - out[1], m);
-                     out[2] = stbi__blinn_8x8(255 - out[2], m);
-                     out += n;
-                  }
-               } else { // YCbCr + alpha?  Ignore the fourth channel for now
-                  z->YCbCr_to_RGB_kernel(out, y, coutput[1], coutput[2], z->s->img_x, n);
-               }
-            } else
-               for (i=0; i < z->s->img_x; ++i) {
-                  out[0] = out[1] = out[2] = y[i];
-                  out[3] = 255; // not used if n==3
-                  out += n;
-               }
-         } else {
-            if (is_rgb) {
-               if (n == 1)
-                  for (i=0; i < z->s->img_x; ++i)
-                     *out++ = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
-               else {
-                  for (i=0; i < z->s->img_x; ++i, out += 2) {
-                     out[0] = stbi__compute_y(coutput[0][i], coutput[1][i], coutput[2][i]);
-                     out[1] = 255;
-                  }
-               }
-            } else if (z->s->img_n == 4 && z->app14_color_transform == 0) {
-               for (i=0; i < z->s->img_x; ++i) {
-                  stbi_uc m = coutput[3][i];
-                  stbi_uc r = stbi__blinn_8x8(coutput[0][i], m);
-                  stbi_uc g = stbi__blinn_8x8(coutput[1][i], m);
-                  stbi_uc b = stbi__blinn_8x8(coutput[2][i], m);
-                  out[0] = stbi__compute_y(r, g, b);
-                  out[1] = 255;
-                  out += n;
-               }
-            } else if (z->s->img_n == 4 && z->app14_color_transform == 2) {
-               for (i=0; i < z->s->img_x; ++i) {
-                  out[0] = stbi__blinn_8x8(255 - coutput[0][i], coutput[3][i]);
-                  out[1] = 255;
-                  out += n;
-               }
-            } else {
-               stbi_uc *y = coutput[0];
-               if (n == 1)
-                  for (i=0; i < z->s->img_x; ++i) out[i] = y[i];
-               else
-                  for (i=0; i < z->s->img_x; ++i) { *out++ = y[i]; *out++ = 255; }
-            }
-         }
-      }
-      stbi__cleanup_jpeg(z);
-      *out_x = z->s->img_x;
-      *out_y = z->s->img_y;
-      if (comp) *comp = z->s->img_n >= 3 ? 3 : 1; // report original components, not output
-      return output;
-   }
-}
-
-static void *stbi__jpeg_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   unsigned char* result;
-   stbi__jpeg* j = (stbi__jpeg*) stbi__malloc(sizeof(stbi__jpeg));
-   if (!j) return stbi__errpuc("outofmem", "Out of memory");
-   memset(j, 0, sizeof(stbi__jpeg));
-   STBI_NOTUSED(ri);
-   j->s = s;
-   stbi__setup_jpeg(j);
-   result = load_jpeg_image(j, x,y,comp,req_comp);
-   STBI_FREE(j);
-   return result;
-}
-
-static int stbi__jpeg_test(stbi__context *s)
-{
-   int r;
-   stbi__jpeg* j = (stbi__jpeg*)stbi__malloc(sizeof(stbi__jpeg));
-   if (!j) return stbi__err("outofmem", "Out of memory");
-   memset(j, 0, sizeof(stbi__jpeg));
-   j->s = s;
-   stbi__setup_jpeg(j);
-   r = stbi__decode_jpeg_header(j, STBI__SCAN_type);
-   stbi__rewind(s);
-   STBI_FREE(j);
-   return r;
-}
-
-static int stbi__jpeg_info_raw(stbi__jpeg *j, int *x, int *y, int *comp)
-{
-   if (!stbi__decode_jpeg_header(j, STBI__SCAN_header)) {
-      stbi__rewind( j->s );
-      return 0;
-   }
-   if (x) *x = j->s->img_x;
-   if (y) *y = j->s->img_y;
-   if (comp) *comp = j->s->img_n >= 3 ? 3 : 1;
-   return 1;
-}
-
-static int stbi__jpeg_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int result;
-   stbi__jpeg* j = (stbi__jpeg*) (stbi__malloc(sizeof(stbi__jpeg)));
-   if (!j) return stbi__err("outofmem", "Out of memory");
-   memset(j, 0, sizeof(stbi__jpeg));
-   j->s = s;
-   result = stbi__jpeg_info_raw(j, x, y, comp);
-   STBI_FREE(j);
-   return result;
-}
-#endif
-
-// public domain zlib decode    v0.2  Sean Barrett 2006-11-18
-//    simple implementation
-//      - all input must be provided in an upfront buffer
-//      - all output is written to a single output buffer (can malloc/realloc)
-//    performance
-//      - fast huffman
-
-#ifndef STBI_NO_ZLIB
-
-// fast-way is faster to check than jpeg huffman, but slow way is slower
-#define STBI__ZFAST_BITS  9 // accelerate all cases in default tables
-#define STBI__ZFAST_MASK  ((1 << STBI__ZFAST_BITS) - 1)
-#define STBI__ZNSYMS 288 // number of symbols in literal/length alphabet
-
-// zlib-style huffman encoding
-// (jpegs packs from left, zlib from right, so can't share code)
-typedef struct
-{
-   stbi__uint16 fast[1 << STBI__ZFAST_BITS];
-   stbi__uint16 firstcode[16];
-   int maxcode[17];
-   stbi__uint16 firstsymbol[16];
-   stbi_uc  size[STBI__ZNSYMS];
-   stbi__uint16 value[STBI__ZNSYMS];
-} stbi__zhuffman;
-
-stbi_inline static int stbi__bitreverse16(int n)
-{
-  n = ((n & 0xAAAA) >>  1) | ((n & 0x5555) << 1);
-  n = ((n & 0xCCCC) >>  2) | ((n & 0x3333) << 2);
-  n = ((n & 0xF0F0) >>  4) | ((n & 0x0F0F) << 4);
-  n = ((n & 0xFF00) >>  8) | ((n & 0x00FF) << 8);
-  return n;
-}
-
-stbi_inline static int stbi__bit_reverse(int v, int bits)
-{
-   STBI_ASSERT(bits <= 16);
-   // to bit reverse n bits, reverse 16 and shift
-   // e.g. 11 bits, bit reverse and shift away 5
-   return stbi__bitreverse16(v) >> (16-bits);
-}
-
-static int stbi__zbuild_huffman(stbi__zhuffman *z, const stbi_uc *sizelist, int num)
-{
-   int i,k=0;
-   int code, next_code[16], sizes[17];
-
-   // DEFLATE spec for generating codes
-   memset(sizes, 0, sizeof(sizes));
-   memset(z->fast, 0, sizeof(z->fast));
-   for (i=0; i < num; ++i)
-      ++sizes[sizelist[i]];
-   sizes[0] = 0;
-   for (i=1; i < 16; ++i)
-      if (sizes[i] > (1 << i))
-         return stbi__err("bad sizes", "Corrupt PNG");
-   code = 0;
-   for (i=1; i < 16; ++i) {
-      next_code[i] = code;
-      z->firstcode[i] = (stbi__uint16) code;
-      z->firstsymbol[i] = (stbi__uint16) k;
-      code = (code + sizes[i]);
-      if (sizes[i])
-         if (code-1 >= (1 << i)) return stbi__err("bad codelengths","Corrupt PNG");
-      z->maxcode[i] = code << (16-i); // preshift for inner loop
-      code <<= 1;
-      k += sizes[i];
-   }
-   z->maxcode[16] = 0x10000; // sentinel
-   for (i=0; i < num; ++i) {
-      int s = sizelist[i];
-      if (s) {
-         int c = next_code[s] - z->firstcode[s] + z->firstsymbol[s];
-         stbi__uint16 fastv = (stbi__uint16) ((s << 9) | i);
-         z->size [c] = (stbi_uc     ) s;
-         z->value[c] = (stbi__uint16) i;
-         if (s <= STBI__ZFAST_BITS) {
-            int j = stbi__bit_reverse(next_code[s],s);
-            while (j < (1 << STBI__ZFAST_BITS)) {
-               z->fast[j] = fastv;
-               j += (1 << s);
-            }
-         }
-         ++next_code[s];
-      }
-   }
-   return 1;
-}
-
-// zlib-from-memory implementation for PNG reading
-//    because PNG allows splitting the zlib stream arbitrarily,
-//    and it's annoying structurally to have PNG call ZLIB call PNG,
-//    we require PNG read all the IDATs and combine them into a single
-//    memory buffer
-
-typedef struct
-{
-   stbi_uc *zbuffer, *zbuffer_end;
-   int num_bits;
-   int hit_zeof_once;
-   stbi__uint32 code_buffer;
-
-   char *zout;
-   char *zout_start;
-   char *zout_end;
-   int   z_expandable;
-
-   stbi__zhuffman z_length, z_distance;
-} stbi__zbuf;
-
-stbi_inline static int stbi__zeof(stbi__zbuf *z)
-{
-   return (z->zbuffer >= z->zbuffer_end);
-}
-
-stbi_inline static stbi_uc stbi__zget8(stbi__zbuf *z)
-{
-   return stbi__zeof(z) ? 0 : *z->zbuffer++;
-}
-
-static void stbi__fill_bits(stbi__zbuf *z)
-{
-   do {
-      if (z->code_buffer >= (1U << z->num_bits)) {
-        z->zbuffer = z->zbuffer_end;  /* treat this as EOF so we fail. */
-        return;
-      }
-      z->code_buffer |= (unsigned int) stbi__zget8(z) << z->num_bits;
-      z->num_bits += 8;
-   } while (z->num_bits <= 24);
-}
-
-stbi_inline static unsigned int stbi__zreceive(stbi__zbuf *z, int n)
-{
-   unsigned int k;
-   if (z->num_bits < n) stbi__fill_bits(z);
-   k = z->code_buffer & ((1 << n) - 1);
-   z->code_buffer >>= n;
-   z->num_bits -= n;
-   return k;
-}
-
-static int stbi__zhuffman_decode_slowpath(stbi__zbuf *a, stbi__zhuffman *z)
-{
-   int b,s,k;
-   // not resolved by fast table, so compute it the slow way
-   // use jpeg approach, which requires MSbits at top
-   k = stbi__bit_reverse(a->code_buffer, 16);
-   for (s=STBI__ZFAST_BITS+1; ; ++s)
-      if (k < z->maxcode[s])
-         break;
-   if (s >= 16) return -1; // invalid code!
-   // code size is s, so:
-   b = (k >> (16-s)) - z->firstcode[s] + z->firstsymbol[s];
-   if (b >= STBI__ZNSYMS) return -1; // some data was corrupt somewhere!
-   if (z->size[b] != s) return -1;  // was originally an assert, but report failure instead.
-   a->code_buffer >>= s;
-   a->num_bits -= s;
-   return z->value[b];
-}
-
-stbi_inline static int stbi__zhuffman_decode(stbi__zbuf *a, stbi__zhuffman *z)
-{
-   int b,s;
-   if (a->num_bits < 16) {
-      if (stbi__zeof(a)) {
-         if (!a->hit_zeof_once) {
-            // This is the first time we hit eof, insert 16 extra padding btis
-            // to allow us to keep going; if we actually consume any of them
-            // though, that is invalid data. This is caught later.
-            a->hit_zeof_once = 1;
-            a->num_bits += 16; // add 16 implicit zero bits
-         } else {
-            // We already inserted our extra 16 padding bits and are again
-            // out, this stream is actually prematurely terminated.
-            return -1;
-         }
-      } else {
-         stbi__fill_bits(a);
-      }
-   }
-   b = z->fast[a->code_buffer & STBI__ZFAST_MASK];
-   if (b) {
-      s = b >> 9;
-      a->code_buffer >>= s;
-      a->num_bits -= s;
-      return b & 511;
-   }
-   return stbi__zhuffman_decode_slowpath(a, z);
-}
-
-static int stbi__zexpand(stbi__zbuf *z, char *zout, int n)  // need to make room for n bytes
-{
-   char *q;
-   unsigned int cur, limit, old_limit;
-   z->zout = zout;
-   if (!z->z_expandable) return stbi__err("output buffer limit","Corrupt PNG");
-   cur   = (unsigned int) (z->zout - z->zout_start);
-   limit = old_limit = (unsigned) (z->zout_end - z->zout_start);
-   if (UINT_MAX - cur < (unsigned) n) return stbi__err("outofmem", "Out of memory");
-   while (cur + n > limit) {
-      if(limit > UINT_MAX / 2) return stbi__err("outofmem", "Out of memory");
-      limit *= 2;
-   }
-   q = (char *) STBI_REALLOC_SIZED(z->zout_start, old_limit, limit);
-   STBI_NOTUSED(old_limit);
-   if (q == NULL) return stbi__err("outofmem", "Out of memory");
-   z->zout_start = q;
-   z->zout       = q + cur;
-   z->zout_end   = q + limit;
-   return 1;
-}
-
-static const int stbi__zlength_base[31] = {
-   3,4,5,6,7,8,9,10,11,13,
-   15,17,19,23,27,31,35,43,51,59,
-   67,83,99,115,131,163,195,227,258,0,0 };
-
-static const int stbi__zlength_extra[31]=
-{ 0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0,0,0 };
-
-static const int stbi__zdist_base[32] = { 1,2,3,4,5,7,9,13,17,25,33,49,65,97,129,193,
-257,385,513,769,1025,1537,2049,3073,4097,6145,8193,12289,16385,24577,0,0};
-
-static const int stbi__zdist_extra[32] =
-{ 0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
-
-static int stbi__parse_huffman_block(stbi__zbuf *a)
-{
-   char *zout = a->zout;
-   for(;;) {
-      int z = stbi__zhuffman_decode(a, &a->z_length);
-      if (z < 256) {
-         if (z < 0) return stbi__err("bad huffman code","Corrupt PNG"); // error in huffman codes
-         if (zout >= a->zout_end) {
-            if (!stbi__zexpand(a, zout, 1)) return 0;
-            zout = a->zout;
-         }
-         *zout++ = (char) z;
-      } else {
-         stbi_uc *p;
-         int len,dist;
-         if (z == 256) {
-            a->zout = zout;
-            if (a->hit_zeof_once && a->num_bits < 16) {
-               // The first time we hit zeof, we inserted 16 extra zero bits into our bit
-               // buffer so the decoder can just do its speculative decoding. But if we
-               // actually consumed any of those bits (which is the case when num_bits < 16),
-               // the stream actually read past the end so it is malformed.
-               return stbi__err("unexpected end","Corrupt PNG");
-            }
-            return 1;
-         }
-         if (z >= 286) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, length codes 286 and 287 must not appear in compressed data
-         z -= 257;
-         len = stbi__zlength_base[z];
-         if (stbi__zlength_extra[z]) len += stbi__zreceive(a, stbi__zlength_extra[z]);
-         z = stbi__zhuffman_decode(a, &a->z_distance);
-         if (z < 0 || z >= 30) return stbi__err("bad huffman code","Corrupt PNG"); // per DEFLATE, distance codes 30 and 31 must not appear in compressed data
-         dist = stbi__zdist_base[z];
-         if (stbi__zdist_extra[z]) dist += stbi__zreceive(a, stbi__zdist_extra[z]);
-         if (zout - a->zout_start < dist) return stbi__err("bad dist","Corrupt PNG");
-         if (len > a->zout_end - zout) {
-            if (!stbi__zexpand(a, zout, len)) return 0;
-            zout = a->zout;
-         }
-         p = (stbi_uc *) (zout - dist);
-         if (dist == 1) { // run of one byte; common in images.
-            stbi_uc v = *p;
-            if (len) { do *zout++ = v; while (--len); }
-         } else {
-            if (len) { do *zout++ = *p++; while (--len); }
-         }
-      }
-   }
-}
-
-static int stbi__compute_huffman_codes(stbi__zbuf *a)
-{
-   static const stbi_uc length_dezigzag[19] = { 16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15 };
-   stbi__zhuffman z_codelength;
-   stbi_uc lencodes[286+32+137];//padding for maximum single op
-   stbi_uc codelength_sizes[19];
-   int i,n;
-
-   int hlit  = stbi__zreceive(a,5) + 257;
-   int hdist = stbi__zreceive(a,5) + 1;
-   int hclen = stbi__zreceive(a,4) + 4;
-   int ntot  = hlit + hdist;
-
-   memset(codelength_sizes, 0, sizeof(codelength_sizes));
-   for (i=0; i < hclen; ++i) {
-      int s = stbi__zreceive(a,3);
-      codelength_sizes[length_dezigzag[i]] = (stbi_uc) s;
-   }
-   if (!stbi__zbuild_huffman(&z_codelength, codelength_sizes, 19)) return 0;
-
-   n = 0;
-   while (n < ntot) {
-      int c = stbi__zhuffman_decode(a, &z_codelength);
-      if (c < 0 || c >= 19) return stbi__err("bad codelengths", "Corrupt PNG");
-      if (c < 16)
-         lencodes[n++] = (stbi_uc) c;
-      else {
-         stbi_uc fill = 0;
-         if (c == 16) {
-            c = stbi__zreceive(a,2)+3;
-            if (n == 0) return stbi__err("bad codelengths", "Corrupt PNG");
-            fill = lencodes[n-1];
-         } else if (c == 17) {
-            c = stbi__zreceive(a,3)+3;
-         } else if (c == 18) {
-            c = stbi__zreceive(a,7)+11;
-         } else {
-            return stbi__err("bad codelengths", "Corrupt PNG");
-         }
-         if (ntot - n < c) return stbi__err("bad codelengths", "Corrupt PNG");
-         memset(lencodes+n, fill, c);
-         n += c;
-      }
-   }
-   if (n != ntot) return stbi__err("bad codelengths","Corrupt PNG");
-   if (!stbi__zbuild_huffman(&a->z_length, lencodes, hlit)) return 0;
-   if (!stbi__zbuild_huffman(&a->z_distance, lencodes+hlit, hdist)) return 0;
-   return 1;
-}
-
-static int stbi__parse_uncompressed_block(stbi__zbuf *a)
-{
-   stbi_uc header[4];
-   int len,nlen,k;
-   if (a->num_bits & 7)
-      stbi__zreceive(a, a->num_bits & 7); // discard
-   // drain the bit-packed data into header
-   k = 0;
-   while (a->num_bits > 0) {
-      header[k++] = (stbi_uc) (a->code_buffer & 255); // suppress MSVC run-time check
-      a->code_buffer >>= 8;
-      a->num_bits -= 8;
-   }
-   if (a->num_bits < 0) return stbi__err("zlib corrupt","Corrupt PNG");
-   // now fill header the normal way
-   while (k < 4)
-      header[k++] = stbi__zget8(a);
-   len  = header[1] * 256 + header[0];
-   nlen = header[3] * 256 + header[2];
-   if (nlen != (len ^ 0xffff)) return stbi__err("zlib corrupt","Corrupt PNG");
-   if (a->zbuffer + len > a->zbuffer_end) return stbi__err("read past buffer","Corrupt PNG");
-   if (a->zout + len > a->zout_end)
-      if (!stbi__zexpand(a, a->zout, len)) return 0;
-   memcpy(a->zout, a->zbuffer, len);
-   a->zbuffer += len;
-   a->zout += len;
-   return 1;
-}
-
-static int stbi__parse_zlib_header(stbi__zbuf *a)
-{
-   int cmf   = stbi__zget8(a);
-   int cm    = cmf & 15;
-   /* int cinfo = cmf >> 4; */
-   int flg   = stbi__zget8(a);
-   if (stbi__zeof(a)) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
-   if ((cmf*256+flg) % 31 != 0) return stbi__err("bad zlib header","Corrupt PNG"); // zlib spec
-   if (flg & 32) return stbi__err("no preset dict","Corrupt PNG"); // preset dictionary not allowed in png
-   if (cm != 8) return stbi__err("bad compression","Corrupt PNG"); // DEFLATE required for png
-   // window = 1 << (8 + cinfo)... but who cares, we fully buffer output
-   return 1;
-}
-
-static const stbi_uc stbi__zdefault_length[STBI__ZNSYMS] =
-{
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
-   8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
-   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,8,8,8,8,8,8,8,8
-};
-static const stbi_uc stbi__zdefault_distance[32] =
-{
-   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5
-};
-/*
-Init algorithm:
-{
-   int i;   // use <= to match clearly with spec
-   for (i=0; i <= 143; ++i)     stbi__zdefault_length[i]   = 8;
-   for (   ; i <= 255; ++i)     stbi__zdefault_length[i]   = 9;
-   for (   ; i <= 279; ++i)     stbi__zdefault_length[i]   = 7;
-   for (   ; i <= 287; ++i)     stbi__zdefault_length[i]   = 8;
-
-   for (i=0; i <=  31; ++i)     stbi__zdefault_distance[i] = 5;
-}
-*/
-
-static int stbi__parse_zlib(stbi__zbuf *a, int parse_header)
-{
-   int final, type;
-   if (parse_header)
-      if (!stbi__parse_zlib_header(a)) return 0;
-   a->num_bits = 0;
-   a->code_buffer = 0;
-   a->hit_zeof_once = 0;
-   do {
-      final = stbi__zreceive(a,1);
-      type = stbi__zreceive(a,2);
-      if (type == 0) {
-         if (!stbi__parse_uncompressed_block(a)) return 0;
-      } else if (type == 3) {
-         return 0;
-      } else {
-         if (type == 1) {
-            // use fixed code lengths
-            if (!stbi__zbuild_huffman(&a->z_length  , stbi__zdefault_length  , STBI__ZNSYMS)) return 0;
-            if (!stbi__zbuild_huffman(&a->z_distance, stbi__zdefault_distance,  32)) return 0;
-         } else {
-            if (!stbi__compute_huffman_codes(a)) return 0;
-         }
-         if (!stbi__parse_huffman_block(a)) return 0;
-      }
-   } while (!final);
-   return 1;
-}
-
-static int stbi__do_zlib(stbi__zbuf *a, char *obuf, int olen, int exp, int parse_header)
-{
-   a->zout_start = obuf;
-   a->zout       = obuf;
-   a->zout_end   = obuf + olen;
-   a->z_expandable = exp;
-
-   return stbi__parse_zlib(a, parse_header);
-}
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize(const char *buffer, int len, int initial_size, int *outlen)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(initial_size);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer + len;
-   if (stbi__do_zlib(&a, p, initial_size, 1, 1)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF char *stbi_zlib_decode_malloc(char const *buffer, int len, int *outlen)
-{
-   return stbi_zlib_decode_malloc_guesssize(buffer, len, 16384, outlen);
-}
-
-STBIDEF char *stbi_zlib_decode_malloc_guesssize_headerflag(const char *buffer, int len, int initial_size, int *outlen, int parse_header)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(initial_size);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer + len;
-   if (stbi__do_zlib(&a, p, initial_size, 1, parse_header)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF int stbi_zlib_decode_buffer(char *obuffer, int olen, char const *ibuffer, int ilen)
-{
-   stbi__zbuf a;
-   a.zbuffer = (stbi_uc *) ibuffer;
-   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
-   if (stbi__do_zlib(&a, obuffer, olen, 0, 1))
-      return (int) (a.zout - a.zout_start);
-   else
-      return -1;
-}
-
-STBIDEF char *stbi_zlib_decode_noheader_malloc(char const *buffer, int len, int *outlen)
-{
-   stbi__zbuf a;
-   char *p = (char *) stbi__malloc(16384);
-   if (p == NULL) return NULL;
-   a.zbuffer = (stbi_uc *) buffer;
-   a.zbuffer_end = (stbi_uc *) buffer+len;
-   if (stbi__do_zlib(&a, p, 16384, 1, 0)) {
-      if (outlen) *outlen = (int) (a.zout - a.zout_start);
-      return a.zout_start;
-   } else {
-      STBI_FREE(a.zout_start);
-      return NULL;
-   }
-}
-
-STBIDEF int stbi_zlib_decode_noheader_buffer(char *obuffer, int olen, const char *ibuffer, int ilen)
-{
-   stbi__zbuf a;
-   a.zbuffer = (stbi_uc *) ibuffer;
-   a.zbuffer_end = (stbi_uc *) ibuffer + ilen;
-   if (stbi__do_zlib(&a, obuffer, olen, 0, 0))
-      return (int) (a.zout - a.zout_start);
-   else
-      return -1;
-}
-#endif
-
-// public domain "baseline" PNG decoder   v0.10  Sean Barrett 2006-11-18
-//    simple implementation
-//      - only 8-bit samples
-//      - no CRC checking
-//      - allocates lots of intermediate memory
-//        - avoids problem of streaming data between subsystems
-//        - avoids explicit window management
-//    performance
-//      - uses stb_zlib, a PD zlib implementation with fast huffman decoding
-
-#ifndef STBI_NO_PNG
-typedef struct
-{
-   stbi__uint32 length;
-   stbi__uint32 type;
-} stbi__pngchunk;
-
-static stbi__pngchunk stbi__get_chunk_header(stbi__context *s)
-{
-   stbi__pngchunk c;
-   c.length = stbi__get32be(s);
-   c.type   = stbi__get32be(s);
-   return c;
-}
-
-static int stbi__check_png_header(stbi__context *s)
-{
-   static const stbi_uc png_sig[8] = { 137,80,78,71,13,10,26,10 };
-   int i;
-   for (i=0; i < 8; ++i)
-      if (stbi__get8(s) != png_sig[i]) return stbi__err("bad png sig","Not a PNG");
-   return 1;
-}
-
-typedef struct
-{
-   stbi__context *s;
-   stbi_uc *idata, *expanded, *out;
-   int depth;
-} stbi__png;
-
-
-enum {
-   STBI__F_none=0,
-   STBI__F_sub=1,
-   STBI__F_up=2,
-   STBI__F_avg=3,
-   STBI__F_paeth=4,
-   // synthetic filter used for first scanline to avoid needing a dummy row of 0s
-   STBI__F_avg_first
-};
-
-static stbi_uc first_row_filter[5] =
-{
-   STBI__F_none,
-   STBI__F_sub,
-   STBI__F_none,
-   STBI__F_avg_first,
-   STBI__F_sub // Paeth with b=c=0 turns out to be equivalent to sub
-};
-
-static int stbi__paeth(int a, int b, int c)
-{
-   // This formulation looks very different from the reference in the PNG spec, but is
-   // actually equivalent and has favorable data dependencies and admits straightforward
-   // generation of branch-free code, which helps performance significantly.
-   int thresh = c*3 - (a + b);
-   int lo = a < b ? a : b;
-   int hi = a < b ? b : a;
-   int t0 = (hi <= thresh) ? lo : c;
-   int t1 = (thresh <= lo) ? hi : t0;
-   return t1;
-}
-
-static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
-
-// adds an extra all-255 alpha channel
-// dest == src is legal
-// img_n must be 1 or 3
-static void stbi__create_png_alpha_expand8(stbi_uc *dest, stbi_uc *src, stbi__uint32 x, int img_n)
-{
-   int i;
-   // must process data backwards since we allow dest==src
-   if (img_n == 1) {
-      for (i=x-1; i >= 0; --i) {
-         dest[i*2+1] = 255;
-         dest[i*2+0] = src[i];
-      }
-   } else {
-      STBI_ASSERT(img_n == 3);
-      for (i=x-1; i >= 0; --i) {
-         dest[i*4+3] = 255;
-         dest[i*4+2] = src[i*3+2];
-         dest[i*4+1] = src[i*3+1];
-         dest[i*4+0] = src[i*3+0];
-      }
-   }
-}
-
-// create the png data from post-deflated data
-static int stbi__create_png_image_raw(stbi__png *a, stbi_uc *raw, stbi__uint32 raw_len, int out_n, stbi__uint32 x, stbi__uint32 y, int depth, int color)
-{
-   int bytes = (depth == 16 ? 2 : 1);
-   stbi__context *s = a->s;
-   stbi__uint32 i,j,stride = x*out_n*bytes;
-   stbi__uint32 img_len, img_width_bytes;
-   stbi_uc *filter_buf;
-   int all_ok = 1;
-   int k;
-   int img_n = s->img_n; // copy it into a local for later
-
-   int output_bytes = out_n*bytes;
-   int filter_bytes = img_n*bytes;
-   int width = x;
-
-   STBI_ASSERT(out_n == s->img_n || out_n == s->img_n+1);
-   a->out = (stbi_uc *) stbi__malloc_mad3(x, y, output_bytes, 0); // extra bytes to write off the end into
-   if (!a->out) return stbi__err("outofmem", "Out of memory");
-
-   // note: error exits here don't need to clean up a->out individually,
-   // stbi__do_png always does on error.
-   if (!stbi__mad3sizes_valid(img_n, x, depth, 7)) return stbi__err("too large", "Corrupt PNG");
-   img_width_bytes = (((img_n * x * depth) + 7) >> 3);
-   if (!stbi__mad2sizes_valid(img_width_bytes, y, img_width_bytes)) return stbi__err("too large", "Corrupt PNG");
-   img_len = (img_width_bytes + 1) * y;
-
-   // we used to check for exact match between raw_len and img_len on non-interlaced PNGs,
-   // but issue #276 reported a PNG in the wild that had extra data at the end (all zeros),
-   // so just check for raw_len < img_len always.
-   if (raw_len < img_len) return stbi__err("not enough pixels","Corrupt PNG");
-
-   // Allocate two scan lines worth of filter workspace buffer.
-   filter_buf = (stbi_uc *) stbi__malloc_mad2(img_width_bytes, 2, 0);
-   if (!filter_buf) return stbi__err("outofmem", "Out of memory");
-
-   // Filtering for low-bit-depth images
-   if (depth < 8) {
-      filter_bytes = 1;
-      width = img_width_bytes;
-   }
-
-   for (j=0; j < y; ++j) {
-      // cur/prior filter buffers alternate
-      stbi_uc *cur = filter_buf + (j & 1)*img_width_bytes;
-      stbi_uc *prior = filter_buf + (~j & 1)*img_width_bytes;
-      stbi_uc *dest = a->out + stride*j;
-      int nk = width * filter_bytes;
-      int filter = *raw++;
-
-      // check filter type
-      if (filter > 4) {
-         all_ok = stbi__err("invalid filter","Corrupt PNG");
-         break;
-      }
-
-      // if first row, use special filter that doesn't sample previous row
-      if (j == 0) filter = first_row_filter[filter];
-
-      // perform actual filtering
-      switch (filter) {
-      case STBI__F_none:
-         memcpy(cur, raw, nk);
-         break;
-      case STBI__F_sub:
-         memcpy(cur, raw, filter_bytes);
-         for (k = filter_bytes; k < nk; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + cur[k-filter_bytes]);
-         break;
-      case STBI__F_up:
-         for (k = 0; k < nk; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + prior[k]);
-         break;
-      case STBI__F_avg:
-         for (k = 0; k < filter_bytes; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + (prior[k]>>1));
-         for (k = filter_bytes; k < nk; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + ((prior[k] + cur[k-filter_bytes])>>1));
-         break;
-      case STBI__F_paeth:
-         for (k = 0; k < filter_bytes; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + prior[k]); // prior[k] == stbi__paeth(0,prior[k],0)
-         for (k = filter_bytes; k < nk; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + stbi__paeth(cur[k-filter_bytes], prior[k], prior[k-filter_bytes]));
-         break;
-      case STBI__F_avg_first:
-         memcpy(cur, raw, filter_bytes);
-         for (k = filter_bytes; k < nk; ++k)
-            cur[k] = STBI__BYTECAST(raw[k] + (cur[k-filter_bytes] >> 1));
-         break;
-      }
-
-      raw += nk;
-
-      // expand decoded bits in cur to dest, also adding an extra alpha channel if desired
-      if (depth < 8) {
-         stbi_uc scale = (color == 0) ? stbi__depth_scale_table[depth] : 1; // scale grayscale values to 0..255 range
-         stbi_uc *in = cur;
-         stbi_uc *out = dest;
-         stbi_uc inb = 0;
-         stbi__uint32 nsmp = x*img_n;
-
-         // expand bits to bytes first
-         if (depth == 4) {
-            for (i=0; i < nsmp; ++i) {
-               if ((i & 1) == 0) inb = *in++;
-               *out++ = scale * (inb >> 4);
-               inb <<= 4;
-            }
-         } else if (depth == 2) {
-            for (i=0; i < nsmp; ++i) {
-               if ((i & 3) == 0) inb = *in++;
-               *out++ = scale * (inb >> 6);
-               inb <<= 2;
-            }
-         } else {
-            STBI_ASSERT(depth == 1);
-            for (i=0; i < nsmp; ++i) {
-               if ((i & 7) == 0) inb = *in++;
-               *out++ = scale * (inb >> 7);
-               inb <<= 1;
-            }
-         }
-
-         // insert alpha=255 values if desired
-         if (img_n != out_n)
-            stbi__create_png_alpha_expand8(dest, dest, x, img_n);
-      } else if (depth == 8) {
-         if (img_n == out_n)
-            memcpy(dest, cur, x*img_n);
-         else
-            stbi__create_png_alpha_expand8(dest, cur, x, img_n);
-      } else if (depth == 16) {
-         // convert the image data from big-endian to platform-native
-         stbi__uint16 *dest16 = (stbi__uint16*)dest;
-         stbi__uint32 nsmp = x*img_n;
-
-         if (img_n == out_n) {
-            for (i = 0; i < nsmp; ++i, ++dest16, cur += 2)
-               *dest16 = (cur[0] << 8) | cur[1];
-         } else {
-            STBI_ASSERT(img_n+1 == out_n);
-            if (img_n == 1) {
-               for (i = 0; i < x; ++i, dest16 += 2, cur += 2) {
-                  dest16[0] = (cur[0] << 8) | cur[1];
-                  dest16[1] = 0xffff;
-               }
-            } else {
-               STBI_ASSERT(img_n == 3);
-               for (i = 0; i < x; ++i, dest16 += 4, cur += 6) {
-                  dest16[0] = (cur[0] << 8) | cur[1];
-                  dest16[1] = (cur[2] << 8) | cur[3];
-                  dest16[2] = (cur[4] << 8) | cur[5];
-                  dest16[3] = 0xffff;
-               }
-            }
-         }
-      }
-   }
-
-   STBI_FREE(filter_buf);
-   if (!all_ok) return 0;
-
-   return 1;
-}
-
-static int stbi__create_png_image(stbi__png *a, stbi_uc *image_data, stbi__uint32 image_data_len, int out_n, int depth, int color, int interlaced)
-{
-   int bytes = (depth == 16 ? 2 : 1);
-   int out_bytes = out_n * bytes;
-   stbi_uc *final;
-   int p;
-   if (!interlaced)
-      return stbi__create_png_image_raw(a, image_data, image_data_len, out_n, a->s->img_x, a->s->img_y, depth, color);
-
-   // de-interlacing
-   final = (stbi_uc *) stbi__malloc_mad3(a->s->img_x, a->s->img_y, out_bytes, 0);
-   if (!final) return stbi__err("outofmem", "Out of memory");
-   for (p=0; p < 7; ++p) {
-      int xorig[] = { 0,4,0,2,0,1,0 };
-      int yorig[] = { 0,0,4,0,2,0,1 };
-      int xspc[]  = { 8,8,4,4,2,2,1 };
-      int yspc[]  = { 8,8,8,4,4,2,2 };
-      int i,j,x,y;
-      // pass1_x[4] = 0, pass1_x[5] = 1, pass1_x[12] = 1
-      x = (a->s->img_x - xorig[p] + xspc[p]-1) / xspc[p];
-      y = (a->s->img_y - yorig[p] + yspc[p]-1) / yspc[p];
-      if (x && y) {
-         stbi__uint32 img_len = ((((a->s->img_n * x * depth) + 7) >> 3) + 1) * y;
-         if (!stbi__create_png_image_raw(a, image_data, image_data_len, out_n, x, y, depth, color)) {
-            STBI_FREE(final);
-            return 0;
-         }
-         for (j=0; j < y; ++j) {
-            for (i=0; i < x; ++i) {
-               int out_y = j*yspc[p]+yorig[p];
-               int out_x = i*xspc[p]+xorig[p];
-               memcpy(final + out_y*a->s->img_x*out_bytes + out_x*out_bytes,
-                      a->out + (j*x+i)*out_bytes, out_bytes);
-            }
-         }
-         STBI_FREE(a->out);
-         image_data += img_len;
-         image_data_len -= img_len;
-      }
-   }
-   a->out = final;
-
-   return 1;
-}
-
-static int stbi__compute_transparency(stbi__png *z, stbi_uc tc[3], int out_n)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi_uc *p = z->out;
-
-   // compute color-based transparency, assuming we've
-   // already got 255 as the alpha value in the output
-   STBI_ASSERT(out_n == 2 || out_n == 4);
-
-   if (out_n == 2) {
-      for (i=0; i < pixel_count; ++i) {
-         p[1] = (p[0] == tc[0] ? 0 : 255);
-         p += 2;
-      }
-   } else {
-      for (i=0; i < pixel_count; ++i) {
-         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-            p[3] = 0;
-         p += 4;
-      }
-   }
-   return 1;
-}
-
-static int stbi__compute_transparency16(stbi__png *z, stbi__uint16 tc[3], int out_n)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi__uint16 *p = (stbi__uint16*) z->out;
-
-   // compute color-based transparency, assuming we've
-   // already got 65535 as the alpha value in the output
-   STBI_ASSERT(out_n == 2 || out_n == 4);
-
-   if (out_n == 2) {
-      for (i = 0; i < pixel_count; ++i) {
-         p[1] = (p[0] == tc[0] ? 0 : 65535);
-         p += 2;
-      }
-   } else {
-      for (i = 0; i < pixel_count; ++i) {
-         if (p[0] == tc[0] && p[1] == tc[1] && p[2] == tc[2])
-            p[3] = 0;
-         p += 4;
-      }
-   }
-   return 1;
-}
-
-static int stbi__expand_png_palette(stbi__png *a, stbi_uc *palette, int len, int pal_img_n)
-{
-   stbi__uint32 i, pixel_count = a->s->img_x * a->s->img_y;
-   stbi_uc *p, *temp_out, *orig = a->out;
-
-   p = (stbi_uc *) stbi__malloc_mad2(pixel_count, pal_img_n, 0);
-   if (p == NULL) return stbi__err("outofmem", "Out of memory");
-
-   // between here and free(out) below, exitting would leak
-   temp_out = p;
-
-   if (pal_img_n == 3) {
-      for (i=0; i < pixel_count; ++i) {
-         int n = orig[i]*4;
-         p[0] = palette[n  ];
-         p[1] = palette[n+1];
-         p[2] = palette[n+2];
-         p += 3;
-      }
-   } else {
-      for (i=0; i < pixel_count; ++i) {
-         int n = orig[i]*4;
-         p[0] = palette[n  ];
-         p[1] = palette[n+1];
-         p[2] = palette[n+2];
-         p[3] = palette[n+3];
-         p += 4;
-      }
-   }
-   STBI_FREE(a->out);
-   a->out = temp_out;
-
-   STBI_NOTUSED(len);
-
-   return 1;
-}
-
-static int stbi__unpremultiply_on_load_global = 0;
-static int stbi__de_iphone_flag_global = 0;
-
-STBIDEF void stbi_set_unpremultiply_on_load(int flag_true_if_should_unpremultiply)
-{
-   stbi__unpremultiply_on_load_global = flag_true_if_should_unpremultiply;
-}
-
-STBIDEF void stbi_convert_iphone_png_to_rgb(int flag_true_if_should_convert)
-{
-   stbi__de_iphone_flag_global = flag_true_if_should_convert;
-}
-
-#ifndef STBI_THREAD_LOCAL
-#define stbi__unpremultiply_on_load  stbi__unpremultiply_on_load_global
-#define stbi__de_iphone_flag  stbi__de_iphone_flag_global
-#else
-static STBI_THREAD_LOCAL int stbi__unpremultiply_on_load_local, stbi__unpremultiply_on_load_set;
-static STBI_THREAD_LOCAL int stbi__de_iphone_flag_local, stbi__de_iphone_flag_set;
-
-STBIDEF void stbi_set_unpremultiply_on_load_thread(int flag_true_if_should_unpremultiply)
-{
-   stbi__unpremultiply_on_load_local = flag_true_if_should_unpremultiply;
-   stbi__unpremultiply_on_load_set = 1;
-}
-
-STBIDEF void stbi_convert_iphone_png_to_rgb_thread(int flag_true_if_should_convert)
-{
-   stbi__de_iphone_flag_local = flag_true_if_should_convert;
-   stbi__de_iphone_flag_set = 1;
-}
-
-#define stbi__unpremultiply_on_load  (stbi__unpremultiply_on_load_set           \
-                                       ? stbi__unpremultiply_on_load_local      \
-                                       : stbi__unpremultiply_on_load_global)
-#define stbi__de_iphone_flag  (stbi__de_iphone_flag_set                         \
-                                ? stbi__de_iphone_flag_local                    \
-                                : stbi__de_iphone_flag_global)
-#endif // STBI_THREAD_LOCAL
-
-static void stbi__de_iphone(stbi__png *z)
-{
-   stbi__context *s = z->s;
-   stbi__uint32 i, pixel_count = s->img_x * s->img_y;
-   stbi_uc *p = z->out;
-
-   if (s->img_out_n == 3) {  // convert bgr to rgb
-      for (i=0; i < pixel_count; ++i) {
-         stbi_uc t = p[0];
-         p[0] = p[2];
-         p[2] = t;
-         p += 3;
-      }
-   } else {
-      STBI_ASSERT(s->img_out_n == 4);
-      if (stbi__unpremultiply_on_load) {
-         // convert bgr to rgb and unpremultiply
-         for (i=0; i < pixel_count; ++i) {
-            stbi_uc a = p[3];
-            stbi_uc t = p[0];
-            if (a) {
-               stbi_uc half = a / 2;
-               p[0] = (p[2] * 255 + half) / a;
-               p[1] = (p[1] * 255 + half) / a;
-               p[2] = ( t   * 255 + half) / a;
-            } else {
-               p[0] = p[2];
-               p[2] = t;
-            }
-            p += 4;
-         }
-      } else {
-         // convert bgr to rgb
-         for (i=0; i < pixel_count; ++i) {
-            stbi_uc t = p[0];
-            p[0] = p[2];
-            p[2] = t;
-            p += 4;
-         }
-      }
-   }
-}
-
-#define STBI__PNG_TYPE(a,b,c,d)  (((unsigned) (a) << 24) + ((unsigned) (b) << 16) + ((unsigned) (c) << 8) + (unsigned) (d))
-
-static int stbi__parse_png_file(stbi__png *z, int scan, int req_comp)
-{
-   stbi_uc palette[1024], pal_img_n=0;
-   stbi_uc has_trans=0, tc[3]={0};
-   stbi__uint16 tc16[3];
-   stbi__uint32 ioff=0, idata_limit=0, i, pal_len=0;
-   int first=1,k,interlace=0, color=0, is_iphone=0;
-   stbi__context *s = z->s;
-
-   z->expanded = NULL;
-   z->idata = NULL;
-   z->out = NULL;
-
-   if (!stbi__check_png_header(s)) return 0;
-
-   if (scan == STBI__SCAN_type) return 1;
-
-   for (;;) {
-      stbi__pngchunk c = stbi__get_chunk_header(s);
-      switch (c.type) {
-         case STBI__PNG_TYPE('C','g','B','I'):
-            is_iphone = 1;
-            stbi__skip(s, c.length);
-            break;
-         case STBI__PNG_TYPE('I','H','D','R'): {
-            int comp,filter;
-            if (!first) return stbi__err("multiple IHDR","Corrupt PNG");
-            first = 0;
-            if (c.length != 13) return stbi__err("bad IHDR len","Corrupt PNG");
-            s->img_x = stbi__get32be(s);
-            s->img_y = stbi__get32be(s);
-            if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-            if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-            z->depth = stbi__get8(s);  if (z->depth != 1 && z->depth != 2 && z->depth != 4 && z->depth != 8 && z->depth != 16)  return stbi__err("1/2/4/8/16-bit only","PNG not supported: 1/2/4/8/16-bit only");
-            color = stbi__get8(s);  if (color > 6)         return stbi__err("bad ctype","Corrupt PNG");
-            if (color == 3 && z->depth == 16)                  return stbi__err("bad ctype","Corrupt PNG");
-            if (color == 3) pal_img_n = 3; else if (color & 1) return stbi__err("bad ctype","Corrupt PNG");
-            comp  = stbi__get8(s);  if (comp) return stbi__err("bad comp method","Corrupt PNG");
-            filter= stbi__get8(s);  if (filter) return stbi__err("bad filter method","Corrupt PNG");
-            interlace = stbi__get8(s); if (interlace>1) return stbi__err("bad interlace method","Corrupt PNG");
-            if (!s->img_x || !s->img_y) return stbi__err("0-pixel image","Corrupt PNG");
-            if (!pal_img_n) {
-               s->img_n = (color & 2 ? 3 : 1) + (color & 4 ? 1 : 0);
-               if ((1 << 30) / s->img_x / s->img_n < s->img_y) return stbi__err("too large", "Image too large to decode");
-            } else {
-               // if paletted, then pal_n is our final components, and
-               // img_n is # components to decompress/filter.
-               s->img_n = 1;
-               if ((1 << 30) / s->img_x / 4 < s->img_y) return stbi__err("too large","Corrupt PNG");
-            }
-            // even with SCAN_header, have to scan to see if we have a tRNS
-            break;
-         }
-
-         case STBI__PNG_TYPE('P','L','T','E'):  {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (c.length > 256*3) return stbi__err("invalid PLTE","Corrupt PNG");
-            pal_len = c.length / 3;
-            if (pal_len * 3 != c.length) return stbi__err("invalid PLTE","Corrupt PNG");
-            for (i=0; i < pal_len; ++i) {
-               palette[i*4+0] = stbi__get8(s);
-               palette[i*4+1] = stbi__get8(s);
-               palette[i*4+2] = stbi__get8(s);
-               palette[i*4+3] = 255;
-            }
-            break;
-         }
-
-         case STBI__PNG_TYPE('t','R','N','S'): {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (z->idata) return stbi__err("tRNS after IDAT","Corrupt PNG");
-            if (pal_img_n) {
-               if (scan == STBI__SCAN_header) { s->img_n = 4; return 1; }
-               if (pal_len == 0) return stbi__err("tRNS before PLTE","Corrupt PNG");
-               if (c.length > pal_len) return stbi__err("bad tRNS len","Corrupt PNG");
-               pal_img_n = 4;
-               for (i=0; i < c.length; ++i)
-                  palette[i*4+3] = stbi__get8(s);
-            } else {
-               if (!(s->img_n & 1)) return stbi__err("tRNS with alpha","Corrupt PNG");
-               if (c.length != (stbi__uint32) s->img_n*2) return stbi__err("bad tRNS len","Corrupt PNG");
-               has_trans = 1;
-               // non-paletted with tRNS = constant alpha. if header-scanning, we can stop now.
-               if (scan == STBI__SCAN_header) { ++s->img_n; return 1; }
-               if (z->depth == 16) {
-                  for (k = 0; k < s->img_n && k < 3; ++k) // extra loop test to suppress false GCC warning
-                     tc16[k] = (stbi__uint16)stbi__get16be(s); // copy the values as-is
-               } else {
-                  for (k = 0; k < s->img_n && k < 3; ++k)
-                     tc[k] = (stbi_uc)(stbi__get16be(s) & 255) * stbi__depth_scale_table[z->depth]; // non 8-bit images will be larger
-               }
-            }
-            break;
-         }
-
-         case STBI__PNG_TYPE('I','D','A','T'): {
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (pal_img_n && !pal_len) return stbi__err("no PLTE","Corrupt PNG");
-            if (scan == STBI__SCAN_header) {
-               // header scan definitely stops at first IDAT
-               if (pal_img_n)
-                  s->img_n = pal_img_n;
-               return 1;
-            }
-            if (c.length > (1u << 30)) return stbi__err("IDAT size limit", "IDAT section larger than 2^30 bytes");
-            if ((int)(ioff + c.length) < (int)ioff) return 0;
-            if (ioff + c.length > idata_limit) {
-               stbi__uint32 idata_limit_old = idata_limit;
-               stbi_uc *p;
-               if (idata_limit == 0) idata_limit = c.length > 4096 ? c.length : 4096;
-               while (ioff + c.length > idata_limit)
-                  idata_limit *= 2;
-               STBI_NOTUSED(idata_limit_old);
-               p = (stbi_uc *) STBI_REALLOC_SIZED(z->idata, idata_limit_old, idata_limit); if (p == NULL) return stbi__err("outofmem", "Out of memory");
-               z->idata = p;
-            }
-            if (!stbi__getn(s, z->idata+ioff,c.length)) return stbi__err("outofdata","Corrupt PNG");
-            ioff += c.length;
-            break;
-         }
-
-         case STBI__PNG_TYPE('I','E','N','D'): {
-            stbi__uint32 raw_len, bpl;
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if (scan != STBI__SCAN_load) return 1;
-            if (z->idata == NULL) return stbi__err("no IDAT","Corrupt PNG");
-            // initial guess for decoded data size to avoid unnecessary reallocs
-            bpl = (s->img_x * z->depth + 7) / 8; // bytes per line, per component
-            raw_len = bpl * s->img_y * s->img_n /* pixels */ + s->img_y /* filter mode per row */;
-            z->expanded = (stbi_uc *) stbi_zlib_decode_malloc_guesssize_headerflag((char *) z->idata, ioff, raw_len, (int *) &raw_len, !is_iphone);
-            if (z->expanded == NULL) return 0; // zlib should set error
-            STBI_FREE(z->idata); z->idata = NULL;
-            if ((req_comp == s->img_n+1 && req_comp != 3 && !pal_img_n) || has_trans)
-               s->img_out_n = s->img_n+1;
-            else
-               s->img_out_n = s->img_n;
-            if (!stbi__create_png_image(z, z->expanded, raw_len, s->img_out_n, z->depth, color, interlace)) return 0;
-            if (has_trans) {
-               if (z->depth == 16) {
-                  if (!stbi__compute_transparency16(z, tc16, s->img_out_n)) return 0;
-               } else {
-                  if (!stbi__compute_transparency(z, tc, s->img_out_n)) return 0;
-               }
-            }
-            if (is_iphone && stbi__de_iphone_flag && s->img_out_n > 2)
-               stbi__de_iphone(z);
-            if (pal_img_n) {
-               // pal_img_n == 3 or 4
-               s->img_n = pal_img_n; // record the actual colors we had
-               s->img_out_n = pal_img_n;
-               if (req_comp >= 3) s->img_out_n = req_comp;
-               if (!stbi__expand_png_palette(z, palette, pal_len, s->img_out_n))
-                  return 0;
-            } else if (has_trans) {
-               // non-paletted image with tRNS -> source image has (constant) alpha
-               ++s->img_n;
-            }
-            STBI_FREE(z->expanded); z->expanded = NULL;
-            // end of PNG chunk, read and skip CRC
-            stbi__get32be(s);
-            return 1;
-         }
-
-         default:
-            // if critical, fail
-            if (first) return stbi__err("first not IHDR", "Corrupt PNG");
-            if ((c.type & (1 << 29)) == 0) {
-               #ifndef STBI_NO_FAILURE_STRINGS
-               // not threadsafe
-               static char invalid_chunk[] = "XXXX PNG chunk not known";
-               invalid_chunk[0] = STBI__BYTECAST(c.type >> 24);
-               invalid_chunk[1] = STBI__BYTECAST(c.type >> 16);
-               invalid_chunk[2] = STBI__BYTECAST(c.type >>  8);
-               invalid_chunk[3] = STBI__BYTECAST(c.type >>  0);
-               #endif
-               return stbi__err(invalid_chunk, "PNG not supported: unknown PNG chunk type");
-            }
-            stbi__skip(s, c.length);
-            break;
-      }
-      // end of PNG chunk, read and skip CRC
-      stbi__get32be(s);
-   }
-}
-
-static void *stbi__do_png(stbi__png *p, int *x, int *y, int *n, int req_comp, stbi__result_info *ri)
-{
-   void *result=NULL;
-   if (req_comp < 0 || req_comp > 4) return stbi__errpuc("bad req_comp", "Internal error");
-   if (stbi__parse_png_file(p, STBI__SCAN_load, req_comp)) {
-      if (p->depth <= 8)
-         ri->bits_per_channel = 8;
-      else if (p->depth == 16)
-         ri->bits_per_channel = 16;
-      else
-         return stbi__errpuc("bad bits_per_channel", "PNG not supported: unsupported color depth");
-      result = p->out;
-      p->out = NULL;
-      if (req_comp && req_comp != p->s->img_out_n) {
-         if (ri->bits_per_channel == 8)
-            result = stbi__convert_format((unsigned char *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
-         else
-            result = stbi__convert_format16((stbi__uint16 *) result, p->s->img_out_n, req_comp, p->s->img_x, p->s->img_y);
-         p->s->img_out_n = req_comp;
-         if (result == NULL) return result;
-      }
-      *x = p->s->img_x;
-      *y = p->s->img_y;
-      if (n) *n = p->s->img_n;
-   }
-   STBI_FREE(p->out);      p->out      = NULL;
-   STBI_FREE(p->expanded); p->expanded = NULL;
-   STBI_FREE(p->idata);    p->idata    = NULL;
-
-   return result;
-}
-
-static void *stbi__png_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   stbi__png p;
-   p.s = s;
-   return stbi__do_png(&p, x,y,comp,req_comp, ri);
-}
-
-static int stbi__png_test(stbi__context *s)
-{
-   int r;
-   r = stbi__check_png_header(s);
-   stbi__rewind(s);
-   return r;
-}
-
-static int stbi__png_info_raw(stbi__png *p, int *x, int *y, int *comp)
-{
-   if (!stbi__parse_png_file(p, STBI__SCAN_header, 0)) {
-      stbi__rewind( p->s );
-      return 0;
-   }
-   if (x) *x = p->s->img_x;
-   if (y) *y = p->s->img_y;
-   if (comp) *comp = p->s->img_n;
-   return 1;
-}
-
-static int stbi__png_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   stbi__png p;
-   p.s = s;
-   return stbi__png_info_raw(&p, x, y, comp);
-}
-
-static int stbi__png_is16(stbi__context *s)
-{
-   stbi__png p;
-   p.s = s;
-   if (!stbi__png_info_raw(&p, NULL, NULL, NULL))
-	   return 0;
-   if (p.depth != 16) {
-      stbi__rewind(p.s);
-      return 0;
-   }
-   return 1;
-}
-#endif
-
-// Microsoft/Windows BMP image
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_test_raw(stbi__context *s)
-{
-   int r;
-   int sz;
-   if (stbi__get8(s) != 'B') return 0;
-   if (stbi__get8(s) != 'M') return 0;
-   stbi__get32le(s); // discard filesize
-   stbi__get16le(s); // discard reserved
-   stbi__get16le(s); // discard reserved
-   stbi__get32le(s); // discard data offset
-   sz = stbi__get32le(s);
-   r = (sz == 12 || sz == 40 || sz == 56 || sz == 108 || sz == 124);
-   return r;
-}
-
-static int stbi__bmp_test(stbi__context *s)
-{
-   int r = stbi__bmp_test_raw(s);
-   stbi__rewind(s);
-   return r;
-}
-
-
-// returns 0..31 for the highest set bit
-static int stbi__high_bit(unsigned int z)
-{
-   int n=0;
-   if (z == 0) return -1;
-   if (z >= 0x10000) { n += 16; z >>= 16; }
-   if (z >= 0x00100) { n +=  8; z >>=  8; }
-   if (z >= 0x00010) { n +=  4; z >>=  4; }
-   if (z >= 0x00004) { n +=  2; z >>=  2; }
-   if (z >= 0x00002) { n +=  1;/* >>=  1;*/ }
-   return n;
-}
-
-static int stbi__bitcount(unsigned int a)
-{
-   a = (a & 0x55555555) + ((a >>  1) & 0x55555555); // max 2
-   a = (a & 0x33333333) + ((a >>  2) & 0x33333333); // max 4
-   a = (a + (a >> 4)) & 0x0f0f0f0f; // max 8 per 4, now 8 bits
-   a = (a + (a >> 8)); // max 16 per 8 bits
-   a = (a + (a >> 16)); // max 32 per 8 bits
-   return a & 0xff;
-}
-
-// extract an arbitrarily-aligned N-bit value (N=bits)
-// from v, and then make it 8-bits long and fractionally
-// extend it to full full range.
-static int stbi__shiftsigned(unsigned int v, int shift, int bits)
-{
-   static unsigned int mul_table[9] = {
-      0,
-      0xff/*0b11111111*/, 0x55/*0b01010101*/, 0x49/*0b01001001*/, 0x11/*0b00010001*/,
-      0x21/*0b00100001*/, 0x41/*0b01000001*/, 0x81/*0b10000001*/, 0x01/*0b00000001*/,
-   };
-   static unsigned int shift_table[9] = {
-      0, 0,0,1,0,2,4,6,0,
-   };
-   if (shift < 0)
-      v <<= -shift;
-   else
-      v >>= shift;
-   STBI_ASSERT(v < 256);
-   v >>= (8-bits);
-   STBI_ASSERT(bits >= 0 && bits <= 8);
-   return (int) ((unsigned) v * mul_table[bits]) >> shift_table[bits];
-}
-
-typedef struct
-{
-   int bpp, offset, hsz;
-   unsigned int mr,mg,mb,ma, all_a;
-   int extra_read;
-} stbi__bmp_data;
-
-static int stbi__bmp_set_mask_defaults(stbi__bmp_data *info, int compress)
-{
-   // BI_BITFIELDS specifies masks explicitly, don't override
-   if (compress == 3)
-      return 1;
-
-   if (compress == 0) {
-      if (info->bpp == 16) {
-         info->mr = 31u << 10;
-         info->mg = 31u <<  5;
-         info->mb = 31u <<  0;
-      } else if (info->bpp == 32) {
-         info->mr = 0xffu << 16;
-         info->mg = 0xffu <<  8;
-         info->mb = 0xffu <<  0;
-         info->ma = 0xffu << 24;
-         info->all_a = 0; // if all_a is 0 at end, then we loaded alpha channel but it was all 0
-      } else {
-         // otherwise, use defaults, which is all-0
-         info->mr = info->mg = info->mb = info->ma = 0;
-      }
-      return 1;
-   }
-   return 0; // error
-}
-
-static void *stbi__bmp_parse_header(stbi__context *s, stbi__bmp_data *info)
-{
-   int hsz;
-   if (stbi__get8(s) != 'B' || stbi__get8(s) != 'M') return stbi__errpuc("not BMP", "Corrupt BMP");
-   stbi__get32le(s); // discard filesize
-   stbi__get16le(s); // discard reserved
-   stbi__get16le(s); // discard reserved
-   info->offset = stbi__get32le(s);
-   info->hsz = hsz = stbi__get32le(s);
-   info->mr = info->mg = info->mb = info->ma = 0;
-   info->extra_read = 14;
-
-   if (info->offset < 0) return stbi__errpuc("bad BMP", "bad BMP");
-
-   if (hsz != 12 && hsz != 40 && hsz != 56 && hsz != 108 && hsz != 124) return stbi__errpuc("unknown BMP", "BMP type not supported: unknown");
-   if (hsz == 12) {
-      s->img_x = stbi__get16le(s);
-      s->img_y = stbi__get16le(s);
-   } else {
-      s->img_x = stbi__get32le(s);
-      s->img_y = stbi__get32le(s);
-   }
-   if (stbi__get16le(s) != 1) return stbi__errpuc("bad BMP", "bad BMP");
-   info->bpp = stbi__get16le(s);
-   if (hsz != 12) {
-      int compress = stbi__get32le(s);
-      if (compress == 1 || compress == 2) return stbi__errpuc("BMP RLE", "BMP type not supported: RLE");
-      if (compress >= 4) return stbi__errpuc("BMP JPEG/PNG", "BMP type not supported: unsupported compression"); // this includes PNG/JPEG modes
-      if (compress == 3 && info->bpp != 16 && info->bpp != 32) return stbi__errpuc("bad BMP", "bad BMP"); // bitfields requires 16 or 32 bits/pixel
-      stbi__get32le(s); // discard sizeof
-      stbi__get32le(s); // discard hres
-      stbi__get32le(s); // discard vres
-      stbi__get32le(s); // discard colorsused
-      stbi__get32le(s); // discard max important
-      if (hsz == 40 || hsz == 56) {
-         if (hsz == 56) {
-            stbi__get32le(s);
-            stbi__get32le(s);
-            stbi__get32le(s);
-            stbi__get32le(s);
-         }
-         if (info->bpp == 16 || info->bpp == 32) {
-            if (compress == 0) {
-               stbi__bmp_set_mask_defaults(info, compress);
-            } else if (compress == 3) {
-               info->mr = stbi__get32le(s);
-               info->mg = stbi__get32le(s);
-               info->mb = stbi__get32le(s);
-               info->extra_read += 12;
-               // not documented, but generated by photoshop and handled by mspaint
-               if (info->mr == info->mg && info->mg == info->mb) {
-                  // ?!?!?
-                  return stbi__errpuc("bad BMP", "bad BMP");
-               }
-            } else
-               return stbi__errpuc("bad BMP", "bad BMP");
-         }
-      } else {
-         // V4/V5 header
-         int i;
-         if (hsz != 108 && hsz != 124)
-            return stbi__errpuc("bad BMP", "bad BMP");
-         info->mr = stbi__get32le(s);
-         info->mg = stbi__get32le(s);
-         info->mb = stbi__get32le(s);
-         info->ma = stbi__get32le(s);
-         if (compress != 3) // override mr/mg/mb unless in BI_BITFIELDS mode, as per docs
-            stbi__bmp_set_mask_defaults(info, compress);
-         stbi__get32le(s); // discard color space
-         for (i=0; i < 12; ++i)
-            stbi__get32le(s); // discard color space parameters
-         if (hsz == 124) {
-            stbi__get32le(s); // discard rendering intent
-            stbi__get32le(s); // discard offset of profile data
-            stbi__get32le(s); // discard size of profile data
-            stbi__get32le(s); // discard reserved
-         }
-      }
-   }
-   return (void *) 1;
-}
-
-
-static void *stbi__bmp_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   stbi_uc *out;
-   unsigned int mr=0,mg=0,mb=0,ma=0, all_a;
-   stbi_uc pal[256][4];
-   int psize=0,i,j,width;
-   int flip_vertically, pad, target;
-   stbi__bmp_data info;
-   STBI_NOTUSED(ri);
-
-   info.all_a = 255;
-   if (stbi__bmp_parse_header(s, &info) == NULL)
-      return NULL; // error code already set
-
-   flip_vertically = ((int) s->img_y) > 0;
-   s->img_y = abs((int) s->img_y);
-
-   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-
-   mr = info.mr;
-   mg = info.mg;
-   mb = info.mb;
-   ma = info.ma;
-   all_a = info.all_a;
-
-   if (info.hsz == 12) {
-      if (info.bpp < 24)
-         psize = (info.offset - info.extra_read - 24) / 3;
-   } else {
-      if (info.bpp < 16)
-         psize = (info.offset - info.extra_read - info.hsz) >> 2;
-   }
-   if (psize == 0) {
-      // accept some number of extra bytes after the header, but if the offset points either to before
-      // the header ends or implies a large amount of extra data, reject the file as malformed
-      int bytes_read_so_far = s->callback_already_read + (int)(s->img_buffer - s->img_buffer_original);
-      int header_limit = 1024; // max we actually read is below 256 bytes currently.
-      int extra_data_limit = 256*4; // what ordinarily goes here is a palette; 256 entries*4 bytes is its max size.
-      if (bytes_read_so_far <= 0 || bytes_read_so_far > header_limit) {
-         return stbi__errpuc("bad header", "Corrupt BMP");
-      }
-      // we established that bytes_read_so_far is positive and sensible.
-      // the first half of this test rejects offsets that are either too small positives, or
-      // negative, and guarantees that info.offset >= bytes_read_so_far > 0. this in turn
-      // ensures the number computed in the second half of the test can't overflow.
-      if (info.offset < bytes_read_so_far || info.offset - bytes_read_so_far > extra_data_limit) {
-         return stbi__errpuc("bad offset", "Corrupt BMP");
-      } else {
-         stbi__skip(s, info.offset - bytes_read_so_far);
-      }
-   }
-
-   if (info.bpp == 24 && ma == 0xff000000)
-      s->img_n = 3;
-   else
-      s->img_n = ma ? 4 : 3;
-   if (req_comp && req_comp >= 3) // we can directly decode 3 or 4
-      target = req_comp;
-   else
-      target = s->img_n; // if they want monochrome, we'll post-convert
-
-   // sanity-check size
-   if (!stbi__mad3sizes_valid(target, s->img_x, s->img_y, 0))
-      return stbi__errpuc("too large", "Corrupt BMP");
-
-   out = (stbi_uc *) stbi__malloc_mad3(target, s->img_x, s->img_y, 0);
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   if (info.bpp < 16) {
-      int z=0;
-      if (psize == 0 || psize > 256) { STBI_FREE(out); return stbi__errpuc("invalid", "Corrupt BMP"); }
-      for (i=0; i < psize; ++i) {
-         pal[i][2] = stbi__get8(s);
-         pal[i][1] = stbi__get8(s);
-         pal[i][0] = stbi__get8(s);
-         if (info.hsz != 12) stbi__get8(s);
-         pal[i][3] = 255;
-      }
-      stbi__skip(s, info.offset - info.extra_read - info.hsz - psize * (info.hsz == 12 ? 3 : 4));
-      if (info.bpp == 1) width = (s->img_x + 7) >> 3;
-      else if (info.bpp == 4) width = (s->img_x + 1) >> 1;
-      else if (info.bpp == 8) width = s->img_x;
-      else { STBI_FREE(out); return stbi__errpuc("bad bpp", "Corrupt BMP"); }
-      pad = (-width)&3;
-      if (info.bpp == 1) {
-         for (j=0; j < (int) s->img_y; ++j) {
-            int bit_offset = 7, v = stbi__get8(s);
-            for (i=0; i < (int) s->img_x; ++i) {
-               int color = (v>>bit_offset)&0x1;
-               out[z++] = pal[color][0];
-               out[z++] = pal[color][1];
-               out[z++] = pal[color][2];
-               if (target == 4) out[z++] = 255;
-               if (i+1 == (int) s->img_x) break;
-               if((--bit_offset) < 0) {
-                  bit_offset = 7;
-                  v = stbi__get8(s);
-               }
-            }
-            stbi__skip(s, pad);
-         }
-      } else {
-         for (j=0; j < (int) s->img_y; ++j) {
-            for (i=0; i < (int) s->img_x; i += 2) {
-               int v=stbi__get8(s),v2=0;
-               if (info.bpp == 4) {
-                  v2 = v & 15;
-                  v >>= 4;
-               }
-               out[z++] = pal[v][0];
-               out[z++] = pal[v][1];
-               out[z++] = pal[v][2];
-               if (target == 4) out[z++] = 255;
-               if (i+1 == (int) s->img_x) break;
-               v = (info.bpp == 8) ? stbi__get8(s) : v2;
-               out[z++] = pal[v][0];
-               out[z++] = pal[v][1];
-               out[z++] = pal[v][2];
-               if (target == 4) out[z++] = 255;
-            }
-            stbi__skip(s, pad);
-         }
-      }
-   } else {
-      int rshift=0,gshift=0,bshift=0,ashift=0,rcount=0,gcount=0,bcount=0,acount=0;
-      int z = 0;
-      int easy=0;
-      stbi__skip(s, info.offset - info.extra_read - info.hsz);
-      if (info.bpp == 24) width = 3 * s->img_x;
-      else if (info.bpp == 16) width = 2*s->img_x;
-      else /* bpp = 32 and pad = 0 */ width=0;
-      pad = (-width) & 3;
-      if (info.bpp == 24) {
-         easy = 1;
-      } else if (info.bpp == 32) {
-         if (mb == 0xff && mg == 0xff00 && mr == 0x00ff0000 && ma == 0xff000000)
-            easy = 2;
-      }
-      if (!easy) {
-         if (!mr || !mg || !mb) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
-         // right shift amt to put high bit in position #7
-         rshift = stbi__high_bit(mr)-7; rcount = stbi__bitcount(mr);
-         gshift = stbi__high_bit(mg)-7; gcount = stbi__bitcount(mg);
-         bshift = stbi__high_bit(mb)-7; bcount = stbi__bitcount(mb);
-         ashift = stbi__high_bit(ma)-7; acount = stbi__bitcount(ma);
-         if (rcount > 8 || gcount > 8 || bcount > 8 || acount > 8) { STBI_FREE(out); return stbi__errpuc("bad masks", "Corrupt BMP"); }
-      }
-      for (j=0; j < (int) s->img_y; ++j) {
-         if (easy) {
-            for (i=0; i < (int) s->img_x; ++i) {
-               unsigned char a;
-               out[z+2] = stbi__get8(s);
-               out[z+1] = stbi__get8(s);
-               out[z+0] = stbi__get8(s);
-               z += 3;
-               a = (easy == 2 ? stbi__get8(s) : 255);
-               all_a |= a;
-               if (target == 4) out[z++] = a;
-            }
-         } else {
-            int bpp = info.bpp;
-            for (i=0; i < (int) s->img_x; ++i) {
-               stbi__uint32 v = (bpp == 16 ? (stbi__uint32) stbi__get16le(s) : stbi__get32le(s));
-               unsigned int a;
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mr, rshift, rcount));
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mg, gshift, gcount));
-               out[z++] = STBI__BYTECAST(stbi__shiftsigned(v & mb, bshift, bcount));
-               a = (ma ? stbi__shiftsigned(v & ma, ashift, acount) : 255);
-               all_a |= a;
-               if (target == 4) out[z++] = STBI__BYTECAST(a);
-            }
-         }
-         stbi__skip(s, pad);
-      }
-   }
-
-   // if alpha channel is all 0s, replace with all 255s
-   if (target == 4 && all_a == 0)
-      for (i=4*s->img_x*s->img_y-1; i >= 0; i -= 4)
-         out[i] = 255;
-
-   if (flip_vertically) {
-      stbi_uc t;
-      for (j=0; j < (int) s->img_y>>1; ++j) {
-         stbi_uc *p1 = out +      j     *s->img_x*target;
-         stbi_uc *p2 = out + (s->img_y-1-j)*s->img_x*target;
-         for (i=0; i < (int) s->img_x*target; ++i) {
-            t = p1[i]; p1[i] = p2[i]; p2[i] = t;
-         }
-      }
-   }
-
-   if (req_comp && req_comp != target) {
-      out = stbi__convert_format(out, target, req_comp, s->img_x, s->img_y);
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-
-   *x = s->img_x;
-   *y = s->img_y;
-   if (comp) *comp = s->img_n;
-   return out;
-}
-#endif
-
-// Targa Truevision - TGA
-// by Jonathan Dummer
-#ifndef STBI_NO_TGA
-// returns STBI_rgb or whatever, 0 on error
-static int stbi__tga_get_comp(int bits_per_pixel, int is_grey, int* is_rgb16)
-{
-   // only RGB or RGBA (incl. 16bit) or grey allowed
-   if (is_rgb16) *is_rgb16 = 0;
-   switch(bits_per_pixel) {
-      case 8:  return STBI_grey;
-      case 16: if(is_grey) return STBI_grey_alpha;
-               // fallthrough
-      case 15: if(is_rgb16) *is_rgb16 = 1;
-               return STBI_rgb;
-      case 24: // fallthrough
-      case 32: return bits_per_pixel/8;
-      default: return 0;
-   }
-}
-
-static int stbi__tga_info(stbi__context *s, int *x, int *y, int *comp)
-{
-    int tga_w, tga_h, tga_comp, tga_image_type, tga_bits_per_pixel, tga_colormap_bpp;
-    int sz, tga_colormap_type;
-    stbi__get8(s);                   // discard Offset
-    tga_colormap_type = stbi__get8(s); // colormap type
-    if( tga_colormap_type > 1 ) {
-        stbi__rewind(s);
-        return 0;      // only RGB or indexed allowed
-    }
-    tga_image_type = stbi__get8(s); // image type
-    if ( tga_colormap_type == 1 ) { // colormapped (paletted) image
-        if (tga_image_type != 1 && tga_image_type != 9) {
-            stbi__rewind(s);
-            return 0;
-        }
-        stbi__skip(s,4);       // skip index of first colormap entry and number of entries
-        sz = stbi__get8(s);    //   check bits per palette color entry
-        if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) {
-            stbi__rewind(s);
-            return 0;
-        }
-        stbi__skip(s,4);       // skip image x and y origin
-        tga_colormap_bpp = sz;
-    } else { // "normal" image w/o colormap - only RGB or grey allowed, +/- RLE
-        if ( (tga_image_type != 2) && (tga_image_type != 3) && (tga_image_type != 10) && (tga_image_type != 11) ) {
-            stbi__rewind(s);
-            return 0; // only RGB or grey allowed, +/- RLE
-        }
-        stbi__skip(s,9); // skip colormap specification and image x/y origin
-        tga_colormap_bpp = 0;
-    }
-    tga_w = stbi__get16le(s);
-    if( tga_w < 1 ) {
-        stbi__rewind(s);
-        return 0;   // test width
-    }
-    tga_h = stbi__get16le(s);
-    if( tga_h < 1 ) {
-        stbi__rewind(s);
-        return 0;   // test height
-    }
-    tga_bits_per_pixel = stbi__get8(s); // bits per pixel
-    stbi__get8(s); // ignore alpha bits
-    if (tga_colormap_bpp != 0) {
-        if((tga_bits_per_pixel != 8) && (tga_bits_per_pixel != 16)) {
-            // when using a colormap, tga_bits_per_pixel is the size of the indexes
-            // I don't think anything but 8 or 16bit indexes makes sense
-            stbi__rewind(s);
-            return 0;
-        }
-        tga_comp = stbi__tga_get_comp(tga_colormap_bpp, 0, NULL);
-    } else {
-        tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3) || (tga_image_type == 11), NULL);
-    }
-    if(!tga_comp) {
-      stbi__rewind(s);
-      return 0;
-    }
-    if (x) *x = tga_w;
-    if (y) *y = tga_h;
-    if (comp) *comp = tga_comp;
-    return 1;                   // seems to have passed everything
-}
-
-static int stbi__tga_test(stbi__context *s)
-{
-   int res = 0;
-   int sz, tga_color_type;
-   stbi__get8(s);      //   discard Offset
-   tga_color_type = stbi__get8(s);   //   color type
-   if ( tga_color_type > 1 ) goto errorEnd;   //   only RGB or indexed allowed
-   sz = stbi__get8(s);   //   image type
-   if ( tga_color_type == 1 ) { // colormapped (paletted) image
-      if (sz != 1 && sz != 9) goto errorEnd; // colortype 1 demands image type 1 or 9
-      stbi__skip(s,4);       // skip index of first colormap entry and number of entries
-      sz = stbi__get8(s);    //   check bits per palette color entry
-      if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
-      stbi__skip(s,4);       // skip image x and y origin
-   } else { // "normal" image w/o colormap
-      if ( (sz != 2) && (sz != 3) && (sz != 10) && (sz != 11) ) goto errorEnd; // only RGB or grey allowed, +/- RLE
-      stbi__skip(s,9); // skip colormap specification and image x/y origin
-   }
-   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test width
-   if ( stbi__get16le(s) < 1 ) goto errorEnd;      //   test height
-   sz = stbi__get8(s);   //   bits per pixel
-   if ( (tga_color_type == 1) && (sz != 8) && (sz != 16) ) goto errorEnd; // for colormapped images, bpp is size of an index
-   if ( (sz != 8) && (sz != 15) && (sz != 16) && (sz != 24) && (sz != 32) ) goto errorEnd;
-
-   res = 1; // if we got this far, everything's good and we can return 1 instead of 0
-
-errorEnd:
-   stbi__rewind(s);
-   return res;
-}
-
-// read 16bit value and convert to 24bit RGB
-static void stbi__tga_read_rgb16(stbi__context *s, stbi_uc* out)
-{
-   stbi__uint16 px = (stbi__uint16)stbi__get16le(s);
-   stbi__uint16 fiveBitMask = 31;
-   // we have 3 channels with 5bits each
-   int r = (px >> 10) & fiveBitMask;
-   int g = (px >> 5) & fiveBitMask;
-   int b = px & fiveBitMask;
-   // Note that this saves the data in RGB(A) order, so it doesn't need to be swapped later
-   out[0] = (stbi_uc)((r * 255)/31);
-   out[1] = (stbi_uc)((g * 255)/31);
-   out[2] = (stbi_uc)((b * 255)/31);
-
-   // some people claim that the most significant bit might be used for alpha
-   // (possibly if an alpha-bit is set in the "image descriptor byte")
-   // but that only made 16bit test images completely translucent..
-   // so let's treat all 15 and 16bit TGAs as RGB with no alpha.
-}
-
-static void *stbi__tga_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   //   read in the TGA header stuff
-   int tga_offset = stbi__get8(s);
-   int tga_indexed = stbi__get8(s);
-   int tga_image_type = stbi__get8(s);
-   int tga_is_RLE = 0;
-   int tga_palette_start = stbi__get16le(s);
-   int tga_palette_len = stbi__get16le(s);
-   int tga_palette_bits = stbi__get8(s);
-   int tga_x_origin = stbi__get16le(s);
-   int tga_y_origin = stbi__get16le(s);
-   int tga_width = stbi__get16le(s);
-   int tga_height = stbi__get16le(s);
-   int tga_bits_per_pixel = stbi__get8(s);
-   int tga_comp, tga_rgb16=0;
-   int tga_inverted = stbi__get8(s);
-   // int tga_alpha_bits = tga_inverted & 15; // the 4 lowest bits - unused (useless?)
-   //   image data
-   unsigned char *tga_data;
-   unsigned char *tga_palette = NULL;
-   int i, j;
-   unsigned char raw_data[4] = {0};
-   int RLE_count = 0;
-   int RLE_repeating = 0;
-   int read_next_pixel = 1;
-   STBI_NOTUSED(ri);
-   STBI_NOTUSED(tga_x_origin); // @TODO
-   STBI_NOTUSED(tga_y_origin); // @TODO
-
-   if (tga_height > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-   if (tga_width > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-
-   //   do a tiny bit of precessing
-   if ( tga_image_type >= 8 )
-   {
-      tga_image_type -= 8;
-      tga_is_RLE = 1;
-   }
-   tga_inverted = 1 - ((tga_inverted >> 5) & 1);
-
-   //   If I'm paletted, then I'll use the number of bits from the palette
-   if ( tga_indexed ) tga_comp = stbi__tga_get_comp(tga_palette_bits, 0, &tga_rgb16);
-   else tga_comp = stbi__tga_get_comp(tga_bits_per_pixel, (tga_image_type == 3), &tga_rgb16);
-
-   if(!tga_comp) // shouldn't really happen, stbi__tga_test() should have ensured basic consistency
-      return stbi__errpuc("bad format", "Can't find out TGA pixelformat");
-
-   //   tga info
-   *x = tga_width;
-   *y = tga_height;
-   if (comp) *comp = tga_comp;
-
-   if (!stbi__mad3sizes_valid(tga_width, tga_height, tga_comp, 0))
-      return stbi__errpuc("too large", "Corrupt TGA");
-
-   tga_data = (unsigned char*)stbi__malloc_mad3(tga_width, tga_height, tga_comp, 0);
-   if (!tga_data) return stbi__errpuc("outofmem", "Out of memory");
-
-   // skip to the data's starting position (offset usually = 0)
-   stbi__skip(s, tga_offset );
-
-   if ( !tga_indexed && !tga_is_RLE && !tga_rgb16 ) {
-      for (i=0; i < tga_height; ++i) {
-         int row = tga_inverted ? tga_height -i - 1 : i;
-         stbi_uc *tga_row = tga_data + row*tga_width*tga_comp;
-         stbi__getn(s, tga_row, tga_width * tga_comp);
-      }
-   } else  {
-      //   do I need to load a palette?
-      if ( tga_indexed)
-      {
-         if (tga_palette_len == 0) {  /* you have to have at least one entry! */
-            STBI_FREE(tga_data);
-            return stbi__errpuc("bad palette", "Corrupt TGA");
-         }
-
-         //   any data to skip? (offset usually = 0)
-         stbi__skip(s, tga_palette_start );
-         //   load the palette
-         tga_palette = (unsigned char*)stbi__malloc_mad2(tga_palette_len, tga_comp, 0);
-         if (!tga_palette) {
-            STBI_FREE(tga_data);
-            return stbi__errpuc("outofmem", "Out of memory");
-         }
-         if (tga_rgb16) {
-            stbi_uc *pal_entry = tga_palette;
-            STBI_ASSERT(tga_comp == STBI_rgb);
-            for (i=0; i < tga_palette_len; ++i) {
-               stbi__tga_read_rgb16(s, pal_entry);
-               pal_entry += tga_comp;
-            }
-         } else if (!stbi__getn(s, tga_palette, tga_palette_len * tga_comp)) {
-               STBI_FREE(tga_data);
-               STBI_FREE(tga_palette);
-               return stbi__errpuc("bad palette", "Corrupt TGA");
-         }
-      }
-      //   load the data
-      for (i=0; i < tga_width * tga_height; ++i)
-      {
-         //   if I'm in RLE mode, do I need to get a RLE stbi__pngchunk?
-         if ( tga_is_RLE )
-         {
-            if ( RLE_count == 0 )
-            {
-               //   yep, get the next byte as a RLE command
-               int RLE_cmd = stbi__get8(s);
-               RLE_count = 1 + (RLE_cmd & 127);
-               RLE_repeating = RLE_cmd >> 7;
-               read_next_pixel = 1;
-            } else if ( !RLE_repeating )
-            {
-               read_next_pixel = 1;
-            }
-         } else
-         {
-            read_next_pixel = 1;
-         }
-         //   OK, if I need to read a pixel, do it now
-         if ( read_next_pixel )
-         {
-            //   load however much data we did have
-            if ( tga_indexed )
-            {
-               // read in index, then perform the lookup
-               int pal_idx = (tga_bits_per_pixel == 8) ? stbi__get8(s) : stbi__get16le(s);
-               if ( pal_idx >= tga_palette_len ) {
-                  // invalid index
-                  pal_idx = 0;
-               }
-               pal_idx *= tga_comp;
-               for (j = 0; j < tga_comp; ++j) {
-                  raw_data[j] = tga_palette[pal_idx+j];
-               }
-            } else if(tga_rgb16) {
-               STBI_ASSERT(tga_comp == STBI_rgb);
-               stbi__tga_read_rgb16(s, raw_data);
-            } else {
-               //   read in the data raw
-               for (j = 0; j < tga_comp; ++j) {
-                  raw_data[j] = stbi__get8(s);
-               }
-            }
-            //   clear the reading flag for the next pixel
-            read_next_pixel = 0;
-         } // end of reading a pixel
-
-         // copy data
-         for (j = 0; j < tga_comp; ++j)
-           tga_data[i*tga_comp+j] = raw_data[j];
-
-         //   in case we're in RLE mode, keep counting down
-         --RLE_count;
-      }
-      //   do I need to invert the image?
-      if ( tga_inverted )
-      {
-         for (j = 0; j*2 < tga_height; ++j)
-         {
-            int index1 = j * tga_width * tga_comp;
-            int index2 = (tga_height - 1 - j) * tga_width * tga_comp;
-            for (i = tga_width * tga_comp; i > 0; --i)
-            {
-               unsigned char temp = tga_data[index1];
-               tga_data[index1] = tga_data[index2];
-               tga_data[index2] = temp;
-               ++index1;
-               ++index2;
-            }
-         }
-      }
-      //   clear my palette, if I had one
-      if ( tga_palette != NULL )
-      {
-         STBI_FREE( tga_palette );
-      }
-   }
-
-   // swap RGB - if the source data was RGB16, it already is in the right order
-   if (tga_comp >= 3 && !tga_rgb16)
-   {
-      unsigned char* tga_pixel = tga_data;
-      for (i=0; i < tga_width * tga_height; ++i)
-      {
-         unsigned char temp = tga_pixel[0];
-         tga_pixel[0] = tga_pixel[2];
-         tga_pixel[2] = temp;
-         tga_pixel += tga_comp;
-      }
-   }
-
-   // convert to target component count
-   if (req_comp && req_comp != tga_comp)
-      tga_data = stbi__convert_format(tga_data, tga_comp, req_comp, tga_width, tga_height);
-
-   //   the things I do to get rid of an error message, and yet keep
-   //   Microsoft's C compilers happy... [8^(
-   tga_palette_start = tga_palette_len = tga_palette_bits =
-         tga_x_origin = tga_y_origin = 0;
-   STBI_NOTUSED(tga_palette_start);
-   //   OK, done
-   return tga_data;
-}
-#endif
-
-// *************************************************************************************************
-// Photoshop PSD loader -- PD by Thatcher Ulrich, integration by Nicolas Schulz, tweaked by STB
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_test(stbi__context *s)
-{
-   int r = (stbi__get32be(s) == 0x38425053);
-   stbi__rewind(s);
-   return r;
-}
-
-static int stbi__psd_decode_rle(stbi__context *s, stbi_uc *p, int pixelCount)
-{
-   int count, nleft, len;
-
-   count = 0;
-   while ((nleft = pixelCount - count) > 0) {
-      len = stbi__get8(s);
-      if (len == 128) {
-         // No-op.
-      } else if (len < 128) {
-         // Copy next len+1 bytes literally.
-         len++;
-         if (len > nleft) return 0; // corrupt data
-         count += len;
-         while (len) {
-            *p = stbi__get8(s);
-            p += 4;
-            len--;
-         }
-      } else if (len > 128) {
-         stbi_uc   val;
-         // Next -len+1 bytes in the dest are replicated from next source byte.
-         // (Interpret len as a negative 8-bit int.)
-         len = 257 - len;
-         if (len > nleft) return 0; // corrupt data
-         val = stbi__get8(s);
-         count += len;
-         while (len) {
-            *p = val;
-            p += 4;
-            len--;
-         }
-      }
-   }
-
-   return 1;
-}
-
-static void *stbi__psd_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri, int bpc)
-{
-   int pixelCount;
-   int channelCount, compression;
-   int channel, i;
-   int bitdepth;
-   int w,h;
-   stbi_uc *out;
-   STBI_NOTUSED(ri);
-
-   // Check identifier
-   if (stbi__get32be(s) != 0x38425053)   // "8BPS"
-      return stbi__errpuc("not PSD", "Corrupt PSD image");
-
-   // Check file type version.
-   if (stbi__get16be(s) != 1)
-      return stbi__errpuc("wrong version", "Unsupported version of PSD image");
-
-   // Skip 6 reserved bytes.
-   stbi__skip(s, 6 );
-
-   // Read the number of channels (R, G, B, A, etc).
-   channelCount = stbi__get16be(s);
-   if (channelCount < 0 || channelCount > 16)
-      return stbi__errpuc("wrong channel count", "Unsupported number of channels in PSD image");
-
-   // Read the rows and columns of the image.
-   h = stbi__get32be(s);
-   w = stbi__get32be(s);
-
-   if (h > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-   if (w > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-
-   // Make sure the depth is 8 bits.
-   bitdepth = stbi__get16be(s);
-   if (bitdepth != 8 && bitdepth != 16)
-      return stbi__errpuc("unsupported bit depth", "PSD bit depth is not 8 or 16 bit");
-
-   // Make sure the color mode is RGB.
-   // Valid options are:
-   //   0: Bitmap
-   //   1: Grayscale
-   //   2: Indexed color
-   //   3: RGB color
-   //   4: CMYK color
-   //   7: Multichannel
-   //   8: Duotone
-   //   9: Lab color
-   if (stbi__get16be(s) != 3)
-      return stbi__errpuc("wrong color format", "PSD is not in RGB color format");
-
-   // Skip the Mode Data.  (It's the palette for indexed color; other info for other modes.)
-   stbi__skip(s,stbi__get32be(s) );
-
-   // Skip the image resources.  (resolution, pen tool paths, etc)
-   stbi__skip(s, stbi__get32be(s) );
-
-   // Skip the reserved data.
-   stbi__skip(s, stbi__get32be(s) );
-
-   // Find out if the data is compressed.
-   // Known values:
-   //   0: no compression
-   //   1: RLE compressed
-   compression = stbi__get16be(s);
-   if (compression > 1)
-      return stbi__errpuc("bad compression", "PSD has an unknown compression format");
-
-   // Check size
-   if (!stbi__mad3sizes_valid(4, w, h, 0))
-      return stbi__errpuc("too large", "Corrupt PSD");
-
-   // Create the destination image.
-
-   if (!compression && bitdepth == 16 && bpc == 16) {
-      out = (stbi_uc *) stbi__malloc_mad3(8, w, h, 0);
-      ri->bits_per_channel = 16;
-   } else
-      out = (stbi_uc *) stbi__malloc(4 * w*h);
-
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   pixelCount = w*h;
-
-   // Initialize the data to zero.
-   //memset( out, 0, pixelCount * 4 );
-
-   // Finally, the image data.
-   if (compression) {
-      // RLE as used by .PSD and .TIFF
-      // Loop until you get the number of unpacked bytes you are expecting:
-      //     Read the next source byte into n.
-      //     If n is between 0 and 127 inclusive, copy the next n+1 bytes literally.
-      //     Else if n is between -127 and -1 inclusive, copy the next byte -n+1 times.
-      //     Else if n is 128, noop.
-      // Endloop
-
-      // The RLE-compressed data is preceded by a 2-byte data count for each row in the data,
-      // which we're going to just skip.
-      stbi__skip(s, h * channelCount * 2 );
-
-      // Read the RLE data by channel.
-      for (channel = 0; channel < 4; channel++) {
-         stbi_uc *p;
-
-         p = out+channel;
-         if (channel >= channelCount) {
-            // Fill this channel with default data.
-            for (i = 0; i < pixelCount; i++, p += 4)
-               *p = (channel == 3 ? 255 : 0);
-         } else {
-            // Read the RLE data.
-            if (!stbi__psd_decode_rle(s, p, pixelCount)) {
-               STBI_FREE(out);
-               return stbi__errpuc("corrupt", "bad RLE data");
-            }
-         }
-      }
-
-   } else {
-      // We're at the raw image data.  It's each channel in order (Red, Green, Blue, Alpha, ...)
-      // where each channel consists of an 8-bit (or 16-bit) value for each pixel in the image.
-
-      // Read the data by channel.
-      for (channel = 0; channel < 4; channel++) {
-         if (channel >= channelCount) {
-            // Fill this channel with default data.
-            if (bitdepth == 16 && bpc == 16) {
-               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
-               stbi__uint16 val = channel == 3 ? 65535 : 0;
-               for (i = 0; i < pixelCount; i++, q += 4)
-                  *q = val;
-            } else {
-               stbi_uc *p = out+channel;
-               stbi_uc val = channel == 3 ? 255 : 0;
-               for (i = 0; i < pixelCount; i++, p += 4)
-                  *p = val;
-            }
-         } else {
-            if (ri->bits_per_channel == 16) {    // output bpc
-               stbi__uint16 *q = ((stbi__uint16 *) out) + channel;
-               for (i = 0; i < pixelCount; i++, q += 4)
-                  *q = (stbi__uint16) stbi__get16be(s);
-            } else {
-               stbi_uc *p = out+channel;
-               if (bitdepth == 16) {  // input bpc
-                  for (i = 0; i < pixelCount; i++, p += 4)
-                     *p = (stbi_uc) (stbi__get16be(s) >> 8);
-               } else {
-                  for (i = 0; i < pixelCount; i++, p += 4)
-                     *p = stbi__get8(s);
-               }
-            }
-         }
-      }
-   }
-
-   // remove weird white matte from PSD
-   if (channelCount >= 4) {
-      if (ri->bits_per_channel == 16) {
-         for (i=0; i < w*h; ++i) {
-            stbi__uint16 *pixel = (stbi__uint16 *) out + 4*i;
-            if (pixel[3] != 0 && pixel[3] != 65535) {
-               float a = pixel[3] / 65535.0f;
-               float ra = 1.0f / a;
-               float inv_a = 65535.0f * (1 - ra);
-               pixel[0] = (stbi__uint16) (pixel[0]*ra + inv_a);
-               pixel[1] = (stbi__uint16) (pixel[1]*ra + inv_a);
-               pixel[2] = (stbi__uint16) (pixel[2]*ra + inv_a);
-            }
-         }
-      } else {
-         for (i=0; i < w*h; ++i) {
-            unsigned char *pixel = out + 4*i;
-            if (pixel[3] != 0 && pixel[3] != 255) {
-               float a = pixel[3] / 255.0f;
-               float ra = 1.0f / a;
-               float inv_a = 255.0f * (1 - ra);
-               pixel[0] = (unsigned char) (pixel[0]*ra + inv_a);
-               pixel[1] = (unsigned char) (pixel[1]*ra + inv_a);
-               pixel[2] = (unsigned char) (pixel[2]*ra + inv_a);
-            }
-         }
-      }
-   }
-
-   // convert to desired output format
-   if (req_comp && req_comp != 4) {
-      if (ri->bits_per_channel == 16)
-         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, 4, req_comp, w, h);
-      else
-         out = stbi__convert_format(out, 4, req_comp, w, h);
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-
-   if (comp) *comp = 4;
-   *y = h;
-   *x = w;
-
-   return out;
-}
-#endif
-
-// *************************************************************************************************
-// Softimage PIC loader
-// by Tom Seddon
-//
-// See http://softimage.wiki.softimage.com/index.php/INFO:_PIC_file_format
-// See http://ozviz.wasp.uwa.edu.au/~pbourke/dataformats/softimagepic/
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_is4(stbi__context *s,const char *str)
-{
-   int i;
-   for (i=0; i<4; ++i)
-      if (stbi__get8(s) != (stbi_uc)str[i])
-         return 0;
-
-   return 1;
-}
-
-static int stbi__pic_test_core(stbi__context *s)
-{
-   int i;
-
-   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34"))
-      return 0;
-
-   for(i=0;i<84;++i)
-      stbi__get8(s);
-
-   if (!stbi__pic_is4(s,"PICT"))
-      return 0;
-
-   return 1;
-}
-
-typedef struct
-{
-   stbi_uc size,type,channel;
-} stbi__pic_packet;
-
-static stbi_uc *stbi__readval(stbi__context *s, int channel, stbi_uc *dest)
-{
-   int mask=0x80, i;
-
-   for (i=0; i<4; ++i, mask>>=1) {
-      if (channel & mask) {
-         if (stbi__at_eof(s)) return stbi__errpuc("bad file","PIC file too short");
-         dest[i]=stbi__get8(s);
-      }
-   }
-
-   return dest;
-}
-
-static void stbi__copyval(int channel,stbi_uc *dest,const stbi_uc *src)
-{
-   int mask=0x80,i;
-
-   for (i=0;i<4; ++i, mask>>=1)
-      if (channel&mask)
-         dest[i]=src[i];
-}
-
-static stbi_uc *stbi__pic_load_core(stbi__context *s,int width,int height,int *comp, stbi_uc *result)
-{
-   int act_comp=0,num_packets=0,y,chained;
-   stbi__pic_packet packets[10];
-
-   // this will (should...) cater for even some bizarre stuff like having data
-    // for the same channel in multiple packets.
-   do {
-      stbi__pic_packet *packet;
-
-      if (num_packets==sizeof(packets)/sizeof(packets[0]))
-         return stbi__errpuc("bad format","too many packets");
-
-      packet = &packets[num_packets++];
-
-      chained = stbi__get8(s);
-      packet->size    = stbi__get8(s);
-      packet->type    = stbi__get8(s);
-      packet->channel = stbi__get8(s);
-
-      act_comp |= packet->channel;
-
-      if (stbi__at_eof(s))          return stbi__errpuc("bad file","file too short (reading packets)");
-      if (packet->size != 8)  return stbi__errpuc("bad format","packet isn't 8bpp");
-   } while (chained);
-
-   *comp = (act_comp & 0x10 ? 4 : 3); // has alpha channel?
-
-   for(y=0; y<height; ++y) {
-      int packet_idx;
-
-      for(packet_idx=0; packet_idx < num_packets; ++packet_idx) {
-         stbi__pic_packet *packet = &packets[packet_idx];
-         stbi_uc *dest = result+y*width*4;
-
-         switch (packet->type) {
-            default:
-               return stbi__errpuc("bad format","packet has bad compression type");
-
-            case 0: {//uncompressed
-               int x;
-
-               for(x=0;x<width;++x, dest+=4)
-                  if (!stbi__readval(s,packet->channel,dest))
-                     return 0;
-               break;
-            }
-
-            case 1://Pure RLE
-               {
-                  int left=width, i;
-
-                  while (left>0) {
-                     stbi_uc count,value[4];
-
-                     count=stbi__get8(s);
-                     if (stbi__at_eof(s))   return stbi__errpuc("bad file","file too short (pure read count)");
-
-                     if (count > left)
-                        count = (stbi_uc) left;
-
-                     if (!stbi__readval(s,packet->channel,value))  return 0;
-
-                     for(i=0; i<count; ++i,dest+=4)
-                        stbi__copyval(packet->channel,dest,value);
-                     left -= count;
-                  }
-               }
-               break;
-
-            case 2: {//Mixed RLE
-               int left=width;
-               while (left>0) {
-                  int count = stbi__get8(s), i;
-                  if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (mixed read count)");
-
-                  if (count >= 128) { // Repeated
-                     stbi_uc value[4];
-
-                     if (count==128)
-                        count = stbi__get16be(s);
-                     else
-                        count -= 127;
-                     if (count > left)
-                        return stbi__errpuc("bad file","scanline overrun");
-
-                     if (!stbi__readval(s,packet->channel,value))
-                        return 0;
-
-                     for(i=0;i<count;++i, dest += 4)
-                        stbi__copyval(packet->channel,dest,value);
-                  } else { // Raw
-                     ++count;
-                     if (count>left) return stbi__errpuc("bad file","scanline overrun");
-
-                     for(i=0;i<count;++i, dest+=4)
-                        if (!stbi__readval(s,packet->channel,dest))
-                           return 0;
-                  }
-                  left-=count;
-               }
-               break;
-            }
-         }
-      }
-   }
-
-   return result;
-}
-
-static void *stbi__pic_load(stbi__context *s,int *px,int *py,int *comp,int req_comp, stbi__result_info *ri)
-{
-   stbi_uc *result;
-   int i, x,y, internal_comp;
-   STBI_NOTUSED(ri);
-
-   if (!comp) comp = &internal_comp;
-
-   for (i=0; i<92; ++i)
-      stbi__get8(s);
-
-   x = stbi__get16be(s);
-   y = stbi__get16be(s);
-
-   if (y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-   if (x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-
-   if (stbi__at_eof(s))  return stbi__errpuc("bad file","file too short (pic header)");
-   if (!stbi__mad3sizes_valid(x, y, 4, 0)) return stbi__errpuc("too large", "PIC image too large to decode");
-
-   stbi__get32be(s); //skip `ratio'
-   stbi__get16be(s); //skip `fields'
-   stbi__get16be(s); //skip `pad'
-
-   // intermediate buffer is RGBA
-   result = (stbi_uc *) stbi__malloc_mad3(x, y, 4, 0);
-   if (!result) return stbi__errpuc("outofmem", "Out of memory");
-   memset(result, 0xff, x*y*4);
-
-   if (!stbi__pic_load_core(s,x,y,comp, result)) {
-      STBI_FREE(result);
-      result=0;
-   }
-   *px = x;
-   *py = y;
-   if (req_comp == 0) req_comp = *comp;
-   result=stbi__convert_format(result,4,req_comp,x,y);
-
-   return result;
-}
-
-static int stbi__pic_test(stbi__context *s)
-{
-   int r = stbi__pic_test_core(s);
-   stbi__rewind(s);
-   return r;
-}
-#endif
-
-// *************************************************************************************************
-// GIF loader -- public domain by Jean-Marc Lienher -- simplified/shrunk by stb
-
-#ifndef STBI_NO_GIF
-typedef struct
-{
-   stbi__int16 prefix;
-   stbi_uc first;
-   stbi_uc suffix;
-} stbi__gif_lzw;
-
-typedef struct
-{
-   int w,h;
-   stbi_uc *out;                 // output buffer (always 4 components)
-   stbi_uc *background;          // The current "background" as far as a gif is concerned
-   stbi_uc *history;
-   int flags, bgindex, ratio, transparent, eflags;
-   stbi_uc  pal[256][4];
-   stbi_uc lpal[256][4];
-   stbi__gif_lzw codes[8192];
-   stbi_uc *color_table;
-   int parse, step;
-   int lflags;
-   int start_x, start_y;
-   int max_x, max_y;
-   int cur_x, cur_y;
-   int line_size;
-   int delay;
-} stbi__gif;
-
-static int stbi__gif_test_raw(stbi__context *s)
-{
-   int sz;
-   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8') return 0;
-   sz = stbi__get8(s);
-   if (sz != '9' && sz != '7') return 0;
-   if (stbi__get8(s) != 'a') return 0;
-   return 1;
-}
-
-static int stbi__gif_test(stbi__context *s)
-{
-   int r = stbi__gif_test_raw(s);
-   stbi__rewind(s);
-   return r;
-}
-
-static void stbi__gif_parse_colortable(stbi__context *s, stbi_uc pal[256][4], int num_entries, int transp)
-{
-   int i;
-   for (i=0; i < num_entries; ++i) {
-      pal[i][2] = stbi__get8(s);
-      pal[i][1] = stbi__get8(s);
-      pal[i][0] = stbi__get8(s);
-      pal[i][3] = transp == i ? 0 : 255;
-   }
-}
-
-static int stbi__gif_header(stbi__context *s, stbi__gif *g, int *comp, int is_info)
-{
-   stbi_uc version;
-   if (stbi__get8(s) != 'G' || stbi__get8(s) != 'I' || stbi__get8(s) != 'F' || stbi__get8(s) != '8')
-      return stbi__err("not GIF", "Corrupt GIF");
-
-   version = stbi__get8(s);
-   if (version != '7' && version != '9')    return stbi__err("not GIF", "Corrupt GIF");
-   if (stbi__get8(s) != 'a')                return stbi__err("not GIF", "Corrupt GIF");
-
-   stbi__g_failure_reason = "";
-   g->w = stbi__get16le(s);
-   g->h = stbi__get16le(s);
-   g->flags = stbi__get8(s);
-   g->bgindex = stbi__get8(s);
-   g->ratio = stbi__get8(s);
-   g->transparent = -1;
-
-   if (g->w > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-   if (g->h > STBI_MAX_DIMENSIONS) return stbi__err("too large","Very large image (corrupt?)");
-
-   if (comp != 0) *comp = 4;  // can't actually tell whether it's 3 or 4 until we parse the comments
-
-   if (is_info) return 1;
-
-   if (g->flags & 0x80)
-      stbi__gif_parse_colortable(s,g->pal, 2 << (g->flags & 7), -1);
-
-   return 1;
-}
-
-static int stbi__gif_info_raw(stbi__context *s, int *x, int *y, int *comp)
-{
-   stbi__gif* g = (stbi__gif*) stbi__malloc(sizeof(stbi__gif));
-   if (!g) return stbi__err("outofmem", "Out of memory");
-   if (!stbi__gif_header(s, g, comp, 1)) {
-      STBI_FREE(g);
-      stbi__rewind( s );
-      return 0;
-   }
-   if (x) *x = g->w;
-   if (y) *y = g->h;
-   STBI_FREE(g);
-   return 1;
-}
-
-static void stbi__out_gif_code(stbi__gif *g, stbi__uint16 code)
-{
-   stbi_uc *p, *c;
-   int idx;
-
-   // recurse to decode the prefixes, since the linked-list is backwards,
-   // and working backwards through an interleaved image would be nasty
-   if (g->codes[code].prefix >= 0)
-      stbi__out_gif_code(g, g->codes[code].prefix);
-
-   if (g->cur_y >= g->max_y) return;
-
-   idx = g->cur_x + g->cur_y;
-   p = &g->out[idx];
-   g->history[idx / 4] = 1;
-
-   c = &g->color_table[g->codes[code].suffix * 4];
-   if (c[3] > 128) { // don't render transparent pixels;
-      p[0] = c[2];
-      p[1] = c[1];
-      p[2] = c[0];
-      p[3] = c[3];
-   }
-   g->cur_x += 4;
-
-   if (g->cur_x >= g->max_x) {
-      g->cur_x = g->start_x;
-      g->cur_y += g->step;
-
-      while (g->cur_y >= g->max_y && g->parse > 0) {
-         g->step = (1 << g->parse) * g->line_size;
-         g->cur_y = g->start_y + (g->step >> 1);
-         --g->parse;
-      }
-   }
-}
-
-static stbi_uc *stbi__process_gif_raster(stbi__context *s, stbi__gif *g)
-{
-   stbi_uc lzw_cs;
-   stbi__int32 len, init_code;
-   stbi__uint32 first;
-   stbi__int32 codesize, codemask, avail, oldcode, bits, valid_bits, clear;
-   stbi__gif_lzw *p;
-
-   lzw_cs = stbi__get8(s);
-   if (lzw_cs > 12) return NULL;
-   clear = 1 << lzw_cs;
-   first = 1;
-   codesize = lzw_cs + 1;
-   codemask = (1 << codesize) - 1;
-   bits = 0;
-   valid_bits = 0;
-   for (init_code = 0; init_code < clear; init_code++) {
-      g->codes[init_code].prefix = -1;
-      g->codes[init_code].first = (stbi_uc) init_code;
-      g->codes[init_code].suffix = (stbi_uc) init_code;
-   }
-
-   // support no starting clear code
-   avail = clear+2;
-   oldcode = -1;
-
-   len = 0;
-   for(;;) {
-      if (valid_bits < codesize) {
-         if (len == 0) {
-            len = stbi__get8(s); // start new block
-            if (len == 0)
-               return g->out;
-         }
-         --len;
-         bits |= (stbi__int32) stbi__get8(s) << valid_bits;
-         valid_bits += 8;
-      } else {
-         stbi__int32 code = bits & codemask;
-         bits >>= codesize;
-         valid_bits -= codesize;
-         // @OPTIMIZE: is there some way we can accelerate the non-clear path?
-         if (code == clear) {  // clear code
-            codesize = lzw_cs + 1;
-            codemask = (1 << codesize) - 1;
-            avail = clear + 2;
-            oldcode = -1;
-            first = 0;
-         } else if (code == clear + 1) { // end of stream code
-            stbi__skip(s, len);
-            while ((len = stbi__get8(s)) > 0)
-               stbi__skip(s,len);
-            return g->out;
-         } else if (code <= avail) {
-            if (first) {
-               return stbi__errpuc("no clear code", "Corrupt GIF");
-            }
-
-            if (oldcode >= 0) {
-               p = &g->codes[avail++];
-               if (avail > 8192) {
-                  return stbi__errpuc("too many codes", "Corrupt GIF");
-               }
-
-               p->prefix = (stbi__int16) oldcode;
-               p->first = g->codes[oldcode].first;
-               p->suffix = (code == avail) ? p->first : g->codes[code].first;
-            } else if (code == avail)
-               return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-
-            stbi__out_gif_code(g, (stbi__uint16) code);
-
-            if ((avail & codemask) == 0 && avail <= 0x0FFF) {
-               codesize++;
-               codemask = (1 << codesize) - 1;
-            }
-
-            oldcode = code;
-         } else {
-            return stbi__errpuc("illegal code in raster", "Corrupt GIF");
-         }
-      }
-   }
-}
-
-// this function is designed to support animated gifs, although stb_image doesn't support it
-// two back is the image from two frames ago, used for a very specific disposal format
-static stbi_uc *stbi__gif_load_next(stbi__context *s, stbi__gif *g, int *comp, int req_comp, stbi_uc *two_back)
-{
-   int dispose;
-   int first_frame;
-   int pi;
-   int pcount;
-   STBI_NOTUSED(req_comp);
-
-   // on first frame, any non-written pixels get the background colour (non-transparent)
-   first_frame = 0;
-   if (g->out == 0) {
-      if (!stbi__gif_header(s, g, comp,0)) return 0; // stbi__g_failure_reason set by stbi__gif_header
-      if (!stbi__mad3sizes_valid(4, g->w, g->h, 0))
-         return stbi__errpuc("too large", "GIF image is too large");
-      pcount = g->w * g->h;
-      g->out = (stbi_uc *) stbi__malloc(4 * pcount);
-      g->background = (stbi_uc *) stbi__malloc(4 * pcount);
-      g->history = (stbi_uc *) stbi__malloc(pcount);
-      if (!g->out || !g->background || !g->history)
-         return stbi__errpuc("outofmem", "Out of memory");
-
-      // image is treated as "transparent" at the start - ie, nothing overwrites the current background;
-      // background colour is only used for pixels that are not rendered first frame, after that "background"
-      // color refers to the color that was there the previous frame.
-      memset(g->out, 0x00, 4 * pcount);
-      memset(g->background, 0x00, 4 * pcount); // state of the background (starts transparent)
-      memset(g->history, 0x00, pcount);        // pixels that were affected previous frame
-      first_frame = 1;
-   } else {
-      // second frame - how do we dispose of the previous one?
-      dispose = (g->eflags & 0x1C) >> 2;
-      pcount = g->w * g->h;
-
-      if ((dispose == 3) && (two_back == 0)) {
-         dispose = 2; // if I don't have an image to revert back to, default to the old background
-      }
-
-      if (dispose == 3) { // use previous graphic
-         for (pi = 0; pi < pcount; ++pi) {
-            if (g->history[pi]) {
-               memcpy( &g->out[pi * 4], &two_back[pi * 4], 4 );
-            }
-         }
-      } else if (dispose == 2) {
-         // restore what was changed last frame to background before that frame;
-         for (pi = 0; pi < pcount; ++pi) {
-            if (g->history[pi]) {
-               memcpy( &g->out[pi * 4], &g->background[pi * 4], 4 );
-            }
-         }
-      } else {
-         // This is a non-disposal case eithe way, so just
-         // leave the pixels as is, and they will become the new background
-         // 1: do not dispose
-         // 0:  not specified.
-      }
-
-      // background is what out is after the undoing of the previou frame;
-      memcpy( g->background, g->out, 4 * g->w * g->h );
-   }
-
-   // clear my history;
-   memset( g->history, 0x00, g->w * g->h );        // pixels that were affected previous frame
-
-   for (;;) {
-      int tag = stbi__get8(s);
-      switch (tag) {
-         case 0x2C: /* Image Descriptor */
-         {
-            stbi__int32 x, y, w, h;
-            stbi_uc *o;
-
-            x = stbi__get16le(s);
-            y = stbi__get16le(s);
-            w = stbi__get16le(s);
-            h = stbi__get16le(s);
-            if (((x + w) > (g->w)) || ((y + h) > (g->h)))
-               return stbi__errpuc("bad Image Descriptor", "Corrupt GIF");
-
-            g->line_size = g->w * 4;
-            g->start_x = x * 4;
-            g->start_y = y * g->line_size;
-            g->max_x   = g->start_x + w * 4;
-            g->max_y   = g->start_y + h * g->line_size;
-            g->cur_x   = g->start_x;
-            g->cur_y   = g->start_y;
-
-            // if the width of the specified rectangle is 0, that means
-            // we may not see *any* pixels or the image is malformed;
-            // to make sure this is caught, move the current y down to
-            // max_y (which is what out_gif_code checks).
-            if (w == 0)
-               g->cur_y = g->max_y;
-
-            g->lflags = stbi__get8(s);
-
-            if (g->lflags & 0x40) {
-               g->step = 8 * g->line_size; // first interlaced spacing
-               g->parse = 3;
-            } else {
-               g->step = g->line_size;
-               g->parse = 0;
-            }
-
-            if (g->lflags & 0x80) {
-               stbi__gif_parse_colortable(s,g->lpal, 2 << (g->lflags & 7), g->eflags & 0x01 ? g->transparent : -1);
-               g->color_table = (stbi_uc *) g->lpal;
-            } else if (g->flags & 0x80) {
-               g->color_table = (stbi_uc *) g->pal;
-            } else
-               return stbi__errpuc("missing color table", "Corrupt GIF");
-
-            o = stbi__process_gif_raster(s, g);
-            if (!o) return NULL;
-
-            // if this was the first frame,
-            pcount = g->w * g->h;
-            if (first_frame && (g->bgindex > 0)) {
-               // if first frame, any pixel not drawn to gets the background color
-               for (pi = 0; pi < pcount; ++pi) {
-                  if (g->history[pi] == 0) {
-                     g->pal[g->bgindex][3] = 255; // just in case it was made transparent, undo that; It will be reset next frame if need be;
-                     memcpy( &g->out[pi * 4], &g->pal[g->bgindex], 4 );
-                  }
-               }
-            }
-
-            return o;
-         }
-
-         case 0x21: // Comment Extension.
-         {
-            int len;
-            int ext = stbi__get8(s);
-            if (ext == 0xF9) { // Graphic Control Extension.
-               len = stbi__get8(s);
-               if (len == 4) {
-                  g->eflags = stbi__get8(s);
-                  g->delay = 10 * stbi__get16le(s); // delay - 1/100th of a second, saving as 1/1000ths.
-
-                  // unset old transparent
-                  if (g->transparent >= 0) {
-                     g->pal[g->transparent][3] = 255;
-                  }
-                  if (g->eflags & 0x01) {
-                     g->transparent = stbi__get8(s);
-                     if (g->transparent >= 0) {
-                        g->pal[g->transparent][3] = 0;
-                     }
-                  } else {
-                     // don't need transparent
-                     stbi__skip(s, 1);
-                     g->transparent = -1;
-                  }
-               } else {
-                  stbi__skip(s, len);
-                  break;
-               }
-            }
-            while ((len = stbi__get8(s)) != 0) {
-               stbi__skip(s, len);
-            }
-            break;
-         }
-
-         case 0x3B: // gif stream termination code
-            return (stbi_uc *) s; // using '1' causes warning on some compilers
-
-         default:
-            return stbi__errpuc("unknown code", "Corrupt GIF");
-      }
-   }
-}
-
-static void *stbi__load_gif_main_outofmem(stbi__gif *g, stbi_uc *out, int **delays)
-{
-   STBI_FREE(g->out);
-   STBI_FREE(g->history);
-   STBI_FREE(g->background);
-
-   if (out) STBI_FREE(out);
-   if (delays && *delays) STBI_FREE(*delays);
-   return stbi__errpuc("outofmem", "Out of memory");
-}
-
-static void *stbi__load_gif_main(stbi__context *s, int **delays, int *x, int *y, int *z, int *comp, int req_comp)
-{
-   if (stbi__gif_test(s)) {
-      int layers = 0;
-      stbi_uc *u = 0;
-      stbi_uc *out = 0;
-      stbi_uc *two_back = 0;
-      stbi__gif g;
-      int stride;
-      int out_size = 0;
-      int delays_size = 0;
-
-      STBI_NOTUSED(out_size);
-      STBI_NOTUSED(delays_size);
-
-      memset(&g, 0, sizeof(g));
-      if (delays) {
-         *delays = 0;
-      }
-
-      do {
-         u = stbi__gif_load_next(s, &g, comp, req_comp, two_back);
-         if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
-
-         if (u) {
-            *x = g.w;
-            *y = g.h;
-            ++layers;
-            stride = g.w * g.h * 4;
-
-            if (out) {
-               void *tmp = (stbi_uc*) STBI_REALLOC_SIZED( out, out_size, layers * stride );
-               if (!tmp)
-                  return stbi__load_gif_main_outofmem(&g, out, delays);
-               else {
-                   out = (stbi_uc*) tmp;
-                   out_size = layers * stride;
-               }
-
-               if (delays) {
-                  int *new_delays = (int*) STBI_REALLOC_SIZED( *delays, delays_size, sizeof(int) * layers );
-                  if (!new_delays)
-                     return stbi__load_gif_main_outofmem(&g, out, delays);
-                  *delays = new_delays;
-                  delays_size = layers * sizeof(int);
-               }
-            } else {
-               out = (stbi_uc*)stbi__malloc( layers * stride );
-               if (!out)
-                  return stbi__load_gif_main_outofmem(&g, out, delays);
-               out_size = layers * stride;
-               if (delays) {
-                  *delays = (int*) stbi__malloc( layers * sizeof(int) );
-                  if (!*delays)
-                     return stbi__load_gif_main_outofmem(&g, out, delays);
-                  delays_size = layers * sizeof(int);
-               }
-            }
-            memcpy( out + ((layers - 1) * stride), u, stride );
-            if (layers >= 2) {
-               two_back = out - 2 * stride;
-            }
-
-            if (delays) {
-               (*delays)[layers - 1U] = g.delay;
-            }
-         }
-      } while (u != 0);
-
-      // free temp buffer;
-      STBI_FREE(g.out);
-      STBI_FREE(g.history);
-      STBI_FREE(g.background);
-
-      // do the final conversion after loading everything;
-      if (req_comp && req_comp != 4)
-         out = stbi__convert_format(out, 4, req_comp, layers * g.w, g.h);
-
-      *z = layers;
-      return out;
-   } else {
-      return stbi__errpuc("not GIF", "Image was not as a gif type.");
-   }
-}
-
-static void *stbi__gif_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   stbi_uc *u = 0;
-   stbi__gif g;
-   memset(&g, 0, sizeof(g));
-   STBI_NOTUSED(ri);
-
-   u = stbi__gif_load_next(s, &g, comp, req_comp, 0);
-   if (u == (stbi_uc *) s) u = 0;  // end of animated gif marker
-   if (u) {
-      *x = g.w;
-      *y = g.h;
-
-      // moved conversion to after successful load so that the same
-      // can be done for multiple frames.
-      if (req_comp && req_comp != 4)
-         u = stbi__convert_format(u, 4, req_comp, g.w, g.h);
-   } else if (g.out) {
-      // if there was an error and we allocated an image buffer, free it!
-      STBI_FREE(g.out);
-   }
-
-   // free buffers needed for multiple frame loading;
-   STBI_FREE(g.history);
-   STBI_FREE(g.background);
-
-   return u;
-}
-
-static int stbi__gif_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   return stbi__gif_info_raw(s,x,y,comp);
-}
-#endif
-
-// *************************************************************************************************
-// Radiance RGBE HDR loader
-// originally by Nicolas Schulz
-#ifndef STBI_NO_HDR
-static int stbi__hdr_test_core(stbi__context *s, const char *signature)
-{
-   int i;
-   for (i=0; signature[i]; ++i)
-      if (stbi__get8(s) != signature[i])
-          return 0;
-   stbi__rewind(s);
-   return 1;
-}
-
-static int stbi__hdr_test(stbi__context* s)
-{
-   int r = stbi__hdr_test_core(s, "#?RADIANCE\n");
-   stbi__rewind(s);
-   if(!r) {
-       r = stbi__hdr_test_core(s, "#?RGBE\n");
-       stbi__rewind(s);
-   }
-   return r;
-}
-
-#define STBI__HDR_BUFLEN  1024
-static char *stbi__hdr_gettoken(stbi__context *z, char *buffer)
-{
-   int len=0;
-   char c = '\0';
-
-   c = (char) stbi__get8(z);
-
-   while (!stbi__at_eof(z) && c != '\n') {
-      buffer[len++] = c;
-      if (len == STBI__HDR_BUFLEN-1) {
-         // flush to end of line
-         while (!stbi__at_eof(z) && stbi__get8(z) != '\n')
-            ;
-         break;
-      }
-      c = (char) stbi__get8(z);
-   }
-
-   buffer[len] = 0;
-   return buffer;
-}
-
-static void stbi__hdr_convert(float *output, stbi_uc *input, int req_comp)
-{
-   if ( input[3] != 0 ) {
-      float f1;
-      // Exponent
-      f1 = (float) ldexp(1.0f, input[3] - (int)(128 + 8));
-      if (req_comp <= 2)
-         output[0] = (input[0] + input[1] + input[2]) * f1 / 3;
-      else {
-         output[0] = input[0] * f1;
-         output[1] = input[1] * f1;
-         output[2] = input[2] * f1;
-      }
-      if (req_comp == 2) output[1] = 1;
-      if (req_comp == 4) output[3] = 1;
-   } else {
-      switch (req_comp) {
-         case 4: output[3] = 1; /* fallthrough */
-         case 3: output[0] = output[1] = output[2] = 0;
-                 break;
-         case 2: output[1] = 1; /* fallthrough */
-         case 1: output[0] = 0;
-                 break;
-      }
-   }
-}
-
-static float *stbi__hdr_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   char buffer[STBI__HDR_BUFLEN];
-   char *token;
-   int valid = 0;
-   int width, height;
-   stbi_uc *scanline;
-   float *hdr_data;
-   int len;
-   unsigned char count, value;
-   int i, j, k, c1,c2, z;
-   const char *headerToken;
-   STBI_NOTUSED(ri);
-
-   // Check identifier
-   headerToken = stbi__hdr_gettoken(s,buffer);
-   if (strcmp(headerToken, "#?RADIANCE") != 0 && strcmp(headerToken, "#?RGBE") != 0)
-      return stbi__errpf("not HDR", "Corrupt HDR image");
-
-   // Parse header
-   for(;;) {
-      token = stbi__hdr_gettoken(s,buffer);
-      if (token[0] == 0) break;
-      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
-   }
-
-   if (!valid)    return stbi__errpf("unsupported format", "Unsupported HDR format");
-
-   // Parse width and height
-   // can't use sscanf() if we're not using stdio!
-   token = stbi__hdr_gettoken(s,buffer);
-   if (strncmp(token, "-Y ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-   token += 3;
-   height = (int) strtol(token, &token, 10);
-   while (*token == ' ') ++token;
-   if (strncmp(token, "+X ", 3))  return stbi__errpf("unsupported data layout", "Unsupported HDR format");
-   token += 3;
-   width = (int) strtol(token, NULL, 10);
-
-   if (height > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
-   if (width > STBI_MAX_DIMENSIONS) return stbi__errpf("too large","Very large image (corrupt?)");
-
-   *x = width;
-   *y = height;
-
-   if (comp) *comp = 3;
-   if (req_comp == 0) req_comp = 3;
-
-   if (!stbi__mad4sizes_valid(width, height, req_comp, sizeof(float), 0))
-      return stbi__errpf("too large", "HDR image is too large");
-
-   // Read data
-   hdr_data = (float *) stbi__malloc_mad4(width, height, req_comp, sizeof(float), 0);
-   if (!hdr_data)
-      return stbi__errpf("outofmem", "Out of memory");
-
-   // Load image data
-   // image data is stored as some number of sca
-   if ( width < 8 || width >= 32768) {
-      // Read flat data
-      for (j=0; j < height; ++j) {
-         for (i=0; i < width; ++i) {
-            stbi_uc rgbe[4];
-           main_decode_loop:
-            stbi__getn(s, rgbe, 4);
-            stbi__hdr_convert(hdr_data + j * width * req_comp + i * req_comp, rgbe, req_comp);
-         }
-      }
-   } else {
-      // Read RLE-encoded data
-      scanline = NULL;
-
-      for (j = 0; j < height; ++j) {
-         c1 = stbi__get8(s);
-         c2 = stbi__get8(s);
-         len = stbi__get8(s);
-         if (c1 != 2 || c2 != 2 || (len & 0x80)) {
-            // not run-length encoded, so we have to actually use THIS data as a decoded
-            // pixel (note this can't be a valid pixel--one of RGB must be >= 128)
-            stbi_uc rgbe[4];
-            rgbe[0] = (stbi_uc) c1;
-            rgbe[1] = (stbi_uc) c2;
-            rgbe[2] = (stbi_uc) len;
-            rgbe[3] = (stbi_uc) stbi__get8(s);
-            stbi__hdr_convert(hdr_data, rgbe, req_comp);
-            i = 1;
-            j = 0;
-            STBI_FREE(scanline);
-            goto main_decode_loop; // yes, this makes no sense
-         }
-         len <<= 8;
-         len |= stbi__get8(s);
-         if (len != width) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("invalid decoded scanline length", "corrupt HDR"); }
-         if (scanline == NULL) {
-            scanline = (stbi_uc *) stbi__malloc_mad2(width, 4, 0);
-            if (!scanline) {
-               STBI_FREE(hdr_data);
-               return stbi__errpf("outofmem", "Out of memory");
-            }
-         }
-
-         for (k = 0; k < 4; ++k) {
-            int nleft;
-            i = 0;
-            while ((nleft = width - i) > 0) {
-               count = stbi__get8(s);
-               if (count > 128) {
-                  // Run
-                  value = stbi__get8(s);
-                  count -= 128;
-                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
-                  for (z = 0; z < count; ++z)
-                     scanline[i++ * 4 + k] = value;
-               } else {
-                  // Dump
-                  if ((count == 0) || (count > nleft)) { STBI_FREE(hdr_data); STBI_FREE(scanline); return stbi__errpf("corrupt", "bad RLE data in HDR"); }
-                  for (z = 0; z < count; ++z)
-                     scanline[i++ * 4 + k] = stbi__get8(s);
-               }
-            }
-         }
-         for (i=0; i < width; ++i)
-            stbi__hdr_convert(hdr_data+(j*width + i)*req_comp, scanline + i*4, req_comp);
-      }
-      if (scanline)
-         STBI_FREE(scanline);
-   }
-
-   return hdr_data;
-}
-
-static int stbi__hdr_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   char buffer[STBI__HDR_BUFLEN];
-   char *token;
-   int valid = 0;
-   int dummy;
-
-   if (!x) x = &dummy;
-   if (!y) y = &dummy;
-   if (!comp) comp = &dummy;
-
-   if (stbi__hdr_test(s) == 0) {
-       stbi__rewind( s );
-       return 0;
-   }
-
-   for(;;) {
-      token = stbi__hdr_gettoken(s,buffer);
-      if (token[0] == 0) break;
-      if (strcmp(token, "FORMAT=32-bit_rle_rgbe") == 0) valid = 1;
-   }
-
-   if (!valid) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token = stbi__hdr_gettoken(s,buffer);
-   if (strncmp(token, "-Y ", 3)) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token += 3;
-   *y = (int) strtol(token, &token, 10);
-   while (*token == ' ') ++token;
-   if (strncmp(token, "+X ", 3)) {
-       stbi__rewind( s );
-       return 0;
-   }
-   token += 3;
-   *x = (int) strtol(token, NULL, 10);
-   *comp = 3;
-   return 1;
-}
-#endif // STBI_NO_HDR
-
-#ifndef STBI_NO_BMP
-static int stbi__bmp_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   void *p;
-   stbi__bmp_data info;
-
-   info.all_a = 255;
-   p = stbi__bmp_parse_header(s, &info);
-   if (p == NULL) {
-      stbi__rewind( s );
-      return 0;
-   }
-   if (x) *x = s->img_x;
-   if (y) *y = s->img_y;
-   if (comp) {
-      if (info.bpp == 24 && info.ma == 0xff000000)
-         *comp = 3;
-      else
-         *comp = info.ma ? 4 : 3;
-   }
-   return 1;
-}
-#endif
-
-#ifndef STBI_NO_PSD
-static int stbi__psd_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int channelCount, dummy, depth;
-   if (!x) x = &dummy;
-   if (!y) y = &dummy;
-   if (!comp) comp = &dummy;
-   if (stbi__get32be(s) != 0x38425053) {
-       stbi__rewind( s );
-       return 0;
-   }
-   if (stbi__get16be(s) != 1) {
-       stbi__rewind( s );
-       return 0;
-   }
-   stbi__skip(s, 6);
-   channelCount = stbi__get16be(s);
-   if (channelCount < 0 || channelCount > 16) {
-       stbi__rewind( s );
-       return 0;
-   }
-   *y = stbi__get32be(s);
-   *x = stbi__get32be(s);
-   depth = stbi__get16be(s);
-   if (depth != 8 && depth != 16) {
-       stbi__rewind( s );
-       return 0;
-   }
-   if (stbi__get16be(s) != 3) {
-       stbi__rewind( s );
-       return 0;
-   }
-   *comp = 4;
-   return 1;
-}
-
-static int stbi__psd_is16(stbi__context *s)
-{
-   int channelCount, depth;
-   if (stbi__get32be(s) != 0x38425053) {
-       stbi__rewind( s );
-       return 0;
-   }
-   if (stbi__get16be(s) != 1) {
-       stbi__rewind( s );
-       return 0;
-   }
-   stbi__skip(s, 6);
-   channelCount = stbi__get16be(s);
-   if (channelCount < 0 || channelCount > 16) {
-       stbi__rewind( s );
-       return 0;
-   }
-   STBI_NOTUSED(stbi__get32be(s));
-   STBI_NOTUSED(stbi__get32be(s));
-   depth = stbi__get16be(s);
-   if (depth != 16) {
-       stbi__rewind( s );
-       return 0;
-   }
-   return 1;
-}
-#endif
-
-#ifndef STBI_NO_PIC
-static int stbi__pic_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int act_comp=0,num_packets=0,chained,dummy;
-   stbi__pic_packet packets[10];
-
-   if (!x) x = &dummy;
-   if (!y) y = &dummy;
-   if (!comp) comp = &dummy;
-
-   if (!stbi__pic_is4(s,"\x53\x80\xF6\x34")) {
-      stbi__rewind(s);
-      return 0;
-   }
-
-   stbi__skip(s, 88);
-
-   *x = stbi__get16be(s);
-   *y = stbi__get16be(s);
-   if (stbi__at_eof(s)) {
-      stbi__rewind( s);
-      return 0;
-   }
-   if ( (*x) != 0 && (1 << 28) / (*x) < (*y)) {
-      stbi__rewind( s );
-      return 0;
-   }
-
-   stbi__skip(s, 8);
-
-   do {
-      stbi__pic_packet *packet;
-
-      if (num_packets==sizeof(packets)/sizeof(packets[0]))
-         return 0;
-
-      packet = &packets[num_packets++];
-      chained = stbi__get8(s);
-      packet->size    = stbi__get8(s);
-      packet->type    = stbi__get8(s);
-      packet->channel = stbi__get8(s);
-      act_comp |= packet->channel;
-
-      if (stbi__at_eof(s)) {
-          stbi__rewind( s );
-          return 0;
-      }
-      if (packet->size != 8) {
-          stbi__rewind( s );
-          return 0;
-      }
-   } while (chained);
-
-   *comp = (act_comp & 0x10 ? 4 : 3);
-
-   return 1;
-}
-#endif
-
-// *************************************************************************************************
-// Portable Gray Map and Portable Pixel Map loader
-// by Ken Miller
-//
-// PGM: http://netpbm.sourceforge.net/doc/pgm.html
-// PPM: http://netpbm.sourceforge.net/doc/ppm.html
-//
-// Known limitations:
-//    Does not support comments in the header section
-//    Does not support ASCII image data (formats P2 and P3)
-
-#ifndef STBI_NO_PNM
-
-static int      stbi__pnm_test(stbi__context *s)
-{
-   char p, t;
-   p = (char) stbi__get8(s);
-   t = (char) stbi__get8(s);
-   if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind( s );
-       return 0;
-   }
-   return 1;
-}
-
-static void *stbi__pnm_load(stbi__context *s, int *x, int *y, int *comp, int req_comp, stbi__result_info *ri)
-{
-   stbi_uc *out;
-   STBI_NOTUSED(ri);
-
-   ri->bits_per_channel = stbi__pnm_info(s, (int *)&s->img_x, (int *)&s->img_y, (int *)&s->img_n);
-   if (ri->bits_per_channel == 0)
-      return 0;
-
-   if (s->img_y > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-   if (s->img_x > STBI_MAX_DIMENSIONS) return stbi__errpuc("too large","Very large image (corrupt?)");
-
-   *x = s->img_x;
-   *y = s->img_y;
-   if (comp) *comp = s->img_n;
-
-   if (!stbi__mad4sizes_valid(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0))
-      return stbi__errpuc("too large", "PNM too large");
-
-   out = (stbi_uc *) stbi__malloc_mad4(s->img_n, s->img_x, s->img_y, ri->bits_per_channel / 8, 0);
-   if (!out) return stbi__errpuc("outofmem", "Out of memory");
-   if (!stbi__getn(s, out, s->img_n * s->img_x * s->img_y * (ri->bits_per_channel / 8))) {
-      STBI_FREE(out);
-      return stbi__errpuc("bad PNM", "PNM file truncated");
-   }
-
-   if (req_comp && req_comp != s->img_n) {
-      if (ri->bits_per_channel == 16) {
-         out = (stbi_uc *) stbi__convert_format16((stbi__uint16 *) out, s->img_n, req_comp, s->img_x, s->img_y);
-      } else {
-         out = stbi__convert_format(out, s->img_n, req_comp, s->img_x, s->img_y);
-      }
-      if (out == NULL) return out; // stbi__convert_format frees input on failure
-   }
-   return out;
-}
-
-static int      stbi__pnm_isspace(char c)
-{
-   return c == ' ' || c == '\t' || c == '\n' || c == '\v' || c == '\f' || c == '\r';
-}
-
-static void     stbi__pnm_skip_whitespace(stbi__context *s, char *c)
-{
-   for (;;) {
-      while (!stbi__at_eof(s) && stbi__pnm_isspace(*c))
-         *c = (char) stbi__get8(s);
-
-      if (stbi__at_eof(s) || *c != '#')
-         break;
-
-      while (!stbi__at_eof(s) && *c != '\n' && *c != '\r' )
-         *c = (char) stbi__get8(s);
-   }
-}
-
-static int      stbi__pnm_isdigit(char c)
-{
-   return c >= '0' && c <= '9';
-}
-
-static int      stbi__pnm_getinteger(stbi__context *s, char *c)
-{
-   int value = 0;
-
-   while (!stbi__at_eof(s) && stbi__pnm_isdigit(*c)) {
-      value = value*10 + (*c - '0');
-      *c = (char) stbi__get8(s);
-      if((value > 214748364) || (value == 214748364 && *c > '7'))
-          return stbi__err("integer parse overflow", "Parsing an integer in the PPM header overflowed a 32-bit int");
-   }
-
-   return value;
-}
-
-static int      stbi__pnm_info(stbi__context *s, int *x, int *y, int *comp)
-{
-   int maxv, dummy;
-   char c, p, t;
-
-   if (!x) x = &dummy;
-   if (!y) y = &dummy;
-   if (!comp) comp = &dummy;
-
-   stbi__rewind(s);
-
-   // Get identifier
-   p = (char) stbi__get8(s);
-   t = (char) stbi__get8(s);
-   if (p != 'P' || (t != '5' && t != '6')) {
-       stbi__rewind(s);
-       return 0;
-   }
-
-   *comp = (t == '6') ? 3 : 1;  // '5' is 1-component .pgm; '6' is 3-component .ppm
-
-   c = (char) stbi__get8(s);
-   stbi__pnm_skip_whitespace(s, &c);
-
-   *x = stbi__pnm_getinteger(s, &c); // read width
-   if(*x == 0)
-       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
-   stbi__pnm_skip_whitespace(s, &c);
-
-   *y = stbi__pnm_getinteger(s, &c); // read height
-   if (*y == 0)
-       return stbi__err("invalid width", "PPM image header had zero or overflowing width");
-   stbi__pnm_skip_whitespace(s, &c);
-
-   maxv = stbi__pnm_getinteger(s, &c);  // read max value
-   if (maxv > 65535)
-      return stbi__err("max value > 65535", "PPM image supports only 8-bit and 16-bit images");
-   else if (maxv > 255)
-      return 16;
-   else
-      return 8;
-}
-
-static int stbi__pnm_is16(stbi__context *s)
-{
-   if (stbi__pnm_info(s, NULL, NULL, NULL) == 16)
-	   return 1;
-   return 0;
-}
-#endif
-
-static int stbi__info_main(stbi__context *s, int *x, int *y, int *comp)
-{
-   #ifndef STBI_NO_JPEG
-   if (stbi__jpeg_info(s, x, y, comp)) return 1;
-   #endif
-
-   #ifndef STBI_NO_PNG
-   if (stbi__png_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_GIF
-   if (stbi__gif_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_BMP
-   if (stbi__bmp_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PSD
-   if (stbi__psd_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PIC
-   if (stbi__pic_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PNM
-   if (stbi__pnm_info(s, x, y, comp))  return 1;
-   #endif
-
-   #ifndef STBI_NO_HDR
-   if (stbi__hdr_info(s, x, y, comp))  return 1;
-   #endif
-
-   // test tga last because it's a crappy test!
-   #ifndef STBI_NO_TGA
-   if (stbi__tga_info(s, x, y, comp))
-       return 1;
-   #endif
-   return stbi__err("unknown image type", "Image not of any known type, or corrupt");
-}
-
-static int stbi__is_16_main(stbi__context *s)
-{
-   #ifndef STBI_NO_PNG
-   if (stbi__png_is16(s))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PSD
-   if (stbi__psd_is16(s))  return 1;
-   #endif
-
-   #ifndef STBI_NO_PNM
-   if (stbi__pnm_is16(s))  return 1;
-   #endif
-   return 0;
-}
-
-#ifndef STBI_NO_STDIO
-STBIDEF int stbi_info(char const *filename, int *x, int *y, int *comp)
-{
-    FILE *f = stbi__fopen(filename, "rb");
-    int result;
-    if (!f) return stbi__err("can't fopen", "Unable to open file");
-    result = stbi_info_from_file(f, x, y, comp);
-    fclose(f);
-    return result;
-}
-
-STBIDEF int stbi_info_from_file(FILE *f, int *x, int *y, int *comp)
-{
-   int r;
-   stbi__context s;
-   long pos = ftell(f);
-   stbi__start_file(&s, f);
-   r = stbi__info_main(&s,x,y,comp);
-   fseek(f,pos,SEEK_SET);
-   return r;
-}
-
-STBIDEF int stbi_is_16_bit(char const *filename)
-{
-    FILE *f = stbi__fopen(filename, "rb");
-    int result;
-    if (!f) return stbi__err("can't fopen", "Unable to open file");
-    result = stbi_is_16_bit_from_file(f);
-    fclose(f);
-    return result;
-}
-
-STBIDEF int stbi_is_16_bit_from_file(FILE *f)
-{
-   int r;
-   stbi__context s;
-   long pos = ftell(f);
-   stbi__start_file(&s, f);
-   r = stbi__is_16_main(&s);
-   fseek(f,pos,SEEK_SET);
-   return r;
-}
-#endif // !STBI_NO_STDIO
-
-STBIDEF int stbi_info_from_memory(stbi_uc const *buffer, int len, int *x, int *y, int *comp)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__info_main(&s,x,y,comp);
-}
-
-STBIDEF int stbi_info_from_callbacks(stbi_io_callbacks const *c, void *user, int *x, int *y, int *comp)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
-   return stbi__info_main(&s,x,y,comp);
-}
-
-STBIDEF int stbi_is_16_bit_from_memory(stbi_uc const *buffer, int len)
-{
-   stbi__context s;
-   stbi__start_mem(&s,buffer,len);
-   return stbi__is_16_main(&s);
-}
-
-STBIDEF int stbi_is_16_bit_from_callbacks(stbi_io_callbacks const *c, void *user)
-{
-   stbi__context s;
-   stbi__start_callbacks(&s, (stbi_io_callbacks *) c, user);
-   return stbi__is_16_main(&s);
-}
-
-#endif // STB_IMAGE_IMPLEMENTATION
-
-/*
-   revision history:
-      2.20  (2019-02-07) support utf8 filenames in Windows; fix warnings and platform ifdefs
-      2.19  (2018-02-11) fix warning
-      2.18  (2018-01-30) fix warnings
-      2.17  (2018-01-29) change sbti__shiftsigned to avoid clang -O2 bug
-                         1-bit BMP
-                         *_is_16_bit api
-                         avoid warnings
-      2.16  (2017-07-23) all functions have 16-bit variants;
-                         STBI_NO_STDIO works again;
-                         compilation fixes;
-                         fix rounding in unpremultiply;
-                         optimize vertical flip;
-                         disable raw_len validation;
-                         documentation fixes
-      2.15  (2017-03-18) fix png-1,2,4 bug; now all Imagenet JPGs decode;
-                         warning fixes; disable run-time SSE detection on gcc;
-                         uniform handling of optional "return" values;
-                         thread-safe initialization of zlib tables
-      2.14  (2017-03-03) remove deprecated STBI_JPEG_OLD; fixes for Imagenet JPGs
-      2.13  (2016-11-29) add 16-bit API, only supported for PNG right now
-      2.12  (2016-04-02) fix typo in 2.11 PSD fix that caused crashes
-      2.11  (2016-04-02) allocate large structures on the stack
-                         remove white matting for transparent PSD
-                         fix reported channel count for PNG & BMP
-                         re-enable SSE2 in non-gcc 64-bit
-                         support RGB-formatted JPEG
-                         read 16-bit PNGs (only as 8-bit)
-      2.10  (2016-01-22) avoid warning introduced in 2.09 by STBI_REALLOC_SIZED
-      2.09  (2016-01-16) allow comments in PNM files
-                         16-bit-per-pixel TGA (not bit-per-component)
-                         info() for TGA could break due to .hdr handling
-                         info() for BMP to shares code instead of sloppy parse
-                         can use STBI_REALLOC_SIZED if allocator doesn't support realloc
-                         code cleanup
-      2.08  (2015-09-13) fix to 2.07 cleanup, reading RGB PSD as RGBA
-      2.07  (2015-09-13) fix compiler warnings
-                         partial animated GIF support
-                         limited 16-bpc PSD support
-                         #ifdef unused functions
-                         bug with < 92 byte PIC,PNM,HDR,TGA
-      2.06  (2015-04-19) fix bug where PSD returns wrong '*comp' value
-      2.05  (2015-04-19) fix bug in progressive JPEG handling, fix warning
-      2.04  (2015-04-15) try to re-enable SIMD on MinGW 64-bit
-      2.03  (2015-04-12) extra corruption checking (mmozeiko)
-                         stbi_set_flip_vertically_on_load (nguillemot)
-                         fix NEON support; fix mingw support
-      2.02  (2015-01-19) fix incorrect assert, fix warning
-      2.01  (2015-01-17) fix various warnings; suppress SIMD on gcc 32-bit without -msse2
-      2.00b (2014-12-25) fix STBI_MALLOC in progressive JPEG
-      2.00  (2014-12-25) optimize JPG, including x86 SSE2 & NEON SIMD (ryg)
-                         progressive JPEG (stb)
-                         PGM/PPM support (Ken Miller)
-                         STBI_MALLOC,STBI_REALLOC,STBI_FREE
-                         GIF bugfix -- seemingly never worked
-                         STBI_NO_*, STBI_ONLY_*
-      1.48  (2014-12-14) fix incorrectly-named assert()
-      1.47  (2014-12-14) 1/2/4-bit PNG support, both direct and paletted (Omar Cornut & stb)
-                         optimize PNG (ryg)
-                         fix bug in interlaced PNG with user-specified channel count (stb)
-      1.46  (2014-08-26)
-              fix broken tRNS chunk (colorkey-style transparency) in non-paletted PNG
-      1.45  (2014-08-16)
-              fix MSVC-ARM internal compiler error by wrapping malloc
-      1.44  (2014-08-07)
-              various warning fixes from Ronny Chevalier
-      1.43  (2014-07-15)
-              fix MSVC-only compiler problem in code changed in 1.42
-      1.42  (2014-07-09)
-              don't define _CRT_SECURE_NO_WARNINGS (affects user code)
-              fixes to stbi__cleanup_jpeg path
-              added STBI_ASSERT to avoid requiring assert.h
-      1.41  (2014-06-25)
-              fix search&replace from 1.36 that messed up comments/error messages
-      1.40  (2014-06-22)
-              fix gcc struct-initialization warning
-      1.39  (2014-06-15)
-              fix to TGA optimization when req_comp != number of components in TGA;
-              fix to GIF loading because BMP wasn't rewinding (whoops, no GIFs in my test suite)
-              add support for BMP version 5 (more ignored fields)
-      1.38  (2014-06-06)
-              suppress MSVC warnings on integer casts truncating values
-              fix accidental rename of 'skip' field of I/O
-      1.37  (2014-06-04)
-              remove duplicate typedef
-      1.36  (2014-06-03)
-              convert to header file single-file library
-              if de-iphone isn't set, load iphone images color-swapped instead of returning NULL
-      1.35  (2014-05-27)
-              various warnings
-              fix broken STBI_SIMD path
-              fix bug where stbi_load_from_file no longer left file pointer in correct place
-              fix broken non-easy path for 32-bit BMP (possibly never used)
-              TGA optimization by Arseny Kapoulkine
-      1.34  (unknown)
-              use STBI_NOTUSED in stbi__resample_row_generic(), fix one more leak in tga failure case
-      1.33  (2011-07-14)
-              make stbi_is_hdr work in STBI_NO_HDR (as specified), minor compiler-friendly improvements
-      1.32  (2011-07-13)
-              support for "info" function for all supported filetypes (SpartanJ)
-      1.31  (2011-06-20)
-              a few more leak fixes, bug in PNG handling (SpartanJ)
-      1.30  (2011-06-11)
-              added ability to load files via callbacks to accomidate custom input streams (Ben Wenger)
-              removed deprecated format-specific test/load functions
-              removed support for installable file formats (stbi_loader) -- would have been broken for IO callbacks anyway
-              error cases in bmp and tga give messages and don't leak (Raymond Barbiero, grisha)
-              fix inefficiency in decoding 32-bit BMP (David Woo)
-      1.29  (2010-08-16)
-              various warning fixes from Aurelien Pocheville
-      1.28  (2010-08-01)
-              fix bug in GIF palette transparency (SpartanJ)
-      1.27  (2010-08-01)
-              cast-to-stbi_uc to fix warnings
-      1.26  (2010-07-24)
-              fix bug in file buffering for PNG reported by SpartanJ
-      1.25  (2010-07-17)
-              refix trans_data warning (Won Chun)
-      1.24  (2010-07-12)
-              perf improvements reading from files on platforms with lock-heavy fgetc()
-              minor perf improvements for jpeg
-              deprecated type-specific functions so we'll get feedback if they're needed
-              attempt to fix trans_data warning (Won Chun)
-      1.23    fixed bug in iPhone support
-      1.22  (2010-07-10)
-              removed image *writing* support
-              stbi_info support from Jetro Lauha
-              GIF support from Jean-Marc Lienher
-              iPhone PNG-extensions from James Brown
-              warning-fixes from Nicolas Schulz and Janez Zemva (i.stbi__err. Janez (U+017D)emva)
-      1.21    fix use of 'stbi_uc' in header (reported by jon blow)
-      1.20    added support for Softimage PIC, by Tom Seddon
-      1.19    bug in interlaced PNG corruption check (found by ryg)
-      1.18  (2008-08-02)
-              fix a threading bug (local mutable static)
-      1.17    support interlaced PNG
-      1.16    major bugfix - stbi__convert_format converted one too many pixels
-      1.15    initialize some fields for thread safety
-      1.14    fix threadsafe conversion bug
-              header-file-only version (#define STBI_HEADER_FILE_ONLY before including)
-      1.13    threadsafe
-      1.12    const qualifiers in the API
-      1.11    Support installable IDCT, colorspace conversion routines
-      1.10    Fixes for 64-bit (don't use "unsigned long")
-              optimized upsampling by Fabian "ryg" Giesen
-      1.09    Fix format-conversion for PSD code (bad global variables!)
-      1.08    Thatcher Ulrich's PSD code integrated by Nicolas Schulz
-      1.07    attempt to fix C++ warning/errors again
-      1.06    attempt to fix C++ warning/errors again
-      1.05    fix TGA loading to return correct *comp and use good luminance calc
-      1.04    default float alpha is 1, not 255; use 'void *' for stbi_image_free
-      1.03    bugfixes to STBI_NO_STDIO, STBI_NO_HDR
-      1.02    support for (subset of) HDR files, float interface for preferred access to them
-      1.01    fix bug: possible bug in handling right-side up bmps... not sure
-              fix bug: the stbi__bmp_load() and stbi__tga_load() functions didn't work at all
-      1.00    interface to zlib that skips zlib header
-      0.99    correct handling of alpha in palette
-      0.98    TGA loader by lonesock; dynamically add loaders (untested)
-      0.97    jpeg errors on too large a file; also catch another malloc failure
-      0.96    fix detection of invalid v value - particleman@mollyrocket forum
-      0.95    during header scan, seek to markers in case of padding
-      0.94    STBI_NO_STDIO to disable stdio usage; rename all #defines the same
-      0.93    handle jpegtran output; verbose errors
-      0.92    read 4,8,16,24,32-bit BMP files of several formats
-      0.91    output 24-bit Windows 3.0 BMP files
-      0.90    fix a few more warnings; bump version number to approach 1.0
-      0.61    bugfixes due to Marc LeBlanc, Christopher Lloyd
-      0.60    fix compiling as c++
-      0.59    fix warnings: merge Dave Moore's -Wall fixes
-      0.58    fix bug: zlib uncompressed mode len/nlen was wrong endian
-      0.57    fix bug: jpg last huffman symbol before marker was >9 bits but less than 16 available
-      0.56    fix bug: zlib uncompressed mode len vs. nlen
-      0.55    fix bug: restart_interval not initialized to 0
-      0.54    allow NULL for 'int *comp'
-      0.53    fix bug in png 3->4; speedup png decoding
-      0.52    png handles req_comp=3,4 directly; minor cleanup; jpeg comments
-      0.51    obey req_comp requests, 1-component jpegs return as 1-component,
-              on 'test' only check type, not whether we support this variant
-      0.50  (2006-11-19)
-              first released version
-*/
-
-
-/*
-------------------------------------------------------------------------------
-This software is available under 2 licenses -- choose whichever you prefer.
-------------------------------------------------------------------------------
-ALTERNATIVE A - MIT License
-Copyright (c) 2017 Sean Barrett
-Permission is hereby granted, free of charge, to any person obtaining a copy of
-this software and associated documentation files (the "Software"), to deal in
-the Software without restriction, including without limitation the rights to
-use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
-of the Software, and to permit persons to whom the Software is furnished to do
-so, subject to the following conditions:
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-------------------------------------------------------------------------------
-ALTERNATIVE B - Public Domain (www.unlicense.org)
-This is free and unencumbered software released into the public domain.
-Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
-software, either in source code form or as a compiled binary, for any purpose,
-commercial or non-commercial, and by any means.
-In jurisdictions that recognize copyright laws, the author or authors of this
-software dedicate any and all copyright interest in the software to the public
-domain. We make this dedication for the benefit of the public at large and to
-the detriment of our heirs and successors. We intend this dedication to be an
-overt act of relinquishment in perpetuity of all present and future rights to
-this software under copyright law.
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
-ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
-WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------------
-*/
diff --git a/pixelflux/include/xxhash.c b/pixelflux/include/xxhash.c
deleted file mode 100644
index e60cc37..0000000
--- a/pixelflux/include/xxhash.c
+++ /dev/null
@@ -1,42 +0,0 @@
-/*
- * xxHash - Extremely Fast Hash algorithm
- * Copyright (C) 2012-2023 Yann Collet
- *
- * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other materials provided with the
- *      distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at:
- *   - xxHash homepage: https://www.xxhash.com
- *   - xxHash source repository: https://github.com/Cyan4973/xxHash
- */
-
-/*
- * xxhash.c instantiates functions defined in xxhash.h
- */
-
-#define XXH_STATIC_LINKING_ONLY /* access advanced declarations */
-#define XXH_IMPLEMENTATION      /* access definitions */
-
-#include "xxhash.h"
diff --git a/pixelflux/include/xxhash.h b/pixelflux/include/xxhash.h
deleted file mode 100644
index 78fc2e8..0000000
--- a/pixelflux/include/xxhash.h
+++ /dev/null
@@ -1,7238 +0,0 @@
-/*
- * xxHash - Extremely Fast Hash algorithm
- * Header File
- * Copyright (C) 2012-2023 Yann Collet
- *
- * BSD 2-Clause License (https://www.opensource.org/licenses/bsd-license.php)
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are
- * met:
- *
- *    * Redistributions of source code must retain the above copyright
- *      notice, this list of conditions and the following disclaimer.
- *    * Redistributions in binary form must reproduce the above
- *      copyright notice, this list of conditions and the following disclaimer
- *      in the documentation and/or other materials provided with the
- *      distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
- * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * You can contact the author at:
- *   - xxHash homepage: https://www.xxhash.com
- *   - xxHash source repository: https://github.com/Cyan4973/xxHash
- */
-
-/*!
- * @mainpage xxHash
- *
- * xxHash is an extremely fast non-cryptographic hash algorithm, working at RAM speed
- * limits.
- *
- * It is proposed in four flavors, in three families:
- * 1. @ref XXH32_family
- *   - Classic 32-bit hash function. Simple, compact, and runs on almost all
- *     32-bit and 64-bit systems.
- * 2. @ref XXH64_family
- *   - Classic 64-bit adaptation of XXH32. Just as simple, and runs well on most
- *     64-bit systems (but _not_ 32-bit systems).
- * 3. @ref XXH3_family
- *   - Modern 64-bit and 128-bit hash function family which features improved
- *     strength and performance across the board, especially on smaller data.
- *     It benefits greatly from SIMD and 64-bit without requiring it.
- *
- * Benchmarks
- * ---
- * The reference system uses an Intel i7-9700K CPU, and runs Ubuntu x64 20.04.
- * The open source benchmark program is compiled with clang v10.0 using -O3 flag.
- *
- * | Hash Name            | ISA ext | Width | Large Data Speed | Small Data Velocity |
- * | -------------------- | ------- | ----: | ---------------: | ------------------: |
- * | XXH3_64bits()        | @b AVX2 |    64 |        59.4 GB/s |               133.1 |
- * | MeowHash             | AES-NI  |   128 |        58.2 GB/s |                52.5 |
- * | XXH3_128bits()       | @b AVX2 |   128 |        57.9 GB/s |               118.1 |
- * | CLHash               | PCLMUL  |    64 |        37.1 GB/s |                58.1 |
- * | XXH3_64bits()        | @b SSE2 |    64 |        31.5 GB/s |               133.1 |
- * | XXH3_128bits()       | @b SSE2 |   128 |        29.6 GB/s |               118.1 |
- * | RAM sequential read  |         |   N/A |        28.0 GB/s |                 N/A |
- * | ahash                | AES-NI  |    64 |        22.5 GB/s |               107.2 |
- * | City64               |         |    64 |        22.0 GB/s |                76.6 |
- * | T1ha2                |         |    64 |        22.0 GB/s |                99.0 |
- * | City128              |         |   128 |        21.7 GB/s |                57.7 |
- * | FarmHash             | AES-NI  |    64 |        21.3 GB/s |                71.9 |
- * | XXH64()              |         |    64 |        19.4 GB/s |                71.0 |
- * | SpookyHash           |         |    64 |        19.3 GB/s |                53.2 |
- * | Mum                  |         |    64 |        18.0 GB/s |                67.0 |
- * | CRC32C               | SSE4.2  |    32 |        13.0 GB/s |                57.9 |
- * | XXH32()              |         |    32 |         9.7 GB/s |                71.9 |
- * | City32               |         |    32 |         9.1 GB/s |                66.0 |
- * | Blake3*              | @b AVX2 |   256 |         4.4 GB/s |                 8.1 |
- * | Murmur3              |         |    32 |         3.9 GB/s |                56.1 |
- * | SipHash*             |         |    64 |         3.0 GB/s |                43.2 |
- * | Blake3*              | @b SSE2 |   256 |         2.4 GB/s |                 8.1 |
- * | HighwayHash          |         |    64 |         1.4 GB/s |                 6.0 |
- * | FNV64                |         |    64 |         1.2 GB/s |                62.7 |
- * | Blake2*              |         |   256 |         1.1 GB/s |                 5.1 |
- * | SHA1*                |         |   160 |         0.8 GB/s |                 5.6 |
- * | MD5*                 |         |   128 |         0.6 GB/s |                 7.8 |
- * @note
- *   - Hashes which require a specific ISA extension are noted. SSE2 is also noted,
- *     even though it is mandatory on x64.
- *   - Hashes with an asterisk are cryptographic. Note that MD5 is non-cryptographic
- *     by modern standards.
- *   - Small data velocity is a rough average of algorithm's efficiency for small
- *     data. For more accurate information, see the wiki.
- *   - More benchmarks and strength tests are found on the wiki:
- *         https://github.com/Cyan4973/xxHash/wiki
- *
- * Usage
- * ------
- * All xxHash variants use a similar API. Changing the algorithm is a trivial
- * substitution.
- *
- * @pre
- *    For functions which take an input and length parameter, the following
- *    requirements are assumed:
- *    - The range from [`input`, `input + length`) is valid, readable memory.
- *      - The only exception is if the `length` is `0`, `input` may be `NULL`.
- *    - For C++, the objects must have the *TriviallyCopyable* property, as the
- *      functions access bytes directly as if it was an array of `unsigned char`.
- *
- * @anchor single_shot_example
- * **Single Shot**
- *
- * These functions are stateless functions which hash a contiguous block of memory,
- * immediately returning the result. They are the easiest and usually the fastest
- * option.
- *
- * XXH32(), XXH64(), XXH3_64bits(), XXH3_128bits()
- *
- * @code{.c}
- *   #include <string.h>
- *   #include "xxhash.h"
- *
- *   // Example for a function which hashes a null terminated string with XXH32().
- *   XXH32_hash_t hash_string(const char* string, XXH32_hash_t seed)
- *   {
- *       // NULL pointers are only valid if the length is zero
- *       size_t length = (string == NULL) ? 0 : strlen(string);
- *       return XXH32(string, length, seed);
- *   }
- * @endcode
- *
- *
- * @anchor streaming_example
- * **Streaming**
- *
- * These groups of functions allow incremental hashing of unknown size, even
- * more than what would fit in a size_t.
- *
- * XXH32_reset(), XXH64_reset(), XXH3_64bits_reset(), XXH3_128bits_reset()
- *
- * @code{.c}
- *   #include <stdio.h>
- *   #include <assert.h>
- *   #include "xxhash.h"
- *   // Example for a function which hashes a FILE incrementally with XXH3_64bits().
- *   XXH64_hash_t hashFile(FILE* f)
- *   {
- *       // Allocate a state struct. Do not just use malloc() or new.
- *       XXH3_state_t* state = XXH3_createState();
- *       assert(state != NULL && "Out of memory!");
- *       // Reset the state to start a new hashing session.
- *       XXH3_64bits_reset(state);
- *       char buffer[4096];
- *       size_t count;
- *       // Read the file in chunks
- *       while ((count = fread(buffer, 1, sizeof(buffer), f)) != 0) {
- *           // Run update() as many times as necessary to process the data
- *           XXH3_64bits_update(state, buffer, count);
- *       }
- *       // Retrieve the finalized hash. This will not change the state.
- *       XXH64_hash_t result = XXH3_64bits_digest(state);
- *       // Free the state. Do not use free().
- *       XXH3_freeState(state);
- *       return result;
- *   }
- * @endcode
- *
- * Streaming functions generate the xxHash value from an incremental input.
- * This method is slower than single-call functions, due to state management.
- * For small inputs, prefer `XXH32()` and `XXH64()`, which are better optimized.
- *
- * An XXH state must first be allocated using `XXH*_createState()`.
- *
- * Start a new hash by initializing the state with a seed using `XXH*_reset()`.
- *
- * Then, feed the hash state by calling `XXH*_update()` as many times as necessary.
- *
- * The function returns an error code, with 0 meaning OK, and any other value
- * meaning there is an error.
- *
- * Finally, a hash value can be produced anytime, by using `XXH*_digest()`.
- * This function returns the nn-bits hash as an int or long long.
- *
- * It's still possible to continue inserting input into the hash state after a
- * digest, and generate new hash values later on by invoking `XXH*_digest()`.
- *
- * When done, release the state using `XXH*_freeState()`.
- *
- *
- * @anchor canonical_representation_example
- * **Canonical Representation**
- *
- * The default return values from XXH functions are unsigned 32, 64 and 128 bit
- * integers.
- * This the simplest and fastest format for further post-processing.
- *
- * However, this leaves open the question of what is the order on the byte level,
- * since little and big endian conventions will store the same number differently.
- *
- * The canonical representation settles this issue by mandating big-endian
- * convention, the same convention as human-readable numbers (large digits first).
- *
- * When writing hash values to storage, sending them over a network, or printing
- * them, it's highly recommended to use the canonical representation to ensure
- * portability across a wider range of systems, present and future.
- *
- * The following functions allow transformation of hash values to and from
- * canonical format.
- *
- * XXH32_canonicalFromHash(), XXH32_hashFromCanonical(),
- * XXH64_canonicalFromHash(), XXH64_hashFromCanonical(),
- * XXH128_canonicalFromHash(), XXH128_hashFromCanonical(),
- *
- * @code{.c}
- *   #include <stdio.h>
- *   #include "xxhash.h"
- *
- *   // Example for a function which prints XXH32_hash_t in human readable format
- *   void printXxh32(XXH32_hash_t hash)
- *   {
- *       XXH32_canonical_t cano;
- *       XXH32_canonicalFromHash(&cano, hash);
- *       size_t i;
- *       for(i = 0; i < sizeof(cano.digest); ++i) {
- *           printf("%02x", cano.digest[i]);
- *       }
- *       printf("\n");
- *   }
- *
- *   // Example for a function which converts XXH32_canonical_t to XXH32_hash_t
- *   XXH32_hash_t convertCanonicalToXxh32(XXH32_canonical_t cano)
- *   {
- *       XXH32_hash_t hash = XXH32_hashFromCanonical(&cano);
- *       return hash;
- *   }
- * @endcode
- *
- *
- * @file xxhash.h
- * xxHash prototypes and implementation
- */
-
-#if defined (__cplusplus)
-extern "C" {
-#endif
-
-/* ****************************
- *  INLINE mode
- ******************************/
-/*!
- * @defgroup public Public API
- * Contains details on the public xxHash functions.
- * @{
- */
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Gives access to internal state declaration, required for static allocation.
- *
- * Incompatible with dynamic linking, due to risks of ABI changes.
- *
- * Usage:
- * @code{.c}
- *     #define XXH_STATIC_LINKING_ONLY
- *     #include "xxhash.h"
- * @endcode
- */
-#  define XXH_STATIC_LINKING_ONLY
-/* Do not undef XXH_STATIC_LINKING_ONLY for Doxygen */
-
-/*!
- * @brief Gives access to internal definitions.
- *
- * Usage:
- * @code{.c}
- *     #define XXH_STATIC_LINKING_ONLY
- *     #define XXH_IMPLEMENTATION
- *     #include "xxhash.h"
- * @endcode
- */
-#  define XXH_IMPLEMENTATION
-/* Do not undef XXH_IMPLEMENTATION for Doxygen */
-
-/*!
- * @brief Exposes the implementation and marks all functions as `inline`.
- *
- * Use these build macros to inline xxhash into the target unit.
- * Inlining improves performance on small inputs, especially when the length is
- * expressed as a compile-time constant:
- *
- *  https://fastcompression.blogspot.com/2018/03/xxhash-for-small-keys-impressive-power.html
- *
- * It also keeps xxHash symbols private to the unit, so they are not exported.
- *
- * Usage:
- * @code{.c}
- *     #define XXH_INLINE_ALL
- *     #include "xxhash.h"
- * @endcode
- * Do not compile and link xxhash.o as a separate object, as it is not useful.
- */
-#  define XXH_INLINE_ALL
-#  undef XXH_INLINE_ALL
-/*!
- * @brief Exposes the implementation without marking functions as inline.
- */
-#  define XXH_PRIVATE_API
-#  undef XXH_PRIVATE_API
-/*!
- * @brief Emulate a namespace by transparently prefixing all symbols.
- *
- * If you want to include _and expose_ xxHash functions from within your own
- * library, but also want to avoid symbol collisions with other libraries which
- * may also include xxHash, you can use @ref XXH_NAMESPACE to automatically prefix
- * any public symbol from xxhash library with the value of @ref XXH_NAMESPACE
- * (therefore, avoid empty or numeric values).
- *
- * Note that no change is required within the calling program as long as it
- * includes `xxhash.h`: Regular symbol names will be automatically translated
- * by this header.
- */
-#  define XXH_NAMESPACE /* YOUR NAME HERE */
-#  undef XXH_NAMESPACE
-#endif
-
-#if (defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)) \
-    && !defined(XXH_INLINE_ALL_31684351384)
-   /* this section should be traversed only once */
-#  define XXH_INLINE_ALL_31684351384
-   /* give access to the advanced API, required to compile implementations */
-#  undef XXH_STATIC_LINKING_ONLY   /* avoid macro redef */
-#  define XXH_STATIC_LINKING_ONLY
-   /* make all functions private */
-#  undef XXH_PUBLIC_API
-#  if defined(__GNUC__)
-#    define XXH_PUBLIC_API static __inline __attribute__((__unused__))
-#  elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
-#    define XXH_PUBLIC_API static inline
-#  elif defined(_MSC_VER)
-#    define XXH_PUBLIC_API static __inline
-#  else
-     /* note: this version may generate warnings for unused static functions */
-#    define XXH_PUBLIC_API static
-#  endif
-
-   /*
-    * This part deals with the special case where a unit wants to inline xxHash,
-    * but "xxhash.h" has previously been included without XXH_INLINE_ALL,
-    * such as part of some previously included *.h header file.
-    * Without further action, the new include would just be ignored,
-    * and functions would effectively _not_ be inlined (silent failure).
-    * The following macros solve this situation by prefixing all inlined names,
-    * avoiding naming collision with previous inclusions.
-    */
-   /* Before that, we unconditionally #undef all symbols,
-    * in case they were already defined with XXH_NAMESPACE.
-    * They will then be redefined for XXH_INLINE_ALL
-    */
-#  undef XXH_versionNumber
-    /* XXH32 */
-#  undef XXH32
-#  undef XXH32_createState
-#  undef XXH32_freeState
-#  undef XXH32_reset
-#  undef XXH32_update
-#  undef XXH32_digest
-#  undef XXH32_copyState
-#  undef XXH32_canonicalFromHash
-#  undef XXH32_hashFromCanonical
-    /* XXH64 */
-#  undef XXH64
-#  undef XXH64_createState
-#  undef XXH64_freeState
-#  undef XXH64_reset
-#  undef XXH64_update
-#  undef XXH64_digest
-#  undef XXH64_copyState
-#  undef XXH64_canonicalFromHash
-#  undef XXH64_hashFromCanonical
-    /* XXH3_64bits */
-#  undef XXH3_64bits
-#  undef XXH3_64bits_withSecret
-#  undef XXH3_64bits_withSeed
-#  undef XXH3_64bits_withSecretandSeed
-#  undef XXH3_createState
-#  undef XXH3_freeState
-#  undef XXH3_copyState
-#  undef XXH3_64bits_reset
-#  undef XXH3_64bits_reset_withSeed
-#  undef XXH3_64bits_reset_withSecret
-#  undef XXH3_64bits_update
-#  undef XXH3_64bits_digest
-#  undef XXH3_generateSecret
-    /* XXH3_128bits */
-#  undef XXH128
-#  undef XXH3_128bits
-#  undef XXH3_128bits_withSeed
-#  undef XXH3_128bits_withSecret
-#  undef XXH3_128bits_reset
-#  undef XXH3_128bits_reset_withSeed
-#  undef XXH3_128bits_reset_withSecret
-#  undef XXH3_128bits_reset_withSecretandSeed
-#  undef XXH3_128bits_update
-#  undef XXH3_128bits_digest
-#  undef XXH128_isEqual
-#  undef XXH128_cmp
-#  undef XXH128_canonicalFromHash
-#  undef XXH128_hashFromCanonical
-    /* Finally, free the namespace itself */
-#  undef XXH_NAMESPACE
-
-    /* employ the namespace for XXH_INLINE_ALL */
-#  define XXH_NAMESPACE XXH_INLINE_
-   /*
-    * Some identifiers (enums, type names) are not symbols,
-    * but they must nonetheless be renamed to avoid redeclaration.
-    * Alternative solution: do not redeclare them.
-    * However, this requires some #ifdefs, and has a more dispersed impact.
-    * Meanwhile, renaming can be achieved in a single place.
-    */
-#  define XXH_IPREF(Id)   XXH_NAMESPACE ## Id
-#  define XXH_OK XXH_IPREF(XXH_OK)
-#  define XXH_ERROR XXH_IPREF(XXH_ERROR)
-#  define XXH_errorcode XXH_IPREF(XXH_errorcode)
-#  define XXH32_canonical_t  XXH_IPREF(XXH32_canonical_t)
-#  define XXH64_canonical_t  XXH_IPREF(XXH64_canonical_t)
-#  define XXH128_canonical_t XXH_IPREF(XXH128_canonical_t)
-#  define XXH32_state_s XXH_IPREF(XXH32_state_s)
-#  define XXH32_state_t XXH_IPREF(XXH32_state_t)
-#  define XXH64_state_s XXH_IPREF(XXH64_state_s)
-#  define XXH64_state_t XXH_IPREF(XXH64_state_t)
-#  define XXH3_state_s  XXH_IPREF(XXH3_state_s)
-#  define XXH3_state_t  XXH_IPREF(XXH3_state_t)
-#  define XXH128_hash_t XXH_IPREF(XXH128_hash_t)
-   /* Ensure the header is parsed again, even if it was previously included */
-#  undef XXHASH_H_5627135585666179
-#  undef XXHASH_H_STATIC_13879238742
-#endif /* XXH_INLINE_ALL || XXH_PRIVATE_API */
-
-/* ****************************************************************
- *  Stable API
- *****************************************************************/
-#ifndef XXHASH_H_5627135585666179
-#define XXHASH_H_5627135585666179 1
-
-/*! @brief Marks a global symbol. */
-#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
-#  if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
-#    ifdef XXH_EXPORT
-#      define XXH_PUBLIC_API __declspec(dllexport)
-#    elif XXH_IMPORT
-#      define XXH_PUBLIC_API __declspec(dllimport)
-#    endif
-#  else
-#    define XXH_PUBLIC_API   /* do nothing */
-#  endif
-#endif
-
-#ifdef XXH_NAMESPACE
-#  define XXH_CAT(A,B) A##B
-#  define XXH_NAME2(A,B) XXH_CAT(A,B)
-#  define XXH_versionNumber XXH_NAME2(XXH_NAMESPACE, XXH_versionNumber)
-/* XXH32 */
-#  define XXH32 XXH_NAME2(XXH_NAMESPACE, XXH32)
-#  define XXH32_createState XXH_NAME2(XXH_NAMESPACE, XXH32_createState)
-#  define XXH32_freeState XXH_NAME2(XXH_NAMESPACE, XXH32_freeState)
-#  define XXH32_reset XXH_NAME2(XXH_NAMESPACE, XXH32_reset)
-#  define XXH32_update XXH_NAME2(XXH_NAMESPACE, XXH32_update)
-#  define XXH32_digest XXH_NAME2(XXH_NAMESPACE, XXH32_digest)
-#  define XXH32_copyState XXH_NAME2(XXH_NAMESPACE, XXH32_copyState)
-#  define XXH32_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH32_canonicalFromHash)
-#  define XXH32_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH32_hashFromCanonical)
-/* XXH64 */
-#  define XXH64 XXH_NAME2(XXH_NAMESPACE, XXH64)
-#  define XXH64_createState XXH_NAME2(XXH_NAMESPACE, XXH64_createState)
-#  define XXH64_freeState XXH_NAME2(XXH_NAMESPACE, XXH64_freeState)
-#  define XXH64_reset XXH_NAME2(XXH_NAMESPACE, XXH64_reset)
-#  define XXH64_update XXH_NAME2(XXH_NAMESPACE, XXH64_update)
-#  define XXH64_digest XXH_NAME2(XXH_NAMESPACE, XXH64_digest)
-#  define XXH64_copyState XXH_NAME2(XXH_NAMESPACE, XXH64_copyState)
-#  define XXH64_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH64_canonicalFromHash)
-#  define XXH64_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH64_hashFromCanonical)
-/* XXH3_64bits */
-#  define XXH3_64bits XXH_NAME2(XXH_NAMESPACE, XXH3_64bits)
-#  define XXH3_64bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecret)
-#  define XXH3_64bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSeed)
-#  define XXH3_64bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_withSecretandSeed)
-#  define XXH3_createState XXH_NAME2(XXH_NAMESPACE, XXH3_createState)
-#  define XXH3_freeState XXH_NAME2(XXH_NAMESPACE, XXH3_freeState)
-#  define XXH3_copyState XXH_NAME2(XXH_NAMESPACE, XXH3_copyState)
-#  define XXH3_64bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset)
-#  define XXH3_64bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSeed)
-#  define XXH3_64bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecret)
-#  define XXH3_64bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_reset_withSecretandSeed)
-#  define XXH3_64bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_update)
-#  define XXH3_64bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_64bits_digest)
-#  define XXH3_generateSecret XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret)
-#  define XXH3_generateSecret_fromSeed XXH_NAME2(XXH_NAMESPACE, XXH3_generateSecret_fromSeed)
-/* XXH3_128bits */
-#  define XXH128 XXH_NAME2(XXH_NAMESPACE, XXH128)
-#  define XXH3_128bits XXH_NAME2(XXH_NAMESPACE, XXH3_128bits)
-#  define XXH3_128bits_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSeed)
-#  define XXH3_128bits_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecret)
-#  define XXH3_128bits_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_withSecretandSeed)
-#  define XXH3_128bits_reset XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset)
-#  define XXH3_128bits_reset_withSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSeed)
-#  define XXH3_128bits_reset_withSecret XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecret)
-#  define XXH3_128bits_reset_withSecretandSeed XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_reset_withSecretandSeed)
-#  define XXH3_128bits_update XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_update)
-#  define XXH3_128bits_digest XXH_NAME2(XXH_NAMESPACE, XXH3_128bits_digest)
-#  define XXH128_isEqual XXH_NAME2(XXH_NAMESPACE, XXH128_isEqual)
-#  define XXH128_cmp     XXH_NAME2(XXH_NAMESPACE, XXH128_cmp)
-#  define XXH128_canonicalFromHash XXH_NAME2(XXH_NAMESPACE, XXH128_canonicalFromHash)
-#  define XXH128_hashFromCanonical XXH_NAME2(XXH_NAMESPACE, XXH128_hashFromCanonical)
-#endif
-
-
-/* *************************************
-*  Compiler specifics
-***************************************/
-
-/* specific declaration modes for Windows */
-#if !defined(XXH_INLINE_ALL) && !defined(XXH_PRIVATE_API)
-#  if defined(_WIN32) && defined(_MSC_VER) && (defined(XXH_IMPORT) || defined(XXH_EXPORT))
-#    ifdef XXH_EXPORT
-#      define XXH_PUBLIC_API __declspec(dllexport)
-#    elif XXH_IMPORT
-#      define XXH_PUBLIC_API __declspec(dllimport)
-#    endif
-#  else
-#    define XXH_PUBLIC_API   /* do nothing */
-#  endif
-#endif
-
-#if defined (__GNUC__)
-# define XXH_CONSTF  __attribute__((__const__))
-# define XXH_PUREF   __attribute__((__pure__))
-# define XXH_MALLOCF __attribute__((__malloc__))
-#else
-# define XXH_CONSTF  /* disable */
-# define XXH_PUREF
-# define XXH_MALLOCF
-#endif
-
-/* *************************************
-*  Version
-***************************************/
-#define XXH_VERSION_MAJOR    0
-#define XXH_VERSION_MINOR    8
-#define XXH_VERSION_RELEASE  3
-/*! @brief Version number, encoded as two digits each */
-#define XXH_VERSION_NUMBER  (XXH_VERSION_MAJOR *100*100 + XXH_VERSION_MINOR *100 + XXH_VERSION_RELEASE)
-
-/*!
- * @brief Obtains the xxHash version.
- *
- * This is mostly useful when xxHash is compiled as a shared library,
- * since the returned value comes from the library, as opposed to header file.
- *
- * @return @ref XXH_VERSION_NUMBER of the invoked library.
- */
-XXH_PUBLIC_API XXH_CONSTF unsigned XXH_versionNumber (void);
-
-
-/* ****************************
-*  Common basic types
-******************************/
-#include <stddef.h>   /* size_t */
-/*!
- * @brief Exit code for the streaming API.
- */
-typedef enum {
-    XXH_OK = 0, /*!< OK */
-    XXH_ERROR   /*!< Error */
-} XXH_errorcode;
-
-
-/*-**********************************************************************
-*  32-bit hash
-************************************************************************/
-#if defined(XXH_DOXYGEN) /* Don't show <stdint.h> include */
-/*!
- * @brief An unsigned 32-bit integer.
- *
- * Not necessarily defined to `uint32_t` but functionally equivalent.
- */
-typedef uint32_t XXH32_hash_t;
-
-#elif !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   ifdef _AIX
-#     include <inttypes.h>
-#   else
-#     include <stdint.h>
-#   endif
-    typedef uint32_t XXH32_hash_t;
-
-#else
-#   include <limits.h>
-#   if UINT_MAX == 0xFFFFFFFFUL
-      typedef unsigned int XXH32_hash_t;
-#   elif ULONG_MAX == 0xFFFFFFFFUL
-      typedef unsigned long XXH32_hash_t;
-#   else
-#     error "unsupported platform: need a 32-bit type"
-#   endif
-#endif
-
-/*!
- * @}
- *
- * @defgroup XXH32_family XXH32 family
- * @ingroup public
- * Contains functions used in the classic 32-bit xxHash algorithm.
- *
- * @note
- *   XXH32 is useful for older platforms, with no or poor 64-bit performance.
- *   Note that the @ref XXH3_family provides competitive speed for both 32-bit
- *   and 64-bit systems, and offers true 64/128 bit hash results.
- *
- * @see @ref XXH64_family, @ref XXH3_family : Other xxHash families
- * @see @ref XXH32_impl for implementation details
- * @{
- */
-
-/*!
- * @brief Calculates the 32-bit hash of @p input using xxHash32.
- *
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- * @param seed The 32-bit seed to alter the hash's output predictably.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 32-bit xxHash32 value.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32 (const void* input, size_t length, XXH32_hash_t seed);
-
-#ifndef XXH_NO_STREAM
-/*!
- * @typedef struct XXH32_state_s XXH32_state_t
- * @brief The opaque state struct for the XXH32 streaming API.
- *
- * @see XXH32_state_s for details.
- * @see @ref streaming_example "Streaming Example"
- */
-typedef struct XXH32_state_s XXH32_state_t;
-
-/*!
- * @brief Allocates an @ref XXH32_state_t.
- *
- * @return An allocated pointer of @ref XXH32_state_t on success.
- * @return `NULL` on failure.
- *
- * @note Must be freed with XXH32_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_MALLOCF XXH32_state_t* XXH32_createState(void);
-/*!
- * @brief Frees an @ref XXH32_state_t.
- *
- * @param statePtr A pointer to an @ref XXH32_state_t allocated with @ref XXH32_createState().
- *
- * @return @ref XXH_OK.
- *
- * @note @p statePtr must be allocated with XXH32_createState().
- *
- * @see @ref streaming_example "Streaming Example"
- *
- */
-XXH_PUBLIC_API XXH_errorcode  XXH32_freeState(XXH32_state_t* statePtr);
-/*!
- * @brief Copies one @ref XXH32_state_t to another.
- *
- * @param dst_state The state to copy to.
- * @param src_state The state to copy from.
- * @pre
- *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
- */
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dst_state, const XXH32_state_t* src_state);
-
-/*!
- * @brief Resets an @ref XXH32_state_t to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- * @param seed The 32-bit seed to alter the hash result predictably.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note This function resets and seeds a state. Call it before @ref XXH32_update().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH32_reset  (XXH32_state_t* statePtr, XXH32_hash_t seed);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH32_state_t.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH32_update (XXH32_state_t* statePtr, const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated hash value from an @ref XXH32_state_t.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *  @p statePtr must not be `NULL`.
- *
- * @return The calculated 32-bit xxHash32 value from that state.
- *
- * @note
- *   Calling XXH32_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_digest (const XXH32_state_t* statePtr);
-#endif /* !XXH_NO_STREAM */
-
-/*******   Canonical representation   *******/
-
-/*!
- * @brief Canonical (big endian) representation of @ref XXH32_hash_t.
- */
-typedef struct {
-    unsigned char digest[4]; /*!< Hash bytes, big endian */
-} XXH32_canonical_t;
-
-/*!
- * @brief Converts an @ref XXH32_hash_t to a big endian @ref XXH32_canonical_t.
- *
- * @param dst  The @ref XXH32_canonical_t pointer to be stored to.
- * @param hash The @ref XXH32_hash_t to be converted.
- *
- * @pre
- *   @p dst must not be `NULL`.
- *
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash);
-
-/*!
- * @brief Converts an @ref XXH32_canonical_t to a native @ref XXH32_hash_t.
- *
- * @param src The @ref XXH32_canonical_t to convert.
- *
- * @pre
- *   @p src must not be `NULL`.
- *
- * @return The converted hash.
- *
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src);
-
-
-/*! @cond Doxygen ignores this part */
-#ifdef __has_attribute
-# define XXH_HAS_ATTRIBUTE(x) __has_attribute(x)
-#else
-# define XXH_HAS_ATTRIBUTE(x) 0
-#endif
-/*! @endcond */
-
-/*! @cond Doxygen ignores this part */
-/*
- * C23 __STDC_VERSION__ number hasn't been specified yet. For now
- * leave as `201711L` (C17 + 1).
- * TODO: Update to correct value when its been specified.
- */
-#define XXH_C23_VN 201711L
-/*! @endcond */
-
-/*! @cond Doxygen ignores this part */
-/* C-language Attributes are added in C23. */
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN) && defined(__has_c_attribute)
-# define XXH_HAS_C_ATTRIBUTE(x) __has_c_attribute(x)
-#else
-# define XXH_HAS_C_ATTRIBUTE(x) 0
-#endif
-/*! @endcond */
-
-/*! @cond Doxygen ignores this part */
-#if defined(__cplusplus) && defined(__has_cpp_attribute)
-# define XXH_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
-#else
-# define XXH_HAS_CPP_ATTRIBUTE(x) 0
-#endif
-/*! @endcond */
-
-/*! @cond Doxygen ignores this part */
-/*
- * Define XXH_FALLTHROUGH macro for annotating switch case with the 'fallthrough' attribute
- * introduced in CPP17 and C23.
- * CPP17 : https://en.cppreference.com/w/cpp/language/attributes/fallthrough
- * C23   : https://en.cppreference.com/w/c/language/attributes/fallthrough
- */
-#if XXH_HAS_C_ATTRIBUTE(fallthrough) || XXH_HAS_CPP_ATTRIBUTE(fallthrough)
-# define XXH_FALLTHROUGH [[fallthrough]]
-#elif XXH_HAS_ATTRIBUTE(__fallthrough__)
-# define XXH_FALLTHROUGH __attribute__ ((__fallthrough__))
-#else
-# define XXH_FALLTHROUGH /* fallthrough */
-#endif
-/*! @endcond */
-
-/*! @cond Doxygen ignores this part */
-/*
- * Define XXH_NOESCAPE for annotated pointers in public API.
- * https://clang.llvm.org/docs/AttributeReference.html#noescape
- * As of writing this, only supported by clang.
- */
-#if XXH_HAS_ATTRIBUTE(noescape)
-# define XXH_NOESCAPE __attribute__((__noescape__))
-#else
-# define XXH_NOESCAPE
-#endif
-/*! @endcond */
-
-
-/*!
- * @}
- * @ingroup public
- * @{
- */
-
-#ifndef XXH_NO_LONG_LONG
-/*-**********************************************************************
-*  64-bit hash
-************************************************************************/
-#if defined(XXH_DOXYGEN) /* don't include <stdint.h> */
-/*!
- * @brief An unsigned 64-bit integer.
- *
- * Not necessarily defined to `uint64_t` but functionally equivalent.
- */
-typedef uint64_t XXH64_hash_t;
-#elif !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   ifdef _AIX
-#     include <inttypes.h>
-#   else
-#     include <stdint.h>
-#   endif
-   typedef uint64_t XXH64_hash_t;
-#else
-#  include <limits.h>
-#  if defined(__LP64__) && ULONG_MAX == 0xFFFFFFFFFFFFFFFFULL
-     /* LP64 ABI says uint64_t is unsigned long */
-     typedef unsigned long XXH64_hash_t;
-#  else
-     /* the following type must have a width of 64-bit */
-     typedef unsigned long long XXH64_hash_t;
-#  endif
-#endif
-
-/*!
- * @}
- *
- * @defgroup XXH64_family XXH64 family
- * @ingroup public
- * @{
- * Contains functions used in the classic 64-bit xxHash algorithm.
- *
- * @note
- *   XXH3 provides competitive speed for both 32-bit and 64-bit systems,
- *   and offers true 64/128 bit hash results.
- *   It provides better speed for systems with vector processing capabilities.
- */
-
-/*!
- * @brief Calculates the 64-bit hash of @p input using xxHash64.
- *
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- * @param seed The 64-bit seed to alter the hash's output predictably.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 64-bit xxHash64 value.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
-
-/*******   Streaming   *******/
-#ifndef XXH_NO_STREAM
-/*!
- * @brief The opaque state struct for the XXH64 streaming API.
- *
- * @see XXH64_state_s for details.
- * @see @ref streaming_example "Streaming Example"
- */
-typedef struct XXH64_state_s XXH64_state_t;   /* incomplete type */
-
-/*!
- * @brief Allocates an @ref XXH64_state_t.
- *
- * @return An allocated pointer of @ref XXH64_state_t on success.
- * @return `NULL` on failure.
- *
- * @note Must be freed with XXH64_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_MALLOCF XXH64_state_t* XXH64_createState(void);
-
-/*!
- * @brief Frees an @ref XXH64_state_t.
- *
- * @param statePtr A pointer to an @ref XXH64_state_t allocated with @ref XXH64_createState().
- *
- * @return @ref XXH_OK.
- *
- * @note @p statePtr must be allocated with XXH64_createState().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode  XXH64_freeState(XXH64_state_t* statePtr);
-
-/*!
- * @brief Copies one @ref XXH64_state_t to another.
- *
- * @param dst_state The state to copy to.
- * @param src_state The state to copy from.
- * @pre
- *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
- */
-XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dst_state, const XXH64_state_t* src_state);
-
-/*!
- * @brief Resets an @ref XXH64_state_t to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- * @param seed The 64-bit seed to alter the hash result predictably.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note This function resets and seeds a state. Call it before @ref XXH64_update().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset  (XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH64_state_t.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH64_update (XXH_NOESCAPE XXH64_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated hash value from an @ref XXH64_state_t.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *  @p statePtr must not be `NULL`.
- *
- * @return The calculated 64-bit xxHash64 value from that state.
- *
- * @note
- *   Calling XXH64_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_digest (XXH_NOESCAPE const XXH64_state_t* statePtr);
-#endif /* !XXH_NO_STREAM */
-/*******   Canonical representation   *******/
-
-/*!
- * @brief Canonical (big endian) representation of @ref XXH64_hash_t.
- */
-typedef struct { unsigned char digest[sizeof(XXH64_hash_t)]; } XXH64_canonical_t;
-
-/*!
- * @brief Converts an @ref XXH64_hash_t to a big endian @ref XXH64_canonical_t.
- *
- * @param dst The @ref XXH64_canonical_t pointer to be stored to.
- * @param hash The @ref XXH64_hash_t to be converted.
- *
- * @pre
- *   @p dst must not be `NULL`.
- *
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash);
-
-/*!
- * @brief Converts an @ref XXH64_canonical_t to a native @ref XXH64_hash_t.
- *
- * @param src The @ref XXH64_canonical_t to convert.
- *
- * @pre
- *   @p src must not be `NULL`.
- *
- * @return The converted hash.
- *
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src);
-
-#ifndef XXH_NO_XXH3
-
-/*!
- * @}
- * ************************************************************************
- * @defgroup XXH3_family XXH3 family
- * @ingroup public
- * @{
- *
- * XXH3 is a more recent hash algorithm featuring:
- *  - Improved speed for both small and large inputs
- *  - True 64-bit and 128-bit outputs
- *  - SIMD acceleration
- *  - Improved 32-bit viability
- *
- * Speed analysis methodology is explained here:
- *
- *    https://fastcompression.blogspot.com/2019/03/presenting-xxh3.html
- *
- * Compared to XXH64, expect XXH3 to run approximately
- * ~2x faster on large inputs and >3x faster on small ones,
- * exact differences vary depending on platform.
- *
- * XXH3's speed benefits greatly from SIMD and 64-bit arithmetic,
- * but does not require it.
- * Most 32-bit and 64-bit targets that can run XXH32 smoothly can run XXH3
- * at competitive speeds, even without vector support. Further details are
- * explained in the implementation.
- *
- * XXH3 has a fast scalar implementation, but it also includes accelerated SIMD
- * implementations for many common platforms:
- *   - AVX512
- *   - AVX2
- *   - SSE2
- *   - ARM NEON
- *   - WebAssembly SIMD128
- *   - POWER8 VSX
- *   - s390x ZVector
- * This can be controlled via the @ref XXH_VECTOR macro, but it automatically
- * selects the best version according to predefined macros. For the x86 family, an
- * automatic runtime dispatcher is included separately in @ref xxh_x86dispatch.c.
- *
- * XXH3 implementation is portable:
- * it has a generic C90 formulation that can be compiled on any platform,
- * all implementations generate exactly the same hash value on all platforms.
- * Starting from v0.8.0, it's also labelled "stable", meaning that
- * any future version will also generate the same hash value.
- *
- * XXH3 offers 2 variants, _64bits and _128bits.
- *
- * When only 64 bits are needed, prefer invoking the _64bits variant, as it
- * reduces the amount of mixing, resulting in faster speed on small inputs.
- * It's also generally simpler to manipulate a scalar return type than a struct.
- *
- * The API supports one-shot hashing, streaming mode, and custom secrets.
- */
-
-/*!
- * @ingroup tuning
- * @brief Possible values for @ref XXH_VECTOR.
- *
- * Unless set explicitly, determined automatically.
- */
-#  define XXH_SCALAR 0 /*!< Portable scalar version */
-#  define XXH_SSE2   1 /*!< SSE2 for Pentium 4, Opteron, all x86_64. */
-#  define XXH_AVX2   2 /*!< AVX2 for Haswell and Bulldozer */
-#  define XXH_AVX512 3 /*!< AVX512 for Skylake and Icelake */
-#  define XXH_NEON   4 /*!< NEON for most ARMv7-A, all AArch64, and WASM SIMD128 */
-#  define XXH_VSX    5 /*!< VSX and ZVector for POWER8/z13 (64-bit) */
-#  define XXH_SVE    6 /*!< SVE for some ARMv8-A and ARMv9-A */
-#  define XXH_LSX    7 /*!< LSX (128-bit SIMD) for LoongArch64 */
-
-
-/*-**********************************************************************
-*  XXH3 64-bit variant
-************************************************************************/
-
-/*!
- * @brief Calculates 64-bit unseeded variant of XXH3 hash of @p input.
- *
- * @param input  The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 64-bit XXH3 hash value.
- *
- * @note
- *   This is equivalent to @ref XXH3_64bits_withSeed() with a seed of `0`, however
- *   it may have slightly better performance due to constant propagation of the
- *   defaults.
- *
- * @see
- *    XXH3_64bits_withSeed(), XXH3_64bits_withSecret(): other seeding variants
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length);
-
-/*!
- * @brief Calculates 64-bit seeded variant of XXH3 hash of @p input.
- *
- * @param input  The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- * @param seed   The 64-bit seed to alter the hash result predictably.
- *
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 64-bit XXH3 hash value.
- *
- * @note
- *    seed == 0 produces the same results as @ref XXH3_64bits().
- *
- * This variant generates a custom secret on the fly based on default secret
- * altered using the @p seed value.
- *
- * While this operation is decently fast, note that it's not completely free.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed);
-
-/*!
- * The bare minimum size for a custom secret.
- *
- * @see
- *  XXH3_64bits_withSecret(), XXH3_64bits_reset_withSecret(),
- *  XXH3_128bits_withSecret(), XXH3_128bits_reset_withSecret().
- */
-#define XXH3_SECRET_SIZE_MIN 136
-
-/*!
- * @brief Calculates 64-bit variant of XXH3 with a custom "secret".
- *
- * @param data       The block of data to be hashed, at least @p len bytes in size.
- * @param len        The length of @p data, in bytes.
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- *
- * @return The calculated 64-bit XXH3 hash value.
- *
- * @pre
- *   The memory between @p data and @p data + @p len must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p data may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * It's possible to provide any blob of bytes as a "secret" to generate the hash.
- * This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
- * However, the quality of the secret impacts the dispersion of the hash algorithm.
- * Therefore, the secret _must_ look like a bunch of random bytes.
- * Avoid "trivial" or structured data such as repeated sequences or a text document.
- * Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing @ref XXH3_generateSecret() instead (see below).
- * It will generate a proper high entropy secret derived from the blob of bytes.
- * Another advantage of using XXH3_generateSecret() is that
- * it guarantees that all bits within the initial blob of bytes
- * will impact every bit of the output.
- * This is not necessarily the case when using the blob of bytes directly
- * because, when hashing _small_ inputs, only a portion of the secret is employed.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t XXH3_64bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
-
-
-/*******   Streaming   *******/
-#ifndef XXH_NO_STREAM
-/*
- * Streaming requires state maintenance.
- * This operation costs memory and CPU.
- * As a consequence, streaming is slower than one-shot hashing.
- * For better performance, prefer one-shot functions whenever applicable.
- */
-
-/*!
- * @brief The opaque state struct for the XXH3 streaming API.
- *
- * @see XXH3_state_s for details.
- * @see @ref streaming_example "Streaming Example"
- */
-typedef struct XXH3_state_s XXH3_state_t;
-XXH_PUBLIC_API XXH_MALLOCF XXH3_state_t* XXH3_createState(void);
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr);
-
-/*!
- * @brief Copies one @ref XXH3_state_t to another.
- *
- * @param dst_state The state to copy to.
- * @param src_state The state to copy from.
- * @pre
- *   @p dst_state and @p src_state must not be `NULL` and must not overlap.
- */
-XXH_PUBLIC_API void XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state);
-
-/*!
- * @brief Resets an @ref XXH3_state_t to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   - This function resets `statePtr` and generate a secret with default parameters.
- *   - Call this function before @ref XXH3_64bits_update().
- *   - Digest will be equivalent to `XXH3_64bits()`.
- *
- * @see @ref streaming_example "Streaming Example"
- *
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
-
-/*!
- * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- * @param seed     The 64-bit seed to alter the hash result predictably.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   - This function resets `statePtr` and generate a secret from `seed`.
- *   - Call this function before @ref XXH3_64bits_update().
- *   - Digest will be equivalent to `XXH3_64bits_withSeed()`.
- *
- * @see @ref streaming_example "Streaming Example"
- *
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
-
-/*!
- * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   `secret` is referenced, it _must outlive_ the hash streaming session.
- *
- * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
- * and the quality of produced hash values depends on secret's entropy
- * (secret's content should look like a bunch of random bytes).
- * When in doubt about the randomness of a candidate `secret`,
- * consider employing `XXH3_generateSecret()` instead (see below).
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH3_state_t.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- * @pre
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note Call this to incrementally consume blocks of data.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_64bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated XXH3 64-bit hash value from an @ref XXH3_state_t.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *  @p statePtr must not be `NULL`.
- *
- * @return The calculated XXH3 64-bit hash value from that state.
- *
- * @note
- *   Calling XXH3_64bits_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t  XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
-#endif /* !XXH_NO_STREAM */
-
-/* note : canonical representation of XXH3 is the same as XXH64
- * since they both produce XXH64_hash_t values */
-
-
-/*-**********************************************************************
-*  XXH3 128-bit variant
-************************************************************************/
-
-/*!
- * @brief The return value from 128-bit hashes.
- *
- * Stored in little endian order, although the fields themselves are in native
- * endianness.
- */
-typedef struct {
-    XXH64_hash_t low64;   /*!< `value & 0xFFFFFFFFFFFFFFFF` */
-    XXH64_hash_t high64;  /*!< `value >> 64` */
-} XXH128_hash_t;
-
-/*!
- * @brief Calculates 128-bit unseeded variant of XXH3 of @p data.
- *
- * @param data The block of data to be hashed, at least @p length bytes in size.
- * @param len  The length of @p data, in bytes.
- *
- * @return The calculated 128-bit variant of XXH3 value.
- *
- * The 128-bit variant of XXH3 has more strength, but it has a bit of overhead
- * for shorter inputs.
- *
- * This is equivalent to @ref XXH3_128bits_withSeed() with a seed of `0`, however
- * it may have slightly better performance due to constant propagation of the
- * defaults.
- *
- * @see XXH3_128bits_withSeed(), XXH3_128bits_withSecret(): other seeding variants
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* data, size_t len);
-/*! @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
- *
- * @param data The block of data to be hashed, at least @p length bytes in size.
- * @param len  The length of @p data, in bytes.
- * @param seed The 64-bit seed to alter the hash result predictably.
- *
- * @return The calculated 128-bit variant of XXH3 value.
- *
- * @note
- *    seed == 0 produces the same results as @ref XXH3_64bits().
- *
- * This variant generates a custom secret on the fly based on default secret
- * altered using the @p seed value.
- *
- * While this operation is decently fast, note that it's not completely free.
- *
- * @see XXH3_128bits(), XXH3_128bits_withSecret(): other seeding variants
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSeed(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
-/*!
- * @brief Calculates 128-bit variant of XXH3 with a custom "secret".
- *
- * @param data       The block of data to be hashed, at least @p len bytes in size.
- * @param len        The length of @p data, in bytes.
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- *
- * @return The calculated 128-bit variant of XXH3 value.
- *
- * It's possible to provide any blob of bytes as a "secret" to generate the hash.
- * This makes it more difficult for an external actor to prepare an intentional collision.
- * The main condition is that @p secretSize *must* be large enough (>= @ref XXH3_SECRET_SIZE_MIN).
- * However, the quality of the secret impacts the dispersion of the hash algorithm.
- * Therefore, the secret _must_ look like a bunch of random bytes.
- * Avoid "trivial" or structured data such as repeated sequences or a text document.
- * Whenever in doubt about the "randomness" of the blob of bytes,
- * consider employing @ref XXH3_generateSecret() instead (see below).
- * It will generate a proper high entropy secret derived from the blob of bytes.
- * Another advantage of using XXH3_generateSecret() is that
- * it guarantees that all bits within the initial blob of bytes
- * will impact every bit of the output.
- * This is not necessarily the case when using the blob of bytes directly
- * because, when hashing _small_ inputs, only a portion of the secret is employed.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_withSecret(XXH_NOESCAPE const void* data, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize);
-
-/*******   Streaming   *******/
-#ifndef XXH_NO_STREAM
-/*
- * Streaming requires state maintenance.
- * This operation costs memory and CPU.
- * As a consequence, streaming is slower than one-shot hashing.
- * For better performance, prefer one-shot functions whenever applicable.
- *
- * XXH3_128bits uses the same XXH3_state_t as XXH3_64bits().
- * Use already declared XXH3_createState() and XXH3_freeState().
- *
- * All reset and streaming functions have same meaning as their 64-bit counterpart.
- */
-
-/*!
- * @brief Resets an @ref XXH3_state_t to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   - This function resets `statePtr` and generate a secret with default parameters.
- *   - Call it before @ref XXH3_128bits_update().
- *   - Digest will be equivalent to `XXH3_128bits()`.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr);
-
-/*!
- * @brief Resets an @ref XXH3_state_t with 64-bit seed to begin a new hash.
- *
- * @param statePtr The state struct to reset.
- * @param seed     The 64-bit seed to alter the hash result predictably.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   - This function resets `statePtr` and generate a secret from `seed`.
- *   - Call it before @ref XXH3_128bits_update().
- *   - Digest will be equivalent to `XXH3_128bits_withSeed()`.
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed);
-/*!
- * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
- *
- * @param statePtr   The state struct to reset.
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * `secret` is referenced, it _must outlive_ the hash streaming session.
- * Similar to one-shot API, `secretSize` must be >= @ref XXH3_SECRET_SIZE_MIN,
- * and the quality of produced hash values depends on secret's entropy
- * (secret's content should look like a bunch of random bytes).
- * When in doubt about the randomness of a candidate `secret`,
- * consider employing `XXH3_generateSecret()` instead (see below).
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize);
-
-/*!
- * @brief Consumes a block of @p input to an @ref XXH3_state_t.
- *
- * Call this to incrementally consume blocks of data.
- *
- * @param statePtr The state struct to update.
- * @param input The block of data to be hashed, at least @p length bytes in size.
- * @param length The length of @p input, in bytes.
- *
- * @pre
- *   @p statePtr must not be `NULL`.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @note
- *   The memory between @p input and @p input + @p length must be valid,
- *   readable, contiguous memory. However, if @p length is `0`, @p input may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_128bits_update (XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* input, size_t length);
-
-/*!
- * @brief Returns the calculated XXH3 128-bit hash value from an @ref XXH3_state_t.
- *
- * @param statePtr The state struct to calculate the hash from.
- *
- * @pre
- *  @p statePtr must not be `NULL`.
- *
- * @return The calculated XXH3 128-bit hash value from that state.
- *
- * @note
- *   Calling XXH3_128bits_digest() will not affect @p statePtr, so you can update,
- *   digest, and update again.
- *
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* statePtr);
-#endif /* !XXH_NO_STREAM */
-
-/* Following helper functions make it possible to compare XXH128_hast_t values.
- * Since XXH128_hash_t is a structure, this capability is not offered by the language.
- * Note: For better performance, these functions can be inlined using XXH_INLINE_ALL */
-
-/*!
- * @brief Check equality of two XXH128_hash_t values
- *
- * @param h1 The 128-bit hash value.
- * @param h2 Another 128-bit hash value.
- *
- * @return `1` if `h1` and `h2` are equal.
- * @return `0` if they are not.
- */
-XXH_PUBLIC_API XXH_PUREF int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2);
-
-/*!
- * @brief Compares two @ref XXH128_hash_t
- *
- * This comparator is compatible with stdlib's `qsort()`/`bsearch()`.
- *
- * @param h128_1 Left-hand side value
- * @param h128_2 Right-hand side value
- *
- * @return >0 if @p h128_1  > @p h128_2
- * @return =0 if @p h128_1 == @p h128_2
- * @return <0 if @p h128_1  < @p h128_2
- */
-XXH_PUBLIC_API XXH_PUREF int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2);
-
-
-/*******   Canonical representation   *******/
-typedef struct { unsigned char digest[sizeof(XXH128_hash_t)]; } XXH128_canonical_t;
-
-
-/*!
- * @brief Converts an @ref XXH128_hash_t to a big endian @ref XXH128_canonical_t.
- *
- * @param dst  The @ref XXH128_canonical_t pointer to be stored to.
- * @param hash The @ref XXH128_hash_t to be converted.
- *
- * @pre
- *   @p dst must not be `NULL`.
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API void XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash);
-
-/*!
- * @brief Converts an @ref XXH128_canonical_t to a native @ref XXH128_hash_t.
- *
- * @param src The @ref XXH128_canonical_t to convert.
- *
- * @pre
- *   @p src must not be `NULL`.
- *
- * @return The converted hash.
- * @see @ref canonical_representation_example "Canonical Representation Example"
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src);
-
-
-#endif  /* !XXH_NO_XXH3 */
-#endif  /* XXH_NO_LONG_LONG */
-
-/*!
- * @}
- */
-#endif /* XXHASH_H_5627135585666179 */
-
-
-
-#if defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742)
-#define XXHASH_H_STATIC_13879238742
-/* ****************************************************************************
- * This section contains declarations which are not guaranteed to remain stable.
- * They may change in future versions, becoming incompatible with a different
- * version of the library.
- * These declarations should only be used with static linking.
- * Never use them in association with dynamic linking!
- ***************************************************************************** */
-
-/*
- * These definitions are only present to allow static allocation
- * of XXH states, on stack or in a struct, for example.
- * Never **ever** access their members directly.
- */
-
-/*!
- * @internal
- * @brief Structure for XXH32 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
- * an opaque type. This allows fields to safely be changed.
- *
- * Typedef'd to @ref XXH32_state_t.
- * Do not access the members of this struct directly.
- * @see XXH64_state_s, XXH3_state_s
- */
-struct XXH32_state_s {
-   XXH32_hash_t total_len_32; /*!< Total length hashed, modulo 2^32 */
-   XXH32_hash_t large_len;    /*!< Whether the hash is >= 16 (handles @ref total_len_32 overflow) */
-   XXH32_hash_t acc[4];       /*!< Accumulator lanes */
-   unsigned char buffer[16];  /*!< Internal buffer for partial reads. */
-   XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */
-   XXH32_hash_t reserved;     /*!< Reserved field. Do not read nor write to it. */
-};   /* typedef'd to XXH32_state_t */
-
-
-#ifndef XXH_NO_LONG_LONG  /* defined when there is no 64-bit support */
-
-/*!
- * @internal
- * @brief Structure for XXH64 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined. Otherwise it is
- * an opaque type. This allows fields to safely be changed.
- *
- * Typedef'd to @ref XXH64_state_t.
- * Do not access the members of this struct directly.
- * @see XXH32_state_s, XXH3_state_s
- */
-struct XXH64_state_s {
-   XXH64_hash_t total_len;    /*!< Total length hashed. This is always 64-bit. */
-   XXH64_hash_t acc[4];       /*!< Accumulator lanes */
-   unsigned char buffer[32];  /*!< Internal buffer for partial reads.. */
-   XXH32_hash_t bufferedSize; /*!< Amount of data in @ref buffer */
-   XXH32_hash_t reserved32;   /*!< Reserved field, needed for padding anyways*/
-   XXH64_hash_t reserved64;   /*!< Reserved field. Do not read or write to it. */
-};   /* typedef'd to XXH64_state_t */
-
-#ifndef XXH_NO_XXH3
-
-#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L) /* >= C11 */
-#  define XXH_ALIGN(n)      _Alignas(n)
-#elif defined(__cplusplus) && (__cplusplus >= 201103L) /* >= C++11 */
-/* In C++ alignas() is a keyword */
-#  define XXH_ALIGN(n)      alignas(n)
-#elif defined(__GNUC__)
-#  define XXH_ALIGN(n)      __attribute__ ((aligned(n)))
-#elif defined(_MSC_VER)
-#  define XXH_ALIGN(n)      __declspec(align(n))
-#else
-#  define XXH_ALIGN(n)   /* disabled */
-#endif
-
-/* Old GCC versions only accept the attribute after the type in structures. */
-#if !(defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L))   /* C11+ */ \
-    && ! (defined(__cplusplus) && (__cplusplus >= 201103L)) /* >= C++11 */ \
-    && defined(__GNUC__)
-#   define XXH_ALIGN_MEMBER(align, type) type XXH_ALIGN(align)
-#else
-#   define XXH_ALIGN_MEMBER(align, type) XXH_ALIGN(align) type
-#endif
-
-/*!
- * @brief The size of the internal XXH3 buffer.
- *
- * This is the optimal update size for incremental hashing.
- *
- * @see XXH3_64b_update(), XXH3_128b_update().
- */
-#define XXH3_INTERNALBUFFER_SIZE 256
-
-/*!
- * @internal
- * @brief Default size of the secret buffer (and @ref XXH3_kSecret).
- *
- * This is the size used in @ref XXH3_kSecret and the seeded functions.
- *
- * Not to be confused with @ref XXH3_SECRET_SIZE_MIN.
- */
-#define XXH3_SECRET_DEFAULT_SIZE 192
-
-/*!
- * @internal
- * @brief Structure for XXH3 streaming API.
- *
- * @note This is only defined when @ref XXH_STATIC_LINKING_ONLY,
- * @ref XXH_INLINE_ALL, or @ref XXH_IMPLEMENTATION is defined.
- * Otherwise it is an opaque type.
- * Never use this definition in combination with dynamic library.
- * This allows fields to safely be changed in the future.
- *
- * @note ** This structure has a strict alignment requirement of 64 bytes!! **
- * Do not allocate this with `malloc()` or `new`,
- * it will not be sufficiently aligned.
- * Use @ref XXH3_createState() and @ref XXH3_freeState(), or stack allocation.
- *
- * Typedef'd to @ref XXH3_state_t.
- * Do never access the members of this struct directly.
- *
- * @see XXH3_INITSTATE() for stack initialization.
- * @see XXH3_createState(), XXH3_freeState().
- * @see XXH32_state_s, XXH64_state_s
- */
-struct XXH3_state_s {
-   XXH_ALIGN_MEMBER(64, XXH64_hash_t acc[8]);
-       /*!< The 8 accumulators. See @ref XXH32_state_s::v and @ref XXH64_state_s::v */
-   XXH_ALIGN_MEMBER(64, unsigned char customSecret[XXH3_SECRET_DEFAULT_SIZE]);
-       /*!< Used to store a custom secret generated from a seed. */
-   XXH_ALIGN_MEMBER(64, unsigned char buffer[XXH3_INTERNALBUFFER_SIZE]);
-       /*!< The internal buffer. @see XXH32_state_s::mem32 */
-   XXH32_hash_t bufferedSize;
-       /*!< The amount of memory in @ref buffer, @see XXH32_state_s::memsize */
-   XXH32_hash_t useSeed;
-       /*!< Reserved field. Needed for padding on 64-bit. */
-   size_t nbStripesSoFar;
-       /*!< Number or stripes processed. */
-   XXH64_hash_t totalLen;
-       /*!< Total length hashed. 64-bit even on 32-bit targets. */
-   size_t nbStripesPerBlock;
-       /*!< Number of stripes per block. */
-   size_t secretLimit;
-       /*!< Size of @ref customSecret or @ref extSecret */
-   XXH64_hash_t seed;
-       /*!< Seed for _withSeed variants. Must be zero otherwise, @see XXH3_INITSTATE() */
-   XXH64_hash_t reserved64;
-       /*!< Reserved field. */
-   const unsigned char* extSecret;
-       /*!< Reference to an external secret for the _withSecret variants, NULL
-        *   for other variants. */
-   /* note: there may be some padding at the end due to alignment on 64 bytes */
-}; /* typedef'd to XXH3_state_t */
-
-#undef XXH_ALIGN_MEMBER
-
-/*!
- * @brief Initializes a stack-allocated `XXH3_state_s`.
- *
- * When the @ref XXH3_state_t structure is merely emplaced on stack,
- * it should be initialized with XXH3_INITSTATE() or a memset()
- * in case its first reset uses XXH3_NNbits_reset_withSeed().
- * This init can be omitted if the first reset uses default or _withSecret mode.
- * This operation isn't necessary when the state is created with XXH3_createState().
- * Note that this doesn't prepare the state for a streaming operation,
- * it's still necessary to use XXH3_NNbits_reset*() afterwards.
- */
-#define XXH3_INITSTATE(XXH3_state_ptr)                       \
-    do {                                                     \
-        XXH3_state_t* tmp_xxh3_state_ptr = (XXH3_state_ptr); \
-        tmp_xxh3_state_ptr->seed = 0;                        \
-        tmp_xxh3_state_ptr->extSecret = NULL;                \
-    } while(0)
-
-
-/*!
- * @brief Calculates the 128-bit hash of @p data using XXH3.
- *
- * @param data The block of data to be hashed, at least @p len bytes in size.
- * @param len  The length of @p data, in bytes.
- * @param seed The 64-bit seed to alter the hash's output predictably.
- *
- * @pre
- *   The memory between @p data and @p data + @p len must be valid,
- *   readable, contiguous memory. However, if @p len is `0`, @p data may be
- *   `NULL`. In C++, this also must be *TriviallyCopyable*.
- *
- * @return The calculated 128-bit XXH3 value.
- *
- * @see @ref single_shot_example "Single Shot Example" for an example.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t XXH128(XXH_NOESCAPE const void* data, size_t len, XXH64_hash_t seed);
-
-
-/* ===   Experimental API   === */
-/* Symbols defined below must be considered tied to a specific library version. */
-
-/*!
- * @brief Derive a high-entropy secret from any user-defined content, named customSeed.
- *
- * @param secretBuffer    A writable buffer for derived high-entropy secret data.
- * @param secretSize      Size of secretBuffer, in bytes.  Must be >= XXH3_SECRET_SIZE_MIN.
- * @param customSeed      A user-defined content.
- * @param customSeedSize  Size of customSeed, in bytes.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * The generated secret can be used in combination with `*_withSecret()` functions.
- * The `_withSecret()` variants are useful to provide a higher level of protection
- * than 64-bit seed, as it becomes much more difficult for an external actor to
- * guess how to impact the calculation logic.
- *
- * The function accepts as input a custom seed of any length and any content,
- * and derives from it a high-entropy secret of length @p secretSize into an
- * already allocated buffer @p secretBuffer.
- *
- * The generated secret can then be used with any `*_withSecret()` variant.
- * The functions @ref XXH3_128bits_withSecret(), @ref XXH3_64bits_withSecret(),
- * @ref XXH3_128bits_reset_withSecret() and @ref XXH3_64bits_reset_withSecret()
- * are part of this list. They all accept a `secret` parameter
- * which must be large enough for implementation reasons (>= @ref XXH3_SECRET_SIZE_MIN)
- * _and_ feature very high entropy (consist of random-looking bytes).
- * These conditions can be a high bar to meet, so @ref XXH3_generateSecret() can
- * be employed to ensure proper quality.
- *
- * @p customSeed can be anything. It can have any size, even small ones,
- * and its content can be anything, even "poor entropy" sources such as a bunch
- * of zeroes. The resulting `secret` will nonetheless provide all required qualities.
- *
- * @pre
- *   - @p secretSize must be >= @ref XXH3_SECRET_SIZE_MIN
- *   - When @p customSeedSize > 0, supplying NULL as customSeed is undefined behavior.
- *
- * Example code:
- * @code{.c}
- *    #include <stdio.h>
- *    #include <stdlib.h>
- *    #include <string.h>
- *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
- *    #include "xxhash.h"
- *    // Hashes argv[2] using the entropy from argv[1].
- *    int main(int argc, char* argv[])
- *    {
- *        char secret[XXH3_SECRET_SIZE_MIN];
- *        if (argv != 3) { return 1; }
- *        XXH3_generateSecret(secret, sizeof(secret), argv[1], strlen(argv[1]));
- *        XXH64_hash_t h = XXH3_64bits_withSecret(
- *             argv[2], strlen(argv[2]),
- *             secret, sizeof(secret)
- *        );
- *        printf("%016llx\n", (unsigned long long) h);
- *    }
- * @endcode
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize);
-
-/*!
- * @brief Generate the same secret as the _withSeed() variants.
- *
- * @param secretBuffer A writable buffer of @ref XXH3_SECRET_DEFAULT_SIZE bytes
- * @param seed         The 64-bit seed to alter the hash result predictably.
- *
- * The generated secret can be used in combination with
- *`*_withSecret()` and `_withSecretandSeed()` variants.
- *
- * Example C++ `std::string` hash class:
- * @code{.cpp}
- *    #include <string>
- *    #define XXH_STATIC_LINKING_ONLY // expose unstable API
- *    #include "xxhash.h"
- *    // Slow, seeds each time
- *    class HashSlow {
- *        XXH64_hash_t seed;
- *    public:
- *        HashSlow(XXH64_hash_t s) : seed{s} {}
- *        size_t operator()(const std::string& x) const {
- *            return size_t{XXH3_64bits_withSeed(x.c_str(), x.length(), seed)};
- *        }
- *    };
- *    // Fast, caches the seeded secret for future uses.
- *    class HashFast {
- *        unsigned char secret[XXH3_SECRET_DEFAULT_SIZE];
- *    public:
- *        HashFast(XXH64_hash_t s) {
- *            XXH3_generateSecret_fromSeed(secret, seed);
- *        }
- *        size_t operator()(const std::string& x) const {
- *            return size_t{
- *                XXH3_64bits_withSecret(x.c_str(), x.length(), secret, sizeof(secret))
- *            };
- *        }
- *    };
- * @endcode
- */
-XXH_PUBLIC_API void XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed);
-
-/*!
- * @brief Maximum size of "short" key in bytes.
- */
-#define XXH3_MIDSIZE_MAX 240
-
-/*!
- * @brief Calculates 64/128-bit seeded variant of XXH3 hash of @p data.
- *
- * @param data       The block of data to be hashed, at least @p len bytes in size.
- * @param len        The length of @p data, in bytes.
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- * @param seed       The 64-bit seed to alter the hash result predictably.
- *
- * These variants generate hash values using either:
- * - @p seed for "short" keys (< @ref XXH3_MIDSIZE_MAX = 240 bytes)
- * - @p secret for "large" keys (>= @ref XXH3_MIDSIZE_MAX).
- *
- * This generally benefits speed, compared to `_withSeed()` or `_withSecret()`.
- * `_withSeed()` has to generate the secret on the fly for "large" keys.
- * It's fast, but can be perceptible for "not so large" keys (< 1 KB).
- * `_withSecret()` has to generate the masks on the fly for "small" keys,
- * which requires more instructions than _withSeed() variants.
- * Therefore, _withSecretandSeed variant combines the best of both worlds.
- *
- * When @p secret has been generated by XXH3_generateSecret_fromSeed(),
- * this variant produces *exactly* the same results as `_withSeed()` variant,
- * hence offering only a pure speed benefit on "large" input,
- * by skipping the need to regenerate the secret for every large input.
- *
- * Another usage scenario is to hash the secret to a 64-bit hash value,
- * for example with XXH3_64bits(), which then becomes the seed,
- * and then employ both the seed and the secret in _withSecretandSeed().
- * On top of speed, an added benefit is that each bit in the secret
- * has a 50% chance to swap each bit in the output, via its impact to the seed.
- *
- * This is not guaranteed when using the secret directly in "small data" scenarios,
- * because only portions of the secret are employed for small data.
- */
-XXH_PUBLIC_API XXH_PUREF XXH64_hash_t
-XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* data, size_t len,
-                              XXH_NOESCAPE const void* secret, size_t secretSize,
-                              XXH64_hash_t seed);
-
-/*!
- * @brief Calculates 128-bit seeded variant of XXH3 hash of @p data.
- *
- * @param data       The memory segment to be hashed, at least @p len bytes in size.
- * @param length     The length of @p data, in bytes.
- * @param secret     The secret used to alter hash result predictably.
- * @param secretSize The length of @p secret, in bytes (must be >= XXH3_SECRET_SIZE_MIN)
- * @param seed64     The 64-bit seed to alter the hash result predictably.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @see XXH3_64bits_withSecretandSeed(): contract is the same.
- */
-XXH_PUBLIC_API XXH_PUREF XXH128_hash_t
-XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length,
-                               XXH_NOESCAPE const void* secret, size_t secretSize,
-                               XXH64_hash_t seed64);
-
-#ifndef XXH_NO_STREAM
-/*!
- * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
- *
- * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- * @param seed64     The 64-bit seed to alter the hash result predictably.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @see XXH3_64bits_withSecretandSeed(). Contract is identical.
- */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
-                                    XXH_NOESCAPE const void* secret, size_t secretSize,
-                                    XXH64_hash_t seed64);
-
-/*!
- * @brief Resets an @ref XXH3_state_t with secret data to begin a new hash.
- *
- * @param statePtr   A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
- * @param secret     The secret data.
- * @param secretSize The length of @p secret, in bytes.
- * @param seed64     The 64-bit seed to alter the hash result predictably.
- *
- * @return @ref XXH_OK on success.
- * @return @ref XXH_ERROR on failure.
- *
- * @see XXH3_64bits_withSecretandSeed(). Contract is identical.
- *
- * Note: there was a bug in an earlier version of this function (<= v0.8.2)
- * that would make it generate an incorrect hash value
- * when @p seed == 0 and @p length < XXH3_MIDSIZE_MAX
- * and @p secret is different from XXH3_generateSecret_fromSeed().
- * As stated in the contract, the correct hash result must be
- * the same as XXH3_128bits_withSeed() when @p length <= XXH3_MIDSIZE_MAX.
- * Results generated by this older version are wrong, hence not comparable.
- */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr,
-                                     XXH_NOESCAPE const void* secret, size_t secretSize,
-                                     XXH64_hash_t seed64);
-
-#endif /* !XXH_NO_STREAM */
-
-#endif  /* !XXH_NO_XXH3 */
-#endif  /* XXH_NO_LONG_LONG */
-#if defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API)
-#  define XXH_IMPLEMENTATION
-#endif
-
-#endif  /* defined(XXH_STATIC_LINKING_ONLY) && !defined(XXHASH_H_STATIC_13879238742) */
-
-
-/* ======================================================================== */
-/* ======================================================================== */
-/* ======================================================================== */
-
-
-/*-**********************************************************************
- * xxHash implementation
- *-**********************************************************************
- * xxHash's implementation used to be hosted inside xxhash.c.
- *
- * However, inlining requires implementation to be visible to the compiler,
- * hence be included alongside the header.
- * Previously, implementation was hosted inside xxhash.c,
- * which was then #included when inlining was activated.
- * This construction created issues with a few build and install systems,
- * as it required xxhash.c to be stored in /include directory.
- *
- * xxHash implementation is now directly integrated within xxhash.h.
- * As a consequence, xxhash.c is no longer needed in /include.
- *
- * xxhash.c is still available and is still useful.
- * In a "normal" setup, when xxhash is not inlined,
- * xxhash.h only exposes the prototypes and public symbols,
- * while xxhash.c can be built into an object file xxhash.o
- * which can then be linked into the final binary.
- ************************************************************************/
-
-#if ( defined(XXH_INLINE_ALL) || defined(XXH_PRIVATE_API) \
-   || defined(XXH_IMPLEMENTATION) ) && !defined(XXH_IMPLEM_13a8737387)
-#  define XXH_IMPLEM_13a8737387
-
-/* *************************************
-*  Tuning parameters
-***************************************/
-
-/*!
- * @defgroup tuning Tuning parameters
- * @{
- *
- * Various macros to control xxHash's behavior.
- */
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Define this to disable 64-bit code.
- *
- * Useful if only using the @ref XXH32_family and you have a strict C90 compiler.
- */
-#  define XXH_NO_LONG_LONG
-#  undef XXH_NO_LONG_LONG /* don't actually */
-/*!
- * @brief Controls how unaligned memory is accessed.
- *
- * By default, access to unaligned memory is controlled by `memcpy()`, which is
- * safe and portable.
- *
- * Unfortunately, on some target/compiler combinations, the generated assembly
- * is sub-optimal.
- *
- * The below switch allow selection of a different access method
- * in the search for improved performance.
- *
- * @par Possible options:
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=0` (default): `memcpy`
- *   @par
- *     Use `memcpy()`. Safe and portable. Note that most modern compilers will
- *     eliminate the function call and treat it as an unaligned access.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=1`: `__attribute__((aligned(1)))`
- *   @par
- *     Depends on compiler extensions and is therefore not portable.
- *     This method is safe _if_ your compiler supports it,
- *     and *generally* as fast or faster than `memcpy`.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=2`: Direct cast
- *  @par
- *     Casts directly and dereferences. This method doesn't depend on the
- *     compiler, but it violates the C standard as it directly dereferences an
- *     unaligned pointer. It can generate buggy code on targets which do not
- *     support unaligned memory accesses, but in some circumstances, it's the
- *     only known way to get the most performance.
- *
- *  - `XXH_FORCE_MEMORY_ACCESS=3`: Byteshift
- *  @par
- *     Also portable. This can generate the best code on old compilers which don't
- *     inline small `memcpy()` calls, and it might also be faster on big-endian
- *     systems which lack a native byteswap instruction. However, some compilers
- *     will emit literal byteshifts even if the target supports unaligned access.
- *
- *
- * @warning
- *   Methods 1 and 2 rely on implementation-defined behavior. Use these with
- *   care, as what works on one compiler/platform/optimization level may cause
- *   another to read garbage data or even crash.
- *
- * See https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html for details.
- *
- * Prefer these methods in priority order (0 > 3 > 1 > 2)
- */
-#  define XXH_FORCE_MEMORY_ACCESS 0
-
-/*!
- * @def XXH_SIZE_OPT
- * @brief Controls how much xxHash optimizes for size.
- *
- * xxHash, when compiled, tends to result in a rather large binary size. This
- * is mostly due to heavy usage to forced inlining and constant folding of the
- * @ref XXH3_family to increase performance.
- *
- * However, some developers prefer size over speed. This option can
- * significantly reduce the size of the generated code. When using the `-Os`
- * or `-Oz` options on GCC or Clang, this is defined to 1 by default,
- * otherwise it is defined to 0.
- *
- * Most of these size optimizations can be controlled manually.
- *
- * This is a number from 0-2.
- *  - `XXH_SIZE_OPT` == 0: Default. xxHash makes no size optimizations. Speed
- *    comes first.
- *  - `XXH_SIZE_OPT` == 1: Default for `-Os` and `-Oz`. xxHash is more
- *    conservative and disables hacks that increase code size. It implies the
- *    options @ref XXH_NO_INLINE_HINTS == 1, @ref XXH_FORCE_ALIGN_CHECK == 0,
- *    and @ref XXH3_NEON_LANES == 8 if they are not already defined.
- *  - `XXH_SIZE_OPT` == 2: xxHash tries to make itself as small as possible.
- *    Performance may cry. For example, the single shot functions just use the
- *    streaming API.
- */
-#  define XXH_SIZE_OPT 0
-
-/*!
- * @def XXH_FORCE_ALIGN_CHECK
- * @brief If defined to non-zero, adds a special path for aligned inputs (XXH32()
- * and XXH64() only).
- *
- * This is an important performance trick for architectures without decent
- * unaligned memory access performance.
- *
- * It checks for input alignment, and when conditions are met, uses a "fast
- * path" employing direct 32-bit/64-bit reads, resulting in _dramatically
- * faster_ read speed.
- *
- * The check costs one initial branch per hash, which is generally negligible,
- * but not zero.
- *
- * Moreover, it's not useful to generate an additional code path if memory
- * access uses the same instruction for both aligned and unaligned
- * addresses (e.g. x86 and aarch64).
- *
- * In these cases, the alignment check can be removed by setting this macro to 0.
- * Then the code will always use unaligned memory access.
- * Align check is automatically disabled on x86, x64, ARM64, and some ARM chips
- * which are platforms known to offer good unaligned memory accesses performance.
- *
- * It is also disabled by default when @ref XXH_SIZE_OPT >= 1.
- *
- * This option does not affect XXH3 (only XXH32 and XXH64).
- */
-#  define XXH_FORCE_ALIGN_CHECK 0
-
-/*!
- * @def XXH_NO_INLINE_HINTS
- * @brief When non-zero, sets all functions to `static`.
- *
- * By default, xxHash tries to force the compiler to inline almost all internal
- * functions.
- *
- * This can usually improve performance due to reduced jumping and improved
- * constant folding, but significantly increases the size of the binary which
- * might not be favorable.
- *
- * Additionally, sometimes the forced inlining can be detrimental to performance,
- * depending on the architecture.
- *
- * XXH_NO_INLINE_HINTS marks all internal functions as static, giving the
- * compiler full control on whether to inline or not.
- *
- * When not optimizing (-O0), using `-fno-inline` with GCC or Clang, or if
- * @ref XXH_SIZE_OPT >= 1, this will automatically be defined.
- */
-#  define XXH_NO_INLINE_HINTS 0
-
-/*!
- * @def XXH3_INLINE_SECRET
- * @brief Determines whether to inline the XXH3 withSecret code.
- *
- * When the secret size is known, the compiler can improve the performance
- * of XXH3_64bits_withSecret() and XXH3_128bits_withSecret().
- *
- * However, if the secret size is not known, it doesn't have any benefit. This
- * happens when xxHash is compiled into a global symbol. Therefore, if
- * @ref XXH_INLINE_ALL is *not* defined, this will be defined to 0.
- *
- * Additionally, this defaults to 0 on GCC 12+, which has an issue with function pointers
- * that are *sometimes* force inline on -Og, and it is impossible to automatically
- * detect this optimization level.
- */
-#  define XXH3_INLINE_SECRET 0
-
-/*!
- * @def XXH32_ENDJMP
- * @brief Whether to use a jump for `XXH32_finalize`.
- *
- * For performance, `XXH32_finalize` uses multiple branches in the finalizer.
- * This is generally preferable for performance,
- * but depending on exact architecture, a jmp may be preferable.
- *
- * This setting is only possibly making a difference for very small inputs.
- */
-#  define XXH32_ENDJMP 0
-
-/*!
- * @internal
- * @brief Redefines old internal names.
- *
- * For compatibility with code that uses xxHash's internals before the names
- * were changed to improve namespacing. There is no other reason to use this.
- */
-#  define XXH_OLD_NAMES
-#  undef XXH_OLD_NAMES /* don't actually use, it is ugly. */
-
-/*!
- * @def XXH_NO_STREAM
- * @brief Disables the streaming API.
- *
- * When xxHash is not inlined and the streaming functions are not used, disabling
- * the streaming functions can improve code size significantly, especially with
- * the @ref XXH3_family which tends to make constant folded copies of itself.
- */
-#  define XXH_NO_STREAM
-#  undef XXH_NO_STREAM /* don't actually */
-#endif /* XXH_DOXYGEN */
-/*!
- * @}
- */
-
-#ifndef XXH_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
-   /* prefer __packed__ structures (method 1) for GCC
-    * < ARMv7 with unaligned access (e.g. Raspbian armhf) still uses byte shifting, so we use memcpy
-    * which for some reason does unaligned loads. */
-#  if defined(__GNUC__) && !(defined(__ARM_ARCH) && __ARM_ARCH < 7 && defined(__ARM_FEATURE_UNALIGNED))
-#    define XXH_FORCE_MEMORY_ACCESS 1
-#  endif
-#endif
-
-#ifndef XXH_SIZE_OPT
-   /* default to 1 for -Os or -Oz */
-#  if (defined(__GNUC__) || defined(__clang__)) && defined(__OPTIMIZE_SIZE__)
-#    define XXH_SIZE_OPT 1
-#  else
-#    define XXH_SIZE_OPT 0
-#  endif
-#endif
-
-#ifndef XXH_FORCE_ALIGN_CHECK  /* can be defined externally */
-   /* don't check on sizeopt, x86, aarch64, or arm when unaligned access is available */
-#  if XXH_SIZE_OPT >= 1 || \
-      defined(__i386)  || defined(__x86_64__) || defined(__aarch64__) || defined(__ARM_FEATURE_UNALIGNED) \
-   || defined(_M_IX86) || defined(_M_X64)     || defined(_M_ARM64)    || defined(_M_ARM) /* visual */
-#    define XXH_FORCE_ALIGN_CHECK 0
-#  else
-#    define XXH_FORCE_ALIGN_CHECK 1
-#  endif
-#endif
-
-#ifndef XXH_NO_INLINE_HINTS
-#  if XXH_SIZE_OPT >= 1 || defined(__NO_INLINE__)  /* -O0, -fno-inline */
-#    define XXH_NO_INLINE_HINTS 1
-#  else
-#    define XXH_NO_INLINE_HINTS 0
-#  endif
-#endif
-
-#ifndef XXH3_INLINE_SECRET
-#  if (defined(__GNUC__) && !defined(__clang__) && __GNUC__ >= 12) \
-     || !defined(XXH_INLINE_ALL)
-#    define XXH3_INLINE_SECRET 0
-#  else
-#    define XXH3_INLINE_SECRET 1
-#  endif
-#endif
-
-#ifndef XXH32_ENDJMP
-/* generally preferable for performance */
-#  define XXH32_ENDJMP 0
-#endif
-
-/*!
- * @defgroup impl Implementation
- * @{
- */
-
-
-/* *************************************
-*  Includes & Memory related functions
-***************************************/
-#if defined(XXH_NO_STREAM)
-/* nothing */
-#elif defined(XXH_NO_STDLIB)
-
-/* When requesting to disable any mention of stdlib,
- * the library loses the ability to invoked malloc / free.
- * In practice, it means that functions like `XXH*_createState()`
- * will always fail, and return NULL.
- * This flag is useful in situations where
- * xxhash.h is integrated into some kernel, embedded or limited environment
- * without access to dynamic allocation.
- */
-
-static XXH_CONSTF void* XXH_malloc(size_t s) { (void)s; return NULL; }
-static void XXH_free(void* p) { (void)p; }
-
-#else
-
-/*
- * Modify the local functions below should you wish to use
- * different memory routines for malloc() and free()
- */
-#include <stdlib.h>
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than malloc().
- */
-static XXH_MALLOCF void* XXH_malloc(size_t s) { return malloc(s); }
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than free().
- */
-static void XXH_free(void* p) { free(p); }
-
-#endif  /* XXH_NO_STDLIB */
-
-#include <string.h>
-
-/*!
- * @internal
- * @brief Modify this function to use a different routine than memcpy().
- */
-static void* XXH_memcpy(void* dest, const void* src, size_t size)
-{
-    return memcpy(dest,src,size);
-}
-
-#include <limits.h>   /* ULLONG_MAX */
-
-
-/* *************************************
-*  Compiler Specific Options
-***************************************/
-#ifdef _MSC_VER /* Visual Studio warning fix */
-#  pragma warning(disable : 4127) /* disable: C4127: conditional expression is constant */
-#endif
-
-#if XXH_NO_INLINE_HINTS  /* disable inlining hints */
-#  if defined(__GNUC__) || defined(__clang__)
-#    define XXH_FORCE_INLINE static __attribute__((__unused__))
-#  else
-#    define XXH_FORCE_INLINE static
-#  endif
-#  define XXH_NO_INLINE static
-/* enable inlining hints */
-#elif defined(__GNUC__) || defined(__clang__)
-#  define XXH_FORCE_INLINE static __inline__ __attribute__((__always_inline__, __unused__))
-#  define XXH_NO_INLINE static __attribute__((__noinline__))
-#elif defined(_MSC_VER)  /* Visual Studio */
-#  define XXH_FORCE_INLINE static __forceinline
-#  define XXH_NO_INLINE static __declspec(noinline)
-#elif defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L))   /* C99 */
-#  define XXH_FORCE_INLINE static inline
-#  define XXH_NO_INLINE static
-#else
-#  define XXH_FORCE_INLINE static
-#  define XXH_NO_INLINE static
-#endif
-
-#if defined(XXH_INLINE_ALL)
-#  define XXH_STATIC XXH_FORCE_INLINE
-#else
-#  define XXH_STATIC static
-#endif
-
-#if XXH3_INLINE_SECRET
-#  define XXH3_WITH_SECRET_INLINE XXH_FORCE_INLINE
-#else
-#  define XXH3_WITH_SECRET_INLINE XXH_NO_INLINE
-#endif
-
-#if ((defined(sun) || defined(__sun)) && __cplusplus) /* Solaris includes __STDC_VERSION__ with C++. Tested with GCC 5.5 */
-#  define XXH_RESTRICT   /* disable */
-#elif defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L   /* >= C99 */
-#  define XXH_RESTRICT   restrict
-#elif (defined (__GNUC__) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) \
-   || (defined (__clang__)) \
-   || (defined (_MSC_VER) && (_MSC_VER >= 1400)) \
-   || (defined (__INTEL_COMPILER) && (__INTEL_COMPILER >= 1300))
-/*
- * There are a LOT more compilers that recognize __restrict but this
- * covers the major ones.
- */
-#  define XXH_RESTRICT   __restrict
-#else
-#  define XXH_RESTRICT   /* disable */
-#endif
-
-/* *************************************
-*  Debug
-***************************************/
-/*!
- * @ingroup tuning
- * @def XXH_DEBUGLEVEL
- * @brief Sets the debugging level.
- *
- * XXH_DEBUGLEVEL is expected to be defined externally, typically via the
- * compiler's command line options. The value must be a number.
- */
-#ifndef XXH_DEBUGLEVEL
-#  ifdef DEBUGLEVEL /* backwards compat */
-#    define XXH_DEBUGLEVEL DEBUGLEVEL
-#  else
-#    define XXH_DEBUGLEVEL 0
-#  endif
-#endif
-
-#if (XXH_DEBUGLEVEL>=1)
-#  include <assert.h>   /* note: can still be disabled with NDEBUG */
-#  define XXH_ASSERT(c)   assert(c)
-#else
-#  if defined(__INTEL_COMPILER)
-#    define XXH_ASSERT(c)   XXH_ASSUME((unsigned char) (c))
-#  else
-#    define XXH_ASSERT(c)   XXH_ASSUME(c)
-#  endif
-#endif
-
-/* note: use after variable declarations */
-#ifndef XXH_STATIC_ASSERT
-#  if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 201112L)    /* C11 */
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { _Static_assert((c),m); } while(0)
-#  elif defined(__cplusplus) && (__cplusplus >= 201103L)            /* C++11 */
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { static_assert((c),m); } while(0)
-#  else
-#    define XXH_STATIC_ASSERT_WITH_MESSAGE(c,m) do { struct xxh_sa { char x[(c) ? 1 : -1]; }; } while(0)
-#  endif
-#  define XXH_STATIC_ASSERT(c) XXH_STATIC_ASSERT_WITH_MESSAGE((c),#c)
-#endif
-
-/*!
- * @internal
- * @def XXH_COMPILER_GUARD(var)
- * @brief Used to prevent unwanted optimizations for @p var.
- *
- * It uses an empty GCC inline assembly statement with a register constraint
- * which forces @p var into a general purpose register (eg eax, ebx, ecx
- * on x86) and marks it as modified.
- *
- * This is used in a few places to avoid unwanted autovectorization (e.g.
- * XXH32_round()). All vectorization we want is explicit via intrinsics,
- * and _usually_ isn't wanted elsewhere.
- *
- * We also use it to prevent unwanted constant folding for AArch64 in
- * XXH3_initCustomSecret_scalar().
- */
-#if defined(__GNUC__) || defined(__clang__)
-#  define XXH_COMPILER_GUARD(var) __asm__("" : "+r" (var))
-#else
-#  define XXH_COMPILER_GUARD(var) ((void)0)
-#endif
-
-/* Specifically for NEON vectors which use the "w" constraint, on
- * Clang. */
-#if defined(__clang__) && defined(__ARM_ARCH) && !defined(__wasm__)
-#  define XXH_COMPILER_GUARD_CLANG_NEON(var) __asm__("" : "+w" (var))
-#else
-#  define XXH_COMPILER_GUARD_CLANG_NEON(var) ((void)0)
-#endif
-
-/* *************************************
-*  Basic Types
-***************************************/
-#if !defined (__VMS) \
- && (defined (__cplusplus) \
- || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-#   ifdef _AIX
-#     include <inttypes.h>
-#   else
-#     include <stdint.h>
-#   endif
-    typedef uint8_t xxh_u8;
-#else
-    typedef unsigned char xxh_u8;
-#endif
-typedef XXH32_hash_t xxh_u32;
-
-#ifdef XXH_OLD_NAMES
-#  warning "XXH_OLD_NAMES is planned to be removed starting v0.9. If the program depends on it, consider moving away from it by employing newer type names directly"
-#  define BYTE xxh_u8
-#  define U8   xxh_u8
-#  define U32  xxh_u32
-#endif
-
-/* ***   Memory access   *** */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_read32(const void* ptr)
- * @brief Reads an unaligned 32-bit integer from @p ptr in native endianness.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit native endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32(const void* ptr)
- * @brief Reads an unaligned 32-bit little endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readBE32(const void* ptr)
- * @brief Reads an unaligned 32-bit big endian integer from @p ptr.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- *
- * @param ptr The pointer to read from.
- * @return The 32-bit big endian integer from the bytes at @p ptr.
- */
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_readLE32_align(const void* ptr, XXH_alignment align)
- * @brief Like @ref XXH_readLE32(), but has an option for aligned reads.
- *
- * Affected by @ref XXH_FORCE_MEMORY_ACCESS.
- * Note that when @ref XXH_FORCE_ALIGN_CHECK == 0, the @p align parameter is
- * always @ref XXH_alignment::XXH_unaligned.
- *
- * @param ptr The pointer to read from.
- * @param align Whether @p ptr is aligned.
- * @pre
- *   If @p align == @ref XXH_alignment::XXH_aligned, @p ptr must be 4 byte
- *   aligned.
- * @return The 32-bit little endian integer from the bytes at @p ptr.
- */
-
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-/*
- * Manual byteshift. Best for old compilers which don't inline memcpy.
- * We actually directly use XXH_readLE32 and XXH_readBE32.
- */
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
-
-/*
- * Force direct memory access. Only works on CPU which support unaligned memory
- * access in hardware.
- */
-static xxh_u32 XXH_read32(const void* memPtr) { return *(const xxh_u32*) memPtr; }
-
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
-
-/*
- * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
- * documentation claimed that it only increased the alignment, but actually it
- * can decrease it on gcc, clang, and icc:
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
- * https://gcc.godbolt.org/z/xYez1j67Y.
- */
-#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; } __attribute__((__packed__)) unalign;
-#endif
-static xxh_u32 XXH_read32(const void* ptr)
-{
-    typedef __attribute__((__aligned__(1))) xxh_u32 xxh_unalign32;
-    return *((const xxh_unalign32*)ptr);
-}
-
-#else
-
-/*
- * Portable and safe solution. Generally efficient.
- * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u32 XXH_read32(const void* memPtr)
-{
-    xxh_u32 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-
-/* ***   Endianness   *** */
-
-/*!
- * @ingroup tuning
- * @def XXH_CPU_LITTLE_ENDIAN
- * @brief Whether the target is little endian.
- *
- * Defined to 1 if the target is little endian, or 0 if it is big endian.
- * It can be defined externally, for example on the compiler command line.
- *
- * If it is not defined,
- * a runtime check (which is usually constant folded) is used instead.
- *
- * @note
- *   This is not necessarily defined to an integer constant.
- *
- * @see XXH_isLittleEndian() for the runtime check.
- */
-#ifndef XXH_CPU_LITTLE_ENDIAN
-/*
- * Try to detect endianness automatically, to avoid the nonstandard behavior
- * in `XXH_isLittleEndian()`
- */
-#  if defined(_WIN32) /* Windows is always little endian */ \
-     || defined(__LITTLE_ENDIAN__) \
-     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
-#    define XXH_CPU_LITTLE_ENDIAN 1
-#  elif defined(__BIG_ENDIAN__) \
-     || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#    define XXH_CPU_LITTLE_ENDIAN 0
-#  else
-/*!
- * @internal
- * @brief Runtime check for @ref XXH_CPU_LITTLE_ENDIAN.
- *
- * Most compilers will constant fold this.
- */
-static int XXH_isLittleEndian(void)
-{
-    /*
-     * Portable and well-defined behavior.
-     * Don't use static: it is detrimental to performance.
-     */
-    const union { xxh_u32 u; xxh_u8 c[4]; } one = { 1 };
-    return one.c[0];
-}
-#   define XXH_CPU_LITTLE_ENDIAN   XXH_isLittleEndian()
-#  endif
-#endif
-
-
-
-
-/* ****************************************
-*  Compiler-specific Functions and Macros
-******************************************/
-#define XXH_GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
-
-#ifdef __has_builtin
-#  define XXH_HAS_BUILTIN(x) __has_builtin(x)
-#else
-#  define XXH_HAS_BUILTIN(x) 0
-#endif
-
-
-
-/*
- * C23 and future versions have standard "unreachable()".
- * Once it has been implemented reliably we can add it as an
- * additional case:
- *
- * ```
- * #if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= XXH_C23_VN)
- * #  include <stddef.h>
- * #  ifdef unreachable
- * #    define XXH_UNREACHABLE() unreachable()
- * #  endif
- * #endif
- * ```
- *
- * Note C++23 also has std::unreachable() which can be detected
- * as follows:
- * ```
- * #if defined(__cpp_lib_unreachable) && (__cpp_lib_unreachable >= 202202L)
- * #  include <utility>
- * #  define XXH_UNREACHABLE() std::unreachable()
- * #endif
- * ```
- * NB: `__cpp_lib_unreachable` is defined in the `<version>` header.
- * We don't use that as including `<utility>` in `extern "C"` blocks
- * doesn't work on GCC12
- */
-
-#if XXH_HAS_BUILTIN(__builtin_unreachable)
-#  define XXH_UNREACHABLE() __builtin_unreachable()
-
-#elif defined(_MSC_VER)
-#  define XXH_UNREACHABLE() __assume(0)
-
-#else
-#  define XXH_UNREACHABLE()
-#endif
-
-#if XXH_HAS_BUILTIN(__builtin_assume)
-#  define XXH_ASSUME(c) __builtin_assume(c)
-#else
-#  define XXH_ASSUME(c) if (!(c)) { XXH_UNREACHABLE(); }
-#endif
-
-/*!
- * @internal
- * @def XXH_rotl32(x,r)
- * @brief 32-bit rotate left.
- *
- * @param x The 32-bit integer to be rotated.
- * @param r The number of bits to rotate.
- * @pre
- *   @p r > 0 && @p r < 32
- * @note
- *   @p x and @p r may be evaluated multiple times.
- * @return The rotated result.
- */
-#if !defined(NO_CLANG_BUILTIN) && XXH_HAS_BUILTIN(__builtin_rotateleft32) \
-                               && XXH_HAS_BUILTIN(__builtin_rotateleft64)
-#  define XXH_rotl32 __builtin_rotateleft32
-#  define XXH_rotl64 __builtin_rotateleft64
-#elif XXH_HAS_BUILTIN(__builtin_stdc_rotate_left)
-#  define XXH_rotl32 __builtin_stdc_rotate_left
-#  define XXH_rotl64 __builtin_stdc_rotate_left
-/* Note: although _rotl exists for minGW (GCC under windows), performance seems poor */
-#elif defined(_MSC_VER)
-#  define XXH_rotl32(x,r) _rotl(x,r)
-#  define XXH_rotl64(x,r) _rotl64(x,r)
-#else
-#  define XXH_rotl32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
-#  define XXH_rotl64(x,r) (((x) << (r)) | ((x) >> (64 - (r))))
-#endif
-
-/*!
- * @internal
- * @fn xxh_u32 XXH_swap32(xxh_u32 x)
- * @brief A 32-bit byteswap.
- *
- * @param x The 32-bit integer to byteswap.
- * @return @p x, byteswapped.
- */
-#if defined(_MSC_VER)     /* Visual Studio */
-#  define XXH_swap32 _byteswap_ulong
-#elif XXH_GCC_VERSION >= 403
-#  define XXH_swap32 __builtin_bswap32
-#else
-static xxh_u32 XXH_swap32 (xxh_u32 x)
-{
-    return  ((x << 24) & 0xff000000 ) |
-            ((x <<  8) & 0x00ff0000 ) |
-            ((x >>  8) & 0x0000ff00 ) |
-            ((x >> 24) & 0x000000ff );
-}
-#endif
-
-
-/* ***************************
-*  Memory reads
-*****************************/
-
-/*!
- * @internal
- * @brief Enum to indicate whether a pointer is aligned.
- */
-typedef enum {
-    XXH_aligned,  /*!< Aligned */
-    XXH_unaligned /*!< Possibly unaligned */
-} XXH_alignment;
-
-/*
- * XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load.
- *
- * This is ideal for older compilers which don't inline memcpy.
- */
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-
-XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[0]
-         | ((xxh_u32)bytePtr[1] << 8)
-         | ((xxh_u32)bytePtr[2] << 16)
-         | ((xxh_u32)bytePtr[3] << 24);
-}
-
-XXH_FORCE_INLINE xxh_u32 XXH_readBE32(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[3]
-         | ((xxh_u32)bytePtr[2] << 8)
-         | ((xxh_u32)bytePtr[1] << 16)
-         | ((xxh_u32)bytePtr[0] << 24);
-}
-
-#else
-XXH_FORCE_INLINE xxh_u32 XXH_readLE32(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_read32(ptr) : XXH_swap32(XXH_read32(ptr));
-}
-
-static xxh_u32 XXH_readBE32(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap32(XXH_read32(ptr)) : XXH_read32(ptr);
-}
-#endif
-
-XXH_FORCE_INLINE xxh_u32
-XXH_readLE32_align(const void* ptr, XXH_alignment align)
-{
-    if (align==XXH_unaligned) {
-        return XXH_readLE32(ptr);
-    } else {
-        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u32*)ptr : XXH_swap32(*(const xxh_u32*)ptr);
-    }
-}
-
-
-/* *************************************
-*  Misc
-***************************************/
-/*! @ingroup public */
-XXH_PUBLIC_API unsigned XXH_versionNumber (void) { return XXH_VERSION_NUMBER; }
-
-
-/* *******************************************************************
-*  32-bit hash functions
-*********************************************************************/
-/*!
- * @}
- * @defgroup XXH32_impl XXH32 implementation
- * @ingroup impl
- *
- * Details on the XXH32 implementation.
- * @{
- */
- /* #define instead of static const, to be used as initializers */
-#define XXH_PRIME32_1  0x9E3779B1U  /*!< 0b10011110001101110111100110110001 */
-#define XXH_PRIME32_2  0x85EBCA77U  /*!< 0b10000101111010111100101001110111 */
-#define XXH_PRIME32_3  0xC2B2AE3DU  /*!< 0b11000010101100101010111000111101 */
-#define XXH_PRIME32_4  0x27D4EB2FU  /*!< 0b00100111110101001110101100101111 */
-#define XXH_PRIME32_5  0x165667B1U  /*!< 0b00010110010101100110011110110001 */
-
-#ifdef XXH_OLD_NAMES
-#  define PRIME32_1 XXH_PRIME32_1
-#  define PRIME32_2 XXH_PRIME32_2
-#  define PRIME32_3 XXH_PRIME32_3
-#  define PRIME32_4 XXH_PRIME32_4
-#  define PRIME32_5 XXH_PRIME32_5
-#endif
-
-/*!
- * @internal
- * @brief Normal stripe processing routine.
- *
- * This shuffles the bits so that any bit from @p input impacts several bits in
- * @p acc.
- *
- * @param acc The accumulator lane.
- * @param input The stripe of input to mix.
- * @return The mixed accumulator lane.
- */
-static xxh_u32 XXH32_round(xxh_u32 acc, xxh_u32 input)
-{
-    acc += input * XXH_PRIME32_2;
-    acc  = XXH_rotl32(acc, 13);
-    acc *= XXH_PRIME32_1;
-#if (defined(__SSE4_1__) || defined(__aarch64__) || defined(__wasm_simd128__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
-    /*
-     * UGLY HACK:
-     * A compiler fence is used to prevent GCC and Clang from
-     * autovectorizing the XXH32 loop (pragmas and attributes don't work for some
-     * reason) without globally disabling SSE4.1.
-     *
-     * The reason we want to avoid vectorization is because despite working on
-     * 4 integers at a time, there are multiple factors slowing XXH32 down on
-     * SSE4:
-     * - There's a ridiculous amount of lag from pmulld (10 cycles of latency on
-     *   newer chips!) making it slightly slower to multiply four integers at
-     *   once compared to four integers independently. Even when pmulld was
-     *   fastest, Sandy/Ivy Bridge, it is still not worth it to go into SSE
-     *   just to multiply unless doing a long operation.
-     *
-     * - Four instructions are required to rotate,
-     *      movqda tmp,  v // not required with VEX encoding
-     *      pslld  tmp, 13 // tmp <<= 13
-     *      psrld  v,   19 // x >>= 19
-     *      por    v,  tmp // x |= tmp
-     *   compared to one for scalar:
-     *      roll   v, 13    // reliably fast across the board
-     *      shldl  v, v, 13 // Sandy Bridge and later prefer this for some reason
-     *
-     * - Instruction level parallelism is actually more beneficial here because
-     *   the SIMD actually serializes this operation: While v1 is rotating, v2
-     *   can load data, while v3 can multiply. SSE forces them to operate
-     *   together.
-     *
-     * This is also enabled on AArch64, as Clang is *very aggressive* in vectorizing
-     * the loop. NEON is only faster on the A53, and with the newer cores, it is less
-     * than half the speed.
-     *
-     * Additionally, this is used on WASM SIMD128 because it JITs to the same
-     * SIMD instructions and has the same issue.
-     */
-    XXH_COMPILER_GUARD(acc);
-#endif
-    return acc;
-}
-
-/*!
- * @internal
- * @brief Mixes all bits to finalize the hash.
- *
- * The final mix ensures that all input bits have a chance to impact any bit in
- * the output digest, resulting in an unbiased distribution.
- *
- * @param hash The hash to avalanche.
- * @return The avalanched hash.
- */
-static xxh_u32 XXH32_avalanche(xxh_u32 hash)
-{
-    hash ^= hash >> 15;
-    hash *= XXH_PRIME32_2;
-    hash ^= hash >> 13;
-    hash *= XXH_PRIME32_3;
-    hash ^= hash >> 16;
-    return hash;
-}
-
-#define XXH_get32bits(p) XXH_readLE32_align(p, align)
-
-/*!
- * @internal
- * @brief Sets up the initial accumulator state for XXH32().
- */
-XXH_FORCE_INLINE void
-XXH32_initAccs(xxh_u32 *acc, xxh_u32 seed)
-{
-    XXH_ASSERT(acc != NULL);
-    acc[0] = seed + XXH_PRIME32_1 + XXH_PRIME32_2;
-    acc[1] = seed + XXH_PRIME32_2;
-    acc[2] = seed + 0;
-    acc[3] = seed - XXH_PRIME32_1;
-}
-
-/*!
- * @internal
- * @brief Consumes a block of data for XXH32().
- *
- * @return the end input pointer.
- */
-XXH_FORCE_INLINE const xxh_u8 *
-XXH32_consumeLong(
-    xxh_u32 *XXH_RESTRICT acc,
-    xxh_u8 const *XXH_RESTRICT input,
-    size_t len,
-    XXH_alignment align
-)
-{
-    const xxh_u8* const bEnd = input + len;
-    const xxh_u8* const limit = bEnd - 15;
-    XXH_ASSERT(acc != NULL);
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(len >= 16);
-    do {
-        acc[0] = XXH32_round(acc[0], XXH_get32bits(input)); input += 4;
-        acc[1] = XXH32_round(acc[1], XXH_get32bits(input)); input += 4;
-        acc[2] = XXH32_round(acc[2], XXH_get32bits(input)); input += 4;
-        acc[3] = XXH32_round(acc[3], XXH_get32bits(input)); input += 4;
-    } while (input < limit);
-
-    return input;
-}
-
-/*!
- * @internal
- * @brief Merges the accumulator lanes together for XXH32()
- */
-XXH_FORCE_INLINE XXH_PUREF xxh_u32
-XXH32_mergeAccs(const xxh_u32 *acc)
-{
-    XXH_ASSERT(acc != NULL);
-    return XXH_rotl32(acc[0], 1)  + XXH_rotl32(acc[1], 7)
-         + XXH_rotl32(acc[2], 12) + XXH_rotl32(acc[3], 18);
-}
-
-/*!
- * @internal
- * @brief Processes the last 0-15 bytes of @p ptr.
- *
- * There may be up to 15 bytes remaining to consume from the input.
- * This final stage will digest them to ensure that all input bytes are present
- * in the final mix.
- *
- * @param hash The hash to finalize.
- * @param ptr The pointer to the remaining input.
- * @param len The remaining length, modulo 16.
- * @param align Whether @p ptr is aligned.
- * @return The finalized hash.
- * @see XXH64_finalize().
- */
-static XXH_PUREF xxh_u32
-XXH32_finalize(xxh_u32 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
-{
-#define XXH_PROCESS1 do {                             \
-    hash += (*ptr++) * XXH_PRIME32_5;                 \
-    hash = XXH_rotl32(hash, 11) * XXH_PRIME32_1;      \
-} while (0)
-
-#define XXH_PROCESS4 do {                             \
-    hash += XXH_get32bits(ptr) * XXH_PRIME32_3;       \
-    ptr += 4;                                         \
-    hash  = XXH_rotl32(hash, 17) * XXH_PRIME32_4;     \
-} while (0)
-
-    if (ptr==NULL) XXH_ASSERT(len == 0);
-
-    /* Compact rerolled version; generally faster */
-    if (!XXH32_ENDJMP) {
-        len &= 15;
-        while (len >= 4) {
-            XXH_PROCESS4;
-            len -= 4;
-        }
-        while (len > 0) {
-            XXH_PROCESS1;
-            --len;
-        }
-        return XXH32_avalanche(hash);
-    } else {
-         switch(len&15) /* or switch(bEnd - p) */ {
-           case 12:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 8:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 4:       XXH_PROCESS4;
-                         return XXH32_avalanche(hash);
-
-           case 13:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 9:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 5:       XXH_PROCESS4;
-                         XXH_PROCESS1;
-                         return XXH32_avalanche(hash);
-
-           case 14:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 10:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 6:       XXH_PROCESS4;
-                         XXH_PROCESS1;
-                         XXH_PROCESS1;
-                         return XXH32_avalanche(hash);
-
-           case 15:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 11:      XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 7:       XXH_PROCESS4;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 3:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 2:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 1:       XXH_PROCESS1;
-                         XXH_FALLTHROUGH;  /* fallthrough */
-           case 0:       return XXH32_avalanche(hash);
-        }
-        XXH_ASSERT(0);
-        return hash;   /* reaching this point is deemed impossible */
-    }
-}
-
-#ifdef XXH_OLD_NAMES
-#  define PROCESS1 XXH_PROCESS1
-#  define PROCESS4 XXH_PROCESS4
-#else
-#  undef XXH_PROCESS1
-#  undef XXH_PROCESS4
-#endif
-
-/*!
- * @internal
- * @brief The implementation for @ref XXH32().
- *
- * @param input , len , seed Directly passed from @ref XXH32().
- * @param align Whether @p input is aligned.
- * @return The calculated hash.
- */
-XXH_FORCE_INLINE XXH_PUREF xxh_u32
-XXH32_endian_align(const xxh_u8* input, size_t len, xxh_u32 seed, XXH_alignment align)
-{
-    xxh_u32 h32;
-
-    if (input==NULL) XXH_ASSERT(len == 0);
-
-    if (len>=16) {
-        xxh_u32 acc[4];
-        XXH32_initAccs(acc, seed);
-
-        input = XXH32_consumeLong(acc, input, len, align);
-
-        h32 = XXH32_mergeAccs(acc);
-    } else {
-        h32  = seed + XXH_PRIME32_5;
-    }
-
-    h32 += (xxh_u32)len;
-
-    return XXH32_finalize(h32, input, len&15, align);
-}
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32 (const void* input, size_t len, XXH32_hash_t seed)
-{
-#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
-    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
-    XXH32_state_t state;
-    XXH32_reset(&state, seed);
-    XXH32_update(&state, (const xxh_u8*)input, len);
-    return XXH32_digest(&state);
-#else
-    if (XXH_FORCE_ALIGN_CHECK) {
-        if ((((size_t)input) & 3) == 0) {   /* Input is 4-bytes aligned, leverage the speed benefit */
-            return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
-    }   }
-
-    return XXH32_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
-#endif
-}
-
-
-
-/*******   Hash streaming   *******/
-#ifndef XXH_NO_STREAM
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH32_state_t* XXH32_createState(void)
-{
-    return (XXH32_state_t*)XXH_malloc(sizeof(XXH32_state_t));
-}
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH_errorcode XXH32_freeState(XXH32_state_t* statePtr)
-{
-    XXH_free(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API void XXH32_copyState(XXH32_state_t* dstState, const XXH32_state_t* srcState)
-{
-    XXH_memcpy(dstState, srcState, sizeof(*dstState));
-}
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH_errorcode XXH32_reset(XXH32_state_t* statePtr, XXH32_hash_t seed)
-{
-    XXH_ASSERT(statePtr != NULL);
-    memset(statePtr, 0, sizeof(*statePtr));
-    XXH32_initAccs(statePtr->acc, seed);
-    return XXH_OK;
-}
-
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH32_update(XXH32_state_t* state, const void* input, size_t len)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    state->total_len_32 += (XXH32_hash_t)len;
-    state->large_len |= (XXH32_hash_t)((len>=16) | (state->total_len_32>=16));
-
-    XXH_ASSERT(state->bufferedSize < sizeof(state->buffer));
-    if (len < sizeof(state->buffer) - state->bufferedSize)  {   /* fill in tmp buffer */
-        XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-        state->bufferedSize += (XXH32_hash_t)len;
-        return XXH_OK;
-    }
-
-    {   const xxh_u8* xinput = (const xxh_u8*)input;
-        const xxh_u8* const bEnd = xinput + len;
-
-        if (state->bufferedSize) {   /* non-empty buffer: complete first */
-            XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);
-            xinput += sizeof(state->buffer) - state->bufferedSize;
-            /* then process one round */
-            (void)XXH32_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);
-            state->bufferedSize = 0;
-        }
-
-        XXH_ASSERT(xinput <= bEnd);
-        if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {
-            /* Process the remaining data */
-            xinput = XXH32_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);
-        }
-
-        if (xinput < bEnd) {
-            /* Copy the leftover to the tmp buffer */
-            XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));
-            state->bufferedSize = (unsigned)(bEnd-xinput);
-        }
-    }
-
-    return XXH_OK;
-}
-
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32_digest(const XXH32_state_t* state)
-{
-    xxh_u32 h32;
-
-    if (state->large_len) {
-        h32 = XXH32_mergeAccs(state->acc);
-    } else {
-        h32 = state->acc[2] /* == seed */ + XXH_PRIME32_5;
-    }
-
-    h32 += state->total_len_32;
-
-    return XXH32_finalize(h32, state->buffer, state->bufferedSize, XXH_aligned);
-}
-#endif /* !XXH_NO_STREAM */
-
-/*******   Canonical representation   *******/
-
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API void XXH32_canonicalFromHash(XXH32_canonical_t* dst, XXH32_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH32_canonical_t) == sizeof(XXH32_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap32(hash);
-    XXH_memcpy(dst, &hash, sizeof(*dst));
-}
-/*! @ingroup XXH32_family */
-XXH_PUBLIC_API XXH32_hash_t XXH32_hashFromCanonical(const XXH32_canonical_t* src)
-{
-    return XXH_readBE32(src);
-}
-
-
-#ifndef XXH_NO_LONG_LONG
-
-/* *******************************************************************
-*  64-bit hash functions
-*********************************************************************/
-/*!
- * @}
- * @ingroup impl
- * @{
- */
-/*******   Memory access   *******/
-
-typedef XXH64_hash_t xxh_u64;
-
-#ifdef XXH_OLD_NAMES
-#  define U64 xxh_u64
-#endif
-
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-/*
- * Manual byteshift. Best for old compilers which don't inline memcpy.
- * We actually directly use XXH_readLE64 and XXH_readBE64.
- */
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==2))
-
-/* Force direct memory access. Only works on CPU which support unaligned memory access in hardware */
-static xxh_u64 XXH_read64(const void* memPtr)
-{
-    return *(const xxh_u64*) memPtr;
-}
-
-#elif (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==1))
-
-/*
- * __attribute__((aligned(1))) is supported by gcc and clang. Originally the
- * documentation claimed that it only increased the alignment, but actually it
- * can decrease it on gcc, clang, and icc:
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=69502,
- * https://gcc.godbolt.org/z/xYez1j67Y.
- */
-#ifdef XXH_OLD_NAMES
-typedef union { xxh_u32 u32; xxh_u64 u64; } __attribute__((__packed__)) unalign64;
-#endif
-static xxh_u64 XXH_read64(const void* ptr)
-{
-    typedef __attribute__((__aligned__(1))) xxh_u64 xxh_unalign64;
-    return *((const xxh_unalign64*)ptr);
-}
-
-#else
-
-/*
- * Portable and safe solution. Generally efficient.
- * see: https://fastcompression.blogspot.com/2015/08/accessing-unaligned-memory.html
- */
-static xxh_u64 XXH_read64(const void* memPtr)
-{
-    xxh_u64 val;
-    XXH_memcpy(&val, memPtr, sizeof(val));
-    return val;
-}
-
-#endif   /* XXH_FORCE_DIRECT_MEMORY_ACCESS */
-
-#if defined(_MSC_VER)     /* Visual Studio */
-#  define XXH_swap64 _byteswap_uint64
-#elif XXH_GCC_VERSION >= 403
-#  define XXH_swap64 __builtin_bswap64
-#else
-static xxh_u64 XXH_swap64(xxh_u64 x)
-{
-    return  ((x << 56) & 0xff00000000000000ULL) |
-            ((x << 40) & 0x00ff000000000000ULL) |
-            ((x << 24) & 0x0000ff0000000000ULL) |
-            ((x << 8)  & 0x000000ff00000000ULL) |
-            ((x >> 8)  & 0x00000000ff000000ULL) |
-            ((x >> 24) & 0x0000000000ff0000ULL) |
-            ((x >> 40) & 0x000000000000ff00ULL) |
-            ((x >> 56) & 0x00000000000000ffULL);
-}
-#endif
-
-
-/* XXH_FORCE_MEMORY_ACCESS==3 is an endian-independent byteshift load. */
-#if (defined(XXH_FORCE_MEMORY_ACCESS) && (XXH_FORCE_MEMORY_ACCESS==3))
-
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[0]
-         | ((xxh_u64)bytePtr[1] << 8)
-         | ((xxh_u64)bytePtr[2] << 16)
-         | ((xxh_u64)bytePtr[3] << 24)
-         | ((xxh_u64)bytePtr[4] << 32)
-         | ((xxh_u64)bytePtr[5] << 40)
-         | ((xxh_u64)bytePtr[6] << 48)
-         | ((xxh_u64)bytePtr[7] << 56);
-}
-
-XXH_FORCE_INLINE xxh_u64 XXH_readBE64(const void* memPtr)
-{
-    const xxh_u8* bytePtr = (const xxh_u8 *)memPtr;
-    return bytePtr[7]
-         | ((xxh_u64)bytePtr[6] << 8)
-         | ((xxh_u64)bytePtr[5] << 16)
-         | ((xxh_u64)bytePtr[4] << 24)
-         | ((xxh_u64)bytePtr[3] << 32)
-         | ((xxh_u64)bytePtr[2] << 40)
-         | ((xxh_u64)bytePtr[1] << 48)
-         | ((xxh_u64)bytePtr[0] << 56);
-}
-
-#else
-XXH_FORCE_INLINE xxh_u64 XXH_readLE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_read64(ptr) : XXH_swap64(XXH_read64(ptr));
-}
-
-static xxh_u64 XXH_readBE64(const void* ptr)
-{
-    return XXH_CPU_LITTLE_ENDIAN ? XXH_swap64(XXH_read64(ptr)) : XXH_read64(ptr);
-}
-#endif
-
-XXH_FORCE_INLINE xxh_u64
-XXH_readLE64_align(const void* ptr, XXH_alignment align)
-{
-    if (align==XXH_unaligned)
-        return XXH_readLE64(ptr);
-    else
-        return XXH_CPU_LITTLE_ENDIAN ? *(const xxh_u64*)ptr : XXH_swap64(*(const xxh_u64*)ptr);
-}
-
-
-/*******   xxh64   *******/
-/*!
- * @}
- * @defgroup XXH64_impl XXH64 implementation
- * @ingroup impl
- *
- * Details on the XXH64 implementation.
- * @{
- */
-/* #define rather that static const, to be used as initializers */
-#define XXH_PRIME64_1  0x9E3779B185EBCA87ULL  /*!< 0b1001111000110111011110011011000110000101111010111100101010000111 */
-#define XXH_PRIME64_2  0xC2B2AE3D27D4EB4FULL  /*!< 0b1100001010110010101011100011110100100111110101001110101101001111 */
-#define XXH_PRIME64_3  0x165667B19E3779F9ULL  /*!< 0b0001011001010110011001111011000110011110001101110111100111111001 */
-#define XXH_PRIME64_4  0x85EBCA77C2B2AE63ULL  /*!< 0b1000010111101011110010100111011111000010101100101010111001100011 */
-#define XXH_PRIME64_5  0x27D4EB2F165667C5ULL  /*!< 0b0010011111010100111010110010111100010110010101100110011111000101 */
-
-#ifdef XXH_OLD_NAMES
-#  define PRIME64_1 XXH_PRIME64_1
-#  define PRIME64_2 XXH_PRIME64_2
-#  define PRIME64_3 XXH_PRIME64_3
-#  define PRIME64_4 XXH_PRIME64_4
-#  define PRIME64_5 XXH_PRIME64_5
-#endif
-
-/*! @copydoc XXH32_round */
-static xxh_u64 XXH64_round(xxh_u64 acc, xxh_u64 input)
-{
-    acc += input * XXH_PRIME64_2;
-    acc  = XXH_rotl64(acc, 31);
-    acc *= XXH_PRIME64_1;
-#if (defined(__AVX512F__)) && !defined(XXH_ENABLE_AUTOVECTORIZE)
-    /*
-     * DISABLE AUTOVECTORIZATION:
-     * A compiler fence is used to prevent GCC and Clang from
-     * autovectorizing the XXH64 loop (pragmas and attributes don't work for some
-     * reason) without globally disabling AVX512.
-     *
-     * Autovectorization of XXH64 tends to be detrimental,
-     * though the exact outcome may change depending on exact cpu and compiler version.
-     * For information, it has been reported as detrimental for Skylake-X,
-     * but possibly beneficial for Zen4.
-     *
-     * The default is to disable auto-vectorization,
-     * but you can select to enable it instead using `XXH_ENABLE_AUTOVECTORIZE` build variable.
-     */
-    XXH_COMPILER_GUARD(acc);
-#endif
-    return acc;
-}
-
-static xxh_u64 XXH64_mergeRound(xxh_u64 acc, xxh_u64 val)
-{
-    val  = XXH64_round(0, val);
-    acc ^= val;
-    acc  = acc * XXH_PRIME64_1 + XXH_PRIME64_4;
-    return acc;
-}
-
-/*! @copydoc XXH32_avalanche */
-static xxh_u64 XXH64_avalanche(xxh_u64 hash)
-{
-    hash ^= hash >> 33;
-    hash *= XXH_PRIME64_2;
-    hash ^= hash >> 29;
-    hash *= XXH_PRIME64_3;
-    hash ^= hash >> 32;
-    return hash;
-}
-
-
-#define XXH_get64bits(p) XXH_readLE64_align(p, align)
-
-/*!
- * @internal
- * @brief Sets up the initial accumulator state for XXH64().
- */
-XXH_FORCE_INLINE void
-XXH64_initAccs(xxh_u64 *acc, xxh_u64 seed)
-{
-    XXH_ASSERT(acc != NULL);
-    acc[0] = seed + XXH_PRIME64_1 + XXH_PRIME64_2;
-    acc[1] = seed + XXH_PRIME64_2;
-    acc[2] = seed + 0;
-    acc[3] = seed - XXH_PRIME64_1;
-}
-
-/*!
- * @internal
- * @brief Consumes a block of data for XXH64().
- *
- * @return the end input pointer.
- */
-XXH_FORCE_INLINE const xxh_u8 *
-XXH64_consumeLong(
-    xxh_u64 *XXH_RESTRICT acc,
-    xxh_u8 const *XXH_RESTRICT input,
-    size_t len,
-    XXH_alignment align
-)
-{
-    const xxh_u8* const bEnd = input + len;
-    const xxh_u8* const limit = bEnd - 31;
-    XXH_ASSERT(acc != NULL);
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(len >= 32);
-    do {
-        /* reroll on 32-bit */
-        if (sizeof(void *) < sizeof(xxh_u64)) {
-            size_t i;
-            for (i = 0; i < 4; i++) {
-                acc[i] = XXH64_round(acc[i], XXH_get64bits(input));
-                input += 8;
-            }
-        } else {
-            acc[0] = XXH64_round(acc[0], XXH_get64bits(input)); input += 8;
-            acc[1] = XXH64_round(acc[1], XXH_get64bits(input)); input += 8;
-            acc[2] = XXH64_round(acc[2], XXH_get64bits(input)); input += 8;
-            acc[3] = XXH64_round(acc[3], XXH_get64bits(input)); input += 8;
-        }
-    } while (input < limit);
-
-    return input;
-}
-
-/*!
- * @internal
- * @brief Merges the accumulator lanes together for XXH64()
- */
-XXH_FORCE_INLINE XXH_PUREF xxh_u64
-XXH64_mergeAccs(const xxh_u64 *acc)
-{
-    XXH_ASSERT(acc != NULL);
-    {
-        xxh_u64 h64 = XXH_rotl64(acc[0], 1) + XXH_rotl64(acc[1], 7)
-                    + XXH_rotl64(acc[2], 12) + XXH_rotl64(acc[3], 18);
-        /* reroll on 32-bit */
-        if (sizeof(void *) < sizeof(xxh_u64)) {
-            size_t i;
-            for (i = 0; i < 4; i++) {
-                h64 = XXH64_mergeRound(h64, acc[i]);
-            }
-        } else {
-            h64 = XXH64_mergeRound(h64, acc[0]);
-            h64 = XXH64_mergeRound(h64, acc[1]);
-            h64 = XXH64_mergeRound(h64, acc[2]);
-            h64 = XXH64_mergeRound(h64, acc[3]);
-        }
-        return h64;
-    }
-}
-
-/*!
- * @internal
- * @brief Processes the last 0-31 bytes of @p ptr.
- *
- * There may be up to 31 bytes remaining to consume from the input.
- * This final stage will digest them to ensure that all input bytes are present
- * in the final mix.
- *
- * @param hash The hash to finalize.
- * @param ptr The pointer to the remaining input.
- * @param len The remaining length, modulo 32.
- * @param align Whether @p ptr is aligned.
- * @return The finalized hash
- * @see XXH32_finalize().
- */
-XXH_STATIC XXH_PUREF xxh_u64
-XXH64_finalize(xxh_u64 hash, const xxh_u8* ptr, size_t len, XXH_alignment align)
-{
-    if (ptr==NULL) XXH_ASSERT(len == 0);
-    len &= 31;
-    while (len >= 8) {
-        xxh_u64 const k1 = XXH64_round(0, XXH_get64bits(ptr));
-        ptr += 8;
-        hash ^= k1;
-        hash  = XXH_rotl64(hash,27) * XXH_PRIME64_1 + XXH_PRIME64_4;
-        len -= 8;
-    }
-    if (len >= 4) {
-        hash ^= (xxh_u64)(XXH_get32bits(ptr)) * XXH_PRIME64_1;
-        ptr += 4;
-        hash = XXH_rotl64(hash, 23) * XXH_PRIME64_2 + XXH_PRIME64_3;
-        len -= 4;
-    }
-    while (len > 0) {
-        hash ^= (*ptr++) * XXH_PRIME64_5;
-        hash = XXH_rotl64(hash, 11) * XXH_PRIME64_1;
-        --len;
-    }
-    return  XXH64_avalanche(hash);
-}
-
-#ifdef XXH_OLD_NAMES
-#  define PROCESS1_64 XXH_PROCESS1_64
-#  define PROCESS4_64 XXH_PROCESS4_64
-#  define PROCESS8_64 XXH_PROCESS8_64
-#else
-#  undef XXH_PROCESS1_64
-#  undef XXH_PROCESS4_64
-#  undef XXH_PROCESS8_64
-#endif
-
-/*!
- * @internal
- * @brief The implementation for @ref XXH64().
- *
- * @param input , len , seed Directly passed from @ref XXH64().
- * @param align Whether @p input is aligned.
- * @return The calculated hash.
- */
-XXH_FORCE_INLINE XXH_PUREF xxh_u64
-XXH64_endian_align(const xxh_u8* input, size_t len, xxh_u64 seed, XXH_alignment align)
-{
-    xxh_u64 h64;
-    if (input==NULL) XXH_ASSERT(len == 0);
-
-    if (len>=32) {  /* Process a large block of data */
-        xxh_u64 acc[4];
-        XXH64_initAccs(acc, seed);
-
-        input = XXH64_consumeLong(acc, input, len, align);
-
-        h64 = XXH64_mergeAccs(acc);
-    } else {
-        h64  = seed + XXH_PRIME64_5;
-    }
-
-    h64 += (xxh_u64) len;
-
-    return XXH64_finalize(h64, input, len, align);
-}
-
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64 (XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
-{
-#if !defined(XXH_NO_STREAM) && XXH_SIZE_OPT >= 2
-    /* Simple version, good for code maintenance, but unfortunately slow for small inputs */
-    XXH64_state_t state;
-    XXH64_reset(&state, seed);
-    XXH64_update(&state, (const xxh_u8*)input, len);
-    return XXH64_digest(&state);
-#else
-    if (XXH_FORCE_ALIGN_CHECK) {
-        if ((((size_t)input) & 7)==0) {  /* Input is aligned, let's leverage the speed advantage */
-            return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_aligned);
-    }   }
-
-    return XXH64_endian_align((const xxh_u8*)input, len, seed, XXH_unaligned);
-
-#endif
-}
-
-/*******   Hash Streaming   *******/
-#ifndef XXH_NO_STREAM
-/*! @ingroup XXH64_family*/
-XXH_PUBLIC_API XXH64_state_t* XXH64_createState(void)
-{
-    return (XXH64_state_t*)XXH_malloc(sizeof(XXH64_state_t));
-}
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_freeState(XXH64_state_t* statePtr)
-{
-    XXH_free(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API void XXH64_copyState(XXH_NOESCAPE XXH64_state_t* dstState, const XXH64_state_t* srcState)
-{
-    XXH_memcpy(dstState, srcState, sizeof(*dstState));
-}
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH_errorcode XXH64_reset(XXH_NOESCAPE XXH64_state_t* statePtr, XXH64_hash_t seed)
-{
-    XXH_ASSERT(statePtr != NULL);
-    memset(statePtr, 0, sizeof(*statePtr));
-    XXH64_initAccs(statePtr->acc, seed);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH64_update (XXH_NOESCAPE XXH64_state_t* state, XXH_NOESCAPE const void* input, size_t len)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    state->total_len += len;
-
-    XXH_ASSERT(state->bufferedSize <= sizeof(state->buffer));
-    if (len < sizeof(state->buffer) - state->bufferedSize)  {   /* fill in tmp buffer */
-        XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-        state->bufferedSize += (XXH32_hash_t)len;
-        return XXH_OK;
-    }
-
-    {   const xxh_u8* xinput = (const xxh_u8*)input;
-        const xxh_u8* const bEnd = xinput + len;
-
-        if (state->bufferedSize) {   /* non-empty buffer => complete first */
-            XXH_memcpy(state->buffer + state->bufferedSize, xinput, sizeof(state->buffer) - state->bufferedSize);
-            xinput += sizeof(state->buffer) - state->bufferedSize;
-            /* and process one round */
-            (void)XXH64_consumeLong(state->acc, state->buffer, sizeof(state->buffer), XXH_aligned);
-            state->bufferedSize = 0;
-        }
-
-        XXH_ASSERT(xinput <= bEnd);
-        if ((size_t)(bEnd - xinput) >= sizeof(state->buffer)) {
-            /* Process the remaining data */
-            xinput = XXH64_consumeLong(state->acc, xinput, (size_t)(bEnd - xinput), XXH_unaligned);
-        }
-
-        if (xinput < bEnd) {
-            /* Copy the leftover to the tmp buffer */
-            XXH_memcpy(state->buffer, xinput, (size_t)(bEnd-xinput));
-            state->bufferedSize = (unsigned)(bEnd-xinput);
-        }
-    }
-
-    return XXH_OK;
-}
-
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_digest(XXH_NOESCAPE const XXH64_state_t* state)
-{
-    xxh_u64 h64;
-
-    if (state->total_len >= 32) {
-        h64 = XXH64_mergeAccs(state->acc);
-    } else {
-        h64  = state->acc[2] /*seed*/ + XXH_PRIME64_5;
-    }
-
-    h64 += (xxh_u64) state->total_len;
-
-    return XXH64_finalize(h64, state->buffer, (size_t)state->total_len, XXH_aligned);
-}
-#endif /* !XXH_NO_STREAM */
-
-/******* Canonical representation   *******/
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API void XXH64_canonicalFromHash(XXH_NOESCAPE XXH64_canonical_t* dst, XXH64_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH64_canonical_t) == sizeof(XXH64_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) hash = XXH_swap64(hash);
-    XXH_memcpy(dst, &hash, sizeof(*dst));
-}
-
-/*! @ingroup XXH64_family */
-XXH_PUBLIC_API XXH64_hash_t XXH64_hashFromCanonical(XXH_NOESCAPE const XXH64_canonical_t* src)
-{
-    return XXH_readBE64(src);
-}
-
-#ifndef XXH_NO_XXH3
-
-/* *********************************************************************
-*  XXH3
-*  New generation hash designed for speed on small keys and vectorization
-************************************************************************ */
-/*!
- * @}
- * @defgroup XXH3_impl XXH3 implementation
- * @ingroup impl
- * @{
- */
-
-/* ===   Compiler specifics   === */
-
-
-#if (defined(__GNUC__) && (__GNUC__ >= 3))  \
-  || (defined(__INTEL_COMPILER) && (__INTEL_COMPILER >= 800)) \
-  || defined(__clang__)
-#    define XXH_likely(x) __builtin_expect(x, 1)
-#    define XXH_unlikely(x) __builtin_expect(x, 0)
-#else
-#    define XXH_likely(x) (x)
-#    define XXH_unlikely(x) (x)
-#endif
-
-#ifndef XXH_HAS_INCLUDE
-#  ifdef __has_include
-/*
- * Not defined as XXH_HAS_INCLUDE(x) (function-like) because
- * this causes segfaults in Apple Clang 4.2 (on Mac OS X 10.7 Lion)
- */
-#    define XXH_HAS_INCLUDE __has_include
-#  else
-#    define XXH_HAS_INCLUDE(x) 0
-#  endif
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-#  if defined(__ARM_FEATURE_SVE)
-#    include <arm_sve.h>
-#  endif
-#  if defined(__ARM_NEON__) || defined(__ARM_NEON) \
-   || (defined(_M_ARM) && _M_ARM >= 7) \
-   || defined(_M_ARM64) || defined(_M_ARM64EC) \
-   || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* WASM SIMD128 via SIMDe */
-#    define inline __inline__  /* circumvent a clang bug */
-#    include <arm_neon.h>
-#    undef inline
-#  elif defined(__AVX2__)
-#    include <immintrin.h>
-#  elif defined(__SSE2__)
-#    include <emmintrin.h>
-#  elif defined(__loongarch_sx)
-#    include <lsxintrin.h>
-#  endif
-#endif
-
-#if defined(_MSC_VER)
-#  include <intrin.h>
-#endif
-
-/*
- * One goal of XXH3 is to make it fast on both 32-bit and 64-bit, while
- * remaining a true 64-bit/128-bit hash function.
- *
- * This is done by prioritizing a subset of 64-bit operations that can be
- * emulated without too many steps on the average 32-bit machine.
- *
- * For example, these two lines seem similar, and run equally fast on 64-bit:
- *
- *   xxh_u64 x;
- *   x ^= (x >> 47); // good
- *   x ^= (x >> 13); // bad
- *
- * However, to a 32-bit machine, there is a major difference.
- *
- * x ^= (x >> 47) looks like this:
- *
- *   x.lo ^= (x.hi >> (47 - 32));
- *
- * while x ^= (x >> 13) looks like this:
- *
- *   // note: funnel shifts are not usually cheap.
- *   x.lo ^= (x.lo >> 13) | (x.hi << (32 - 13));
- *   x.hi ^= (x.hi >> 13);
- *
- * The first one is significantly faster than the second, simply because the
- * shift is larger than 32. This means:
- *  - All the bits we need are in the upper 32 bits, so we can ignore the lower
- *    32 bits in the shift.
- *  - The shift result will always fit in the lower 32 bits, and therefore,
- *    we can ignore the upper 32 bits in the xor.
- *
- * Thanks to this optimization, XXH3 only requires these features to be efficient:
- *
- *  - Usable unaligned access
- *  - A 32-bit or 64-bit ALU
- *      - If 32-bit, a decent ADC instruction
- *  - A 32 or 64-bit multiply with a 64-bit result
- *  - For the 128-bit variant, a decent byteswap helps short inputs.
- *
- * The first two are already required by XXH32, and almost all 32-bit and 64-bit
- * platforms which can run XXH32 can run XXH3 efficiently.
- *
- * Thumb-1, the classic 16-bit only subset of ARM's instruction set, is one
- * notable exception.
- *
- * First of all, Thumb-1 lacks support for the UMULL instruction which
- * performs the important long multiply. This means numerous __aeabi_lmul
- * calls.
- *
- * Second of all, the 8 functional registers are just not enough.
- * Setup for __aeabi_lmul, byteshift loads, pointers, and all arithmetic need
- * Lo registers, and this shuffling results in thousands more MOVs than A32.
- *
- * A32 and T32 don't have this limitation. They can access all 14 registers,
- * do a 32->64 multiply with UMULL, and the flexible operand allowing free
- * shifts is helpful, too.
- *
- * Therefore, we do a quick sanity check.
- *
- * If compiling Thumb-1 for a target which supports ARM instructions, we will
- * emit a warning, as it is not a "sane" platform to compile for.
- *
- * Usually, if this happens, it is because of an accident and you probably need
- * to specify -march, as you likely meant to compile for a newer architecture.
- *
- * Credit: large sections of the vectorial and asm source code paths
- *         have been contributed by @easyaspi314
- */
-#if defined(__thumb__) && !defined(__thumb2__) && defined(__ARM_ARCH_ISA_ARM)
-#   warning "XXH3 is highly inefficient without ARM or Thumb-2."
-#endif
-
-/* ==========================================
- * Vectorization detection
- * ========================================== */
-
-#ifdef XXH_DOXYGEN
-/*!
- * @ingroup tuning
- * @brief Overrides the vectorization implementation chosen for XXH3.
- *
- * Can be defined to 0 to disable SIMD or any of the values mentioned in
- * @ref XXH_VECTOR_TYPE.
- *
- * If this is not defined, it uses predefined macros to determine the best
- * implementation.
- */
-#  define XXH_VECTOR XXH_SCALAR
-/*!
- * @ingroup tuning
- * @brief Selects the minimum alignment for XXH3's accumulators.
- *
- * When using SIMD, this should match the alignment required for said vector
- * type, so, for example, 32 for AVX2.
- *
- * Default: Auto detected.
- */
-#  define XXH_ACC_ALIGN 8
-#endif
-
-/* Actual definition */
-#ifndef XXH_DOXYGEN
-#endif
-
-#ifndef XXH_VECTOR    /* can be defined on command line */
-#  if defined(__ARM_FEATURE_SVE)
-#    define XXH_VECTOR XXH_SVE
-#  elif ( \
-        defined(__ARM_NEON__) || defined(__ARM_NEON) /* gcc */ \
-     || defined(_M_ARM) || defined(_M_ARM64) || defined(_M_ARM64EC) /* msvc */ \
-     || (defined(__wasm_simd128__) && XXH_HAS_INCLUDE(<arm_neon.h>)) /* wasm simd128 via SIMDe */ \
-   ) && ( \
-        defined(_WIN32) || defined(__LITTLE_ENDIAN__) /* little endian only */ \
-    || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) \
-   )
-#    define XXH_VECTOR XXH_NEON
-#  elif defined(__AVX512F__)
-#    define XXH_VECTOR XXH_AVX512
-#  elif defined(__AVX2__)
-#    define XXH_VECTOR XXH_AVX2
-#  elif defined(__SSE2__) || defined(_M_AMD64) || defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP == 2))
-#    define XXH_VECTOR XXH_SSE2
-#  elif (defined(__PPC64__) && defined(__POWER8_VECTOR__)) \
-     || (defined(__s390x__) && defined(__VEC__)) \
-     && defined(__GNUC__) /* TODO: IBM XL */
-#    define XXH_VECTOR XXH_VSX
-#  elif defined(__loongarch_sx)
-#    define XXH_VECTOR XXH_LSX
-#  else
-#    define XXH_VECTOR XXH_SCALAR
-#  endif
-#endif
-
-/* __ARM_FEATURE_SVE is only supported by GCC & Clang. */
-#if (XXH_VECTOR == XXH_SVE) && !defined(__ARM_FEATURE_SVE)
-#  ifdef _MSC_VER
-#    pragma warning(once : 4606)
-#  else
-#    warning "__ARM_FEATURE_SVE isn't supported. Use SCALAR instead."
-#  endif
-#  undef XXH_VECTOR
-#  define XXH_VECTOR XXH_SCALAR
-#endif
-
-/*
- * Controls the alignment of the accumulator,
- * for compatibility with aligned vector loads, which are usually faster.
- */
-#ifndef XXH_ACC_ALIGN
-#  if defined(XXH_X86DISPATCH)
-#     define XXH_ACC_ALIGN 64  /* for compatibility with avx512 */
-#  elif XXH_VECTOR == XXH_SCALAR  /* scalar */
-#     define XXH_ACC_ALIGN 8
-#  elif XXH_VECTOR == XXH_SSE2  /* sse2 */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_AVX2  /* avx2 */
-#     define XXH_ACC_ALIGN 32
-#  elif XXH_VECTOR == XXH_NEON  /* neon */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_VSX   /* vsx */
-#     define XXH_ACC_ALIGN 16
-#  elif XXH_VECTOR == XXH_AVX512  /* avx512 */
-#     define XXH_ACC_ALIGN 64
-#  elif XXH_VECTOR == XXH_SVE   /* sve */
-#     define XXH_ACC_ALIGN 64
-#  elif XXH_VECTOR == XXH_LSX   /* lsx */
-#     define XXH_ACC_ALIGN 64
-#  endif
-#endif
-
-#if defined(XXH_X86DISPATCH) || XXH_VECTOR == XXH_SSE2 \
-    || XXH_VECTOR == XXH_AVX2 || XXH_VECTOR == XXH_AVX512
-#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
-#elif XXH_VECTOR == XXH_SVE
-#  define XXH_SEC_ALIGN XXH_ACC_ALIGN
-#else
-#  define XXH_SEC_ALIGN 8
-#endif
-
-#if defined(__GNUC__) || defined(__clang__)
-#  define XXH_ALIASING __attribute__((__may_alias__))
-#else
-#  define XXH_ALIASING /* nothing */
-#endif
-
-/*
- * UGLY HACK:
- * GCC usually generates the best code with -O3 for xxHash.
- *
- * However, when targeting AVX2, it is overzealous in its unrolling resulting
- * in code roughly 3/4 the speed of Clang.
- *
- * There are other issues, such as GCC splitting _mm256_loadu_si256 into
- * _mm_loadu_si128 + _mm256_inserti128_si256. This is an optimization which
- * only applies to Sandy and Ivy Bridge... which don't even support AVX2.
- *
- * That is why when compiling the AVX2 version, it is recommended to use either
- *   -O2 -mavx2 -march=haswell
- * or
- *   -O2 -mavx2 -mno-avx256-split-unaligned-load
- * for decent performance, or to use Clang instead.
- *
- * Fortunately, we can control the first one with a pragma that forces GCC into
- * -O2, but the other one we can't control without "failed to inline always
- * inline function due to target mismatch" warnings.
- */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
-#  pragma GCC push_options
-#  pragma GCC optimize("-O2")
-#endif
-
-#if XXH_VECTOR == XXH_NEON
-
-/*
- * UGLY HACK: While AArch64 GCC on Linux does not seem to care, on macOS, GCC -O3
- * optimizes out the entire hashLong loop because of the aliasing violation.
- *
- * However, GCC is also inefficient at load-store optimization with vld1q/vst1q,
- * so the only option is to mark it as aliasing.
- */
-typedef uint64x2_t xxh_aliasing_uint64x2_t XXH_ALIASING;
-
-/*!
- * @internal
- * @brief `vld1q_u64` but faster and alignment-safe.
- *
- * On AArch64, unaligned access is always safe, but on ARMv7-a, it is only
- * *conditionally* safe (`vld1` has an alignment bit like `movdq[ua]` in x86).
- *
- * GCC for AArch64 sees `vld1q_u8` as an intrinsic instead of a load, so it
- * prohibits load-store optimizations. Therefore, a direct dereference is used.
- *
- * Otherwise, `vld1q_u8` is used with `vreinterpretq_u8_u64` to do a safe
- * unaligned load.
- */
-#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__)
-XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr) /* silence -Wcast-align */
-{
-    return *(xxh_aliasing_uint64x2_t const *)ptr;
-}
-#else
-XXH_FORCE_INLINE uint64x2_t XXH_vld1q_u64(void const* ptr)
-{
-    return vreinterpretq_u64_u8(vld1q_u8((uint8_t const*)ptr));
-}
-#endif
-
-/*!
- * @internal
- * @brief `vmlal_u32` on low and high halves of a vector.
- *
- * This is a workaround for AArch64 GCC < 11 which implemented arm_neon.h with
- * inline assembly and were therefore incapable of merging the `vget_{low, high}_u32`
- * with `vmlal_u32`.
- */
-#if defined(__aarch64__) && defined(__GNUC__) && !defined(__clang__) && __GNUC__ < 11
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    /* Inline assembly is the only way */
-    __asm__("umlal   %0.2d, %1.2s, %2.2s" : "+w" (acc) : "w" (lhs), "w" (rhs));
-    return acc;
-}
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    /* This intrinsic works as expected */
-    return vmlal_high_u32(acc, lhs, rhs);
-}
-#else
-/* Portable intrinsic versions */
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_low_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    return vmlal_u32(acc, vget_low_u32(lhs), vget_low_u32(rhs));
-}
-/*! @copydoc XXH_vmlal_low_u32
- * Assume the compiler converts this to vmlal_high_u32 on aarch64 */
-XXH_FORCE_INLINE uint64x2_t
-XXH_vmlal_high_u32(uint64x2_t acc, uint32x4_t lhs, uint32x4_t rhs)
-{
-    return vmlal_u32(acc, vget_high_u32(lhs), vget_high_u32(rhs));
-}
-#endif
-
-/*!
- * @ingroup tuning
- * @brief Controls the NEON to scalar ratio for XXH3
- *
- * This can be set to 2, 4, 6, or 8.
- *
- * ARM Cortex CPUs are _very_ sensitive to how their pipelines are used.
- *
- * For example, the Cortex-A73 can dispatch 3 micro-ops per cycle, but only 2 of those
- * can be NEON. If you are only using NEON instructions, you are only using 2/3 of the CPU
- * bandwidth.
- *
- * This is even more noticeable on the more advanced cores like the Cortex-A76 which
- * can dispatch 8 micro-ops per cycle, but still only 2 NEON micro-ops at once.
- *
- * Therefore, to make the most out of the pipeline, it is beneficial to run 6 NEON lanes
- * and 2 scalar lanes, which is chosen by default.
- *
- * This does not apply to Apple processors or 32-bit processors, which run better with
- * full NEON. These will default to 8. Additionally, size-optimized builds run 8 lanes.
- *
- * This change benefits CPUs with large micro-op buffers without negatively affecting
- * most other CPUs:
- *
- *  | Chipset               | Dispatch type       | NEON only | 6:2 hybrid | Diff. |
- *  |:----------------------|:--------------------|----------:|-----------:|------:|
- *  | Snapdragon 730 (A76)  | 2 NEON/8 micro-ops  |  8.8 GB/s |  10.1 GB/s |  ~16% |
- *  | Snapdragon 835 (A73)  | 2 NEON/3 micro-ops  |  5.1 GB/s |   5.3 GB/s |   ~5% |
- *  | Marvell PXA1928 (A53) | In-order dual-issue |  1.9 GB/s |   1.9 GB/s |    0% |
- *  | Apple M1              | 4 NEON/8 micro-ops  | 37.3 GB/s |  36.1 GB/s |  ~-3% |
- *
- * It also seems to fix some bad codegen on GCC, making it almost as fast as clang.
- *
- * When using WASM SIMD128, if this is 2 or 6, SIMDe will scalarize 2 of the lanes meaning
- * it effectively becomes worse 4.
- *
- * @see XXH3_accumulate_512_neon()
- */
-# ifndef XXH3_NEON_LANES
-#  if (defined(__aarch64__) || defined(__arm64__) || defined(_M_ARM64) || defined(_M_ARM64EC)) \
-   && !defined(__APPLE__) && XXH_SIZE_OPT <= 0
-#   define XXH3_NEON_LANES 6
-#  else
-#   define XXH3_NEON_LANES XXH_ACC_NB
-#  endif
-# endif
-#endif  /* XXH_VECTOR == XXH_NEON */
-
-/*
- * VSX and Z Vector helpers.
- *
- * This is very messy, and any pull requests to clean this up are welcome.
- *
- * There are a lot of problems with supporting VSX and s390x, due to
- * inconsistent intrinsics, spotty coverage, and multiple endiannesses.
- */
-#if XXH_VECTOR == XXH_VSX
-/* Annoyingly, these headers _may_ define three macros: `bool`, `vector`,
- * and `pixel`. This is a problem for obvious reasons.
- *
- * These keywords are unnecessary; the spec literally says they are
- * equivalent to `__bool`, `__vector`, and `__pixel` and may be undef'd
- * after including the header.
- *
- * We use pragma push_macro/pop_macro to keep the namespace clean. */
-#  pragma push_macro("bool")
-#  pragma push_macro("vector")
-#  pragma push_macro("pixel")
-/* silence potential macro redefined warnings */
-#  undef bool
-#  undef vector
-#  undef pixel
-
-#  if defined(__s390x__)
-#    include <s390intrin.h>
-#  else
-#    include <altivec.h>
-#  endif
-
-/* Restore the original macro values, if applicable. */
-#  pragma pop_macro("pixel")
-#  pragma pop_macro("vector")
-#  pragma pop_macro("bool")
-
-typedef __vector unsigned long long xxh_u64x2;
-typedef __vector unsigned char xxh_u8x16;
-typedef __vector unsigned xxh_u32x4;
-
-/*
- * UGLY HACK: Similar to aarch64 macOS GCC, s390x GCC has the same aliasing issue.
- */
-typedef xxh_u64x2 xxh_aliasing_u64x2 XXH_ALIASING;
-
-# ifndef XXH_VSX_BE
-#  if defined(__BIG_ENDIAN__) \
-  || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
-#    define XXH_VSX_BE 1
-#  elif defined(__VEC_ELEMENT_REG_ORDER__) && __VEC_ELEMENT_REG_ORDER__ == __ORDER_BIG_ENDIAN__
-#    warning "-maltivec=be is not recommended. Please use native endianness."
-#    define XXH_VSX_BE 1
-#  else
-#    define XXH_VSX_BE 0
-#  endif
-# endif /* !defined(XXH_VSX_BE) */
-
-# if XXH_VSX_BE
-#  if defined(__POWER9_VECTOR__) || (defined(__clang__) && defined(__s390x__))
-#    define XXH_vec_revb vec_revb
-#  else
-/*!
- * A polyfill for POWER9's vec_revb().
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_revb(xxh_u64x2 val)
-{
-    xxh_u8x16 const vByteSwap = { 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
-                                  0x0F, 0x0E, 0x0D, 0x0C, 0x0B, 0x0A, 0x09, 0x08 };
-    return vec_perm(val, val, vByteSwap);
-}
-#  endif
-# endif /* XXH_VSX_BE */
-
-/*!
- * Performs an unaligned vector load and byte swaps it on big endian.
- */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_loadu(const void *ptr)
-{
-    xxh_u64x2 ret;
-    XXH_memcpy(&ret, ptr, sizeof(xxh_u64x2));
-# if XXH_VSX_BE
-    ret = XXH_vec_revb(ret);
-# endif
-    return ret;
-}
-
-/*
- * vec_mulo and vec_mule are very problematic intrinsics on PowerPC
- *
- * These intrinsics weren't added until GCC 8, despite existing for a while,
- * and they are endian dependent. Also, their meaning swap depending on version.
- * */
-# if defined(__s390x__)
- /* s390x is always big endian, no issue on this platform */
-#  define XXH_vec_mulo vec_mulo
-#  define XXH_vec_mule vec_mule
-# elif defined(__clang__) && XXH_HAS_BUILTIN(__builtin_altivec_vmuleuw) && !defined(__ibmxl__)
-/* Clang has a better way to control this, we can just use the builtin which doesn't swap. */
- /* The IBM XL Compiler (which defined __clang__) only implements the vec_* operations */
-#  define XXH_vec_mulo __builtin_altivec_vmulouw
-#  define XXH_vec_mule __builtin_altivec_vmuleuw
-# else
-/* gcc needs inline assembly */
-/* Adapted from https://github.com/google/highwayhash/blob/master/highwayhash/hh_vsx.h. */
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mulo(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmulouw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-XXH_FORCE_INLINE xxh_u64x2 XXH_vec_mule(xxh_u32x4 a, xxh_u32x4 b)
-{
-    xxh_u64x2 result;
-    __asm__("vmuleuw %0, %1, %2" : "=v" (result) : "v" (a), "v" (b));
-    return result;
-}
-# endif /* XXH_vec_mulo, XXH_vec_mule */
-#endif /* XXH_VECTOR == XXH_VSX */
-
-#if XXH_VECTOR == XXH_SVE
-#define ACCRND(acc, offset) \
-do { \
-    svuint64_t input_vec = svld1_u64(mask, xinput + offset);         \
-    svuint64_t secret_vec = svld1_u64(mask, xsecret + offset);       \
-    svuint64_t mixed = sveor_u64_x(mask, secret_vec, input_vec);     \
-    svuint64_t swapped = svtbl_u64(input_vec, kSwap);                \
-    svuint64_t mixed_lo = svextw_u64_x(mask, mixed);                 \
-    svuint64_t mixed_hi = svlsr_n_u64_x(mask, mixed, 32);            \
-    svuint64_t mul = svmad_u64_x(mask, mixed_lo, mixed_hi, swapped); \
-    acc = svadd_u64_x(mask, acc, mul);                               \
-} while (0)
-#endif /* XXH_VECTOR == XXH_SVE */
-
-/* prefetch
- * can be disabled, by declaring XXH_NO_PREFETCH build macro */
-#if defined(XXH_NO_PREFETCH)
-#  define XXH_PREFETCH(ptr)  (void)(ptr)  /* disabled */
-#else
-#  if XXH_SIZE_OPT >= 1
-#    define XXH_PREFETCH(ptr) (void)(ptr)
-#  elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))  /* _mm_prefetch() not defined outside of x86/x64 */
-#    include <mmintrin.h>   /* https://msdn.microsoft.com/fr-fr/library/84szxsww(v=vs.90).aspx */
-#    define XXH_PREFETCH(ptr)  _mm_prefetch((const char*)(ptr), _MM_HINT_T0)
-#  elif defined(__GNUC__) && ( (__GNUC__ >= 4) || ( (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1) ) )
-#    define XXH_PREFETCH(ptr)  __builtin_prefetch((ptr), 0 /* rw==read */, 3 /* locality */)
-#  else
-#    define XXH_PREFETCH(ptr) (void)(ptr)  /* disabled */
-#  endif
-#endif  /* XXH_NO_PREFETCH */
-
-
-/* ==========================================
- * XXH3 default settings
- * ========================================== */
-
-#define XXH_SECRET_DEFAULT_SIZE 192   /* minimum XXH3_SECRET_SIZE_MIN */
-
-#if (XXH_SECRET_DEFAULT_SIZE < XXH3_SECRET_SIZE_MIN)
-#  error "default keyset is not large enough"
-#endif
-
-/*! Pseudorandom secret taken directly from FARSH. */
-XXH_ALIGN(64) static const xxh_u8 XXH3_kSecret[XXH_SECRET_DEFAULT_SIZE] = {
-    0xb8, 0xfe, 0x6c, 0x39, 0x23, 0xa4, 0x4b, 0xbe, 0x7c, 0x01, 0x81, 0x2c, 0xf7, 0x21, 0xad, 0x1c,
-    0xde, 0xd4, 0x6d, 0xe9, 0x83, 0x90, 0x97, 0xdb, 0x72, 0x40, 0xa4, 0xa4, 0xb7, 0xb3, 0x67, 0x1f,
-    0xcb, 0x79, 0xe6, 0x4e, 0xcc, 0xc0, 0xe5, 0x78, 0x82, 0x5a, 0xd0, 0x7d, 0xcc, 0xff, 0x72, 0x21,
-    0xb8, 0x08, 0x46, 0x74, 0xf7, 0x43, 0x24, 0x8e, 0xe0, 0x35, 0x90, 0xe6, 0x81, 0x3a, 0x26, 0x4c,
-    0x3c, 0x28, 0x52, 0xbb, 0x91, 0xc3, 0x00, 0xcb, 0x88, 0xd0, 0x65, 0x8b, 0x1b, 0x53, 0x2e, 0xa3,
-    0x71, 0x64, 0x48, 0x97, 0xa2, 0x0d, 0xf9, 0x4e, 0x38, 0x19, 0xef, 0x46, 0xa9, 0xde, 0xac, 0xd8,
-    0xa8, 0xfa, 0x76, 0x3f, 0xe3, 0x9c, 0x34, 0x3f, 0xf9, 0xdc, 0xbb, 0xc7, 0xc7, 0x0b, 0x4f, 0x1d,
-    0x8a, 0x51, 0xe0, 0x4b, 0xcd, 0xb4, 0x59, 0x31, 0xc8, 0x9f, 0x7e, 0xc9, 0xd9, 0x78, 0x73, 0x64,
-    0xea, 0xc5, 0xac, 0x83, 0x34, 0xd3, 0xeb, 0xc3, 0xc5, 0x81, 0xa0, 0xff, 0xfa, 0x13, 0x63, 0xeb,
-    0x17, 0x0d, 0xdd, 0x51, 0xb7, 0xf0, 0xda, 0x49, 0xd3, 0x16, 0x55, 0x26, 0x29, 0xd4, 0x68, 0x9e,
-    0x2b, 0x16, 0xbe, 0x58, 0x7d, 0x47, 0xa1, 0xfc, 0x8f, 0xf8, 0xb8, 0xd1, 0x7a, 0xd0, 0x31, 0xce,
-    0x45, 0xcb, 0x3a, 0x8f, 0x95, 0x16, 0x04, 0x28, 0xaf, 0xd7, 0xfb, 0xca, 0xbb, 0x4b, 0x40, 0x7e,
-};
-
-static const xxh_u64 PRIME_MX1 = 0x165667919E3779F9ULL;  /*!< 0b0001011001010110011001111001000110011110001101110111100111111001 */
-static const xxh_u64 PRIME_MX2 = 0x9FB21C651E98DF25ULL;  /*!< 0b1001111110110010000111000110010100011110100110001101111100100101 */
-
-#ifdef XXH_OLD_NAMES
-#  define kSecret XXH3_kSecret
-#endif
-
-#ifdef XXH_DOXYGEN
-/*!
- * @brief Calculates a 32-bit to 64-bit long multiply.
- *
- * Implemented as a macro.
- *
- * Wraps `__emulu` on MSVC x86 because it tends to call `__allmul` when it doesn't
- * need to (but it shouldn't need to anyways, it is about 7 instructions to do
- * a 64x64 multiply...). Since we know that this will _always_ emit `MULL`, we
- * use that instead of the normal method.
- *
- * If you are compiling for platforms like Thumb-1 and don't have a better option,
- * you may also want to write your own long multiply routine here.
- *
- * @param x, y Numbers to be multiplied
- * @return 64-bit product of the low 32 bits of @p x and @p y.
- */
-XXH_FORCE_INLINE xxh_u64
-XXH_mult32to64(xxh_u64 x, xxh_u64 y)
-{
-   return (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF);
-}
-#elif defined(_MSC_VER) && defined(_M_IX86)
-#    define XXH_mult32to64(x, y) __emulu((unsigned)(x), (unsigned)(y))
-#else
-/*
- * Downcast + upcast is usually better than masking on older compilers like
- * GCC 4.2 (especially 32-bit ones), all without affecting newer compilers.
- *
- * The other method, (x & 0xFFFFFFFF) * (y & 0xFFFFFFFF), will AND both operands
- * and perform a full 64x64 multiply -- entirely redundant on 32-bit.
- */
-#    define XXH_mult32to64(x, y) ((xxh_u64)(xxh_u32)(x) * (xxh_u64)(xxh_u32)(y))
-#endif
-
-/*!
- * @brief Calculates a 64->128-bit long multiply.
- *
- * Uses `__uint128_t` and `_umul128` if available, otherwise uses a scalar
- * version.
- *
- * @param lhs , rhs The 64-bit integers to be multiplied
- * @return The 128-bit result represented in an @ref XXH128_hash_t.
- */
-static XXH128_hash_t
-XXH_mult64to128(xxh_u64 lhs, xxh_u64 rhs)
-{
-    /*
-     * GCC/Clang __uint128_t method.
-     *
-     * On most 64-bit targets, GCC and Clang define a __uint128_t type.
-     * This is usually the best way as it usually uses a native long 64-bit
-     * multiply, such as MULQ on x86_64 or MUL + UMULH on aarch64.
-     *
-     * Usually.
-     *
-     * Despite being a 32-bit platform, Clang (and emscripten) define this type
-     * despite not having the arithmetic for it. This results in a laggy
-     * compiler builtin call which calculates a full 128-bit multiply.
-     * In that case it is best to use the portable one.
-     * https://github.com/Cyan4973/xxHash/issues/211#issuecomment-515575677
-     */
-#if (defined(__GNUC__) || defined(__clang__)) && !defined(__wasm__) \
-    && defined(__SIZEOF_INT128__) \
-    || (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)
-
-    __uint128_t const product = (__uint128_t)lhs * (__uint128_t)rhs;
-    XXH128_hash_t r128;
-    r128.low64  = (xxh_u64)(product);
-    r128.high64 = (xxh_u64)(product >> 64);
-    return r128;
-
-    /*
-     * MSVC for x64's _umul128 method.
-     *
-     * xxh_u64 _umul128(xxh_u64 Multiplier, xxh_u64 Multiplicand, xxh_u64 *HighProduct);
-     *
-     * This compiles to single operand MUL on x64.
-     */
-#elif (defined(_M_X64) || defined(_M_IA64)) && !defined(_M_ARM64EC)
-
-#ifndef _MSC_VER
-#   pragma intrinsic(_umul128)
-#endif
-    xxh_u64 product_high;
-    xxh_u64 const product_low = _umul128(lhs, rhs, &product_high);
-    XXH128_hash_t r128;
-    r128.low64  = product_low;
-    r128.high64 = product_high;
-    return r128;
-
-    /*
-     * MSVC for ARM64's __umulh method.
-     *
-     * This compiles to the same MUL + UMULH as GCC/Clang's __uint128_t method.
-     */
-#elif defined(_M_ARM64) || defined(_M_ARM64EC)
-
-#ifndef _MSC_VER
-#   pragma intrinsic(__umulh)
-#endif
-    XXH128_hash_t r128;
-    r128.low64  = lhs * rhs;
-    r128.high64 = __umulh(lhs, rhs);
-    return r128;
-
-#else
-    /*
-     * Portable scalar method. Optimized for 32-bit and 64-bit ALUs.
-     *
-     * This is a fast and simple grade school multiply, which is shown below
-     * with base 10 arithmetic instead of base 0x100000000.
-     *
-     *           9 3 // D2 lhs = 93
-     *         x 7 5 // D2 rhs = 75
-     *     ----------
-     *           1 5 // D2 lo_lo = (93 % 10) * (75 % 10) = 15
-     *         4 5 | // D2 hi_lo = (93 / 10) * (75 % 10) = 45
-     *         2 1 | // D2 lo_hi = (93 % 10) * (75 / 10) = 21
-     *     + 6 3 | | // D2 hi_hi = (93 / 10) * (75 / 10) = 63
-     *     ---------
-     *         2 7 | // D2 cross = (15 / 10) + (45 % 10) + 21 = 27
-     *     + 6 7 | | // D2 upper = (27 / 10) + (45 / 10) + 63 = 67
-     *     ---------
-     *       6 9 7 5 // D4 res = (27 * 10) + (15 % 10) + (67 * 100) = 6975
-     *
-     * The reasons for adding the products like this are:
-     *  1. It avoids manual carry tracking. Just like how
-     *     (9 * 9) + 9 + 9 = 99, the same applies with this for UINT64_MAX.
-     *     This avoids a lot of complexity.
-     *
-     *  2. It hints for, and on Clang, compiles to, the powerful UMAAL
-     *     instruction available in ARM's Digital Signal Processing extension
-     *     in 32-bit ARMv6 and later, which is shown below:
-     *
-     *         void UMAAL(xxh_u32 *RdLo, xxh_u32 *RdHi, xxh_u32 Rn, xxh_u32 Rm)
-     *         {
-     *             xxh_u64 product = (xxh_u64)*RdLo * (xxh_u64)*RdHi + Rn + Rm;
-     *             *RdLo = (xxh_u32)(product & 0xFFFFFFFF);
-     *             *RdHi = (xxh_u32)(product >> 32);
-     *         }
-     *
-     *     This instruction was designed for efficient long multiplication, and
-     *     allows this to be calculated in only 4 instructions at speeds
-     *     comparable to some 64-bit ALUs.
-     *
-     *  3. It isn't terrible on other platforms. Usually this will be a couple
-     *     of 32-bit ADD/ADCs.
-     */
-
-    /* First calculate all of the cross products. */
-    xxh_u64 const lo_lo = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs & 0xFFFFFFFF);
-    xxh_u64 const hi_lo = XXH_mult32to64(lhs >> 32,        rhs & 0xFFFFFFFF);
-    xxh_u64 const lo_hi = XXH_mult32to64(lhs & 0xFFFFFFFF, rhs >> 32);
-    xxh_u64 const hi_hi = XXH_mult32to64(lhs >> 32,        rhs >> 32);
-
-    /* Now add the products together. These will never overflow. */
-    xxh_u64 const cross = (lo_lo >> 32) + (hi_lo & 0xFFFFFFFF) + lo_hi;
-    xxh_u64 const upper = (hi_lo >> 32) + (cross >> 32)        + hi_hi;
-    xxh_u64 const lower = (cross << 32) | (lo_lo & 0xFFFFFFFF);
-
-    XXH128_hash_t r128;
-    r128.low64  = lower;
-    r128.high64 = upper;
-    return r128;
-#endif
-}
-
-/*!
- * @brief Calculates a 64-bit to 128-bit multiply, then XOR folds it.
- *
- * The reason for the separate function is to prevent passing too many structs
- * around by value. This will hopefully inline the multiply, but we don't force it.
- *
- * @param lhs , rhs The 64-bit integers to multiply
- * @return The low 64 bits of the product XOR'd by the high 64 bits.
- * @see XXH_mult64to128()
- */
-static xxh_u64
-XXH3_mul128_fold64(xxh_u64 lhs, xxh_u64 rhs)
-{
-    XXH128_hash_t product = XXH_mult64to128(lhs, rhs);
-    return product.low64 ^ product.high64;
-}
-
-/*! Seems to produce slightly better code on GCC for some reason. */
-XXH_FORCE_INLINE XXH_CONSTF xxh_u64 XXH_xorshift64(xxh_u64 v64, int shift)
-{
-    XXH_ASSERT(0 <= shift && shift < 64);
-    return v64 ^ (v64 >> shift);
-}
-
-/*
- * This is a fast avalanche stage,
- * suitable when input bits are already partially mixed
- */
-static XXH64_hash_t XXH3_avalanche(xxh_u64 h64)
-{
-    h64 = XXH_xorshift64(h64, 37);
-    h64 *= PRIME_MX1;
-    h64 = XXH_xorshift64(h64, 32);
-    return h64;
-}
-
-/*
- * This is a stronger avalanche,
- * inspired by Pelle Evensen's rrmxmx
- * preferable when input has not been previously mixed
- */
-static XXH64_hash_t XXH3_rrmxmx(xxh_u64 h64, xxh_u64 len)
-{
-    /* this mix is inspired by Pelle Evensen's rrmxmx */
-    h64 ^= XXH_rotl64(h64, 49) ^ XXH_rotl64(h64, 24);
-    h64 *= PRIME_MX2;
-    h64 ^= (h64 >> 35) + len ;
-    h64 *= PRIME_MX2;
-    return XXH_xorshift64(h64, 28);
-}
-
-
-/* ==========================================
- * Short keys
- * ==========================================
- * One of the shortcomings of XXH32 and XXH64 was that their performance was
- * sub-optimal on short lengths. It used an iterative algorithm which strongly
- * favored lengths that were a multiple of 4 or 8.
- *
- * Instead of iterating over individual inputs, we use a set of single shot
- * functions which piece together a range of lengths and operate in constant time.
- *
- * Additionally, the number of multiplies has been significantly reduced. This
- * reduces latency, especially when emulating 64-bit multiplies on 32-bit.
- *
- * Depending on the platform, this may or may not be faster than XXH32, but it
- * is almost guaranteed to be faster than XXH64.
- */
-
-/*
- * At very short lengths, there isn't enough input to fully hide secrets, or use
- * the entire secret.
- *
- * There is also only a limited amount of mixing we can do before significantly
- * impacting performance.
- *
- * Therefore, we use different sections of the secret and always mix two secret
- * samples with an XOR. This should have no effect on performance on the
- * seedless or withSeed variants because everything _should_ be constant folded
- * by modern compilers.
- *
- * The XOR mixing hides individual parts of the secret and increases entropy.
- *
- * This adds an extra layer of strength for custom secrets.
- */
-XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_1to3_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(secret != NULL);
-    /*
-     * len = 1: combined = { input[0], 0x01, input[0], input[0] }
-     * len = 2: combined = { input[1], 0x02, input[0], input[1] }
-     * len = 3: combined = { input[2], 0x03, input[0], input[1] }
-     */
-    {   xxh_u8  const c1 = input[0];
-        xxh_u8  const c2 = input[len >> 1];
-        xxh_u8  const c3 = input[len - 1];
-        xxh_u32 const combined = ((xxh_u32)c1 << 16) | ((xxh_u32)c2  << 24)
-                               | ((xxh_u32)c3 <<  0) | ((xxh_u32)len << 8);
-        xxh_u64 const bitflip = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
-        xxh_u64 const keyed = (xxh_u64)combined ^ bitflip;
-        return XXH64_avalanche(keyed);
-    }
-}
-
-XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_4to8_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
-    {   xxh_u32 const input1 = XXH_readLE32(input);
-        xxh_u32 const input2 = XXH_readLE32(input + len - 4);
-        xxh_u64 const bitflip = (XXH_readLE64(secret+8) ^ XXH_readLE64(secret+16)) - seed;
-        xxh_u64 const input64 = input2 + (((xxh_u64)input1) << 32);
-        xxh_u64 const keyed = input64 ^ bitflip;
-        return XXH3_rrmxmx(keyed, len);
-    }
-}
-
-XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_9to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   xxh_u64 const bitflip1 = (XXH_readLE64(secret+24) ^ XXH_readLE64(secret+32)) + seed;
-        xxh_u64 const bitflip2 = (XXH_readLE64(secret+40) ^ XXH_readLE64(secret+48)) - seed;
-        xxh_u64 const input_lo = XXH_readLE64(input)           ^ bitflip1;
-        xxh_u64 const input_hi = XXH_readLE64(input + len - 8) ^ bitflip2;
-        xxh_u64 const acc = len
-                          + XXH_swap64(input_lo) + input_hi
-                          + XXH3_mul128_fold64(input_lo, input_hi);
-        return XXH3_avalanche(acc);
-    }
-}
-
-XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_0to16_64b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (XXH_likely(len >  8)) return XXH3_len_9to16_64b(input, len, secret, seed);
-        if (XXH_likely(len >= 4)) return XXH3_len_4to8_64b(input, len, secret, seed);
-        if (len) return XXH3_len_1to3_64b(input, len, secret, seed);
-        return XXH64_avalanche(seed ^ (XXH_readLE64(secret+56) ^ XXH_readLE64(secret+64)));
-    }
-}
-
-/*
- * DISCLAIMER: There are known *seed-dependent* multicollisions here due to
- * multiplication by zero, affecting hashes of lengths 17 to 240.
- *
- * However, they are very unlikely.
- *
- * Keep this in mind when using the unseeded XXH3_64bits() variant: As with all
- * unseeded non-cryptographic hashes, it does not attempt to defend itself
- * against specially crafted inputs, only random inputs.
- *
- * Compared to classic UMAC where a 1 in 2^31 chance of 4 consecutive bytes
- * cancelling out the secret is taken an arbitrary number of times (addressed
- * in XXH3_accumulate_512), this collision is very unlikely with random inputs
- * and/or proper seeding:
- *
- * This only has a 1 in 2^63 chance of 8 consecutive bytes cancelling out, in a
- * function that is only called up to 16 times per hash with up to 240 bytes of
- * input.
- *
- * This is not too bad for a non-cryptographic hash function, especially with
- * only 64 bit outputs.
- *
- * The 128-bit variant (which trades some speed for strength) is NOT affected
- * by this, although it is always a good idea to use a proper seed if you care
- * about strength.
- */
-XXH_FORCE_INLINE xxh_u64 XXH3_mix16B(const xxh_u8* XXH_RESTRICT input,
-                                     const xxh_u8* XXH_RESTRICT secret, xxh_u64 seed64)
-{
-#if defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__i386__) && defined(__SSE2__)  /* x86 + SSE2 */ \
-  && !defined(XXH_ENABLE_AUTOVECTORIZE)      /* Define to disable like XXH32 hack */
-    /*
-     * UGLY HACK:
-     * GCC for x86 tends to autovectorize the 128-bit multiply, resulting in
-     * slower code.
-     *
-     * By forcing seed64 into a register, we disrupt the cost model and
-     * cause it to scalarize. See `XXH32_round()`
-     *
-     * FIXME: Clang's output is still _much_ faster -- On an AMD Ryzen 3600,
-     * XXH3_64bits @ len=240 runs at 4.6 GB/s with Clang 9, but 3.3 GB/s on
-     * GCC 9.2, despite both emitting scalar code.
-     *
-     * GCC generates much better scalar code than Clang for the rest of XXH3,
-     * which is why finding a more optimal codepath is an interest.
-     */
-    XXH_COMPILER_GUARD(seed64);
-#endif
-    {   xxh_u64 const input_lo = XXH_readLE64(input);
-        xxh_u64 const input_hi = XXH_readLE64(input+8);
-        return XXH3_mul128_fold64(
-            input_lo ^ (XXH_readLE64(secret)   + seed64),
-            input_hi ^ (XXH_readLE64(secret+8) - seed64)
-        );
-    }
-}
-
-/* For mid range keys, XXH3 uses a Mum-hash variant. */
-XXH_FORCE_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_17to128_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                     const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                     XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
-#if XXH_SIZE_OPT >= 1
-        /* Smaller and cleaner, but slightly slower. */
-        unsigned int i = (unsigned int)(len - 1) / 32;
-        do {
-            acc += XXH3_mix16B(input+16 * i, secret+32*i, seed);
-            acc += XXH3_mix16B(input+len-16*(i+1), secret+32*i+16, seed);
-        } while (i-- != 0);
-#else
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc += XXH3_mix16B(input+48, secret+96, seed);
-                    acc += XXH3_mix16B(input+len-64, secret+112, seed);
-                }
-                acc += XXH3_mix16B(input+32, secret+64, seed);
-                acc += XXH3_mix16B(input+len-48, secret+80, seed);
-            }
-            acc += XXH3_mix16B(input+16, secret+32, seed);
-            acc += XXH3_mix16B(input+len-32, secret+48, seed);
-        }
-        acc += XXH3_mix16B(input+0, secret+0, seed);
-        acc += XXH3_mix16B(input+len-16, secret+16, seed);
-#endif
-        return XXH3_avalanche(acc);
-    }
-}
-
-XXH_NO_INLINE XXH_PUREF XXH64_hash_t
-XXH3_len_129to240_64b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    #define XXH3_MIDSIZE_STARTOFFSET 3
-    #define XXH3_MIDSIZE_LASTOFFSET  17
-
-    {   xxh_u64 acc = len * XXH_PRIME64_1;
-        xxh_u64 acc_end;
-        unsigned int const nbRounds = (unsigned int)len / 16;
-        unsigned int i;
-        XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-        for (i=0; i<8; i++) {
-            acc += XXH3_mix16B(input+(16*i), secret+(16*i), seed);
-        }
-        /* last bytes */
-        acc_end = XXH3_mix16B(input + len - 16, secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET, seed);
-        XXH_ASSERT(nbRounds >= 8);
-        acc = XXH3_avalanche(acc);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */ \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Clang for ARMv7-A tries to vectorize this loop, similar to GCC x86.
-         * In everywhere else, it uses scalar code.
-         *
-         * For 64->128-bit multiplies, even if the NEON was 100% optimal, it
-         * would still be slower than UMAAL (see XXH_mult64to128).
-         *
-         * Unfortunately, Clang doesn't handle the long multiplies properly and
-         * converts them to the nonexistent "vmulq_u64" intrinsic, which is then
-         * scalarized into an ugly mess of VMOV.32 instructions.
-         *
-         * This mess is difficult to avoid without turning autovectorization
-         * off completely, but they are usually relatively minor and/or not
-         * worth it to fix.
-         *
-         * This loop is the easiest to fix, as unlike XXH32, this pragma
-         * _actually works_ because it is a loop vectorization instead of an
-         * SLP vectorization.
-         */
-        #pragma clang loop vectorize(disable)
-#endif
-        for (i=8 ; i < nbRounds; i++) {
-            /*
-             * Prevents clang for unrolling the acc loop and interleaving with this one.
-             */
-            XXH_COMPILER_GUARD(acc);
-            acc_end += XXH3_mix16B(input+(16*i), secret+(16*(i-8)) + XXH3_MIDSIZE_STARTOFFSET, seed);
-        }
-        return XXH3_avalanche(acc + acc_end);
-    }
-}
-
-
-/* =======     Long Keys     ======= */
-
-#define XXH_STRIPE_LEN 64
-#define XXH_SECRET_CONSUME_RATE 8   /* nb of secret bytes consumed at each accumulation */
-#define XXH_ACC_NB (XXH_STRIPE_LEN / sizeof(xxh_u64))
-
-#ifdef XXH_OLD_NAMES
-#  define STRIPE_LEN XXH_STRIPE_LEN
-#  define ACC_NB XXH_ACC_NB
-#endif
-
-#ifndef XXH_PREFETCH_DIST
-#  ifdef __clang__
-#    define XXH_PREFETCH_DIST 320
-#  else
-#    if (XXH_VECTOR == XXH_AVX512)
-#      define XXH_PREFETCH_DIST 512
-#    else
-#      define XXH_PREFETCH_DIST 384
-#    endif
-#  endif  /* __clang__ */
-#endif  /* XXH_PREFETCH_DIST */
-
-/*
- * These macros are to generate an XXH3_accumulate() function.
- * The two arguments select the name suffix and target attribute.
- *
- * The name of this symbol is XXH3_accumulate_<name>() and it calls
- * XXH3_accumulate_512_<name>().
- *
- * It may be useful to hand implement this function if the compiler fails to
- * optimize the inline function.
- */
-#define XXH3_ACCUMULATE_TEMPLATE(name)                      \
-void                                                        \
-XXH3_accumulate_##name(xxh_u64* XXH_RESTRICT acc,           \
-                       const xxh_u8* XXH_RESTRICT input,    \
-                       const xxh_u8* XXH_RESTRICT secret,   \
-                       size_t nbStripes)                    \
-{                                                           \
-    size_t n;                                               \
-    for (n = 0; n < nbStripes; n++ ) {                      \
-        const xxh_u8* const in = input + n*XXH_STRIPE_LEN;  \
-        XXH_PREFETCH(in + XXH_PREFETCH_DIST);               \
-        XXH3_accumulate_512_##name(                         \
-                 acc,                                       \
-                 in,                                        \
-                 secret + n*XXH_SECRET_CONSUME_RATE);       \
-    }                                                       \
-}
-
-
-XXH_FORCE_INLINE void XXH_writeLE64(void* dst, xxh_u64 v64)
-{
-    if (!XXH_CPU_LITTLE_ENDIAN) v64 = XXH_swap64(v64);
-    XXH_memcpy(dst, &v64, sizeof(v64));
-}
-
-/* Several intrinsic functions below are supposed to accept __int64 as argument,
- * as documented in https://software.intel.com/sites/landingpage/IntrinsicsGuide/ .
- * However, several environments do not define __int64 type,
- * requiring a workaround.
- */
-#if !defined (__VMS) \
-  && (defined (__cplusplus) \
-  || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */) )
-    typedef int64_t xxh_i64;
-#else
-    /* the following type must have a width of 64-bit */
-    typedef long long xxh_i64;
-#endif
-
-
-/*
- * XXH3_accumulate_512 is the tightest loop for long inputs, and it is the most optimized.
- *
- * It is a hardened version of UMAC, based off of FARSH's implementation.
- *
- * This was chosen because it adapts quite well to 32-bit, 64-bit, and SIMD
- * implementations, and it is ridiculously fast.
- *
- * We harden it by mixing the original input to the accumulators as well as the product.
- *
- * This means that in the (relatively likely) case of a multiply by zero, the
- * original input is preserved.
- *
- * On 128-bit inputs, we swap 64-bit pairs when we add the input to improve
- * cross-pollination, as otherwise the upper and lower halves would be
- * essentially independent.
- *
- * This doesn't matter on 64-bit hashes since they all get merged together in
- * the end, so we skip the extra step.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
- */
-
-#if (XXH_VECTOR == XXH_AVX512) \
-     || (defined(XXH_DISPATCH_AVX512) && XXH_DISPATCH_AVX512 != 0)
-
-#ifndef XXH_TARGET_AVX512
-# define XXH_TARGET_AVX512  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_accumulate_512_avx512(void* XXH_RESTRICT acc,
-                     const void* XXH_RESTRICT input,
-                     const void* XXH_RESTRICT secret)
-{
-    __m512i* const xacc = (__m512i *) acc;
-    XXH_ASSERT((((size_t)acc) & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
-
-    {
-        /* data_vec    = input[0]; */
-        __m512i const data_vec    = _mm512_loadu_si512   (input);
-        /* key_vec     = secret[0]; */
-        __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        /* data_key    = data_vec ^ key_vec; */
-        __m512i const data_key    = _mm512_xor_si512     (data_vec, key_vec);
-        /* data_key_lo = data_key >> 32; */
-        __m512i const data_key_lo = _mm512_srli_epi64 (data_key, 32);
-        /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-        __m512i const product     = _mm512_mul_epu32     (data_key, data_key_lo);
-        /* xacc[0] += swap(data_vec); */
-        __m512i const data_swap = _mm512_shuffle_epi32(data_vec, (_MM_PERM_ENUM)_MM_SHUFFLE(1, 0, 3, 2));
-        __m512i const sum       = _mm512_add_epi64(*xacc, data_swap);
-        /* xacc[0] += product; */
-        *xacc = _mm512_add_epi64(product, sum);
-    }
-}
-XXH_FORCE_INLINE XXH_TARGET_AVX512 XXH3_ACCUMULATE_TEMPLATE(avx512)
-
-/*
- * XXH3_scrambleAcc: Scrambles the accumulators to improve mixing.
- *
- * Multiplication isn't perfect, as explained by Google in HighwayHash:
- *
- *  // Multiplication mixes/scrambles bytes 0-7 of the 64-bit result to
- *  // varying degrees. In descending order of goodness, bytes
- *  // 3 4 2 5 1 6 0 7 have quality 228 224 164 160 100 96 36 32.
- *  // As expected, the upper and lower bytes are much worse.
- *
- * Source: https://github.com/google/highwayhash/blob/0aaf66b/highwayhash/hh_avx2.h#L291
- *
- * Since our algorithm uses a pseudorandom secret to add some variance into the
- * mix, we don't need to (or want to) mix as often or as much as HighwayHash does.
- *
- * This isn't as tight as XXH3_accumulate, but still written in SIMD to avoid
- * extraction.
- *
- * Both XXH3_64bits and XXH3_128bits use this subroutine.
- */
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_scrambleAcc_avx512(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_STRIPE_LEN == sizeof(__m512i));
-    {   __m512i* const xacc = (__m512i*) acc;
-        const __m512i prime32 = _mm512_set1_epi32((int)XXH_PRIME32_1);
-
-        /* xacc[0] ^= (xacc[0] >> 47) */
-        __m512i const acc_vec     = *xacc;
-        __m512i const shifted     = _mm512_srli_epi64    (acc_vec, 47);
-        /* xacc[0] ^= secret; */
-        __m512i const key_vec     = _mm512_loadu_si512   (secret);
-        __m512i const data_key    = _mm512_ternarylogic_epi32(key_vec, acc_vec, shifted, 0x96 /* key_vec ^ acc_vec ^ shifted */);
-
-        /* xacc[0] *= XXH_PRIME32_1; */
-        __m512i const data_key_hi = _mm512_srli_epi64 (data_key, 32);
-        __m512i const prod_lo     = _mm512_mul_epu32     (data_key, prime32);
-        __m512i const prod_hi     = _mm512_mul_epu32     (data_key_hi, prime32);
-        *xacc = _mm512_add_epi64(prod_lo, _mm512_slli_epi64(prod_hi, 32));
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_AVX512 void
-XXH3_initCustomSecret_avx512(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 63) == 0);
-    XXH_STATIC_ASSERT(XXH_SEC_ALIGN == 64);
-    XXH_ASSERT(((size_t)customSecret & 63) == 0);
-    (void)(&XXH_writeLE64);
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m512i);
-        __m512i const seed_pos = _mm512_set1_epi64((xxh_i64)seed64);
-        __m512i const seed     = _mm512_mask_sub_epi64(seed_pos, 0xAA, _mm512_set1_epi8(0), seed_pos);
-
-        const __m512i* const src  = (const __m512i*) ((const void*) XXH3_kSecret);
-              __m512i* const dest = (      __m512i*) customSecret;
-        int i;
-        XXH_ASSERT(((size_t)src & 63) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dest & 63) == 0);
-        for (i=0; i < nbRounds; ++i) {
-            dest[i] = _mm512_add_epi64(_mm512_load_si512(src + i), seed);
-    }   }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_AVX2) \
-    || (defined(XXH_DISPATCH_AVX2) && XXH_DISPATCH_AVX2 != 0)
-
-#ifndef XXH_TARGET_AVX2
-# define XXH_TARGET_AVX2  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void
-XXH3_accumulate_512_avx2( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   __m256i* const xacc    =       (__m256i *) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires  a const __m256i * pointer for some reason. */
-        const         __m256i* const xinput  = (const __m256i *) input;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
-        const         __m256i* const xsecret = (const __m256i *) secret;
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
-            /* data_vec    = xinput[i]; */
-            __m256i const data_vec    = _mm256_loadu_si256    (xinput+i);
-            /* key_vec     = xsecret[i]; */
-            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
-            /* data_key    = data_vec ^ key_vec; */
-            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
-            /* data_key_lo = data_key >> 32; */
-            __m256i const data_key_lo = _mm256_srli_epi64 (data_key, 32);
-            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-            __m256i const product     = _mm256_mul_epu32     (data_key, data_key_lo);
-            /* xacc[i] += swap(data_vec); */
-            __m256i const data_swap = _mm256_shuffle_epi32(data_vec, _MM_SHUFFLE(1, 0, 3, 2));
-            __m256i const sum       = _mm256_add_epi64(xacc[i], data_swap);
-            /* xacc[i] += product; */
-            xacc[i] = _mm256_add_epi64(product, sum);
-    }   }
-}
-XXH_FORCE_INLINE XXH_TARGET_AVX2 XXH3_ACCUMULATE_TEMPLATE(avx2)
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void
-XXH3_scrambleAcc_avx2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 31) == 0);
-    {   __m256i* const xacc = (__m256i*) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm256_loadu_si256 requires a const __m256i * pointer for some reason. */
-        const         __m256i* const xsecret = (const __m256i *) secret;
-        const __m256i prime32 = _mm256_set1_epi32((int)XXH_PRIME32_1);
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m256i); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47) */
-            __m256i const acc_vec     = xacc[i];
-            __m256i const shifted     = _mm256_srli_epi64    (acc_vec, 47);
-            __m256i const data_vec    = _mm256_xor_si256     (acc_vec, shifted);
-            /* xacc[i] ^= xsecret; */
-            __m256i const key_vec     = _mm256_loadu_si256   (xsecret+i);
-            __m256i const data_key    = _mm256_xor_si256     (data_vec, key_vec);
-
-            /* xacc[i] *= XXH_PRIME32_1; */
-            __m256i const data_key_hi = _mm256_srli_epi64 (data_key, 32);
-            __m256i const prod_lo     = _mm256_mul_epu32     (data_key, prime32);
-            __m256i const prod_hi     = _mm256_mul_epu32     (data_key_hi, prime32);
-            xacc[i] = _mm256_add_epi64(prod_lo, _mm256_slli_epi64(prod_hi, 32));
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_AVX2 void XXH3_initCustomSecret_avx2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 31) == 0);
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE / sizeof(__m256i)) == 6);
-    XXH_STATIC_ASSERT(XXH_SEC_ALIGN <= 64);
-    (void)(&XXH_writeLE64);
-    XXH_PREFETCH(customSecret);
-    {   __m256i const seed = _mm256_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64, (xxh_i64)(0U - seed64), (xxh_i64)seed64);
-
-        const __m256i* const src  = (const __m256i*) ((const void*) XXH3_kSecret);
-              __m256i*       dest = (      __m256i*) customSecret;
-
-#       if defined(__GNUC__) || defined(__clang__)
-        /*
-         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
-         *   - do not extract the secret from sse registers in the internal loop
-         *   - use less common registers, and avoid pushing these reg into stack
-         */
-        XXH_COMPILER_GUARD(dest);
-#       endif
-        XXH_ASSERT(((size_t)src & 31) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dest & 31) == 0);
-
-        /* GCC -O2 need unroll loop manually */
-        dest[0] = _mm256_add_epi64(_mm256_load_si256(src+0), seed);
-        dest[1] = _mm256_add_epi64(_mm256_load_si256(src+1), seed);
-        dest[2] = _mm256_add_epi64(_mm256_load_si256(src+2), seed);
-        dest[3] = _mm256_add_epi64(_mm256_load_si256(src+3), seed);
-        dest[4] = _mm256_add_epi64(_mm256_load_si256(src+4), seed);
-        dest[5] = _mm256_add_epi64(_mm256_load_si256(src+5), seed);
-    }
-}
-
-#endif
-
-/* x86dispatch always generates SSE2 */
-#if (XXH_VECTOR == XXH_SSE2) || defined(XXH_X86DISPATCH)
-
-#ifndef XXH_TARGET_SSE2
-# define XXH_TARGET_SSE2  /* disable attribute target */
-#endif
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void
-XXH3_accumulate_512_sse2( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    /* SSE2 is just a half-scale version of the AVX2 version. */
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {   __m128i* const xacc    =       (__m128i *) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xinput  = (const __m128i *) input;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xsecret = (const __m128i *) secret;
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
-            /* data_vec    = xinput[i]; */
-            __m128i const data_vec    = _mm_loadu_si128   (xinput+i);
-            /* key_vec     = xsecret[i]; */
-            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
-            /* data_key    = data_vec ^ key_vec; */
-            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
-            /* data_key_lo = data_key >> 32; */
-            __m128i const data_key_lo = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            /* product     = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-            __m128i const product     = _mm_mul_epu32     (data_key, data_key_lo);
-            /* xacc[i] += swap(data_vec); */
-            __m128i const data_swap = _mm_shuffle_epi32(data_vec, _MM_SHUFFLE(1,0,3,2));
-            __m128i const sum       = _mm_add_epi64(xacc[i], data_swap);
-            /* xacc[i] += product; */
-            xacc[i] = _mm_add_epi64(product, sum);
-    }   }
-}
-XXH_FORCE_INLINE XXH_TARGET_SSE2 XXH3_ACCUMULATE_TEMPLATE(sse2)
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void
-XXH3_scrambleAcc_sse2(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {   __m128i* const xacc = (__m128i*) acc;
-        /* Unaligned. This is mainly for pointer arithmetic, and because
-         * _mm_loadu_si128 requires a const __m128i * pointer for some reason. */
-        const         __m128i* const xsecret = (const __m128i *) secret;
-        const __m128i prime32 = _mm_set1_epi32((int)XXH_PRIME32_1);
-
-        size_t i;
-        for (i=0; i < XXH_STRIPE_LEN/sizeof(__m128i); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47) */
-            __m128i const acc_vec     = xacc[i];
-            __m128i const shifted     = _mm_srli_epi64    (acc_vec, 47);
-            __m128i const data_vec    = _mm_xor_si128     (acc_vec, shifted);
-            /* xacc[i] ^= xsecret[i]; */
-            __m128i const key_vec     = _mm_loadu_si128   (xsecret+i);
-            __m128i const data_key    = _mm_xor_si128     (data_vec, key_vec);
-
-            /* xacc[i] *= XXH_PRIME32_1; */
-            __m128i const data_key_hi = _mm_shuffle_epi32 (data_key, _MM_SHUFFLE(0, 3, 0, 1));
-            __m128i const prod_lo     = _mm_mul_epu32     (data_key, prime32);
-            __m128i const prod_hi     = _mm_mul_epu32     (data_key_hi, prime32);
-            xacc[i] = _mm_add_epi64(prod_lo, _mm_slli_epi64(prod_hi, 32));
-        }
-    }
-}
-
-XXH_FORCE_INLINE XXH_TARGET_SSE2 void XXH3_initCustomSecret_sse2(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-    (void)(&XXH_writeLE64);
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / sizeof(__m128i);
-
-#       if defined(_MSC_VER) && defined(_M_IX86) && _MSC_VER < 1900
-        /* MSVC 32bit mode does not support _mm_set_epi64x before 2015 */
-        XXH_ALIGN(16) const xxh_i64 seed64x2[2] = { (xxh_i64)seed64, (xxh_i64)(0U - seed64) };
-        __m128i const seed = _mm_load_si128((__m128i const*)seed64x2);
-#       else
-        __m128i const seed = _mm_set_epi64x((xxh_i64)(0U - seed64), (xxh_i64)seed64);
-#       endif
-        int i;
-
-        const void* const src16 = XXH3_kSecret;
-        __m128i* dst16 = (__m128i*) customSecret;
-#       if defined(__GNUC__) || defined(__clang__)
-        /*
-         * On GCC & Clang, marking 'dest' as modified will cause the compiler:
-         *   - do not extract the secret from sse registers in the internal loop
-         *   - use less common registers, and avoid pushing these reg into stack
-         */
-        XXH_COMPILER_GUARD(dst16);
-#       endif
-        XXH_ASSERT(((size_t)src16 & 15) == 0); /* control alignment */
-        XXH_ASSERT(((size_t)dst16 & 15) == 0);
-
-        for (i=0; i < nbRounds; ++i) {
-            dst16[i] = _mm_add_epi64(_mm_load_si128((const __m128i *)src16+i), seed);
-    }   }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_NEON)
-
-/* forward declarations for the scalar routines */
-XXH_FORCE_INLINE void
-XXH3_scalarRound(void* XXH_RESTRICT acc, void const* XXH_RESTRICT input,
-                 void const* XXH_RESTRICT secret, size_t lane);
-
-XXH_FORCE_INLINE void
-XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
-                         void const* XXH_RESTRICT secret, size_t lane);
-
-/*!
- * @internal
- * @brief The bulk processing loop for NEON and WASM SIMD128.
- *
- * The NEON code path is actually partially scalar when running on AArch64. This
- * is to optimize the pipelining and can have up to 15% speedup depending on the
- * CPU, and it also mitigates some GCC codegen issues.
- *
- * @see XXH3_NEON_LANES for configuring this and details about this optimization.
- *
- * NEON's 32-bit to 64-bit long multiply takes a half vector of 32-bit
- * integers instead of the other platforms which mask full 64-bit vectors,
- * so the setup is more complicated than just shifting right.
- *
- * Additionally, there is an optimization for 4 lanes at once noted below.
- *
- * Since, as stated, the most optimal amount of lanes for Cortexes is 6,
- * there needs to be *three* versions of the accumulate operation used
- * for the remaining 2 lanes.
- *
- * WASM's SIMD128 uses SIMDe's arm_neon.h polyfill because the intrinsics overlap
- * nearly perfectly.
- */
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_neon( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    XXH_STATIC_ASSERT(XXH3_NEON_LANES > 0 && XXH3_NEON_LANES <= XXH_ACC_NB && XXH3_NEON_LANES % 2 == 0);
-    {   /* GCC for darwin arm64 does not like aliasing here */
-        xxh_aliasing_uint64x2_t* const xacc = (xxh_aliasing_uint64x2_t*) acc;
-        /* We don't use a uint32x4_t pointer because it causes bus errors on ARMv7. */
-        uint8_t const* xinput = (const uint8_t *) input;
-        uint8_t const* xsecret  = (const uint8_t *) secret;
-
-        size_t i;
-#ifdef __wasm_simd128__
-        /*
-         * On WASM SIMD128, Clang emits direct address loads when XXH3_kSecret
-         * is constant propagated, which results in it converting it to this
-         * inside the loop:
-         *
-         *    a = v128.load(XXH3_kSecret +  0 + $secret_offset, offset = 0)
-         *    b = v128.load(XXH3_kSecret + 16 + $secret_offset, offset = 0)
-         *    ...
-         *
-         * This requires a full 32-bit address immediate (and therefore a 6 byte
-         * instruction) as well as an add for each offset.
-         *
-         * Putting an asm guard prevents it from folding (at the cost of losing
-         * the alignment hint), and uses the free offset in `v128.load` instead
-         * of adding secret_offset each time which overall reduces code size by
-         * about a kilobyte and improves performance.
-         */
-        XXH_COMPILER_GUARD(xsecret);
-#endif
-        /* Scalar lanes use the normal scalarRound routine */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarRound(acc, input, secret, i);
-        }
-        i = 0;
-        /* 4 NEON lanes at a time. */
-        for (; i+1 < XXH3_NEON_LANES / 2; i+=2) {
-            /* data_vec = xinput[i]; */
-            uint64x2_t data_vec_1 = XXH_vld1q_u64(xinput  + (i * 16));
-            uint64x2_t data_vec_2 = XXH_vld1q_u64(xinput  + ((i+1) * 16));
-            /* key_vec  = xsecret[i];  */
-            uint64x2_t key_vec_1  = XXH_vld1q_u64(xsecret + (i * 16));
-            uint64x2_t key_vec_2  = XXH_vld1q_u64(xsecret + ((i+1) * 16));
-            /* data_swap = swap(data_vec) */
-            uint64x2_t data_swap_1 = vextq_u64(data_vec_1, data_vec_1, 1);
-            uint64x2_t data_swap_2 = vextq_u64(data_vec_2, data_vec_2, 1);
-            /* data_key = data_vec ^ key_vec; */
-            uint64x2_t data_key_1 = veorq_u64(data_vec_1, key_vec_1);
-            uint64x2_t data_key_2 = veorq_u64(data_vec_2, key_vec_2);
-
-            /*
-             * If we reinterpret the 64x2 vectors as 32x4 vectors, we can use a
-             * de-interleave operation for 4 lanes in 1 step with `vuzpq_u32` to
-             * get one vector with the low 32 bits of each lane, and one vector
-             * with the high 32 bits of each lane.
-             *
-             * The intrinsic returns a double vector because the original ARMv7-a
-             * instruction modified both arguments in place. AArch64 and SIMD128 emit
-             * two instructions from this intrinsic.
-             *
-             *  [ dk11L | dk11H | dk12L | dk12H ] -> [ dk11L | dk12L | dk21L | dk22L ]
-             *  [ dk21L | dk21H | dk22L | dk22H ] -> [ dk11H | dk12H | dk21H | dk22H ]
-             */
-            uint32x4x2_t unzipped = vuzpq_u32(
-                vreinterpretq_u32_u64(data_key_1),
-                vreinterpretq_u32_u64(data_key_2)
-            );
-            /* data_key_lo = data_key & 0xFFFFFFFF */
-            uint32x4_t data_key_lo = unzipped.val[0];
-            /* data_key_hi = data_key >> 32 */
-            uint32x4_t data_key_hi = unzipped.val[1];
-            /*
-             * Then, we can split the vectors horizontally and multiply which, as for most
-             * widening intrinsics, have a variant that works on both high half vectors
-             * for free on AArch64. A similar instruction is available on SIMD128.
-             *
-             * sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi
-             */
-            uint64x2_t sum_1 = XXH_vmlal_low_u32(data_swap_1, data_key_lo, data_key_hi);
-            uint64x2_t sum_2 = XXH_vmlal_high_u32(data_swap_2, data_key_lo, data_key_hi);
-            /*
-             * Clang reorders
-             *    a += b * c;     // umlal   swap.2d, dkl.2s, dkh.2s
-             *    c += a;         // add     acc.2d, acc.2d, swap.2d
-             * to
-             *    c += a;         // add     acc.2d, acc.2d, swap.2d
-             *    c += b * c;     // umlal   acc.2d, dkl.2s, dkh.2s
-             *
-             * While it would make sense in theory since the addition is faster,
-             * for reasons likely related to umlal being limited to certain NEON
-             * pipelines, this is worse. A compiler guard fixes this.
-             */
-            XXH_COMPILER_GUARD_CLANG_NEON(sum_1);
-            XXH_COMPILER_GUARD_CLANG_NEON(sum_2);
-            /* xacc[i] = acc_vec + sum; */
-            xacc[i]   = vaddq_u64(xacc[i], sum_1);
-            xacc[i+1] = vaddq_u64(xacc[i+1], sum_2);
-        }
-        /* Operate on the remaining NEON lanes 2 at a time. */
-        for (; i < XXH3_NEON_LANES / 2; i++) {
-            /* data_vec = xinput[i]; */
-            uint64x2_t data_vec = XXH_vld1q_u64(xinput  + (i * 16));
-            /* key_vec  = xsecret[i];  */
-            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
-            /* acc_vec_2 = swap(data_vec) */
-            uint64x2_t data_swap = vextq_u64(data_vec, data_vec, 1);
-            /* data_key = data_vec ^ key_vec; */
-            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
-            /* For two lanes, just use VMOVN and VSHRN. */
-            /* data_key_lo = data_key & 0xFFFFFFFF; */
-            uint32x2_t data_key_lo = vmovn_u64(data_key);
-            /* data_key_hi = data_key >> 32; */
-            uint32x2_t data_key_hi = vshrn_n_u64(data_key, 32);
-            /* sum = data_swap + (u64x2) data_key_lo * (u64x2) data_key_hi; */
-            uint64x2_t sum = vmlal_u32(data_swap, data_key_lo, data_key_hi);
-            /* Same Clang workaround as before */
-            XXH_COMPILER_GUARD_CLANG_NEON(sum);
-            /* xacc[i] = acc_vec + sum; */
-            xacc[i] = vaddq_u64 (xacc[i], sum);
-        }
-    }
-}
-XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(neon)
-
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_neon(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-
-    {   xxh_aliasing_uint64x2_t* xacc       = (xxh_aliasing_uint64x2_t*) acc;
-        uint8_t const* xsecret = (uint8_t const*) secret;
-
-        size_t i;
-        /* WASM uses operator overloads and doesn't need these. */
-#ifndef __wasm_simd128__
-        /* { prime32_1, prime32_1 } */
-        uint32x2_t const kPrimeLo = vdup_n_u32(XXH_PRIME32_1);
-        /* { 0, prime32_1, 0, prime32_1 } */
-        uint32x4_t const kPrimeHi = vreinterpretq_u32_u64(vdupq_n_u64((xxh_u64)XXH_PRIME32_1 << 32));
-#endif
-
-        /* AArch64 uses both scalar and neon at the same time */
-        for (i = XXH3_NEON_LANES; i < XXH_ACC_NB; i++) {
-            XXH3_scalarScrambleRound(acc, secret, i);
-        }
-        for (i=0; i < XXH3_NEON_LANES / 2; i++) {
-            /* xacc[i] ^= (xacc[i] >> 47); */
-            uint64x2_t acc_vec  = xacc[i];
-            uint64x2_t shifted  = vshrq_n_u64(acc_vec, 47);
-            uint64x2_t data_vec = veorq_u64(acc_vec, shifted);
-
-            /* xacc[i] ^= xsecret[i]; */
-            uint64x2_t key_vec  = XXH_vld1q_u64(xsecret + (i * 16));
-            uint64x2_t data_key = veorq_u64(data_vec, key_vec);
-            /* xacc[i] *= XXH_PRIME32_1 */
-#ifdef __wasm_simd128__
-            /* SIMD128 has multiply by u64x2, use it instead of expanding and scalarizing */
-            xacc[i] = data_key * XXH_PRIME32_1;
-#else
-            /*
-             * Expanded version with portable NEON intrinsics
-             *
-             *    lo(x) * lo(y) + (hi(x) * lo(y) << 32)
-             *
-             * prod_hi = hi(data_key) * lo(prime) << 32
-             *
-             * Since we only need 32 bits of this multiply a trick can be used, reinterpreting the vector
-             * as a uint32x4_t and multiplying by { 0, prime, 0, prime } to cancel out the unwanted bits
-             * and avoid the shift.
-             */
-            uint32x4_t prod_hi = vmulq_u32 (vreinterpretq_u32_u64(data_key), kPrimeHi);
-            /* Extract low bits for vmlal_u32  */
-            uint32x2_t data_key_lo = vmovn_u64(data_key);
-            /* xacc[i] = prod_hi + lo(data_key) * XXH_PRIME32_1; */
-            xacc[i] = vmlal_u32(vreinterpretq_u64_u32(prod_hi), data_key_lo, kPrimeLo);
-#endif
-        }
-    }
-}
-#endif
-
-#if (XXH_VECTOR == XXH_VSX)
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_vsx(  void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    /* presumed aligned */
-    xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
-    xxh_u8 const* const xinput   = (xxh_u8 const*) input;   /* no alignment restriction */
-    xxh_u8 const* const xsecret  = (xxh_u8 const*) secret;    /* no alignment restriction */
-    xxh_u64x2 const v32 = { 32, 32 };
-    size_t i;
-    for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
-        /* data_vec = xinput[i]; */
-        xxh_u64x2 const data_vec = XXH_vec_loadu(xinput + 16*i);
-        /* key_vec = xsecret[i]; */
-        xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
-        xxh_u64x2 const data_key = data_vec ^ key_vec;
-        /* shuffled = (data_key << 32) | (data_key >> 32); */
-        xxh_u32x4 const shuffled = (xxh_u32x4)vec_rl(data_key, v32);
-        /* product = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)shuffled & 0xFFFFFFFF); */
-        xxh_u64x2 const product  = XXH_vec_mulo((xxh_u32x4)data_key, shuffled);
-        /* acc_vec = xacc[i]; */
-        xxh_u64x2 acc_vec        = xacc[i];
-        acc_vec += product;
-
-        /* swap high and low halves */
-#ifdef __s390x__
-        acc_vec += vec_permi(data_vec, data_vec, 2);
-#else
-        acc_vec += vec_xxpermdi(data_vec, data_vec, 2);
-#endif
-        xacc[i] = acc_vec;
-    }
-}
-XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(vsx)
-
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_vsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-
-    {   xxh_aliasing_u64x2* const xacc = (xxh_aliasing_u64x2*) acc;
-        const xxh_u8* const xsecret = (const xxh_u8*) secret;
-        /* constants */
-        xxh_u64x2 const v32  = { 32, 32 };
-        xxh_u64x2 const v47 = { 47, 47 };
-        xxh_u32x4 const prime = { XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1, XXH_PRIME32_1 };
-        size_t i;
-        for (i = 0; i < XXH_STRIPE_LEN / sizeof(xxh_u64x2); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47); */
-            xxh_u64x2 const acc_vec  = xacc[i];
-            xxh_u64x2 const data_vec = acc_vec ^ (acc_vec >> v47);
-
-            /* xacc[i] ^= xsecret[i]; */
-            xxh_u64x2 const key_vec  = XXH_vec_loadu(xsecret + 16*i);
-            xxh_u64x2 const data_key = data_vec ^ key_vec;
-
-            /* xacc[i] *= XXH_PRIME32_1 */
-            /* prod_lo = ((xxh_u64x2)data_key & 0xFFFFFFFF) * ((xxh_u64x2)prime & 0xFFFFFFFF);  */
-            xxh_u64x2 const prod_even  = XXH_vec_mule((xxh_u32x4)data_key, prime);
-            /* prod_hi = ((xxh_u64x2)data_key >> 32) * ((xxh_u64x2)prime >> 32);  */
-            xxh_u64x2 const prod_odd  = XXH_vec_mulo((xxh_u32x4)data_key, prime);
-            xacc[i] = prod_odd + (prod_even << v32);
-    }   }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_SVE)
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_sve( void* XXH_RESTRICT acc,
-                   const void* XXH_RESTRICT input,
-                   const void* XXH_RESTRICT secret)
-{
-    uint64_t *xacc = (uint64_t *)acc;
-    const uint64_t *xinput = (const uint64_t *)(const void *)input;
-    const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
-    svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
-    uint64_t element_count = svcntd();
-    if (element_count >= 8) {
-        svbool_t mask = svptrue_pat_b64(SV_VL8);
-        svuint64_t vacc = svld1_u64(mask, xacc);
-        ACCRND(vacc, 0);
-        svst1_u64(mask, xacc, vacc);
-    } else if (element_count == 2) {   /* sve128 */
-        svbool_t mask = svptrue_pat_b64(SV_VL2);
-        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
-        svuint64_t acc1 = svld1_u64(mask, xacc + 2);
-        svuint64_t acc2 = svld1_u64(mask, xacc + 4);
-        svuint64_t acc3 = svld1_u64(mask, xacc + 6);
-        ACCRND(acc0, 0);
-        ACCRND(acc1, 2);
-        ACCRND(acc2, 4);
-        ACCRND(acc3, 6);
-        svst1_u64(mask, xacc + 0, acc0);
-        svst1_u64(mask, xacc + 2, acc1);
-        svst1_u64(mask, xacc + 4, acc2);
-        svst1_u64(mask, xacc + 6, acc3);
-    } else {
-        svbool_t mask = svptrue_pat_b64(SV_VL4);
-        svuint64_t acc0 = svld1_u64(mask, xacc + 0);
-        svuint64_t acc1 = svld1_u64(mask, xacc + 4);
-        ACCRND(acc0, 0);
-        ACCRND(acc1, 4);
-        svst1_u64(mask, xacc + 0, acc0);
-        svst1_u64(mask, xacc + 4, acc1);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_sve(xxh_u64* XXH_RESTRICT acc,
-               const xxh_u8* XXH_RESTRICT input,
-               const xxh_u8* XXH_RESTRICT secret,
-               size_t nbStripes)
-{
-    if (nbStripes != 0) {
-        uint64_t *xacc = (uint64_t *)acc;
-        const uint64_t *xinput = (const uint64_t *)(const void *)input;
-        const uint64_t *xsecret = (const uint64_t *)(const void *)secret;
-        svuint64_t kSwap = sveor_n_u64_z(svptrue_b64(), svindex_u64(0, 1), 1);
-        uint64_t element_count = svcntd();
-        if (element_count >= 8) {
-            svbool_t mask = svptrue_pat_b64(SV_VL8);
-            svuint64_t vacc = svld1_u64(mask, xacc + 0);
-            do {
-                /* svprfd(svbool_t, void *, enum svfprop); */
-                svprfd(mask, xinput + 128, SV_PLDL1STRM);
-                ACCRND(vacc, 0);
-                xinput += 8;
-                xsecret += 1;
-                nbStripes--;
-           } while (nbStripes != 0);
-
-           svst1_u64(mask, xacc + 0, vacc);
-        } else if (element_count == 2) { /* sve128 */
-            svbool_t mask = svptrue_pat_b64(SV_VL2);
-            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
-            svuint64_t acc1 = svld1_u64(mask, xacc + 2);
-            svuint64_t acc2 = svld1_u64(mask, xacc + 4);
-            svuint64_t acc3 = svld1_u64(mask, xacc + 6);
-            do {
-                svprfd(mask, xinput + 128, SV_PLDL1STRM);
-                ACCRND(acc0, 0);
-                ACCRND(acc1, 2);
-                ACCRND(acc2, 4);
-                ACCRND(acc3, 6);
-                xinput += 8;
-                xsecret += 1;
-                nbStripes--;
-           } while (nbStripes != 0);
-
-           svst1_u64(mask, xacc + 0, acc0);
-           svst1_u64(mask, xacc + 2, acc1);
-           svst1_u64(mask, xacc + 4, acc2);
-           svst1_u64(mask, xacc + 6, acc3);
-        } else {
-            svbool_t mask = svptrue_pat_b64(SV_VL4);
-            svuint64_t acc0 = svld1_u64(mask, xacc + 0);
-            svuint64_t acc1 = svld1_u64(mask, xacc + 4);
-            do {
-                svprfd(mask, xinput + 128, SV_PLDL1STRM);
-                ACCRND(acc0, 0);
-                ACCRND(acc1, 4);
-                xinput += 8;
-                xsecret += 1;
-                nbStripes--;
-           } while (nbStripes != 0);
-
-           svst1_u64(mask, xacc + 0, acc0);
-           svst1_u64(mask, xacc + 4, acc1);
-       }
-    }
-}
-
-#endif
-
-#if (XXH_VECTOR == XXH_LSX)
-#define _LSX_SHUFFLE(z, y, x, w) (((z) << 6) | ((y) << 4) | ((x) << 2) | (w))
-
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_lsx( void* XXH_RESTRICT acc,
-                    const void* XXH_RESTRICT input,
-                    const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
-        __m128i* const xacc    =       (__m128i *) acc;
-        const __m128i* const xinput  = (const __m128i *) input;
-        const __m128i* const xsecret = (const __m128i *) secret;
-
-        for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {
-            /* data_vec = xinput[i]; */
-            __m128i const data_vec = __lsx_vld(xinput + i, 0);
-            /* key_vec = xsecret[i]; */
-            __m128i const key_vec = __lsx_vld(xsecret + i, 0);
-            /* data_key = data_vec ^ key_vec; */
-            __m128i const data_key = __lsx_vxor_v(data_vec, key_vec);
-            /* data_key_lo = data_key >> 32; */
-            __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);
-            // __m128i const data_key_lo = __lsx_vsrli_d(data_key, 32);
-            /* product = (data_key & 0xffffffff) * (data_key_lo & 0xffffffff); */
-            __m128i const product = __lsx_vmulwev_d_wu(data_key, data_key_lo);
-            /* xacc[i] += swap(data_vec); */
-            __m128i const data_swap = __lsx_vshuf4i_w(data_vec, _LSX_SHUFFLE(1, 0, 3, 2));
-            __m128i const sum = __lsx_vadd_d(xacc[i], data_swap);
-            /* xacc[i] += product; */
-            xacc[i] = __lsx_vadd_d(product, sum);
-        }
-    }
-}
-XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(lsx)
-
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_lsx(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    XXH_ASSERT((((size_t)acc) & 15) == 0);
-    {
-        __m128i* const xacc = (__m128i*) acc;
-        const __m128i* const xsecret = (const __m128i *) secret;
-        const __m128i prime32 = __lsx_vreplgr2vr_w((int)XXH_PRIME32_1);
-
-        for (size_t i = 0; i < XXH_STRIPE_LEN / sizeof(__m128i); i++) {
-            /* xacc[i] ^= (xacc[i] >> 47) */
-            __m128i const acc_vec = xacc[i];
-            __m128i const shifted = __lsx_vsrli_d(acc_vec, 47);
-            __m128i const data_vec = __lsx_vxor_v(acc_vec, shifted);
-            /* xacc[i] ^= xsecret[i]; */
-            __m128i const key_vec = __lsx_vld(xsecret + i, 0);
-            __m128i const data_key = __lsx_vxor_v(data_vec, key_vec);
-
-            /* xacc[i] *= XXH_PRIME32_1; */
-            __m128i const data_key_hi = __lsx_vsrli_d(data_key, 32);
-            __m128i const prod_lo = __lsx_vmulwev_d_wu(data_key, prime32);
-            __m128i const prod_hi = __lsx_vmulwev_d_wu(data_key_hi, prime32);
-            xacc[i] = __lsx_vadd_d(prod_lo, __lsx_vslli_d(prod_hi, 32));
-        }
-    }
-}
-
-#endif
-
-/* scalar variants - universal */
-
-#if defined(__aarch64__) && (defined(__GNUC__) || defined(__clang__))
-/*
- * In XXH3_scalarRound(), GCC and Clang have a similar codegen issue, where they
- * emit an excess mask and a full 64-bit multiply-add (MADD X-form).
- *
- * While this might not seem like much, as AArch64 is a 64-bit architecture, only
- * big Cortex designs have a full 64-bit multiplier.
- *
- * On the little cores, the smaller 32-bit multiplier is used, and full 64-bit
- * multiplies expand to 2-3 multiplies in microcode. This has a major penalty
- * of up to 4 latency cycles and 2 stall cycles in the multiply pipeline.
- *
- * Thankfully, AArch64 still provides the 32-bit long multiply-add (UMADDL) which does
- * not have this penalty and does the mask automatically.
- */
-XXH_FORCE_INLINE xxh_u64
-XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
-{
-    xxh_u64 ret;
-    /* note: %x = 64-bit register, %w = 32-bit register */
-    __asm__("umaddl %x0, %w1, %w2, %x3" : "=r" (ret) : "r" (lhs), "r" (rhs), "r" (acc));
-    return ret;
-}
-#else
-XXH_FORCE_INLINE xxh_u64
-XXH_mult32to64_add64(xxh_u64 lhs, xxh_u64 rhs, xxh_u64 acc)
-{
-    return XXH_mult32to64((xxh_u32)lhs, (xxh_u32)rhs) + acc;
-}
-#endif
-
-/*!
- * @internal
- * @brief Scalar round for @ref XXH3_accumulate_512_scalar().
- *
- * This is extracted to its own function because the NEON path uses a combination
- * of NEON and scalar.
- */
-XXH_FORCE_INLINE void
-XXH3_scalarRound(void* XXH_RESTRICT acc,
-                 void const* XXH_RESTRICT input,
-                 void const* XXH_RESTRICT secret,
-                 size_t lane)
-{
-    xxh_u64* xacc = (xxh_u64*) acc;
-    xxh_u8 const* xinput  = (xxh_u8 const*) input;
-    xxh_u8 const* xsecret = (xxh_u8 const*) secret;
-    XXH_ASSERT(lane < XXH_ACC_NB);
-    XXH_ASSERT(((size_t)acc & (XXH_ACC_ALIGN-1)) == 0);
-    {
-        xxh_u64 const data_val = XXH_readLE64(xinput + lane * 8);
-        xxh_u64 const data_key = data_val ^ XXH_readLE64(xsecret + lane * 8);
-        xacc[lane ^ 1] += data_val; /* swap adjacent lanes */
-        xacc[lane] = XXH_mult32to64_add64(data_key /* & 0xFFFFFFFF */, data_key >> 32, xacc[lane]);
-    }
-}
-
-/*!
- * @internal
- * @brief Processes a 64 byte block of data using the scalar path.
- */
-XXH_FORCE_INLINE void
-XXH3_accumulate_512_scalar(void* XXH_RESTRICT acc,
-                     const void* XXH_RESTRICT input,
-                     const void* XXH_RESTRICT secret)
-{
-    size_t i;
-    /* ARM GCC refuses to unroll this loop, resulting in a 24% slowdown on ARMv6. */
-#if defined(__GNUC__) && !defined(__clang__) \
-  && (defined(__arm__) || defined(__thumb2__)) \
-  && defined(__ARM_FEATURE_UNALIGNED) /* no unaligned access just wastes bytes */ \
-  && XXH_SIZE_OPT <= 0
-#  pragma GCC unroll 8
-#endif
-    for (i=0; i < XXH_ACC_NB; i++) {
-        XXH3_scalarRound(acc, input, secret, i);
-    }
-}
-XXH_FORCE_INLINE XXH3_ACCUMULATE_TEMPLATE(scalar)
-
-/*!
- * @internal
- * @brief Scalar scramble step for @ref XXH3_scrambleAcc_scalar().
- *
- * This is extracted to its own function because the NEON path uses a combination
- * of NEON and scalar.
- */
-XXH_FORCE_INLINE void
-XXH3_scalarScrambleRound(void* XXH_RESTRICT acc,
-                         void const* XXH_RESTRICT secret,
-                         size_t lane)
-{
-    xxh_u64* const xacc = (xxh_u64*) acc;   /* presumed aligned */
-    const xxh_u8* const xsecret = (const xxh_u8*) secret;   /* no alignment restriction */
-    XXH_ASSERT((((size_t)acc) & (XXH_ACC_ALIGN-1)) == 0);
-    XXH_ASSERT(lane < XXH_ACC_NB);
-    {
-        xxh_u64 const key64 = XXH_readLE64(xsecret + lane * 8);
-        xxh_u64 acc64 = xacc[lane];
-        acc64 = XXH_xorshift64(acc64, 47);
-        acc64 ^= key64;
-        acc64 *= XXH_PRIME32_1;
-        xacc[lane] = acc64;
-    }
-}
-
-/*!
- * @internal
- * @brief Scrambles the accumulators after a large chunk has been read
- */
-XXH_FORCE_INLINE void
-XXH3_scrambleAcc_scalar(void* XXH_RESTRICT acc, const void* XXH_RESTRICT secret)
-{
-    size_t i;
-    for (i=0; i < XXH_ACC_NB; i++) {
-        XXH3_scalarScrambleRound(acc, secret, i);
-    }
-}
-
-XXH_FORCE_INLINE void
-XXH3_initCustomSecret_scalar(void* XXH_RESTRICT customSecret, xxh_u64 seed64)
-{
-    /*
-     * We need a separate pointer for the hack below,
-     * which requires a non-const pointer.
-     * Any decent compiler will optimize this out otherwise.
-     */
-    const xxh_u8* kSecretPtr = XXH3_kSecret;
-    XXH_STATIC_ASSERT((XXH_SECRET_DEFAULT_SIZE & 15) == 0);
-
-#if defined(__GNUC__) && defined(__aarch64__)
-    /*
-     * UGLY HACK:
-     * GCC and Clang generate a bunch of MOV/MOVK pairs for aarch64, and they are
-     * placed sequentially, in order, at the top of the unrolled loop.
-     *
-     * While MOVK is great for generating constants (2 cycles for a 64-bit
-     * constant compared to 4 cycles for LDR), it fights for bandwidth with
-     * the arithmetic instructions.
-     *
-     *   I   L   S
-     * MOVK
-     * MOVK
-     * MOVK
-     * MOVK
-     * ADD
-     * SUB      STR
-     *          STR
-     * By forcing loads from memory (as the asm line causes the compiler to assume
-     * that XXH3_kSecretPtr has been changed), the pipelines are used more
-     * efficiently:
-     *   I   L   S
-     *      LDR
-     *  ADD LDR
-     *  SUB     STR
-     *          STR
-     *
-     * See XXH3_NEON_LANES for details on the pipsline.
-     *
-     * XXH3_64bits_withSeed, len == 256, Snapdragon 835
-     *   without hack: 2654.4 MB/s
-     *   with hack:    3202.9 MB/s
-     */
-    XXH_COMPILER_GUARD(kSecretPtr);
-#endif
-    {   int const nbRounds = XXH_SECRET_DEFAULT_SIZE / 16;
-        int i;
-        for (i=0; i < nbRounds; i++) {
-            /*
-             * The asm hack causes the compiler to assume that kSecretPtr aliases with
-             * customSecret, and on aarch64, this prevented LDP from merging two
-             * loads together for free. Putting the loads together before the stores
-             * properly generates LDP.
-             */
-            xxh_u64 lo = XXH_readLE64(kSecretPtr + 16*i)     + seed64;
-            xxh_u64 hi = XXH_readLE64(kSecretPtr + 16*i + 8) - seed64;
-            XXH_writeLE64((xxh_u8*)customSecret + 16*i,     lo);
-            XXH_writeLE64((xxh_u8*)customSecret + 16*i + 8, hi);
-    }   }
-}
-
-
-typedef void (*XXH3_f_accumulate)(xxh_u64* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, const xxh_u8* XXH_RESTRICT, size_t);
-typedef void (*XXH3_f_scrambleAcc)(void* XXH_RESTRICT, const void*);
-typedef void (*XXH3_f_initCustomSecret)(void* XXH_RESTRICT, xxh_u64);
-
-
-#if (XXH_VECTOR == XXH_AVX512)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_avx512
-#define XXH3_accumulate     XXH3_accumulate_avx512
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx512
-#define XXH3_initCustomSecret XXH3_initCustomSecret_avx512
-
-#elif (XXH_VECTOR == XXH_AVX2)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_avx2
-#define XXH3_accumulate     XXH3_accumulate_avx2
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_avx2
-#define XXH3_initCustomSecret XXH3_initCustomSecret_avx2
-
-#elif (XXH_VECTOR == XXH_SSE2)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_sse2
-#define XXH3_accumulate     XXH3_accumulate_sse2
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_sse2
-#define XXH3_initCustomSecret XXH3_initCustomSecret_sse2
-
-#elif (XXH_VECTOR == XXH_NEON)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_neon
-#define XXH3_accumulate     XXH3_accumulate_neon
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_neon
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#elif (XXH_VECTOR == XXH_VSX)
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_vsx
-#define XXH3_accumulate     XXH3_accumulate_vsx
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_vsx
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#elif (XXH_VECTOR == XXH_SVE)
-#define XXH3_accumulate_512 XXH3_accumulate_512_sve
-#define XXH3_accumulate     XXH3_accumulate_sve
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#elif (XXH_VECTOR == XXH_LSX)
-#define XXH3_accumulate_512 XXH3_accumulate_512_lsx
-#define XXH3_accumulate     XXH3_accumulate_lsx
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_lsx
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#else /* scalar */
-
-#define XXH3_accumulate_512 XXH3_accumulate_512_scalar
-#define XXH3_accumulate     XXH3_accumulate_scalar
-#define XXH3_scrambleAcc    XXH3_scrambleAcc_scalar
-#define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-
-#endif
-
-#if XXH_SIZE_OPT >= 1 /* don't do SIMD for initialization */
-#  undef XXH3_initCustomSecret
-#  define XXH3_initCustomSecret XXH3_initCustomSecret_scalar
-#endif
-
-XXH_FORCE_INLINE void
-XXH3_hashLong_internal_loop(xxh_u64* XXH_RESTRICT acc,
-                      const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate f_acc,
-                            XXH3_f_scrambleAcc f_scramble)
-{
-    size_t const nbStripesPerBlock = (secretSize - XXH_STRIPE_LEN) / XXH_SECRET_CONSUME_RATE;
-    size_t const block_len = XXH_STRIPE_LEN * nbStripesPerBlock;
-    size_t const nb_blocks = (len - 1) / block_len;
-
-    size_t n;
-
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-
-    for (n = 0; n < nb_blocks; n++) {
-        f_acc(acc, input + n*block_len, secret, nbStripesPerBlock);
-        f_scramble(acc, secret + secretSize - XXH_STRIPE_LEN);
-    }
-
-    /* last partial block */
-    XXH_ASSERT(len > XXH_STRIPE_LEN);
-    {   size_t const nbStripes = ((len - 1) - (block_len * nb_blocks)) / XXH_STRIPE_LEN;
-        XXH_ASSERT(nbStripes <= (secretSize / XXH_SECRET_CONSUME_RATE));
-        f_acc(acc, input + nb_blocks*block_len, secret, nbStripes);
-
-        /* last stripe */
-        {   const xxh_u8* const p = input + len - XXH_STRIPE_LEN;
-#define XXH_SECRET_LASTACC_START 7  /* not aligned on 8, last secret is different from acc & scrambler */
-            XXH3_accumulate_512(acc, p, secret + secretSize - XXH_STRIPE_LEN - XXH_SECRET_LASTACC_START);
-    }   }
-}
-
-XXH_FORCE_INLINE xxh_u64
-XXH3_mix2Accs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret)
-{
-    return XXH3_mul128_fold64(
-               acc[0] ^ XXH_readLE64(secret),
-               acc[1] ^ XXH_readLE64(secret+8) );
-}
-
-static XXH_PUREF XXH64_hash_t
-XXH3_mergeAccs(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 start)
-{
-    xxh_u64 result64 = start;
-    size_t i = 0;
-
-    for (i = 0; i < 4; i++) {
-        result64 += XXH3_mix2Accs(acc+2*i, secret + 16*i);
-#if defined(__clang__)                                /* Clang */ \
-    && (defined(__arm__) || defined(__thumb__))       /* ARMv7 */ \
-    && (defined(__ARM_NEON) || defined(__ARM_NEON__)) /* NEON */  \
-    && !defined(XXH_ENABLE_AUTOVECTORIZE)             /* Define to disable */
-        /*
-         * UGLY HACK:
-         * Prevent autovectorization on Clang ARMv7-a. Exact same problem as
-         * the one in XXH3_len_129to240_64b. Speeds up shorter keys > 240b.
-         * XXH3_64bits, len == 256, Snapdragon 835:
-         *   without hack: 2063.7 MB/s
-         *   with hack:    2560.7 MB/s
-         */
-        XXH_COMPILER_GUARD(result64);
-#endif
-    }
-
-    return XXH3_avalanche(result64);
-}
-
-/* do not align on 8, so that the secret is different from the accumulator */
-#define XXH_SECRET_MERGEACCS_START 11
-
-static XXH_PUREF XXH64_hash_t
-XXH3_finalizeLong_64b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, xxh_u64 len)
-{
-    return XXH3_mergeAccs(acc, secret + XXH_SECRET_MERGEACCS_START, len * XXH_PRIME64_1);
-}
-
-#define XXH3_INIT_ACC { XXH_PRIME32_3, XXH_PRIME64_1, XXH_PRIME64_2, XXH_PRIME64_3, \
-                        XXH_PRIME64_4, XXH_PRIME32_2, XXH_PRIME64_5, XXH_PRIME32_1 }
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_internal(const void* XXH_RESTRICT input, size_t len,
-                           const void* XXH_RESTRICT secret, size_t secretSize,
-                           XXH3_f_accumulate f_acc,
-                           XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, (const xxh_u8*)secret, secretSize, f_acc, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    return XXH3_finalizeLong_64b(acc, (const xxh_u8*)secret, (xxh_u64)len);
-}
-
-/*
- * It's important for performance to transmit secret's size (when it's static)
- * so that the compiler can properly optimize the vectorized loop.
- * This makes a big performance difference for "medium" keys (<1 KB) when using AVX instruction set.
- * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
- * breaks -Og, this is XXH_NO_INLINE.
- */
-XXH3_WITH_SECRET_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSecret(const void* XXH_RESTRICT input, size_t len,
-                             XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64;
-    return XXH3_hashLong_64b_internal(input, len, secret, secretLen, XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-/*
- * It's preferable for performance that XXH3_hashLong is not inlined,
- * as it results in a smaller function for small data, easier to the instruction cache.
- * Note that inside this no_inline function, we do inline the internal loop,
- * and provide a statically defined secret size to allow optimization of vector loop.
- */
-XXH_NO_INLINE XXH_PUREF XXH64_hash_t
-XXH3_hashLong_64b_default(const void* XXH_RESTRICT input, size_t len,
-                          XXH64_hash_t seed64, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-/*
- * XXH3_hashLong_64b_withSeed():
- * Generate a custom key based on alteration of default XXH3_kSecret with the seed,
- * and then use this key for long mode hashing.
- *
- * This operation is decently fast but nonetheless costs a little bit of time.
- * Try to avoid it whenever possible (typically when seed==0).
- *
- * It's important for performance that XXH3_hashLong is not inlined. Not sure
- * why (uop cache maybe?), but the difference is large and easily measurable.
- */
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed_internal(const void* input, size_t len,
-                                    XXH64_hash_t seed,
-                                    XXH3_f_accumulate f_acc,
-                                    XXH3_f_scrambleAcc f_scramble,
-                                    XXH3_f_initCustomSecret f_initSec)
-{
-#if XXH_SIZE_OPT <= 0
-    if (seed == 0)
-        return XXH3_hashLong_64b_internal(input, len,
-                                          XXH3_kSecret, sizeof(XXH3_kSecret),
-                                          f_acc, f_scramble);
-#endif
-    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-        f_initSec(secret, seed);
-        return XXH3_hashLong_64b_internal(input, len, secret, sizeof(secret),
-                                          f_acc, f_scramble);
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong is not inlined.
- */
-XXH_NO_INLINE XXH64_hash_t
-XXH3_hashLong_64b_withSeed(const void* XXH_RESTRICT input, size_t len,
-                           XXH64_hash_t seed, const xxh_u8* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)secret; (void)secretLen;
-    return XXH3_hashLong_64b_withSeed_internal(input, len, seed,
-                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
-}
-
-
-typedef XXH64_hash_t (*XXH3_hashLong64_f)(const void* XXH_RESTRICT, size_t,
-                                          XXH64_hash_t, const xxh_u8* XXH_RESTRICT, size_t);
-
-XXH_FORCE_INLINE XXH64_hash_t
-XXH3_64bits_internal(const void* XXH_RESTRICT input, size_t len,
-                     XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
-                     XXH3_hashLong64_f f_hashLong)
-{
-    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
-    /*
-     * If an action is to be taken if `secretLen` condition is not respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash.
-     * Also, note that function signature doesn't offer room to return an error.
-     */
-    if (len <= 16)
-        return XXH3_len_0to16_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
-    if (len <= 128)
-        return XXH3_len_17to128_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_len_129to240_64b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    return f_hashLong(input, len, seed64, (const xxh_u8*)secret, secretLen);
-}
-
-
-/* ===   Public entry point   === */
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits(XXH_NOESCAPE const void* input, size_t length)
-{
-    return XXH3_64bits_internal(input, length, 0, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_default);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecret(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize)
-{
-    return XXH3_64bits_internal(input, length, 0, secret, secretSize, XXH3_hashLong_64b_withSecret);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSeed(XXH_NOESCAPE const void* input, size_t length, XXH64_hash_t seed)
-{
-    return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), XXH3_hashLong_64b_withSeed);
-}
-
-XXH_PUBLIC_API XXH64_hash_t
-XXH3_64bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t length, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    if (length <= XXH3_MIDSIZE_MAX)
-        return XXH3_64bits_internal(input, length, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_64b_withSecret(input, length, seed, (const xxh_u8*)secret, secretSize);
-}
-
-
-/* ===   XXH3 streaming   === */
-#ifndef XXH_NO_STREAM
-/*
- * Malloc's a pointer that is always aligned to @align.
- *
- * This must be freed with `XXH_alignedFree()`.
- *
- * malloc typically guarantees 16 byte alignment on 64-bit systems and 8 byte
- * alignment on 32-bit. This isn't enough for the 32 byte aligned loads in AVX2
- * or on 32-bit, the 16 byte aligned loads in SSE2 and NEON.
- *
- * This underalignment previously caused a rather obvious crash which went
- * completely unnoticed due to XXH3_createState() not actually being tested.
- * Credit to RedSpah for noticing this bug.
- *
- * The alignment is done manually: Functions like posix_memalign or _mm_malloc
- * are avoided: To maintain portability, we would have to write a fallback
- * like this anyways, and besides, testing for the existence of library
- * functions without relying on external build tools is impossible.
- *
- * The method is simple: Overallocate, manually align, and store the offset
- * to the original behind the returned pointer.
- *
- * Align must be a power of 2 and 8 <= align <= 128.
- */
-static XXH_MALLOCF void* XXH_alignedMalloc(size_t s, size_t align)
-{
-    XXH_ASSERT(align <= 128 && align >= 8); /* range check */
-    XXH_ASSERT((align & (align-1)) == 0);   /* power of 2 */
-    XXH_ASSERT(s != 0 && s < (s + align));  /* empty/overflow */
-    {   /* Overallocate to make room for manual realignment and an offset byte */
-        xxh_u8* base = (xxh_u8*)XXH_malloc(s + align);
-        if (base != NULL) {
-            /*
-             * Get the offset needed to align this pointer.
-             *
-             * Even if the returned pointer is aligned, there will always be
-             * at least one byte to store the offset to the original pointer.
-             */
-            size_t offset = align - ((size_t)base & (align - 1)); /* base % align */
-            /* Add the offset for the now-aligned pointer */
-            xxh_u8* ptr = base + offset;
-
-            XXH_ASSERT((size_t)ptr % align == 0);
-
-            /* Store the offset immediately before the returned pointer. */
-            ptr[-1] = (xxh_u8)offset;
-            return ptr;
-        }
-        return NULL;
-    }
-}
-/*
- * Frees an aligned pointer allocated by XXH_alignedMalloc(). Don't pass
- * normal malloc'd pointers, XXH_alignedMalloc has a specific data layout.
- */
-static void XXH_alignedFree(void* p)
-{
-    if (p != NULL) {
-        xxh_u8* ptr = (xxh_u8*)p;
-        /* Get the offset byte we added in XXH_malloc. */
-        xxh_u8 offset = ptr[-1];
-        /* Free the original malloc'd pointer */
-        xxh_u8* base = ptr - offset;
-        XXH_free(base);
-    }
-}
-/*! @ingroup XXH3_family */
-/*!
- * @brief Allocate an @ref XXH3_state_t.
- *
- * @return An allocated pointer of @ref XXH3_state_t on success.
- * @return `NULL` on failure.
- *
- * @note Must be freed with XXH3_freeState().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH3_state_t* XXH3_createState(void)
-{
-    XXH3_state_t* const state = (XXH3_state_t*)XXH_alignedMalloc(sizeof(XXH3_state_t), 64);
-    if (state==NULL) return NULL;
-    XXH3_INITSTATE(state);
-    return state;
-}
-
-/*! @ingroup XXH3_family */
-/*!
- * @brief Frees an @ref XXH3_state_t.
- *
- * @param statePtr A pointer to an @ref XXH3_state_t allocated with @ref XXH3_createState().
- *
- * @return @ref XXH_OK.
- *
- * @note Must be allocated with XXH3_createState().
- *
- * @see @ref streaming_example "Streaming Example"
- */
-XXH_PUBLIC_API XXH_errorcode XXH3_freeState(XXH3_state_t* statePtr)
-{
-    XXH_alignedFree(statePtr);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API void
-XXH3_copyState(XXH_NOESCAPE XXH3_state_t* dst_state, XXH_NOESCAPE const XXH3_state_t* src_state)
-{
-    XXH_memcpy(dst_state, src_state, sizeof(*dst_state));
-}
-
-static void
-XXH3_reset_internal(XXH3_state_t* statePtr,
-                    XXH64_hash_t seed,
-                    const void* secret, size_t secretSize)
-{
-    size_t const initStart = offsetof(XXH3_state_t, bufferedSize);
-    size_t const initLength = offsetof(XXH3_state_t, nbStripesPerBlock) - initStart;
-    XXH_ASSERT(offsetof(XXH3_state_t, nbStripesPerBlock) > initStart);
-    XXH_ASSERT(statePtr != NULL);
-    /* set members from bufferedSize to nbStripesPerBlock (excluded) to 0 */
-    memset((char*)statePtr + initStart, 0, initLength);
-    statePtr->acc[0] = XXH_PRIME32_3;
-    statePtr->acc[1] = XXH_PRIME64_1;
-    statePtr->acc[2] = XXH_PRIME64_2;
-    statePtr->acc[3] = XXH_PRIME64_3;
-    statePtr->acc[4] = XXH_PRIME64_4;
-    statePtr->acc[5] = XXH_PRIME32_2;
-    statePtr->acc[6] = XXH_PRIME64_5;
-    statePtr->acc[7] = XXH_PRIME32_1;
-    statePtr->seed = seed;
-    statePtr->useSeed = (seed != 0);
-    statePtr->extSecret = (const unsigned char*)secret;
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-    statePtr->secretLimit = secretSize - XXH_STRIPE_LEN;
-    statePtr->nbStripesPerBlock = statePtr->secretLimit / XXH_SECRET_CONSUME_RATE;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, XXH3_kSecret, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, 0, secret, secretSize);
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    if (seed==0) return XXH3_64bits_reset(statePtr);
-    if ((seed != statePtr->seed) || (statePtr->extSecret != NULL))
-        XXH3_initCustomSecret(statePtr->customSecret, seed);
-    XXH3_reset_internal(statePtr, seed, NULL, XXH_SECRET_DEFAULT_SIZE);
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed64)
-{
-    if (statePtr == NULL) return XXH_ERROR;
-    if (secret == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-    XXH3_reset_internal(statePtr, seed64, secret, secretSize);
-    statePtr->useSeed = 1; /* always, even if seed64==0 */
-    return XXH_OK;
-}
-
-/*!
- * @internal
- * @brief Processes a large input for XXH3_update() and XXH3_digest_long().
- *
- * Unlike XXH3_hashLong_internal_loop(), this can process data that overlaps a block.
- *
- * @param acc                Pointer to the 8 accumulator lanes
- * @param nbStripesSoFarPtr  In/out pointer to the number of leftover stripes in the block*
- * @param nbStripesPerBlock  Number of stripes in a block
- * @param input              Input pointer
- * @param nbStripes          Number of stripes to process
- * @param secret             Secret pointer
- * @param secretLimit        Offset of the last block in @p secret
- * @param f_acc              Pointer to an XXH3_accumulate implementation
- * @param f_scramble         Pointer to an XXH3_scrambleAcc implementation
- * @return                   Pointer past the end of @p input after processing
- */
-XXH_FORCE_INLINE const xxh_u8 *
-XXH3_consumeStripes(xxh_u64* XXH_RESTRICT acc,
-                    size_t* XXH_RESTRICT nbStripesSoFarPtr, size_t nbStripesPerBlock,
-                    const xxh_u8* XXH_RESTRICT input, size_t nbStripes,
-                    const xxh_u8* XXH_RESTRICT secret, size_t secretLimit,
-                    XXH3_f_accumulate f_acc,
-                    XXH3_f_scrambleAcc f_scramble)
-{
-    const xxh_u8* initialSecret = secret + *nbStripesSoFarPtr * XXH_SECRET_CONSUME_RATE;
-    /* Process full blocks */
-    if (nbStripes >= (nbStripesPerBlock - *nbStripesSoFarPtr)) {
-        /* Process the initial partial block... */
-        size_t nbStripesThisIter = nbStripesPerBlock - *nbStripesSoFarPtr;
-
-        do {
-            /* Accumulate and scramble */
-            f_acc(acc, input, initialSecret, nbStripesThisIter);
-            f_scramble(acc, secret + secretLimit);
-            input += nbStripesThisIter * XXH_STRIPE_LEN;
-            nbStripes -= nbStripesThisIter;
-            /* Then continue the loop with the full block size */
-            nbStripesThisIter = nbStripesPerBlock;
-            initialSecret = secret;
-        } while (nbStripes >= nbStripesPerBlock);
-        *nbStripesSoFarPtr = 0;
-    }
-    /* Process a partial block */
-    if (nbStripes > 0) {
-        f_acc(acc, input, initialSecret, nbStripes);
-        input += nbStripes * XXH_STRIPE_LEN;
-        *nbStripesSoFarPtr += nbStripes;
-    }
-    /* Return end pointer */
-    return input;
-}
-
-#ifndef XXH3_STREAM_USE_STACK
-# if XXH_SIZE_OPT <= 0 && !defined(__clang__) /* clang doesn't need additional stack space */
-#   define XXH3_STREAM_USE_STACK 1
-# endif
-#endif
-/*
- * Both XXH3_64bits_update and XXH3_128bits_update use this routine.
- */
-XXH_FORCE_INLINE XXH_errorcode
-XXH3_update(XXH3_state_t* XXH_RESTRICT const state,
-            const xxh_u8* XXH_RESTRICT input, size_t len,
-            XXH3_f_accumulate f_acc,
-            XXH3_f_scrambleAcc f_scramble)
-{
-    if (input==NULL) {
-        XXH_ASSERT(len == 0);
-        return XXH_OK;
-    }
-
-    XXH_ASSERT(state != NULL);
-    {   const xxh_u8* const bEnd = input + len;
-        const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
-        /* For some reason, gcc and MSVC seem to suffer greatly
-         * when operating accumulators directly into state.
-         * Operating into stack space seems to enable proper optimization.
-         * clang, on the other hand, doesn't seem to need this trick */
-        XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[8];
-        XXH_memcpy(acc, state->acc, sizeof(acc));
-#else
-        xxh_u64* XXH_RESTRICT const acc = state->acc;
-#endif
-        state->totalLen += len;
-        XXH_ASSERT(state->bufferedSize <= XXH3_INTERNALBUFFER_SIZE);
-
-        /* small input : just fill in tmp buffer */
-        if (len <= XXH3_INTERNALBUFFER_SIZE - state->bufferedSize) {
-            XXH_memcpy(state->buffer + state->bufferedSize, input, len);
-            state->bufferedSize += (XXH32_hash_t)len;
-            return XXH_OK;
-        }
-
-        /* total input is now > XXH3_INTERNALBUFFER_SIZE */
-        #define XXH3_INTERNALBUFFER_STRIPES (XXH3_INTERNALBUFFER_SIZE / XXH_STRIPE_LEN)
-        XXH_STATIC_ASSERT(XXH3_INTERNALBUFFER_SIZE % XXH_STRIPE_LEN == 0);   /* clean multiple */
-
-        /*
-         * Internal buffer is partially filled (always, except at beginning)
-         * Complete it, then consume it.
-         */
-        if (state->bufferedSize) {
-            size_t const loadSize = XXH3_INTERNALBUFFER_SIZE - state->bufferedSize;
-            XXH_memcpy(state->buffer + state->bufferedSize, input, loadSize);
-            input += loadSize;
-            XXH3_consumeStripes(acc,
-                               &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                state->buffer, XXH3_INTERNALBUFFER_STRIPES,
-                                secret, state->secretLimit,
-                                f_acc, f_scramble);
-            state->bufferedSize = 0;
-        }
-        XXH_ASSERT(input < bEnd);
-        if (bEnd - input > XXH3_INTERNALBUFFER_SIZE) {
-            size_t nbStripes = (size_t)(bEnd - 1 - input) / XXH_STRIPE_LEN;
-            input = XXH3_consumeStripes(acc,
-                                       &state->nbStripesSoFar, state->nbStripesPerBlock,
-                                       input, nbStripes,
-                                       secret, state->secretLimit,
-                                       f_acc, f_scramble);
-            XXH_memcpy(state->buffer + sizeof(state->buffer) - XXH_STRIPE_LEN, input - XXH_STRIPE_LEN, XXH_STRIPE_LEN);
-
-        }
-        /* Some remaining input (always) : buffer it */
-        XXH_ASSERT(input < bEnd);
-        XXH_ASSERT(bEnd - input <= XXH3_INTERNALBUFFER_SIZE);
-        XXH_ASSERT(state->bufferedSize == 0);
-        XXH_memcpy(state->buffer, input, (size_t)(bEnd-input));
-        state->bufferedSize = (XXH32_hash_t)(bEnd-input);
-#if defined(XXH3_STREAM_USE_STACK) && XXH3_STREAM_USE_STACK >= 1
-        /* save stack accumulators into state */
-        XXH_memcpy(state->acc, acc, sizeof(acc));
-#endif
-    }
-
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_64bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
-{
-    return XXH3_update(state, (const xxh_u8*)input, len,
-                       XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-
-XXH_FORCE_INLINE void
-XXH3_digest_long (XXH64_hash_t* acc,
-                  const XXH3_state_t* state,
-                  const unsigned char* secret)
-{
-    xxh_u8 lastStripe[XXH_STRIPE_LEN];
-    const xxh_u8* lastStripePtr;
-
-    /*
-     * Digest on a local copy. This way, the state remains unaltered, and it can
-     * continue ingesting more input afterwards.
-     */
-    XXH_memcpy(acc, state->acc, sizeof(state->acc));
-    if (state->bufferedSize >= XXH_STRIPE_LEN) {
-        /* Consume remaining stripes then point to remaining data in buffer */
-        size_t const nbStripes = (state->bufferedSize - 1) / XXH_STRIPE_LEN;
-        size_t nbStripesSoFar = state->nbStripesSoFar;
-        XXH3_consumeStripes(acc,
-                           &nbStripesSoFar, state->nbStripesPerBlock,
-                            state->buffer, nbStripes,
-                            secret, state->secretLimit,
-                            XXH3_accumulate, XXH3_scrambleAcc);
-        lastStripePtr = state->buffer + state->bufferedSize - XXH_STRIPE_LEN;
-    } else {  /* bufferedSize < XXH_STRIPE_LEN */
-        /* Copy to temp buffer */
-        size_t const catchupSize = XXH_STRIPE_LEN - state->bufferedSize;
-        XXH_ASSERT(state->bufferedSize > 0);  /* there is always some input buffered */
-        XXH_memcpy(lastStripe, state->buffer + sizeof(state->buffer) - catchupSize, catchupSize);
-        XXH_memcpy(lastStripe + catchupSize, state->buffer, state->bufferedSize);
-        lastStripePtr = lastStripe;
-    }
-    /* Last stripe */
-    XXH3_accumulate_512(acc,
-                        lastStripePtr,
-                        secret + state->secretLimit - XXH_SECRET_LASTACC_START);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH64_hash_t XXH3_64bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
-{
-    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-        XXH3_digest_long(acc, state, secret);
-        return XXH3_finalizeLong_64b(acc, secret, (xxh_u64)state->totalLen);
-    }
-    /* totalLen <= XXH3_MIDSIZE_MAX: digesting a short input */
-    if (state->useSeed)
-        return XXH3_64bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_64bits_withSecret(state->buffer, (size_t)(state->totalLen),
-                                  secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-#endif /* !XXH_NO_STREAM */
-
-
-/* ==========================================
- * XXH3 128 bits (a.k.a XXH128)
- * ==========================================
- * XXH3's 128-bit variant has better mixing and strength than the 64-bit variant,
- * even without counting the significantly larger output size.
- *
- * For example, extra steps are taken to avoid the seed-dependent collisions
- * in 17-240 byte inputs (See XXH3_mix16B and XXH128_mix32B).
- *
- * This strength naturally comes at the cost of some speed, especially on short
- * lengths. Note that longer hashes are about as fast as the 64-bit version
- * due to it using only a slight modification of the 64-bit loop.
- *
- * XXH128 is also more oriented towards 64-bit machines. It is still extremely
- * fast for a _128-bit_ hash on 32-bit (it usually clears XXH64).
- */
-
-XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_1to3_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    /* A doubled version of 1to3_64b with different constants. */
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(1 <= len && len <= 3);
-    XXH_ASSERT(secret != NULL);
-    /*
-     * len = 1: combinedl = { input[0], 0x01, input[0], input[0] }
-     * len = 2: combinedl = { input[1], 0x02, input[0], input[1] }
-     * len = 3: combinedl = { input[2], 0x03, input[0], input[1] }
-     */
-    {   xxh_u8 const c1 = input[0];
-        xxh_u8 const c2 = input[len >> 1];
-        xxh_u8 const c3 = input[len - 1];
-        xxh_u32 const combinedl = ((xxh_u32)c1 <<16) | ((xxh_u32)c2 << 24)
-                                | ((xxh_u32)c3 << 0) | ((xxh_u32)len << 8);
-        xxh_u32 const combinedh = XXH_rotl32(XXH_swap32(combinedl), 13);
-        xxh_u64 const bitflipl = (XXH_readLE32(secret) ^ XXH_readLE32(secret+4)) + seed;
-        xxh_u64 const bitfliph = (XXH_readLE32(secret+8) ^ XXH_readLE32(secret+12)) - seed;
-        xxh_u64 const keyed_lo = (xxh_u64)combinedl ^ bitflipl;
-        xxh_u64 const keyed_hi = (xxh_u64)combinedh ^ bitfliph;
-        XXH128_hash_t h128;
-        h128.low64  = XXH64_avalanche(keyed_lo);
-        h128.high64 = XXH64_avalanche(keyed_hi);
-        return h128;
-    }
-}
-
-XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_4to8_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(4 <= len && len <= 8);
-    seed ^= (xxh_u64)XXH_swap32((xxh_u32)seed) << 32;
-    {   xxh_u32 const input_lo = XXH_readLE32(input);
-        xxh_u32 const input_hi = XXH_readLE32(input + len - 4);
-        xxh_u64 const input_64 = input_lo + ((xxh_u64)input_hi << 32);
-        xxh_u64 const bitflip = (XXH_readLE64(secret+16) ^ XXH_readLE64(secret+24)) + seed;
-        xxh_u64 const keyed = input_64 ^ bitflip;
-
-        /* Shift len to the left to ensure it is even, this avoids even multiplies. */
-        XXH128_hash_t m128 = XXH_mult64to128(keyed, XXH_PRIME64_1 + (len << 2));
-
-        m128.high64 += (m128.low64 << 1);
-        m128.low64  ^= (m128.high64 >> 3);
-
-        m128.low64   = XXH_xorshift64(m128.low64, 35);
-        m128.low64  *= PRIME_MX2;
-        m128.low64   = XXH_xorshift64(m128.low64, 28);
-        m128.high64  = XXH3_avalanche(m128.high64);
-        return m128;
-    }
-}
-
-XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_9to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(input != NULL);
-    XXH_ASSERT(secret != NULL);
-    XXH_ASSERT(9 <= len && len <= 16);
-    {   xxh_u64 const bitflipl = (XXH_readLE64(secret+32) ^ XXH_readLE64(secret+40)) - seed;
-        xxh_u64 const bitfliph = (XXH_readLE64(secret+48) ^ XXH_readLE64(secret+56)) + seed;
-        xxh_u64 const input_lo = XXH_readLE64(input);
-        xxh_u64       input_hi = XXH_readLE64(input + len - 8);
-        XXH128_hash_t m128 = XXH_mult64to128(input_lo ^ input_hi ^ bitflipl, XXH_PRIME64_1);
-        /*
-         * Put len in the middle of m128 to ensure that the length gets mixed to
-         * both the low and high bits in the 128x64 multiply below.
-         */
-        m128.low64 += (xxh_u64)(len - 1) << 54;
-        input_hi   ^= bitfliph;
-        /*
-         * Add the high 32 bits of input_hi to the high 32 bits of m128, then
-         * add the long product of the low 32 bits of input_hi and XXH_PRIME32_2 to
-         * the high 64 bits of m128.
-         *
-         * The best approach to this operation is different on 32-bit and 64-bit.
-         */
-        if (sizeof(void *) < sizeof(xxh_u64)) { /* 32-bit */
-            /*
-             * 32-bit optimized version, which is more readable.
-             *
-             * On 32-bit, it removes an ADC and delays a dependency between the two
-             * halves of m128.high64, but it generates an extra mask on 64-bit.
-             */
-            m128.high64 += (input_hi & 0xFFFFFFFF00000000ULL) + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2);
-        } else {
-            /*
-             * 64-bit optimized (albeit more confusing) version.
-             *
-             * Uses some properties of addition and multiplication to remove the mask:
-             *
-             * Let:
-             *    a = input_hi.lo = (input_hi & 0x00000000FFFFFFFF)
-             *    b = input_hi.hi = (input_hi & 0xFFFFFFFF00000000)
-             *    c = XXH_PRIME32_2
-             *
-             *    a + (b * c)
-             * Inverse Property: x + y - x == y
-             *    a + (b * (1 + c - 1))
-             * Distributive Property: x * (y + z) == (x * y) + (x * z)
-             *    a + (b * 1) + (b * (c - 1))
-             * Identity Property: x * 1 == x
-             *    a + b + (b * (c - 1))
-             *
-             * Substitute a, b, and c:
-             *    input_hi.hi + input_hi.lo + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
-             *
-             * Since input_hi.hi + input_hi.lo == input_hi, we get this:
-             *    input_hi + ((xxh_u64)input_hi.lo * (XXH_PRIME32_2 - 1))
-             */
-            m128.high64 += input_hi + XXH_mult32to64((xxh_u32)input_hi, XXH_PRIME32_2 - 1);
-        }
-        /* m128 ^= XXH_swap64(m128 >> 64); */
-        m128.low64  ^= XXH_swap64(m128.high64);
-
-        {   /* 128x64 multiply: h128 = m128 * XXH_PRIME64_2; */
-            XXH128_hash_t h128 = XXH_mult64to128(m128.low64, XXH_PRIME64_2);
-            h128.high64 += m128.high64 * XXH_PRIME64_2;
-
-            h128.low64   = XXH3_avalanche(h128.low64);
-            h128.high64  = XXH3_avalanche(h128.high64);
-            return h128;
-    }   }
-}
-
-/*
- * Assumption: `secret` size is >= XXH3_SECRET_SIZE_MIN
- */
-XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_0to16_128b(const xxh_u8* input, size_t len, const xxh_u8* secret, XXH64_hash_t seed)
-{
-    XXH_ASSERT(len <= 16);
-    {   if (len > 8) return XXH3_len_9to16_128b(input, len, secret, seed);
-        if (len >= 4) return XXH3_len_4to8_128b(input, len, secret, seed);
-        if (len) return XXH3_len_1to3_128b(input, len, secret, seed);
-        {   XXH128_hash_t h128;
-            xxh_u64 const bitflipl = XXH_readLE64(secret+64) ^ XXH_readLE64(secret+72);
-            xxh_u64 const bitfliph = XXH_readLE64(secret+80) ^ XXH_readLE64(secret+88);
-            h128.low64 = XXH64_avalanche(seed ^ bitflipl);
-            h128.high64 = XXH64_avalanche( seed ^ bitfliph);
-            return h128;
-    }   }
-}
-
-/*
- * A bit slower than XXH3_mix16B, but handles multiply by zero better.
- */
-XXH_FORCE_INLINE XXH128_hash_t
-XXH128_mix32B(XXH128_hash_t acc, const xxh_u8* input_1, const xxh_u8* input_2,
-              const xxh_u8* secret, XXH64_hash_t seed)
-{
-    acc.low64  += XXH3_mix16B (input_1, secret+0, seed);
-    acc.low64  ^= XXH_readLE64(input_2) + XXH_readLE64(input_2 + 8);
-    acc.high64 += XXH3_mix16B (input_2, secret+16, seed);
-    acc.high64 ^= XXH_readLE64(input_1) + XXH_readLE64(input_1 + 8);
-    return acc;
-}
-
-
-XXH_FORCE_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_17to128_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                      const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                      XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(16 < len && len <= 128);
-
-    {   XXH128_hash_t acc;
-        acc.low64 = len * XXH_PRIME64_1;
-        acc.high64 = 0;
-
-#if XXH_SIZE_OPT >= 1
-        {
-            /* Smaller, but slightly slower. */
-            unsigned int i = (unsigned int)(len - 1) / 32;
-            do {
-                acc = XXH128_mix32B(acc, input+16*i, input+len-16*(i+1), secret+32*i, seed);
-            } while (i-- != 0);
-        }
-#else
-        if (len > 32) {
-            if (len > 64) {
-                if (len > 96) {
-                    acc = XXH128_mix32B(acc, input+48, input+len-64, secret+96, seed);
-                }
-                acc = XXH128_mix32B(acc, input+32, input+len-48, secret+64, seed);
-            }
-            acc = XXH128_mix32B(acc, input+16, input+len-32, secret+32, seed);
-        }
-        acc = XXH128_mix32B(acc, input, input+len-16, secret, seed);
-#endif
-        {   XXH128_hash_t h128;
-            h128.low64  = acc.low64 + acc.high64;
-            h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                        + (acc.high64   * XXH_PRIME64_4)
-                        + ((len - seed) * XXH_PRIME64_2);
-            h128.low64  = XXH3_avalanche(h128.low64);
-            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-            return h128;
-        }
-    }
-}
-
-XXH_NO_INLINE XXH_PUREF XXH128_hash_t
-XXH3_len_129to240_128b(const xxh_u8* XXH_RESTRICT input, size_t len,
-                       const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                       XXH64_hash_t seed)
-{
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN); (void)secretSize;
-    XXH_ASSERT(128 < len && len <= XXH3_MIDSIZE_MAX);
-
-    {   XXH128_hash_t acc;
-        unsigned i;
-        acc.low64 = len * XXH_PRIME64_1;
-        acc.high64 = 0;
-        /*
-         *  We set as `i` as offset + 32. We do this so that unchanged
-         * `len` can be used as upper bound. This reaches a sweet spot
-         * where both x86 and aarch64 get simple agen and good codegen
-         * for the loop.
-         */
-        for (i = 32; i < 160; i += 32) {
-            acc = XXH128_mix32B(acc,
-                                input  + i - 32,
-                                input  + i - 16,
-                                secret + i - 32,
-                                seed);
-        }
-        acc.low64 = XXH3_avalanche(acc.low64);
-        acc.high64 = XXH3_avalanche(acc.high64);
-        /*
-         * NB: `i <= len` will duplicate the last 32-bytes if
-         * len % 32 was zero. This is an unfortunate necessity to keep
-         * the hash result stable.
-         */
-        for (i=160; i <= len; i += 32) {
-            acc = XXH128_mix32B(acc,
-                                input + i - 32,
-                                input + i - 16,
-                                secret + XXH3_MIDSIZE_STARTOFFSET + i - 160,
-                                seed);
-        }
-        /* last bytes */
-        acc = XXH128_mix32B(acc,
-                            input + len - 16,
-                            input + len - 32,
-                            secret + XXH3_SECRET_SIZE_MIN - XXH3_MIDSIZE_LASTOFFSET - 16,
-                            (XXH64_hash_t)0 - seed);
-
-        {   XXH128_hash_t h128;
-            h128.low64  = acc.low64 + acc.high64;
-            h128.high64 = (acc.low64    * XXH_PRIME64_1)
-                        + (acc.high64   * XXH_PRIME64_4)
-                        + ((len - seed) * XXH_PRIME64_2);
-            h128.low64  = XXH3_avalanche(h128.low64);
-            h128.high64 = (XXH64_hash_t)0 - XXH3_avalanche(h128.high64);
-            return h128;
-        }
-    }
-}
-
-static XXH_PUREF XXH128_hash_t
-XXH3_finalizeLong_128b(const xxh_u64* XXH_RESTRICT acc, const xxh_u8* XXH_RESTRICT secret, size_t secretSize, xxh_u64 len)
-{
-    XXH128_hash_t h128;
-    h128.low64 = XXH3_finalizeLong_64b(acc, secret, len);
-    h128.high64 = XXH3_mergeAccs(acc, secret + secretSize
-                                             - XXH_STRIPE_LEN - XXH_SECRET_MERGEACCS_START,
-                                             ~(len * XXH_PRIME64_2));
-    return h128;
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_internal(const void* XXH_RESTRICT input, size_t len,
-                            const xxh_u8* XXH_RESTRICT secret, size_t secretSize,
-                            XXH3_f_accumulate f_acc,
-                            XXH3_f_scrambleAcc f_scramble)
-{
-    XXH_ALIGN(XXH_ACC_ALIGN) xxh_u64 acc[XXH_ACC_NB] = XXH3_INIT_ACC;
-
-    XXH3_hashLong_internal_loop(acc, (const xxh_u8*)input, len, secret, secretSize, f_acc, f_scramble);
-
-    /* converge into final hash */
-    XXH_STATIC_ASSERT(sizeof(acc) == 64);
-    XXH_ASSERT(secretSize >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-    return XXH3_finalizeLong_128b(acc, secret, secretSize, (xxh_u64)len);
-}
-
-/*
- * It's important for performance that XXH3_hashLong() is not inlined.
- */
-XXH_NO_INLINE XXH_PUREF XXH128_hash_t
-XXH3_hashLong_128b_default(const void* XXH_RESTRICT input, size_t len,
-                           XXH64_hash_t seed64,
-                           const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64; (void)secret; (void)secretLen;
-    return XXH3_hashLong_128b_internal(input, len, XXH3_kSecret, sizeof(XXH3_kSecret),
-                                       XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-/*
- * It's important for performance to pass @p secretLen (when it's static)
- * to the compiler, so that it can properly optimize the vectorized loop.
- *
- * When the secret size is unknown, or on GCC 12 where the mix of NO_INLINE and FORCE_INLINE
- * breaks -Og, this is XXH_NO_INLINE.
- */
-XXH3_WITH_SECRET_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSecret(const void* XXH_RESTRICT input, size_t len,
-                              XXH64_hash_t seed64,
-                              const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)seed64;
-    return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, secretLen,
-                                       XXH3_accumulate, XXH3_scrambleAcc);
-}
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSeed_internal(const void* XXH_RESTRICT input, size_t len,
-                                XXH64_hash_t seed64,
-                                XXH3_f_accumulate f_acc,
-                                XXH3_f_scrambleAcc f_scramble,
-                                XXH3_f_initCustomSecret f_initSec)
-{
-    if (seed64 == 0)
-        return XXH3_hashLong_128b_internal(input, len,
-                                           XXH3_kSecret, sizeof(XXH3_kSecret),
-                                           f_acc, f_scramble);
-    {   XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-        f_initSec(secret, seed64);
-        return XXH3_hashLong_128b_internal(input, len, (const xxh_u8*)secret, sizeof(secret),
-                                           f_acc, f_scramble);
-    }
-}
-
-/*
- * It's important for performance that XXH3_hashLong is not inlined.
- */
-XXH_NO_INLINE XXH128_hash_t
-XXH3_hashLong_128b_withSeed(const void* input, size_t len,
-                            XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen)
-{
-    (void)secret; (void)secretLen;
-    return XXH3_hashLong_128b_withSeed_internal(input, len, seed64,
-                XXH3_accumulate, XXH3_scrambleAcc, XXH3_initCustomSecret);
-}
-
-typedef XXH128_hash_t (*XXH3_hashLong128_f)(const void* XXH_RESTRICT, size_t,
-                                            XXH64_hash_t, const void* XXH_RESTRICT, size_t);
-
-XXH_FORCE_INLINE XXH128_hash_t
-XXH3_128bits_internal(const void* input, size_t len,
-                      XXH64_hash_t seed64, const void* XXH_RESTRICT secret, size_t secretLen,
-                      XXH3_hashLong128_f f_hl128)
-{
-    XXH_ASSERT(secretLen >= XXH3_SECRET_SIZE_MIN);
-    /*
-     * If an action is to be taken if `secret` conditions are not respected,
-     * it should be done here.
-     * For now, it's a contract pre-condition.
-     * Adding a check and a branch here would cost performance at every hash.
-     */
-    if (len <= 16)
-        return XXH3_len_0to16_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, seed64);
-    if (len <= 128)
-        return XXH3_len_17to128_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_len_129to240_128b((const xxh_u8*)input, len, (const xxh_u8*)secret, secretLen, seed64);
-    return f_hl128(input, len, seed64, secret, secretLen);
-}
-
-
-/* ===   Public XXH128 API   === */
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits(XXH_NOESCAPE const void* input, size_t len)
-{
-    return XXH3_128bits_internal(input, len, 0,
-                                 XXH3_kSecret, sizeof(XXH3_kSecret),
-                                 XXH3_hashLong_128b_default);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecret(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize)
-{
-    return XXH3_128bits_internal(input, len, 0,
-                                 (const xxh_u8*)secret, secretSize,
-                                 XXH3_hashLong_128b_withSecret);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSeed(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_128bits_internal(input, len, seed,
-                                 XXH3_kSecret, sizeof(XXH3_kSecret),
-                                 XXH3_hashLong_128b_withSeed);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH3_128bits_withSecretandSeed(XXH_NOESCAPE const void* input, size_t len, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    if (len <= XXH3_MIDSIZE_MAX)
-        return XXH3_128bits_internal(input, len, seed, XXH3_kSecret, sizeof(XXH3_kSecret), NULL);
-    return XXH3_hashLong_128b_withSecret(input, len, seed, secret, secretSize);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128(XXH_NOESCAPE const void* input, size_t len, XXH64_hash_t seed)
-{
-    return XXH3_128bits_withSeed(input, len, seed);
-}
-
-
-/* ===   XXH3 128-bit streaming   === */
-#ifndef XXH_NO_STREAM
-/*
- * All initialization and update functions are identical to 64-bit streaming variant.
- * The only difference is the finalization routine.
- */
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset(XXH_NOESCAPE XXH3_state_t* statePtr)
-{
-    return XXH3_64bits_reset(statePtr);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecret(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize)
-{
-    return XXH3_64bits_reset_withSecret(statePtr, secret, secretSize);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH64_hash_t seed)
-{
-    return XXH3_64bits_reset_withSeed(statePtr, seed);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_reset_withSecretandSeed(XXH_NOESCAPE XXH3_state_t* statePtr, XXH_NOESCAPE const void* secret, size_t secretSize, XXH64_hash_t seed)
-{
-    return XXH3_64bits_reset_withSecretandSeed(statePtr, secret, secretSize, seed);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_128bits_update(XXH_NOESCAPE XXH3_state_t* state, XXH_NOESCAPE const void* input, size_t len)
-{
-    return XXH3_64bits_update(state, input, len);
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t XXH3_128bits_digest (XXH_NOESCAPE const XXH3_state_t* state)
-{
-    const unsigned char* const secret = (state->extSecret == NULL) ? state->customSecret : state->extSecret;
-    if (state->totalLen > XXH3_MIDSIZE_MAX) {
-        XXH_ALIGN(XXH_ACC_ALIGN) XXH64_hash_t acc[XXH_ACC_NB];
-        XXH3_digest_long(acc, state, secret);
-        XXH_ASSERT(state->secretLimit + XXH_STRIPE_LEN >= sizeof(acc) + XXH_SECRET_MERGEACCS_START);
-        return XXH3_finalizeLong_128b(acc, secret, state->secretLimit + XXH_STRIPE_LEN,  (xxh_u64)state->totalLen);
-    }
-    /* len <= XXH3_MIDSIZE_MAX : short code */
-    if (state->useSeed)
-        return XXH3_128bits_withSeed(state->buffer, (size_t)state->totalLen, state->seed);
-    return XXH3_128bits_withSecret(state->buffer, (size_t)(state->totalLen),
-                                   secret, state->secretLimit + XXH_STRIPE_LEN);
-}
-#endif /* !XXH_NO_STREAM */
-/* 128-bit utility functions */
-
-#include <string.h>   /* memcmp, memcpy */
-
-/* return : 1 is equal, 0 if different */
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API int XXH128_isEqual(XXH128_hash_t h1, XXH128_hash_t h2)
-{
-    /* note : XXH128_hash_t is compact, it has no padding byte */
-    return !(memcmp(&h1, &h2, sizeof(h1)));
-}
-
-/* This prototype is compatible with stdlib's qsort().
- * @return : >0 if *h128_1  > *h128_2
- *           <0 if *h128_1  < *h128_2
- *           =0 if *h128_1 == *h128_2  */
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API int XXH128_cmp(XXH_NOESCAPE const void* h128_1, XXH_NOESCAPE const void* h128_2)
-{
-    XXH128_hash_t const h1 = *(const XXH128_hash_t*)h128_1;
-    XXH128_hash_t const h2 = *(const XXH128_hash_t*)h128_2;
-    int const hcmp = (h1.high64 > h2.high64) - (h2.high64 > h1.high64);
-    /* note : bets that, in most cases, hash values are different */
-    if (hcmp) return hcmp;
-    return (h1.low64 > h2.low64) - (h2.low64 > h1.low64);
-}
-
-
-/*======   Canonical representation   ======*/
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API void
-XXH128_canonicalFromHash(XXH_NOESCAPE XXH128_canonical_t* dst, XXH128_hash_t hash)
-{
-    XXH_STATIC_ASSERT(sizeof(XXH128_canonical_t) == sizeof(XXH128_hash_t));
-    if (XXH_CPU_LITTLE_ENDIAN) {
-        hash.high64 = XXH_swap64(hash.high64);
-        hash.low64  = XXH_swap64(hash.low64);
-    }
-    XXH_memcpy(dst, &hash.high64, sizeof(hash.high64));
-    XXH_memcpy((char*)dst + sizeof(hash.high64), &hash.low64, sizeof(hash.low64));
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH128_hash_t
-XXH128_hashFromCanonical(XXH_NOESCAPE const XXH128_canonical_t* src)
-{
-    XXH128_hash_t h;
-    h.high64 = XXH_readBE64(src);
-    h.low64  = XXH_readBE64(src->digest + 8);
-    return h;
-}
-
-
-
-/* ==========================================
- * Secret generators
- * ==========================================
- */
-#define XXH_MIN(x, y) (((x) > (y)) ? (y) : (x))
-
-XXH_FORCE_INLINE void XXH3_combine16(void* dst, XXH128_hash_t h128)
-{
-    XXH_writeLE64( dst, XXH_readLE64(dst) ^ h128.low64 );
-    XXH_writeLE64( (char*)dst+8, XXH_readLE64((char*)dst+8) ^ h128.high64 );
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API XXH_errorcode
-XXH3_generateSecret(XXH_NOESCAPE void* secretBuffer, size_t secretSize, XXH_NOESCAPE const void* customSeed, size_t customSeedSize)
-{
-#if (XXH_DEBUGLEVEL >= 1)
-    XXH_ASSERT(secretBuffer != NULL);
-    XXH_ASSERT(secretSize >= XXH3_SECRET_SIZE_MIN);
-#else
-    /* production mode, assert() are disabled */
-    if (secretBuffer == NULL) return XXH_ERROR;
-    if (secretSize < XXH3_SECRET_SIZE_MIN) return XXH_ERROR;
-#endif
-
-    if (customSeedSize == 0) {
-        customSeed = XXH3_kSecret;
-        customSeedSize = XXH_SECRET_DEFAULT_SIZE;
-    }
-#if (XXH_DEBUGLEVEL >= 1)
-    XXH_ASSERT(customSeed != NULL);
-#else
-    if (customSeed == NULL) return XXH_ERROR;
-#endif
-
-    /* Fill secretBuffer with a copy of customSeed - repeat as needed */
-    {   size_t pos = 0;
-        while (pos < secretSize) {
-            size_t const toCopy = XXH_MIN((secretSize - pos), customSeedSize);
-            memcpy((char*)secretBuffer + pos, customSeed, toCopy);
-            pos += toCopy;
-    }   }
-
-    {   size_t const nbSeg16 = secretSize / 16;
-        size_t n;
-        XXH128_canonical_t scrambler;
-        XXH128_canonicalFromHash(&scrambler, XXH128(customSeed, customSeedSize, 0));
-        for (n=0; n<nbSeg16; n++) {
-            XXH128_hash_t const h128 = XXH128(&scrambler, sizeof(scrambler), n);
-            XXH3_combine16((char*)secretBuffer + n*16, h128);
-        }
-        /* last segment */
-        XXH3_combine16((char*)secretBuffer + secretSize - 16, XXH128_hashFromCanonical(&scrambler));
-    }
-    return XXH_OK;
-}
-
-/*! @ingroup XXH3_family */
-XXH_PUBLIC_API void
-XXH3_generateSecret_fromSeed(XXH_NOESCAPE void* secretBuffer, XXH64_hash_t seed)
-{
-    XXH_ALIGN(XXH_SEC_ALIGN) xxh_u8 secret[XXH_SECRET_DEFAULT_SIZE];
-    XXH3_initCustomSecret(secret, seed);
-    XXH_ASSERT(secretBuffer != NULL);
-    memcpy(secretBuffer, secret, XXH_SECRET_DEFAULT_SIZE);
-}
-
-
-
-/* Pop our optimization override from above */
-#if XXH_VECTOR == XXH_AVX2 /* AVX2 */ \
-  && defined(__GNUC__) && !defined(__clang__) /* GCC, not Clang */ \
-  && defined(__OPTIMIZE__) && XXH_SIZE_OPT <= 0 /* respect -O0 and -Os */
-#  pragma GCC pop_options
-#endif
-
-#endif  /* XXH_NO_LONG_LONG */
-
-#endif  /* XXH_NO_XXH3 */
-
-/*!
- * @}
- */
-#endif  /* XXH_IMPLEMENTATION */
-
-
-#if defined (__cplusplus)
-} /* extern "C" */
-#endif
diff --git a/pixelflux/nvcodec-sys/Cargo.lock b/pixelflux/nvcodec-sys/Cargo.lock
new file mode 100644
index 0000000..927d7ee
--- /dev/null
+++ b/pixelflux/nvcodec-sys/Cargo.lock
@@ -0,0 +1,204 @@
+# This file is automatically @generated by Cargo.
+# It is not intended for manual editing.
+version = 4
+
+[[package]]
+name = "aho-corasick"
+version = "1.1.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301"
+dependencies = [
+ "memchr",
+]
+
+[[package]]
+name = "bindgen"
+version = "0.59.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2bd2a9a458e8f4304c52c43ebb0cfbd520289f8379a52e329a38afda99bf8eb8"
+dependencies = [
+ "bitflags",
+ "cexpr",
+ "clang-sys",
+ "lazy_static",
+ "lazycell",
+ "peeking_take_while",
+ "proc-macro2",
+ "quote",
+ "regex",
+ "rustc-hash",
+ "shlex",
+]
+
+[[package]]
+name = "bitflags"
+version = "1.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
+
+[[package]]
+name = "cexpr"
+version = "0.6.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6fac387a98bb7c37292057cffc56d62ecb629900026402633ae9160df93a8766"
+dependencies = [
+ "nom",
+]
+
+[[package]]
+name = "cfg-if"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
+
+[[package]]
+name = "clang-sys"
+version = "1.8.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0b023947811758c97c59bf9d1c188fd619ad4718dcaa767947df1cadb14f39f4"
+dependencies = [
+ "glob",
+ "libc",
+ "libloading",
+]
+
+[[package]]
+name = "glob"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280"
+
+[[package]]
+name = "lazy_static"
+version = "1.5.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe"
+
+[[package]]
+name = "lazycell"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55"
+
+[[package]]
+name = "libc"
+version = "0.2.186"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66"
+
+[[package]]
+name = "libloading"
+version = "0.8.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55"
+dependencies = [
+ "cfg-if",
+ "windows-link",
+]
+
+[[package]]
+name = "memchr"
+version = "2.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "88904434abc2901f197fe8cc55f0445e7ded921dba5911dad2e2b39b48e663c4"
+
+[[package]]
+name = "minimal-lexical"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a"
+
+[[package]]
+name = "nom"
+version = "7.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d273983c5a657a70a3e8f2a01329822f3b8c8172b73826411a55751e404a0a4a"
+dependencies = [
+ "memchr",
+ "minimal-lexical",
+]
+
+[[package]]
+name = "nvcodec-sys"
+version = "0.1.0"
+dependencies = [
+ "bindgen",
+ "regex",
+]
+
+[[package]]
+name = "peeking_take_while"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "19b17cddbe7ec3f8bc800887bab5e717348c95ea2ca0b1bf0837fb964dc67099"
+
+[[package]]
+name = "proc-macro2"
+version = "1.0.106"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
+dependencies = [
+ "unicode-ident",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.46"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dfbc457d0c7a0759a614551b11a6409e5951f6c7537be1f1b7682b9ae9230368"
+dependencies = [
+ "proc-macro2",
+]
+
+[[package]]
+name = "regex"
+version = "1.12.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1292b7759ae1cb9ec195452d1390a074f0cd8541ab7a5a8c31cd6db45d4a6ba"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-automata",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-automata"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f"
+dependencies = [
+ "aho-corasick",
+ "memchr",
+ "regex-syntax",
+]
+
+[[package]]
+name = "regex-syntax"
+version = "0.8.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6f6ff9a378485b298a5286656da665ba74413d36db0979633275d2e708145d4"
+
+[[package]]
+name = "rustc-hash"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
+
+[[package]]
+name = "shlex"
+version = "1.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
+
+[[package]]
+name = "unicode-ident"
+version = "1.0.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
+
+[[package]]
+name = "windows-link"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
diff --git a/pixelflux/nvcodec-sys/Cargo.toml b/pixelflux/nvcodec-sys/Cargo.toml
new file mode 100644
index 0000000..6fd4ba4
--- /dev/null
+++ b/pixelflux/nvcodec-sys/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "nvcodec-sys"
+description = "FFI bindings for the NVIDIA Video Codec SDK (NVENC) and the CUDA driver subset used by pixelflux. Generated from the bundled NVIDIA SDK headers in headers/ (NVENCAPI 13.0, from FFmpeg/nv-codec-headers sdk/13.0) and committed under src/bindgen/, so normal builds need no libclang. The NVIDIA headers retain their own license."
+version = "0.1.0"
+edition = "2021"
+license = "MIT OR Apache-2.0"
+build = "build.rs"
+publish = false
+
+[features]
+default = []
+# CUDA driver bindings (NVENC needs a CUDA context + device upload).
+cuda = []
+# Opt-in: regenerate src/bindgen/*.rs from the bundled headers. Needs libclang. OFF by default
+# so end-user / cibuildwheel builds compile the committed bindings with no extra toolchain.
+regen = ["bindgen", "regex"]
+
+[build-dependencies]
+# Pinned to the version that produced the committed bindings so a regen reproduces them.
+bindgen = { version = "=0.59.2", default-features = false, features = ["runtime"], optional = true }
+regex = { version = "1", optional = true }
diff --git a/pixelflux/nvcodec-sys/build.rs b/pixelflux/nvcodec-sys/build.rs
new file mode 100644
index 0000000..d934f0f
--- /dev/null
+++ b/pixelflux/nvcodec-sys/build.rs
@@ -0,0 +1,163 @@
+//! Regenerates the committed NVENC + CUDA bindings from the bundled NVIDIA SDK headers in
+//! `headers/`. Runs ONLY under the `regen` feature (which needs libclang); a normal build is a
+//! no-op and compiles the checked-in `src/bindgen/*.rs`, so end-user builds need no libclang.
+//!
+//! bindgen does not emit function-like macros, so the NVENC struct-version constants
+//! (`NV_ENC_*_VER`) and the `static const GUID` codec/preset GUIDs are extracted from the header
+//! with regexes and appended -- mirroring how the upstream SDK defines them.
+
+/// Regenerates `src/bindgen/nvenc.rs` from `headers/nvEncodeAPI.h` and, when `CUDA_PATH` is
+/// set, `src/bindgen/cuda.rs` from `$CUDA_PATH/include/cuda.h`.
+///
+/// Both paths are relative, which relies on Cargo running build scripts with the package
+/// root as the working directory.
+///
+/// # Errors
+/// Propagates I/O errors from creating `src/bindgen`, reading the NVENC header, or appending
+/// the extracted constants to `nvenc.rs`.
+///
+/// # Panics
+/// Panics if bindgen fails to generate or write either bindings file.
+#[cfg(feature = "regen")]
+fn main() -> std::io::Result<()> {
+    use std::io::Write;
+    let out = std::path::PathBuf::from("src/bindgen");
+    std::fs::create_dir_all(&out)?;
+
+    // ---- NVENC ----
+    let nvenc_header = "headers/nvEncodeAPI.h";
+    let nvenc_out = out.join("nvenc.rs");
+    bindgen::builder()
+        .header(nvenc_header)
+        .parse_callbacks(Box::new(bindgen::CargoCallbacks))
+        .allowlist_type("NV.*")
+        .allowlist_function("Nv.*")
+        .allowlist_var("NVENC.*")
+        .allowlist_var("NV_MAX.*")
+        .size_t_is_usize(true)
+        .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false })
+        // Newtype (not rustified) for enums whose values arrive from the driver -- as return
+        // codes or in structs it fills that we then read/copy -- and for zero-less enums the
+        // generated zero-filling Default impls materialize: an out-of-range discriminant in a
+        // rustified enum is UB, and a newer driver may legally send codes this header predates.
+        .newtype_enum("_NVENCSTATUS")
+        .newtype_enum("_NV_ENC_PIC_TYPE")
+        .newtype_enum("_NV_ENC_PIC_STRUCT")
+        .newtype_enum("_NV_ENC_PARAMS_FRAME_FIELD_MODE")
+        .newtype_enum("_NV_ENC_PARAMS_RC_MODE")
+        .newtype_enum("_NV_ENC_MULTI_PASS")
+        .newtype_enum("_NV_ENC_MV_PRECISION")
+        .derive_default(true)
+        .derive_eq(true)
+        .derive_hash(true)
+        .derive_ord(true)
+        .generate()
+        .expect("Unable to generate NVENC bindings")
+        .write_to_file(&nvenc_out)
+        .expect("Unable to write nvenc.rs");
+
+    // Text appended after bindgen's output; it starts with a Rust port of the SDK's
+    // NVENCAPI_STRUCT_VERSION macro, which every extracted `*_VER` constant below calls.
+    let hdr = std::fs::read_to_string(nvenc_header)?;
+    let mut extra = String::from(
+        "\nconst fn nv_struct_version(ver: u32) -> u32 {\n    NVENCAPI_VERSION | ((ver) << 16) | (0x7 << 28)\n}\n",
+    );
+    // The high-bit OR term is written `( 1u<<31 )` in the SDK headers; capture just the shift
+    // count so it can be re-emitted as a valid Rust literal (the C `1u` suffix is not Rust).
+    // The name class accepts digits, matching the GUID pattern below: `[A-Z_]+` alone silently
+    // skips any define with a digit in its name (NOTE(review): e.g. `*_D3D12_VER` -- confirm).
+    let ver_re = regex::Regex::new(
+        r"#define\s+([A-Z_\d]+)\s+\(?NVENCAPI_STRUCT_VERSION\((\d+)\)(?:\s*\|\s*\(\s*1u?\s*<<\s*(\d+)\s*\))?\s*\)?",
+    )
+    .unwrap();
+    for c in ver_re.captures_iter(&hdr) {
+        let (name, ver) = (&c[1], &c[2]);
+        match c.get(3) {
+            Some(shift) => extra.push_str(&format!(
+                "pub const {}: u32 = nv_struct_version({}) | (1u32 << {});\n",
+                name,
+                ver,
+                shift.as_str()
+            )),
+            None => extra.push_str(&format!(
+                "pub const {}: u32 = nv_struct_version({});\n",
+                name, ver
+            )),
+        }
+    }
+    // Captures: 1 = GUID name, 2-4 = Data1..Data3, 5-12 = the eight Data4 bytes.
+    let guid_re = regex::Regex::new(
+        r"static\s+const\s+GUID\s+([A-Z_\d]+)\s*=\s*\r?\n\{\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*\{\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*,\s*(0[xX][0-9a-fA-F]+)\s*\}\s*\}\s*;",
+    )
+    .unwrap();
+    for c in guid_re.captures_iter(&hdr) {
+        extra.push_str(&format!(
+            "pub const {}: GUID = GUID {{\n    Data1: {},\n    Data2: {},\n    Data3: {},\n    Data4: [{}, {}, {}, {}, {}, {}, {}, {}],\n}};\n",
+            &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7], &c[8], &c[9], &c[10], &c[11], &c[12]
+        ));
+    }
+    // Append mode: the constants land after the bindgen output written to nvenc.rs above.
+    std::fs::OpenOptions::new()
+        .append(true)
+        .open(&nvenc_out)?
+        .write_all(extra.as_bytes())?;
+
+    // ---- CUDA driver (subset used by NVENC) ----
+    // The CUDA Driver API header ships with the CUDA toolkit (large, version-specific), so we do
+    // NOT bundle it. Regenerate cuda.rs from $CUDA_PATH/include/cuda.h when the toolkit is present;
+    // otherwise keep the committed binding (the NVENC regen above is self-contained). The 31-symbol
+    // subset below is exactly what pixelflux's NVENC path links.
+    //
+    // The env dependency is declared on BOTH paths: emitting any `rerun-if-*` directive turns
+    // off Cargo's default rerun-on-any-change rule, so declaring it only when CUDA_PATH is
+    // already set would mean that setting CUDA_PATH later never re-triggers this script.
+    println!("cargo:rerun-if-env-changed=CUDA_PATH");
+    if let Ok(cuda_path) = std::env::var("CUDA_PATH") {
+        let cuda_header = format!("{}/include/cuda.h", cuda_path);
+        let cuda_funcs = [
+            "cuGetErrorString", "cuGetErrorName", "cuInit", "cuDeviceGetCount", "cuDeviceGet",
+            "cuDeviceGetName", "cuDeviceGetUuid", "cuCtxCreate_v2", "cuCtxDestroy_v2",
+            "cuCtxPushCurrent_v2", "cuCtxPopCurrent_v2", "cuStreamCreate", "cuStreamDestroy_v2",
+            "cuMemAllocHost_v2", "cuMemAllocPitch_v2", "cuMemFree_v2", "cuMemFreeHost",
+            "cuMemcpy2D_v2", "cuMemcpy2DUnaligned_v2", "cuMemcpy2DAsync_v2", "cuMemcpyDtoH_v2",
+            "cuImportExternalMemory", "cuImportExternalSemaphore", "cuExternalMemoryGetMappedBuffer",
+            "cuExternalMemoryGetMappedMipmappedArray", "cuMipmappedArrayGetLevel",
+            "cuMipmappedArrayDestroy", "cuDestroyExternalMemory", "cuDestroyExternalSemaphore",
+            "cuWaitExternalSemaphoresAsync", "cuSignalExternalSemaphoresAsync",
+        ];
+        let mut cuda_builder = bindgen::builder()
+            .header(&cuda_header)
+            .parse_callbacks(Box::new(bindgen::CargoCallbacks))
+            .size_t_is_usize(true)
+            .default_enum_style(bindgen::EnumVariation::Rust { non_exhaustive: false })
+            // Same UB guard: CUresult comes back from the driver (and may postdate this
+            // header); CUmemorytype has no zero value yet is zero-filled by Default.
+            .newtype_enum("cudaError_enum")
+            .newtype_enum("CUmemorytype_enum")
+            .generate_comments(false)
+            .derive_default(true)
+            .derive_eq(true)
+            .derive_hash(true)
+            .derive_ord(true);
+        for f in cuda_funcs {
+            cuda_builder = cuda_builder.allowlist_function(f);
+        }
+        cuda_builder
+            .generate()
+            .expect("Unable to generate CUDA bindings")
+            .write_to_file(out.join("cuda.rs"))
+            .expect("Unable to write cuda.rs");
+    } else {
+        println!(
+            "cargo:warning=CUDA_PATH unset: kept committed src/bindgen/cuda.rs (regenerated NVENC only). Set CUDA_PATH to rebind CUDA."
+        );
+    }
+
+    println!("cargo:rerun-if-changed=headers/nvEncodeAPI.h");
+    println!("cargo:rerun-if-changed=build.rs");
+    Ok(())
+}
+
+/// Default build: nothing to generate, the committed `src/bindgen/*.rs` are compiled as-is.
+#[cfg(not(feature = "regen"))]
+fn main() {}
diff --git a/pixelflux/include/nvEncodeAPI.h b/pixelflux/nvcodec-sys/headers/nvEncodeAPI.h
similarity index 91%
rename from pixelflux/include/nvEncodeAPI.h
rename to pixelflux/nvcodec-sys/headers/nvEncodeAPI.h
index f91ec9a..894b215 100644
--- a/pixelflux/include/nvEncodeAPI.h
+++ b/pixelflux/nvcodec-sys/headers/nvEncodeAPI.h
@@ -1,7 +1,7 @@
 /*
  * This copyright notice applies to this header file only:
  *
- * Copyright (c) 2010-2023 NVIDIA Corporation
+ * Copyright (c) 2010-2024 NVIDIA Corporation
  *
  * Permission is hereby granted, free of charge, to any person
  * obtaining a copy of this software and associated documentation
@@ -30,7 +30,7 @@
  *   NVIDIA GPUs - beginning with the Kepler generation - contain a hardware-based encoder
  *   (referred to as NVENC) which provides fully-accelerated hardware-based video encoding.
  *   NvEncodeAPI provides the interface for NVIDIA video encoder (NVENC).
- * \date 2011-2022
+ * \date 2011-2024
  *  This file contains the interface constants, structure definitions and function prototypes.
  */
 
@@ -115,8 +115,8 @@ typedef void* NV_ENC_OUTPUT_PTR;            /**< NVENCODE API output buffer*/
 typedef void* NV_ENC_REGISTERED_PTR;        /**< A Resource that has been registered with NVENCODE API*/
 typedef void* NV_ENC_CUSTREAM_PTR;          /**< Pointer to CUstream*/
 
-#define NVENCAPI_MAJOR_VERSION 12
-#define NVENCAPI_MINOR_VERSION 1
+#define NVENCAPI_MAJOR_VERSION 13
+#define NVENCAPI_MINOR_VERSION 0
 
 #define NVENCAPI_VERSION (NVENCAPI_MAJOR_VERSION | (NVENCAPI_MINOR_VERSION << 24))
 
@@ -174,6 +174,14 @@ static const GUID  NV_ENC_H264_PROFILE_MAIN_GUID =
 static const GUID NV_ENC_H264_PROFILE_HIGH_GUID =
 { 0xe7cbc309, 0x4f7a, 0x4b89, { 0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10 } };
 
+// {8F0C337E-186C-48E9-A69D-7A8334089758}
+static const GUID NV_ENC_H264_PROFILE_HIGH_10_GUID =
+{ 0x8f0c337e, 0x186c, 0x48e9, { 0xa6, 0x9d, 0x7a, 0x83, 0x34, 0x08, 0x97, 0x58} };
+
+// {FF3242E9-613C-4295-A1E8-2A7FE94D8133}
+static const GUID  NV_ENC_H264_PROFILE_HIGH_422_GUID =
+{ 0xff3242e9, 0x613c, 0x4295, { 0xa1, 0xe8, 0x2a, 0x7f, 0xe9, 0x4d, 0x81, 0x33 } };
+
 // {7AC663CB-A598-4960-B844-339B261A7D52}
 static const GUID  NV_ENC_H264_PROFILE_HIGH_444_GUID =
 { 0x7ac663cb, 0xa598, 0x4960, { 0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52 } };
@@ -198,7 +206,7 @@ static const GUID NV_ENC_HEVC_PROFILE_MAIN_GUID =
 static const GUID NV_ENC_HEVC_PROFILE_MAIN10_GUID =
 { 0xfa4d2b6c, 0x3a5b, 0x411a, { 0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5 } };
 
-// For HEVC Main 444 8 bit and HEVC Main 444 10 bit profiles only
+// For HEVC Main 422/444 8 bit and HEVC Main 422/444 10 bit profiles only
 // {51ec32b5-1b4c-453c-9cbd-b616bd621341}
 static const GUID NV_ENC_HEVC_PROFILE_FREXT_GUID =
 { 0x51ec32b5, 0x1b4c, 0x453c, { 0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41 } };
@@ -210,6 +218,7 @@ static const GUID NV_ENC_AV1_PROFILE_MAIN_GUID =
 // =========================================================================================
 // *   Preset GUIDS supported by the NvEncodeAPI interface.
 // =========================================================================================
+
 // Performance degrades and quality improves as we move from P1 to P7. Presets P3 to P7 for H264 and Presets P2 to P7 for HEVC have B frames enabled by default
 // for HIGH_QUALITY and LOSSLESS tuning info, and will not work with Weighted Prediction enabled. In case Weighted Prediction is required, disable B frames by
 // setting frameIntervalP = 1
@@ -276,9 +285,6 @@ typedef enum _NV_ENC_MULTI_PASS
     NV_ENC_TWO_PASS_FULL_RESOLUTION         = 0x2,        /**< Two Pass encoding is enabled where first Pass is full resolution */
 } NV_ENC_MULTI_PASS;
 
-/**
- * Restore Encoder state
- */
 typedef enum _NV_ENC_STATE_RESTORE_TYPE
 {
     NV_ENC_STATE_RESTORE_FULL               = 0x01,      /**< Restore full encoder state */
@@ -319,6 +325,7 @@ typedef enum _NV_ENC_QP_MAP_MODE
     NV_ENC_QP_MAP                        = 0x3,             /**< Currently This is not supported. Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as QP value.   */
 } NV_ENC_QP_MAP_MODE;
 
+
 /**
  * Input picture structure
  */
@@ -356,6 +363,7 @@ typedef enum _NV_ENC_PIC_TYPE
     NV_ENC_PIC_TYPE_SKIPPED         = 0x05,    /**< Picture is skipped */
     NV_ENC_PIC_TYPE_INTRA_REFRESH   = 0x06,    /**< First picture in intra refresh cycle */
     NV_ENC_PIC_TYPE_NONREF_P        = 0x07,    /**< Non reference P picture */
+    NV_ENC_PIC_TYPE_SWITCH          = 0x08,    /**< Switch frame (AV1 only) */
     NV_ENC_PIC_TYPE_UNKNOWN         = 0xFF     /**< Picture type unknown */
 } NV_ENC_PIC_TYPE;
 
@@ -408,13 +416,10 @@ typedef enum _NV_ENC_BUFFER_FORMAT
                                                                              This format should be used only when registering the
                                                                              resource as output buffer, which will be used to write
                                                                              the encoded bit stream or H.264 ME only mode output. */
+    NV_ENC_BUFFER_FORMAT_NV16                            = 0x40000001,  /**< Semi-Planar YUV 422 [Y plane followed by interleaved UV plane] */
+    NV_ENC_BUFFER_FORMAT_P210                            = 0x40000002,  /**< Semi-Planar 10-bit YUV 422 [Y plane followed by interleaved UV plane] */
 } NV_ENC_BUFFER_FORMAT;
 
-#define NV_ENC_BUFFER_FORMAT_NV12_PL NV_ENC_BUFFER_FORMAT_NV12
-#define NV_ENC_BUFFER_FORMAT_YV12_PL NV_ENC_BUFFER_FORMAT_YV12
-#define NV_ENC_BUFFER_FORMAT_IYUV_PL NV_ENC_BUFFER_FORMAT_IYUV
-#define NV_ENC_BUFFER_FORMAT_YUV444_PL NV_ENC_BUFFER_FORMAT_YUV444
-
 /**
  * Encoding levels
  */
@@ -677,12 +682,12 @@ typedef enum _NVENCSTATUS
  */
 typedef enum _NV_ENC_PIC_FLAGS
 {
-    NV_ENC_PIC_FLAG_FORCEINTRA         = 0x1,   /**< Encode the current picture as an Intra picture */
-    NV_ENC_PIC_FLAG_FORCEIDR           = 0x2,   /**< Encode the current picture as an IDR picture.
-                                                     This flag is only valid when Picture type decision is taken by the Encoder
-                                                     [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */
-    NV_ENC_PIC_FLAG_OUTPUT_SPSPPS      = 0x4,   /**< Write the sequence and picture header in encoded bitstream of the current picture */
-    NV_ENC_PIC_FLAG_EOS                = 0x8,   /**< Indicates end of the input stream */
+    NV_ENC_PIC_FLAG_FORCEINTRA                = 0x1,   /**< Encode the current picture as an Intra picture */
+    NV_ENC_PIC_FLAG_FORCEIDR                  = 0x2,   /**< Encode the current picture as an IDR picture.
+                                                            This flag is only valid when Picture type decision is taken by the Encoder
+                                                            [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */
+    NV_ENC_PIC_FLAG_OUTPUT_SPSPPS             = 0x4,   /**< Write the sequence and picture header in encoded bitstream of the current picture */
+    NV_ENC_PIC_FLAG_EOS                       = 0x8,   /**< Indicates end of the input stream */
     NV_ENC_PIC_FLAG_DISABLE_ENC_STATE_ADVANCE = 0x10,  /**< Do not advance encoder state during encode */
     NV_ENC_PIC_FLAG_OUTPUT_RECON_FRAME        = 0x20,  /**< Write reconstructed frame */
 } NV_ENC_PIC_FLAGS;
@@ -814,6 +819,14 @@ typedef enum _NV_ENC_NUM_REF_FRAMES
     NV_ENC_NUM_REF_FRAMES_7                = 0x7           /**< Number of reference frames equal to 7 */
 } NV_ENC_NUM_REF_FRAMES;
 
+/**
+*  Enum for Temporal filtering level.
+*/
+typedef enum _NV_ENC_TEMPORAL_FILTER_LEVEL
+{
+    NV_ENC_TEMPORAL_FILTER_LEVEL_0 = 0,
+    NV_ENC_TEMPORAL_FILTER_LEVEL_4 = 4,
+}NV_ENC_TEMPORAL_FILTER_LEVEL;
 /**
  * Encoder capabilities enumeration.
  */
@@ -1186,7 +1199,37 @@ typedef enum _NV_ENC_CAPS
      */
     NV_ENC_CAPS_OUTPUT_ROW_STATS,
 
-     /**
+
+    /**
+     * Indicates temporal filtering support.
+     */
+     NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER,
+
+    /**
+     * Maximum Lookahead level supported (See ::NV_ENC_LOOKAHEAD_LEVEL for details).
+     */
+    NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL,
+
+    /**
+     * Indicates UnidirectionalB support.
+     */
+    NV_ENC_CAPS_SUPPORT_UNIDIRECTIONAL_B,
+
+    /**
+     * Indicates HW support for MVHEVC encoding.
+     * \n 0 : MVHEVC encoding not supported.
+     * \n 1 : MVHEVC encoding supported.
+     */
+    NV_ENC_CAPS_SUPPORT_MVHEVC_ENCODE,
+
+    /**
+     * Indicates HW support for YUV422 mode encoding.
+     * \n 0 : YUV422 mode encoding not supported.
+     * \n 1 : YUV422 mode encoding supported.
+     */
+    NV_ENC_CAPS_SUPPORT_YUV422_ENCODE,
+
+    /**
      * Reserved - Not to be used by clients.
      */
     NV_ENC_CAPS_EXPOSED_COUNT
@@ -1203,7 +1246,7 @@ typedef enum _NV_ENC_HEVC_CUSIZE
     NV_ENC_HEVC_CUSIZE_16x16      = 2,
     NV_ENC_HEVC_CUSIZE_32x32      = 3,
     NV_ENC_HEVC_CUSIZE_64x64      = 4,
-}NV_ENC_HEVC_CUSIZE;
+} NV_ENC_HEVC_CUSIZE;
 
 /**
 *  AV1 PART SIZE
@@ -1216,7 +1259,7 @@ typedef enum _NV_ENC_AV1_PART_SIZE
     NV_ENC_AV1_PART_SIZE_16x16         = 3,
     NV_ENC_AV1_PART_SIZE_32x32         = 4,
     NV_ENC_AV1_PART_SIZE_64x64         = 5,
-}NV_ENC_AV1_PART_SIZE;
+} NV_ENC_AV1_PART_SIZE;
 
 /**
 *  Enums related to fields in VUI parameters.
@@ -1229,7 +1272,7 @@ typedef enum _NV_ENC_VUI_VIDEO_FORMAT
     NV_ENC_VUI_VIDEO_FORMAT_SECAM       = 3,
     NV_ENC_VUI_VIDEO_FORMAT_MAC         = 4,
     NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED = 5,
-}NV_ENC_VUI_VIDEO_FORMAT;
+} NV_ENC_VUI_VIDEO_FORMAT;
 
 typedef enum _NV_ENC_VUI_COLOR_PRIMARIES
 {
@@ -1247,7 +1290,7 @@ typedef enum _NV_ENC_VUI_COLOR_PRIMARIES
     NV_ENC_VUI_COLOR_PRIMARIES_SMPTE431    = 11,
     NV_ENC_VUI_COLOR_PRIMARIES_SMPTE432    = 12,
     NV_ENC_VUI_COLOR_PRIMARIES_JEDEC_P22   = 22,
-}NV_ENC_VUI_COLOR_PRIMARIES;
+} NV_ENC_VUI_COLOR_PRIMARIES;
 
 typedef enum _NV_ENC_VUI_TRANSFER_CHARACTERISTIC
 {
@@ -1270,7 +1313,7 @@ typedef enum _NV_ENC_VUI_TRANSFER_CHARACTERISTIC
     NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084     = 16,
     NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE428      = 17,
     NV_ENC_VUI_TRANSFER_CHARACTERISTIC_ARIB_STD_B67  = 18,
-}NV_ENC_VUI_TRANSFER_CHARACTERISTIC;
+} NV_ENC_VUI_TRANSFER_CHARACTERISTIC;
 
 typedef enum _NV_ENC_VUI_MATRIX_COEFFS
 {
@@ -1286,7 +1329,30 @@ typedef enum _NV_ENC_VUI_MATRIX_COEFFS
     NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL  = 9,
     NV_ENC_VUI_MATRIX_COEFFS_BT2020_CL   = 10,
     NV_ENC_VUI_MATRIX_COEFFS_SMPTE2085   = 11,
-}NV_ENC_VUI_MATRIX_COEFFS;
+} NV_ENC_VUI_MATRIX_COEFFS;
+
+
+/**
+*  Enum for Lookahead level.
+*/
+typedef enum _NV_ENC_LOOKAHEAD_LEVEL
+{
+    NV_ENC_LOOKAHEAD_LEVEL_0             = 0,
+    NV_ENC_LOOKAHEAD_LEVEL_1             = 1,
+    NV_ENC_LOOKAHEAD_LEVEL_2             = 2,
+    NV_ENC_LOOKAHEAD_LEVEL_3             = 3,
+    NV_ENC_LOOKAHEAD_LEVEL_AUTOSELECT    = 15,
+} NV_ENC_LOOKAHEAD_LEVEL;
+
+/**
+* Enum for Bit Depth
+*/
+typedef enum _NV_ENC_BIT_DEPTH
+{
+    NV_ENC_BIT_DEPTH_INVALID             = 0,         /**< Invalid Bit Depth */
+    NV_ENC_BIT_DEPTH_8                   = 8,         /**< Bit Depth 8 */
+    NV_ENC_BIT_DEPTH_10                  = 10,        /**< Bit Depth 10 */
+} NV_ENC_BIT_DEPTH;
 
 /**
  * Input struct for querying Encoding capabilities.
@@ -1310,6 +1376,7 @@ typedef struct _NV_ENC_RESTORE_ENCODER_STATE_PARAMS
     uint32_t                  version;                 /**< [in]: Struct version. */
     uint32_t                  bufferIdx;               /**< [in]: State buffer index to which the encoder state will be restored */
     NV_ENC_STATE_RESTORE_TYPE state;                   /**< [in]: State type to restore */
+    uint32_t                  reserved;                /**< [in]: Reserved and must be set to 0 */
     NV_ENC_OUTPUT_PTR         outputBitstream;         /**< [in]: Specifies the output buffer pointer, for AV1 encode only.
                                                                   Application must call NvEncRestoreEncoderState() API with _NV_ENC_RESTORE_ENCODER_STATE_PARAMS::outputBitstream and
                                                                   _NV_ENC_RESTORE_ENCODER_STATE_PARAMS::completionEvent as input when an earlier call to this API returned "NV_ENC_ERR_NEED_MORE_OUTPUT", for AV1 encode. */
@@ -1319,18 +1386,19 @@ typedef struct _NV_ENC_RESTORE_ENCODER_STATE_PARAMS
 } NV_ENC_RESTORE_ENCODER_STATE_PARAMS;
 
 /** NV_ENC_RESTORE_ENCODER_STATE_PARAMS struct version. */
-#define NV_ENC_RESTORE_ENCODER_STATE_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_RESTORE_ENCODER_STATE_PARAMS_VER NVENCAPI_STRUCT_VERSION(2)
 
 /**
  * Encoded frame information parameters for every block.
  */
 typedef struct _NV_ENC_OUTPUT_STATS_BLOCK
 {
-   uint32_t                 version;                /**< [in]: Struct version */
-   uint8_t                  QP;                     /**< [out]: QP of the block */
-   uint8_t                  reserved[3];            /**< [in]: Reserved and must be set to 0 */
-   uint32_t                 bitcount;               /**< [out]: Bitcount of the block */
-   uint32_t                 reserved1[13];          /**< [in]: Reserved and must be set to 0 */
+    uint32_t                 version;                /**< [in]: Struct version */
+    uint8_t                  QP;                     /**< [out]: QP of the block */
+    uint8_t                  reserved[3];            /**< [in]: Reserved and must be set to 0 */
+    uint32_t                 bitcount;               /**< [out]: Bitcount of the block */
+    uint32_t                 satdCost;               /**< [out]: SATD cost of the residual error */
+    uint32_t                 reserved1[12];          /**< [in]: Reserved and must be set to 0 */
 } NV_ENC_OUTPUT_STATS_BLOCK;
 
 /** NV_ENC_OUTPUT_STATS_BLOCK struct version. */
@@ -1341,11 +1409,12 @@ typedef struct _NV_ENC_OUTPUT_STATS_BLOCK
  */
 typedef struct _NV_ENC_OUTPUT_STATS_ROW
 {
-   uint32_t                 version;                /**< [in]: Struct version */
-   uint8_t                  QP;                     /**< [out]: QP of the row */
-   uint8_t                  reserved[3];            /**< [in]: Reserved and must be set to 0 */
-   uint32_t                 bitcount;               /**< [out]: Bitcount of the row */
-   uint32_t                 reserved1[13];          /**< [in]: Reserved and must be set to 0 */
+    uint32_t                 version;                /**< [in]: Struct version */
+    uint8_t                  QP;                     /**< [out]: QP of the row */
+    uint8_t                  reserved[3];            /**< [in]: Reserved and must be set to 0 */
+    uint32_t                 bitcount;               /**< [out]: Bitcount of the row */
+    uint32_t                 satdCost;               /**< [out]: SATD cost of the residual error */
+    uint32_t                 reserved1[12];          /**< [in]: Reserved and must be set to 0 */
 } NV_ENC_OUTPUT_STATS_ROW;
 
 /** NV_ENC_OUTPUT_STATS_ROW struct version. */
@@ -1370,14 +1439,15 @@ typedef struct _NV_ENC_ENCODE_OUT_PARAMS
 typedef struct _NV_ENC_LOOKAHEAD_PIC_PARAMS
 {
     uint32_t                  version;                 /**< [in]: Struct version. */
+    uint32_t                  reserved;                /**< [in]: Reserved and must be set to 0 */
     NV_ENC_INPUT_PTR          inputBuffer;             /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.*/
     NV_ENC_PIC_TYPE           pictureType;             /**< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder. */
-    uint32_t                  reserved[64];            /**< [in]: Reserved and must be set to 0 */
-    void*                     reserved1[64];           /**< [in]: Reserved and must be set to NULL */
+    uint32_t                  reserved1[63];           /**< [in]: Reserved and must be set to 0 */
+    void*                     reserved2[64];           /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_LOOKAHEAD_PIC_PARAMS;
 
 /** NV_ENC_LOOKAHEAD_PIC_PARAMS struct version. */
-#define NV_ENC_LOOKAHEAD_PIC_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_LOOKAHEAD_PIC_PARAMS_VER NVENCAPI_STRUCT_VERSION(2)
 
 /**
  * Creation parameters for input buffer.
@@ -1392,12 +1462,12 @@ typedef struct _NV_ENC_CREATE_INPUT_BUFFER
     uint32_t                  reserved;                /**< [in]: Reserved and must be set to 0 */
     NV_ENC_INPUT_PTR          inputBuffer;             /**< [out]: Pointer to input buffer */
     void*                     pSysMemBuffer;           /**< [in]: Pointer to existing system memory buffer */
-    uint32_t                  reserved1[57];           /**< [in]: Reserved and must be set to 0 */
+    uint32_t                  reserved1[58];           /**< [in]: Reserved and must be set to 0 */
     void*                     reserved2[63];           /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CREATE_INPUT_BUFFER;
 
 /** NV_ENC_CREATE_INPUT_BUFFER struct version. */
-#define NV_ENC_CREATE_INPUT_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_CREATE_INPUT_BUFFER_VER NVENCAPI_STRUCT_VERSION(2)
 
 /**
  * Creation parameters for output bitstream buffer.
@@ -1458,13 +1528,14 @@ typedef struct _NV_ENC_HEVC_MV_DATA
 typedef struct _NV_ENC_CREATE_MV_BUFFER
 {
     uint32_t            version;           /**< [in]: Struct version. Must be set to NV_ENC_CREATE_MV_BUFFER_VER */
+    uint32_t            reserved;          /**< [in]: Reserved and should be set to 0 */
     NV_ENC_OUTPUT_PTR   mvBuffer;          /**< [out]: Pointer to the output motion vector buffer */
-    uint32_t            reserved1[255];    /**< [in]: Reserved and should be set to 0 */
+    uint32_t            reserved1[254];    /**< [in]: Reserved and should be set to 0 */
     void*               reserved2[63];     /**< [in]: Reserved and should be set to NULL */
 } NV_ENC_CREATE_MV_BUFFER;
 
 /** NV_ENC_CREATE_MV_BUFFER struct version*/
-#define NV_ENC_CREATE_MV_BUFFER_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_CREATE_MV_BUFFER_VER NVENCAPI_STRUCT_VERSION(2)
 
 /**
  * QP value for frames
@@ -1476,6 +1547,8 @@ typedef struct _NV_ENC_QP
     uint32_t        qpIntra;      /**< [in]: Specifies QP value for Intra Frame. Even though this field is uint32_t for legacy reasons, the client should treat this as a signed parameter(int32_t) for cases in which negative QP values are to be specified. */
 } NV_ENC_QP;
 
+#define MAX_NUM_VIEWS_MINUS_1 7
+
 /**
  * Rate Control Configuration Parameters
  */
@@ -1514,7 +1587,7 @@ typedef struct _NV_ENC_QP
                                                                                             Applicable only for constant QP mode (NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP). */
     uint8_t                         temporalLayerQP[8];                          /**< [in]: Specifies the temporal layer QPs used for rate control. Temporal layer index is used as the array index.
                                                                                             Applicable only for constant QP mode (NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP). */
-    uint8_t                         targetQuality;                               /**< [in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 with 0-automatic)  */
+    uint8_t                         targetQuality;                               /**< [in]: Target CQ (Constant Quality) level for VBR mode (range 0-51 for H264/HEVC, 0-63 for AV1 with 0-automatic)  */
     uint8_t                         targetQualityLSB;                            /**< [in]: Fractional part of target quality (as 8.8 fixed point format) */
     uint16_t                        lookaheadDepth;                              /**< [in]: Maximum depth of lookahead with range 0-(31 - number of B frames).
                                                                                             lookaheadDepth is only used if enableLookahead=1.*/
@@ -1546,7 +1619,11 @@ typedef struct _NV_ENC_QP
     int8_t                          cbQPIndexOffset;                              /**< [in]: Specifies the value of 'chroma_qp_index_offset' in H264 / 'pps_cb_qp_offset' in HEVC / 'deltaQ_u_ac' in AV1.*/
     int8_t                          crQPIndexOffset;                              /**< [in]: Specifies the value of 'second_chroma_qp_index_offset' in H264 / 'pps_cr_qp_offset' in HEVC / 'deltaQ_v_ac' in AV1 (for future use only - deltaQ_v_ac is currently always internally set to same value as deltaQ_u_ac). */
     uint16_t                        reserved2;
-    uint32_t                        reserved[4];
+    NV_ENC_LOOKAHEAD_LEVEL          lookaheadLevel;                               /**< [in]: Specifies the lookahead level. Higher level may improve quality at the expense of performance. */
+    uint8_t                         viewBitrateRatios[MAX_NUM_VIEWS_MINUS_1];     /**< [in]: Specifies the bit rate ratio for each view of MV-HEVC except the base view.
+                                                                                             The base view bit rate ratio = 100 - (sum of bit rate ratio of all other views). */
+    uint8_t                         reserved3;
+    uint32_t                        reserved1;
  } NV_ENC_RC_PARAMS;
 
 /** macro for constructing the version field of ::_NV_ENC_RC_PARAMS */
@@ -1578,8 +1655,67 @@ typedef struct _NV_ENC_TIME_CODE
 {
     NV_ENC_DISPLAY_PIC_STRUCT       displayPicStruct;                   /**< [in] Display picStruct */
     NV_ENC_CLOCK_TIMESTAMP_SET      clockTimestamp[MAX_NUM_CLOCK_TS];   /**< [in] Clock Timestamp set */
+    uint32_t                        skipClockTimestampInsertion;        /**< [in] 0 : Inserts Clock Timestamp if NV_ENC_CONFIG_H264::enableTimeCode (H264) or
+                                                                                      NV_ENC_CONFIG_HEVC::outputTimeCodeSEI (HEVC) is specified
+                                                                                  1 : Skips insertion of Clock Timestamp for current frame */
 } NV_ENC_TIME_CODE;
 
+#define MULTIVIEW_MAX_NUM_REF_DISPLAY 32 /**< Length of the per-reference-display arrays in ::HEVC_3D_REFERENCE_DISPLAY_INFO */
+
+/**
+ * Syntax elements of the HEVC 3D reference displays information SEI message (G.14.2.3). Per-display arrays hold MULTIVIEW_MAX_NUM_REF_DISPLAY entries, indexed by reference display i.
+ */
+typedef struct _HEVC_3D_REFERENCE_DISPLAY_INFO
+{
+    uint32_t      refViewingDistanceFlag                         : 1;           /**< [in] Specifies the presence of reference viewing distance.*/
+    uint32_t      threeDimensionalReferenceDisplaysExtensionFlag : 1;           /**< [in] Should be set to 0 for this version of specs. Saved for future use.*/
+    uint32_t      reserved                                       : 30;          /**< [in] Reserved and must be set to 0 */
+    int32_t       precRefDisplayWidth;                                          /**< [in] Specifies the exponent of the maximum allowable truncation error for refDisplayWidth[i]. Range 0-31, inclusive.*/
+    int32_t       precRefViewingDist;                                           /**< [in] Specifies the exponent of the maximum allowable truncation error for refViewingDist[i]. Range 0-31, inclusive.*/
+    int32_t       numRefDisplaysMinus1;                                         /**< [in] Plus 1 specifies the number of reference displays that are signalled in this SEI message. Range 0-31, inclusive.*/
+    int32_t       leftViewId[MULTIVIEW_MAX_NUM_REF_DISPLAY];                    /**< [in] Indicates the ViewId of the left view of a stereo pair corresponding to the i-th reference display.*/
+    int32_t       rightViewId[MULTIVIEW_MAX_NUM_REF_DISPLAY];                   /**< [in] Indicates the ViewId of the right view of a stereo pair corresponding to the i-th reference display.*/
+    int32_t       exponentRefDisplayWidth[MULTIVIEW_MAX_NUM_REF_DISPLAY];       /**< [in] Specifies the exponent part of the reference display width of the i-th reference display.*/
+    int32_t       mantissaRefDisplayWidth[MULTIVIEW_MAX_NUM_REF_DISPLAY];       /**< [in] Specifies the mantissa part of the reference display width of the i-th reference display.*/
+    int32_t       exponentRefViewingDistance[MULTIVIEW_MAX_NUM_REF_DISPLAY];    /**< [in] Specifies the exponent part of the reference viewing distance of the i-th reference display.*/
+    int32_t       mantissaRefViewingDistance[MULTIVIEW_MAX_NUM_REF_DISPLAY];    /**< [in] Specifies the mantissa part of the reference viewing distance of the i-th reference display.*/
+    int32_t       numSampleShiftPlus512[MULTIVIEW_MAX_NUM_REF_DISPLAY];         /**< [in] Indicates the recommended additional horizontal shift for a stereo pair corresponding to the i-th reference baseline and the i-th reference display.*/
+    uint8_t       additionalShiftPresentFlag[MULTIVIEW_MAX_NUM_REF_DISPLAY];    /**< [in] Equal to 1 indicates that the information about additional horizontal shift of the left and right views for the i-th reference display is present in this SEI message.*/
+    uint32_t      reserved2[4];                                                 /**< [in] Reserved and must be set to 0 */
+} HEVC_3D_REFERENCE_DISPLAY_INFO;
+
+/**
+ * Struct for storing the x and y coordinates of one chroma point (used for the g, b, r and whitePoint members of ::MASTERING_DISPLAY_INFO)
+ */
+typedef struct _CHROMA_POINTS {
+    uint16_t x;    /**< x coordinate of the chroma point. NOTE(review): scaling/units are not stated in this header - confirm against the AV1/HEVC sections cited for ::MASTERING_DISPLAY_INFO. */
+    uint16_t y;    /**< y coordinate of the chroma point. NOTE(review): scaling/units not stated here - confirm as for x. */
+} CHROMA_POINTS;
+
+/**
+ * Struct for storing mastering-display information (see also the outputMasteringDisplay flag in ::NV_ENC_CONFIG_HEVC and ::NV_ENC_CONFIG_AV1)
+ * Refer to the AV1 spec 6.7.4 Metadata high dynamic range mastering display color volume semantics OR
+ * HEVC spec D.2.28 Mastering display colour volume SEI message syntax
+ */
+typedef struct _MASTERING_DISPLAY_INFO {
+    CHROMA_POINTS g;                        /**< Chroma point "g" - presumably the green display primary; confirm against the cited spec sections. */
+    CHROMA_POINTS b;                        /**< Chroma point "b" - presumably the blue display primary; confirm against the cited spec sections. */
+    CHROMA_POINTS r;                        /**< Chroma point "r" - presumably the red display primary; confirm against the cited spec sections. */
+    CHROMA_POINTS whitePoint;               /**< Chroma point of the white point. */
+    uint32_t                    maxLuma;    /**< Maximum luma value - presumably the max mastering-display luminance; units per the cited specs, TODO confirm. */
+    uint32_t                    minLuma;    /**< Minimum luma value - presumably the min mastering-display luminance; units per the cited specs, TODO confirm. */
+} MASTERING_DISPLAY_INFO;
+
+/**
+ * Struct for storing content light level information. Refer to AV1 spec 6.7.3 Metadata high dynamic range content light level semantics OR
+ * HEVC spec D.2.35 Content light level information SEI message syntax (see also the outputMaxCll flag in ::NV_ENC_CONFIG_HEVC and ::NV_ENC_CONFIG_AV1)
+ */
+typedef struct _CONTENT_LIGHT_LEVEL
+{
+    uint16_t maxContentLightLevel;       /**< Maximum content light level. NOTE(review): units are not stated in this header - confirm against the cited spec sections. */
+    uint16_t maxPicAverageLightLevel;    /**< Maximum picture-average light level. NOTE(review): units are not stated in this header - confirm against the cited spec sections. */
+} CONTENT_LIGHT_LEVEL;
+
 
 /**
  * \struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS
@@ -1655,9 +1791,9 @@ typedef struct _NVENC_EXTERNAL_ME_SB_HINT
     int16_t    last_of_cu     : 1;                      /**< [in]: Set to 1 for the last MV current CU */
     int16_t    last_of_sb     : 1;                      /**< [in]: Set to 1 for the last MV of current SB */
     int16_t    reserved0      : 1;                      /**< [in]: Reserved and must be set to 0 */
-    int16_t    mvx            : 14;                     /**< [in]: Specifies the x component of integer pixel MV (relative to current MB) S12.2. */
+    int16_t    mvx            : 14;                     /**< [in]: Specifies the x component of integer pixel MV (relative to current MB) S12.2. Permissible value range: [-4092,4092]. */
     int16_t    cu_size        : 2;                      /**< [in]: Specifies the CU size: 0: 8x8, 1: 16x16, 2:32x32, 3:64x64 */
-    int16_t    mvy            : 12;                     /**< [in]: Specifies the y component of integer pixel MV (relative to current MB) S10.2 .*/
+    int16_t    mvy            : 12;                     /**< [in]: Specifies the y component of integer pixel MV (relative to current MB) S10.2. Permissible value range: [-2044,2044]. */
     int16_t    y8             : 3;                      /**< [in]: Specifies the current partition's top left y position in 8 pixel unit */
     int16_t    reserved1      : 1;                      /**< [in]: Reserved and must be set to 0 */
 } NVENC_EXTERNAL_ME_SB_HINT;
@@ -1762,8 +1898,13 @@ typedef struct _NV_ENC_CONFIG_H264
                                                                                Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */
     NV_ENC_NUM_REF_FRAMES               numRefL1;                   /**< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame.
                                                                                Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */
-
-    uint32_t                            reserved1[267];             /**< [in]: Reserved and must be set to 0 */
+    NV_ENC_BIT_DEPTH                    outputBitDepth;             /**< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit. */
+    NV_ENC_BIT_DEPTH                    inputBitDepth;              /**< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input. */
+    NV_ENC_TEMPORAL_FILTER_LEVEL        tfLevel;                    /**< [in]: Specifies the strength of temporal filtering. Check support for temporal filter using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps.
+                                                                               Temporal filter feature is supported only if frameIntervalP >= 5.
+                                                                               If ZeroReorderDelay or enableStereoMVC is enabled, the temporal filter feature is not supported.
+                                                                               Temporal filter is recommended for natural contents. */
+    uint32_t                            reserved1[264];             /**< [in]: Reserved and must be set to 0 */
     void*                               reserved2[64];              /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CONFIG_H264;
 
@@ -1793,7 +1934,7 @@ typedef struct _NV_ENC_CONFIG_HEVC
     uint32_t repeatSPSPPS                          :1;              /**< [in]: Set 1 to output VPS,SPS and PPS for every IDR frame.*/
     uint32_t enableIntraRefresh                    :1;              /**< [in]: Set 1 to enable gradual decoder refresh or intra refresh. If the GOP structure uses B frames this will be ignored */
     uint32_t chromaFormatIDC                       :2;              /**< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input.*/
-    uint32_t pixelBitDepthMinus8                   :3;              /**< [in]: Specifies pixel bit depth minus 8. Should be set to 0 for 8 bit input, 2 for 10 bit input.*/
+    uint32_t reserved3                             :3;              /**< [in]: Reserved and must be set to 0.*/
     uint32_t enableFillerDataInsertion             :1;              /**< [in]: Set to 1 to enable insertion of filler data in the bitstream.
                                                                                This flag will take effect only when CBR rate control mode is in use and both
                                                                                NV_ENC_INITIALIZE_PARAMS::frameRateNum and
@@ -1811,7 +1952,15 @@ typedef struct _NV_ENC_CONFIG_HEVC
                                                                                This flag will be ignored if the value returned for ::NV_ENC_CAPS_SINGLE_SLICE_INTRA_REFRESH caps is false. */
     uint32_t outputRecoveryPointSEI                :1;              /**< [in]: Set to 1 to enable writing of recovery point SEI message */
     uint32_t outputTimeCodeSEI                     :1;              /**< [in]: Set 1 to write SEI time code syntax in the bitstream. Note that this flag will be ignored for D3D12 interface.*/
-    uint32_t reserved                              :12;             /**< [in]: Reserved bitfields.*/
+    uint32_t enableTemporalSVC                     :1;              /**< [in]: Set to 1 to enable SVC temporal */
+    uint32_t enableMVHEVC                          :1;              /**< [in]: Set to 1 to enable stereo MVHEVC. This feature currently supports only 2 views.
+                                                                               This feature is disabled for LTR, Alpha Layer Encoding, UniDirectionalB,
+                                                                               PyramidalME, Lookahead, Temporal Filter, Split encoding, 2 pass encoding and for NV_ENC_TUNING_INFO other than
+                                                                               NV_ENC_TUNING_INFO_HIGH_QUALITY. */
+    uint32_t outputHevc3DReferenceDisplayInfo      :1;              /**< [in]: Set to 1 to write 3D reference displays information SEI message for MVHEVC */
+    uint32_t outputMaxCll                          :1;              /**< [in]: Set to 1 to write Content Light Level information SEI message for HEVC */
+    uint32_t outputMasteringDisplay                :1;              /**< [in]: Set to 1 to write Mastering displays information SEI message for HEVC */
+    uint32_t reserved                              :7;              /**< [in]: Reserved bitfields.*/
     uint32_t idrPeriod;                                             /**< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG. Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */
     uint32_t intraRefreshPeriod;                                    /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
                                                                     Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH. */
@@ -1845,7 +1994,22 @@ typedef struct _NV_ENC_CONFIG_HEVC
                                                                                Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */
     NV_ENC_NUM_REF_FRAMES               numRefL1;                   /**< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame.
                                                                                Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps. */
-    uint32_t                            reserved1[214];             /**< [in]: Reserved and must be set to 0.*/
+    NV_ENC_TEMPORAL_FILTER_LEVEL        tfLevel;                    /**< [in]: Specifies the strength of the temporal filtering. Check support for temporal filtering using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps.
+                                                                               Temporal filter feature is supported only if frameIntervalP >= 5.
+                                                                               Temporal filter feature is not supported with ZeroReorderDelay/enableStereoMVC/AlphaLayerEncoding.
+                                                                               Temporal filter is recommended for natural contents. */
+    uint32_t                            disableDeblockingFilterIDC; /**< [in]: Specifies the deblocking filter mode. Permissible value range: [0,2]. This flag corresponds
+                                                                               to the flag pps_deblocking_filter_disabled_flag specified in section 7.4.3.3 of H.265 specification,
+                                                                               which specifies whether the operation of the deblocking filter shall be disabled across some
+                                                                               block edges of the slice and specifies for which edges the filtering is disabled. See section
+                                                                               7.4.3.3 of H.265 specification for more details.*/
+    NV_ENC_BIT_DEPTH                    outputBitDepth;             /**< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit.
+                                                                               SW will do the bitdepth conversion internally from inputBitDepth -> outputBitDepth if bit depths differ
+                                                                               Support for 8 bit input to 10 bit encode conversion only*/
+    NV_ENC_BIT_DEPTH                    inputBitDepth;              /**< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input.*/
+    uint32_t                            numTemporalLayers;          /**< [in]: Specifies the number of temporal layers to be used for hierarchical coding.*/
+    uint32_t                            numViews;                   /**< [in]: Specifies number of views for MVHEVC */
+    uint32_t                            reserved1[208];             /**< [in]: Reserved and must be set to 0.*/
     void*                               reserved2[64];              /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CONFIG_HEVC;
 
@@ -1910,10 +2074,14 @@ typedef struct _NV_ENC_CONFIG_AV1
     uint32_t enableBitstreamPadding         : 1;                    /**< [in]: Set 1 to enable bitstream padding. */
     uint32_t enableCustomTileConfig         : 1;                    /**< [in]: Set 1 to enable custom tile configuration: numTileColumns and numTileRows must have non zero values and tileWidths and tileHeights must point to a valid address  */
     uint32_t enableFilmGrainParams          : 1;                    /**< [in]: Set 1 to enable custom film grain parameters: filmGrainParams must point to a valid address  */
-    uint32_t inputPixelBitDepthMinus8       : 3;                    /**< [in]: Specifies pixel bit depth minus 8 of video input. Should be set to 0 for 8 bit input, 2 for 10 bit input.*/
-    uint32_t pixelBitDepthMinus8            : 3;                    /**< [in]: Specifies pixel bit depth minus 8 of encoded video. Should be set to 0 for 8 bit, 2 for 10 bit.
-                                                                               HW will do the bitdepth conversion internally from inputPixelBitDepthMinus8 -> pixelBitDepthMinus8 if bit dpeths differ
-                                                                               Support for 8 bit input to 10 bit encode conversion only */
+    uint32_t enableLTR                      : 1;                    /**< [in]: Set to 1 to enable LTR (Long Term Reference) frame support. LTR can be used in "LTR Per Picture" mode.
+                                                                               In this mode, client can control whether the current picture should be marked as LTR.
+                                                                               use ltrMarkFrame = 1 for the picture to be marked as LTR.
+                                                                               Note that LTRs are not supported if encoding session is configured with B-frames */
+    uint32_t enableTemporalSVC              : 1;                    /**< [in]: Set to 1 to enable SVC temporal */
+    uint32_t outputMaxCll                   : 1;                    /**< [in]: Set to 1 to write Content Light Level metadata for Av1 */
+    uint32_t outputMasteringDisplay         : 1;                    /**< [in]: Set to 1 to write Mastering displays metadata for Av1 */
+    uint32_t reserved4                      : 2;                    /**< [in]: Reserved and must be set to 0.*/
     uint32_t reserved                       : 14;                   /**< [in]: Reserved bitfields.*/
     uint32_t idrPeriod;                                             /**< [in]: Specifies the IDR/Key frame interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically. */
     uint32_t intraRefreshPeriod;                                    /**< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
@@ -1930,9 +2098,10 @@ typedef struct _NV_ENC_CONFIG_AV1
                                                                                it will be rounded down to the next power of 2 value. If numTileRows == 0, the picture will be coded with the smallest number of horizontal tiles as allowed by standard.
                                                                                When enableCustomTileConfig == 1, numTileRows must be > 0 and <= NV_MAX_TILE_ROWS_AV1 and tileHeights must point to a valid array of numTileRows entries.
                                                                                Entry i specifies the height in 64x64 CTU unit of tile row i. The sum of all the entries should be equal to the picture hieght in 64x64 CTU units. */
+    uint32_t reserved2;                                             /**< [in]: Reserved and must be set to 0.*/
     uint32_t *tileWidths;                                           /**< [in]: If enableCustomTileConfig == 1, tileWidths[i] specifies the width of tile column i in 64x64 CTU unit, with 0 <= i <= numTileColumns -1. */
     uint32_t *tileHeights;                                          /**< [in]: If enableCustomTileConfig == 1, tileHeights[i] specifies the height of tile row i in 64x64 CTU unit, with 0 <= i <= numTileRows -1. */
-    uint32_t maxTemporalLayersMinus1;                               /**< [in]: Specifies the max temporal layer used for hierarchical coding. */
+    uint32_t maxTemporalLayersMinus1;                               /**< [in]: Specifies the max temporal layer used for hierarchical coding. Cannot be reconfigured and must be specified during encoder creation if temporal layer is considered. */
     NV_ENC_VUI_COLOR_PRIMARIES colorPrimaries;                      /**< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273 */
     NV_ENC_VUI_TRANSFER_CHARACTERISTIC transferCharacteristics;     /**< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273 */
     NV_ENC_VUI_MATRIX_COEFFS matrixCoefficients;                    /**< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273 */
@@ -1942,10 +2111,23 @@ typedef struct _NV_ENC_CONFIG_AV1
                                                                                2: Co-located with luma (0,0) sample */
     NV_ENC_BFRAME_REF_MODE useBFramesAsRef;                         /**< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using  ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.*/
     NV_ENC_FILM_GRAIN_PARAMS_AV1 *filmGrainParams;                  /**< [in]: If enableFilmGrainParams == 1, filmGrainParams must point to a valid NV_ENC_FILM_GRAIN_PARAMS_AV1 structure */
-    NV_ENC_NUM_REF_FRAMES  numFwdRefs;                              /**< [in]: Specifies max number of forward reference frame used for prediction of a frame. It must be in range 1-4 (Last, Last2, last3 and Golden). It's a suggestive value not necessarily be honored always. */
-    NV_ENC_NUM_REF_FRAMES  numBwdRefs;                              /**< [in]: Specifies max number of L1 list reference frame used for prediction of a frame. It must be in range 1-3 (Backward, Altref2, Altref). It's a suggestive value not necessarily be honored always. */
-    uint32_t reserved1[235];                                        /**< [in]: Reserved and must be set to 0.*/
-    void*    reserved2[62];                                         /**< [in]: Reserved and must be set to NULL */
+    NV_ENC_NUM_REF_FRAMES numFwdRefs;                               /**< [in]: Specifies max number of forward reference frame used for prediction of a frame. It must be in range 1-4 (Last, Last2, last3 and Golden). It's a suggestive value not necessarily be honored always. */
+    NV_ENC_NUM_REF_FRAMES numBwdRefs;                               /**< [in]: Specifies max number of L1 list reference frame used for prediction of a frame. It must be in range 1-3 (Backward, Altref2, Altref). It's a suggestive value not necessarily be honored always. */
+    NV_ENC_BIT_DEPTH outputBitDepth;                                /**< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit.
+                                                                               HW will do the bitdepth conversion internally from inputBitDepth -> outputBitDepth if bit depths differ
+                                                                               Support for 8 bit input to 10 bit encode conversion only */
+    NV_ENC_BIT_DEPTH inputBitDepth;                                 /**< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input. */
+    uint32_t ltrNumFrames;                                          /**< [in]: In "LTR Per Picture" mode (ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB.
+                                                                               This ltrNumFrames value acts as guidance to the encoder and is not necessarily honored. To achieve the right balance between the encoding
+                                                                               quality and keeping LTR frames in the DPB queue, the encoder can internally limit the number of LTR frames.
+                                                                               The number of LTR frames actually used depends upon the encoding preset being used; Faster encoding presets will use fewer LTR frames.*/
+    uint32_t numTemporalLayers;                                     /**< [in]: Specifies the number of temporal layers to be used for hierarchical coding.*/
+    NV_ENC_TEMPORAL_FILTER_LEVEL tfLevel;                           /**< [in]: Specifies the strength of temporal filtering. Check support for temporal filter using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps.
+                                                                               Temporal filter feature is supported only if frameIntervalP >= 5.
+                                                                               If ZeroReorderDelay or enableStereoMVC is enabled, the temporal filter feature is not supported.
+                                                                               Temporal filter is recommended for natural contents. */
+    uint32_t reserved1[230];                                        /**< [in]: Reserved and must be set to 0.*/
+    void*    reserved3[62];                                         /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CONFIG_AV1;
 
 /**
@@ -1974,7 +2156,7 @@ typedef struct _NV_ENC_CONFIG_H264_MEONLY
  */
 typedef struct _NV_ENC_CONFIG_HEVC_MEONLY
 {
-    uint32_t reserved [256];                                   /**< [in]: Reserved and must be set to 0 */
+    uint32_t reserved [256];                                    /**< [in]: Reserved and must be set to 0 */
     void*    reserved1[64];                                     /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_CONFIG_HEVC_MEONLY;
 
@@ -2015,7 +2197,7 @@ typedef struct _NV_ENC_CONFIG
 } NV_ENC_CONFIG;
 
 /** macro for constructing the version field of ::_NV_ENC_CONFIG */
-#define NV_ENC_CONFIG_VER (NVENCAPI_STRUCT_VERSION(8) | ( 1u<<31 ))
+#define NV_ENC_CONFIG_VER (NVENCAPI_STRUCT_VERSION(9) | ( 1u<<31 ))
 
 /**
  *  Tuning information of NVENC encoding (TuningInfo is not applicable to H264 and HEVC MEOnly mode).
@@ -2027,6 +2209,7 @@ typedef enum NV_ENC_TUNING_INFO
     NV_ENC_TUNING_INFO_LOW_LATENCY       = 2,                                     /**< Tune presets for low latency streaming.*/
     NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY = 3,                                     /**< Tune presets for ultra low latency streaming.*/
     NV_ENC_TUNING_INFO_LOSSLESS          = 4,                                     /**< Tune presets for lossless encoding.*/
+    NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY = 5,                                    /**< Tune presets for latency tolerant encoding for higher quality. Only supported for HEVC and AV1 on Turing+ architectures */
     NV_ENC_TUNING_INFO_COUNT                                                      /**< Count number of tuningInfos. Invalid value. */
 }NV_ENC_TUNING_INFO;
 
@@ -2035,10 +2218,11 @@ typedef enum NV_ENC_TUNING_INFO
  */
 typedef enum _NV_ENC_SPLIT_ENCODE_MODE
 {
-    NV_ENC_SPLIT_AUTO_MODE               = 0,                                    /**< Default value, split frame forced mode disabled, split frame auto mode enabled */
-    NV_ENC_SPLIT_AUTO_FORCED_MODE        = 1,                                    /**< Split frame forced mode enabled with number of strips automatically selected by driver to best fit configuration */
+    NV_ENC_SPLIT_AUTO_MODE               = 0,                                    /**< Default value, implicit mode. Split frame will not always be enabled, even if NVENC number > 1. It will be decided by the driver based on preset, tuning information and video resolution. */
+    NV_ENC_SPLIT_AUTO_FORCED_MODE        = 1,                                    /**< Split frame forced mode enabled with number of strips automatically selected by driver to best fit configuration. If NVENC number > 1, split frame will be forced. */
     NV_ENC_SPLIT_TWO_FORCED_MODE         = 2,                                    /**< Forced 2-strip split frame encoding (if NVENC number > 1, 1-strip encode otherwise) */
     NV_ENC_SPLIT_THREE_FORCED_MODE       = 3,                                    /**< Forced 3-strip split frame encoding (if NVENC number > 2, NVENC number of strips otherwise) */
+    NV_ENC_SPLIT_FOUR_FORCED_MODE        = 4,                                    /**< Forced 4-strip split frame encoding (if NVENC number > 3, NVENC number of strips otherwise) */
     NV_ENC_SPLIT_DISABLE_MODE            = 15,                                   /**< Both split frame auto mode and forced mode are disabled  */
 } NV_ENC_SPLIT_ENCODE_MODE;
 
@@ -2073,13 +2257,17 @@ typedef struct _NV_ENC_INITIALIZE_PARAMS
                                                                                            Not supported if any of the following features: weighted prediction, alpha layer encoding,
                                                                                            subframe mode, output into video memory buffer, picture timing/buffering period SEI message
                                                                                            insertion with DX12 interface are enabled in case of HEVC.
-                                                                                           For AV1, split encoding is not supported when output into video memory buffer is enabled. */
+                                                                                           For AV1, split encoding is not supported when output into video memory buffer is enabled.
+                                                                                           For valid values see ::NV_ENC_SPLIT_ENCODE_MODE enum.*/
     uint32_t                                   enableOutputInVidmem      :1;    /**< [in]: Set this to 1 to enable output of NVENC in video memory buffer created by application. This feature is not supported for HEVC ME only mode. */
     uint32_t                                   enableReconFrameOutput    :1;    /**< [in]: Set this to 1 to enable reconstructed frame output. */
     uint32_t                                   enableOutputStats         :1;    /**< [in]: Set this to 1 to enable encoded frame output stats. Client must allocate buffer of size equal to number of blocks multiplied by the size of
                                                                                            NV_ENC_OUTPUT_STATS_BLOCK struct in system memory and assign to NV_ENC_LOCK_BITSTREAM::encodedOutputStatsPtr to receive the encoded frame output stats.*/
-    uint32_t                                   reservedBitFields         :20;   /**< [in]: Reserved bitfields and must be set to 0 */
+    uint32_t                                   enableUniDirectionalB     :1;    /**< [in]: Set this to 1 to enable uni directional B-frame(both reference will be from past). It will give better compression
+                                                                                           efficiency for LowLatency/UltraLowLatency use case. Value of parameter is ignored when regular B frames are used. */
+    uint32_t                                   reservedBitFields         :19;   /**< [in]: Reserved bitfields and must be set to 0 */
     uint32_t                                   privDataSize;                    /**< [in]: Reserved private data buffer size and must be set to 0 */
+    uint32_t                                   reserved;                        /**< [in]: Reserved and must be set to 0 */
     void*                                      privData;                        /**< [in]: Reserved private data buffer and must be set to NULL */
     NV_ENC_CONFIG*                             encodeConfig;                    /**< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters.
                                                                                            Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfigEx() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS.
@@ -2099,12 +2287,12 @@ typedef struct _NV_ENC_INITIALIZE_PARAMS
                                                                                            if NV_ENC_INITIALIZE_PARAMS::outputStatsLevel is set to NV_ENC_OUTPUT_STATS_BLOCK or number of rows multiplied by the size of
                                                                                            NV_ENC_OUTPUT_STATS_ROW struct if NV_ENC_INITIALIZE_PARAMS::outputStatsLevel is set to NV_ENC_OUTPUT_STATS_ROW
                                                                                            in system memory and assign to NV_ENC_LOCK_BITSTREAM::encodedOutputStatsPtr to receive the encoded frame output stats. */
-    uint32_t                                   reserved [285];                  /**< [in]: Reserved and must be set to 0 */
+    uint32_t                                   reserved1[284];                  /**< [in]: Reserved and must be set to 0 */
     void*                                      reserved2[64];                   /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_INITIALIZE_PARAMS;
 
 /** macro for constructing the version field of ::_NV_ENC_INITIALIZE_PARAMS */
-#define NV_ENC_INITIALIZE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(6) | ( 1u<<31 ))
+#define NV_ENC_INITIALIZE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(7) | ( 1u<<31 ))
 
 
 /**
@@ -2114,6 +2302,7 @@ typedef struct _NV_ENC_INITIALIZE_PARAMS
 typedef struct _NV_ENC_RECONFIGURE_PARAMS
 {
     uint32_t                                    version;                        /**< [in]: Struct version. Must be set to ::NV_ENC_RECONFIGURE_PARAMS_VER. */
+    uint32_t                                    reserved;                       /**< [in]: Reserved and must be set to 0 */
     NV_ENC_INITIALIZE_PARAMS                    reInitEncodeParams;             /**< [in]: Encoder session re-initialization parameters.
                                                                                            If reInitEncodeParams.encodeConfig is NULL and
                                                                                            reInitEncodeParams.presetGUID is the same as the preset
@@ -2133,12 +2322,13 @@ typedef struct _NV_ENC_RECONFIGURE_PARAMS
                                                                                            If NV_ENC_INITIALIZE_PARAMS::enablePTD is set to 1, encoder will force the frame type to IDR */
     uint32_t                                    forceIDR                :1;     /**< [in]: Encode the current picture as an IDR picture. This flag is only valid when Picture type decision is taken by the Encoder
                                                                                            [_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]. */
-    uint32_t                                    reserved                :30;
+    uint32_t                                    reserved1               :30;
+    uint32_t                                    reserved2;                      /**< [in]: Reserved and must be set to 0 */
 
 }NV_ENC_RECONFIGURE_PARAMS;
 
 /** macro for constructing the version field of ::_NV_ENC_RECONFIGURE_PARAMS */
-#define NV_ENC_RECONFIGURE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(1) | ( 1u<<31 ))
+#define NV_ENC_RECONFIGURE_PARAMS_VER (NVENCAPI_STRUCT_VERSION(2) | ( 1u<<31 ))
 
 /**
  * \struct _NV_ENC_PRESET_CONFIG
@@ -2147,13 +2337,14 @@ typedef struct _NV_ENC_RECONFIGURE_PARAMS
 typedef struct _NV_ENC_PRESET_CONFIG
 {
     uint32_t      version;                               /**< [in]:  Struct version. Must be set to ::NV_ENC_PRESET_CONFIG_VER. */
+    uint32_t      reserved;                              /**< [in]: Reserved and must be set to 0 */
     NV_ENC_CONFIG presetCfg;                             /**< [out]: preset config returned by the Nvidia Video Encoder interface. */
-    uint32_t      reserved1[255];                        /**< [in]: Reserved and must be set to 0 */
+    uint32_t      reserved1[256];                        /**< [in]: Reserved and must be set to 0 */
     void*         reserved2[64];                         /**< [in]: Reserved and must be set to NULL */
 }NV_ENC_PRESET_CONFIG;
 
 /** macro for constructing the version field of ::_NV_ENC_PRESET_CONFIG */
-#define NV_ENC_PRESET_CONFIG_VER (NVENCAPI_STRUCT_VERSION(4) | ( 1u<<31 ))
+#define NV_ENC_PRESET_CONFIG_VER (NVENCAPI_STRUCT_VERSION(5) | ( 1u<<31 ))
 
 
 /**
@@ -2167,7 +2358,7 @@ typedef struct _NV_ENC_PIC_PARAMS_MVC
     uint32_t temporalID;                                 /**< [in]: Specifies the temporal ID associated with the current input view. */
     uint32_t priorityID;                                 /**< [in]: Specifies the priority ID associated with the current input view. Reserved and ignored by the NvEncodeAPI interface. */
     uint32_t reserved1[12];                              /**< [in]: Reserved and must be set to 0. */
-    void*    reserved2[8];                              /**< [in]: Reserved and must be set to NULL. */
+    void*    reserved2[8];                               /**< [in]: Reserved and must be set to NULL. */
 }NV_ENC_PIC_PARAMS_MVC;
 
 /** macro for constructing the version field of ::_NV_ENC_PIC_PARAMS_MVC */
@@ -2239,7 +2430,7 @@ typedef struct _NV_ENC_PIC_PARAMS_H264
                                                                     The number of entries in this array should be equal to forceIntraSliceCount */
     NV_ENC_PIC_PARAMS_H264_EXT h264ExtPicParams;         /**< [in]: Specifies the H264 extension config parameters using this config. */
     NV_ENC_TIME_CODE timeCode;                           /**< [in]: Specifies the clock timestamp sets used in picture timing SEI. Applicable only when NV_ENC_CONFIG_H264::enableTimeCode is set to 1. */
-    uint32_t reserved [203];                             /**< [in]: Reserved and must be set to 0. */
+    uint32_t reserved [202];                             /**< [in]: Reserved and must be set to 0. */
     void*    reserved2[61];                              /**< [in]: Reserved and must be set to NULL. */
 } NV_ENC_PIC_PARAMS_H264;
 
@@ -2261,7 +2452,9 @@ typedef struct _NV_ENC_PIC_PARAMS_HEVC
                                                                     When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting */
     uint32_t ltrMarkFrame               :1;              /**< [in]: Set to 1 if client wants to mark this frame as LTR */
     uint32_t ltrUseFrames               :1;              /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */
-    uint32_t reservedBitFields          :28;             /**< [in]: Reserved bit fields and must be set to 0 */
+    uint32_t temporalConfigUpdate       :1;              /**< [in]: Set to 1 if client wants to change the number of temporal layers in temporal SVC encoding */
+    uint32_t reservedBitFields          :27;             /**< [in]: Reserved bit fields and must be set to 0 */
+    uint32_t reserved1;                                  /**< [in]: Reserved and must be set to 0. */
     uint8_t* sliceTypeData;                              /**< [in]: Array which specifies the slice type used to force intra slice for a particular slice. Currently supported only for NV_ENC_CONFIG_H264::sliceMode == 3.
                                                                     Client should allocate array of size sliceModeData where sliceModeData is specified in field of ::_NV_ENC_CONFIG_H264
                                                                     Array element with index n corresponds to nth slice. To force a particular slice to intra client should set corresponding array element to NV_ENC_SLICE_TYPE_I
@@ -2283,8 +2476,14 @@ typedef struct _NV_ENC_PIC_PARAMS_HEVC
     uint32_t reserved;                                   /**< [in]: Reserved and must be set to 0. */
     NV_ENC_SEI_PAYLOAD* seiPayloadArray;                 /**< [in]: Array of SEI payloads which will be inserted for this frame. */
     NV_ENC_TIME_CODE timeCode;                           /**< [in]: Specifies the clock timestamp sets used in time code SEI. Applicable only when NV_ENC_CONFIG_HEVC::enableTimeCodeSEI is set to 1. */
-    uint32_t reserved2 [237];                            /**< [in]: Reserved and must be set to 0. */
-    void*    reserved3[61];                              /**< [in]: Reserved and must be set to NULL. */
+    uint32_t numTemporalLayers;                          /**< [in]: Specifies the number of temporal layers to be used for hierarchical coding. This value is applied only when temporalConfigUpdate == 1. */
+    uint32_t viewId;                                     /**< [in]: Specifies the view id of the picture */
+    HEVC_3D_REFERENCE_DISPLAY_INFO *p3DReferenceDisplayInfo; /**< [in]: Specifies the 3D reference displays information SEI message.
+                                                                        Applicable only when NV_ENC_CONFIG_HEVC::outputHevc3DReferenceDisplayInfo is set to 1. */
+    CONTENT_LIGHT_LEVEL *pMaxCll;                        /**< [in]: Specifies the Content light level information SEI syntax*/
+    MASTERING_DISPLAY_INFO *pMasteringDisplay;           /**< [in]: Specifies the Mastering display colour volume SEI syntax*/
+    uint32_t reserved2[234];                             /**< [in]: Reserved and must be set to 0. */
+    void* reserved3[58];                                 /**< [in]: Reserved and must be set to NULL. */
 } NV_ENC_PIC_PARAMS_HEVC;
 
 #define NV_ENC_AV1_OBU_PAYLOAD NV_ENC_SEI_PAYLOAD
@@ -2315,7 +2514,10 @@ typedef struct _NV_ENC_PIC_PARAMS_AV1
                                                                     When forceIntraRefreshWithFrameCnt is set it will have priority over tileConfigUpdate setting */
     uint32_t enableCustomTileConfig     : 1;             /**< [in]: Set 1 to enable custom tile configuration: numTileColumns and numTileRows must have non zero values and tileWidths and tileHeights must point to a valid address  */
     uint32_t filmGrainParamsUpdate      : 1;             /**< [in]: Set to 1 if client wants to update previous film grain parameters: filmGrainParams must point to a valid address and encoder must have been configured with film grain enabled  */
-    uint32_t reservedBitFields          : 22;            /**< [in]: Reserved bitfields and must be set to 0 */
+    uint32_t ltrMarkFrame               : 1;             /**< [in]: Set to 1 if client wants to mark this frame as LTR */
+    uint32_t ltrUseFrames               : 1;             /**< [in]: Set to 1 if client allows encoding this frame using the LTR frames specified in ltrFrameBitmap */
+    uint32_t temporalConfigUpdate       : 1;             /**< [in]: Set to 1 if client wants to change the number of temporal layers in temporal SVC encoding */
+    uint32_t reservedBitFields          : 19;            /**< [in]: Reserved bitfields and must be set to 0 */
     uint32_t numTileColumns;                             /**< [in]: This parameter in conjunction with the flag enableCustomTileConfig and the array tileWidths[] specifies the way in which the picture is divided into tile columns.
                                                                     When enableCustomTileConfig == 0, the picture will be uniformly divided into numTileColumns tile columns. If numTileColumns is not a power of 2,
                                                                     it will be rounded down to the next power of 2 value. If numTileColumns == 0, the picture will be coded with the smallest number of vertical tiles as allowed by standard.
@@ -2326,14 +2528,21 @@ typedef struct _NV_ENC_PIC_PARAMS_AV1
                                                                     it will be rounded down to the next power of 2 value. If numTileRows == 0, the picture will be coded with the smallest number of horizontal tiles as allowed by standard.
                                                                     When enableCustomTileConfig == 1, numTileRows must be > 0 and <= NV_MAX_TILE_ROWS_AV1 and tileHeights must point to a valid array of numTileRows entries.
                                                                     Entry i specifies the height in 64x64 CTU unit of tile row i. The sum of all the entries should be equal to the picture hieght in 64x64 CTU units. */
+    uint32_t reserved;                                   /**< [in]: Reserved and must be set to 0. */
     uint32_t *tileWidths;                                /**< [in]: If enableCustomTileConfig == 1, tileWidths[i] specifies the width of tile column i in 64x64 CTU unit, with 0 <= i <= numTileColumns -1. */
     uint32_t *tileHeights;                               /**< [in]: If enableCustomTileConfig == 1, tileHeights[i] specifies the height of tile row i in 64x64 CTU unit, with 0 <= i <= numTileRows -1. */
     uint32_t obuPayloadArrayCnt;                         /**< [in]: Specifies the number of elements allocated in  obuPayloadArray array. */
-    uint32_t reserved;                                   /**< [in]: Reserved and must be set to 0. */
+    uint32_t reserved1;                                  /**< [in]: Reserved and must be set to 0. */
     NV_ENC_AV1_OBU_PAYLOAD* obuPayloadArray;             /**< [in]: Array of OBU payloads which will be inserted for this frame. */
     NV_ENC_FILM_GRAIN_PARAMS_AV1 *filmGrainParams;       /**< [in]: If filmGrainParamsUpdate == 1, filmGrainParams must point to a valid NV_ENC_FILM_GRAIN_PARAMS_AV1 structure */
-    uint32_t reserved2[247];                             /**< [in]: Reserved and must be set to 0. */
-    void*    reserved3[61];                              /**< [in]: Reserved and must be set to NULL. */
+    uint32_t ltrMarkFrameIdx;                            /**< [in]: Specifies the long term reference frame index to use for marking this frame as LTR.*/
+    uint32_t ltrUseFrameBitmap;                          /**< [in]: Specifies the associated bitmap of LTR frame indices to use when encoding this frame. */
+    uint32_t numTemporalLayers;                          /**< [in]: Specifies the number of temporal layers to be used for hierarchical coding. This value is applied only when temporalConfigUpdate == 1. */
+    uint32_t reserved4;                                  /**< [in]: Reserved and must be set to 0. */
+    CONTENT_LIGHT_LEVEL *pMaxCll;                        /**< [in]: Specifies the Content light level metadata syntax*/
+    MASTERING_DISPLAY_INFO *pMasteringDisplay;           /**< [in]: Specifies the Mastering display colour volume metadata syntax*/
+    uint32_t reserved2[242];                             /**< [in]: Reserved and must be set to 0. */
+    void*    reserved3[59];                              /**< [in]: Reserved and must be set to NULL. */
 } NV_ENC_PIC_PARAMS_AV1;
 
 /**
@@ -2359,7 +2568,8 @@ typedef struct _NV_ENC_PIC_PARAMS
     uint32_t                                    inputHeight;                    /**< [in]: Specifies the input frame height */
     uint32_t                                    inputPitch;                     /**< [in]: Specifies the input buffer pitch. If pitch value is not known, set this to inputWidth. */
     uint32_t                                    encodePicFlags;                 /**< [in]: Specifies bit-wise OR of encode picture flags. See ::NV_ENC_PIC_FLAGS enum. */
-    uint32_t                                    frameIdx;                       /**< [in]: Specifies the frame index associated with the input frame [optional]. */
+    uint32_t                                    frameIdx;                       /**< [in]: Specifies the frame index associated with the input frame. It must be passed as a monotonically increasing value starting from 0 when lookaheadLevel, UHQ Tuning Info,
+                                                                                           or the feature for encoding the same frame multiple times without advancing encoder state is enabled. */
     uint64_t                                    inputTimeStamp;                 /**< [in]: Specifies opaque data which is associated with the encoded frame, but not actually encoded in the output bitstream.
                                                                                            This opaque data can be used later to uniquely refer to the corresponding encoded frame. For example, it can be used
                                                                                            for identifying the frame to be invalidated in the reference picture buffer, if lost at the client. */
@@ -2383,8 +2593,8 @@ typedef struct _NV_ENC_PIC_PARAMS
     NVENC_EXTERNAL_ME_HINT                     *meExternalHints;                /**< [in]: For H264 and Hevc, Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
                                                                                            The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
                                                                                            + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */
-    uint32_t                                    reserved1[6];                    /**< [in]: Reserved and must be set to 0 */
-    void*                                       reserved2[2];                    /**< [in]: Reserved and must be set to NULL */
+    uint32_t                                    reserved2[7];                    /**< [in]: Reserved and must be set to 0 */
+    void*                                       reserved5[2];                    /**< [in]: Reserved and must be set to NULL */
     int8_t                                     *qpDeltaMap;                      /**< [in]: Specifies the pointer to signed byte array containing value per MB for H264, per CTB for HEVC and per SB for AV1 in raster scan order for the current picture, which will be interpreted depending on NV_ENC_RC_PARAMS::qpMapMode.
                                                                                             If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DELTA, qpDeltaMap specifies QP modifier per MB for H264, per CTB for HEVC and per SB for AV1. This QP modifier will be applied on top of the QP chosen by rate control.
                                                                                             If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_EMPHASIS, qpDeltaMap specifies Emphasis Level Map per MB for H264. This level value along with QP chosen by rate control is used to
@@ -2395,6 +2605,7 @@ typedef struct _NV_ENC_PIC_PARAMS
     uint32_t                                    reservedBitFields;               /**< [in]: Reserved bitfields and must be set to 0 */
     uint16_t                                    meHintRefPicDist[2];             /**< [in]: Specifies temporal distance for reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITALIZE_PARAMS::enablePTD = 1 . meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints.
                                                                                             If not set, will internally infer distance of 1. Ignored for NV_ENC_INITALIZE_PARAMS::enablePTD = 0 */
+    uint32_t                                    reserved4;                       /**< [in]: Reserved and must be set to 0 */
     NV_ENC_INPUT_PTR                            alphaBuffer;                     /**< [in]: Specifies the input alpha buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.
                                                                                             Applicable only when encoding hevc with alpha layer is enabled. */
     NVENC_EXTERNAL_ME_SB_HINT                  *meExternalSbHints;               /**< [in]: For AV1,Specifies the pointer to ME external SB hints for the current frame. The size of ME hint buffer should be equal to meSbHintsCount. */
@@ -2410,11 +2621,11 @@ typedef struct _NV_ENC_PIC_PARAMS
                                                                                             Reconstructed output will be in NV_ENC_BUFFER_FORMAT_NV12 format when chromaFormatIDC is set to 1.
                                                                                             chromaFormatIDC = 3 is not supported. */
     uint32_t                                    reserved3[284];                  /**< [in]: Reserved and must be set to 0 */
-    void*                                       reserved4[57];                   /**< [in]: Reserved and must be set to NULL */
+    void*                                       reserved6[57];                   /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_PIC_PARAMS;
 
 /** Macro for constructing the version field of ::_NV_ENC_PIC_PARAMS */
-#define NV_ENC_PIC_PARAMS_VER (NVENCAPI_STRUCT_VERSION(6) | ( 1u<<31 ))
+#define NV_ENC_PIC_PARAMS_VER (NVENCAPI_STRUCT_VERSION(7) | ( 1u<<31 ))
 
 
 /**
@@ -2427,6 +2638,7 @@ typedef struct _NV_ENC_MEONLY_PARAMS
     uint32_t                version;                            /**< [in]: Struct version. Must be set to NV_ENC_MEONLY_PARAMS_VER.*/
     uint32_t                inputWidth;                         /**< [in]: Specifies the input frame width */
     uint32_t                inputHeight;                        /**< [in]: Specifies the input frame height */
+    uint32_t                reserved;                           /**< [in]: Reserved and must be set to 0 */
     NV_ENC_INPUT_PTR        inputBuffer;                        /**< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs. */
     NV_ENC_INPUT_PTR        referenceFrame;                     /**< [in]: Specifies the reference frame pointer */
     NV_ENC_OUTPUT_PTR       mvBuffer;                           /**< [in]: Specifies the output buffer pointer.
@@ -2435,6 +2647,7 @@ typedef struct _NV_ENC_MEONLY_PARAMS
                                                                            If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate buffer in video memory for storing the motion vector data. The size of this buffer must
                                                                            be equal to total number of macroblocks multiplied by size of NV_ENC_H264_MV_DATA struct. Client should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this
                                                                            output buffer and assign it to NV_ENC_MEONLY_PARAMS::mvBuffer. All CUDA operations on this buffer must use the default stream. */
+    uint32_t                reserved2;                          /**< [in]: Reserved and must be set to 0 */
     NV_ENC_BUFFER_FORMAT    bufferFmt;                          /**< [in]: Specifies the input buffer format. */
     void*                   completionEvent;                    /**< [in]: Specifies an event to be signaled on completion of motion estimation
                                                                            of this Frame [only if operating in Asynchronous mode].
@@ -2447,12 +2660,12 @@ typedef struct _NV_ENC_MEONLY_PARAMS
     NVENC_EXTERNAL_ME_HINT  *meExternalHints;                   /**< [in]: Specifies the pointer to ME external hints for the current frame. The size of ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
                                                                             The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
                                                                             + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME , the total number of candidates for single macroblock is sum of total number of candidates per MB for each direction (L0 and L1) */
-    uint32_t                reserved1[243];                     /**< [in]: Reserved and must be set to 0 */
-    void*                   reserved2[59];                      /**< [in]: Reserved and must be set to NULL */
+    uint32_t                reserved1[241];                     /**< [in]: Reserved and must be set to 0 */
+    void*                   reserved3[59];                      /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_MEONLY_PARAMS;
 
 /** NV_ENC_MEONLY_PARAMS struct version*/
-#define NV_ENC_MEONLY_PARAMS_VER NVENCAPI_STRUCT_VERSION(3)
+#define NV_ENC_MEONLY_PARAMS_VER NVENCAPI_STRUCT_VERSION(4)
 
 
 /**
@@ -2492,15 +2705,15 @@ typedef struct _NV_ENC_LOCK_BITSTREAM
     int32_t                 averageMVY;                  /**< [out]: Average Motion Vector in y direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1. */
     uint32_t                alphaLayerSizeInBytes;       /**< [out]: Number of bytes generated for the alpha layer in the encoded output. Applicable only when HEVC with alpha encoding is enabled. */
     uint32_t                outputStatsPtrSize;          /**< [in]: Size of the buffer pointed by NV_ENC_LOCK_BITSTREAM::outputStatsPtr. */
+    uint32_t                reserved;                    /**< [in]: Reserved and must be set to 0 */
     void*                   outputStatsPtr;              /**< [in, out]: Buffer which receives the encoded frame output stats, if NV_ENC_INITIALIZE_PARAMS::enableOutputStats is set to 1. */
     uint32_t                frameIdxDisplay;             /**< [out]: Frame index in display order */
-    uint32_t                reserved1[220];              /**< [in]: Reserved and must be set to 0 */
+    uint32_t                reserved1[219];              /**< [in]: Reserved and must be set to 0 */
     void*                   reserved2[63];               /**< [in]: Reserved and must be set to NULL */
     uint32_t                reservedInternal[8];         /**< [in]: Reserved and must be set to 0 */
 } NV_ENC_LOCK_BITSTREAM;
 
-#define NV_ENC_LOCK_BITSTREAM_VER (NVENCAPI_STRUCT_VERSION(1) | ( 1u<<31 ))
-
+#define NV_ENC_LOCK_BITSTREAM_VER (NVENCAPI_STRUCT_VERSION(2) | ( 1u<<31 ))
 
 /**
  * \struct _NV_ENC_LOCK_INPUT_BUFFER
@@ -2648,12 +2861,15 @@ typedef struct _NV_ENC_REGISTER_RESOURCE
                                                                            to NV_ENC_OUTPUT_RECON and D3D11 interface is used.
                                                                            When chroma components are interleaved, 'chromaOffset[0]' will contain chroma offset.
                                                                            chromaOffset[1] is reserved for future use. */
-    uint32_t                    reserved1[245];                 /**< [in]: Reserved and must be set to 0. */
+    uint32_t                    chromaOffsetIn[2];              /**< [in]: Chroma offset for input buffer when NV_ENC_BUFFER_USAGE::bufferUsage is set to NV_ENC_INPUT_IMAGE
+                                                                           and NVCUVID interface is used. This is required only when luma and chroma allocations are not contiguous
+                                                                           and the planes are padded. */
+    uint32_t                    reserved1[244];                 /**< [in]: Reserved and must be set to 0. */
     void*                       reserved2[61];                  /**< [in]: Reserved and must be set to NULL. */
 } NV_ENC_REGISTER_RESOURCE;
 
 /** Macro for constructing the version field of ::_NV_ENC_REGISTER_RESOURCE */
-#define NV_ENC_REGISTER_RESOURCE_VER NVENCAPI_STRUCT_VERSION(4)
+#define NV_ENC_REGISTER_RESOURCE_VER NVENCAPI_STRUCT_VERSION(5)
 
 /**
  * \struct _NV_ENC_STAT
@@ -2663,7 +2879,7 @@ typedef struct _NV_ENC_STAT
 {
     uint32_t            version;                         /**< [in]:  Struct version. Must be set to ::NV_ENC_STAT_VER. */
     uint32_t            reserved;                        /**< [in]:  Reserved and must be set to 0 */
-    NV_ENC_OUTPUT_PTR   outputBitStream;                 /**< [out]: Specifies the pointer to output bitstream. */
+    NV_ENC_OUTPUT_PTR   outputBitStream;                 /**< [in]:  Specifies the pointer to output bitstream. */
     uint32_t            bitStreamSize;                   /**< [out]: Size of generated bitstream in bytes. */
     uint32_t            picType;                         /**< [out]: Picture type of encoded picture. See ::NV_ENC_PIC_TYPE. */
     uint32_t            lastValidByteOffset;             /**< [out]: Offset of last valid bytes of completed bitstream */
@@ -2677,12 +2893,12 @@ typedef struct _NV_ENC_STAT
     uint32_t            interMBCount;                    /**< [out]: For H264, Number of Inter MBs in the encoded frame, includes skip MBs. For HEVC, Number of Inter CTBs in the encoded frame. */
     int32_t             averageMVX;                      /**< [out]: Average Motion Vector in X direction for the encoded frame. */
     int32_t             averageMVY;                      /**< [out]: Average Motion Vector in y direction for the encoded frame. */
-    uint32_t            reserved1[226];                  /**< [in]:  Reserved and must be set to 0 */
+    uint32_t            reserved1[227];                  /**< [in]:  Reserved and must be set to 0 */
     void*               reserved2[64];                   /**< [in]:  Reserved and must be set to NULL */
 } NV_ENC_STAT;
 
 /** Macro for constructing the version field of ::_NV_ENC_STAT */
-#define NV_ENC_STAT_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_STAT_VER NVENCAPI_STRUCT_VERSION(2)
 
 
 /**
@@ -2714,12 +2930,12 @@ typedef struct _NV_ENC_EVENT_PARAMS
     uint32_t            version;                          /**< [in]: Struct version. Must be set to ::NV_ENC_EVENT_PARAMS_VER. */
     uint32_t            reserved;                         /**< [in]: Reserved and must be set to 0 */
     void*               completionEvent;                  /**< [in]: Handle to event to be registered/unregistered with the NvEncodeAPI interface. */
-    uint32_t            reserved1[253];                   /**< [in]: Reserved and must be set to 0    */
+    uint32_t            reserved1[254];                   /**< [in]: Reserved and must be set to 0    */
     void*               reserved2[64];                    /**< [in]: Reserved and must be set to NULL */
 } NV_ENC_EVENT_PARAMS;
 
 /** Macro for constructing the version field of ::_NV_ENC_EVENT_PARAMS */
-#define NV_ENC_EVENT_PARAMS_VER NVENCAPI_STRUCT_VERSION(1)
+#define NV_ENC_EVENT_PARAMS_VER NVENCAPI_STRUCT_VERSION(2)
 
 /**
  * Encoder Session Creation parameters
@@ -4378,8 +4594,8 @@ typedef struct _NV_ENCODE_API_FUNCTION_LIST
     uint32_t                        reserved;                          /**< [in]: Reserved and should be set to 0.                                                  */
     PNVENCOPENENCODESESSION         nvEncOpenEncodeSession;            /**< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer.        */
     PNVENCGETENCODEGUIDCOUNT        nvEncGetEncodeGUIDCount;           /**< [out]: Client should access ::NvEncGetEncodeGUIDCount() API through this pointer.       */
-    PNVENCGETENCODEPRESETCOUNT      nvEncGetEncodeProfileGUIDCount;    /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDCount() API through this pointer.*/
-    PNVENCGETENCODEPRESETGUIDS      nvEncGetEncodeProfileGUIDs;        /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDs() API through this pointer.    */
+    PNVENCGETENCODEPROFILEGUIDCOUNT nvEncGetEncodeProfileGUIDCount;    /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDCount() API through this pointer.*/
+    PNVENCGETENCODEPROFILEGUIDS     nvEncGetEncodeProfileGUIDs;        /**< [out]: Client should access ::NvEncGetEncodeProfileGUIDs() API through this pointer.    */
     PNVENCGETENCODEGUIDS            nvEncGetEncodeGUIDs;               /**< [out]: Client should access ::NvEncGetEncodeGUIDs() API through this pointer.           */
     PNVENCGETINPUTFORMATCOUNT       nvEncGetInputFormatCount;          /**< [out]: Client should access ::NvEncGetInputFormatCount() API through this pointer.      */
     PNVENCGETINPUTFORMATS           nvEncGetInputFormats;              /**< [out]: Client should access ::NvEncGetInputFormats() API through this pointer.          */
diff --git a/pixelflux/nvcodec-sys/src/bindgen/cuda.rs b/pixelflux/nvcodec-sys/src/bindgen/cuda.rs
new file mode 100644
index 0000000..f4b0f1c
--- /dev/null
+++ b/pixelflux/nvcodec-sys/src/bindgen/cuda.rs
@@ -0,0 +1,2143 @@
+/* automatically generated by rust-bindgen 0.59.2 */
+
+// Generated through nvenc-sys build script
+
+pub type CUdeviceptr_v2 = ::std::os::raw::c_ulonglong; // an integer type, not a Rust pointer
+pub type CUdeviceptr = CUdeviceptr_v2; // unversioned name for the `_v2` alias above
+pub type CUdevice_v1 = ::std::os::raw::c_int; // a plain C `int`
+pub type CUdevice = CUdevice_v1; // unversioned name for the `_v1` alias above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUctx_st { // opaque placeholder type; used as the pointee of `CUcontext` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUcontext = *mut CUctx_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUarray_st { // opaque placeholder type; used as the pointee of `CUarray` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUarray = *mut CUarray_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUmipmappedArray_st { // opaque placeholder type; pointee of `CUmipmappedArray` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUmipmappedArray = *mut CUmipmappedArray_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUstream_st { // opaque placeholder type; used as the pointee of `CUstream` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUstream = *mut CUstream_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUextMemory_st { // opaque placeholder type; pointee of `CUexternalMemory` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUexternalMemory = *mut CUextMemory_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Copy, Clone)]
+pub struct CUextSemaphore_st { // opaque placeholder type; pointee of `CUexternalSemaphore` below
+    _unused: [u8; 0], // private zero-length field: carries no data, not constructible outside this module
+}
+pub type CUexternalSemaphore = *mut CUextSemaphore_st; // raw pointer handle to the opaque type above
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUuuid_st { // 16 bytes, alignment 1 (pinned by bindgen_test_layout_CUuuid_st)
+    pub bytes: [::std::os::raw::c_char; 16usize], // the only field, at offset 0
+}
+#[test]
+fn bindgen_test_layout_CUuuid_st() {
+    // Layout guard: `CUuuid_st` must stay 16 bytes, byte-aligned, with `bytes` at offset 0.
+    // The offset is measured with `addr_of!` on uninitialized storage, so no reference to
+    // (and no dereference of) a null pointer is created; that would be undefined behaviour.
+    const UNINIT: ::std::mem::MaybeUninit<CUuuid_st> = ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUuuid_st>(),
+        16usize,
+        concat!("Size of: ", stringify!(CUuuid_st))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUuuid_st>(),
+        1usize,
+        concat!("Alignment of ", stringify!(CUuuid_st))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).bytes) as usize - base as usize },
+        0usize,
+        concat!("Offset of field: ", stringify!(CUuuid_st), "::", stringify!(bytes))
+    );
+}
+pub type CUuuid = CUuuid_st;
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum CUarray_format_enum { // NOTE: no variant has value 0, so zeroed memory is NOT a valid value
+    CU_AD_FORMAT_UNSIGNED_INT8 = 1,
+    CU_AD_FORMAT_UNSIGNED_INT16 = 2,
+    CU_AD_FORMAT_UNSIGNED_INT32 = 3,
+    CU_AD_FORMAT_SIGNED_INT8 = 8,
+    CU_AD_FORMAT_SIGNED_INT16 = 9,
+    CU_AD_FORMAT_SIGNED_INT32 = 10,
+    CU_AD_FORMAT_HALF = 16,
+    CU_AD_FORMAT_FLOAT = 32,
+    CU_AD_FORMAT_NV12 = 176,
+    CU_AD_FORMAT_UNORM_INT8X1 = 192,
+    CU_AD_FORMAT_UNORM_INT8X2 = 193,
+    CU_AD_FORMAT_UNORM_INT8X4 = 194,
+    CU_AD_FORMAT_UNORM_INT16X1 = 195,
+    CU_AD_FORMAT_UNORM_INT16X2 = 196,
+    CU_AD_FORMAT_UNORM_INT16X4 = 197,
+    CU_AD_FORMAT_SNORM_INT8X1 = 198,
+    CU_AD_FORMAT_SNORM_INT8X2 = 199,
+    CU_AD_FORMAT_SNORM_INT8X4 = 200,
+    CU_AD_FORMAT_SNORM_INT16X1 = 201,
+    CU_AD_FORMAT_SNORM_INT16X2 = 202,
+    CU_AD_FORMAT_SNORM_INT16X4 = 203,
+    CU_AD_FORMAT_BC1_UNORM = 145, // the BC* values (145..=158) are listed after the larger ones above
+    CU_AD_FORMAT_BC1_UNORM_SRGB = 146,
+    CU_AD_FORMAT_BC2_UNORM = 147,
+    CU_AD_FORMAT_BC2_UNORM_SRGB = 148,
+    CU_AD_FORMAT_BC3_UNORM = 149,
+    CU_AD_FORMAT_BC3_UNORM_SRGB = 150,
+    CU_AD_FORMAT_BC4_UNORM = 151,
+    CU_AD_FORMAT_BC4_SNORM = 152,
+    CU_AD_FORMAT_BC5_UNORM = 153,
+    CU_AD_FORMAT_BC5_SNORM = 154,
+    CU_AD_FORMAT_BC6H_UF16 = 155,
+    CU_AD_FORMAT_BC6H_SF16 = 156,
+    CU_AD_FORMAT_BC7_UNORM = 157,
+    CU_AD_FORMAT_BC7_UNORM_SRGB = 158,
+}
+pub use self::CUarray_format_enum as CUarray_format; // shorter public name for the enum above
+impl CUmemorytype_enum { // named constants of the `c_uint` newtype declared at the end of this group
+    pub const CU_MEMORYTYPE_HOST: Self = Self(1);
+}
+impl CUmemorytype_enum {
+    pub const CU_MEMORYTYPE_DEVICE: Self = Self(2);
+}
+impl CUmemorytype_enum {
+    pub const CU_MEMORYTYPE_ARRAY: Self = Self(3);
+}
+impl CUmemorytype_enum {
+    pub const CU_MEMORYTYPE_UNIFIED: Self = Self(4);
+}
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUmemorytype_enum(pub ::std::os::raw::c_uint); // newtype: any `c_uint` value, including 0, is representable
+pub use self::CUmemorytype_enum as CUmemorytype;
+impl cudaError_enum { // named constants of the `c_uint` newtype declared at the end of this group
+    pub const CUDA_SUCCESS: Self = Self(0);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_VALUE: Self = Self(1);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_OUT_OF_MEMORY: Self = Self(2);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_INITIALIZED: Self = Self(3);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_DEINITIALIZED: Self = Self(4);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PROFILER_DISABLED: Self = Self(5);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PROFILER_NOT_INITIALIZED: Self = Self(6);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PROFILER_ALREADY_STARTED: Self = Self(7);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PROFILER_ALREADY_STOPPED: Self = Self(8);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STUB_LIBRARY: Self = Self(34);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NO_DEVICE: Self = Self(100);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_DEVICE: Self = Self(101);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_DEVICE_NOT_LICENSED: Self = Self(102);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_IMAGE: Self = Self(200);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_CONTEXT: Self = Self(201);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_CONTEXT_ALREADY_CURRENT: Self = Self(202);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MAP_FAILED: Self = Self(205);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_UNMAP_FAILED: Self = Self(206);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ARRAY_IS_MAPPED: Self = Self(207);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ALREADY_MAPPED: Self = Self(208);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NO_BINARY_FOR_GPU: Self = Self(209);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ALREADY_ACQUIRED: Self = Self(210);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_MAPPED: Self = Self(211);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_MAPPED_AS_ARRAY: Self = Self(212);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_MAPPED_AS_POINTER: Self = Self(213);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ECC_UNCORRECTABLE: Self = Self(214);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_UNSUPPORTED_LIMIT: Self = Self(215);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_CONTEXT_ALREADY_IN_USE: Self = Self(216);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PEER_ACCESS_UNSUPPORTED: Self = Self(217);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_PTX: Self = Self(218);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_GRAPHICS_CONTEXT: Self = Self(219);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NVLINK_UNCORRECTABLE: Self = Self(220);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_JIT_COMPILER_NOT_FOUND: Self = Self(221);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_UNSUPPORTED_PTX_VERSION: Self = Self(222);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_JIT_COMPILATION_DISABLED: Self = Self(223);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_UNSUPPORTED_EXEC_AFFINITY: Self = Self(224);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_SOURCE: Self = Self(300);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_FILE_NOT_FOUND: Self = Self(301);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND: Self = Self(302);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_SHARED_OBJECT_INIT_FAILED: Self = Self(303);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_OPERATING_SYSTEM: Self = Self(304);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_HANDLE: Self = Self(400);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ILLEGAL_STATE: Self = Self(401);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_FOUND: Self = Self(500);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_READY: Self = Self(600);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ILLEGAL_ADDRESS: Self = Self(700);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES: Self = Self(701);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_LAUNCH_TIMEOUT: Self = Self(702);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING: Self = Self(703);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED: Self = Self(704);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PEER_ACCESS_NOT_ENABLED: Self = Self(705);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE: Self = Self(708);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_CONTEXT_IS_DESTROYED: Self = Self(709);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ASSERT: Self = Self(710);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_TOO_MANY_PEERS: Self = Self(711);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED: Self = Self(712);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED: Self = Self(713);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_HARDWARE_STACK_ERROR: Self = Self(714);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_ILLEGAL_INSTRUCTION: Self = Self(715);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MISALIGNED_ADDRESS: Self = Self(716);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_ADDRESS_SPACE: Self = Self(717);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_INVALID_PC: Self = Self(718);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_LAUNCH_FAILED: Self = Self(719);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_COOPERATIVE_LAUNCH_TOO_LARGE: Self = Self(720);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_PERMITTED: Self = Self(800);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_NOT_SUPPORTED: Self = Self(801);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_SYSTEM_NOT_READY: Self = Self(802);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_SYSTEM_DRIVER_MISMATCH: Self = Self(803);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_COMPAT_NOT_SUPPORTED_ON_DEVICE: Self = Self(804);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MPS_CONNECTION_FAILED: Self = Self(805);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MPS_RPC_FAILURE: Self = Self(806);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MPS_SERVER_NOT_READY: Self = Self(807);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MPS_MAX_CLIENTS_REACHED: Self = Self(808);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_MPS_MAX_CONNECTIONS_REACHED: Self = Self(809);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_UNSUPPORTED: Self = Self(900);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_INVALIDATED: Self = Self(901);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_MERGE: Self = Self(902);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_UNMATCHED: Self = Self(903);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_UNJOINED: Self = Self(904);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_ISOLATION: Self = Self(905);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_IMPLICIT: Self = Self(906);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_CAPTURED_EVENT: Self = Self(907);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_STREAM_CAPTURE_WRONG_THREAD: Self = Self(908);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_TIMEOUT: Self = Self(909);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_GRAPH_EXEC_UPDATE_FAILURE: Self = Self(910);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_EXTERNAL_DEVICE: Self = Self(911);
+}
+impl cudaError_enum {
+    pub const CUDA_ERROR_UNKNOWN: Self = Self(999);
+}
+#[repr(transparent)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct cudaError_enum(pub ::std::os::raw::c_uint); // newtype: codes without a named constant are still representable
+pub use self::cudaError_enum as CUresult;
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_MEMCPY2D_st { // 128 bytes, 8-byte aligned (pinned by bindgen_test_layout_CUDA_MEMCPY2D_st)
+    pub srcXInBytes: usize,
+    pub srcY: usize,
+    pub srcMemoryType: CUmemorytype, // presumably selects which of srcHost/srcDevice/srcArray is read; confirm in CUDA docs
+    pub srcHost: *const ::std::os::raw::c_void,
+    pub srcDevice: CUdeviceptr,
+    pub srcArray: CUarray,
+    pub srcPitch: usize,
+    pub dstXInBytes: usize,
+    pub dstY: usize,
+    pub dstMemoryType: CUmemorytype, // presumably selects which of dstHost/dstDevice/dstArray is written; confirm in CUDA docs
+    pub dstHost: *mut ::std::os::raw::c_void,
+    pub dstDevice: CUdeviceptr,
+    pub dstArray: CUarray,
+    pub dstPitch: usize,
+    pub WidthInBytes: usize,
+    pub Height: usize,
+}
+#[test]
+fn bindgen_test_layout_CUDA_MEMCPY2D_st() {
+    // Layout guard: pins the size, alignment and every field offset of `CUDA_MEMCPY2D_st`.
+    // Offsets are measured with `addr_of!` on uninitialized storage, so no reference to
+    // (and no dereference of) a null pointer is created; that would be undefined behaviour.
+    const UNINIT: ::std::mem::MaybeUninit<CUDA_MEMCPY2D_st> = ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_MEMCPY2D_st>(),
+        128usize,
+        concat!("Size of: ", stringify!(CUDA_MEMCPY2D_st))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_MEMCPY2D_st>(),
+        8usize,
+        concat!("Alignment of ", stringify!(CUDA_MEMCPY2D_st))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcXInBytes) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcXInBytes)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcY) as usize - base as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcY)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcMemoryType) as usize - base as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcMemoryType)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcHost) as usize - base as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcHost)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcDevice) as usize - base as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcDevice)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcArray) as usize - base as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcArray)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).srcPitch) as usize - base as usize },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(srcPitch)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstXInBytes) as usize - base as usize },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstXInBytes)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstY) as usize - base as usize },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstY)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstMemoryType) as usize - base as usize },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstMemoryType)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstHost) as usize - base as usize },
+        80usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstHost)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstDevice) as usize - base as usize },
+        88usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstDevice)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstArray) as usize - base as usize },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstArray)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).dstPitch) as usize - base as usize },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(dstPitch)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).WidthInBytes) as usize - base as usize },
+        112usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_MEMCPY2D_st),
+            "::",
+            stringify!(WidthInBytes)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Height) as usize - base as usize },
+        120usize,
+        concat!("Offset of field: ", stringify!(CUDA_MEMCPY2D_st), "::", stringify!(Height))
+    );
+}
+impl Default for CUDA_MEMCPY2D_st {
+    /// Returns a copy descriptor whose bytes are all zero (zero sizes, null pointers).
+    fn default() -> Self {
+        // SAFETY: every field is a `usize`, a `CUdeviceptr` integer, a raw pointer or the
+        // `CUmemorytype` newtype over `c_uint`, and the all-zero bit pattern is a valid
+        // value for each of those types.
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+pub type CUDA_MEMCPY2D_v2 = CUDA_MEMCPY2D_st;
+pub type CUDA_MEMCPY2D = CUDA_MEMCPY2D_v2;
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_ARRAY3D_DESCRIPTOR_st { // 40 bytes, 8-byte aligned (pinned by the layout test below)
+    pub Width: usize,
+    pub Height: usize,
+    pub Depth: usize,
+    pub Format: CUarray_format, // Rust enum with no 0 variant: this field must never hold zeroed bytes
+    pub NumChannels: ::std::os::raw::c_uint,
+    pub Flags: ::std::os::raw::c_uint,
+}
+#[test]
+fn bindgen_test_layout_CUDA_ARRAY3D_DESCRIPTOR_st() {
+    // Layout guard: pins the size, alignment and field offsets of `CUDA_ARRAY3D_DESCRIPTOR_st`.
+    // Offsets are measured with `addr_of!` on uninitialized storage, so no reference to
+    // (and no dereference of) a null pointer is created; that would be undefined behaviour.
+    const UNINIT: ::std::mem::MaybeUninit<CUDA_ARRAY3D_DESCRIPTOR_st> =
+        ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_ARRAY3D_DESCRIPTOR_st>(),
+        40usize,
+        concat!("Size of: ", stringify!(CUDA_ARRAY3D_DESCRIPTOR_st))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_ARRAY3D_DESCRIPTOR_st>(),
+        8usize,
+        concat!("Alignment of ", stringify!(CUDA_ARRAY3D_DESCRIPTOR_st))
+    );
+    // `Width` is expected at offset 0.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Width) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(Width)
+        )
+    );
+    // `Height` is expected at offset 8.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Height) as usize - base as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(Height)
+        )
+    );
+    // `Depth` is expected at offset 16.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Depth) as usize - base as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(Depth)
+        )
+    );
+    // `Format` is expected at offset 24.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Format) as usize - base as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(Format)
+        )
+    );
+    // `NumChannels` is expected at offset 28.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).NumChannels) as usize - base as usize },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(NumChannels)
+        )
+    );
+    // `Flags` is expected at offset 32.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).Flags) as usize - base as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_ARRAY3D_DESCRIPTOR_st),
+            "::",
+            stringify!(Flags)
+        )
+    );
+}
+impl Default for CUDA_ARRAY3D_DESCRIPTOR_st {
+    /// Zeroed descriptor; `Format` is `CU_AD_FORMAT_UNSIGNED_INT8` because 0 is not a variant.
+    fn default() -> Self {
+        // Built field by field: zero-filling the struct would store discriminant 0 in
+        // `Format`, which no `CUarray_format` variant has (undefined behaviour).
+        let format = CUarray_format::CU_AD_FORMAT_UNSIGNED_INT8;
+        Self { Width: 0, Height: 0, Depth: 0, Format: format, NumChannels: 0, Flags: 0 }
+    }
+}
+pub type CUDA_ARRAY3D_DESCRIPTOR_v2 = CUDA_ARRAY3D_DESCRIPTOR_st;
+pub type CUDA_ARRAY3D_DESCRIPTOR = CUDA_ARRAY3D_DESCRIPTOR_v2;
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum CUexternalMemoryHandleType_enum { // NOTE: variants start at 1, so zeroed memory is NOT a valid value
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD = 1,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32 = 2,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_HEAP = 4,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D12_RESOURCE = 5,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE = 6,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_D3D11_RESOURCE_KMT = 7,
+    CU_EXTERNAL_MEMORY_HANDLE_TYPE_NVSCIBUF = 8,
+}
+pub use self::CUexternalMemoryHandleType_enum as CUexternalMemoryHandleType; // shorter public name
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st { // derives only Copy/Clone; the `handle` field is a union
+    pub type_: CUexternalMemoryHandleType, // Rust enum starting at 1; presumably selects the active `handle` member (confirm in CUDA docs)
+    pub handle: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1, // union of fd / win32 / nvSciBufObject
+    pub size: ::std::os::raw::c_ulonglong,
+    pub flags: ::std::os::raw::c_uint,
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 { // all members share offset 0; reading any of them is `unsafe`
+    pub fd: ::std::os::raw::c_int,
+    pub win32: CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1, // handle + name pointer pair
+    pub nvSciBufObject: *const ::std::os::raw::c_void,
+}
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 { // type of the union's `win32` member
+    pub handle: *mut ::std::os::raw::c_void, // untyped raw pointer; null in the zeroed default
+    pub name: *const ::std::os::raw::c_void, // untyped raw pointer; null in the zeroed default
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1() {
+    // Layout guard: pins the size, alignment and field offsets of the `win32` member struct.
+    // Offsets are measured with `addr_of!` on uninitialized storage, so no reference to
+    // (and no dereference of) a null pointer is created; that would be undefined behaviour.
+    const UNINIT: ::std::mem::MaybeUninit<
+        CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
+    > = ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).handle) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1),
+            "::",
+            stringify!(handle)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).name) as usize - base as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1),
+            "::",
+            stringify!(name)
+        )
+    );
+}
+impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
+    /// Returns the pair with both `handle` and `name` set to null.
+    fn default() -> Self {
+        // SAFETY: both fields are raw pointers, and the all-zero bit pattern (a null
+        // pointer) is a valid value for a raw pointer, so a fully zeroed instance is
+        // a valid value of this struct.
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1() {
+    // Layout guard: pins the size and alignment of the handle union and checks that every
+    // member starts at offset 0. Offsets are measured with `addr_of!` on uninitialized
+    // storage, so no null pointer is dereferenced (that would be undefined behaviour).
+    const UNINIT: ::std::mem::MaybeUninit<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1> =
+        ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1)
+        )
+    );
+    // Union member `fd`.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).fd) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1),
+            "::",
+            stringify!(fd)
+        )
+    );
+    // Union member `win32`.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).win32) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1),
+            "::",
+            stringify!(win32)
+        )
+    );
+    // Union member `nvSciBufObject`.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).nvSciBufObject) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1),
+            "::",
+            stringify!(nvSciBufObject)
+        )
+    );
+}
+impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st__bindgen_ty_1 {
+    /// Returns the union with all of its bytes set to zero.
+    fn default() -> Self {
+        // SAFETY: the members are a `c_int`, a struct of two raw pointers and a raw
+        // pointer; the all-zero bit pattern is a valid value for each of them, so a
+        // fully zeroed union is valid whichever member is read afterwards.
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st() {
+    // Layout guard for `CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st`. Offsets are measured with
+    // `addr_of!` on uninitialized storage, so no null pointer is dereferenced (that is UB).
+    const UNINIT: ::std::mem::MaybeUninit<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st> =
+        ::std::mem::MaybeUninit::uninit();
+    let base = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st>(),
+        104usize,
+        concat!("Size of: ", stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).type_) as usize - base as usize
+        },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st),
+            "::",
+            stringify!(type_)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).handle) as usize - base as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st),
+            "::",
+            stringify!(handle)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).size) as usize - base as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st),
+            "::",
+            stringify!(size)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).flags) as usize - base as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st),
+            "::",
+            stringify!(flags)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).reserved) as usize - base as usize
+        },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st),
+            "::",
+            stringify!(reserved)
+        )
+    );
+}
+impl Default for CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st {
+    /// Produces a value of `Self` whose bytes are all zero.
+    fn default() -> Self {
+        // NOTE(review): the type of `type_` is declared outside this view. If it
+        // is a Rust enum with no 0 discriminant, an all-zero value is invalid and
+        // this needs the field set before `assume_init()` — confirm.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Version-suffixed (`_v1`) name for `CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st`; a plain alias, not a new type.
+pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1 = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_st;
+/// Unversioned name; resolves to the `_v1` alias.
+pub type CUDA_EXTERNAL_MEMORY_HANDLE_DESC = CUDA_EXTERNAL_MEMORY_HANDLE_DESC_v1;
+/// C-layout (`#[repr(C)]`) record of integer fields; the layout test that
+/// follows pins it to 88 bytes with 8-byte alignment.
+///
+/// NOTE(review): presumably mirrors the CUDA driver API struct of the same
+/// name in `cuda.h` — confirm field meanings there before relying on them.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st {
+    // Byte offset 0.
+    pub offset: ::std::os::raw::c_ulonglong,
+    // Byte offset 8.
+    pub size: ::std::os::raw::c_ulonglong,
+    // Byte offset 16.
+    pub flags: ::std::os::raw::c_uint,
+    // Byte offset 20; sixteen `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st() {
+    // Pins the C layout of `CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st`: total size,
+    // alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        88usize,
+        concat!("Size of: ", stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("offset", ::std::ptr::addr_of!((*base).offset) as usize, 0usize),
+            ("size", ::std::ptr::addr_of!((*base).size) as usize, 8usize),
+            ("flags", ::std::ptr::addr_of!((*base).flags) as usize, 16usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 20usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st),
+            name
+        );
+    }
+}
+/// Version-suffixed (`_v1`) name for `CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st`; a plain alias, not a new type.
+pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1 = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_st;
+/// Unversioned name; resolves to the `_v1` alias.
+pub type CUDA_EXTERNAL_MEMORY_BUFFER_DESC = CUDA_EXTERNAL_MEMORY_BUFFER_DESC_v1;
+/// C-layout (`#[repr(C)]`) record embedding a `CUDA_ARRAY3D_DESCRIPTOR`; the
+/// layout test that follows pins it to 120 bytes with 8-byte alignment.
+///
+/// `Default` is not derived here; a hand-written zero-filling impl follows the
+/// layout test.
+/// NOTE(review): presumably mirrors the CUDA driver API struct of the same
+/// name in `cuda.h` — confirm field meanings there before relying on them.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
+    // Byte offset 0.
+    pub offset: ::std::os::raw::c_ulonglong,
+    // Byte offset 8; type is defined elsewhere in this file.
+    pub arrayDesc: CUDA_ARRAY3D_DESCRIPTOR,
+    // Byte offset 48.
+    pub numLevels: ::std::os::raw::c_uint,
+    // Byte offset 52; sixteen `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st() {
+    // Pins the C layout of `CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st`:
+    // total size, alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        120usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("offset", ::std::ptr::addr_of!((*base).offset) as usize, 0usize),
+            ("arrayDesc", ::std::ptr::addr_of!((*base).arrayDesc) as usize, 8usize),
+            ("numLevels", ::std::ptr::addr_of!((*base).numLevels) as usize, 48usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 52usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st {
+    /// Produces a value of `Self` whose bytes are all zero.
+    fn default() -> Self {
+        // NOTE(review): `arrayDesc` is a `CUDA_ARRAY3D_DESCRIPTOR`, defined
+        // outside this view; verify that all-zero bytes are a valid value for
+        // each of its fields (e.g. any Rust enum without a 0 discriminant).
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Version-suffixed (`_v1`) name for `CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st`; a plain alias, not a new type.
+pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1 =
+    CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_st;
+/// Unversioned name; resolves to the `_v1` alias.
+pub type CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC = CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC_v1;
+/// Handle-type tag stored as a `u32` (`#[repr(u32)]`), with discriminants 1
+/// through 10.
+///
+/// No variant has discriminant 0, so zero-filled memory is NOT a valid value of
+/// this type: code that zero-initialises a struct containing it must overwrite
+/// the field before the value is treated as initialised.
+///
+/// NOTE(review): the variant names suggest the CUDA driver API's
+/// external-semaphore handle kinds — confirm values against `cuda.h`.
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum CUexternalSemaphoreHandleType_enum {
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD = 1,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32 = 2,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_WIN32_KMT = 3,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D12_FENCE = 4,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_FENCE = 5,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_NVSCISYNC = 6,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX = 7,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_D3D11_KEYED_MUTEX_KMT = 8,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD = 9,
+    CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32 = 10,
+}
+// Re-export the enum under the name without the `_enum` suffix.
+pub use self::CUexternalSemaphoreHandleType_enum as CUexternalSemaphoreHandleType;
+/// C-layout (`#[repr(C)]`) record pairing a handle-type tag with a union of
+/// handle representations; the layout test further down pins it to 96 bytes
+/// with 8-byte alignment.
+///
+/// NOTE(review): presumably `type_` selects which member of `handle` is
+/// meaningful, as in the CUDA driver API struct of the same name — confirm
+/// against `cuda.h`.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
+    // Byte offset 0. Enum with no 0 discriminant, so it must never be left zeroed.
+    pub type_: CUexternalSemaphoreHandleType,
+    // Byte offset 8; 16-byte union.
+    pub handle: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1,
+    // Byte offset 24.
+    pub flags: ::std::os::raw::c_uint,
+    // Byte offset 28; sixteen `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+/// Anonymous union used as the `handle` field of
+/// `CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st`. All three members start at
+/// offset 0 and the union is 16 bytes, 8-byte aligned (see its layout test).
+///
+/// Reading a member is `unsafe`; this type itself does not record which member
+/// was last written.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 {
+    // C `int` representation.
+    pub fd: ::std::os::raw::c_int,
+    // Pair of raw pointers (`handle`, `name`).
+    pub win32: CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1,
+    // Untyped const pointer representation.
+    pub nvSciSyncObj: *const ::std::os::raw::c_void,
+}
+/// Anonymous C struct used as the `win32` member of the semaphore handle
+/// union: two raw pointers, 16 bytes total, 8-byte aligned (see its layout
+/// test).
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
+    // Byte offset 0; mutable untyped pointer.
+    pub handle: *mut ::std::os::raw::c_void,
+    // Byte offset 8; const untyped pointer.
+    pub name: *const ::std::os::raw::c_void,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1() {
+    // Pins the C layout of the anonymous `win32` struct: total size,
+    // alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("handle", ::std::ptr::addr_of!((*base).handle) as usize, 0usize),
+            ("name", ::std::ptr::addr_of!((*base).name) as usize, 8usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1__bindgen_ty_1 {
+    /// Produces a value of `Self` whose bytes are all zero, i.e. both raw
+    /// pointer fields are null.
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1() {
+    // Pins the C layout of the semaphore handle union: total size, alignment,
+    // and that every member starts at offset 0.
+    //
+    // Member addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1)
+        )
+    );
+    // (member name, member address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("fd", ::std::ptr::addr_of!((*base).fd) as usize, 0usize),
+            ("win32", ::std::ptr::addr_of!((*base).win32) as usize, 0usize),
+            ("nvSciSyncObj", ::std::ptr::addr_of!((*base).nvSciSyncObj) as usize, 0usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st__bindgen_ty_1 {
+    /// Produces a union value whose 16 bytes are all zero.
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st() {
+    // Pins the C layout of `CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st`: total
+    // size, alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        96usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("type_", ::std::ptr::addr_of!((*base).type_) as usize, 0usize),
+            ("handle", ::std::ptr::addr_of!((*base).handle) as usize, 8usize),
+            ("flags", ::std::ptr::addr_of!((*base).flags) as usize, 24usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 28usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st {
+    /// Returns a descriptor with every byte zeroed except `type_`.
+    ///
+    /// `CUexternalSemaphoreHandleType` is a `#[repr(u32)]` Rust enum whose
+    /// discriminants run from 1 to 10, so an all-zero `type_` is not a valid
+    /// value and calling `assume_init()` on it would be undefined behaviour.
+    /// `type_` is therefore set to the first declared variant
+    /// (`CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD`); callers should assign
+    /// the handle type they actually use.
+    fn default() -> Self {
+        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
+        unsafe {
+            // Zero everything first (handle union, flags, reserved, padding).
+            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+            // Overwrite the enum field through a raw pointer so that no
+            // reference to the not-yet-valid struct is ever created.
+            ::std::ptr::addr_of_mut!((*s.as_mut_ptr()).type_).write(
+                CUexternalSemaphoreHandleType::CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD,
+            );
+            s.assume_init()
+        }
+    }
+}
+/// Version-suffixed (`_v1`) name for `CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st`; a plain alias, not a new type.
+pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1 = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_st;
+/// Unversioned name; resolves to the `_v1` alias.
+pub type CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC = CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC_v1;
+/// C-layout (`#[repr(C)]`) parameter record: a 72-byte `params` block followed
+/// by `flags` and a reserved tail. The layout test further down pins it to 144
+/// bytes with 8-byte alignment.
+///
+/// NOTE(review): presumably mirrors the CUDA driver API struct of the same
+/// name in `cuda.h` — confirm field meanings there before relying on them.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
+    // Byte offset 0.
+    pub params: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1,
+    // Byte offset 72.
+    pub flags: ::std::os::raw::c_uint,
+    // Byte offset 76; sixteen `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+/// Anonymous C struct used as the `params` field of
+/// `CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st`.
+///
+/// This is a struct, not a union: its members sit at consecutive offsets
+/// (0, 8, 16, 24) and the whole record is 72 bytes, 8-byte aligned (see its
+/// layout test).
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 {
+    // Byte offset 0; wraps a single `value`.
+    pub fence: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1,
+    // Byte offset 8; 8-byte union (pointer or integer).
+    pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2,
+    // Byte offset 16; wraps a single `key`.
+    pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3,
+    // Byte offset 24; twelve `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 12usize],
+}
+/// Anonymous C struct used as the `fence` member of the signal parameters:
+/// a single 64-bit `value`, 8 bytes, 8-byte aligned (see its layout test).
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1 {
+    // Byte offset 0.
+    pub value: ::std::os::raw::c_ulonglong,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1() {
+    // Pins the C layout of the anonymous signal `fence` struct: total size,
+    // alignment, and the byte offset of its only field.
+    //
+    // The field address is taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        8usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [("value", ::std::ptr::addr_of!((*base).value) as usize, 0usize)]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_1),
+            name
+        );
+    }
+}
+/// Anonymous union used as the `nvSciSync` member of the signal parameters.
+/// Both members start at offset 0 and the union is 8 bytes, 8-byte aligned
+/// (see its layout test). Reading a member is `unsafe`.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
+    // Untyped mutable pointer view of the 8 bytes.
+    pub fence: *mut ::std::os::raw::c_void,
+    // 64-bit integer view of the same 8 bytes.
+    pub reserved: ::std::os::raw::c_ulonglong,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2() {
+    // Pins the C layout of the anonymous signal `nvSciSync` union: total size,
+    // alignment, and that every member starts at offset 0.
+    //
+    // Member addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        8usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2)
+        )
+    );
+    // (member name, member address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("fence", ::std::ptr::addr_of!((*base).fence) as usize, 0usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 0usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
+    /// Produces a union value whose 8 bytes are all zero (null `fence`,
+    /// `reserved == 0`).
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Anonymous C struct used as the `keyedMutex` member of the signal
+/// parameters: a single 64-bit `key`, 8 bytes, 8-byte aligned (see its layout
+/// test).
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3 {
+    // Byte offset 0.
+    pub key: ::std::os::raw::c_ulonglong,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3() {
+    // Pins the C layout of the anonymous signal `keyedMutex` struct: total
+    // size, alignment, and the byte offset of its only field.
+    //
+    // The field address is taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        8usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [("key", ::std::ptr::addr_of!((*base).key) as usize, 0usize)]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1__bindgen_ty_3),
+            name
+        );
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1() {
+    // Pins the C layout of the anonymous signal `params` struct: total size,
+    // alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        72usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("fence", ::std::ptr::addr_of!((*base).fence) as usize, 0usize),
+            ("nvSciSync", ::std::ptr::addr_of!((*base).nvSciSync) as usize, 8usize),
+            ("keyedMutex", ::std::ptr::addr_of!((*base).keyedMutex) as usize, 16usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 24usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st__bindgen_ty_1 {
+    /// Produces a value of `Self` whose bytes are all zero.
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st() {
+    // Pins the C layout of `CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st`: total
+    // size, alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        144usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("params", ::std::ptr::addr_of!((*base).params) as usize, 0usize),
+            ("flags", ::std::ptr::addr_of!((*base).flags) as usize, 72usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 76usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st {
+    /// Produces a value of `Self` whose bytes are all zero.
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Version-suffixed (`_v1`) name for `CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st`; a plain alias, not a new type.
+pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st;
+/// Unversioned name; resolves to the `_v1` alias.
+pub type CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS = CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_v1;
+/// C-layout (`#[repr(C)]`) parameter record: a `params` block followed by
+/// `flags` and a reserved tail. Same field sequence as
+/// `CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS_st`, but with its own `params` type.
+///
+/// NOTE(review): presumably mirrors the CUDA driver API struct of the same
+/// name in `cuda.h` — confirm field meanings there before relying on them.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
+    // 72-byte anonymous struct (see its layout test).
+    pub params: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1,
+    pub flags: ::std::os::raw::c_uint,
+    // Sixteen `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 16usize],
+}
+/// Anonymous C struct used as the `params` field of
+/// `CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st`.
+///
+/// This is a struct, not a union: its members sit at consecutive offsets
+/// (0, 8, 16, 32) and the whole record is 72 bytes, 8-byte aligned (see its
+/// layout test). `keyedMutex` is 16 bytes here, which is why `reserved` holds
+/// ten slots rather than the twelve of the signal counterpart.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 {
+    // Byte offset 0; wraps a single `value`.
+    pub fence: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1,
+    // Byte offset 8; 8-byte union (pointer or integer).
+    pub nvSciSync: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2,
+    // Byte offset 16; `key` plus `timeoutMs`.
+    pub keyedMutex: CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3,
+    // Byte offset 32; ten `c_uint` slots.
+    pub reserved: [::std::os::raw::c_uint; 10usize],
+}
+/// Anonymous C struct used as the `fence` member of the wait parameters:
+/// a single 64-bit `value`, 8 bytes, 8-byte aligned (see its layout test).
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1 {
+    // Byte offset 0.
+    pub value: ::std::os::raw::c_ulonglong,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1() {
+    // Pins the C layout of the anonymous wait `fence` struct: total size,
+    // alignment, and the byte offset of its only field.
+    //
+    // The field address is taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        8usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [("value", ::std::ptr::addr_of!((*base).value) as usize, 0usize)]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_1),
+            name
+        );
+    }
+}
+/// Anonymous union used as the `nvSciSync` member of the wait parameters.
+/// Both members start at offset 0 and the union is 8 bytes, 8-byte aligned
+/// (see its layout test). Reading a member is `unsafe`.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
+    // Untyped mutable pointer view of the 8 bytes.
+    pub fence: *mut ::std::os::raw::c_void,
+    // 64-bit integer view of the same 8 bytes.
+    pub reserved: ::std::os::raw::c_ulonglong,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2() {
+    // Pins the C layout of the anonymous wait `nvSciSync` union: total size,
+    // alignment, and that every member starts at offset 0.
+    //
+    // Member addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        8usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2)
+        )
+    );
+    // (member name, member address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("fence", ::std::ptr::addr_of!((*base).fence) as usize, 0usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 0usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_2 {
+    /// Produces a union value whose 8 bytes are all zero (null `fence`,
+    /// `reserved == 0`).
+    fn default() -> Self {
+        // Zero-filled storage, then declared initialised.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Anonymous C struct used as the `keyedMutex` member of the wait parameters:
+/// a 64-bit `key` followed by a 32-bit `timeoutMs`. Trailing padding brings it
+/// to 16 bytes, 8-byte aligned (see its layout test).
+///
+/// NOTE(review): the name `timeoutMs` suggests milliseconds — confirm against
+/// the CUDA driver API documentation.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3 {
+    // Byte offset 0.
+    pub key: ::std::os::raw::c_ulonglong,
+    // Byte offset 8.
+    pub timeoutMs: ::std::os::raw::c_uint,
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3() {
+    // Pins the C layout of the anonymous wait `keyedMutex` struct: total size,
+    // alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("key", ::std::ptr::addr_of!((*base).key) as usize, 0usize),
+            ("timeoutMs", ::std::ptr::addr_of!((*base).timeoutMs) as usize, 8usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1__bindgen_ty_3),
+            name
+        );
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1() {
+    // Pins the C layout of the anonymous wait `params` struct: total size,
+    // alignment, and the byte offset of every field.
+    //
+    // Field addresses are taken with `addr_of!` against real (uninitialised)
+    // storage. Unlike the `&(*ptr::null::<T>()).field` idiom, this never
+    // dereferences a null pointer or creates a reference, so it is not
+    // undefined behaviour.
+    type T = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1;
+    let storage = ::std::mem::MaybeUninit::<T>::uninit();
+    let base = storage.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<T>(),
+        72usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<T>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1)
+        )
+    );
+    // (field name, field address, expected offset from `base`)
+    let fields = unsafe {
+        [
+            ("fence", ::std::ptr::addr_of!((*base).fence) as usize, 0usize),
+            ("nvSciSync", ::std::ptr::addr_of!((*base).nvSciSync) as usize, 8usize),
+            ("keyedMutex", ::std::ptr::addr_of!((*base).keyedMutex) as usize, 16usize),
+            ("reserved", ::std::ptr::addr_of!((*base).reserved) as usize, 32usize),
+        ]
+    };
+    for (name, addr, expected) in fields.iter() {
+        assert_eq!(
+            *addr - base as usize,
+            *expected,
+            "Offset of field: {}::{}",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1),
+            name
+        );
+    }
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st__bindgen_ty_1 {
+    /// Returns a value built from an all-zero byte pattern.
+    fn default() -> Self {
+        // `zeroed()` hands back storage filled with 0 bytes, so no separate
+        // `write_bytes` step is needed. NOTE(review): relies on all-zero being a
+        // valid bit pattern for every field; confirm against the definition.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+#[test]
+fn bindgen_test_layout_CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st() {
+    // Layout check: pins the size, alignment and every field offset of the
+    // Rust type to the values expected for the C struct.
+    //
+    // Offsets are measured with `addr_of!` on an uninitialized stack slot
+    // (pointer arithmetic only). The former `&(*::std::ptr::null::<T>()).field`
+    // form dereferenced a null pointer: undefined behavior, and a panic on
+    // toolchains that insert null-pointer debug assertions.
+    let uninit = ::std::mem::MaybeUninit::<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st>(),
+        144usize,
+        concat!(
+            "Size of: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).params) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st),
+            "::",
+            stringify!(params)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).flags) as usize - ptr as usize },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st),
+            "::",
+            stringify!(flags)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        76usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st),
+            "::",
+            stringify!(reserved)
+        )
+    );
+}
+impl Default for CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st {
+    /// Returns a value built from an all-zero byte pattern.
+    fn default() -> Self {
+        // `zeroed()` hands back storage filled with 0 bytes, so no separate
+        // `write_bytes` step is needed. NOTE(review): relies on all-zero being a
+        // valid bit pattern for every field; confirm against the definition.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1 = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_st;
+pub type CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS = CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS_v1;
+extern "C" {
+    pub fn cuGetErrorString(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult;
+}
+extern "C" {
+    pub fn cuGetErrorName(error: CUresult, pStr: *mut *const ::std::os::raw::c_char) -> CUresult;
+}
+extern "C" {
+    pub fn cuInit(Flags: ::std::os::raw::c_uint) -> CUresult;
+}
+extern "C" {
+    pub fn cuDeviceGet(device: *mut CUdevice, ordinal: ::std::os::raw::c_int) -> CUresult;
+}
+extern "C" {
+    pub fn cuDeviceGetCount(count: *mut ::std::os::raw::c_int) -> CUresult;
+}
+extern "C" {
+    pub fn cuDeviceGetName(
+        name: *mut ::std::os::raw::c_char,
+        len: ::std::os::raw::c_int,
+        dev: CUdevice,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuDeviceGetUuid(uuid: *mut CUuuid, dev: CUdevice) -> CUresult;
+}
+extern "C" {
+    pub fn cuCtxCreate_v2(
+        pctx: *mut CUcontext,
+        flags: ::std::os::raw::c_uint,
+        dev: CUdevice,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuCtxDestroy_v2(ctx: CUcontext) -> CUresult;
+}
+extern "C" {
+    pub fn cuCtxPushCurrent_v2(ctx: CUcontext) -> CUresult;
+}
+extern "C" {
+    pub fn cuCtxPopCurrent_v2(pctx: *mut CUcontext) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemAllocPitch_v2(
+        dptr: *mut CUdeviceptr,
+        pPitch: *mut usize,
+        WidthInBytes: usize,
+        Height: usize,
+        ElementSizeBytes: ::std::os::raw::c_uint,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemFree_v2(dptr: CUdeviceptr) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemAllocHost_v2(pp: *mut *mut ::std::os::raw::c_void, bytesize: usize) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemFreeHost(p: *mut ::std::os::raw::c_void) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemcpyDtoH_v2(
+        dstHost: *mut ::std::os::raw::c_void,
+        srcDevice: CUdeviceptr,
+        ByteCount: usize,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemcpy2D_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemcpy2DUnaligned_v2(pCopy: *const CUDA_MEMCPY2D) -> CUresult;
+}
+extern "C" {
+    pub fn cuMemcpy2DAsync_v2(pCopy: *const CUDA_MEMCPY2D, hStream: CUstream) -> CUresult;
+}
+extern "C" {
+    pub fn cuMipmappedArrayGetLevel(
+        pLevelArray: *mut CUarray,
+        hMipmappedArray: CUmipmappedArray,
+        level: ::std::os::raw::c_uint,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuMipmappedArrayDestroy(hMipmappedArray: CUmipmappedArray) -> CUresult;
+}
+extern "C" {
+    pub fn cuStreamCreate(phStream: *mut CUstream, Flags: ::std::os::raw::c_uint) -> CUresult;
+}
+extern "C" {
+    pub fn cuStreamDestroy_v2(hStream: CUstream) -> CUresult;
+}
+extern "C" {
+    pub fn cuImportExternalMemory(
+        extMem_out: *mut CUexternalMemory,
+        memHandleDesc: *const CUDA_EXTERNAL_MEMORY_HANDLE_DESC,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuExternalMemoryGetMappedBuffer(
+        devPtr: *mut CUdeviceptr,
+        extMem: CUexternalMemory,
+        bufferDesc: *const CUDA_EXTERNAL_MEMORY_BUFFER_DESC,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuExternalMemoryGetMappedMipmappedArray(
+        mipmap: *mut CUmipmappedArray,
+        extMem: CUexternalMemory,
+        mipmapDesc: *const CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuDestroyExternalMemory(extMem: CUexternalMemory) -> CUresult;
+}
+extern "C" {
+    pub fn cuImportExternalSemaphore(
+        extSem_out: *mut CUexternalSemaphore,
+        semHandleDesc: *const CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuSignalExternalSemaphoresAsync(
+        extSemArray: *const CUexternalSemaphore,
+        paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS,
+        numExtSems: ::std::os::raw::c_uint,
+        stream: CUstream,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuWaitExternalSemaphoresAsync(
+        extSemArray: *const CUexternalSemaphore,
+        paramsArray: *const CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS,
+        numExtSems: ::std::os::raw::c_uint,
+        stream: CUstream,
+    ) -> CUresult;
+}
+extern "C" {
+    pub fn cuDestroyExternalSemaphore(extSem: CUexternalSemaphore) -> CUresult;
+}
diff --git a/pixelflux/nvcodec-sys/src/bindgen/nvenc.rs b/pixelflux/nvcodec-sys/src/bindgen/nvenc.rs
new file mode 100644
index 0000000..b6f1714
--- /dev/null
+++ b/pixelflux/nvcodec-sys/src/bindgen/nvenc.rs
@@ -0,0 +1,16519 @@
+/* automatically generated by rust-bindgen 0.59.2 */
+
+#[repr(C)]
+#[derive(Copy, Clone, Debug, Default, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct __BindgenBitfieldUnit<Storage> { // holds the raw storage that bitfield values are read from / written to
+    storage: Storage,
+}
+impl<Storage> __BindgenBitfieldUnit<Storage> {
+    #[inline]
+    pub const fn new(storage: Storage) -> Self { // wraps `storage` as-is
+        Self { storage }
+    }
+}
+impl<Storage> __BindgenBitfieldUnit<Storage>
+where
+    Storage: AsRef<[u8]> + AsMut<[u8]>, // every accessor below views the storage as a byte slice
+{
+    #[inline]
+    pub fn get_bit(&self, index: usize) -> bool { // true when bit number `index` of the storage is set
+        debug_assert!(index / 8 < self.storage.as_ref().len()); // debug-only early check; the slice index below still panics when out of range
+        let byte_index = index / 8;
+        let byte = self.storage.as_ref()[byte_index];
+        let bit_index = if cfg!(target_endian = "big") { // big-endian: bit 0 of a byte is its most significant bit
+            7 - (index % 8)
+        } else {
+            index % 8
+        };
+        let mask = 1 << bit_index;
+        byte & mask == mask
+    }
+    #[inline]
+    pub fn set_bit(&mut self, index: usize, val: bool) { // sets (`val == true`) or clears bit number `index`
+        debug_assert!(index / 8 < self.storage.as_ref().len()); // debug-only early check; the slice index below still panics when out of range
+        let byte_index = index / 8;
+        let byte = &mut self.storage.as_mut()[byte_index];
+        let bit_index = if cfg!(target_endian = "big") { // same per-byte bit numbering as `get_bit`
+            7 - (index % 8)
+        } else {
+            index % 8
+        };
+        let mask = 1 << bit_index;
+        if val {
+            *byte |= mask;
+        } else {
+            *byte &= !mask;
+        }
+    }
+    #[inline]
+    pub fn get(&self, bit_offset: usize, bit_width: u8) -> u64 { // reads `bit_width` bits starting at `bit_offset` as an integer
+        debug_assert!(bit_width <= 64); // the result is returned in a u64
+        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
+        let mut val = 0;
+        for i in 0..(bit_width as usize) {
+            if self.get_bit(i + bit_offset) {
+                let index = if cfg!(target_endian = "big") { // big-endian: the first storage bit becomes the highest result bit
+                    bit_width as usize - 1 - i
+                } else {
+                    i
+                };
+                val |= 1 << index;
+            }
+        }
+        val
+    }
+    #[inline]
+    pub fn set(&mut self, bit_offset: usize, bit_width: u8, val: u64) { // writes the low `bit_width` bits of `val` starting at `bit_offset`
+        debug_assert!(bit_width <= 64); // `val` carries at most 64 bits
+        debug_assert!(bit_offset / 8 < self.storage.as_ref().len());
+        debug_assert!((bit_offset + (bit_width as usize)) / 8 <= self.storage.as_ref().len());
+        for i in 0..(bit_width as usize) {
+            let mask = 1 << i;
+            let val_bit_is_set = val & mask == mask;
+            let index = if cfg!(target_endian = "big") { // mirrors the bit order used by `get`
+                bit_width as usize - 1 - i
+            } else {
+                i
+            };
+            self.set_bit(index + bit_offset, val_bit_is_set);
+        }
+    }
+}
+pub const NVENCAPI_MAJOR_VERSION: u32 = 13;
+pub const NVENCAPI_MINOR_VERSION: u32 = 0;
+pub const NVENCAPI_VERSION: u32 = 13;
+pub const NVENC_INFINITE_GOPLENGTH: u32 = 4294967295;
+pub const NV_MAX_SEQ_HDR_LEN: u32 = 512;
+pub const NV_MAX_TILE_COLS_AV1: u32 = 64;
+pub const NV_MAX_TILE_ROWS_AV1: u32 = 64;
+pub type __int8_t = ::std::os::raw::c_schar;
+pub type __uint8_t = ::std::os::raw::c_uchar;
+pub type __int16_t = ::std::os::raw::c_short;
+pub type __uint16_t = ::std::os::raw::c_ushort;
+pub type __int32_t = ::std::os::raw::c_int;
+pub type __uint32_t = ::std::os::raw::c_uint;
+pub type __uint64_t = ::std::os::raw::c_ulong;
+#[doc = " \\struct GUID"]
+#[doc = " Abstracts the GUID structure for non-windows platforms."]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _GUID {
+    #[doc = "< [in]: Specifies the first 8 hexadecimal digits of the GUID."]
+    pub Data1: u32,
+    #[doc = "< [in]: Specifies the first group of 4 hexadecimal digits."]
+    pub Data2: u16,
+    #[doc = "< [in]: Specifies the second group of 4 hexadecimal digits."]
+    pub Data3: u16,
+    #[doc = "< [in]: Array of 8 bytes. The first 2 bytes contain the third group of 4 hexadecimal digits."]
+    #[doc = "The remaining 6 bytes contain the final 12 hexadecimal digits."]
+    pub Data4: [u8; 8usize],
+}
+#[test]
+fn bindgen_test_layout__GUID() {
+    // Pins the size, alignment and field offsets of `_GUID` to the C layout.
+    //
+    // Offsets are taken with `addr_of!` on an uninitialized stack slot, which
+    // is plain pointer arithmetic. The old `&(*::std::ptr::null::<_GUID>()).f`
+    // form dereferenced a null pointer: undefined behavior, and a panic on
+    // toolchains that insert null-pointer debug assertions.
+    let uninit = ::std::mem::MaybeUninit::<_GUID>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(::std::mem::size_of::<_GUID>(), 16usize, concat!("Size of: ", stringify!(_GUID)));
+    assert_eq!(::std::mem::align_of::<_GUID>(), 4usize, concat!("Alignment of ", stringify!(_GUID)));
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).Data1) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_GUID),
+            "::",
+            stringify!(Data1)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).Data2) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_GUID),
+            "::",
+            stringify!(Data2)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).Data3) as usize - ptr as usize },
+        6usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_GUID),
+            "::",
+            stringify!(Data3)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).Data4) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_GUID),
+            "::",
+            stringify!(Data4)
+        )
+    );
+}
+#[doc = " \\struct GUID"]
+#[doc = " Abstracts the GUID structure for non-windows platforms."]
+pub type GUID = _GUID;
+#[doc = " \\struct _NVENC_RECT"]
+#[doc = " Defines a Rectangle. Used in ::NV_ENC_PREPROCESS_FRAME."]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NVENC_RECT {
+    #[doc = "< [in]: X coordinate of the upper left corner of rectangular area to be specified."]
+    pub left: u32,
+    #[doc = "< [in]: Y coordinate of the upper left corner of the rectangular area to be specified."]
+    pub top: u32,
+    #[doc = "< [in]: X coordinate of the bottom right corner of the rectangular area to be specified."]
+    pub right: u32,
+    #[doc = "< [in]: Y coordinate of the bottom right corner of the rectangular area to be specified."]
+    pub bottom: u32,
+}
+#[test]
+fn bindgen_test_layout__NVENC_RECT() {
+    // Pins the size, alignment and field offsets of `_NVENC_RECT` to the C
+    // layout.
+    //
+    // Offsets are taken with `addr_of!` on an uninitialized stack slot (pointer
+    // arithmetic only). The old `&(*::std::ptr::null::<T>()).field` form was a
+    // null dereference: undefined behavior, and a panic under null-pointer checks.
+    let uninit = ::std::mem::MaybeUninit::<_NVENC_RECT>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(::std::mem::size_of::<_NVENC_RECT>(), 16usize, concat!("Size of: ", stringify!(_NVENC_RECT)));
+    assert_eq!(::std::mem::align_of::<_NVENC_RECT>(), 4usize, concat!("Alignment of ", stringify!(_NVENC_RECT)));
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).left) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NVENC_RECT),
+            "::",
+            stringify!(left)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).top) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NVENC_RECT),
+            "::",
+            stringify!(top)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).right) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NVENC_RECT),
+            "::",
+            stringify!(right)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bottom) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NVENC_RECT),
+            "::",
+            stringify!(bottom)
+        )
+    );
+}
+#[doc = " \\struct _NVENC_RECT"]
+#[doc = " Defines a Rectangle. Used in ::NV_ENC_PREPROCESS_FRAME."]
+pub type NVENC_RECT = _NVENC_RECT;
+#[doc = " @}"]
+pub type NV_ENC_INPUT_PTR = *mut ::std::os::raw::c_void;
+pub type NV_ENC_OUTPUT_PTR = *mut ::std::os::raw::c_void;
+pub type NV_ENC_REGISTERED_PTR = *mut ::std::os::raw::c_void;
+pub type NV_ENC_CUSTREAM_PTR = *mut ::std::os::raw::c_void;
+// C enum exposed as a transparent wrapper over `c_uint`. The named values are
+// associated constants rather than Rust enum variants, so a raw value that is
+// not listed here can still be stored in the wrapper.
+//
+// Type first, constants second, public alias last.
+#[repr(transparent)]
+#[doc = " Input frame encode modes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PARAMS_FRAME_FIELD_MODE(pub ::std::os::raw::c_uint);
+
+impl _NV_ENC_PARAMS_FRAME_FIELD_MODE {
+    #[doc = "< Frame mode"]
+    pub const NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME: Self = Self(1);
+    #[doc = "< Field mode"]
+    pub const NV_ENC_PARAMS_FRAME_FIELD_MODE_FIELD: Self = Self(2);
+    #[doc = "< MB adaptive frame/field"]
+    pub const NV_ENC_PARAMS_FRAME_FIELD_MODE_MBAFF: Self = Self(3);
+}
+
+#[doc = " Input frame encode modes"]
+pub use self::_NV_ENC_PARAMS_FRAME_FIELD_MODE as NV_ENC_PARAMS_FRAME_FIELD_MODE;
+// C enum exposed as a transparent wrapper over `c_uint`; the named values are
+// associated constants, so unlisted raw values remain representable.
+#[repr(transparent)]
+#[doc = " Rate Control Modes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PARAMS_RC_MODE(pub ::std::os::raw::c_uint);
+
+impl _NV_ENC_PARAMS_RC_MODE {
+    #[doc = "< Constant QP mode"]
+    pub const NV_ENC_PARAMS_RC_CONSTQP: Self = Self(0);
+    #[doc = "< Variable bitrate mode"]
+    pub const NV_ENC_PARAMS_RC_VBR: Self = Self(1);
+    #[doc = "< Constant bitrate mode"]
+    pub const NV_ENC_PARAMS_RC_CBR: Self = Self(2);
+}
+
+#[doc = " Rate Control Modes"]
+pub use self::_NV_ENC_PARAMS_RC_MODE as NV_ENC_PARAMS_RC_MODE;
+// C enum exposed as a transparent wrapper over `c_uint`; the named values are
+// associated constants, so unlisted raw values remain representable.
+#[repr(transparent)]
+#[doc = " Multi Pass encoding"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_MULTI_PASS(pub ::std::os::raw::c_uint);
+
+impl _NV_ENC_MULTI_PASS {
+    #[doc = "< Single Pass"]
+    pub const NV_ENC_MULTI_PASS_DISABLED: Self = Self(0);
+    #[doc = "< Two Pass encoding is enabled where first Pass is quarter resolution"]
+    pub const NV_ENC_TWO_PASS_QUARTER_RESOLUTION: Self = Self(1);
+    #[doc = "< Two Pass encoding is enabled where first Pass is full resolution"]
+    pub const NV_ENC_TWO_PASS_FULL_RESOLUTION: Self = Self(2);
+}
+
+#[doc = " Multi Pass encoding"]
+pub use self::_NV_ENC_MULTI_PASS as NV_ENC_MULTI_PASS;
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_STATE_RESTORE_TYPE {
+    #[doc = "< Restore full encoder state"]
+    NV_ENC_STATE_RESTORE_FULL = 1,
+    #[doc = "< Restore only rate control state"]
+    NV_ENC_STATE_RESTORE_RATE_CONTROL = 2,
+    #[doc = "< Restore full encoder state except for rate control state"]
+    NV_ENC_STATE_RESTORE_ENCODE = 3,
+}
+pub use self::_NV_ENC_STATE_RESTORE_TYPE as NV_ENC_STATE_RESTORE_TYPE;
+#[repr(u32)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_OUTPUT_STATS_LEVEL {
+    #[doc = " No output stats"]
+    NV_ENC_OUTPUT_STATS_NONE = 0,
+    #[doc = " Output stats for every block."]
+    #[doc = "Block represents a CTB for HEVC, macroblock for H.264, super block for AV1"]
+    NV_ENC_OUTPUT_STATS_BLOCK_LEVEL = 1,
+    NV_ENC_OUTPUT_STATS_ROW_LEVEL = 2,
+}
+pub use self::_NV_ENC_OUTPUT_STATS_LEVEL as NV_ENC_OUTPUT_STATS_LEVEL;
+#[repr(u32)]
+#[doc = " Emphasis Levels"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_EMPHASIS_MAP_LEVEL {
+    #[doc = "< Emphasis Map Level 0, for zero Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_0 = 0,
+    #[doc = "< Emphasis Map Level 1, for very low Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_1 = 1,
+    #[doc = "< Emphasis Map Level 2, for low Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_2 = 2,
+    #[doc = "< Emphasis Map Level 3, for medium Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_3 = 3,
+    #[doc = "< Emphasis Map Level 4, for high Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_4 = 4,
+    #[doc = "< Emphasis Map Level 5, for very high Delta QP value"]
+    NV_ENC_EMPHASIS_MAP_LEVEL_5 = 5,
+}
+#[doc = " Emphasis Levels"]
+pub use self::_NV_ENC_EMPHASIS_MAP_LEVEL as NV_ENC_EMPHASIS_MAP_LEVEL;
+#[repr(u32)]
+#[doc = " QP MAP MODE"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_QP_MAP_MODE {
+    #[doc = "< Value in NV_ENC_PIC_PARAMS::qpDeltaMap have no effect."]
+    NV_ENC_QP_MAP_DISABLED = 0,
+    #[doc = "< Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as Emphasis level. Currently this is only supported for H264"]
+    NV_ENC_QP_MAP_EMPHASIS = 1,
+    #[doc = "< Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as QP delta map."]
+    NV_ENC_QP_MAP_DELTA = 2,
+    #[doc = "< Currently This is not supported. Value in NV_ENC_PIC_PARAMS::qpDeltaMap will be treated as QP value."]
+    NV_ENC_QP_MAP = 3,
+}
+#[doc = " QP MAP MODE"]
+pub use self::_NV_ENC_QP_MAP_MODE as NV_ENC_QP_MAP_MODE;
+// C enum exposed as a transparent wrapper over `c_uint`; the named values are
+// associated constants, so unlisted raw values remain representable.
+#[repr(transparent)]
+#[doc = " Input picture structure"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PIC_STRUCT(pub ::std::os::raw::c_uint);
+
+impl _NV_ENC_PIC_STRUCT {
+    #[doc = "< Progressive frame"]
+    pub const NV_ENC_PIC_STRUCT_FRAME: Self = Self(1);
+    #[doc = "< Field encoding top field first"]
+    pub const NV_ENC_PIC_STRUCT_FIELD_TOP_BOTTOM: Self = Self(2);
+    #[doc = "< Field encoding bottom field first"]
+    pub const NV_ENC_PIC_STRUCT_FIELD_BOTTOM_TOP: Self = Self(3);
+}
+
+#[doc = " Input picture structure"]
+pub use self::_NV_ENC_PIC_STRUCT as NV_ENC_PIC_STRUCT;
+#[repr(u32)]
+#[doc = " Display picture structure"]
+#[doc = " Currently, this enum is only used for deciding the number of clock timestamp sets in Picture Timing SEI / Time Code SEI"]
+#[doc = " Otherwise, this has no impact on encoder behavior"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_DISPLAY_PIC_STRUCT {
+    #[doc = "< Progressive frame"]
+    NV_ENC_PIC_STRUCT_DISPLAY_FRAME = 0,
+    #[doc = "< Field encoding top field first"]
+    NV_ENC_PIC_STRUCT_DISPLAY_FIELD_TOP_BOTTOM = 1,
+    #[doc = "< Field encoding bottom field first"]
+    NV_ENC_PIC_STRUCT_DISPLAY_FIELD_BOTTOM_TOP = 2,
+    #[doc = "< Frame doubling"]
+    NV_ENC_PIC_STRUCT_DISPLAY_FRAME_DOUBLING = 3,
+    #[doc = "< Frame tripling"]
+    NV_ENC_PIC_STRUCT_DISPLAY_FRAME_TRIPLING = 4,
+}
+#[doc = " Display picture structure"]
+#[doc = " Currently, this enum is only used for deciding the number of clock timestamp sets in Picture Timing SEI / Time Code SEI"]
+#[doc = " Otherwise, this has no impact on encoder behavior"]
+pub use self::_NV_ENC_DISPLAY_PIC_STRUCT as NV_ENC_DISPLAY_PIC_STRUCT;
+#[repr(transparent)]
+#[doc = " Input picture type"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PIC_TYPE(pub ::std::os::raw::c_uint);
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Forward predicted"]
+    pub const NV_ENC_PIC_TYPE_P: Self = Self(0);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Bi-directionally predicted picture"]
+    pub const NV_ENC_PIC_TYPE_B: Self = Self(1);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Intra predicted picture"]
+    pub const NV_ENC_PIC_TYPE_I: Self = Self(2);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< IDR picture"]
+    pub const NV_ENC_PIC_TYPE_IDR: Self = Self(3);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Bi-directionally predicted with only Intra MBs"]
+    pub const NV_ENC_PIC_TYPE_BI: Self = Self(4);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Picture is skipped"]
+    pub const NV_ENC_PIC_TYPE_SKIPPED: Self = Self(5);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< First picture in intra refresh cycle"]
+    pub const NV_ENC_PIC_TYPE_INTRA_REFRESH: Self = Self(6);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Non reference P picture"]
+    pub const NV_ENC_PIC_TYPE_NONREF_P: Self = Self(7);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Switch frame (AV1 only)"]
+    pub const NV_ENC_PIC_TYPE_SWITCH: Self = Self(8);
+}
+impl _NV_ENC_PIC_TYPE {
+    #[doc = "< Picture type unknown"]
+    pub const NV_ENC_PIC_TYPE_UNKNOWN: Self = Self(255);
+}
+#[doc = " Input picture type"]
+pub use self::_NV_ENC_PIC_TYPE as NV_ENC_PIC_TYPE;
+// C enum exposed as a transparent wrapper over `c_uint`. The named values are
+// associated constants rather than Rust enum variants, so a raw value that is
+// not listed here can still be stored in the wrapper.
+// Type first, constants second, public alias last.
+#[repr(transparent)]
+#[doc = " Motion vector precisions"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_MV_PRECISION(pub ::std::os::raw::c_uint);
+
+impl _NV_ENC_MV_PRECISION {
+    #[doc = "< Driver selects Quarter-Pel motion vector precision by default"]
+    pub const NV_ENC_MV_PRECISION_DEFAULT: Self = Self(0);
+    #[doc = "< Full-Pel motion vector precision"]
+    pub const NV_ENC_MV_PRECISION_FULL_PEL: Self = Self(1);
+    #[doc = "< Half-Pel motion vector precision"]
+    pub const NV_ENC_MV_PRECISION_HALF_PEL: Self = Self(2);
+    #[doc = "< Quarter-Pel motion vector precision"]
+    pub const NV_ENC_MV_PRECISION_QUARTER_PEL: Self = Self(3);
+}
+
+#[doc = " Motion vector precisions"]
+pub use self::_NV_ENC_MV_PRECISION as NV_ENC_MV_PRECISION;
+#[repr(u32)]
+#[doc = " Input buffer formats"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_BUFFER_FORMAT {
+    #[doc = "< Undefined buffer format"]
+    NV_ENC_BUFFER_FORMAT_UNDEFINED = 0x0000_0000,
+    #[doc = "< Semi-Planar YUV [Y plane followed by interleaved UV plane]"]
+    NV_ENC_BUFFER_FORMAT_NV12 = 0x0000_0001,
+    #[doc = "< Planar YUV [Y plane followed by V and U planes]"]
+    NV_ENC_BUFFER_FORMAT_YV12 = 0x0000_0010,
+    #[doc = "< Planar YUV [Y plane followed by U and V planes]"]
+    NV_ENC_BUFFER_FORMAT_IYUV = 0x0000_0100,
+    #[doc = "< Planar YUV [Y plane followed by U and V planes]"]
+    NV_ENC_BUFFER_FORMAT_YUV444 = 0x0000_1000,
+    #[doc = "< 10 bit Semi-Planar YUV [Y plane followed by interleaved UV plane]. Each pixel of size 2 bytes. Most Significant 10 bits contain pixel data."]
+    NV_ENC_BUFFER_FORMAT_YUV420_10BIT = 0x0001_0000,
+    #[doc = "< 10 bit Planar YUV444 [Y plane followed by U and V planes]. Each pixel of size 2 bytes. Most Significant 10 bits contain pixel data."]
+    NV_ENC_BUFFER_FORMAT_YUV444_10BIT = 0x0010_0000,
+    #[doc = "< 8 bit Packed A8R8G8B8. This is a word-ordered format"]
+    #[doc = "where a pixel is represented by a 32-bit word with B"]
+    #[doc = "in the lowest 8 bits, G in the next 8 bits, R in the"]
+    #[doc = "8 bits after that and A in the highest 8 bits."]
+    NV_ENC_BUFFER_FORMAT_ARGB = 0x0100_0000,
+    #[doc = "< 10 bit Packed A2R10G10B10. This is a word-ordered format"]
+    #[doc = "where a pixel is represented by a 32-bit word with B"]
+    #[doc = "in the lowest 10 bits, G in the next 10 bits, R in the"]
+    #[doc = "10 bits after that and A in the highest 2 bits."]
+    NV_ENC_BUFFER_FORMAT_ARGB10 = 0x0200_0000,
+    #[doc = "< 8 bit Packed A8Y8U8V8. This is a word-ordered format"]
+    #[doc = "where a pixel is represented by a 32-bit word with V"]
+    #[doc = "in the lowest 8 bits, U in the next 8 bits, Y in the"]
+    #[doc = "8 bits after that and A in the highest 8 bits."]
+    NV_ENC_BUFFER_FORMAT_AYUV = 0x0400_0000,
+    #[doc = "< 8 bit Packed A8B8G8R8. This is a word-ordered format"]
+    #[doc = "where a pixel is represented by a 32-bit word with R"]
+    #[doc = "in the lowest 8 bits, G in the next 8 bits, B in the"]
+    #[doc = "8 bits after that and A in the highest 8 bits."]
+    NV_ENC_BUFFER_FORMAT_ABGR = 0x1000_0000,
+    #[doc = "< 10 bit Packed A2B10G10R10. This is a word-ordered format"]
+    #[doc = "where a pixel is represented by a 32-bit word with R"]
+    #[doc = "in the lowest 10 bits, G in the next 10 bits, B in the"]
+    #[doc = "10 bits after that and A in the highest 2 bits."]
+    NV_ENC_BUFFER_FORMAT_ABGR10 = 0x2000_0000,
+    #[doc = "< Buffer format representing one-dimensional buffer."]
+    #[doc = "This format should be used only when registering the"]
+    #[doc = "resource as output buffer, which will be used to write"]
+    #[doc = "the encoded bit stream or H.264 ME only mode output."]
+    NV_ENC_BUFFER_FORMAT_U8 = 0x4000_0000,
+    #[doc = "< Semi-Planar YUV 422 [Y plane followed by interleaved UV plane]"]
+    NV_ENC_BUFFER_FORMAT_NV16 = 0x4000_0001,
+    #[doc = "< Semi-Planar 10-bit YUV 422 [Y plane followed by interleaved UV plane]"]
+    NV_ENC_BUFFER_FORMAT_P210 = 0x4000_0002,
+}
+#[doc = " Input buffer formats"]
+pub use self::_NV_ENC_BUFFER_FORMAT as NV_ENC_BUFFER_FORMAT;
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_HEVC_1: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_3; // = 30
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_HEVC_2: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_60; // = 60
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_TIER_HEVC_MAIN: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_AUTOSELECT; // = 0
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_2: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_AUTOSELECT; // = 0
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_21: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_TIER_HEVC_HIGH; // = 1
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_41: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_1b; // = 9
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_42: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_1; // = 10
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_43: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_11; // = 11
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_5: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_12; // = 12
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_51: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_13; // = 13
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_7: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_2; // = 20
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_71: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_21; // = 21
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_LEVEL_AV1_72: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_H264_22; // = 22
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_TIER_AV1_0: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_LEVEL_AUTOSELECT; // = 0
+}
+impl _NV_ENC_LEVEL {
+    pub const NV_ENC_TIER_AV1_1: _NV_ENC_LEVEL = _NV_ENC_LEVEL::NV_ENC_TIER_HEVC_HIGH; // = 1
+}
+#[repr(u32)]
+#[doc = " Encoding levels"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_LEVEL { // one variant per distinct value; names that reuse a value are the associated consts above
+    NV_ENC_LEVEL_AUTOSELECT = 0,
+    NV_ENC_LEVEL_H264_1 = 10,
+    NV_ENC_LEVEL_H264_1b = 9,
+    NV_ENC_LEVEL_H264_11 = 11,
+    NV_ENC_LEVEL_H264_12 = 12,
+    NV_ENC_LEVEL_H264_13 = 13,
+    NV_ENC_LEVEL_H264_2 = 20,
+    NV_ENC_LEVEL_H264_21 = 21,
+    NV_ENC_LEVEL_H264_22 = 22,
+    NV_ENC_LEVEL_H264_3 = 30,
+    NV_ENC_LEVEL_H264_31 = 31,
+    NV_ENC_LEVEL_H264_32 = 32,
+    NV_ENC_LEVEL_H264_4 = 40,
+    NV_ENC_LEVEL_H264_41 = 41,
+    NV_ENC_LEVEL_H264_42 = 42,
+    NV_ENC_LEVEL_H264_5 = 50,
+    NV_ENC_LEVEL_H264_51 = 51,
+    NV_ENC_LEVEL_H264_52 = 52,
+    NV_ENC_LEVEL_H264_60 = 60,
+    NV_ENC_LEVEL_H264_61 = 61,
+    NV_ENC_LEVEL_H264_62 = 62,
+    NV_ENC_LEVEL_HEVC_21 = 63,
+    NV_ENC_LEVEL_HEVC_3 = 90,
+    NV_ENC_LEVEL_HEVC_31 = 93,
+    NV_ENC_LEVEL_HEVC_4 = 120,
+    NV_ENC_LEVEL_HEVC_41 = 123,
+    NV_ENC_LEVEL_HEVC_5 = 150,
+    NV_ENC_LEVEL_HEVC_51 = 153,
+    NV_ENC_LEVEL_HEVC_52 = 156,
+    NV_ENC_LEVEL_HEVC_6 = 180,
+    NV_ENC_LEVEL_HEVC_61 = 183,
+    NV_ENC_LEVEL_HEVC_62 = 186,
+    NV_ENC_TIER_HEVC_HIGH = 1,
+    NV_ENC_LEVEL_AV1_22 = 2,
+    NV_ENC_LEVEL_AV1_23 = 3,
+    NV_ENC_LEVEL_AV1_3 = 4,
+    NV_ENC_LEVEL_AV1_31 = 5,
+    NV_ENC_LEVEL_AV1_32 = 6,
+    NV_ENC_LEVEL_AV1_33 = 7,
+    NV_ENC_LEVEL_AV1_4 = 8,
+    NV_ENC_LEVEL_AV1_52 = 14,
+    NV_ENC_LEVEL_AV1_53 = 15,
+    NV_ENC_LEVEL_AV1_6 = 16,
+    NV_ENC_LEVEL_AV1_61 = 17,
+    NV_ENC_LEVEL_AV1_62 = 18,
+    NV_ENC_LEVEL_AV1_63 = 19,
+    NV_ENC_LEVEL_AV1_73 = 23,
+    NV_ENC_LEVEL_AV1_AUTOSELECT = 24,
+}
+#[doc = " Encoding levels"]
+pub use self::_NV_ENC_LEVEL as NV_ENC_LEVEL;
+// Named status codes for `_NVENCSTATUS`.
+//
+// `_NVENCSTATUS` is a tuple struct wrapping an integer (declared after these
+// impl blocks), so each code is an associated constant built with
+// `_NVENCSTATUS(n)` rather than an enum variant. The codes below run
+// contiguously from 0 (`NV_ENC_SUCCESS`) to 26 (`NV_ENC_ERR_NEED_MORE_OUTPUT`).
+impl _NVENCSTATUS {
+    #[doc = " This indicates that API call returned with no errors."]
+    pub const NV_ENC_SUCCESS: _NVENCSTATUS = _NVENCSTATUS(0);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that no encode capable devices were detected."]
+    pub const NV_ENC_ERR_NO_ENCODE_DEVICE: _NVENCSTATUS = _NVENCSTATUS(1);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that devices pass by the client is not supported."]
+    pub const NV_ENC_ERR_UNSUPPORTED_DEVICE: _NVENCSTATUS = _NVENCSTATUS(2);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the encoder device supplied by the client is not"]
+    #[doc = " valid."]
+    pub const NV_ENC_ERR_INVALID_ENCODERDEVICE: _NVENCSTATUS = _NVENCSTATUS(3);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that device passed to the API call is invalid."]
+    pub const NV_ENC_ERR_INVALID_DEVICE: _NVENCSTATUS = _NVENCSTATUS(4);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that device passed to the API call is no longer available and"]
+    #[doc = " needs to be reinitialized. The clients need to destroy the current encoder"]
+    #[doc = " session by freeing the allocated input output buffers and destroying the device"]
+    #[doc = " and create a new encoding session."]
+    pub const NV_ENC_ERR_DEVICE_NOT_EXIST: _NVENCSTATUS = _NVENCSTATUS(5);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that one or more of the pointers passed to the API call"]
+    #[doc = " is invalid."]
+    pub const NV_ENC_ERR_INVALID_PTR: _NVENCSTATUS = _NVENCSTATUS(6);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that completion event passed in ::NvEncEncodePicture() call"]
+    #[doc = " is invalid."]
+    pub const NV_ENC_ERR_INVALID_EVENT: _NVENCSTATUS = _NVENCSTATUS(7);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that one or more of the parameter passed to the API call"]
+    #[doc = " is invalid."]
+    pub const NV_ENC_ERR_INVALID_PARAM: _NVENCSTATUS = _NVENCSTATUS(8);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that an API call was made in wrong sequence/order."]
+    pub const NV_ENC_ERR_INVALID_CALL: _NVENCSTATUS = _NVENCSTATUS(9);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the API call failed because it was unable to allocate"]
+    #[doc = " enough memory to perform the requested operation."]
+    pub const NV_ENC_ERR_OUT_OF_MEMORY: _NVENCSTATUS = _NVENCSTATUS(10);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the encoder has not been initialized with"]
+    #[doc = " ::NvEncInitializeEncoder() or that initialization has failed."]
+    #[doc = " The client cannot allocate input or output buffers or do any encoding"]
+    #[doc = " related operation before successfully initializing the encoder."]
+    pub const NV_ENC_ERR_ENCODER_NOT_INITIALIZED: _NVENCSTATUS = _NVENCSTATUS(11);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that an unsupported parameter was passed by the client."]
+    pub const NV_ENC_ERR_UNSUPPORTED_PARAM: _NVENCSTATUS = _NVENCSTATUS(12);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the ::NvEncLockBitstream() failed to lock the output"]
+    #[doc = " buffer. This happens when the client makes a non blocking lock call to"]
+    #[doc = " access the output bitstream by passing NV_ENC_LOCK_BITSTREAM::doNotWait flag."]
+    #[doc = " This is not a fatal error and client should retry the same operation after"]
+    #[doc = " few milliseconds."]
+    pub const NV_ENC_ERR_LOCK_BUSY: _NVENCSTATUS = _NVENCSTATUS(13);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the size of the user buffer passed by the client is"]
+    #[doc = " insufficient for the requested operation."]
+    pub const NV_ENC_ERR_NOT_ENOUGH_BUFFER: _NVENCSTATUS = _NVENCSTATUS(14);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that an invalid struct version was used by the client."]
+    pub const NV_ENC_ERR_INVALID_VERSION: _NVENCSTATUS = _NVENCSTATUS(15);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that ::NvEncMapInputResource() API failed to map the client"]
+    #[doc = " provided input resource."]
+    pub const NV_ENC_ERR_MAP_FAILED: _NVENCSTATUS = _NVENCSTATUS(16);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates encode driver requires more input buffers to produce an output"]
+    #[doc = " bitstream. If this error is returned from ::NvEncEncodePicture() API, this"]
+    #[doc = " is not a fatal error. If the client is encoding with B frames then,"]
+    #[doc = " ::NvEncEncodePicture() API might be buffering the input frame for re-ordering."]
+    #[doc = ""]
+    #[doc = " A client operating in synchronous mode cannot call ::NvEncLockBitstream()"]
+    #[doc = " API on the output bitstream buffer if ::NvEncEncodePicture() returned the"]
+    #[doc = " ::NV_ENC_ERR_NEED_MORE_INPUT error code."]
+    #[doc = " The client must continue providing input frames until encode driver returns"]
+    #[doc = " ::NV_ENC_SUCCESS. After receiving ::NV_ENC_SUCCESS status the client can call"]
+    #[doc = " ::NvEncLockBitstream() API on the output buffers in the same order in which"]
+    #[doc = " it has called ::NvEncEncodePicture()."]
+    pub const NV_ENC_ERR_NEED_MORE_INPUT: _NVENCSTATUS = _NVENCSTATUS(17);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the HW encoder is busy encoding and is unable to encode"]
+    #[doc = " the input. The client should call ::NvEncEncodePicture() again after few"]
+    #[doc = " milliseconds."]
+    pub const NV_ENC_ERR_ENCODER_BUSY: _NVENCSTATUS = _NVENCSTATUS(18);
+}
+impl _NVENCSTATUS {
+    // NOTE(review): the identifier is spelled `REGISTERD` (no second "E").
+    // Presumably this mirrors the upstream C header -- confirm before renaming,
+    // since callers must use this exact spelling.
+    #[doc = " This indicates that the completion event passed in ::NvEncEncodePicture()"]
+    #[doc = " API has not been registered with encoder driver using ::NvEncRegisterAsyncEvent()."]
+    pub const NV_ENC_ERR_EVENT_NOT_REGISTERD: _NVENCSTATUS = _NVENCSTATUS(19);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that an unknown internal error has occurred."]
+    pub const NV_ENC_ERR_GENERIC: _NVENCSTATUS = _NVENCSTATUS(20);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the client is attempting to use a feature"]
+    #[doc = " that is not available for the license type for the current system."]
+    pub const NV_ENC_ERR_INCOMPATIBLE_CLIENT_KEY: _NVENCSTATUS = _NVENCSTATUS(21);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the client is attempting to use a feature"]
+    #[doc = " that is not implemented for the current version."]
+    pub const NV_ENC_ERR_UNIMPLEMENTED: _NVENCSTATUS = _NVENCSTATUS(22);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the ::NvEncRegisterResource API failed to register the resource."]
+    pub const NV_ENC_ERR_RESOURCE_REGISTER_FAILED: _NVENCSTATUS = _NVENCSTATUS(23);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the client is attempting to unregister a resource"]
+    #[doc = " that has not been successfully registered."]
+    pub const NV_ENC_ERR_RESOURCE_NOT_REGISTERED: _NVENCSTATUS = _NVENCSTATUS(24);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates that the client is attempting to unmap a resource"]
+    #[doc = " that has not been successfully mapped."]
+    pub const NV_ENC_ERR_RESOURCE_NOT_MAPPED: _NVENCSTATUS = _NVENCSTATUS(25);
+}
+impl _NVENCSTATUS {
+    #[doc = " This indicates encode driver requires more output buffers to write an output"]
+    #[doc = " bitstream. If this error is returned from ::NvEncRestoreEncoderState() API, this"]
+    #[doc = " is not a fatal error. If the client is encoding with B frames then,"]
+    #[doc = " ::NvEncRestoreEncoderState() API might be requiring the extra output buffer for accomodating overlay frame output in a separate buffer, for AV1 codec."]
+    #[doc = " In this case, client must call NvEncRestoreEncoderState() API again with NV_ENC_RESTORE_ENCODER_STATE_PARAMS::outputBitstream as input along with"]
+    #[doc = " the parameters in the previous call. When operating in asynchronous mode of encoding, client must also specify NV_ENC_RESTORE_ENCODER_STATE_PARAMS::completionEvent."]
+    pub const NV_ENC_ERR_NEED_MORE_OUTPUT: _NVENCSTATUS = _NVENCSTATUS(26);
+}
+// Status code type: a `#[repr(transparent)]` tuple struct around a C
+// `unsigned int`, so it has the same layout as the wrapped integer.
+// Unlike the `#[repr(u32)]` enums in this file, the wrapper can hold ANY
+// integer value, not only the named constants declared in the
+// `impl _NVENCSTATUS` blocks; compare against those constants (e.g.
+// `status == _NVENCSTATUS::NV_ENC_SUCCESS`) or read the raw number via `.0`.
+#[repr(transparent)]
+#[doc = " Error Codes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NVENCSTATUS(pub ::std::os::raw::c_uint);
+#[doc = " Error Codes"]
+pub use self::_NVENCSTATUS as NVENCSTATUS;
+// Per-picture encode flags, stored as a `u32`-represented Rust enum.
+// Every variant is a distinct single-bit value (1, 2, 4, 8, 16, 32). A value of
+// this enum type can only be one of the listed variants, so a combination of
+// several flags has to be carried as a plain integer (e.g. `a as u32 | b as u32`)
+// rather than as `_NV_ENC_PIC_FLAGS`.
+#[repr(u32)]
+#[doc = " Encode Picture encode flags."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_PIC_FLAGS {
+    #[doc = "< Encode the current picture as an Intra picture"]
+    NV_ENC_PIC_FLAG_FORCEINTRA = 1,
+    #[doc = "< Encode the current picture as an IDR picture."]
+    #[doc = "This flag is only valid when Picture type decision is taken by the Encoder"]
+    #[doc = "[_NV_ENC_INITIALIZE_PARAMS::enablePTD == 1]."]
+    NV_ENC_PIC_FLAG_FORCEIDR = 2,
+    #[doc = "< Write the sequence and picture header in encoded bitstream of the current picture"]
+    NV_ENC_PIC_FLAG_OUTPUT_SPSPPS = 4,
+    #[doc = "< Indicates end of the input stream"]
+    NV_ENC_PIC_FLAG_EOS = 8,
+    #[doc = "< Do not advance encoder state during encode"]
+    NV_ENC_PIC_FLAG_DISABLE_ENC_STATE_ADVANCE = 16,
+    #[doc = "< Write reconstructed frame"]
+    NV_ENC_PIC_FLAG_OUTPUT_RECON_FRAME = 32,
+}
+#[doc = " Encode Picture encode flags."]
+pub use self::_NV_ENC_PIC_FLAGS as NV_ENC_PIC_FLAGS;
+// Selector for the memory heap used for input/output buffers, stored as a
+// `u32`-represented Rust enum with contiguous values 0..=3.
+#[repr(u32)]
+#[doc = " Memory heap to allocate input and output buffers."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_MEMORY_HEAP {
+    #[doc = "< Memory heap to be decided by the encoder driver based on the usage"]
+    NV_ENC_MEMORY_HEAP_AUTOSELECT = 0,
+    #[doc = "< Memory heap is in local video memory"]
+    NV_ENC_MEMORY_HEAP_VID = 1,
+    #[doc = "< Memory heap is in cached system memory"]
+    NV_ENC_MEMORY_HEAP_SYSMEM_CACHED = 2,
+    #[doc = "< Memory heap is in uncached system memory"]
+    NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED = 3,
+}
+#[doc = " Memory heap to allocate input and output buffers."]
+pub use self::_NV_ENC_MEMORY_HEAP as NV_ENC_MEMORY_HEAP;
+// Selector for how B-frames are used as references, stored as a
+// `u32`-represented Rust enum with contiguous values 0..=2.
+#[repr(u32)]
+#[doc = " B-frame used as reference modes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_BFRAME_REF_MODE {
+    #[doc = "< B frame is not used for reference"]
+    NV_ENC_BFRAME_REF_MODE_DISABLED = 0,
+    #[doc = "< Each B-frame will be used for reference"]
+    NV_ENC_BFRAME_REF_MODE_EACH = 1,
+    #[doc = "< Only(Number of B-frame)/2 th B-frame will be used for reference"]
+    NV_ENC_BFRAME_REF_MODE_MIDDLE = 2,
+}
+#[doc = " B-frame used as reference modes"]
+pub use self::_NV_ENC_BFRAME_REF_MODE as NV_ENC_BFRAME_REF_MODE;
+// H.264 entropy coding selector, stored as a `u32`-represented Rust enum.
+// 0 leaves the choice to the driver; 1 and 2 name the two explicit modes.
+#[repr(u32)]
+#[doc = " H.264 entropy coding modes."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_H264_ENTROPY_CODING_MODE {
+    #[doc = "< Entropy coding mode is auto selected by the encoder driver"]
+    NV_ENC_H264_ENTROPY_CODING_MODE_AUTOSELECT = 0,
+    #[doc = "< Entropy coding mode is CABAC"]
+    NV_ENC_H264_ENTROPY_CODING_MODE_CABAC = 1,
+    #[doc = "< Entropy coding mode is CAVLC"]
+    NV_ENC_H264_ENTROPY_CODING_MODE_CAVLC = 2,
+}
+#[doc = " H.264 entropy coding modes."]
+pub use self::_NV_ENC_H264_ENTROPY_CODING_MODE as NV_ENC_H264_ENTROPY_CODING_MODE;
+// H.264 BDirect mode selector, stored as a `u32`-represented Rust enum with
+// contiguous values 0..=3. Note that "disable" is 1, not 0; 0 means the driver
+// chooses.
+#[repr(u32)]
+#[doc = " H.264 specific BDirect modes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_H264_BDIRECT_MODE {
+    #[doc = "< BDirect mode is auto selected by the encoder driver"]
+    NV_ENC_H264_BDIRECT_MODE_AUTOSELECT = 0,
+    #[doc = "< Disable BDirect mode"]
+    NV_ENC_H264_BDIRECT_MODE_DISABLE = 1,
+    #[doc = "< Temporal BDirect mode"]
+    NV_ENC_H264_BDIRECT_MODE_TEMPORAL = 2,
+    #[doc = "< Spatial BDirect mode"]
+    NV_ENC_H264_BDIRECT_MODE_SPATIAL = 3,
+}
+#[doc = " H.264 specific BDirect modes"]
+pub use self::_NV_ENC_H264_BDIRECT_MODE as NV_ENC_H264_BDIRECT_MODE;
+// H.264 FMO usage selector, stored as a `u32`-represented Rust enum.
+// Value order here is autoselect (0), ENABLE (1), DISABLE (2) -- the opposite
+// enable/disable order from `_NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE`, so do not
+// assume "1 == disable" across the H.264 mode enums.
+#[repr(u32)]
+#[doc = " H.264 specific FMO usage"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_H264_FMO_MODE {
+    #[doc = "< FMO usage is auto selected by the encoder driver"]
+    NV_ENC_H264_FMO_AUTOSELECT = 0,
+    #[doc = "< Enable FMO"]
+    NV_ENC_H264_FMO_ENABLE = 1,
+    #[doc = "< Disable FMO"]
+    NV_ENC_H264_FMO_DISABLE = 2,
+}
+#[doc = " H.264 specific FMO usage"]
+pub use self::_NV_ENC_H264_FMO_MODE as NV_ENC_H264_FMO_MODE;
+// H.264 adaptive transform (8x8) selector, stored as a `u32`-represented Rust
+// enum. Value order here is autoselect (0), DISABLE (1), ENABLE (2).
+#[repr(u32)]
+#[doc = " H.264 specific Adaptive Transform modes"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE {
+    #[doc = "< Adaptive Transform 8x8 mode is auto selected by the encoder driver"]
+    NV_ENC_H264_ADAPTIVE_TRANSFORM_AUTOSELECT = 0,
+    #[doc = "< Adaptive Transform 8x8 mode disabled"]
+    NV_ENC_H264_ADAPTIVE_TRANSFORM_DISABLE = 1,
+    #[doc = "< Adaptive Transform 8x8 mode should be used"]
+    NV_ENC_H264_ADAPTIVE_TRANSFORM_ENABLE = 2,
+}
+#[doc = " H.264 specific Adaptive Transform modes"]
+pub use self::_NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE as NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE;
+// Stereo frame packing selector, stored as a `u32`-represented Rust enum with
+// contiguous values 0..=6; 0 means no stereo packing.
+#[repr(u32)]
+#[doc = " Stereo frame packing modes."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_STEREO_PACKING_MODE {
+    #[doc = "< No Stereo packing required"]
+    NV_ENC_STEREO_PACKING_MODE_NONE = 0,
+    #[doc = "< Checkerboard mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_CHECKERBOARD = 1,
+    #[doc = "< Column Interleave mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_COLINTERLEAVE = 2,
+    #[doc = "< Row Interleave mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_ROWINTERLEAVE = 3,
+    #[doc = "< Side-by-side mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_SIDEBYSIDE = 4,
+    #[doc = "< Top-Bottom mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_TOPBOTTOM = 5,
+    #[doc = "< Frame Sequential mode for packing stereo frames"]
+    NV_ENC_STEREO_PACKING_MODE_FRAMESEQ = 6,
+}
+#[doc = " Stereo frame packing modes."]
+pub use self::_NV_ENC_STEREO_PACKING_MODE as NV_ENC_STEREO_PACKING_MODE;
+// Kind of input resource handed to the encoder, stored as a `u32`-represented
+// Rust enum with contiguous values 0..=3.
+#[repr(u32)]
+#[doc = "  Input Resource type"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_INPUT_RESOURCE_TYPE {
+    #[doc = "< input resource type is a directx9 surface"]
+    NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX = 0,
+    #[doc = "< input resource type is a cuda device pointer surface"]
+    NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR = 1,
+    #[doc = "< input resource type is a cuda array surface."]
+    #[doc = "This array must be a 2D array and the CUDA_ARRAY3D_SURFACE_LDST"]
+    #[doc = "flag must have been specified when creating it."]
+    NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY = 2,
+    #[doc = "< input resource type is an OpenGL texture"]
+    NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX = 3,
+}
+#[doc = "  Input Resource type"]
+pub use self::_NV_ENC_INPUT_RESOURCE_TYPE as NV_ENC_INPUT_RESOURCE_TYPE;
+// Intended use of a registered surface, stored as a `u32`-represented Rust enum.
+// The declared values are 0, 1, 2 and 4 -- there is no variant with value 3, so
+// the values are not a contiguous range.
+#[repr(u32)]
+#[doc = "  Buffer usage"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_BUFFER_USAGE {
+    #[doc = "< Registered surface will be used for input image"]
+    NV_ENC_INPUT_IMAGE = 0,
+    #[doc = "< Registered surface will be used for output of H.264 ME only mode."]
+    #[doc = "This buffer usage type is not supported for HEVC ME only mode."]
+    NV_ENC_OUTPUT_MOTION_VECTOR = 1,
+    #[doc = "< Registered surface will be used for output bitstream in encoding"]
+    NV_ENC_OUTPUT_BITSTREAM = 2,
+    #[doc = "< Registered surface will be used for output reconstructed frame in encoding"]
+    NV_ENC_OUTPUT_RECON = 4,
+}
+#[doc = "  Buffer usage"]
+pub use self::_NV_ENC_BUFFER_USAGE as NV_ENC_BUFFER_USAGE;
+// Kind of device backing an encode session, stored as a `u32`-represented Rust
+// enum with contiguous values 0..=2.
+#[repr(u32)]
+#[doc = "  Encoder Device type"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_DEVICE_TYPE {
+    #[doc = "< encode device type is a directx9 device"]
+    NV_ENC_DEVICE_TYPE_DIRECTX = 0,
+    #[doc = "< encode device type is a cuda device"]
+    NV_ENC_DEVICE_TYPE_CUDA = 1,
+    #[doc = "< encode device type is an OpenGL device."]
+    #[doc = "Use of this device type is supported only on Linux"]
+    NV_ENC_DEVICE_TYPE_OPENGL = 2,
+}
+#[doc = "  Encoder Device type"]
+pub use self::_NV_ENC_DEVICE_TYPE as NV_ENC_DEVICE_TYPE;
+// Reference-frame count selector, stored as a `u32`-represented Rust enum.
+// 0 leaves the count to the driver; for the other variants the discriminant
+// equals the count in the variant name (1..=7).
+#[repr(u32)]
+#[doc = " Number of reference frames"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_NUM_REF_FRAMES {
+    #[doc = "< Number of reference frames is auto selected by the encoder driver"]
+    NV_ENC_NUM_REF_FRAMES_AUTOSELECT = 0,
+    #[doc = "< Number of reference frames equal to 1"]
+    NV_ENC_NUM_REF_FRAMES_1 = 1,
+    #[doc = "< Number of reference frames equal to 2"]
+    NV_ENC_NUM_REF_FRAMES_2 = 2,
+    #[doc = "< Number of reference frames equal to 3"]
+    NV_ENC_NUM_REF_FRAMES_3 = 3,
+    #[doc = "< Number of reference frames equal to 4"]
+    NV_ENC_NUM_REF_FRAMES_4 = 4,
+    #[doc = "< Number of reference frames equal to 5"]
+    NV_ENC_NUM_REF_FRAMES_5 = 5,
+    #[doc = "< Number of reference frames equal to 6"]
+    NV_ENC_NUM_REF_FRAMES_6 = 6,
+    #[doc = "< Number of reference frames equal to 7"]
+    NV_ENC_NUM_REF_FRAMES_7 = 7,
+}
+#[doc = " Number of reference frames"]
+pub use self::_NV_ENC_NUM_REF_FRAMES as NV_ENC_NUM_REF_FRAMES;
+// Temporal filtering level, stored as a `u32`-represented Rust enum.
+// Only two levels are declared, with values 0 and 4; values 1..=3 have no
+// variant here.
+// NOTE(review): presumably level 0 means filtering is off -- the declaration
+// does not say so; confirm against the upstream API documentation.
+#[repr(u32)]
+#[doc = "  Enum for Temporal filtering level."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_TEMPORAL_FILTER_LEVEL {
+    NV_ENC_TEMPORAL_FILTER_LEVEL_0 = 0,
+    NV_ENC_TEMPORAL_FILTER_LEVEL_4 = 4,
+}
+#[doc = "  Enum for Temporal filtering level."]
+pub use self::_NV_ENC_TEMPORAL_FILTER_LEVEL as NV_ENC_TEMPORAL_FILTER_LEVEL;
+// Identifiers of queryable encoder capabilities, stored as a `u32`-represented
+// Rust enum. Discriminants run contiguously from 0 to 60; the last entry
+// (`NV_ENC_CAPS_EXPOSED_COUNT = 60`) is documented as reserved and equals the
+// number of capability entries that precede it.
+// Each variant's doc text describes the meaning of the value reported for that
+// capability (a 0/1 support indicator, a small mode number, a bitmask, or a
+// maximum/minimum quantity, depending on the entry).
+#[repr(u32)]
+#[doc = " Encoder capabilities enumeration."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_CAPS {
+    #[doc = " Maximum number of B-Frames supported."]
+    NV_ENC_CAPS_NUM_MAX_BFRAMES = 0,
+    #[doc = " Rate control modes supported."]
+    #[doc = " \\n The API return value is a bitmask of the values in NV_ENC_PARAMS_RC_MODE."]
+    NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES = 1,
+    #[doc = " Indicates HW support for field mode encoding."]
+    #[doc = " \\n 0 : Interlaced mode encoding is not supported."]
+    #[doc = " \\n 1 : Interlaced field mode encoding is supported."]
+    #[doc = " \\n 2 : Interlaced frame encoding and field mode encoding are both supported."]
+    NV_ENC_CAPS_SUPPORT_FIELD_ENCODING = 2,
+    #[doc = " Indicates HW support for monochrome mode encoding."]
+    #[doc = " \\n 0 : Monochrome mode not supported."]
+    #[doc = " \\n 1 : Monochrome mode supported."]
+    NV_ENC_CAPS_SUPPORT_MONOCHROME = 3,
+    #[doc = " Indicates HW support for FMO."]
+    #[doc = " \\n 0 : FMO not supported."]
+    #[doc = " \\n 1 : FMO supported."]
+    NV_ENC_CAPS_SUPPORT_FMO = 4,
+    #[doc = " Indicates HW capability for Quarter pel motion estimation."]
+    #[doc = " \\n 0 : Quarter-Pel Motion Estimation not supported."]
+    #[doc = " \\n 1 : Quarter-Pel Motion Estimation supported."]
+    NV_ENC_CAPS_SUPPORT_QPELMV = 5,
+    #[doc = " H.264 specific. Indicates HW support for BDirect modes."]
+    #[doc = " \\n 0 : BDirect mode encoding not supported."]
+    #[doc = " \\n 1 : BDirect mode encoding supported."]
+    NV_ENC_CAPS_SUPPORT_BDIRECT_MODE = 6,
+    #[doc = " H264 specific. Indicates HW support for CABAC entropy coding mode."]
+    #[doc = " \\n 0 : CABAC entropy coding not supported."]
+    #[doc = " \\n 1 : CABAC entropy coding supported."]
+    NV_ENC_CAPS_SUPPORT_CABAC = 7,
+    #[doc = " Indicates HW support for Adaptive Transform."]
+    #[doc = " \\n 0 : Adaptive Transform not supported."]
+    #[doc = " \\n 1 : Adaptive Transform supported."]
+    NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM = 8,
+    #[doc = " Indicates HW support for Multi View Coding."]
+    #[doc = " \\n 0 : Multi View Coding not supported."]
+    #[doc = " \\n 1 : Multi View Coding supported."]
+    NV_ENC_CAPS_SUPPORT_STEREO_MVC = 9,
+    // NOTE(review): the name says NUM_MAX (a count) while the doc text below
+    // describes a 0/1 support indicator -- confirm the actual meaning of the
+    // reported value against the upstream API documentation before relying on it.
+    #[doc = " Indicates HW support for encoding Temporal layers."]
+    #[doc = " \\n 0 : Encoding Temporal layers not supported."]
+    #[doc = " \\n 1 : Encoding Temporal layers supported."]
+    NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS = 10,
+    #[doc = " Indicates HW support for Hierarchical P frames."]
+    #[doc = " \\n 0 : Hierarchical P frames not supported."]
+    #[doc = " \\n 1 : Hierarchical P frames supported."]
+    NV_ENC_CAPS_SUPPORT_HIERARCHICAL_PFRAMES = 11,
+    #[doc = " Indicates HW support for Hierarchical B frames."]
+    #[doc = " \\n 0 : Hierarchical B frames not supported."]
+    #[doc = " \\n 1 : Hierarchical B frames supported."]
+    NV_ENC_CAPS_SUPPORT_HIERARCHICAL_BFRAMES = 12,
+    #[doc = " Maximum Encoding level supported (See ::NV_ENC_LEVEL for details)."]
+    NV_ENC_CAPS_LEVEL_MAX = 13,
+    #[doc = " Minimum Encoding level supported (See ::NV_ENC_LEVEL for details)."]
+    NV_ENC_CAPS_LEVEL_MIN = 14,
+    #[doc = " Indicates HW support for separate colour plane encoding."]
+    #[doc = " \\n 0 : Separate colour plane encoding not supported."]
+    #[doc = " \\n 1 : Separate colour plane encoding supported."]
+    NV_ENC_CAPS_SEPARATE_COLOUR_PLANE = 15,
+    #[doc = " Maximum output width supported."]
+    NV_ENC_CAPS_WIDTH_MAX = 16,
+    #[doc = " Maximum output height supported."]
+    NV_ENC_CAPS_HEIGHT_MAX = 17,
+    #[doc = " Indicates Temporal Scalability Support."]
+    #[doc = " \\n 0 : Temporal SVC encoding not supported."]
+    #[doc = " \\n 1 : Temporal SVC encoding supported."]
+    NV_ENC_CAPS_SUPPORT_TEMPORAL_SVC = 18,
+    #[doc = " Indicates Dynamic Encode Resolution Change Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Dynamic Encode Resolution Change not supported."]
+    #[doc = " \\n 1 : Dynamic Encode Resolution Change supported."]
+    NV_ENC_CAPS_SUPPORT_DYN_RES_CHANGE = 19,
+    #[doc = " Indicates Dynamic Encode Bitrate Change Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Dynamic Encode bitrate change not supported."]
+    #[doc = " \\n 1 : Dynamic Encode bitrate change supported."]
+    NV_ENC_CAPS_SUPPORT_DYN_BITRATE_CHANGE = 20,
+    #[doc = " Indicates Forcing Constant QP On The Fly Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Forcing constant QP on the fly not supported."]
+    #[doc = " \\n 1 : Forcing constant QP on the fly supported."]
+    NV_ENC_CAPS_SUPPORT_DYN_FORCE_CONSTQP = 21,
+    #[doc = " Indicates Dynamic rate control mode Change Support."]
+    #[doc = " \\n 0 : Dynamic rate control mode change not supported."]
+    #[doc = " \\n 1 : Dynamic rate control mode change supported."]
+    NV_ENC_CAPS_SUPPORT_DYN_RCMODE_CHANGE = 22,
+    #[doc = " Indicates Subframe readback support for slice-based encoding. If this feature is supported, it can be enabled by setting enableSubFrameWrite = 1."]
+    #[doc = " \\n 0 : Subframe readback not supported."]
+    #[doc = " \\n 1 : Subframe readback supported."]
+    NV_ENC_CAPS_SUPPORT_SUBFRAME_READBACK = 23,
+    #[doc = " Indicates Constrained Encoding mode support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Constrained encoding mode not supported."]
+    #[doc = " \\n 1 : Constrained encoding mode supported."]
+    #[doc = " If this mode is supported client can enable this during initialization."]
+    #[doc = " Client can then force a picture to be coded as constrained picture where"]
+    #[doc = " in-loop filtering is disabled across slice boundaries and prediction vectors for inter"]
+    #[doc = " macroblocks in each slice will be restricted to the slice region."]
+    NV_ENC_CAPS_SUPPORT_CONSTRAINED_ENCODING = 24,
+    #[doc = " Indicates Intra Refresh Mode Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Intra Refresh Mode not supported."]
+    #[doc = " \\n 1 : Intra Refresh Mode supported."]
+    NV_ENC_CAPS_SUPPORT_INTRA_REFRESH = 25,
+    #[doc = " Indicates Custom VBV Buffer Size support. It can be used for capping frame size."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Custom VBV buffer size specification from client, not supported."]
+    #[doc = " \\n 1 : Custom VBV buffer size specification from client, supported."]
+    NV_ENC_CAPS_SUPPORT_CUSTOM_VBV_BUF_SIZE = 26,
+    #[doc = " Indicates Dynamic Slice Mode Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Dynamic Slice Mode not supported."]
+    #[doc = " \\n 1 : Dynamic Slice Mode supported."]
+    NV_ENC_CAPS_SUPPORT_DYNAMIC_SLICE_MODE = 27,
+    #[doc = " Indicates Reference Picture Invalidation Support."]
+    #[doc = " Support added from NvEncodeAPI version 2.0."]
+    #[doc = " \\n 0 : Reference Picture Invalidation not supported."]
+    #[doc = " \\n 1 : Reference Picture Invalidation supported."]
+    NV_ENC_CAPS_SUPPORT_REF_PIC_INVALIDATION = 28,
+    #[doc = " Indicates support for Pre-Processing."]
+    #[doc = " The API return value is a bitmask of the values defined in ::NV_ENC_PREPROC_FLAGS"]
+    NV_ENC_CAPS_PREPROC_SUPPORT = 29,
+    #[doc = " Indicates support Async mode."]
+    #[doc = " \\n 0 : Async Encode mode not supported."]
+    #[doc = " \\n 1 : Async Encode mode supported."]
+    NV_ENC_CAPS_ASYNC_ENCODE_SUPPORT = 30,
+    #[doc = " Maximum MBs per frame supported."]
+    NV_ENC_CAPS_MB_NUM_MAX = 31,
+    #[doc = " Maximum aggregate throughput in MBs per sec."]
+    NV_ENC_CAPS_MB_PER_SEC_MAX = 32,
+    #[doc = " Indicates HW support for YUV444 mode encoding."]
+    #[doc = " \\n 0 : YUV444 mode encoding not supported."]
+    #[doc = " \\n 1 : YUV444 mode encoding supported."]
+    NV_ENC_CAPS_SUPPORT_YUV444_ENCODE = 33,
+    #[doc = " Indicates HW support for lossless encoding."]
+    #[doc = " \\n 0 : lossless encoding not supported."]
+    #[doc = " \\n 1 : lossless encoding supported."]
+    NV_ENC_CAPS_SUPPORT_LOSSLESS_ENCODE = 34,
+    #[doc = " Indicates HW support for Sample Adaptive Offset."]
+    #[doc = " \\n 0 : SAO not supported."]
+    #[doc = " \\n 1 : SAO encoding supported."]
+    NV_ENC_CAPS_SUPPORT_SAO = 35,
+    #[doc = " Indicates HW support for Motion Estimation Only Mode."]
+    #[doc = " \\n 0 : MEOnly Mode not supported."]
+    #[doc = " \\n 1 : MEOnly Mode supported for I and P frames."]
+    #[doc = " \\n 2 : MEOnly Mode supported for I, P and B frames."]
+    NV_ENC_CAPS_SUPPORT_MEONLY_MODE = 36,
+    #[doc = " Indicates HW support for lookahead encoding (enableLookahead=1)."]
+    #[doc = " \\n 0 : Lookahead not supported."]
+    #[doc = " \\n 1 : Lookahead supported."]
+    NV_ENC_CAPS_SUPPORT_LOOKAHEAD = 37,
+    #[doc = " Indicates HW support for temporal AQ encoding (enableTemporalAQ=1)."]
+    #[doc = " \\n 0 : Temporal AQ not supported."]
+    #[doc = " \\n 1 : Temporal AQ supported."]
+    NV_ENC_CAPS_SUPPORT_TEMPORAL_AQ = 38,
+    #[doc = " Indicates HW support for 10 bit encoding."]
+    #[doc = " \\n 0 : 10 bit encoding not supported."]
+    #[doc = " \\n 1 : 10 bit encoding supported."]
+    NV_ENC_CAPS_SUPPORT_10BIT_ENCODE = 39,
+    #[doc = " Maximum number of Long Term Reference frames supported"]
+    NV_ENC_CAPS_NUM_MAX_LTR_FRAMES = 40,
+    #[doc = " Indicates HW support for Weighted Prediction."]
+    #[doc = " \\n 0 : Weighted Prediction not supported."]
+    #[doc = " \\n 1 : Weighted Prediction supported."]
+    NV_ENC_CAPS_SUPPORT_WEIGHTED_PREDICTION = 41,
+    #[doc = " On managed (vGPU) platforms (Windows only), this API, in conjunction with other GRID Management APIs, can be used"]
+    #[doc = " to estimate the residual capacity of the hardware encoder on the GPU as a percentage of the total available encoder capacity."]
+    #[doc = " This API can be called at any time; i.e. during the encode session or before opening the encode session."]
+    #[doc = " If the available encoder capacity is returned as zero, applications may choose to switch to software encoding"]
+    #[doc = " and continue to call this API (e.g. polling once per second) until capacity becomes available."]
+    #[doc = ""]
+    #[doc = " On bare metal (non-virtualized GPU) and linux platforms, this API always returns 100."]
+    NV_ENC_CAPS_DYNAMIC_QUERY_ENCODER_CAPACITY = 42,
+    #[doc = " Indicates B as reference support."]
+    #[doc = " \\n 0 : B as reference is not supported."]
+    #[doc = " \\n 1 : each B-Frame as reference is supported."]
+    #[doc = " \\n 2 : only Middle B-frame as reference is supported."]
+    NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE = 43,
+    #[doc = " Indicates HW support for Emphasis Level Map based delta QP computation."]
+    #[doc = " \\n 0 : Emphasis Level Map based delta QP not supported."]
+    #[doc = " \\n 1 : Emphasis Level Map based delta QP is supported."]
+    NV_ENC_CAPS_SUPPORT_EMPHASIS_LEVEL_MAP = 44,
+    #[doc = " Minimum input width supported."]
+    NV_ENC_CAPS_WIDTH_MIN = 45,
+    #[doc = " Minimum input height supported."]
+    NV_ENC_CAPS_HEIGHT_MIN = 46,
+    #[doc = " Indicates HW support for multiple reference frames."]
+    NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES = 47,
+    #[doc = " Indicates HW support for HEVC with alpha encoding."]
+    #[doc = " \\n 0 : HEVC with alpha encoding not supported."]
+    #[doc = " \\n 1 : HEVC with alpha encoding is supported."]
+    NV_ENC_CAPS_SUPPORT_ALPHA_LAYER_ENCODING = 48,
+    #[doc = " Indicates number of Encoding engines present on GPU."]
+    NV_ENC_CAPS_NUM_ENCODER_ENGINES = 49,
+    #[doc = " Indicates single slice intra refresh support."]
+    NV_ENC_CAPS_SINGLE_SLICE_INTRA_REFRESH = 50,
+    #[doc = " Indicates encoding without advancing the state support."]
+    NV_ENC_CAPS_DISABLE_ENC_STATE_ADVANCE = 51,
+    #[doc = " Indicates reconstructed output support."]
+    NV_ENC_CAPS_OUTPUT_RECON_SURFACE = 52,
+    #[doc = " Indicates encoded frame output stats support for every block. Block represents a CTB for HEVC, macroblock for H.264 and super block for AV1."]
+    NV_ENC_CAPS_OUTPUT_BLOCK_STATS = 53,
+    #[doc = " Indicates encoded frame output stats support for every row. Row represents a CTB row for HEVC, macroblock row for H.264 and super block row for AV1."]
+    NV_ENC_CAPS_OUTPUT_ROW_STATS = 54,
+    #[doc = " Indicates temporal filtering support."]
+    NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER = 55,
+    #[doc = " Maximum Lookahead level supported (See ::NV_ENC_LOOKAHEAD_LEVEL for details)."]
+    NV_ENC_CAPS_SUPPORT_LOOKAHEAD_LEVEL = 56,
+    #[doc = " Indicates UnidirectionalB support."]
+    NV_ENC_CAPS_SUPPORT_UNIDIRECTIONAL_B = 57,
+    #[doc = " Indicates HW support for MVHEVC encoding."]
+    #[doc = " \\n 0 : MVHEVC encoding not supported."]
+    #[doc = " \\n 1 : MVHEVC encoding supported."]
+    NV_ENC_CAPS_SUPPORT_MVHEVC_ENCODE = 58,
+    #[doc = " Indicates HW support for YUV422 mode encoding."]
+    #[doc = " \\n 0 : YUV422 mode encoding not supported."]
+    #[doc = " \\n 1 : YUV422 mode encoding supported."]
+    NV_ENC_CAPS_SUPPORT_YUV422_ENCODE = 59,
+    // Sentinel: not a real capability; see the "Reserved" doc text below.
+    #[doc = " Reserved - Not to be used by clients."]
+    NV_ENC_CAPS_EXPOSED_COUNT = 60,
+}
+#[doc = " Encoder capabilities enumeration."]
+pub use self::_NV_ENC_CAPS as NV_ENC_CAPS;
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[doc = "  HEVC CU SIZE"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_HEVC_CUSIZE { // Rust enum: only the five discriminants listed here are valid values.
+    NV_ENC_HEVC_CUSIZE_AUTOSELECT = 0, // Presumably "let the encoder pick" - confirm against the NVENC header.
+    NV_ENC_HEVC_CUSIZE_8x8 = 1,
+    NV_ENC_HEVC_CUSIZE_16x16 = 2,
+    NV_ENC_HEVC_CUSIZE_32x32 = 3,
+    NV_ENC_HEVC_CUSIZE_64x64 = 4, // Highest discriminant; values run 0..=4 without gaps.
+}
+#[doc = "  HEVC CU SIZE"]
+pub use self::_NV_ENC_HEVC_CUSIZE as NV_ENC_HEVC_CUSIZE; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[doc = "  AV1 PART SIZE"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_AV1_PART_SIZE { // Rust enum: only the six discriminants listed here are valid values.
+    NV_ENC_AV1_PART_SIZE_AUTOSELECT = 0,
+    NV_ENC_AV1_PART_SIZE_4x4 = 1, // This enum starts at 4x4, so its numbering is shifted relative to _NV_ENC_HEVC_CUSIZE.
+    NV_ENC_AV1_PART_SIZE_8x8 = 2, // 8x8 is 2 here but 1 in _NV_ENC_HEVC_CUSIZE; the two enums are not interchangeable.
+    NV_ENC_AV1_PART_SIZE_16x16 = 3,
+    NV_ENC_AV1_PART_SIZE_32x32 = 4,
+    NV_ENC_AV1_PART_SIZE_64x64 = 5, // Highest discriminant; values run 0..=5 without gaps.
+}
+#[doc = "  AV1 PART SIZE"]
+pub use self::_NV_ENC_AV1_PART_SIZE as NV_ENC_AV1_PART_SIZE; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[doc = "  Enums related to fields in VUI parameters."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_VUI_VIDEO_FORMAT { // Rust enum: only the six discriminants listed here are valid values.
+    NV_ENC_VUI_VIDEO_FORMAT_COMPONENT = 0, // Zero discriminant, i.e. what an all-zero field of this type decodes to.
+    NV_ENC_VUI_VIDEO_FORMAT_PAL = 1,
+    NV_ENC_VUI_VIDEO_FORMAT_NTSC = 2,
+    NV_ENC_VUI_VIDEO_FORMAT_SECAM = 3,
+    NV_ENC_VUI_VIDEO_FORMAT_MAC = 4,
+    NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED = 5, // Note: "unspecified" is 5 here, not 0.
+}
+#[doc = "  Enums related to fields in VUI parameters."]
+pub use self::_NV_ENC_VUI_VIDEO_FORMAT as NV_ENC_VUI_VIDEO_FORMAT; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_VUI_COLOR_PRIMARIES { // Colour-primaries codes; presumably the VUI code points - confirm against the NVENC header.
+    NV_ENC_VUI_COLOR_PRIMARIES_UNDEFINED = 0, // Zero discriminant, i.e. what an all-zero field of this type decodes to.
+    NV_ENC_VUI_COLOR_PRIMARIES_BT709 = 1,
+    NV_ENC_VUI_COLOR_PRIMARIES_UNSPECIFIED = 2, // Distinct from UNDEFINED (0).
+    NV_ENC_VUI_COLOR_PRIMARIES_RESERVED = 3,
+    NV_ENC_VUI_COLOR_PRIMARIES_BT470M = 4,
+    NV_ENC_VUI_COLOR_PRIMARIES_BT470BG = 5,
+    NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M = 6,
+    NV_ENC_VUI_COLOR_PRIMARIES_SMPTE240M = 7,
+    NV_ENC_VUI_COLOR_PRIMARIES_FILM = 8,
+    NV_ENC_VUI_COLOR_PRIMARIES_BT2020 = 9,
+    NV_ENC_VUI_COLOR_PRIMARIES_SMPTE428 = 10,
+    NV_ENC_VUI_COLOR_PRIMARIES_SMPTE431 = 11,
+    NV_ENC_VUI_COLOR_PRIMARIES_SMPTE432 = 12, // Last value of the contiguous 0..=12 run.
+    NV_ENC_VUI_COLOR_PRIMARIES_JEDEC_P22 = 22, // Not contiguous: 13..=21 have no variant and are invalid for this Rust enum.
+}
+pub use self::_NV_ENC_VUI_COLOR_PRIMARIES as NV_ENC_VUI_COLOR_PRIMARIES; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_VUI_TRANSFER_CHARACTERISTIC { // Transfer-characteristic codes; presumably the VUI code points - confirm against the NVENC header.
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_UNDEFINED = 0, // Zero discriminant, i.e. what an all-zero field of this type decodes to.
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709 = 1,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_UNSPECIFIED = 2, // Distinct from UNDEFINED (0).
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_RESERVED = 3,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT470M = 4,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT470BG = 5,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M = 6,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE240M = 7,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_LINEAR = 8,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_LOG = 9,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_LOG_SQRT = 10,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_IEC61966_2_4 = 11,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT1361_ECG = 12,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SRGB = 13,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_10 = 14,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT2020_12 = 15,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE2084 = 16,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE428 = 17,
+    NV_ENC_VUI_TRANSFER_CHARACTERISTIC_ARIB_STD_B67 = 18, // Highest discriminant; values run 0..=18 without gaps.
+}
+pub use self::_NV_ENC_VUI_TRANSFER_CHARACTERISTIC as NV_ENC_VUI_TRANSFER_CHARACTERISTIC; // Re-export without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_VUI_MATRIX_COEFFS { // Matrix-coefficient codes; presumably the VUI code points - confirm against the NVENC header.
+    NV_ENC_VUI_MATRIX_COEFFS_RGB = 0, // Zero discriminant: an all-zero field of this type decodes to RGB, not "unspecified".
+    NV_ENC_VUI_MATRIX_COEFFS_BT709 = 1,
+    NV_ENC_VUI_MATRIX_COEFFS_UNSPECIFIED = 2,
+    NV_ENC_VUI_MATRIX_COEFFS_RESERVED = 3,
+    NV_ENC_VUI_MATRIX_COEFFS_FCC = 4,
+    NV_ENC_VUI_MATRIX_COEFFS_BT470BG = 5,
+    NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M = 6,
+    NV_ENC_VUI_MATRIX_COEFFS_SMPTE240M = 7,
+    NV_ENC_VUI_MATRIX_COEFFS_YCGCO = 8,
+    NV_ENC_VUI_MATRIX_COEFFS_BT2020_NCL = 9,
+    NV_ENC_VUI_MATRIX_COEFFS_BT2020_CL = 10,
+    NV_ENC_VUI_MATRIX_COEFFS_SMPTE2085 = 11, // Highest discriminant; values run 0..=11 without gaps.
+}
+pub use self::_NV_ENC_VUI_MATRIX_COEFFS as NV_ENC_VUI_MATRIX_COEFFS; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[doc = "  Enum for Lookahead level."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_LOOKAHEAD_LEVEL { // Rust enum: only the five discriminants listed here are valid values.
+    NV_ENC_LOOKAHEAD_LEVEL_0 = 0,
+    NV_ENC_LOOKAHEAD_LEVEL_1 = 1,
+    NV_ENC_LOOKAHEAD_LEVEL_2 = 2,
+    NV_ENC_LOOKAHEAD_LEVEL_3 = 3, // Last value of the contiguous 0..=3 run.
+    NV_ENC_LOOKAHEAD_LEVEL_AUTOSELECT = 15, // Not contiguous: 4..=14 have no variant; derived ordering places this last.
+}
+#[doc = "  Enum for Lookahead level."]
+pub use self::_NV_ENC_LOOKAHEAD_LEVEL as NV_ENC_LOOKAHEAD_LEVEL; // Same type, re-exported without the leading underscore.
+#[repr(u32)] // Discriminants are stored as 32-bit unsigned integers.
+#[doc = " Enum for Bit Depth"]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_BIT_DEPTH { // Rust enum: only 0, 8 and 10 are valid values; no other integer may be stored in it.
+    #[doc = "< Invalid Bit Depth"]
+    NV_ENC_BIT_DEPTH_INVALID = 0, // Zero discriminant, i.e. what an all-zero field of this type decodes to.
+    #[doc = "< Bit Depth 8"]
+    NV_ENC_BIT_DEPTH_8 = 8, // The discriminant is the bit count itself, not an index.
+    #[doc = "< Bit Depth 10"]
+    NV_ENC_BIT_DEPTH_10 = 10, // The discriminant is the bit count itself, not an index.
+}
+#[doc = " Enum for Bit Depth"]
+pub use self::_NV_ENC_BIT_DEPTH as NV_ENC_BIT_DEPTH; // Same type, re-exported without the leading underscore.
+#[doc = " Input struct for querying Encoding capabilities."]
+#[repr(C)] // C field order; the generated layout test pins this at 256 bytes, 4-byte aligned.
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CAPS_PARAM {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_CAPS_PARAM_VER"]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [in]: Specifies the encode capability to be queried. Client should pass a member for ::NV_ENC_CAPS enum."]
+    pub capsToQuery: NV_ENC_CAPS, // Byte offset 4; a Rust enum, so it must always hold a declared variant.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u32; 62usize], // Byte offset 8; 62 * 4 = 248 bytes, which brings the total to 256.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CAPS_PARAM() {
+    // Pins the size, alignment and field offsets of `_NV_ENC_CAPS_PARAM` to the values
+    // recorded when the binding was generated. Offsets are read with `addr_of!` on
+    // uninitialised storage, which forms raw field pointers without creating a reference;
+    // going through `&(*null).field` instead would dereference a null pointer, which is
+    // undefined behaviour and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CAPS_PARAM>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CAPS_PARAM>(),
+        256usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CAPS_PARAM))
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CAPS_PARAM>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CAPS_PARAM))
+    );
+
+    // `version`: bytes 0..4.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!("Offset of field: ", stringify!(_NV_ENC_CAPS_PARAM), "::", stringify!(version))
+    );
+
+    // `capsToQuery`: bytes 4..8.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).capsToQuery) as usize - ptr as usize },
+        4usize,
+        concat!("Offset of field: ", stringify!(_NV_ENC_CAPS_PARAM), "::", stringify!(capsToQuery))
+    );
+
+    // `reserved`: bytes 8..256.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        8usize,
+        concat!("Offset of field: ", stringify!(_NV_ENC_CAPS_PARAM), "::", stringify!(reserved))
+    );
+}
+impl Default for _NV_ENC_CAPS_PARAM {
+    /// Builds the value whose fields are all zero bytes.
+    fn default() -> Self {
+        // `MaybeUninit::zeroed()` hands back storage that is already filled with zero bytes.
+        // NOTE(review): sound only if `NV_ENC_CAPS` declares a variant equal to 0 - confirm.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " Input struct for querying Encoding capabilities."]
+pub type NV_ENC_CAPS_PARAM = _NV_ENC_CAPS_PARAM;
+#[doc = " Restore encoder state parameters"]
+#[repr(C)] // C field order; the generated layout test pins this at 800 bytes, 8-byte aligned (8-byte pointers).
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_RESTORE_ENCODER_STATE_PARAMS {
+    #[doc = "< [in]: Struct version."]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [in]: State buffer index to which the encoder state will be restored"]
+    pub bufferIdx: u32, // Byte offset 4.
+    #[doc = "< [in]: State type to restore"]
+    pub state: NV_ENC_STATE_RESTORE_TYPE, // Byte offset 8. NOTE(review): presumably a Rust enum; if so it must hold a declared variant.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32, // Byte offset 12.
+    #[doc = "< [in]: Specifies the output buffer pointer, for AV1 encode only."]
+    #[doc = "Application must call NvEncRestoreEncoderState() API with _NV_ENC_RESTORE_ENCODER_STATE_PARAMS::outputBitstream and"]
+    #[doc = "_NV_ENC_RESTORE_ENCODER_STATE_PARAMS::completionEvent as input when an earlier call to this API returned \"NV_ENC_ERR_NEED_MORE_OUTPUT\", for AV1 encode."]
+    pub outputBitstream: NV_ENC_OUTPUT_PTR, // Byte offset 16.
+    #[doc = "< [in]: Specifies the completion event when asynchronous mode of encoding is enabled. Used for AV1 encode only."]
+    pub completionEvent: *mut ::std::os::raw::c_void, // Byte offset 24; raw pointer, so derived Eq/Ord/Hash work on the address.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 64usize], // Byte offset 32; 64 * 4 = 256 bytes.
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize], // Byte offset 288; 512 bytes in the pinned layout.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_RESTORE_ENCODER_STATE_PARAMS() {
+    // Pins size, alignment and field offsets to the values recorded at generation time
+    // (they imply 8-byte pointers). Offsets are read with `addr_of!` on uninitialised
+    // storage, which never creates a reference; `&(*null).field` would dereference a null
+    // pointer, which is undefined behaviour and is flagged by the `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_RESTORE_ENCODER_STATE_PARAMS>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_RESTORE_ENCODER_STATE_PARAMS>(),
+        800usize,
+        concat!(
+            "Size of: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS)
+        )
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_RESTORE_ENCODER_STATE_PARAMS>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS)
+        )
+    );
+
+    // `version`: bytes 0..4.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(version)
+        )
+    );
+
+    // `bufferIdx`: bytes 4..8.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bufferIdx) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(bufferIdx)
+        )
+    );
+
+    // `state`: bytes 8..12.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).state) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(state)
+        )
+    );
+
+    // `reserved`: bytes 12..16.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(reserved)
+        )
+    );
+
+    // `outputBitstream`: bytes 16..24.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).outputBitstream) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(outputBitstream)
+        )
+    );
+
+    // `completionEvent`: bytes 24..32.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).completionEvent) as usize - ptr as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(completionEvent)
+        )
+    );
+
+    // `reserved1`: bytes 32..288.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+
+    // `reserved2`: bytes 288..800.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        288usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_RESTORE_ENCODER_STATE_PARAMS),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_RESTORE_ENCODER_STATE_PARAMS {
+    /// Builds the value whose fields are all zero bytes (integers 0, raw pointers null).
+    fn default() -> Self {
+        // NOTE(review): `state` is an `NV_ENC_STATE_RESTORE_TYPE`, declared outside this part
+        // of the file. An all-zero value is only valid if that enum has a 0 variant - confirm.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " Restore encoder state parameters"]
+pub type NV_ENC_RESTORE_ENCODER_STATE_PARAMS = _NV_ENC_RESTORE_ENCODER_STATE_PARAMS;
+#[doc = " Encoded frame information parameters for every block."]
+#[repr(C)] // C field order; the generated layout test pins this at 64 bytes, 4-byte aligned.
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_OUTPUT_STATS_BLOCK { // `Default` is derived here, so the default value is every field zeroed.
+    #[doc = "< [in]: Struct version"]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [out]: QP of the block"]
+    pub QP: u8, // Byte offset 4.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u8; 3usize], // Byte offset 5; fills bytes 5..8 so `bitcount` lands on a 4-byte boundary.
+    #[doc = "< [out]: Bitcount of the block"]
+    pub bitcount: u32, // Byte offset 8.
+    #[doc = "< [out]: SATD cost of the residual error"]
+    pub satdCost: u32, // Byte offset 12.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 12usize], // Byte offset 16; 12 * 4 = 48 bytes, which brings the total to 64.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_OUTPUT_STATS_BLOCK() {
+    // Pins the size, alignment and field offsets of `_NV_ENC_OUTPUT_STATS_BLOCK` to the
+    // values recorded when the binding was generated. Offsets are read with `addr_of!` on
+    // uninitialised storage, which forms raw field pointers without creating a reference;
+    // going through `&(*null).field` instead would dereference a null pointer, which is
+    // undefined behaviour and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_OUTPUT_STATS_BLOCK>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_OUTPUT_STATS_BLOCK>(),
+        64usize,
+        concat!("Size of: ", stringify!(_NV_ENC_OUTPUT_STATS_BLOCK))
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_OUTPUT_STATS_BLOCK>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_OUTPUT_STATS_BLOCK))
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).QP) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(QP)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        5usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bitcount) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(bitcount)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).satdCost) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(satdCost)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_BLOCK),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+}
+#[doc = " Encoded frame information parameters for every block."]
+pub type NV_ENC_OUTPUT_STATS_BLOCK = _NV_ENC_OUTPUT_STATS_BLOCK;
+#[doc = " Encoded frame information parameters for every row."]
+#[repr(C)] // C field order; the generated layout test pins this at 64 bytes, 4-byte aligned.
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_OUTPUT_STATS_ROW { // Same field list and layout as _NV_ENC_OUTPUT_STATS_BLOCK, but a distinct type.
+    #[doc = "< [in]: Struct version"]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [out]: QP of the row"]
+    pub QP: u8, // Byte offset 4.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u8; 3usize], // Byte offset 5; fills bytes 5..8 so `bitcount` lands on a 4-byte boundary.
+    #[doc = "< [out]: Bitcount of the row"]
+    pub bitcount: u32, // Byte offset 8.
+    #[doc = "< [out]: SATD cost of the residual error"]
+    pub satdCost: u32, // Byte offset 12.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 12usize], // Byte offset 16; 12 * 4 = 48 bytes, which brings the total to 64.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_OUTPUT_STATS_ROW() {
+    // Pins the size, alignment and field offsets of `_NV_ENC_OUTPUT_STATS_ROW` to the
+    // values recorded when the binding was generated. Offsets are read with `addr_of!` on
+    // uninitialised storage, which forms raw field pointers without creating a reference;
+    // going through `&(*null).field` instead would dereference a null pointer, which is
+    // undefined behaviour and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_OUTPUT_STATS_ROW>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_OUTPUT_STATS_ROW>(),
+        64usize,
+        concat!("Size of: ", stringify!(_NV_ENC_OUTPUT_STATS_ROW))
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_OUTPUT_STATS_ROW>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_OUTPUT_STATS_ROW))
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).QP) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(QP)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        5usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bitcount) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(bitcount)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).satdCost) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(satdCost)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_OUTPUT_STATS_ROW),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+}
+#[doc = " Encoded frame information parameters for every row."]
+pub type NV_ENC_OUTPUT_STATS_ROW = _NV_ENC_OUTPUT_STATS_ROW;
+#[doc = " Encoder Output parameters"]
+#[repr(C)] // C field order; the generated layout test pins this at 256 bytes, 4-byte aligned.
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_ENCODE_OUT_PARAMS { // `Default` is not derived; a hand-written impl follows the layout test.
+    #[doc = "< [out]: Struct version."]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [out]: Encoded bitstream size in bytes"]
+    pub bitstreamSizeInBytes: u32, // Byte offset 4.
+    #[doc = "< [out]: Reserved and must be set to 0"]
+    pub reserved: [u32; 62usize], // Byte offset 8; 62 * 4 = 248 bytes, which brings the total to 256.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_ENCODE_OUT_PARAMS() {
+    // Pins size, alignment and field offsets to the values recorded at generation time.
+    // Offsets are read with `addr_of!` on uninitialised storage, which never creates a
+    // reference; `&(*null).field` would dereference a null pointer, which is undefined
+    // behaviour and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_ENCODE_OUT_PARAMS>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_ENCODE_OUT_PARAMS>(),
+        256usize,
+        concat!("Size of: ", stringify!(_NV_ENC_ENCODE_OUT_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_ENCODE_OUT_PARAMS>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_ENCODE_OUT_PARAMS))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_ENCODE_OUT_PARAMS),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bitstreamSizeInBytes) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_ENCODE_OUT_PARAMS),
+            "::",
+            stringify!(bitstreamSizeInBytes)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_ENCODE_OUT_PARAMS),
+            "::",
+            stringify!(reserved)
+        )
+    );
+}
+impl Default for _NV_ENC_ENCODE_OUT_PARAMS {
+    /// Builds the value whose fields are all zero: `version`, `bitstreamSizeInBytes`, `reserved`.
+    fn default() -> Self {
+        // Every field is a `u32` or an array of `u32`, so the all-zero byte pattern is a
+        // valid value; `MaybeUninit::zeroed()` provides storage already filled with zeroes.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " Encoder Output parameters"]
+pub type NV_ENC_ENCODE_OUT_PARAMS = _NV_ENC_ENCODE_OUT_PARAMS;
+#[doc = " Lookahead picture parameters"]
+#[repr(C)] // C field order; the generated layout test pins this at 784 bytes, 8-byte aligned (8-byte pointers).
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_LOOKAHEAD_PIC_PARAMS {
+    #[doc = "< [in]: Struct version."]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32, // Byte offset 4.
+    #[doc = "< [in]: Specifies the input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs."]
+    pub inputBuffer: NV_ENC_INPUT_PTR, // Byte offset 8.
+    #[doc = "< [in]: Specifies input picture type. Client required to be set explicitly by the client if the client has not set NV_ENC_INITALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder."]
+    pub pictureType: NV_ENC_PIC_TYPE, // Byte offset 16. NOTE(review): "NV_ENC_INITALIZE_PARAMS" above is presumably NV_ENC_INITIALIZE_PARAMS - confirm.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 63usize], // Byte offset 20; 63 * 4 = 252 bytes, ending at 272 where the pointer array starts.
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize], // Byte offset 272; 512 bytes in the pinned layout.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_LOOKAHEAD_PIC_PARAMS() {
+    // Pins size, alignment and field offsets to the values recorded at generation time
+    // (they imply 8-byte pointers). Offsets are read with `addr_of!` on uninitialised
+    // storage, which never creates a reference; `&(*null).field` would dereference a null
+    // pointer, which is undefined behaviour and is flagged by the `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_LOOKAHEAD_PIC_PARAMS>::uninit();
+    let ptr = uninit.as_ptr();
+
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_LOOKAHEAD_PIC_PARAMS>(),
+        784usize,
+        concat!("Size of: ", stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS))
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_LOOKAHEAD_PIC_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS))
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(version)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(reserved)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).inputBuffer) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(inputBuffer)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).pictureType) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(pictureType)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        272usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_LOOKAHEAD_PIC_PARAMS),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_LOOKAHEAD_PIC_PARAMS {
+    /// Builds the value whose fields are all zero bytes (integers 0, raw pointers null).
+    fn default() -> Self {
+        // NOTE(review): `pictureType` is an `NV_ENC_PIC_TYPE`, declared outside this part of
+        // the file. An all-zero value is only valid if that type accepts 0 - confirm.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " Lookahead picture parameters"]
+pub type NV_ENC_LOOKAHEAD_PIC_PARAMS = _NV_ENC_LOOKAHEAD_PIC_PARAMS;
+#[doc = " Creation parameters for input buffer."]
+#[repr(C)] // C field order; the generated layout test pins this at 776 bytes, 8-byte aligned (8-byte pointers).
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CREATE_INPUT_BUFFER {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_CREATE_INPUT_BUFFER_VER"]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [in]: Input frame width"]
+    pub width: u32, // Byte offset 4.
+    #[doc = "< [in]: Input frame height"]
+    pub height: u32, // Byte offset 8.
+    #[doc = "< [in]: Deprecated. Do not use"]
+    pub memoryHeap: NV_ENC_MEMORY_HEAP, // Byte offset 12; kept because removing it would shift every later field.
+    #[doc = "< [in]: Input buffer format"]
+    pub bufferFmt: NV_ENC_BUFFER_FORMAT, // Byte offset 16.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32, // Byte offset 20.
+    #[doc = "< [out]: Pointer to input buffer"]
+    pub inputBuffer: NV_ENC_INPUT_PTR, // Byte offset 24; the only field documented as an output.
+    #[doc = "< [in]: Pointer to existing system memory buffer"]
+    pub pSysMemBuffer: *mut ::std::os::raw::c_void, // Byte offset 32; raw pointer, so derived Eq/Ord/Hash work on the address.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 58usize], // Byte offset 40; 58 * 4 = 232 bytes, ending at 272.
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 63usize], // Byte offset 272; 63 pointers, 504 bytes in the pinned layout.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CREATE_INPUT_BUFFER() {
+    // Pins the size, alignment and field offsets of `_NV_ENC_CREATE_INPUT_BUFFER` to the
+    // values recorded when the binding was generated (they imply 8-byte pointers). Offsets
+    // are read with `addr_of!` on uninitialised storage, which forms raw field pointers
+    // without creating a reference; going through `&(*null).field` instead would dereference
+    // a null pointer, which is undefined behaviour and is flagged by `deref_nullptr`.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CREATE_INPUT_BUFFER>::uninit();
+    let ptr = uninit.as_ptr();
+
+    // Overall size and alignment.
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CREATE_INPUT_BUFFER>(),
+        776usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CREATE_INPUT_BUFFER))
+    );
+
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CREATE_INPUT_BUFFER>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CREATE_INPUT_BUFFER))
+    );
+
+    // Byte offset of each field, in declaration order.
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(version)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).width) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(width)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).height) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(height)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).memoryHeap) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(memoryHeap)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bufferFmt) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(bufferFmt)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(reserved)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).inputBuffer) as usize - ptr as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(inputBuffer)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).pSysMemBuffer) as usize - ptr as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(pSysMemBuffer)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        272usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CREATE_INPUT_BUFFER),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_CREATE_INPUT_BUFFER {
+    /// Builds the value whose fields are all zero bytes (integers 0, raw pointers null).
+    fn default() -> Self {
+        // NOTE(review): `memoryHeap` and `bufferFmt` use types declared outside this part of
+        // the file. An all-zero value is only valid if both types accept 0 - confirm.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " Creation parameters for input buffer."]
+pub type NV_ENC_CREATE_INPUT_BUFFER = _NV_ENC_CREATE_INPUT_BUFFER;
+#[doc = " Creation parameters for output bitstream buffer."]
+#[repr(C)] // C field order; the generated layout test pins this at 776 bytes, 8-byte aligned (8-byte pointers).
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CREATE_BITSTREAM_BUFFER {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_CREATE_BITSTREAM_BUFFER_VER"]
+    pub version: u32, // Byte offset 0.
+    #[doc = "< [in]: Deprecated. Do not use"]
+    pub size: u32, // Byte offset 4; kept because removing it would shift every later field.
+    #[doc = "< [in]: Deprecated. Do not use"]
+    pub memoryHeap: NV_ENC_MEMORY_HEAP, // Byte offset 8; kept for the same layout reason.
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32,
+    #[doc = "< [out]: Pointer to the output bitstream buffer"]
+    pub bitstreamBuffer: NV_ENC_OUTPUT_PTR, // The field callers read back; documented as an output.
+    #[doc = "< [out]: Reserved and should not be used"]
+    pub bitstreamBufferPtr: *mut ::std::os::raw::c_void, // Raw pointer, so derived Eq/Ord/Hash work on the address.
+    #[doc = "< [in]: Reserved and should be set to 0"]
+    pub reserved1: [u32; 58usize], // 58 * 4 = 232 bytes.
+    #[doc = "< [in]: Reserved and should be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize], // 64 pointers here, versus 63 in _NV_ENC_CREATE_INPUT_BUFFER.
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CREATE_BITSTREAM_BUFFER() {
+    // Layout check for `_NV_ENC_CREATE_BITSTREAM_BUFFER`: its size, alignment and
+    // every field offset must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CREATE_BITSTREAM_BUFFER>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_CREATE_BITSTREAM_BUFFER),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CREATE_BITSTREAM_BUFFER>(),
+        776usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CREATE_BITSTREAM_BUFFER))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CREATE_BITSTREAM_BUFFER>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CREATE_BITSTREAM_BUFFER))
+    );
+    check_offset!(version, 0usize);
+    check_offset!(size, 4usize);
+    check_offset!(memoryHeap, 8usize);
+    check_offset!(reserved, 12usize);
+    check_offset!(bitstreamBuffer, 16usize);
+    check_offset!(bitstreamBufferPtr, 24usize);
+    check_offset!(reserved1, 32usize);
+    check_offset!(reserved2, 264usize);
+}
+impl Default for _NV_ENC_CREATE_BITSTREAM_BUFFER {
+    /// Returns an instance whose every byte (padding included) is zero.
+    fn default() -> Self {
+        // SAFETY (review): zero is valid for the integer, array and raw-pointer
+        // fields; `memoryHeap` and `bitstreamBuffer` use types declared elsewhere
+        // in this file, so confirm an all-zero value is valid for them too.
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+/// Creation parameters for output bitstream buffer.
+///
+/// Public alias for [`_NV_ENC_CREATE_BITSTREAM_BUFFER`].
+pub type NV_ENC_CREATE_BITSTREAM_BUFFER = _NV_ENC_CREATE_BITSTREAM_BUFFER;
+/// Structs needed for ME only mode.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_MVECTOR {
+    /// The x component of MV in quarter-pel units.
+    pub mvx: i16,
+    /// The y component of MV in quarter-pel units.
+    pub mvy: i16,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_MVECTOR() {
+    // Layout check for `_NV_ENC_MVECTOR`: its size, alignment and every field
+    // offset must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_MVECTOR>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_MVECTOR),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_MVECTOR>(),
+        4usize,
+        concat!("Size of: ", stringify!(_NV_ENC_MVECTOR))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_MVECTOR>(),
+        2usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_MVECTOR))
+    );
+    check_offset!(mvx, 0usize);
+    check_offset!(mvy, 2usize);
+}
+/// Structs needed for ME only mode.
+///
+/// Public alias for [`_NV_ENC_MVECTOR`].
+pub type NV_ENC_MVECTOR = _NV_ENC_MVECTOR;
+/// Motion vector structure per macroblock for H264 motion estimation.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_H264_MV_DATA {
+    /// Up to 4 vectors for 8x8 partition.
+    pub mv: [NV_ENC_MVECTOR; 4usize],
+    /// 0 (I), 1 (P), 2 (IPCM), 3 (B).
+    pub mbType: u8,
+    /// Specifies the block partition type. 0:16x16, 1:8x8, 2:16x8, 3:8x16.
+    pub partitionType: u8,
+    /// Reserved padding for alignment.
+    pub reserved: u16,
+    pub mbCost: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_H264_MV_DATA() {
+    // Layout check for `_NV_ENC_H264_MV_DATA`: its size, alignment and every field
+    // offset must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_H264_MV_DATA>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_H264_MV_DATA),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_H264_MV_DATA>(),
+        24usize,
+        concat!("Size of: ", stringify!(_NV_ENC_H264_MV_DATA))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_H264_MV_DATA>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_H264_MV_DATA))
+    );
+    check_offset!(mv, 0usize);
+    check_offset!(mbType, 16usize);
+    check_offset!(partitionType, 17usize);
+    check_offset!(reserved, 18usize);
+    check_offset!(mbCost, 20usize);
+}
+/// Motion vector structure per macroblock for H264 motion estimation.
+///
+/// Public alias for [`_NV_ENC_H264_MV_DATA`].
+pub type NV_ENC_H264_MV_DATA = _NV_ENC_H264_MV_DATA;
+/// Motion vector structure per CU for HEVC motion estimation.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_HEVC_MV_DATA {
+    /// Up to 4 vectors within a CU.
+    pub mv: [NV_ENC_MVECTOR; 4usize],
+    /// 0 (I), 1 (P).
+    pub cuType: u8,
+    /// 0: 8x8, 1: 16x16, 2: 32x32, 3: 64x64.
+    pub cuSize: u8,
+    /// The CU partition mode:
+    /// 0 (2Nx2N), 1 (2NxN), 2 (Nx2N), 3 (NxN),
+    /// 4 (2NxnU), 5 (2NxnD), 6 (nLx2N), 7 (nRx2N).
+    pub partitionMode: u8,
+    /// Marker to separate CUs in the current CTB from CUs in the next CTB.
+    pub lastCUInCTB: u8,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_HEVC_MV_DATA() {
+    // Layout check for `_NV_ENC_HEVC_MV_DATA`: its size, alignment and every field
+    // offset must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_HEVC_MV_DATA>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_HEVC_MV_DATA),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_HEVC_MV_DATA>(),
+        20usize,
+        concat!("Size of: ", stringify!(_NV_ENC_HEVC_MV_DATA))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_HEVC_MV_DATA>(),
+        2usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_HEVC_MV_DATA))
+    );
+    check_offset!(mv, 0usize);
+    check_offset!(cuType, 16usize);
+    check_offset!(cuSize, 17usize);
+    check_offset!(partitionMode, 18usize);
+    check_offset!(lastCUInCTB, 19usize);
+}
+/// Motion vector structure per CU for HEVC motion estimation.
+///
+/// Public alias for [`_NV_ENC_HEVC_MV_DATA`].
+pub type NV_ENC_HEVC_MV_DATA = _NV_ENC_HEVC_MV_DATA;
+/// Creation parameters for output motion vector buffer for ME only mode.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CREATE_MV_BUFFER {
+    /// `[in]` Struct version. Must be set to `NV_ENC_CREATE_MV_BUFFER_VER`.
+    pub version: u32,
+    /// `[in]` Reserved and should be set to 0.
+    pub reserved: u32,
+    /// `[out]` Pointer to the output motion vector buffer.
+    pub mvBuffer: NV_ENC_OUTPUT_PTR,
+    /// `[in]` Reserved and should be set to 0.
+    pub reserved1: [u32; 254usize],
+    /// `[in]` Reserved and should be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 63usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CREATE_MV_BUFFER() {
+    // Layout check for `_NV_ENC_CREATE_MV_BUFFER`: its size, alignment and every
+    // field offset must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CREATE_MV_BUFFER>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_CREATE_MV_BUFFER),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CREATE_MV_BUFFER>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CREATE_MV_BUFFER))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CREATE_MV_BUFFER>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CREATE_MV_BUFFER))
+    );
+    check_offset!(version, 0usize);
+    check_offset!(reserved, 4usize);
+    check_offset!(mvBuffer, 8usize);
+    check_offset!(reserved1, 16usize);
+    check_offset!(reserved2, 1032usize);
+}
+impl Default for _NV_ENC_CREATE_MV_BUFFER {
+    /// Returns an instance whose every byte (padding included) is zero.
+    fn default() -> Self {
+        // SAFETY (review): zero is valid for the integer, array and raw-pointer
+        // fields; `mvBuffer` uses a type declared elsewhere in this file, so
+        // confirm an all-zero value is valid for it too.
+        unsafe { ::std::mem::zeroed() }
+    }
+}
+/// Creation parameters for output motion vector buffer for ME only mode.
+///
+/// Public alias for [`_NV_ENC_CREATE_MV_BUFFER`].
+pub type NV_ENC_CREATE_MV_BUFFER = _NV_ENC_CREATE_MV_BUFFER;
+/// QP value for frames.
+///
+/// Each field is declared `u32` for legacy reasons, but the client should treat it
+/// as a signed parameter (`int32_t`) when negative QP values are to be specified.
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_QP {
+    /// `[in]` Specifies QP value for P-frame.
+    pub qpInterP: u32,
+    /// `[in]` Specifies QP value for B-frame.
+    pub qpInterB: u32,
+    /// `[in]` Specifies QP value for Intra Frame.
+    pub qpIntra: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_QP() {
+    // Layout check for `_NV_ENC_QP`: its size, alignment and every field offset
+    // must equal the expected constants below.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_QP>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_QP),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_QP>(),
+        12usize,
+        concat!("Size of: ", stringify!(_NV_ENC_QP))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_QP>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_QP))
+    );
+    check_offset!(qpInterP, 0usize);
+    check_offset!(qpInterB, 4usize);
+    check_offset!(qpIntra, 8usize);
+}
+/// QP value for frames.
+///
+/// Public alias for [`_NV_ENC_QP`].
+pub type NV_ENC_QP = _NV_ENC_QP;
+/// Rate Control Configuration Parameters.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_RC_PARAMS {
+    pub version: u32,
+    /// `[in]` Specifies the rate control mode. Check support for various rate
+    /// control modes using `NV_ENC_CAPS_SUPPORTED_RATECONTROL_MODES` caps.
+    pub rateControlMode: NV_ENC_PARAMS_RC_MODE,
+    /// `[in]` Specifies the initial QP to be used for encoding; these values would
+    /// be used for all frames if in Constant QP mode.
+    pub constQP: NV_ENC_QP,
+    /// `[in]` Specifies the average bitrate (in bits/sec) used for encoding.
+    pub averageBitRate: u32,
+    /// `[in]` Specifies the maximum bitrate for the encoded output. This is used
+    /// for VBR and ignored for CBR mode.
+    pub maxBitRate: u32,
+    /// `[in]` Specifies the VBV(HRD) buffer size in bits. Set 0 to use the default
+    /// VBV buffer size.
+    pub vbvBufferSize: u32,
+    /// `[in]` Specifies the VBV(HRD) initial delay in bits. Set 0 to use the
+    /// default VBV initial delay.
+    pub vbvInitialDelay: u32,
+    /// Zero-length array emitted alongside `_bitfield_1`.
+    pub _bitfield_align_1: [u16; 0],
+    /// Packed storage for the C bit-field flags of this struct; read and written
+    /// through the generated accessors (e.g. `enableMinQP` / `set_enableMinQP`).
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// `[in]` Specifies the minimum QP used for rate control. Client must set
+    /// `NV_ENC_CONFIG::enableMinQP` to 1.
+    pub minQP: NV_ENC_QP,
+    /// `[in]` Specifies the maximum QP used for rate control. Client must set
+    /// `NV_ENC_CONFIG::enableMaxQP` to 1.
+    pub maxQP: NV_ENC_QP,
+    /// `[in]` Specifies the initial QP hint used for rate control. The parameter is
+    /// just used as hint to influence the QP difference between I, P and B frames.
+    /// Client must set `NV_ENC_CONFIG::enableInitialRCQP` to 1.
+    pub initialRCQP: NV_ENC_QP,
+    /// `[in]` Specifies the temporal layers (as a bitmask) whose QPs have changed.
+    /// Valid max bitmask is `[2^NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS - 1]`.
+    /// Applicable only for constant QP mode
+    /// (`NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP`).
+    pub temporallayerIdxMask: u32,
+    /// `[in]` Specifies the temporal layer QPs used for rate control. Temporal
+    /// layer index is used as the array index.
+    /// Applicable only for constant QP mode
+    /// (`NV_ENC_RC_PARAMS::rateControlMode = NV_ENC_PARAMS_RC_CONSTQP`).
+    pub temporalLayerQP: [u8; 8usize],
+    /// `[in]` Target CQ (Constant Quality) level for VBR mode (range 0-51 for
+    /// H264/HEVC, 0-63 for AV1 with 0-automatic).
+    pub targetQuality: u8,
+    /// `[in]` Fractional part of target quality (as 8.8 fixed point format).
+    pub targetQualityLSB: u8,
+    /// `[in]` Maximum depth of lookahead with range 0-(31 - number of B frames).
+    /// `lookaheadDepth` is only used if `enableLookahead=1`.
+    pub lookaheadDepth: u16,
+    /// `[in]` Specifies the ratio of I frame bits to P frame bits in case of single
+    /// frame VBV and CBR rate control mode; is set to 2 by default for low latency
+    /// tuning info and 1 by default for ultra low latency tuning info.
+    pub lowDelayKeyFrameScale: u8,
+    /// `[in]` Specifies the value of 'deltaQ_y_dc' in AV1.
+    pub yDcQPIndexOffset: i8,
+    /// `[in]` Specifies the value of 'deltaQ_u_dc' in AV1.
+    pub uDcQPIndexOffset: i8,
+    /// `[in]` Specifies the value of 'deltaQ_v_dc' in AV1 (for future use only -
+    /// deltaQ_v_dc is currently always internally set to same value as
+    /// deltaQ_u_dc).
+    pub vDcQPIndexOffset: i8,
+    /// `[in]` This flag is used to interpret values in array specified by
+    /// `NV_ENC_PIC_PARAMS::qpDeltaMap`.
+    ///
+    /// Set this to `NV_ENC_QP_MAP_EMPHASIS` to treat values specified by
+    /// `NV_ENC_PIC_PARAMS::qpDeltaMap` as Emphasis Level Map.
+    /// Emphasis Level can be assigned any value specified in enum
+    /// `NV_ENC_EMPHASIS_MAP_LEVEL`.
+    /// Emphasis Level Map is used to specify regions to be encoded at varying
+    /// levels of quality.
+    /// The hardware encoder adjusts the quantization within the image as per the
+    /// provided emphasis map, by adjusting the quantization parameter (QP)
+    /// assigned to each macroblock. This adjustment is commonly called "Delta QP".
+    /// The adjustment depends on the absolute QP decided by the rate control
+    /// algorithm, and is applied after the rate control has decided each
+    /// macroblock's QP.
+    /// Since the Delta QP overrides rate control, enabling Emphasis Level Map may
+    /// violate bitrate and VBV buffer size constraints.
+    /// Emphasis Level Map is useful in situations where client has a priori
+    /// knowledge of the image complexity (e.g. via use of NVFBC's Classification
+    /// feature) and encoding those high-complexity areas at higher quality (lower
+    /// QP) is important, even at the possible cost of violating bitrate/VBV buffer
+    /// size constraints.
+    /// This feature is not supported when AQ (Spatial/Temporal) is enabled.
+    /// This feature is only supported for H264 codec currently.
+    ///
+    /// Set this to `NV_ENC_QP_MAP_DELTA` to treat values specified by
+    /// `NV_ENC_PIC_PARAMS::qpDeltaMap` as QP Delta. This specifies QP modifier to
+    /// be applied on top of the QP chosen by rate control.
+    ///
+    /// Set this to `NV_ENC_QP_MAP_DISABLED` to ignore
+    /// `NV_ENC_PIC_PARAMS::qpDeltaMap` values. In this case, `qpDeltaMap` should
+    /// be set to NULL.
+    ///
+    /// Other values are reserved for future use.
+    pub qpMapMode: NV_ENC_QP_MAP_MODE,
+    /// `[in]` This flag is used to enable multi-pass encoding for a given
+    /// `NV_ENC_PARAMS_RC_MODE`. This flag is not valid for H264 and HEVC MEOnly
+    /// mode.
+    pub multiPass: NV_ENC_MULTI_PASS,
+    /// `[in]` Specifies the ratio in which bitrate should be split between base and
+    /// alpha layer. A value 'x' for this field will split the target bitrate in a
+    /// ratio of x : 1 between base and alpha layer.
+    /// The default split ratio is 15.
+    pub alphaLayerBitrateRatio: u32,
+    /// `[in]` Specifies the value of 'chroma_qp_index_offset' in H264 /
+    /// 'pps_cb_qp_offset' in HEVC / 'deltaQ_u_ac' in AV1.
+    pub cbQPIndexOffset: i8,
+    /// `[in]` Specifies the value of 'second_chroma_qp_index_offset' in H264 /
+    /// 'pps_cr_qp_offset' in HEVC / 'deltaQ_v_ac' in AV1 (for future use only -
+    /// deltaQ_v_ac is currently always internally set to same value as
+    /// deltaQ_u_ac).
+    pub crQPIndexOffset: i8,
+    pub reserved2: u16,
+    /// `[in]` Specifies the lookahead level. Higher level may improve quality at
+    /// the expense of performance.
+    pub lookaheadLevel: NV_ENC_LOOKAHEAD_LEVEL,
+    /// `[in]` Specifies the bit rate ratio for each view of MV-HEVC except the base
+    /// view.
+    /// The base view bit rate ratio = 100 - (sum of bit rate ratio of all other
+    /// views).
+    pub viewBitrateRatios: [u8; 7usize],
+    pub reserved3: u8,
+    pub reserved1: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_RC_PARAMS() {
+    // Layout check for `_NV_ENC_RC_PARAMS`: its size, alignment and the offset of
+    // every named field must equal the expected constants below. The
+    // `_bitfield_align_1` / `_bitfield_1` storage fields are not checked.
+    //
+    // Offsets are measured with `addr_of!` against an uninitialised value instead
+    // of borrowing a field through a null pointer, which is undefined behaviour
+    // and is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_RC_PARAMS>::uninit();
+    let base = uninit.as_ptr();
+    // Asserts that `$field` lies `$expected` bytes from the start of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `addr_of!` only computes the field address; nothing is
+                // read and no reference to uninitialised memory is created.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_RC_PARAMS),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_RC_PARAMS>(),
+        128usize,
+        concat!("Size of: ", stringify!(_NV_ENC_RC_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_RC_PARAMS>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_RC_PARAMS))
+    );
+    check_offset!(version, 0usize);
+    check_offset!(rateControlMode, 4usize);
+    check_offset!(constQP, 8usize);
+    check_offset!(averageBitRate, 20usize);
+    check_offset!(maxBitRate, 24usize);
+    check_offset!(vbvBufferSize, 28usize);
+    check_offset!(vbvInitialDelay, 32usize);
+    check_offset!(minQP, 40usize);
+    check_offset!(maxQP, 52usize);
+    check_offset!(initialRCQP, 64usize);
+    check_offset!(temporallayerIdxMask, 76usize);
+    check_offset!(temporalLayerQP, 80usize);
+    check_offset!(targetQuality, 88usize);
+    check_offset!(targetQualityLSB, 89usize);
+    check_offset!(lookaheadDepth, 90usize);
+    check_offset!(lowDelayKeyFrameScale, 92usize);
+    check_offset!(yDcQPIndexOffset, 93usize);
+    check_offset!(uDcQPIndexOffset, 94usize);
+    check_offset!(vDcQPIndexOffset, 95usize);
+    check_offset!(qpMapMode, 96usize);
+    check_offset!(multiPass, 100usize);
+    check_offset!(alphaLayerBitrateRatio, 104usize);
+    check_offset!(cbQPIndexOffset, 108usize);
+    check_offset!(crQPIndexOffset, 109usize);
+    check_offset!(reserved2, 110usize);
+    check_offset!(lookaheadLevel, 112usize);
+    check_offset!(viewBitrateRatios, 116usize);
+    check_offset!(reserved3, 123usize);
+    check_offset!(reserved1, 124usize);
+}
+impl Default for _NV_ENC_RC_PARAMS {
+    /// Returns a value built from an all-zero byte pattern.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: sound only if the all-zero bit pattern is a valid value for
+        // every field. NOTE(review): the field types are declared outside this
+        // block; confirm none of them rejects zero.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_RC_PARAMS {
+    /// Reads `enableMinQP`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn enableMinQP(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableMinQP`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableMinQP(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableMaxQP`: 1 bit at bit 1 of `_bitfield_1`.
+    #[inline]
+    pub fn enableMaxQP(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableMaxQP`: 1 bit at bit 1 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableMaxQP(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableInitialRCQP`: 1 bit at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn enableInitialRCQP(&self) -> u32 {
+        self._bitfield_1.get(2usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableInitialRCQP`: 1 bit at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableInitialRCQP(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableAQ`: 1 bit at bit 3 of `_bitfield_1`.
+    #[inline]
+    pub fn enableAQ(&self) -> u32 {
+        self._bitfield_1.get(3usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableAQ`: 1 bit at bit 3 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableAQ(&mut self, val: u32) {
+        self._bitfield_1.set(3usize, 1u8, u64::from(val))
+    }
+    /// Reads `reservedBitField1`: 1 bit at bit 4 of `_bitfield_1`.
+    #[inline]
+    pub fn reservedBitField1(&self) -> u32 {
+        self._bitfield_1.get(4usize, 1u8) as u32
+    }
+    /// Writes `val` into `reservedBitField1`: 1 bit at bit 4 of `_bitfield_1`.
+    #[inline]
+    pub fn set_reservedBitField1(&mut self, val: u32) {
+        self._bitfield_1.set(4usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableLookahead`: 1 bit at bit 5 of `_bitfield_1`.
+    #[inline]
+    pub fn enableLookahead(&self) -> u32 {
+        self._bitfield_1.get(5usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableLookahead`: 1 bit at bit 5 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableLookahead(&mut self, val: u32) {
+        self._bitfield_1.set(5usize, 1u8, u64::from(val))
+    }
+    /// Reads `disableIadapt`: 1 bit at bit 6 of `_bitfield_1`.
+    #[inline]
+    pub fn disableIadapt(&self) -> u32 {
+        self._bitfield_1.get(6usize, 1u8) as u32
+    }
+    /// Writes `val` into `disableIadapt`: 1 bit at bit 6 of `_bitfield_1`.
+    #[inline]
+    pub fn set_disableIadapt(&mut self, val: u32) {
+        self._bitfield_1.set(6usize, 1u8, u64::from(val))
+    }
+    /// Reads `disableBadapt`: 1 bit at bit 7 of `_bitfield_1`.
+    #[inline]
+    pub fn disableBadapt(&self) -> u32 {
+        self._bitfield_1.get(7usize, 1u8) as u32
+    }
+    /// Writes `val` into `disableBadapt`: 1 bit at bit 7 of `_bitfield_1`.
+    #[inline]
+    pub fn set_disableBadapt(&mut self, val: u32) {
+        self._bitfield_1.set(7usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableTemporalAQ`: 1 bit at bit 8 of `_bitfield_1`.
+    #[inline]
+    pub fn enableTemporalAQ(&self) -> u32 {
+        self._bitfield_1.get(8usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableTemporalAQ`: 1 bit at bit 8 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableTemporalAQ(&mut self, val: u32) {
+        self._bitfield_1.set(8usize, 1u8, u64::from(val))
+    }
+    /// Reads `zeroReorderDelay`: 1 bit at bit 9 of `_bitfield_1`.
+    #[inline]
+    pub fn zeroReorderDelay(&self) -> u32 {
+        self._bitfield_1.get(9usize, 1u8) as u32
+    }
+    /// Writes `val` into `zeroReorderDelay`: 1 bit at bit 9 of `_bitfield_1`.
+    #[inline]
+    pub fn set_zeroReorderDelay(&mut self, val: u32) {
+        self._bitfield_1.set(9usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableNonRefP`: 1 bit at bit 10 of `_bitfield_1`.
+    #[inline]
+    pub fn enableNonRefP(&self) -> u32 {
+        self._bitfield_1.get(10usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableNonRefP`: 1 bit at bit 10 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableNonRefP(&mut self, val: u32) {
+        self._bitfield_1.set(10usize, 1u8, u64::from(val))
+    }
+    /// Reads `strictGOPTarget`: 1 bit at bit 11 of `_bitfield_1`.
+    #[inline]
+    pub fn strictGOPTarget(&self) -> u32 {
+        self._bitfield_1.get(11usize, 1u8) as u32
+    }
+    /// Writes `val` into `strictGOPTarget`: 1 bit at bit 11 of `_bitfield_1`.
+    #[inline]
+    pub fn set_strictGOPTarget(&mut self, val: u32) {
+        self._bitfield_1.set(11usize, 1u8, u64::from(val))
+    }
+    /// Reads `aqStrength`: 4 bits starting at bit 12 of `_bitfield_1`.
+    #[inline]
+    pub fn aqStrength(&self) -> u32 {
+        self._bitfield_1.get(12usize, 4u8) as u32
+    }
+    /// Writes `val` into `aqStrength`: 4 bits starting at bit 12 of `_bitfield_1`.
+    #[inline]
+    pub fn set_aqStrength(&mut self, val: u32) {
+        self._bitfield_1.set(12usize, 4u8, u64::from(val))
+    }
+    /// Reads `enableExtLookahead`: 1 bit at bit 16 of `_bitfield_1`.
+    #[inline]
+    pub fn enableExtLookahead(&self) -> u32 {
+        self._bitfield_1.get(16usize, 1u8) as u32
+    }
+    /// Writes `val` into `enableExtLookahead`: 1 bit at bit 16 of `_bitfield_1`.
+    #[inline]
+    pub fn set_enableExtLookahead(&mut self, val: u32) {
+        self._bitfield_1.set(16usize, 1u8, u64::from(val))
+    }
+    /// Reads `reservedBitFields`: 15 bits starting at bit 17 of `_bitfield_1`.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        self._bitfield_1.get(17usize, 15u8) as u32
+    }
+    /// Writes `val` into `reservedBitFields`: 15 bits starting at bit 17 of `_bitfield_1`.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        self._bitfield_1.set(17usize, 15u8, u64::from(val))
+    }
+    /// Builds a 4-byte bitfield unit from the fifteen individual field values.
+    ///
+    /// The unit starts from `Default::default()` and each argument is stored
+    /// at the same bit offset and width that its accessor above uses.
+    #[inline]
+    pub fn new_bitfield_1(
+        enableMinQP: u32,
+        enableMaxQP: u32,
+        enableInitialRCQP: u32,
+        enableAQ: u32,
+        reservedBitField1: u32,
+        enableLookahead: u32,
+        disableIadapt: u32,
+        disableBadapt: u32,
+        enableTemporalAQ: u32,
+        zeroReorderDelay: u32,
+        enableNonRefP: u32,
+        strictGOPTarget: u32,
+        aqStrength: u32,
+        enableExtLookahead: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        unit.set(0usize, 1u8, u64::from(enableMinQP));
+        unit.set(1usize, 1u8, u64::from(enableMaxQP));
+        unit.set(2usize, 1u8, u64::from(enableInitialRCQP));
+        unit.set(3usize, 1u8, u64::from(enableAQ));
+        unit.set(4usize, 1u8, u64::from(reservedBitField1));
+        unit.set(5usize, 1u8, u64::from(enableLookahead));
+        unit.set(6usize, 1u8, u64::from(disableIadapt));
+        unit.set(7usize, 1u8, u64::from(disableBadapt));
+        unit.set(8usize, 1u8, u64::from(enableTemporalAQ));
+        unit.set(9usize, 1u8, u64::from(zeroReorderDelay));
+        unit.set(10usize, 1u8, u64::from(enableNonRefP));
+        unit.set(11usize, 1u8, u64::from(strictGOPTarget));
+        unit.set(12usize, 4u8, u64::from(aqStrength));
+        unit.set(16usize, 1u8, u64::from(enableExtLookahead));
+        unit.set(17usize, 15u8, u64::from(reservedBitFields));
+        unit
+    }
+}
+#[doc = " Rate Control Configuration Parameters"]
+pub type NV_ENC_RC_PARAMS = _NV_ENC_RC_PARAMS; // same type, exposed under the name without the leading underscore
+#[doc = " Clock Timestamp set parameters"]
+#[doc = " For H264, this structure is used to populate Picture Timing SEI when NV_ENC_CONFIG_H264::enableTimeCode is set to 1."]
+#[doc = " For HEVC, this structure is used to populate Time Code SEI when NV_ENC_CONFIG_HEVC::enableTimeCodeSEI is set to 1."] // NOTE(review): the docs on `_NV_ENC_TIME_CODE::skipClockTimestampInsertion` name this HEVC flag `outputTimeCodeSEI`; confirm against nvEncodeAPI.h
+#[doc = " For more details, refer to Annex D of ITU-T Specification."]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CLOCK_TIMESTAMP_SET {
+    pub _bitfield_align_1: [u8; 0], // zero-length array: contributes no bytes to the struct
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // packed storage for countingType, discontinuityFlag, cntDroppedFrames, nFrames, secondsValue, minutesValue, hoursValue and reserved2; use the accessor methods
+    #[doc = "< [in] Specifies the 'time_offset_value'"]
+    pub timeOffset: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CLOCK_TIMESTAMP_SET() {
+    // Checks size, alignment and field offsets of the Rust struct against the
+    // values recorded when the bindings were generated.
+    //
+    // Offsets are measured on an uninitialized value through `addr_of!`, which
+    // takes a field address without creating a reference and without
+    // dereferencing a null pointer.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CLOCK_TIMESTAMP_SET>::uninit();
+    let base = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CLOCK_TIMESTAMP_SET>(),
+        8usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CLOCK_TIMESTAMP_SET))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CLOCK_TIMESTAMP_SET>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CLOCK_TIMESTAMP_SET))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).timeOffset) as usize - base as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CLOCK_TIMESTAMP_SET),
+            "::",
+            stringify!(timeOffset)
+        )
+    );
+}
+impl _NV_ENC_CLOCK_TIMESTAMP_SET {
+    /// Reads `countingType`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn countingType(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` into `countingType`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn set_countingType(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `discontinuityFlag`: 1 bit at bit 1 of `_bitfield_1`.
+    #[inline]
+    pub fn discontinuityFlag(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` into `discontinuityFlag`: 1 bit at bit 1 of `_bitfield_1`.
+    #[inline]
+    pub fn set_discontinuityFlag(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `cntDroppedFrames`: 1 bit at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn cntDroppedFrames(&self) -> u32 {
+        self._bitfield_1.get(2usize, 1u8) as u32
+    }
+    /// Writes `val` into `cntDroppedFrames`: 1 bit at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn set_cntDroppedFrames(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 1u8, u64::from(val))
+    }
+    /// Reads `nFrames`: 8 bits starting at bit 3 of `_bitfield_1`.
+    #[inline]
+    pub fn nFrames(&self) -> u32 {
+        self._bitfield_1.get(3usize, 8u8) as u32
+    }
+    /// Writes `val` into `nFrames`: 8 bits starting at bit 3 of `_bitfield_1`.
+    #[inline]
+    pub fn set_nFrames(&mut self, val: u32) {
+        self._bitfield_1.set(3usize, 8u8, u64::from(val))
+    }
+    /// Reads `secondsValue`: 6 bits starting at bit 11 of `_bitfield_1`.
+    #[inline]
+    pub fn secondsValue(&self) -> u32 {
+        self._bitfield_1.get(11usize, 6u8) as u32
+    }
+    /// Writes `val` into `secondsValue`: 6 bits starting at bit 11 of `_bitfield_1`.
+    #[inline]
+    pub fn set_secondsValue(&mut self, val: u32) {
+        self._bitfield_1.set(11usize, 6u8, u64::from(val))
+    }
+    /// Reads `minutesValue`: 6 bits starting at bit 17 of `_bitfield_1`.
+    #[inline]
+    pub fn minutesValue(&self) -> u32 {
+        self._bitfield_1.get(17usize, 6u8) as u32
+    }
+    /// Writes `val` into `minutesValue`: 6 bits starting at bit 17 of `_bitfield_1`.
+    #[inline]
+    pub fn set_minutesValue(&mut self, val: u32) {
+        self._bitfield_1.set(17usize, 6u8, u64::from(val))
+    }
+    /// Reads `hoursValue`: 5 bits starting at bit 23 of `_bitfield_1`.
+    #[inline]
+    pub fn hoursValue(&self) -> u32 {
+        self._bitfield_1.get(23usize, 5u8) as u32
+    }
+    /// Writes `val` into `hoursValue`: 5 bits starting at bit 23 of `_bitfield_1`.
+    #[inline]
+    pub fn set_hoursValue(&mut self, val: u32) {
+        self._bitfield_1.set(23usize, 5u8, u64::from(val))
+    }
+    /// Reads `reserved2`: 4 bits starting at bit 28 of `_bitfield_1`.
+    #[inline]
+    pub fn reserved2(&self) -> u32 {
+        self._bitfield_1.get(28usize, 4u8) as u32
+    }
+    /// Writes `val` into `reserved2`: 4 bits starting at bit 28 of `_bitfield_1`.
+    #[inline]
+    pub fn set_reserved2(&mut self, val: u32) {
+        self._bitfield_1.set(28usize, 4u8, u64::from(val))
+    }
+    /// Builds a 4-byte bitfield unit from the eight individual field values.
+    ///
+    /// The unit starts from `Default::default()` and each argument is stored
+    /// at the same bit offset and width that its accessor above uses.
+    #[inline]
+    pub fn new_bitfield_1(
+        countingType: u32,
+        discontinuityFlag: u32,
+        cntDroppedFrames: u32,
+        nFrames: u32,
+        secondsValue: u32,
+        minutesValue: u32,
+        hoursValue: u32,
+        reserved2: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        unit.set(0usize, 1u8, u64::from(countingType));
+        unit.set(1usize, 1u8, u64::from(discontinuityFlag));
+        unit.set(2usize, 1u8, u64::from(cntDroppedFrames));
+        unit.set(3usize, 8u8, u64::from(nFrames));
+        unit.set(11usize, 6u8, u64::from(secondsValue));
+        unit.set(17usize, 6u8, u64::from(minutesValue));
+        unit.set(23usize, 5u8, u64::from(hoursValue));
+        unit.set(28usize, 4u8, u64::from(reserved2));
+        unit
+    }
+}
+#[doc = " Clock Timestamp set parameters"]
+#[doc = " For H264, this structure is used to populate Picture Timing SEI when NV_ENC_CONFIG_H264::enableTimeCode is set to 1."]
+#[doc = " For HEVC, this structure is used to populate Time Code SEI when NV_ENC_CONFIG_HEVC::enableTimeCodeSEI is set to 1."] // NOTE(review): the docs on `_NV_ENC_TIME_CODE::skipClockTimestampInsertion` name this HEVC flag `outputTimeCodeSEI`; confirm against nvEncodeAPI.h
+#[doc = " For more details, refer to Annex D of ITU-T Specification."]
+pub type NV_ENC_CLOCK_TIMESTAMP_SET = _NV_ENC_CLOCK_TIMESTAMP_SET; // same type, exposed under the name without the leading underscore
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)] // `Default` is not derived; this type has a hand-written `impl Default`
+pub struct _NV_ENC_TIME_CODE {
+    #[doc = "< [in] Display picStruct"]
+    pub displayPicStruct: NV_ENC_DISPLAY_PIC_STRUCT,
+    #[doc = "< [in] Clock Timestamp set"]
+    pub clockTimestamp: [NV_ENC_CLOCK_TIMESTAMP_SET; 3usize], // three 8-byte entries; the layout test places this field at offset 4 and the next one at offset 28
+    #[doc = "< [in] 0 : Inserts Clock Timestamp if NV_ENC_CONFIG_H264::enableTimeCode (H264) or"]
+    #[doc = "NV_ENC_CONFIG_HEVC::outputTimeCodeSEI (HEVC) is specified"]
+    #[doc = "1 : Skips insertion of Clock Timestamp for current frame"]
+    pub skipClockTimestampInsertion: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_TIME_CODE() {
+    // Checks size, alignment and field offsets of the Rust struct against the
+    // values recorded when the bindings were generated.
+    //
+    // Offsets are measured on an uninitialized value through `addr_of!`, which
+    // takes a field address without creating a reference and without
+    // dereferencing a null pointer.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_TIME_CODE>::uninit();
+    let base = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_TIME_CODE>(),
+        32usize,
+        concat!("Size of: ", stringify!(_NV_ENC_TIME_CODE))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_TIME_CODE>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_TIME_CODE))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).displayPicStruct) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_TIME_CODE),
+            "::",
+            stringify!(displayPicStruct)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).clockTimestamp) as usize - base as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_TIME_CODE),
+            "::",
+            stringify!(clockTimestamp)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).skipClockTimestampInsertion) as usize - base as usize
+        },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_TIME_CODE),
+            "::",
+            stringify!(skipClockTimestampInsertion)
+        )
+    );
+}
+impl Default for _NV_ENC_TIME_CODE {
+    /// Returns a value built from an all-zero byte pattern.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: sound only if the all-zero bit pattern is a valid value for
+        // every field. NOTE(review): `NV_ENC_DISPLAY_PIC_STRUCT` is declared
+        // outside this block; confirm that zero is one of its valid values.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+pub type NV_ENC_TIME_CODE = _NV_ENC_TIME_CODE; // same type, exposed under the name without the leading underscore
+#[doc = " G.14.2.3 3D reference displays information SEI message syntax elements"]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _HEVC_3D_REFERENCE_DISPLAY_INFO {
+    pub _bitfield_align_1: [u32; 0], // zero-length array: contributes no bytes to the struct
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // packed storage for refViewingDistanceFlag (bit 0), threeDimensionalReferenceDisplaysExtensionFlag (bit 1) and reserved (30 bits from bit 2); use the accessor methods
+    #[doc = "< [in] Specifies the exponent of the maximum allowable truncation error for refDisplayWidth[i]. Range 0-31, inclusive."]
+    pub precRefDisplayWidth: i32,
+    #[doc = "< [in] Specifies the exponent of the maximum allowable truncation error for refViewingDist[i]. Range 0-31, inclusive."]
+    pub precRefViewingDist: i32,
+    #[doc = "< [in] Plus 1 specifies the number of reference displays that are signalled in this SEI message. Range 0-31, inclusive."]
+    pub numRefDisplaysMinus1: i32,
+    #[doc = "< [in] Indicates the ViewId of the left view of a stereo pair corresponding to the i-th reference display."]
+    pub leftViewId: [i32; 32usize],
+    #[doc = "< [in] Indicates the ViewId of the right view of a stereo-pair corresponding to the i-th reference display."]
+    pub rightViewId: [i32; 32usize],
+    #[doc = "< [in] Specifies the exponent part of the reference display width of the i-th reference display."]
+    pub exponentRefDisplayWidth: [i32; 32usize],
+    #[doc = "< [in] Specifies the mantissa part of the reference display width of the i-th reference display."]
+    pub mantissaRefDisplayWidth: [i32; 32usize],
+    #[doc = "< [in] Specifies the exponent part of the reference viewing distance of the i-th reference display."]
+    pub exponentRefViewingDistance: [i32; 32usize],
+    #[doc = "< [in] Specifies the mantissa part of the reference viewing distance of the i-th reference display."]
+    pub mantissaRefViewingDistance: [i32; 32usize],
+    #[doc = "< [in] Indicates the recommended additional horizontal shift for a stereo pair corresponding to the i-th reference baseline and the i-th reference display."]
+    pub numSampleShiftPlus512: [i32; 32usize],
+    #[doc = "< [in] Equal to 1 indicates that the information about additional horizontal shift of the left and right views for the i-th reference display is present in this SEI message."]
+    pub additionalShiftPresentFlag: [u8; 32usize], // one byte per entry, unlike the 4-byte entries of the other per-display arrays
+    #[doc = "< [in] Reserved and must be set to 0"]
+    pub reserved2: [u32; 4usize],
+}
+#[test]
+fn bindgen_test_layout__HEVC_3D_REFERENCE_DISPLAY_INFO() {
+    // Checks size, alignment and field offsets of the Rust struct against the
+    // values recorded when the bindings were generated.
+    //
+    // Offsets are measured on an uninitialized value through `addr_of!`, which
+    // takes a field address without creating a reference and without
+    // dereferencing a null pointer.
+    let uninit = ::std::mem::MaybeUninit::<_HEVC_3D_REFERENCE_DISPLAY_INFO>::uninit();
+    let base = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_HEVC_3D_REFERENCE_DISPLAY_INFO>(),
+        960usize,
+        concat!("Size of: ", stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_HEVC_3D_REFERENCE_DISPLAY_INFO>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).precRefDisplayWidth) as usize - base as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(precRefDisplayWidth)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).precRefViewingDist) as usize - base as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(precRefViewingDist)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).numRefDisplaysMinus1) as usize - base as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(numRefDisplaysMinus1)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).leftViewId) as usize - base as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(leftViewId)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).rightViewId) as usize - base as usize },
+        144usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(rightViewId)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).exponentRefDisplayWidth) as usize - base as usize },
+        272usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(exponentRefDisplayWidth)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).mantissaRefDisplayWidth) as usize - base as usize },
+        400usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(mantissaRefDisplayWidth)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).exponentRefViewingDistance) as usize - base as usize
+        },
+        528usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(exponentRefViewingDistance)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).mantissaRefViewingDistance) as usize - base as usize
+        },
+        656usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(mantissaRefViewingDistance)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).numSampleShiftPlus512) as usize - base as usize },
+        784usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(numSampleShiftPlus512)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).additionalShiftPresentFlag) as usize - base as usize
+        },
+        912usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(additionalShiftPresentFlag)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).reserved2) as usize - base as usize },
+        944usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_HEVC_3D_REFERENCE_DISPLAY_INFO),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl _HEVC_3D_REFERENCE_DISPLAY_INFO {
+    /// Reads `refViewingDistanceFlag`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn refViewingDistanceFlag(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` into `refViewingDistanceFlag`: 1 bit at bit 0 of `_bitfield_1`.
+    #[inline]
+    pub fn set_refViewingDistanceFlag(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `threeDimensionalReferenceDisplaysExtensionFlag`: 1 bit at bit 1
+    /// of `_bitfield_1`.
+    #[inline]
+    pub fn threeDimensionalReferenceDisplaysExtensionFlag(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` into `threeDimensionalReferenceDisplaysExtensionFlag`:
+    /// 1 bit at bit 1 of `_bitfield_1`.
+    #[inline]
+    pub fn set_threeDimensionalReferenceDisplaysExtensionFlag(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `reserved`: 30 bits starting at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn reserved(&self) -> u32 {
+        self._bitfield_1.get(2usize, 30u8) as u32
+    }
+    /// Writes `val` into `reserved`: 30 bits starting at bit 2 of `_bitfield_1`.
+    #[inline]
+    pub fn set_reserved(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 30u8, u64::from(val))
+    }
+    /// Builds a 4-byte bitfield unit from the three individual field values.
+    ///
+    /// The unit starts from `Default::default()` and each argument is stored
+    /// at the same bit offset and width that its accessor above uses.
+    #[inline]
+    pub fn new_bitfield_1(
+        refViewingDistanceFlag: u32,
+        threeDimensionalReferenceDisplaysExtensionFlag: u32,
+        reserved: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        unit.set(0usize, 1u8, u64::from(refViewingDistanceFlag));
+        unit.set(
+            1usize,
+            1u8,
+            u64::from(threeDimensionalReferenceDisplaysExtensionFlag),
+        );
+        unit.set(2usize, 30u8, u64::from(reserved));
+        unit
+    }
+}
+#[doc = " G.14.2.3 3D reference displays information SEI message syntax elements"]
+pub type HEVC_3D_REFERENCE_DISPLAY_INFO = _HEVC_3D_REFERENCE_DISPLAY_INFO; // same type, exposed under the name without the leading underscore
+#[doc = " Struct for storing x and y chroma points"]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _CHROMA_POINTS {
+    pub x: u16, // first two bytes of the 4-byte struct
+    pub y: u16, // last two bytes of the 4-byte struct
+}
+#[test]
+fn bindgen_test_layout__CHROMA_POINTS() {
+    // Checks size, alignment and field offsets of the Rust struct against the
+    // values recorded when the bindings were generated.
+    //
+    // Offsets are measured on an uninitialized value through `addr_of!`, which
+    // takes a field address without creating a reference and without
+    // dereferencing a null pointer.
+    let uninit = ::std::mem::MaybeUninit::<_CHROMA_POINTS>::uninit();
+    let base = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_CHROMA_POINTS>(),
+        4usize,
+        concat!("Size of: ", stringify!(_CHROMA_POINTS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_CHROMA_POINTS>(),
+        2usize,
+        concat!("Alignment of ", stringify!(_CHROMA_POINTS))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).x) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_CHROMA_POINTS),
+            "::",
+            stringify!(x)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).y) as usize - base as usize },
+        2usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_CHROMA_POINTS),
+            "::",
+            stringify!(y)
+        )
+    );
+}
+#[doc = " Struct for storing x and y chroma points"]
+pub type CHROMA_POINTS = _CHROMA_POINTS; // same type, exposed under the name without the leading underscore
+#[doc = " Struct for storing mastering-display information"]
+#[doc = " Refer to the AV1 spec 6.7.4 Metadata high dynamic range mastering display color volume semantics OR"]
+#[doc = " HEVC spec D.2.28 Mastering display colour volume SEI message syntax"]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _MASTERING_DISPLAY_INFO {
+    pub g: CHROMA_POINTS, // declaration order is g, b, r (not r, g, b); the layout test pins these at offsets 0, 4 and 8
+    pub b: CHROMA_POINTS,
+    pub r: CHROMA_POINTS,
+    pub whitePoint: CHROMA_POINTS,
+    pub maxLuma: u32, // NOTE(review): units are not stated in this file; confirm against nvEncodeAPI.h
+    pub minLuma: u32, // NOTE(review): units are not stated in this file; confirm against nvEncodeAPI.h
+}
+#[test]
+fn bindgen_test_layout__MASTERING_DISPLAY_INFO() {
+    // Checks size, alignment and field offsets of the Rust struct against the
+    // values recorded when the bindings were generated.
+    //
+    // Offsets are measured on an uninitialized value through `addr_of!`, which
+    // takes a field address without creating a reference and without
+    // dereferencing a null pointer.
+    let uninit = ::std::mem::MaybeUninit::<_MASTERING_DISPLAY_INFO>::uninit();
+    let base = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_MASTERING_DISPLAY_INFO>(),
+        24usize,
+        concat!("Size of: ", stringify!(_MASTERING_DISPLAY_INFO))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_MASTERING_DISPLAY_INFO>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_MASTERING_DISPLAY_INFO))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).g) as usize - base as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(g)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).b) as usize - base as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(b)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).r) as usize - base as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(r)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).whitePoint) as usize - base as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(whitePoint)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).maxLuma) as usize - base as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(maxLuma)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*base).minLuma) as usize - base as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_MASTERING_DISPLAY_INFO),
+            "::",
+            stringify!(minLuma)
+        )
+    );
+}
+#[doc = " Struct for storing mastering-display information"]
+#[doc = " Refer to the AV1 spec 6.7.4 Metadata high dynamic range mastering display color volume semantics OR"]
+#[doc = " HEVC spec D.2.28 Mastering display colour volume SEI message syntax"]
+pub type MASTERING_DISPLAY_INFO = _MASTERING_DISPLAY_INFO; // plain alias: the same type as `_MASTERING_DISPLAY_INFO`
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _CONTENT_LIGHT_LEVEL {
+    pub maxContentLightLevel: u16, // byte offset 0
+    pub maxPicAverageLightLevel: u16, // byte offset 2
+} // 4 bytes, 2-byte aligned; pinned by bindgen_test_layout__CONTENT_LIGHT_LEVEL
+#[test]
+fn bindgen_test_layout__CONTENT_LIGHT_LEVEL() {
+    // Layout guard for the #[repr(C)] struct: checks total size, alignment and the
+    // byte offset of both fields.
+    // Offsets are computed with addr_of! relative to uninitialized storage, so no
+    // reference is ever formed through a null pointer (which would be UB).
+    let uninit = ::std::mem::MaybeUninit::<_CONTENT_LIGHT_LEVEL>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_CONTENT_LIGHT_LEVEL>(),
+        4usize,
+        concat!("Size of: ", stringify!(_CONTENT_LIGHT_LEVEL))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_CONTENT_LIGHT_LEVEL>(),
+        2usize,
+        concat!("Alignment of ", stringify!(_CONTENT_LIGHT_LEVEL))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).maxContentLightLevel) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_CONTENT_LIGHT_LEVEL),
+            "::",
+            stringify!(maxContentLightLevel)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).maxPicAverageLightLevel) as usize - ptr as usize },
+        2usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_CONTENT_LIGHT_LEVEL),
+            "::",
+            stringify!(maxPicAverageLightLevel)
+        )
+    );
+}
+pub type CONTENT_LIGHT_LEVEL = _CONTENT_LIGHT_LEVEL; // plain alias: the same type as `_CONTENT_LIGHT_LEVEL`
+#[doc = " \\struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS"]
+#[doc = " H264 Video Usability Info parameters"]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS {
+    #[doc = "< [in]: If set to 1 , it specifies that the overscanInfo is present"]
+    pub overscanInfoPresentFlag: u32,
+    #[doc = "< [in]: Specifies the overscan info(as defined in Annex E of the ITU-T Specification)."]
+    pub overscanInfo: u32,
+    #[doc = "< [in]: If set to 1, it specifies that the videoFormat, videoFullRangeFlag and colourDescriptionPresentFlag are present."]
+    pub videoSignalTypePresentFlag: u32,
+    #[doc = "< [in]: Specifies the source video format(as defined in Annex E of the ITU-T Specification)."]
+    pub videoFormat: NV_ENC_VUI_VIDEO_FORMAT,
+    #[doc = "< [in]: Specifies the output range of the luma and chroma samples(as defined in Annex E of the ITU-T Specification)."]
+    pub videoFullRangeFlag: u32,
+    #[doc = "< [in]: If set to 1, it specifies that the colourPrimaries, transferCharacteristics and colourMatrix are present."]
+    pub colourDescriptionPresentFlag: u32,
+    #[doc = "< [in]: Specifies color primaries for converting to RGB(as defined in Annex E of the ITU-T Specification)"]
+    pub colourPrimaries: NV_ENC_VUI_COLOR_PRIMARIES,
+    #[doc = "< [in]: Specifies the opto-electronic transfer characteristics to use (as defined in Annex E of the ITU-T Specification)"]
+    pub transferCharacteristics: NV_ENC_VUI_TRANSFER_CHARACTERISTIC,
+    #[doc = "< [in]: Specifies the matrix coefficients used in deriving the luma and chroma from the RGB primaries (as defined in Annex E of the ITU-T Specification)."]
+    pub colourMatrix: NV_ENC_VUI_MATRIX_COEFFS,
+    #[doc = "< [in]: If set to 1 , it specifies that the chromaSampleLocationTop and chromaSampleLocationBot are present."]
+    pub chromaSampleLocationFlag: u32,
+    #[doc = "< [in]: Specifies the chroma sample location for top field(as defined in Annex E of the ITU-T Specification)"]
+    pub chromaSampleLocationTop: u32,
+    #[doc = "< [in]: Specifies the chroma sample location for bottom field(as defined in Annex E of the ITU-T Specification)"]
+    pub chromaSampleLocationBot: u32,
+    #[doc = "< [in]: If set to 1, it specifies the bitstream restriction parameters are present in the bitstream."]
+    pub bitstreamRestrictionFlag: u32,
+    #[doc = "< [in]: If set to 1, it specifies that the timingInfo is present and the 'numUnitInTicks' and 'timeScale' fields are specified by the application."]
+    pub timingInfoPresentFlag: u32,
+    #[doc = "< [in]: Specifies the number of time units of the clock(as defined in Annex E of the ITU-T Specification)."]
+    pub numUnitInTicks: u32,
+    #[doc = "< [in]: Specifies the frequency of the clock(as defined in Annex E of the ITU-T Specification)."]
+    pub timeScale: u32,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u32; 12usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_H264_VUI_PARAMETERS() {
+    // Layout guard (size, alignment, field offsets); offsets use addr_of! on uninitialized storage, not a null deref.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CONFIG_H264_VUI_PARAMETERS>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_H264_VUI_PARAMETERS>(),
+        112usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_H264_VUI_PARAMETERS>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS))
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).overscanInfoPresentFlag) as usize
+                - ptr as usize
+        },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(overscanInfoPresentFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).overscanInfo) as usize
+                - ptr as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(overscanInfo)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).videoSignalTypePresentFlag) as usize
+                - ptr as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(videoSignalTypePresentFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).videoFormat) as usize
+                - ptr as usize
+        },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(videoFormat)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).videoFullRangeFlag) as usize
+                - ptr as usize
+        },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(videoFullRangeFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).colourDescriptionPresentFlag) as usize
+                - ptr as usize
+        },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(colourDescriptionPresentFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).colourPrimaries) as usize
+                - ptr as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(colourPrimaries)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).transferCharacteristics) as usize
+                - ptr as usize
+        },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(transferCharacteristics)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).colourMatrix) as usize
+                - ptr as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(colourMatrix)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).chromaSampleLocationFlag) as usize
+                - ptr as usize
+        },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(chromaSampleLocationFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).chromaSampleLocationTop) as usize
+                - ptr as usize
+        },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(chromaSampleLocationTop)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).chromaSampleLocationBot) as usize
+                - ptr as usize
+        },
+        44usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(chromaSampleLocationBot)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).bitstreamRestrictionFlag) as usize
+                - ptr as usize
+        },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(bitstreamRestrictionFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).timingInfoPresentFlag) as usize
+                - ptr as usize
+        },
+        52usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(timingInfoPresentFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).numUnitInTicks) as usize
+                - ptr as usize
+        },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(numUnitInTicks)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).timeScale) as usize
+                - ptr as usize
+        },
+        60usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(timeScale)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved) as usize
+                - ptr as usize
+        },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_VUI_PARAMETERS),
+            "::",
+            stringify!(reserved)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG_H264_VUI_PARAMETERS {
+    /// Returns a value in which every byte of the struct is zero.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: NOTE(review) this relies on the all-zero bit pattern being a valid
+        // value for every field, including the NV_ENC_VUI_* typed ones — confirm.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+#[doc = " \\struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS"]
+#[doc = " H264 Video Usability Info parameters"]
+pub type NV_ENC_CONFIG_H264_VUI_PARAMETERS = _NV_ENC_CONFIG_H264_VUI_PARAMETERS; // plain alias of the struct
+#[doc = " \\struct _NV_ENC_CONFIG_H264_VUI_PARAMETERS"]
+#[doc = " H264 Video Usability Info parameters"]
+pub type NV_ENC_CONFIG_HEVC_VUI_PARAMETERS = NV_ENC_CONFIG_H264_VUI_PARAMETERS; // HEVC name for the very same struct; the doc text above still says "H264"
+#[doc = " \\struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE"]
+#[doc = " External motion vector hint counts per block type."]
+#[doc = " H264 and AV1 support multiple hint while HEVC supports one hint for each valid candidate."]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE {
+    pub _bitfield_align_1: [u8; 0], // zero-length array: takes no bytes in the struct
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // 4 bytes of packed storage, read and written through the accessor methods in the impl block
+    #[doc = "< [in]: Reserved for future use."]
+    pub reserved1: [u32; 3usize], // byte offset 4 (pinned by the layout test); struct is 16 bytes, 4-byte aligned
+}
+#[test]
+fn bindgen_test_layout__NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE() {
+    // Layout guard (size, alignment, `reserved1` offset); the offset uses addr_of!, not a null deref.
+    let uninit = ::std::mem::MaybeUninit::<_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE>(),
+        16usize,
+        concat!(
+            "Size of: ",
+            stringify!(_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE>(),
+        4usize,
+        concat!(
+            "Alignment of ",
+            stringify!(_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+}
+impl _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE { // getter/setter pairs over `_bitfield_1`; the (usize, u8) argument pairs tile the 32-bit unit without gaps, so they are presumably (bit offset, bit width) — confirm in __BindgenBitfieldUnit
+    #[inline]
+    pub fn numCandsPerBlk16x16(&self) -> u32 { // slot (0, 4); the u64 from `get` is narrowed with `as u32`
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCandsPerBlk16x16(&mut self, val: u32) { // slot (0, 4); `val` is widened to u64 before `set`
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 4u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn numCandsPerBlk16x8(&self) -> u32 { // slot (4, 4)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(4usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCandsPerBlk16x8(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(4usize, 4u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn numCandsPerBlk8x16(&self) -> u32 { // slot (8, 4)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(8usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCandsPerBlk8x16(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(8usize, 4u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn numCandsPerBlk8x8(&self) -> u32 { // slot (12, 4)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(12usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCandsPerBlk8x8(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(12usize, 4u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn numCandsPerSb(&self) -> u32 { // slot (16, 8)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(16usize, 8u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCandsPerSb(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(16usize, 8u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved(&self) -> u32 { // slot (24, 8); distinct from the `reserved1` array field of the struct
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(24usize, 8u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reserved(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(24usize, 8u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn new_bitfield_1( // builds a storage unit from Default::default() and writes all six slots; takes no `self`
+        numCandsPerBlk16x16: u32,
+        numCandsPerBlk16x8: u32,
+        numCandsPerBlk8x16: u32,
+        numCandsPerBlk8x8: u32,
+        numCandsPerSb: u32,
+        reserved: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 4u8, {
+            let numCandsPerBlk16x16: u32 = unsafe { ::std::mem::transmute(numCandsPerBlk16x16) };
+            numCandsPerBlk16x16 as u64
+        });
+        __bindgen_bitfield_unit.set(4usize, 4u8, {
+            let numCandsPerBlk16x8: u32 = unsafe { ::std::mem::transmute(numCandsPerBlk16x8) };
+            numCandsPerBlk16x8 as u64
+        });
+        __bindgen_bitfield_unit.set(8usize, 4u8, {
+            let numCandsPerBlk8x16: u32 = unsafe { ::std::mem::transmute(numCandsPerBlk8x16) };
+            numCandsPerBlk8x16 as u64
+        });
+        __bindgen_bitfield_unit.set(12usize, 4u8, {
+            let numCandsPerBlk8x8: u32 = unsafe { ::std::mem::transmute(numCandsPerBlk8x8) };
+            numCandsPerBlk8x8 as u64
+        });
+        __bindgen_bitfield_unit.set(16usize, 8u8, {
+            let numCandsPerSb: u32 = unsafe { ::std::mem::transmute(numCandsPerSb) };
+            numCandsPerSb as u64
+        });
+        __bindgen_bitfield_unit.set(24usize, 8u8, {
+            let reserved: u32 = unsafe { ::std::mem::transmute(reserved) };
+            reserved as u64
+        });
+        __bindgen_bitfield_unit // caller stores this in `_bitfield_1`
+    }
+}
+#[doc = " \\struct _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE"]
+#[doc = " External motion vector hint counts per block type."]
+#[doc = " H264 and AV1 support multiple hint while HEVC supports one hint for each valid candidate."]
+pub type NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE = _NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE; // plain alias of the struct
+#[doc = " \\struct _NVENC_EXTERNAL_ME_HINT"]
+#[doc = " External Motion Vector hint structure for H264 and HEVC."]
+#[repr(C)]
+#[repr(align(4))]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NVENC_EXTERNAL_ME_HINT {
+    pub _bitfield_align_1: [u16; 0], // zero-length array: takes no bytes in the struct
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // the only storage: 4 bytes, accessed through the impl's getter/setter methods
+} // 4 bytes, 4-byte aligned (pinned by bindgen_test_layout__NVENC_EXTERNAL_ME_HINT)
+#[test]
+fn bindgen_test_layout__NVENC_EXTERNAL_ME_HINT() {
+    // ABI guard for the #[repr(C)] binding. Only size and alignment are checked:
+    // the struct's storage fields (`_bitfield_align_1`, `_bitfield_1`) get no
+    // per-field offset assertion here.
+    const EXPECTED_SIZE: usize = 4;
+    const EXPECTED_ALIGN: usize = 4;
+    type Subject = _NVENC_EXTERNAL_ME_HINT;
+    let actual_size = ::std::mem::size_of::<Subject>();
+    assert_eq!(actual_size, EXPECTED_SIZE, "Size of: _NVENC_EXTERNAL_ME_HINT");
+    let actual_align = ::std::mem::align_of::<Subject>();
+    assert_eq!(actual_align, EXPECTED_ALIGN, "Alignment of _NVENC_EXTERNAL_ME_HINT");
+}
+impl _NVENC_EXTERNAL_ME_HINT { // getter/setter pairs over `_bitfield_1`; the (usize, u8) argument pairs tile the 32-bit unit without gaps, so they are presumably (bit offset, bit width) — confirm in __BindgenBitfieldUnit
+    #[inline]
+    pub fn mvx(&self) -> i32 { // slot (0, 12). NOTE(review): the value is narrowed with `as u32` and reinterpreted as i32; nothing here sign-extends from 12 bits, so a negative value written by set_mvx presumably reads back positive — confirm
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 12u8) as u32) }
+    }
+    #[inline]
+    pub fn set_mvx(&mut self, val: i32) { // slot (0, 12); `val` is reinterpreted as u32, then widened to u64 for `set`
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 12u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn mvy(&self) -> i32 { // slot (12, 10); same no-sign-extension caveat as `mvx`
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(12usize, 10u8) as u32) }
+    }
+    #[inline]
+    pub fn set_mvy(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(12usize, 10u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn refidx(&self) -> i32 { // slot (22, 5)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(22usize, 5u8) as u32) }
+    }
+    #[inline]
+    pub fn set_refidx(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(22usize, 5u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn dir(&self) -> i32 { // slot (27, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(27usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_dir(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(27usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn partType(&self) -> i32 { // slot (28, 2)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(28usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_partType(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(28usize, 2u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn lastofPart(&self) -> i32 { // slot (30, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(30usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_lastofPart(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(30usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn lastOfMB(&self) -> i32 { // slot (31, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(31usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_lastOfMB(&mut self, val: i32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(31usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn new_bitfield_1( // builds a storage unit from Default::default() and writes all seven slots; takes no `self`
+        mvx: i32,
+        mvy: i32,
+        refidx: i32,
+        dir: i32,
+        partType: i32,
+        lastofPart: i32,
+        lastOfMB: i32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 12u8, {
+            let mvx: u32 = unsafe { ::std::mem::transmute(mvx) };
+            mvx as u64
+        });
+        __bindgen_bitfield_unit.set(12usize, 10u8, {
+            let mvy: u32 = unsafe { ::std::mem::transmute(mvy) };
+            mvy as u64
+        });
+        __bindgen_bitfield_unit.set(22usize, 5u8, {
+            let refidx: u32 = unsafe { ::std::mem::transmute(refidx) };
+            refidx as u64
+        });
+        __bindgen_bitfield_unit.set(27usize, 1u8, {
+            let dir: u32 = unsafe { ::std::mem::transmute(dir) };
+            dir as u64
+        });
+        __bindgen_bitfield_unit.set(28usize, 2u8, {
+            let partType: u32 = unsafe { ::std::mem::transmute(partType) };
+            partType as u64
+        });
+        __bindgen_bitfield_unit.set(30usize, 1u8, {
+            let lastofPart: u32 = unsafe { ::std::mem::transmute(lastofPart) };
+            lastofPart as u64
+        });
+        __bindgen_bitfield_unit.set(31usize, 1u8, {
+            let lastOfMB: u32 = unsafe { ::std::mem::transmute(lastOfMB) };
+            lastOfMB as u64
+        });
+        __bindgen_bitfield_unit // caller stores this in `_bitfield_1`
+    }
+}
+#[doc = " \\struct _NVENC_EXTERNAL_ME_HINT"]
+#[doc = " External Motion Vector hint structure for H264 and HEVC."]
+pub type NVENC_EXTERNAL_ME_HINT = _NVENC_EXTERNAL_ME_HINT; // plain alias of the struct
+#[doc = " \\struct _NVENC_EXTERNAL_ME_SB_HINT"]
+#[doc = " External Motion Vector SB hint structure for AV1"]
+#[repr(C)]
+#[repr(align(2))]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NVENC_EXTERNAL_ME_SB_HINT {
+    pub _bitfield_align_1: [u16; 0], // zero-length array: takes no bytes in the struct
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 6usize]>, // the only storage: 6 bytes, accessed through the impl's getter/setter methods
+} // 6 bytes, 2-byte aligned (pinned by bindgen_test_layout__NVENC_EXTERNAL_ME_SB_HINT)
+#[test]
+fn bindgen_test_layout__NVENC_EXTERNAL_ME_SB_HINT() {
+    // ABI guard for the #[repr(C)] binding. Only size and alignment are checked:
+    // the struct's storage fields (`_bitfield_align_1`, `_bitfield_1`) get no
+    // per-field offset assertion here.
+    const EXPECTED_SIZE: usize = 6;
+    const EXPECTED_ALIGN: usize = 2;
+    type Subject = _NVENC_EXTERNAL_ME_SB_HINT;
+    let actual_size = ::std::mem::size_of::<Subject>();
+    assert_eq!(actual_size, EXPECTED_SIZE, "Size of: _NVENC_EXTERNAL_ME_SB_HINT");
+    let actual_align = ::std::mem::align_of::<Subject>();
+    assert_eq!(actual_align, EXPECTED_ALIGN, "Alignment of _NVENC_EXTERNAL_ME_SB_HINT");
+}
+impl _NVENC_EXTERNAL_ME_SB_HINT { // getter/setter pairs over `_bitfield_1`; the (usize, u8) argument pairs tile the 48-bit unit without gaps, so they are presumably (bit offset, bit width) — confirm in __BindgenBitfieldUnit
+    #[inline]
+    pub fn refidx(&self) -> i16 { // slot (0, 5); the u64 from `get` is narrowed with `as u16`, then reinterpreted as i16
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 5u8) as u16) }
+    }
+    #[inline]
+    pub fn set_refidx(&mut self, val: i16) { // slot (0, 5); `val` is reinterpreted as u16, then widened to u64 for `set`
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 5u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn direction(&self) -> i16 { // slot (5, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(5usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_direction(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(5usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn bi(&self) -> i16 { // slot (6, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(6usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_bi(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(6usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn partition_type(&self) -> i16 { // slot (7, 3)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(7usize, 3u8) as u16) }
+    }
+    #[inline]
+    pub fn set_partition_type(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(7usize, 3u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn x8(&self) -> i16 { // slot (10, 3)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(10usize, 3u8) as u16) }
+    }
+    #[inline]
+    pub fn set_x8(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(10usize, 3u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn last_of_cu(&self) -> i16 { // slot (13, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(13usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_last_of_cu(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(13usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn last_of_sb(&self) -> i16 { // slot (14, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(14usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_last_of_sb(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(14usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved0(&self) -> i16 { // slot (15, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(15usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_reserved0(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(15usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn mvx(&self) -> i16 { // slot (16, 14). NOTE(review): nothing here sign-extends from 14 bits, so a negative value written by set_mvx presumably reads back positive — confirm
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(16usize, 14u8) as u16) }
+    }
+    #[inline]
+    pub fn set_mvx(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(16usize, 14u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn cu_size(&self) -> i16 { // slot (30, 2)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(30usize, 2u8) as u16) }
+    }
+    #[inline]
+    pub fn set_cu_size(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(30usize, 2u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn mvy(&self) -> i16 { // slot (32, 12); same no-sign-extension caveat as `mvx`
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(32usize, 12u8) as u16) }
+    }
+    #[inline]
+    pub fn set_mvy(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(32usize, 12u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn y8(&self) -> i16 { // slot (44, 3)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(44usize, 3u8) as u16) }
+    }
+    #[inline]
+    pub fn set_y8(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(44usize, 3u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved1(&self) -> i16 { // slot (47, 1)
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(47usize, 1u8) as u16) }
+    }
+    #[inline]
+    pub fn set_reserved1(&mut self, val: i16) {
+        unsafe {
+            let val: u16 = ::std::mem::transmute(val);
+            self._bitfield_1.set(47usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn new_bitfield_1( // builds a storage unit from Default::default() and writes all thirteen slots; takes no `self`
+        refidx: i16,
+        direction: i16,
+        bi: i16,
+        partition_type: i16,
+        x8: i16,
+        last_of_cu: i16,
+        last_of_sb: i16,
+        reserved0: i16,
+        mvx: i16,
+        cu_size: i16,
+        mvy: i16,
+        y8: i16,
+        reserved1: i16,
+    ) -> __BindgenBitfieldUnit<[u8; 6usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 6usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 5u8, {
+            let refidx: u16 = unsafe { ::std::mem::transmute(refidx) };
+            refidx as u64
+        });
+        __bindgen_bitfield_unit.set(5usize, 1u8, {
+            let direction: u16 = unsafe { ::std::mem::transmute(direction) };
+            direction as u64
+        });
+        __bindgen_bitfield_unit.set(6usize, 1u8, {
+            let bi: u16 = unsafe { ::std::mem::transmute(bi) };
+            bi as u64
+        });
+        __bindgen_bitfield_unit.set(7usize, 3u8, {
+            let partition_type: u16 = unsafe { ::std::mem::transmute(partition_type) };
+            partition_type as u64
+        });
+        __bindgen_bitfield_unit.set(10usize, 3u8, {
+            let x8: u16 = unsafe { ::std::mem::transmute(x8) };
+            x8 as u64
+        });
+        __bindgen_bitfield_unit.set(13usize, 1u8, {
+            let last_of_cu: u16 = unsafe { ::std::mem::transmute(last_of_cu) };
+            last_of_cu as u64
+        });
+        __bindgen_bitfield_unit.set(14usize, 1u8, {
+            let last_of_sb: u16 = unsafe { ::std::mem::transmute(last_of_sb) };
+            last_of_sb as u64
+        });
+        __bindgen_bitfield_unit.set(15usize, 1u8, {
+            let reserved0: u16 = unsafe { ::std::mem::transmute(reserved0) };
+            reserved0 as u64
+        });
+        __bindgen_bitfield_unit.set(16usize, 14u8, {
+            let mvx: u16 = unsafe { ::std::mem::transmute(mvx) };
+            mvx as u64
+        });
+        __bindgen_bitfield_unit.set(30usize, 2u8, {
+            let cu_size: u16 = unsafe { ::std::mem::transmute(cu_size) };
+            cu_size as u64
+        });
+        __bindgen_bitfield_unit.set(32usize, 12u8, {
+            let mvy: u16 = unsafe { ::std::mem::transmute(mvy) };
+            mvy as u64
+        });
+        __bindgen_bitfield_unit.set(44usize, 3u8, {
+            let y8: u16 = unsafe { ::std::mem::transmute(y8) };
+            y8 as u64
+        });
+        __bindgen_bitfield_unit.set(47usize, 1u8, {
+            let reserved1: u16 = unsafe { ::std::mem::transmute(reserved1) };
+            reserved1 as u64
+        });
+        __bindgen_bitfield_unit // caller stores this in `_bitfield_1`
+    }
+}
+#[doc = " Public name for [`_NVENC_EXTERNAL_ME_SB_HINT`] (doxygen tag: `\\struct _NVENC_EXTERNAL_ME_SB_HINT`)."]
+#[doc = " External Motion Vector SB hint structure for AV1"]
+pub type NVENC_EXTERNAL_ME_SB_HINT = _NVENC_EXTERNAL_ME_SB_HINT;
+#[doc = " \\struct _NV_ENC_CONFIG_H264"]
+#[doc = " H264 encoder configuration parameters"]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_H264 {
+    pub _bitfield_align_1: [u16; 0], // zero-length array, occupies no bytes; the name suggests it only carries the bitfield's alignment
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // 4 bytes of packed flags, read and written through the accessor methods of this type
+    #[doc = "< [in]: Specifies the encoding level. Client is recommended to set this to NV_ENC_LEVEL_AUTOSELECT in order to enable the NvEncodeAPI interface to select the correct level."]
+    pub level: u32,
+    #[doc = "< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG. Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically."]
+    pub idrPeriod: u32,
+    #[doc = "< [in]: Set to 1 to enable 4:4:4 separate colour planes"]
+    pub separateColourPlaneFlag: u32,
+    #[doc = "< [in]: Specifies the deblocking filter mode. Permissible value range: [0,2]. This flag corresponds"]
+    #[doc = "to the flag disable_deblocking_filter_idc specified in section 7.4.3 of H.264 specification,"]
+    #[doc = "which specifies whether the operation of the deblocking filter shall be disabled across some"]
+    #[doc = "block edges of the slice and specifies for which edges the filtering is disabled. See section"]
+    #[doc = "7.4.3 of H.264 specification for more details."]
+    pub disableDeblockingFilterIDC: u32,
+    #[doc = "< [in]: Specifies number of temporal layers to be used for hierarchical coding / temporal SVC. Valid value range is [1,::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS]"]
+    pub numTemporalLayers: u32,
+    #[doc = "< [in]: Specifies the SPS id of the sequence header"]
+    pub spsId: u32,
+    #[doc = "< [in]: Specifies the PPS id of the picture header"]
+    pub ppsId: u32,
+    #[doc = "< [in]: Specifies the AdaptiveTransform Mode. Check support for AdaptiveTransform mode using ::NV_ENC_CAPS_SUPPORT_ADAPTIVE_TRANSFORM caps."]
+    pub adaptiveTransformMode: NV_ENC_H264_ADAPTIVE_TRANSFORM_MODE,
+    #[doc = "< [in]: Specifies the FMO Mode. Check support for FMO using ::NV_ENC_CAPS_SUPPORT_FMO caps."]
+    pub fmoMode: NV_ENC_H264_FMO_MODE,
+    #[doc = "< [in]: Specifies the BDirect mode. Check support for BDirect mode using ::NV_ENC_CAPS_SUPPORT_BDIRECT_MODE caps."]
+    pub bdirectMode: NV_ENC_H264_BDIRECT_MODE,
+    #[doc = "< [in]: Specifies the entropy coding mode. Check support for CABAC mode using ::NV_ENC_CAPS_SUPPORT_CABAC caps."]
+    pub entropyCodingMode: NV_ENC_H264_ENTROPY_CODING_MODE,
+    #[doc = "< [in]: Specifies the stereo frame packing mode which is to be signaled in frame packing arrangement SEI"]
+    pub stereoMode: NV_ENC_STEREO_PACKING_MODE,
+    #[doc = "< [in]: Specifies the interval between successive intra refresh if enableIntraRefresh is set. Requires enableIntraRefresh to be set."]
+    #[doc = "Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH."]
+    pub intraRefreshPeriod: u32,
+    #[doc = "< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod"]
+    pub intraRefreshCnt: u32,
+    #[doc = "< [in]: Specifies the DPB size used for encoding. Setting it to 0 will let driver use the default DPB size."]
+    #[doc = "The low latency application which wants to invalidate reference frame as an error resilience tool"]
+    #[doc = "is recommended to use a large DPB size so that the encoder can keep old reference frames which can be used if recent"]
+    #[doc = "frames are invalidated."]
+    pub maxNumRefFrames: u32,
+    #[doc = "< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices"]
+    #[doc = "sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices, sliceMode = 2 MB row based slices, sliceMode = 3 numSlices in Picture."]
+    #[doc = "When forceIntraRefreshWithFrameCnt is set it will have priority over sliceMode setting"]
+    #[doc = "When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice"]
+    pub sliceMode: u32,
+    #[doc = "< [in]: Specifies the parameter needed for sliceMode. For:"]
+    #[doc = "sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice)"]
+    #[doc = "sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)"]
+    #[doc = "sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice)"]
+    #[doc = "sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally"]
+    pub sliceModeData: u32,
+    #[doc = "< [in]: Specifies the H264 video usability info parameters"]
+    pub h264VUIParameters: NV_ENC_CONFIG_H264_VUI_PARAMETERS,
+    #[doc = "< [in]: Specifies the number of LTR frames. This parameter has different meaning in two LTR modes."]
+    #[doc = "In \"LTR Trust\" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR."]
+    #[doc = "In \"LTR Per Picture\" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB."]
+    pub ltrNumFrames: u32,
+    #[doc = "< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_H264::enableLTR for description of the two modes."]
+    #[doc = "Set to 1 to use \"LTR Trust\" mode of LTR operation. Clients are discouraged from using \"LTR Trust\" mode as this mode may"]
+    #[doc = "be deprecated in future releases."]
+    #[doc = "Set to 0 when using \"LTR Per Picture\" mode of LTR operation."]
+    pub ltrTrustMode: u32,
+    #[doc = "< [in]: Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for yuv444 input."]
+    #[doc = "Check support for YUV444 encoding using ::NV_ENC_CAPS_SUPPORT_YUV444_ENCODE caps."]
+    pub chromaFormatIDC: u32,
+    #[doc = "< [in]: Specifies the max temporal layer used for temporal SVC / hierarchical coding."]
+    #[doc = "Default value of this field is NV_ENC_CAPS::NV_ENC_CAPS_NUM_MAX_TEMPORAL_LAYERS. Note that the value NV_ENC_CONFIG_H264::maxNumRefFrames should"]
+    #[doc = "be greater than or equal to (NV_ENC_CONFIG_H264::maxTemporalLayers - 2) * 2, for NV_ENC_CONFIG_H264::maxTemporalLayers >= 2."]
+    pub maxTemporalLayers: u32,
+    #[doc = "< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps."]
+    pub useBFramesAsRef: NV_ENC_BFRAME_REF_MODE,
+    #[doc = "< [in]: Specifies max number of reference frames in reference picture list L0, that can be used by hardware for prediction of a frame."]
+    #[doc = "Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps."]
+    pub numRefL0: NV_ENC_NUM_REF_FRAMES,
+    #[doc = "< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame."]
+    #[doc = "Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps."]
+    pub numRefL1: NV_ENC_NUM_REF_FRAMES,
+    #[doc = "< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit."]
+    pub outputBitDepth: NV_ENC_BIT_DEPTH,
+    #[doc = "< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input."]
+    pub inputBitDepth: NV_ENC_BIT_DEPTH,
+    #[doc = "< [in]: Specifies the strength of temporal filtering. Check support for temporal filter using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps."]
+    #[doc = "Temporal filter feature is supported only if frameIntervalP >= 5."]
+    #[doc = "If ZeroReorderDelay or enableStereoMVC is enabled, the temporal filter feature is not supported."]
+    #[doc = "Temporal filter is recommended for natural contents."]
+    pub tfLevel: NV_ENC_TEMPORAL_FILTER_LEVEL,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 264usize],
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_H264() {
+    // Offsets are measured on uninitialized storage with `addr_of!`, which takes a
+    // field's address without creating a reference and without reading memory.
+    // Forming `&(*null).field` instead would dereference a null pointer, which is
+    // undefined behavior and is reported by rustc's `deref_nullptr` lint.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_CONFIG_H264> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_H264>(),
+        1792usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_H264))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_H264>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_H264))
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).level) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(level)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).idrPeriod) as usize - ptr as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(idrPeriod)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).separateColourPlaneFlag) as usize - ptr as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(separateColourPlaneFlag)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).disableDeblockingFilterIDC) as usize - ptr as usize },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(disableDeblockingFilterIDC)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).numTemporalLayers) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(numTemporalLayers)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).spsId) as usize - ptr as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(spsId)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).ppsId) as usize - ptr as usize },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(ppsId)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).adaptiveTransformMode) as usize - ptr as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(adaptiveTransformMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).fmoMode) as usize - ptr as usize },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(fmoMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).bdirectMode) as usize - ptr as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(bdirectMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).entropyCodingMode) as usize - ptr as usize },
+        44usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(entropyCodingMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).stereoMode) as usize - ptr as usize },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(stereoMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).intraRefreshPeriod) as usize - ptr as usize },
+        52usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(intraRefreshPeriod)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).intraRefreshCnt) as usize - ptr as usize },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(intraRefreshCnt)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).maxNumRefFrames) as usize - ptr as usize },
+        60usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(maxNumRefFrames)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).sliceMode) as usize - ptr as usize },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(sliceMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).sliceModeData) as usize - ptr as usize },
+        68usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(sliceModeData)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).h264VUIParameters) as usize - ptr as usize },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(h264VUIParameters)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).ltrNumFrames) as usize - ptr as usize },
+        184usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(ltrNumFrames)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).ltrTrustMode) as usize - ptr as usize },
+        188usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(ltrTrustMode)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).chromaFormatIDC) as usize - ptr as usize },
+        192usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(chromaFormatIDC)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).maxTemporalLayers) as usize - ptr as usize },
+        196usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(maxTemporalLayers)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).useBFramesAsRef) as usize - ptr as usize },
+        200usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(useBFramesAsRef)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).numRefL0) as usize - ptr as usize },
+        204usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(numRefL0)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).numRefL1) as usize - ptr as usize },
+        208usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(numRefL1)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).outputBitDepth) as usize - ptr as usize },
+        212usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(outputBitDepth)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).inputBitDepth) as usize - ptr as usize },
+        216usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(inputBitDepth)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).tfLevel) as usize - ptr as usize },
+        220usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(tfLevel)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        224usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        1280usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG_H264 {
+    fn default() -> Self {
+        // Storage for one `Self` whose bytes are all set to zero.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // Sound only if every field type accepts the all-zero bit pattern.
+        // NOTE(review): several field types are declared elsewhere - confirm they allow 0.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_CONFIG_H264 {
+    #[inline]
+    pub fn enableTemporalSVC(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableTemporalSVC(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (0, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(0usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableStereoMVC(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableStereoMVC(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (1, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(1usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn hierarchicalPFrames(&self) -> u32 {
+        self._bitfield_1.get(2usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_hierarchicalPFrames(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (2, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(2usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn hierarchicalBFrames(&self) -> u32 {
+        self._bitfield_1.get(3usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_hierarchicalBFrames(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (3, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(3usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn outputBufferingPeriodSEI(&self) -> u32 {
+        self._bitfield_1.get(4usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_outputBufferingPeriodSEI(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (4, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(4usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn outputPictureTimingSEI(&self) -> u32 {
+        self._bitfield_1.get(5usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_outputPictureTimingSEI(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (5, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(5usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn outputAUD(&self) -> u32 {
+        self._bitfield_1.get(6usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_outputAUD(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (6, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(6usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn disableSPSPPS(&self) -> u32 {
+        self._bitfield_1.get(7usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_disableSPSPPS(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (7, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(7usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn outputFramePackingSEI(&self) -> u32 {
+        self._bitfield_1.get(8usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_outputFramePackingSEI(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (8, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(8usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn outputRecoveryPointSEI(&self) -> u32 {
+        self._bitfield_1.get(9usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_outputRecoveryPointSEI(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (9, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(9usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableIntraRefresh(&self) -> u32 {
+        self._bitfield_1.get(10usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableIntraRefresh(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (10, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(10usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableConstrainedEncoding(&self) -> u32 {
+        self._bitfield_1.get(11usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableConstrainedEncoding(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (11, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(11usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn repeatSPSPPS(&self) -> u32 {
+        self._bitfield_1.get(12usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_repeatSPSPPS(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (12, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(12usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableVFR(&self) -> u32 {
+        self._bitfield_1.get(13usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableVFR(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (13, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(13usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableLTR(&self) -> u32 {
+        self._bitfield_1.get(14usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableLTR(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (14, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(14usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn qpPrimeYZeroTransformBypassFlag(&self) -> u32 {
+        self._bitfield_1.get(15usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_qpPrimeYZeroTransformBypassFlag(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (15, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(15usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn useConstrainedIntraPred(&self) -> u32 {
+        self._bitfield_1.get(16usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_useConstrainedIntraPred(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (16, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(16usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableFillerDataInsertion(&self) -> u32 {
+        self._bitfield_1.get(17usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableFillerDataInsertion(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (17, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(17usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn disableSVCPrefixNalu(&self) -> u32 {
+        self._bitfield_1.get(18usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_disableSVCPrefixNalu(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (18, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(18usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableScalabilityInfoSEI(&self) -> u32 {
+        self._bitfield_1.get(19usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableScalabilityInfoSEI(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (19, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(19usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn singleSliceIntraRefresh(&self) -> u32 {
+        self._bitfield_1.get(20usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_singleSliceIntraRefresh(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (20, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(20usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn enableTimeCode(&self) -> u32 {
+        self._bitfield_1.get(21usize, 1u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_enableTimeCode(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (21, 1).
+        let widened = u64::from(val);
+        self._bitfield_1.set(21usize, 1u8, widened)
+    }
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        self._bitfield_1.get(22usize, 10u8) as u32 // the cast already yields the u32 return type
+    }
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        // `val` is already a u32, so it only needs widening to u64 before being
+        // passed to `set` with the leading arguments (22, 10).
+        let widened = u64::from(val);
+        self._bitfield_1.set(22usize, 10u8, widened)
+    }
+    #[inline]
+    pub fn new_bitfield_1(
+        enableTemporalSVC: u32,
+        enableStereoMVC: u32,
+        hierarchicalPFrames: u32,
+        hierarchicalBFrames: u32,
+        outputBufferingPeriodSEI: u32,
+        outputPictureTimingSEI: u32,
+        outputAUD: u32,
+        disableSPSPPS: u32,
+        outputFramePackingSEI: u32,
+        outputRecoveryPointSEI: u32,
+        enableIntraRefresh: u32,
+        enableConstrainedEncoding: u32,
+        repeatSPSPPS: u32,
+        enableVFR: u32,
+        enableLTR: u32,
+        qpPrimeYZeroTransformBypassFlag: u32,
+        useConstrainedIntraPred: u32,
+        enableFillerDataInsertion: u32,
+        disableSVCPrefixNalu: u32,
+        enableScalabilityInfoSEI: u32,
+        singleSliceIntraRefresh: u32,
+        enableTimeCode: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 1u8, {
+            let enableTemporalSVC: u32 = unsafe { ::std::mem::transmute(enableTemporalSVC) };
+            enableTemporalSVC as u64
+        });
+        __bindgen_bitfield_unit.set(1usize, 1u8, {
+            let enableStereoMVC: u32 = unsafe { ::std::mem::transmute(enableStereoMVC) };
+            enableStereoMVC as u64
+        });
+        __bindgen_bitfield_unit.set(2usize, 1u8, {
+            let hierarchicalPFrames: u32 = unsafe { ::std::mem::transmute(hierarchicalPFrames) };
+            hierarchicalPFrames as u64
+        });
+        __bindgen_bitfield_unit.set(3usize, 1u8, {
+            let hierarchicalBFrames: u32 = unsafe { ::std::mem::transmute(hierarchicalBFrames) };
+            hierarchicalBFrames as u64
+        });
+        __bindgen_bitfield_unit.set(4usize, 1u8, {
+            let outputBufferingPeriodSEI: u32 =
+                unsafe { ::std::mem::transmute(outputBufferingPeriodSEI) };
+            outputBufferingPeriodSEI as u64
+        });
+        __bindgen_bitfield_unit.set(5usize, 1u8, {
+            let outputPictureTimingSEI: u32 =
+                unsafe { ::std::mem::transmute(outputPictureTimingSEI) };
+            outputPictureTimingSEI as u64
+        });
+        __bindgen_bitfield_unit.set(6usize, 1u8, {
+            let outputAUD: u32 = unsafe { ::std::mem::transmute(outputAUD) };
+            outputAUD as u64
+        });
+        __bindgen_bitfield_unit.set(7usize, 1u8, {
+            let disableSPSPPS: u32 = unsafe { ::std::mem::transmute(disableSPSPPS) };
+            disableSPSPPS as u64
+        });
+        __bindgen_bitfield_unit.set(8usize, 1u8, {
+            let outputFramePackingSEI: u32 =
+                unsafe { ::std::mem::transmute(outputFramePackingSEI) };
+            outputFramePackingSEI as u64
+        });
+        __bindgen_bitfield_unit.set(9usize, 1u8, {
+            let outputRecoveryPointSEI: u32 =
+                unsafe { ::std::mem::transmute(outputRecoveryPointSEI) };
+            outputRecoveryPointSEI as u64
+        });
+        __bindgen_bitfield_unit.set(10usize, 1u8, {
+            let enableIntraRefresh: u32 = unsafe { ::std::mem::transmute(enableIntraRefresh) };
+            enableIntraRefresh as u64
+        });
+        __bindgen_bitfield_unit.set(11usize, 1u8, {
+            let enableConstrainedEncoding: u32 =
+                unsafe { ::std::mem::transmute(enableConstrainedEncoding) };
+            enableConstrainedEncoding as u64
+        });
+        __bindgen_bitfield_unit.set(12usize, 1u8, {
+            let repeatSPSPPS: u32 = unsafe { ::std::mem::transmute(repeatSPSPPS) };
+            repeatSPSPPS as u64
+        });
+        __bindgen_bitfield_unit.set(13usize, 1u8, {
+            let enableVFR: u32 = unsafe { ::std::mem::transmute(enableVFR) };
+            enableVFR as u64
+        });
+        __bindgen_bitfield_unit.set(14usize, 1u8, {
+            let enableLTR: u32 = unsafe { ::std::mem::transmute(enableLTR) };
+            enableLTR as u64
+        });
+        __bindgen_bitfield_unit.set(15usize, 1u8, {
+            let qpPrimeYZeroTransformBypassFlag: u32 =
+                unsafe { ::std::mem::transmute(qpPrimeYZeroTransformBypassFlag) };
+            qpPrimeYZeroTransformBypassFlag as u64
+        });
+        __bindgen_bitfield_unit.set(16usize, 1u8, {
+            let useConstrainedIntraPred: u32 =
+                unsafe { ::std::mem::transmute(useConstrainedIntraPred) };
+            useConstrainedIntraPred as u64
+        });
+        __bindgen_bitfield_unit.set(17usize, 1u8, {
+            let enableFillerDataInsertion: u32 =
+                unsafe { ::std::mem::transmute(enableFillerDataInsertion) };
+            enableFillerDataInsertion as u64
+        });
+        __bindgen_bitfield_unit.set(18usize, 1u8, {
+            let disableSVCPrefixNalu: u32 = unsafe { ::std::mem::transmute(disableSVCPrefixNalu) };
+            disableSVCPrefixNalu as u64
+        });
+        __bindgen_bitfield_unit.set(19usize, 1u8, {
+            let enableScalabilityInfoSEI: u32 =
+                unsafe { ::std::mem::transmute(enableScalabilityInfoSEI) };
+            enableScalabilityInfoSEI as u64
+        });
+        __bindgen_bitfield_unit.set(20usize, 1u8, {
+            let singleSliceIntraRefresh: u32 =
+                unsafe { ::std::mem::transmute(singleSliceIntraRefresh) };
+            singleSliceIntraRefresh as u64
+        });
+        __bindgen_bitfield_unit.set(21usize, 1u8, {
+            let enableTimeCode: u32 = unsafe { ::std::mem::transmute(enableTimeCode) };
+            enableTimeCode as u64
+        });
+        __bindgen_bitfield_unit.set(22usize, 10u8, {
+            let reservedBitFields: u32 = unsafe { ::std::mem::transmute(reservedBitFields) };
+            reservedBitFields as u64
+        });
+        __bindgen_bitfield_unit
+    }
+}
+#[doc = " \\struct _NV_ENC_CONFIG_H264"]
+#[doc = " H264 encoder configuration parameters"]
+// Exposes `_NV_ENC_CONFIG_H264` under a name without the leading underscore.
+pub type NV_ENC_CONFIG_H264 = _NV_ENC_CONFIG_H264;
+#[doc = " \\struct _NV_ENC_CONFIG_HEVC"]
+#[doc = " HEVC encoder configuration parameters to be set during initialization."]
+// `#[repr(C)]` layout; the accompanying layout test pins the size to 1560 bytes, the
+// alignment to 8 bytes and the byte offset of every named field.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_HEVC {
+    #[doc = "< [in]: Specifies the level of the encoded bitstream."]
+    pub level: u32,
+    #[doc = "< [in]: Specifies the level tier of the encoded bitstream."]
+    pub tier: u32,
+    #[doc = "< [in]: Specifies the minimum size of luma coding unit."]
+    pub minCUSize: NV_ENC_HEVC_CUSIZE,
+    #[doc = "< [in]: Specifies the maximum size of luma coding unit. Currently NVENC SDK only supports maxCUSize equal to NV_ENC_HEVC_CUSIZE_32x32."]
+    pub maxCUSize: NV_ENC_HEVC_CUSIZE,
+    // Zero-length array that precedes the bitfield storage; it occupies no bytes.
+    pub _bitfield_align_1: [u8; 0],
+    // Four bytes (32 bits) of packed flags, sitting between `maxCUSize` (offset 12) and
+    // `idrPeriod` (offset 20). Read and written through the accessor methods on this type,
+    // which use these bit positions:
+    //   0 useConstrainedIntraPred      1 disableDeblockAcrossSliceBoundary
+    //   2 outputBufferingPeriodSEI     3 outputPictureTimingSEI
+    //   4 outputAUD                    5 enableLTR
+    //   6 disableSPSPPS                7 repeatSPSPPS
+    //   8 enableIntraRefresh           9-10 chromaFormatIDC
+    //   11-13 reserved3                14 enableFillerDataInsertion
+    //   15 enableConstrainedEncoding   16 enableAlphaLayerEncoding
+    //   17 singleSliceIntraRefresh     18 outputRecoveryPointSEI
+    //   19 outputTimeCodeSEI           20 enableTemporalSVC
+    //   21 enableMVHEVC                22 outputHevc3DReferenceDisplayInfo
+    //   23 outputMaxCll                24 outputMasteringDisplay
+    //   25-31 reserved
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    #[doc = "< [in]: Specifies the IDR interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG. Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically."]
+    pub idrPeriod: u32,
+    #[doc = "< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set."]
+    #[doc = "Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH."]
+    pub intraRefreshPeriod: u32,
+    #[doc = "< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod"]
+    pub intraRefreshCnt: u32,
+    #[doc = "< [in]: Specifies the maximum number of references frames in the DPB."]
+    pub maxNumRefFramesInDPB: u32,
+    #[doc = "< [in]: This parameter has different meaning in two LTR modes."]
+    #[doc = "In \"LTR Trust\" mode (ltrTrustMode = 1), encoder will mark the first ltrNumFrames base layer reference frames within each IDR interval as LTR."]
+    #[doc = "In \"LTR Per Picture\" mode (ltrTrustMode = 0 and ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB."]
+    #[doc = "These ltrNumFrames acts as a guidance to the encoder and are not necessarily honored. To achieve a right balance between the encoding"]
+    #[doc = "quality and keeping LTR frames in the DPB queue, the encoder can internally limit the number of LTR frames."]
+    #[doc = "The number of LTR frames actually used depends upon the encoding preset being used; Faster encoding presets will use fewer LTR frames."]
+    pub ltrNumFrames: u32,
+    #[doc = "< [in]: Specifies the VPS id of the video parameter set"]
+    pub vpsId: u32,
+    #[doc = "< [in]: Specifies the SPS id of the sequence header"]
+    pub spsId: u32,
+    #[doc = "< [in]: Specifies the PPS id of the picture header"]
+    pub ppsId: u32,
+    #[doc = "< [in]: This parameter in conjunction with sliceModeData specifies the way in which the picture is divided into slices"]
+    #[doc = "sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices, sliceMode = 2 CTU row based slices, sliceMode = 3, numSlices in Picture"]
+    #[doc = "When sliceMode == 0 and sliceModeData == 0 whole picture will be coded with one slice"]
+    pub sliceMode: u32,
+    #[doc = "< [in]: Specifies the parameter needed for sliceMode. For:"]
+    #[doc = "sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice)"]
+    #[doc = "sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice)"]
+    #[doc = "sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice)"]
+    #[doc = "sliceMode = 3, sliceModeData specifies number of slices in the picture. Driver will divide picture into slices optimally"]
+    pub sliceModeData: u32,
+    #[doc = "< [in]: Specifies the max temporal layer used for hierarchical coding."]
+    pub maxTemporalLayersMinus1: u32,
+    #[doc = "< [in]: Specifies the HEVC video usability info parameters"]
+    pub hevcVUIParameters: NV_ENC_CONFIG_HEVC_VUI_PARAMETERS,
+    #[doc = "< [in]: Specifies the LTR operating mode. See comments near NV_ENC_CONFIG_HEVC::enableLTR for description of the two modes."]
+    #[doc = "Set to 1 to use \"LTR Trust\" mode of LTR operation. Clients are discouraged to use \"LTR Trust\" mode as this mode may"]
+    #[doc = "be deprecated in future releases."]
+    #[doc = "Set to 0 when using \"LTR Per Picture\" mode of LTR operation."]
+    pub ltrTrustMode: u32,
+    #[doc = "< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using  ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps."]
+    pub useBFramesAsRef: NV_ENC_BFRAME_REF_MODE,
+    #[doc = "< [in]: Specifies max number of reference frames in reference picture list L0, that can be used by hardware for prediction of a frame."]
+    #[doc = "Check support for numRefL0 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps."]
+    pub numRefL0: NV_ENC_NUM_REF_FRAMES,
+    #[doc = "< [in]: Specifies max number of reference frames in reference picture list L1, that can be used by hardware for prediction of a frame."]
+    #[doc = "Check support for numRefL1 using ::NV_ENC_CAPS_SUPPORT_MULTIPLE_REF_FRAMES caps."]
+    pub numRefL1: NV_ENC_NUM_REF_FRAMES,
+    #[doc = "< [in]: Specifies the strength of the temporal filtering. Check support for temporal filtering using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps."]
+    #[doc = "Temporal filter feature is supported only if frameIntervalP >= 5."]
+    #[doc = "Temporal filter feature is not supported with ZeroReorderDelay/enableStereoMVC/AlphaLayerEncoding."]
+    #[doc = "Temporal filter is recommended for natural contents."]
+    pub tfLevel: NV_ENC_TEMPORAL_FILTER_LEVEL,
+    #[doc = "< [in]: Specifies the deblocking filter mode. Permissible value range: [0,2]. This flag corresponds"]
+    #[doc = "to the flag pps_deblocking_filter_disabled_flag specified in section 7.4.3.3 of H.265 specification,"]
+    #[doc = "which specifies whether the operation of the deblocking filter shall be disabled across some"]
+    #[doc = "block edges of the slice and specifies for which edges the filtering is disabled. See section"]
+    #[doc = "7.4.3.3 of H.265 specification for more details."]
+    pub disableDeblockingFilterIDC: u32,
+    #[doc = "< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit."]
+    #[doc = "SW will do the bitdepth conversion internally from inputBitDepth -> outputBitDepth if bit depths differ"]
+    #[doc = "Support for 8 bit input to 10 bit encode conversion only"]
+    pub outputBitDepth: NV_ENC_BIT_DEPTH,
+    #[doc = "< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input."]
+    pub inputBitDepth: NV_ENC_BIT_DEPTH,
+    #[doc = "< [in]: Specifies the number of temporal layers to be used for hierarchical coding."]
+    pub numTemporalLayers: u32,
+    #[doc = "< [in]: Specifies number of views for MVHEVC"]
+    pub numViews: u32,
+    #[doc = "< [in]: Reserved and must be set to 0."]
+    pub reserved1: [u32; 208usize],
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_HEVC() {
+    // Layout check for `_NV_ENC_CONFIG_HEVC`: the total size, the alignment and the byte
+    // offset of every named field must equal the values recorded below.
+    //
+    // Offsets are measured on uninitialized storage with `addr_of!`, which computes a field
+    // address without creating a reference or reading memory. The earlier form,
+    // `&(*ptr::null::<T>()).field`, dereferenced a null pointer: that is undefined behavior,
+    // is reported by rustc's `deref_nullptr` lint, and can panic at run time on toolchains
+    // that insert null-pointer checks when debug assertions are enabled.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_CONFIG_HEVC>::uninit();
+    let base = storage.as_ptr();
+    // Asserts that `$field` starts `$expected` bytes past the beginning of the struct.
+    macro_rules! check_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_CONFIG_HEVC),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_HEVC>(),
+        1560usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_HEVC))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_HEVC>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_HEVC))
+    );
+    check_offset!(level, 0usize);
+    check_offset!(tier, 4usize);
+    check_offset!(minCUSize, 8usize);
+    check_offset!(maxCUSize, 12usize);
+    // Bytes 16..20 hold `_bitfield_1`, which the generated test does not probe directly.
+    check_offset!(idrPeriod, 20usize);
+    check_offset!(intraRefreshPeriod, 24usize);
+    check_offset!(intraRefreshCnt, 28usize);
+    check_offset!(maxNumRefFramesInDPB, 32usize);
+    check_offset!(ltrNumFrames, 36usize);
+    check_offset!(vpsId, 40usize);
+    check_offset!(spsId, 44usize);
+    check_offset!(ppsId, 48usize);
+    check_offset!(sliceMode, 52usize);
+    check_offset!(sliceModeData, 56usize);
+    check_offset!(maxTemporalLayersMinus1, 60usize);
+    check_offset!(hevcVUIParameters, 64usize);
+    check_offset!(ltrTrustMode, 176usize);
+    check_offset!(useBFramesAsRef, 180usize);
+    check_offset!(numRefL0, 184usize);
+    check_offset!(numRefL1, 188usize);
+    check_offset!(tfLevel, 192usize);
+    check_offset!(disableDeblockingFilterIDC, 196usize);
+    check_offset!(outputBitDepth, 200usize);
+    check_offset!(inputBitDepth, 204usize);
+    check_offset!(numTemporalLayers, 208usize);
+    check_offset!(numViews, 212usize);
+    check_offset!(reserved1, 216usize);
+    check_offset!(reserved2, 1048usize);
+}
+impl Default for _NV_ENC_CONFIG_HEVC {
+    /// Produces a value in which every byte is zero.
+    fn default() -> Self {
+        // `zeroed()` hands back zero-filled storage, so no separate `write_bytes` pass is needed.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_CONFIG_HEVC {
+    #[inline]
+    /// Returns the 1-bit `useConstrainedIntraPred` field read from bit offset 0 of `_bitfield_1`.
+    pub fn useConstrainedIntraPred(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(0usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `useConstrainedIntraPred` field at bit offset 0 of `_bitfield_1`.
+    pub fn set_useConstrainedIntraPred(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(0usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `disableDeblockAcrossSliceBoundary` field read from bit offset 1 of `_bitfield_1`.
+    pub fn disableDeblockAcrossSliceBoundary(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(1usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `disableDeblockAcrossSliceBoundary` field at bit offset 1 of `_bitfield_1`.
+    pub fn set_disableDeblockAcrossSliceBoundary(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(1usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputBufferingPeriodSEI` field read from bit offset 2 of `_bitfield_1`.
+    pub fn outputBufferingPeriodSEI(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(2usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputBufferingPeriodSEI` field at bit offset 2 of `_bitfield_1`.
+    pub fn set_outputBufferingPeriodSEI(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(2usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputPictureTimingSEI` field read from bit offset 3 of `_bitfield_1`.
+    pub fn outputPictureTimingSEI(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(3usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputPictureTimingSEI` field at bit offset 3 of `_bitfield_1`.
+    pub fn set_outputPictureTimingSEI(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(3usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputAUD` field read from bit offset 4 of `_bitfield_1`.
+    pub fn outputAUD(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(4usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputAUD` field at bit offset 4 of `_bitfield_1`.
+    pub fn set_outputAUD(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(4usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableLTR` field read from bit offset 5 of `_bitfield_1`.
+    pub fn enableLTR(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(5usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableLTR` field at bit offset 5 of `_bitfield_1`.
+    pub fn set_enableLTR(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(5usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `disableSPSPPS` field read from bit offset 6 of `_bitfield_1`.
+    pub fn disableSPSPPS(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(6usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `disableSPSPPS` field at bit offset 6 of `_bitfield_1`.
+    pub fn set_disableSPSPPS(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(6usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `repeatSPSPPS` field read from bit offset 7 of `_bitfield_1`.
+    pub fn repeatSPSPPS(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(7usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `repeatSPSPPS` field at bit offset 7 of `_bitfield_1`.
+    pub fn set_repeatSPSPPS(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(7usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableIntraRefresh` field read from bit offset 8 of `_bitfield_1`.
+    pub fn enableIntraRefresh(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(8usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableIntraRefresh` field at bit offset 8 of `_bitfield_1`.
+    pub fn set_enableIntraRefresh(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(8usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 2-bit `chromaFormatIDC` field read from bit offset 9 of `_bitfield_1`.
+    pub fn chromaFormatIDC(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(9usize, 2u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 2-bit `chromaFormatIDC` field at bit offset 9 of `_bitfield_1`.
+    pub fn set_chromaFormatIDC(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(9usize, 2u8, widened)
+    }
+    #[inline]
+    /// Returns the 3-bit `reserved3` field read from bit offset 11 of `_bitfield_1`.
+    pub fn reserved3(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(11usize, 3u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 3-bit `reserved3` field at bit offset 11 of `_bitfield_1`.
+    pub fn set_reserved3(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(11usize, 3u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableFillerDataInsertion` field read from bit offset 14 of `_bitfield_1`.
+    pub fn enableFillerDataInsertion(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(14usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableFillerDataInsertion` field at bit offset 14 of `_bitfield_1`.
+    pub fn set_enableFillerDataInsertion(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(14usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableConstrainedEncoding` field read from bit offset 15 of `_bitfield_1`.
+    pub fn enableConstrainedEncoding(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(15usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableConstrainedEncoding` field at bit offset 15 of `_bitfield_1`.
+    pub fn set_enableConstrainedEncoding(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(15usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableAlphaLayerEncoding` field read from bit offset 16 of `_bitfield_1`.
+    pub fn enableAlphaLayerEncoding(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(16usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableAlphaLayerEncoding` field at bit offset 16 of `_bitfield_1`.
+    pub fn set_enableAlphaLayerEncoding(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(16usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `singleSliceIntraRefresh` field read from bit offset 17 of `_bitfield_1`.
+    pub fn singleSliceIntraRefresh(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(17usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `singleSliceIntraRefresh` field at bit offset 17 of `_bitfield_1`.
+    pub fn set_singleSliceIntraRefresh(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(17usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputRecoveryPointSEI` field read from bit offset 18 of `_bitfield_1`.
+    pub fn outputRecoveryPointSEI(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(18usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputRecoveryPointSEI` field at bit offset 18 of `_bitfield_1`.
+    pub fn set_outputRecoveryPointSEI(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(18usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputTimeCodeSEI` field read from bit offset 19 of `_bitfield_1`.
+    pub fn outputTimeCodeSEI(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(19usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputTimeCodeSEI` field at bit offset 19 of `_bitfield_1`.
+    pub fn set_outputTimeCodeSEI(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(19usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableTemporalSVC` field read from bit offset 20 of `_bitfield_1`.
+    pub fn enableTemporalSVC(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(20usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableTemporalSVC` field at bit offset 20 of `_bitfield_1`.
+    pub fn set_enableTemporalSVC(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(20usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `enableMVHEVC` field read from bit offset 21 of `_bitfield_1`.
+    pub fn enableMVHEVC(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(21usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `enableMVHEVC` field at bit offset 21 of `_bitfield_1`.
+    pub fn set_enableMVHEVC(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(21usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputHevc3DReferenceDisplayInfo` field read from bit offset 22 of `_bitfield_1`.
+    pub fn outputHevc3DReferenceDisplayInfo(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(22usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputHevc3DReferenceDisplayInfo` field at bit offset 22 of `_bitfield_1`.
+    pub fn set_outputHevc3DReferenceDisplayInfo(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(22usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputMaxCll` field read from bit offset 23 of `_bitfield_1`.
+    pub fn outputMaxCll(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(23usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputMaxCll` field at bit offset 23 of `_bitfield_1`.
+    pub fn set_outputMaxCll(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(23usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 1-bit `outputMasteringDisplay` field read from bit offset 24 of `_bitfield_1`.
+    pub fn outputMasteringDisplay(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(24usize, 1u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 1-bit `outputMasteringDisplay` field at bit offset 24 of `_bitfield_1`.
+    pub fn set_outputMasteringDisplay(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(24usize, 1u8, widened)
+    }
+    #[inline]
+    /// Returns the 7-bit `reserved` field read from bit offset 25 of `_bitfield_1`.
+    pub fn reserved(&self) -> u32 {
+        let raw_bits = self._bitfield_1.get(25usize, 7u8);
+        raw_bits as u32
+    }
+    #[inline]
+    /// Writes `val` to the 7-bit `reserved` field at bit offset 25 of `_bitfield_1`.
+    pub fn set_reserved(&mut self, val: u32) {
+        let widened = u64::from(val);
+        self._bitfield_1.set(25usize, 7u8, widened)
+    }
+    #[inline]
+    pub fn new_bitfield_1(
+        useConstrainedIntraPred: u32,
+        disableDeblockAcrossSliceBoundary: u32,
+        outputBufferingPeriodSEI: u32,
+        outputPictureTimingSEI: u32,
+        outputAUD: u32,
+        enableLTR: u32,
+        disableSPSPPS: u32,
+        repeatSPSPPS: u32,
+        enableIntraRefresh: u32,
+        chromaFormatIDC: u32,
+        reserved3: u32,
+        enableFillerDataInsertion: u32,
+        enableConstrainedEncoding: u32,
+        enableAlphaLayerEncoding: u32,
+        singleSliceIntraRefresh: u32,
+        outputRecoveryPointSEI: u32,
+        outputTimeCodeSEI: u32,
+        enableTemporalSVC: u32,
+        enableMVHEVC: u32,
+        outputHevc3DReferenceDisplayInfo: u32,
+        outputMaxCll: u32,
+        outputMasteringDisplay: u32,
+        reserved: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 1u8, {
+            let useConstrainedIntraPred: u32 =
+                unsafe { ::std::mem::transmute(useConstrainedIntraPred) };
+            useConstrainedIntraPred as u64
+        });
+        __bindgen_bitfield_unit.set(1usize, 1u8, {
+            let disableDeblockAcrossSliceBoundary: u32 =
+                unsafe { ::std::mem::transmute(disableDeblockAcrossSliceBoundary) };
+            disableDeblockAcrossSliceBoundary as u64
+        });
+        __bindgen_bitfield_unit.set(2usize, 1u8, {
+            let outputBufferingPeriodSEI: u32 =
+                unsafe { ::std::mem::transmute(outputBufferingPeriodSEI) };
+            outputBufferingPeriodSEI as u64
+        });
+        __bindgen_bitfield_unit.set(3usize, 1u8, {
+            let outputPictureTimingSEI: u32 =
+                unsafe { ::std::mem::transmute(outputPictureTimingSEI) };
+            outputPictureTimingSEI as u64
+        });
+        __bindgen_bitfield_unit.set(4usize, 1u8, {
+            let outputAUD: u32 = unsafe { ::std::mem::transmute(outputAUD) };
+            outputAUD as u64
+        });
+        __bindgen_bitfield_unit.set(5usize, 1u8, {
+            let enableLTR: u32 = unsafe { ::std::mem::transmute(enableLTR) };
+            enableLTR as u64
+        });
+        __bindgen_bitfield_unit.set(6usize, 1u8, {
+            let disableSPSPPS: u32 = unsafe { ::std::mem::transmute(disableSPSPPS) };
+            disableSPSPPS as u64
+        });
+        __bindgen_bitfield_unit.set(7usize, 1u8, {
+            let repeatSPSPPS: u32 = unsafe { ::std::mem::transmute(repeatSPSPPS) };
+            repeatSPSPPS as u64
+        });
+        __bindgen_bitfield_unit.set(8usize, 1u8, {
+            let enableIntraRefresh: u32 = unsafe { ::std::mem::transmute(enableIntraRefresh) };
+            enableIntraRefresh as u64
+        });
+        __bindgen_bitfield_unit.set(9usize, 2u8, {
+            let chromaFormatIDC: u32 = unsafe { ::std::mem::transmute(chromaFormatIDC) };
+            chromaFormatIDC as u64
+        });
+        __bindgen_bitfield_unit.set(11usize, 3u8, {
+            let reserved3: u32 = unsafe { ::std::mem::transmute(reserved3) };
+            reserved3 as u64
+        });
+        __bindgen_bitfield_unit.set(14usize, 1u8, {
+            let enableFillerDataInsertion: u32 =
+                unsafe { ::std::mem::transmute(enableFillerDataInsertion) };
+            enableFillerDataInsertion as u64
+        });
+        __bindgen_bitfield_unit.set(15usize, 1u8, {
+            let enableConstrainedEncoding: u32 =
+                unsafe { ::std::mem::transmute(enableConstrainedEncoding) };
+            enableConstrainedEncoding as u64
+        });
+        __bindgen_bitfield_unit.set(16usize, 1u8, {
+            let enableAlphaLayerEncoding: u32 =
+                unsafe { ::std::mem::transmute(enableAlphaLayerEncoding) };
+            enableAlphaLayerEncoding as u64
+        });
+        __bindgen_bitfield_unit.set(17usize, 1u8, {
+            let singleSliceIntraRefresh: u32 =
+                unsafe { ::std::mem::transmute(singleSliceIntraRefresh) };
+            singleSliceIntraRefresh as u64
+        });
+        __bindgen_bitfield_unit.set(18usize, 1u8, {
+            let outputRecoveryPointSEI: u32 =
+                unsafe { ::std::mem::transmute(outputRecoveryPointSEI) };
+            outputRecoveryPointSEI as u64
+        });
+        __bindgen_bitfield_unit.set(19usize, 1u8, {
+            let outputTimeCodeSEI: u32 = unsafe { ::std::mem::transmute(outputTimeCodeSEI) };
+            outputTimeCodeSEI as u64
+        });
+        __bindgen_bitfield_unit.set(20usize, 1u8, {
+            let enableTemporalSVC: u32 = unsafe { ::std::mem::transmute(enableTemporalSVC) };
+            enableTemporalSVC as u64
+        });
+        __bindgen_bitfield_unit.set(21usize, 1u8, {
+            let enableMVHEVC: u32 = unsafe { ::std::mem::transmute(enableMVHEVC) };
+            enableMVHEVC as u64
+        });
+        __bindgen_bitfield_unit.set(22usize, 1u8, {
+            let outputHevc3DReferenceDisplayInfo: u32 =
+                unsafe { ::std::mem::transmute(outputHevc3DReferenceDisplayInfo) };
+            outputHevc3DReferenceDisplayInfo as u64
+        });
+        __bindgen_bitfield_unit.set(23usize, 1u8, {
+            let outputMaxCll: u32 = unsafe { ::std::mem::transmute(outputMaxCll) };
+            outputMaxCll as u64
+        });
+        __bindgen_bitfield_unit.set(24usize, 1u8, {
+            let outputMasteringDisplay: u32 =
+                unsafe { ::std::mem::transmute(outputMasteringDisplay) };
+            outputMasteringDisplay as u64
+        });
+        __bindgen_bitfield_unit.set(25usize, 7u8, {
+            let reserved: u32 = unsafe { ::std::mem::transmute(reserved) };
+            reserved as u64
+        });
+        __bindgen_bitfield_unit
+    }
+}
+/// \struct _NV_ENC_CONFIG_HEVC
+/// HEVC encoder configuration parameters to be set during initialization.
+pub type NV_ENC_CONFIG_HEVC = _NV_ENC_CONFIG_HEVC;
+/// \struct _NV_ENC_FILM_GRAIN_PARAMS_AV1
+/// AV1 Film Grain Parameters structure
+#[repr(C, align(4))]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_FILM_GRAIN_PARAMS_AV1 {
+    // 4 bytes of packed bitfield storage; read and written through the accessor methods.
+    pub _bitfield_align_1: [u8; 0],
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    ///< [in]: pointYValue[i]: x coordinate for i-th point of luma piecewise linear scaling function. Values on a scale of 0...255
+    pub pointYValue: [u8; 14usize],
+    ///< [in]: pointYScaling[i]: i-th point output value of luma piecewise linear scaling function
+    pub pointYScaling: [u8; 14usize],
+    ///< [in]: pointCbValue[i]: x coordinate for i-th point of cb piecewise linear scaling function. Values on a scale of 0...255
+    pub pointCbValue: [u8; 10usize],
+    ///< [in]: pointCbScaling[i]: i-th point output value of cb piecewise linear scaling function
+    pub pointCbScaling: [u8; 10usize],
+    ///< [in]: pointCrValue[i]: x coordinate for i-th point of cr piecewise linear scaling function. Values on a scale of 0...255
+    pub pointCrValue: [u8; 10usize],
+    ///< [in]: pointCrScaling[i]: i-th point output value of cr piecewise linear scaling function
+    pub pointCrScaling: [u8; 10usize],
+    ///< [in]: Specifies auto-regressive coefficients used for the Y plane
+    pub arCoeffsYPlus128: [u8; 24usize],
+    ///< [in]: Specifies auto-regressive coefficients used for the U plane
+    pub arCoeffsCbPlus128: [u8; 25usize],
+    ///< [in]: Specifies auto-regressive coefficients used for the V plane
+    pub arCoeffsCrPlus128: [u8; 25usize],
+    ///< [in]: Reserved bytes -  should be set to 0
+    pub reserved2: [u8; 2usize],
+    ///< [in]: Represents a multiplier for the cb component used in derivation of the input index to the cb component scaling function
+    pub cbMult: u8,
+    ///< [in]: represents a multiplier for the average luma component used in derivation of the input index to the cb component scaling function.
+    pub cbLumaMult: u8,
+    ///< [in]: Represents an offset used in derivation of the input index to the cb component scaling function
+    pub cbOffset: u16,
+    ///< [in]: Represents a multiplier for the cr component used in derivation of the input index to the cr component scaling function
+    pub crMult: u8,
+    ///< [in]: represents a multiplier for the average luma component used in derivation of the input index to the cr component scaling function.
+    pub crLumaMult: u8,
+    ///< [in]: Represents an offset used in derivation of the input index to the cr component scaling function
+    pub crOffset: u16,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_FILM_GRAIN_PARAMS_AV1() {
+    // Verifies that the Rust definition of `_NV_ENC_FILM_GRAIN_PARAMS_AV1` has the
+    // size, alignment and per-field byte offsets hard-coded in the assertions below.
+    //
+    // Offsets are taken with `addr_of!` on uninitialized storage: that produces a raw
+    // field pointer without creating a reference or reading the memory. Forming
+    // `&(*null).field` to measure an offset would be undefined behaviour.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_FILM_GRAIN_PARAMS_AV1>::uninit();
+    let base = storage.as_ptr();
+    // Total size of the struct in bytes.
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_FILM_GRAIN_PARAMS_AV1>(),
+        156usize,
+        concat!("Size of: ", stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1))
+    );
+    // Required alignment of the struct in bytes.
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_FILM_GRAIN_PARAMS_AV1>(),
+        4usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1))
+    );
+    // Byte offset of each named field, measured from the start of the struct.
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointYValue) as usize - base as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointYValue)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointYScaling) as usize - base as usize
+        },
+        18usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointYScaling)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointCbValue) as usize - base as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointCbValue)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointCbScaling) as usize - base as usize
+        },
+        42usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointCbScaling)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointCrValue) as usize - base as usize
+        },
+        52usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointCrValue)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).pointCrScaling) as usize - base as usize
+        },
+        62usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(pointCrScaling)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).arCoeffsYPlus128) as usize - base as usize
+        },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(arCoeffsYPlus128)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).arCoeffsCbPlus128) as usize - base as usize
+        },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(arCoeffsCbPlus128)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).arCoeffsCrPlus128) as usize - base as usize
+        },
+        121usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(arCoeffsCrPlus128)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).reserved2) as usize - base as usize
+        },
+        146usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).cbMult) as usize - base as usize
+        },
+        148usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(cbMult)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).cbLumaMult) as usize - base as usize
+        },
+        149usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(cbLumaMult)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).cbOffset) as usize - base as usize
+        },
+        150usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(cbOffset)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).crMult) as usize - base as usize
+        },
+        152usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(crMult)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).crLumaMult) as usize - base as usize
+        },
+        153usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(crLumaMult)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*base).crOffset) as usize - base as usize
+        },
+        154usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_FILM_GRAIN_PARAMS_AV1),
+            "::",
+            stringify!(crOffset)
+        )
+    );
+}
+impl _NV_ENC_FILM_GRAIN_PARAMS_AV1 {
+    #[inline]
+    pub fn applyGrain(&self) -> u32 {
+        // 1-bit field at bit offset 0.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(0usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_applyGrain(&mut self, val: u32) {
+        // Stores `val` in the 1-bit field at bit offset 0.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(0usize, 1u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn chromaScalingFromLuma(&self) -> u32 {
+        // 1-bit field at bit offset 1.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(1usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_chromaScalingFromLuma(&mut self, val: u32) {
+        // Stores `val` in the 1-bit field at bit offset 1.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(1usize, 1u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn overlapFlag(&self) -> u32 {
+        // 1-bit field at bit offset 2.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(2usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_overlapFlag(&mut self, val: u32) {
+        // Stores `val` in the 1-bit field at bit offset 2.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(2usize, 1u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn clipToRestrictedRange(&self) -> u32 {
+        // 1-bit field at bit offset 3.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(3usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_clipToRestrictedRange(&mut self, val: u32) {
+        // Stores `val` in the 1-bit field at bit offset 3.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(3usize, 1u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn grainScalingMinus8(&self) -> u32 {
+        // 2-bit field at bit offset 4.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(4usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_grainScalingMinus8(&mut self, val: u32) {
+        // Stores `val` in the 2-bit field at bit offset 4.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(4usize, 2u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn arCoeffLag(&self) -> u32 {
+        // 2-bit field at bit offset 6.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(6usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_arCoeffLag(&mut self, val: u32) {
+        // Stores `val` in the 2-bit field at bit offset 6.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(6usize, 2u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn numYPoints(&self) -> u32 {
+        // 4-bit field at bit offset 8.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(8usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numYPoints(&mut self, val: u32) {
+        // Stores `val` in the 4-bit field at bit offset 8.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(8usize, 4u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn numCbPoints(&self) -> u32 {
+        // 4-bit field at bit offset 12.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(12usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCbPoints(&mut self, val: u32) {
+        // Stores `val` in the 4-bit field at bit offset 12.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(12usize, 4u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn numCrPoints(&self) -> u32 {
+        // 4-bit field at bit offset 16.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(16usize, 4u8) as u32) }
+    }
+    #[inline]
+    pub fn set_numCrPoints(&mut self, val: u32) {
+        // Stores `val` in the 4-bit field at bit offset 16.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(16usize, 4u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn arCoeffShiftMinus6(&self) -> u32 {
+        // 2-bit field at bit offset 20.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(20usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_arCoeffShiftMinus6(&mut self, val: u32) {
+        // Stores `val` in the 2-bit field at bit offset 20.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(20usize, 2u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn grainScaleShift(&self) -> u32 {
+        // 2-bit field at bit offset 22.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(22usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_grainScaleShift(&mut self, val: u32) {
+        // Stores `val` in the 2-bit field at bit offset 22.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(22usize, 2u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn reserved1(&self) -> u32 {
+        // 8-bit field at bit offset 24.
+        unsafe { ::std::mem::transmute::<u32, u32>(self._bitfield_1.get(24usize, 8u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reserved1(&mut self, val: u32) {
+        // Stores `val` in the 8-bit field at bit offset 24.
+        let raw: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(24usize, 8u8, u64::from(raw))
+    }
+    #[inline]
+    pub fn new_bitfield_1(
+        applyGrain: u32,
+        chromaScalingFromLuma: u32,
+        overlapFlag: u32,
+        clipToRestrictedRange: u32,
+        grainScalingMinus8: u32,
+        arCoeffLag: u32,
+        numYPoints: u32,
+        numCbPoints: u32,
+        numCrPoints: u32,
+        arCoeffShiftMinus6: u32,
+        grainScaleShift: u32,
+        reserved1: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        // Packs the twelve field values into one 4-byte storage unit, one `set` call per
+        // field, in declaration order. Each group below is: field name, bit offset, width.
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        // applyGrain: bit offset 0, width 1.
+        let raw: u32 = unsafe { ::std::mem::transmute(applyGrain) };
+        unit.set(0usize, 1u8, u64::from(raw));
+
+        // chromaScalingFromLuma: bit offset 1, width 1.
+        let raw: u32 = unsafe { ::std::mem::transmute(chromaScalingFromLuma) };
+        unit.set(1usize, 1u8, u64::from(raw));
+
+        // overlapFlag: bit offset 2, width 1.
+        let raw: u32 = unsafe { ::std::mem::transmute(overlapFlag) };
+        unit.set(2usize, 1u8, u64::from(raw));
+
+        // clipToRestrictedRange: bit offset 3, width 1.
+        let raw: u32 = unsafe { ::std::mem::transmute(clipToRestrictedRange) };
+        unit.set(3usize, 1u8, u64::from(raw));
+
+        // grainScalingMinus8: bit offset 4, width 2.
+        let raw: u32 = unsafe { ::std::mem::transmute(grainScalingMinus8) };
+        unit.set(4usize, 2u8, u64::from(raw));
+
+        // arCoeffLag: bit offset 6, width 2.
+        let raw: u32 = unsafe { ::std::mem::transmute(arCoeffLag) };
+        unit.set(6usize, 2u8, u64::from(raw));
+
+        // numYPoints: bit offset 8, width 4.
+        let raw: u32 = unsafe { ::std::mem::transmute(numYPoints) };
+        unit.set(8usize, 4u8, u64::from(raw));
+
+        // numCbPoints: bit offset 12, width 4.
+        let raw: u32 = unsafe { ::std::mem::transmute(numCbPoints) };
+        unit.set(12usize, 4u8, u64::from(raw));
+
+        // numCrPoints: bit offset 16, width 4.
+        let raw: u32 = unsafe { ::std::mem::transmute(numCrPoints) };
+        unit.set(16usize, 4u8, u64::from(raw));
+
+        // arCoeffShiftMinus6: bit offset 20, width 2.
+        let raw: u32 = unsafe { ::std::mem::transmute(arCoeffShiftMinus6) };
+        unit.set(20usize, 2u8, u64::from(raw));
+
+        // grainScaleShift: bit offset 22, width 2.
+        let raw: u32 = unsafe { ::std::mem::transmute(grainScaleShift) };
+        unit.set(22usize, 2u8, u64::from(raw));
+
+        // reserved1: bit offset 24, width 8.
+        let raw: u32 = unsafe { ::std::mem::transmute(reserved1) };
+        unit.set(24usize, 8u8, u64::from(raw));
+
+        unit
+    }
+}
+/// \struct _NV_ENC_FILM_GRAIN_PARAMS_AV1
+/// AV1 Film Grain Parameters structure
+pub type NV_ENC_FILM_GRAIN_PARAMS_AV1 = _NV_ENC_FILM_GRAIN_PARAMS_AV1;
+/// \struct _NV_ENC_CONFIG_AV1
+/// AV1 encoder configuration parameters to be set during initialization.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_AV1 {
+    ///< [in]: Specifies the level of the encoded bitstream.
+    pub level: u32,
+    ///< [in]: Specifies the level tier of the encoded bitstream.
+    pub tier: u32,
+    ///< [in]: Specifies the minimum size of luma coding block partition.
+    pub minPartSize: NV_ENC_AV1_PART_SIZE,
+    ///< [in]: Specifies the maximum size of luma coding block partition.
+    pub maxPartSize: NV_ENC_AV1_PART_SIZE,
+    pub _bitfield_align_1: [u16; 0],
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    ///< [in]: Specifies the IDR/Key frame interval. If not set, this is made equal to gopLength in NV_ENC_CONFIG.Low latency application client can set IDR interval to NVENC_INFINITE_GOPLENGTH so that IDR frames are not inserted automatically.
+    pub idrPeriod: u32,
+    ///< [in]: Specifies the interval between successive intra refresh if enableIntrarefresh is set. Requires enableIntraRefresh to be set.
+    ///Will be disabled if NV_ENC_CONFIG::gopLength is not set to NVENC_INFINITE_GOPLENGTH.
+    pub intraRefreshPeriod: u32,
+    ///< [in]: Specifies the length of intra refresh in number of frames for periodic intra refresh. This value should be smaller than intraRefreshPeriod
+    pub intraRefreshCnt: u32,
+    ///< [in]: Specifies the maximum number of references frames in the DPB.
+    pub maxNumRefFramesInDPB: u32,
+    ///< [in]: This parameter in conjunction with the flag enableCustomTileConfig and the array tileWidths[] specifies the way in which the picture is divided into tile columns.
+    ///When enableCustomTileConfig == 0, the picture will be uniformly divided into numTileColumns tile columns. If numTileColumns is not a power of 2,
+    ///it will be rounded down to the next power of 2 value. If numTileColumns == 0, the picture will be coded with the smallest number of vertical tiles as allowed by standard.
+    ///When enableCustomTileConfig == 1, numTileColumns must be > 0 and <= NV_MAX_TILE_COLS_AV1 and tileWidths must point to a valid array of numTileColumns entries.
+    ///Entry i specifies the width in 64x64 CTU unit of tile column i. The sum of all the entries should be equal to the picture width in 64x64 CTU units.
+    pub numTileColumns: u32,
+    ///< [in]: This parameter in conjunction with the flag enableCustomTileConfig and the array tileHeights[] specifies the way in which the picture is divided into tiles rows
+    ///When enableCustomTileConfig == 0, the picture will be uniformly divided into numTileRows tile rows. If numTileRows is not a power of 2,
+    ///it will be rounded down to the next power of 2 value. If numTileRows == 0, the picture will be coded with the smallest number of horizontal tiles as allowed by standard.
+    ///When enableCustomTileConfig == 1, numTileRows must be > 0 and <= NV_MAX_TILE_ROWS_AV1 and tileHeights must point to a valid array of numTileRows entries.
+    ///Entry i specifies the height in 64x64 CTU unit of tile row i. The sum of all the entries should be equal to the picture height in 64x64 CTU units.
+    pub numTileRows: u32,
+    ///< [in]: Reserved and must be set to 0.
+    pub reserved2: u32,
+    ///< [in]: If enableCustomTileConfig == 1, tileWidths[i] specifies the width of tile column i in 64x64 CTU unit, with 0 <= i <= numTileColumns -1.
+    pub tileWidths: *mut u32,
+    ///< [in]: If enableCustomTileConfig == 1, tileHeights[i] specifies the height of tile row i in 64x64 CTU unit, with 0 <= i <= numTileRows -1.
+    pub tileHeights: *mut u32,
+    ///< [in]: Specifies the max temporal layer used for hierarchical coding. Cannot be reconfigured and must be specified during encoder creation if temporal layer is considered.
+    pub maxTemporalLayersMinus1: u32,
+    ///< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273
+    pub colorPrimaries: NV_ENC_VUI_COLOR_PRIMARIES,
+    ///< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273
+    pub transferCharacteristics: NV_ENC_VUI_TRANSFER_CHARACTERISTIC,
+    ///< [in]: as defined in section of ISO/IEC 23091-4/ITU-T H.273
+    pub matrixCoefficients: NV_ENC_VUI_MATRIX_COEFFS,
+    ///< [in]: 0: studio swing representation - 1: full swing representation
+    pub colorRange: u32,
+    ///< [in]: 0: unknown
+    ///1: Horizontally collocated with luma (0,0) sample, between two vertical samples
+    ///2: Co-located with luma (0,0) sample
+    pub chromaSamplePosition: u32,
+    ///< [in]: Specifies the B-Frame as reference mode. Check support for useBFramesAsRef mode using  ::NV_ENC_CAPS_SUPPORT_BFRAME_REF_MODE caps.
+    pub useBFramesAsRef: NV_ENC_BFRAME_REF_MODE,
+    ///< [in]: If enableFilmGrainParams == 1, filmGrainParams must point to a valid NV_ENC_FILM_GRAIN_PARAMS_AV1 structure
+    pub filmGrainParams: *mut NV_ENC_FILM_GRAIN_PARAMS_AV1,
+    ///< [in]: Specifies max number of forward reference frame used for prediction of a frame. It must be in range 1-4 (Last, Last2, last3 and Golden). It's a suggestive value not necessarily be honored always.
+    pub numFwdRefs: NV_ENC_NUM_REF_FRAMES,
+    ///< [in]: Specifies max number of L1 list reference frame used for prediction of a frame. It must be in range 1-3 (Backward, Altref2, Altref). It's a suggestive value not necessarily be honored always.
+    pub numBwdRefs: NV_ENC_NUM_REF_FRAMES,
+    ///< [in]: Specifies pixel bit depth of encoded video. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit, NV_ENC_BIT_DEPTH_10 for 10 bit.
+    ///HW will do the bitdepth conversion internally from inputBitDepth -> outputBitDepth if bit depths differ
+    ///Support for 8 bit input to 10 bit encode conversion only
+    pub outputBitDepth: NV_ENC_BIT_DEPTH,
+    ///< [in]: Specifies pixel bit depth of video input. Should be set to NV_ENC_BIT_DEPTH_8 for 8 bit input, NV_ENC_BIT_DEPTH_10 for 10 bit input.
+    pub inputBitDepth: NV_ENC_BIT_DEPTH,
+    ///< [in]: In "LTR Per Picture" mode (ltrMarkFrame = 1), ltrNumFrames specifies maximum number of LTR frames in DPB.
+    ///These ltrNumFrames acts as a guidance to the encoder and are not necessarily honored. To achieve a right balance between the encoding
+    ///quality and keeping LTR frames in the DPB queue, the encoder can internally limit the number of LTR frames.
+    ///The number of LTR frames actually used depends upon the encoding preset being used; Faster encoding presets will use fewer LTR frames.
+    pub ltrNumFrames: u32,
+    ///< [in]: Specifies the number of temporal layers to be used for hierarchical coding.
+    pub numTemporalLayers: u32,
+    ///< [in]: Specifies the strength of temporal filtering. Check support for temporal filter using ::NV_ENC_CAPS_SUPPORT_TEMPORAL_FILTER caps.
+    ///Temporal filter feature is supported only if frameIntervalP >= 5.
+    ///If ZeroReorderDelay or enableStereoMVC is enabled, the temporal filter feature is not supported.
+    ///Temporal filter is recommended for natural contents.
+    pub tfLevel: NV_ENC_TEMPORAL_FILTER_LEVEL,
+    ///< [in]: Reserved and must be set to 0.
+    pub reserved1: [u32; 230usize],
+    ///< [in]: Reserved and must be set to NULL
+    pub reserved3: [*mut ::std::os::raw::c_void; 62usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_AV1() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_AV1>(),
+        1552usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_AV1))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_AV1>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_AV1))
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).level as *const _ as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(level)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).tier as *const _ as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(tier)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).minPartSize as *const _ as usize },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(minPartSize)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).maxPartSize as *const _ as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(maxPartSize)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).idrPeriod as *const _ as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(idrPeriod)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).intraRefreshPeriod as *const _ as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(intraRefreshPeriod)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).intraRefreshCnt as *const _ as usize
+        },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(intraRefreshCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).maxNumRefFramesInDPB as *const _ as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(maxNumRefFramesInDPB)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).numTileColumns as *const _ as usize
+        },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(numTileColumns)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).numTileRows as *const _ as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(numTileRows)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).reserved2 as *const _ as usize },
+        44usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).tileWidths as *const _ as usize },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(tileWidths)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).tileHeights as *const _ as usize },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(tileHeights)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).maxTemporalLayersMinus1 as *const _
+                as usize
+        },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(maxTemporalLayersMinus1)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).colorPrimaries as *const _ as usize
+        },
+        68usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(colorPrimaries)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).transferCharacteristics as *const _
+                as usize
+        },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(transferCharacteristics)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).matrixCoefficients as *const _ as usize
+        },
+        76usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(matrixCoefficients)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).colorRange as *const _ as usize },
+        80usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(colorRange)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).chromaSamplePosition as *const _ as usize
+        },
+        84usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(chromaSamplePosition)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).useBFramesAsRef as *const _ as usize
+        },
+        88usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(useBFramesAsRef)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).filmGrainParams as *const _ as usize
+        },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(filmGrainParams)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).numFwdRefs as *const _ as usize },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(numFwdRefs)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).numBwdRefs as *const _ as usize },
+        108usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(numBwdRefs)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).outputBitDepth as *const _ as usize
+        },
+        112usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(outputBitDepth)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).inputBitDepth as *const _ as usize
+        },
+        116usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(inputBitDepth)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).ltrNumFrames as *const _ as usize },
+        120usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(ltrNumFrames)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).numTemporalLayers as *const _ as usize
+        },
+        124usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(numTemporalLayers)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).tfLevel as *const _ as usize },
+        128usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(tfLevel)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).reserved1 as *const _ as usize },
+        132usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_CONFIG_AV1>())).reserved3 as *const _ as usize },
+        1056usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_AV1),
+            "::",
+            stringify!(reserved3)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG_AV1 {
+    /// Builds the value from an all-zero byte pattern (zero-fill, then assume initialised).
+    fn default() -> Self {
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // NOTE(review): relies on all-zero bytes being a valid value for every field of
+        // this type, whose definition is outside this chunk -- confirm.
+        unsafe { zeroed.assume_init() }
+    }
+}
+impl _NV_ENC_CONFIG_AV1 { // Getter/setter pairs for the flags stored in `_bitfield_1`; the two leading `get`/`set` arguments are presumably (bit offset, bit width) -- confirm against `__BindgenBitfieldUnit`.
+    #[inline]
+    pub fn outputAnnexBFormat(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_outputAnnexBFormat(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableTimingInfo(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(1usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableTimingInfo(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(1usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableDecoderModelInfo(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(2usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableDecoderModelInfo(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(2usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableFrameIdNumbers(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(3usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableFrameIdNumbers(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(3usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn disableSeqHdr(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(4usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disableSeqHdr(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(4usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn repeatSeqHdr(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(5usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_repeatSeqHdr(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(5usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableIntraRefresh(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(6usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableIntraRefresh(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(6usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn chromaFormatIDC(&self) -> u32 { // the first field here that is accessed with a width argument of 2
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(7usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_chromaFormatIDC(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(7usize, 2u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableBitstreamPadding(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(9usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableBitstreamPadding(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(9usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableCustomTileConfig(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(10usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableCustomTileConfig(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(10usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableFilmGrainParams(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(11usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableFilmGrainParams(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(11usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableLTR(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(12usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableLTR(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(12usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableTemporalSVC(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(13usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableTemporalSVC(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(13usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn outputMaxCll(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(14usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_outputMaxCll(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(14usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn outputMasteringDisplay(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(15usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_outputMasteringDisplay(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(15usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved4(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(16usize, 2u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reserved4(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(16usize, 2u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(18usize, 14u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reserved(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(18usize, 14u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn new_bitfield_1( // Builds a fresh 4-byte unit, storing each argument with the same `set` arguments its setter above uses.
+        outputAnnexBFormat: u32,
+        enableTimingInfo: u32,
+        enableDecoderModelInfo: u32,
+        enableFrameIdNumbers: u32,
+        disableSeqHdr: u32,
+        repeatSeqHdr: u32,
+        enableIntraRefresh: u32,
+        chromaFormatIDC: u32,
+        enableBitstreamPadding: u32,
+        enableCustomTileConfig: u32,
+        enableFilmGrainParams: u32,
+        enableLTR: u32,
+        enableTemporalSVC: u32,
+        outputMaxCll: u32,
+        outputMasteringDisplay: u32,
+        reserved4: u32,
+        reserved: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 1u8, {
+            let outputAnnexBFormat: u32 = unsafe { ::std::mem::transmute(outputAnnexBFormat) };
+            outputAnnexBFormat as u64
+        });
+        __bindgen_bitfield_unit.set(1usize, 1u8, {
+            let enableTimingInfo: u32 = unsafe { ::std::mem::transmute(enableTimingInfo) };
+            enableTimingInfo as u64
+        });
+        __bindgen_bitfield_unit.set(2usize, 1u8, {
+            let enableDecoderModelInfo: u32 =
+                unsafe { ::std::mem::transmute(enableDecoderModelInfo) };
+            enableDecoderModelInfo as u64
+        });
+        __bindgen_bitfield_unit.set(3usize, 1u8, {
+            let enableFrameIdNumbers: u32 = unsafe { ::std::mem::transmute(enableFrameIdNumbers) };
+            enableFrameIdNumbers as u64
+        });
+        __bindgen_bitfield_unit.set(4usize, 1u8, {
+            let disableSeqHdr: u32 = unsafe { ::std::mem::transmute(disableSeqHdr) };
+            disableSeqHdr as u64
+        });
+        __bindgen_bitfield_unit.set(5usize, 1u8, {
+            let repeatSeqHdr: u32 = unsafe { ::std::mem::transmute(repeatSeqHdr) };
+            repeatSeqHdr as u64
+        });
+        __bindgen_bitfield_unit.set(6usize, 1u8, {
+            let enableIntraRefresh: u32 = unsafe { ::std::mem::transmute(enableIntraRefresh) };
+            enableIntraRefresh as u64
+        });
+        __bindgen_bitfield_unit.set(7usize, 2u8, {
+            let chromaFormatIDC: u32 = unsafe { ::std::mem::transmute(chromaFormatIDC) };
+            chromaFormatIDC as u64
+        });
+        __bindgen_bitfield_unit.set(9usize, 1u8, {
+            let enableBitstreamPadding: u32 =
+                unsafe { ::std::mem::transmute(enableBitstreamPadding) };
+            enableBitstreamPadding as u64
+        });
+        __bindgen_bitfield_unit.set(10usize, 1u8, {
+            let enableCustomTileConfig: u32 =
+                unsafe { ::std::mem::transmute(enableCustomTileConfig) };
+            enableCustomTileConfig as u64
+        });
+        __bindgen_bitfield_unit.set(11usize, 1u8, {
+            let enableFilmGrainParams: u32 =
+                unsafe { ::std::mem::transmute(enableFilmGrainParams) };
+            enableFilmGrainParams as u64
+        });
+        __bindgen_bitfield_unit.set(12usize, 1u8, {
+            let enableLTR: u32 = unsafe { ::std::mem::transmute(enableLTR) };
+            enableLTR as u64
+        });
+        __bindgen_bitfield_unit.set(13usize, 1u8, {
+            let enableTemporalSVC: u32 = unsafe { ::std::mem::transmute(enableTemporalSVC) };
+            enableTemporalSVC as u64
+        });
+        __bindgen_bitfield_unit.set(14usize, 1u8, {
+            let outputMaxCll: u32 = unsafe { ::std::mem::transmute(outputMaxCll) };
+            outputMaxCll as u64
+        });
+        __bindgen_bitfield_unit.set(15usize, 1u8, {
+            let outputMasteringDisplay: u32 =
+                unsafe { ::std::mem::transmute(outputMasteringDisplay) };
+            outputMasteringDisplay as u64
+        });
+        __bindgen_bitfield_unit.set(16usize, 2u8, {
+            let reserved4: u32 = unsafe { ::std::mem::transmute(reserved4) };
+            reserved4 as u64
+        });
+        __bindgen_bitfield_unit.set(18usize, 14u8, {
+            let reserved: u32 = unsafe { ::std::mem::transmute(reserved) };
+            reserved as u64
+        });
+        __bindgen_bitfield_unit
+    }
+}
+#[doc = " \\struct _NV_ENC_CONFIG_AV1"]
+#[doc = " AV1 encoder configuration parameters to be set during initialization."]
+pub type NV_ENC_CONFIG_AV1 = _NV_ENC_CONFIG_AV1; // alias: the un-prefixed name denotes the same type as `_NV_ENC_CONFIG_AV1`
+#[doc = " \\struct _NV_ENC_CONFIG_H264_MEONLY"]
+#[doc = " H264 encoder configuration parameters for ME only Mode"]
+#[doc = ""]
+#[repr(C)] // C field order and layout; the layout test below pins size 1536 and alignment 8
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_H264_MEONLY {
+    pub _bitfield_align_1: [u32; 0], // zero-length array: occupies no bytes, carries u32 alignment
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>, // flag storage read/written by the accessor methods on this type
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 255usize], // byte offset 4 per the layout test
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize], // byte offset 1024 per the layout test
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_H264_MEONLY() {
+    // Layout check: size, alignment and field offsets must equal the expected constants.
+    // Offsets use addr_of! on an uninitialised value; the old null-pointer reference was UB.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CONFIG_H264_MEONLY>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_H264_MEONLY>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_H264_MEONLY))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_H264_MEONLY>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_H264_MEONLY))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_MEONLY),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        1024usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_H264_MEONLY),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG_H264_MEONLY {
+    /// Builds the value from an all-zero byte pattern (zero-fill, then assume initialised).
+    fn default() -> Self {
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // All-zero bytes are valid for the u32 array (0) and the raw pointers (null).
+        // NOTE(review): the bitfield unit type is defined elsewhere; presumably plain bytes.
+        unsafe { zeroed.assume_init() }
+    }
+}
+impl _NV_ENC_CONFIG_H264_MEONLY { // Getter/setter pairs for the flags stored in `_bitfield_1`; the two leading `get`/`set` arguments are presumably (bit offset, bit width) -- confirm against `__BindgenBitfieldUnit`.
+    #[inline]
+    pub fn disablePartition16x16(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disablePartition16x16(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn disablePartition8x16(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(1usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disablePartition8x16(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(1usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn disablePartition16x8(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(2usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disablePartition16x8(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(2usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn disablePartition8x8(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(3usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disablePartition8x8(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(3usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn disableIntraSearch(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(4usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_disableIntraSearch(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(4usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn bStereoEnable(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(5usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_bStereoEnable(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(5usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reserved(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(6usize, 26u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reserved(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(6usize, 26u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn new_bitfield_1( // Builds a fresh 4-byte unit, storing each argument with the same `set` arguments its setter above uses.
+        disablePartition16x16: u32,
+        disablePartition8x16: u32,
+        disablePartition16x8: u32,
+        disablePartition8x8: u32,
+        disableIntraSearch: u32,
+        bStereoEnable: u32,
+        reserved: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut __bindgen_bitfield_unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        __bindgen_bitfield_unit.set(0usize, 1u8, {
+            let disablePartition16x16: u32 =
+                unsafe { ::std::mem::transmute(disablePartition16x16) };
+            disablePartition16x16 as u64
+        });
+        __bindgen_bitfield_unit.set(1usize, 1u8, {
+            let disablePartition8x16: u32 = unsafe { ::std::mem::transmute(disablePartition8x16) };
+            disablePartition8x16 as u64
+        });
+        __bindgen_bitfield_unit.set(2usize, 1u8, {
+            let disablePartition16x8: u32 = unsafe { ::std::mem::transmute(disablePartition16x8) };
+            disablePartition16x8 as u64
+        });
+        __bindgen_bitfield_unit.set(3usize, 1u8, {
+            let disablePartition8x8: u32 = unsafe { ::std::mem::transmute(disablePartition8x8) };
+            disablePartition8x8 as u64
+        });
+        __bindgen_bitfield_unit.set(4usize, 1u8, {
+            let disableIntraSearch: u32 = unsafe { ::std::mem::transmute(disableIntraSearch) };
+            disableIntraSearch as u64
+        });
+        __bindgen_bitfield_unit.set(5usize, 1u8, {
+            let bStereoEnable: u32 = unsafe { ::std::mem::transmute(bStereoEnable) };
+            bStereoEnable as u64
+        });
+        __bindgen_bitfield_unit.set(6usize, 26u8, {
+            let reserved: u32 = unsafe { ::std::mem::transmute(reserved) };
+            reserved as u64
+        });
+        __bindgen_bitfield_unit
+    }
+}
+#[doc = " \\struct _NV_ENC_CONFIG_H264_MEONLY"]
+#[doc = " H264 encoder configuration parameters for ME only Mode"]
+#[doc = ""]
+pub type NV_ENC_CONFIG_H264_MEONLY = _NV_ENC_CONFIG_H264_MEONLY; // alias: the un-prefixed name denotes the same type as `_NV_ENC_CONFIG_H264_MEONLY`
+#[doc = " \\struct _NV_ENC_CONFIG_HEVC_MEONLY"]
+#[doc = " HEVC encoder configuration parameters for ME only Mode"]
+#[doc = ""]
+#[repr(C)] // C field order and layout; the layout test below pins size 1536 and alignment 8
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_CONFIG_HEVC_MEONLY {
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u32; 256usize], // byte offset 0; 256 * 4 = 1024 bytes
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved1: [*mut ::std::os::raw::c_void; 64usize], // byte offset 1024 per the layout test
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG_HEVC_MEONLY() {
+    // Layout check: size, alignment and field offsets must equal the expected constants.
+    // Offsets use addr_of! on an uninitialised value; the old null-pointer reference was UB.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CONFIG_HEVC_MEONLY>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG_HEVC_MEONLY>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG_HEVC_MEONLY))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG_HEVC_MEONLY>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG_HEVC_MEONLY))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_HEVC_MEONLY),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize },
+        1024usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG_HEVC_MEONLY),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG_HEVC_MEONLY {
+    /// Builds the value from an all-zero byte pattern (zero-fill, then assume initialised).
+    fn default() -> Self {
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: both fields are arrays of u32 / raw pointers, for which all-zero bytes
+        // are valid values (0 and null respectively).
+        unsafe { zeroed.assume_init() }
+    }
+}
+#[doc = " \\struct _NV_ENC_CONFIG_HEVC_MEONLY"]
+#[doc = " HEVC encoder configuration parameters for ME only Mode"]
+#[doc = ""]
+pub type NV_ENC_CONFIG_HEVC_MEONLY = _NV_ENC_CONFIG_HEVC_MEONLY; // alias: the un-prefixed name denotes the same type as `_NV_ENC_CONFIG_HEVC_MEONLY`
+#[doc = " \\struct _NV_ENC_CODEC_CONFIG"]
+#[doc = " Codec-specific encoder configuration parameters to be set during initialization."]
+#[repr(C)] // C union layout: the layout test below checks every member at byte offset 0, total size 1792
+#[derive(Copy, Clone)]
+pub union _NV_ENC_CODEC_CONFIG {
+    #[doc = "< [in]: Specifies the H.264-specific encoder configuration."]
+    pub h264Config: NV_ENC_CONFIG_H264,
+    #[doc = "< [in]: Specifies the HEVC-specific encoder configuration."]
+    pub hevcConfig: NV_ENC_CONFIG_HEVC,
+    #[doc = "< [in]: Specifies the AV1-specific encoder configuration."]
+    pub av1Config: NV_ENC_CONFIG_AV1,
+    #[doc = "< [in]: Specifies the H.264-specific ME only encoder configuration."]
+    pub h264MeOnlyConfig: NV_ENC_CONFIG_H264_MEONLY,
+    #[doc = "< [in]: Specifies the HEVC-specific ME only encoder configuration."]
+    pub hevcMeOnlyConfig: NV_ENC_CONFIG_HEVC_MEONLY,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u32; 320usize], // 320 * 4 = 1280 bytes, so this is not the member that sets the 1792-byte union size
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CODEC_CONFIG() {
+    // Layout check: size, alignment and member offsets must equal the expected constants.
+    // Offsets use addr_of! on an uninitialised value; the old null-pointer reference was UB.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CODEC_CONFIG>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CODEC_CONFIG>(),
+        1792usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CODEC_CONFIG))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CODEC_CONFIG>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CODEC_CONFIG))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).h264Config) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(h264Config)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).hevcConfig) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(hevcConfig)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).av1Config) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(av1Config)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).h264MeOnlyConfig) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(h264MeOnlyConfig)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).hevcMeOnlyConfig) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(hevcMeOnlyConfig)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CODEC_CONFIG),
+            "::",
+            stringify!(reserved)
+        )
+    );
+}
+impl Default for _NV_ENC_CODEC_CONFIG {
+    /// Builds the union from an all-zero byte pattern (zero-fill, then assume initialised).
+    fn default() -> Self {
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // NOTE(review): relies on all-zero bytes being acceptable for this union; most of
+        // its member types are defined outside this chunk -- confirm.
+        unsafe { zeroed.assume_init() }
+    }
+}
+#[doc = " \\struct _NV_ENC_CODEC_CONFIG"]
+#[doc = " Codec-specific encoder configuration parameters to be set during initialization."]
+pub type NV_ENC_CODEC_CONFIG = _NV_ENC_CODEC_CONFIG; // alias: the un-prefixed name denotes the same type as `_NV_ENC_CODEC_CONFIG`
+#[doc = " \\struct _NV_ENC_CONFIG"]
+#[doc = " Encoder configuration parameters to be set during initialization."]
+#[repr(C)] // C field order and layout; the layout test below pins size 3584, alignment 8 and the offsets noted on each field
+#[derive(Copy, Clone)]
+pub struct _NV_ENC_CONFIG {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_CONFIG_VER."]
+    pub version: u32, // byte offset 0
+    #[doc = "< [in]: Specifies the codec profile GUID. If client specifies \\p NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID the NvEncodeAPI interface will select the appropriate codec profile."]
+    pub profileGUID: GUID, // byte offset 4
+    #[doc = "< [in]: Specifies the number of pictures in one GOP. Low latency application client can set goplength to NVENC_INFINITE_GOPLENGTH so that keyframes are not inserted automatically."]
+    pub gopLength: u32, // byte offset 20
+    #[doc = "< [in]: Specifies the GOP pattern as follows: \\p frameIntervalP = 0: I, 1: IPP, 2: IBP, 3: IBBP  If goplength is set to NVENC_INFINITE_GOPLENGTH \\p frameIntervalP should be set to 1."]
+    pub frameIntervalP: i32, // byte offset 24; signed, unlike the neighbouring u32 fields
+    #[doc = "< [in]: Set this to 1 to enable monochrome encoding for this session."]
+    pub monoChromeEncoding: u32, // byte offset 28
+    #[doc = "< [in]: Specifies the frame/field mode."]
+    #[doc = "Check support for field encoding using ::NV_ENC_CAPS_SUPPORT_FIELD_ENCODING caps."]
+    #[doc = "Using a frameFieldMode other than NV_ENC_PARAMS_FRAME_FIELD_MODE_FRAME for RGB input is not supported."]
+    pub frameFieldMode: NV_ENC_PARAMS_FRAME_FIELD_MODE, // byte offset 32
+    #[doc = "< [in]: Specifies the desired motion vector prediction precision."]
+    pub mvPrecision: NV_ENC_MV_PRECISION, // byte offset 36
+    #[doc = "< [in]: Specifies the rate control parameters for the current encoding session."]
+    pub rcParams: NV_ENC_RC_PARAMS, // byte offset 40
+    #[doc = "< [in]: Specifies the codec specific config parameters through this union."]
+    pub encodeCodecConfig: NV_ENC_CODEC_CONFIG, // byte offset 168
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: [u32; 278usize], // byte offset 1960
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize], // byte offset 3072
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_CONFIG() {
+    // Layout check: size, alignment and field offsets must equal the expected constants.
+    // Offsets use addr_of! on an uninitialised value; the old null-pointer reference was UB.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_CONFIG>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CONFIG>(),
+        3584usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CONFIG))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CONFIG>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CONFIG))
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).version) as usize - ptr as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).profileGUID) as usize - ptr as usize },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(profileGUID)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).gopLength) as usize - ptr as usize },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(gopLength)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).frameIntervalP) as usize - ptr as usize },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(frameIntervalP)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).monoChromeEncoding) as usize - ptr as usize },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(monoChromeEncoding)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).frameFieldMode) as usize - ptr as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(frameFieldMode)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).mvPrecision) as usize - ptr as usize },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(mvPrecision)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).rcParams) as usize - ptr as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(rcParams)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).encodeCodecConfig) as usize - ptr as usize },
+        168usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(encodeCodecConfig)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        1960usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize },
+        3072usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_CONFIG),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENC_CONFIG {
+    /// Builds a `_NV_ENC_CONFIG` in which every byte is zero.
+    ///
+    /// NOTE(review): this relies on the all-zero bit pattern being a valid
+    /// value for every field of the struct -- confirm for enum-typed fields.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: the storage has been completely written (with zero bytes);
+        // validity of that pattern is the assumption noted above.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+// Alias exposing the `_NV_ENC_CONFIG` struct under the un-prefixed name
+// `NV_ENC_CONFIG`; both names refer to the same type.
+#[doc = " \\struct _NV_ENC_CONFIG"]
+#[doc = " Encoder configuration parameters to be set during initialization."]
+pub type NV_ENC_CONFIG = _NV_ENC_CONFIG;
+// Tuning-info selector, stored as a `u32` (`#[repr(u32)]`) with the contiguous
+// discriminants 0..=6 listed below. It is the type of the `tuningInfo` field of
+// `_NV_ENC_INITIALIZE_PARAMS`.
+// NOTE(review): being a Rust enum, only the listed discriminants are valid
+// values; confirm nothing on the C side can hand back a different number.
+#[repr(u32)]
+#[doc = "  Tuning information of NVENC encoding (TuningInfo is not applicable to H264 and HEVC MEOnly mode)."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum NV_ENC_TUNING_INFO {
+    #[doc = "< Undefined tuningInfo. Invalid value for encoding."]
+    NV_ENC_TUNING_INFO_UNDEFINED = 0,
+    #[doc = "< Tune presets for latency tolerant encoding."]
+    NV_ENC_TUNING_INFO_HIGH_QUALITY = 1,
+    #[doc = "< Tune presets for low latency streaming."]
+    NV_ENC_TUNING_INFO_LOW_LATENCY = 2,
+    #[doc = "< Tune presets for ultra low latency streaming."]
+    NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY = 3,
+    #[doc = "< Tune presets for lossless encoding."]
+    NV_ENC_TUNING_INFO_LOSSLESS = 4,
+    #[doc = "< Tune presets for latency tolerant encoding for higher quality. Only supported for HEVC and AV1 on Turing+ architectures"]
+    NV_ENC_TUNING_INFO_ULTRA_HIGH_QUALITY = 5,
+    // Sentinel: equals the number of real tuning values above (0..=5).
+    #[doc = "< Count number of tuningInfos. Invalid value."]
+    NV_ENC_TUNING_INFO_COUNT = 6,
+}
+// Split-encode mode selector, stored as a `u32` (`#[repr(u32)]`). The
+// discriminants are not contiguous: 0..=4 and then 15. The largest value, 15,
+// fits in four bits, matching the 4-bit `splitEncodeMode` bitfield of
+// `_NV_ENC_INITIALIZE_PARAMS`; the generated accessors for that bitfield take
+// and return a plain `u32` rather than this enum type.
+#[repr(u32)]
+#[doc = " Split Encoding Modes (Split Encoding is not applicable to H264)."]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub enum _NV_ENC_SPLIT_ENCODE_MODE {
+    #[doc = "< Default value, implicit mode. Split frame will not always be enabled, even if NVENC number > 1. It will be decided by the driver based on preset, tuning information and video resolution."]
+    NV_ENC_SPLIT_AUTO_MODE = 0,
+    #[doc = "< Split frame forced mode enabled with number of strips automatically selected by driver to best fit configuration. If NVENC number > 1, split frame will be forced."]
+    NV_ENC_SPLIT_AUTO_FORCED_MODE = 1,
+    #[doc = "< Forced 2-strip split frame encoding (if NVENC number > 1, 1-strip encode otherwise)"]
+    NV_ENC_SPLIT_TWO_FORCED_MODE = 2,
+    #[doc = "< Forced 3-strip split frame encoding (if NVENC number > 2, NVENC number of strips otherwise)"]
+    NV_ENC_SPLIT_THREE_FORCED_MODE = 3,
+    #[doc = "< Forced 4-strip split frame encoding (if NVENC number > 3, NVENC number of strips otherwise)"]
+    NV_ENC_SPLIT_FOUR_FORCED_MODE = 4,
+    #[doc = "< Both split frame auto mode and forced mode are disabled"]
+    NV_ENC_SPLIT_DISABLE_MODE = 15,
+}
+// Re-exports the enum `_NV_ENC_SPLIT_ENCODE_MODE` under the un-prefixed name
+// `NV_ENC_SPLIT_ENCODE_MODE`.
+#[doc = " Split Encoding Modes (Split Encoding is not applicable to H264)."]
+pub use self::_NV_ENC_SPLIT_ENCODE_MODE as NV_ENC_SPLIT_ENCODE_MODE;
+// `#[repr(C)]` mirror of the NVENC encode-session initialisation parameters.
+// The accompanying layout test pins this struct at 1800 bytes with 8-byte
+// alignment, so fields must not be added, removed or reordered by hand.
+#[doc = " \\struct _NV_ENC_INITIALIZE_PARAMS"]
+#[doc = " Encode Session Initialization parameters."]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_INITIALIZE_PARAMS {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER."]
+    pub version: u32,
+    #[doc = "< [in]: Specifies the Encode GUID for which the encoder is being created. ::NvEncInitializeEncoder() API will fail if this is not set, or set to unsupported value."]
+    pub encodeGUID: GUID,
+    #[doc = "< [in]: Specifies the preset for encoding. If the preset GUID is set then , the preset configuration will be applied before any other parameter."]
+    pub presetGUID: GUID,
+    #[doc = "< [in]: Specifies the encode width. If not set ::NvEncInitializeEncoder() API will fail."]
+    pub encodeWidth: u32,
+    #[doc = "< [in]: Specifies the encode height. If not set ::NvEncInitializeEncoder() API will fail."]
+    pub encodeHeight: u32,
+    #[doc = "< [in]: Specifies the display aspect ratio width (H264/HEVC) or the render width (AV1)."]
+    pub darWidth: u32,
+    #[doc = "< [in]: Specifies the display aspect ratio height (H264/HEVC) or the render height (AV1)."]
+    pub darHeight: u32,
+    #[doc = "< [in]: Specifies the numerator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen )."]
+    pub frameRateNum: u32,
+    #[doc = "< [in]: Specifies the denominator for frame rate used for encoding in frames per second ( Frame rate = frameRateNum / frameRateDen )."]
+    pub frameRateDen: u32,
+    #[doc = "< [in]: Set this to 1 to enable asynchronous mode and is expected to use events to get picture completion notification."]
+    pub enableEncodeAsync: u32,
+    #[doc = "< [in]: Set this to 1 to enable the Picture Type Decision is be taken by the NvEncodeAPI interface."]
+    pub enablePTD: u32,
+    // Zero-length `u32` array: occupies no bytes. Presumably present to give the
+    // packed flag word below `u32` alignment -- TODO confirm against bindgen docs.
+    pub _bitfield_align_1: [u32; 0],
+    // Four bytes of packed flags, read and written through the generated
+    // accessor methods on this type (bit offsets as used by those accessors):
+    //   bit 0       reportSliceOffsets
+    //   bit 1       enableSubFrameWrite
+    //   bit 2       enableExternalMEHints
+    //   bit 3       enableMEOnlyMode
+    //   bit 4       enableWeightedPrediction
+    //   bits 5-8    splitEncodeMode
+    //   bit 9       enableOutputInVidmem
+    //   bit 10      enableReconFrameOutput
+    //   bit 11      enableOutputStats
+    //   bit 12      enableUniDirectionalB
+    //   bits 13-31  reservedBitFields
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    #[doc = "< [in]: Reserved private data buffer size and must be set to 0"]
+    pub privDataSize: u32,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32,
+    #[doc = "< [in]: Reserved private data buffer and must be set to NULL"]
+    pub privData: *mut ::std::os::raw::c_void,
+    #[doc = "< [in]: Specifies the advanced codec specific structure. If client has sent a valid codec config structure, it will override parameters set by the NV_ENC_INITIALIZE_PARAMS::presetGUID parameter. If set to NULL the NvEncodeAPI interface will use the NV_ENC_INITIALIZE_PARAMS::presetGUID to set the codec specific parameters."]
+    #[doc = "Client can also optionally query the NvEncodeAPI interface to get codec specific parameters for a presetGUID using ::NvEncGetEncodePresetConfigEx() API. It can then modify (if required) some of the codec config parameters and send down a custom config structure as part of ::_NV_ENC_INITIALIZE_PARAMS."]
+    #[doc = "Even in this case client is recommended to pass the same preset guid it has used in ::NvEncGetEncodePresetConfigEx() API to query the config structure; as NV_ENC_INITIALIZE_PARAMS::presetGUID. This will not override the custom config structure but will be used to determine other Encoder HW specific parameters not exposed in the API."]
+    pub encodeConfig: *mut NV_ENC_CONFIG,
+    #[doc = "< [in]: Maximum encode width to be used for current Encode session."]
+    #[doc = "Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encoder will not allow dynamic resolution change."]
+    pub maxEncodeWidth: u32,
+    #[doc = "< [in]: Maximum encode height to be allowed for current Encode session."]
+    #[doc = "Client should allocate output buffers according to this dimension for dynamic resolution change. If set to 0, Encode will not allow dynamic resolution change."]
+    pub maxEncodeHeight: u32,
+    #[doc = "< [in]: If Client wants to pass external motion vectors in NV_ENC_PIC_PARAMS::meExternalHints buffer it must specify the maximum number of hint candidates per block per direction for the encode session."]
+    #[doc = "The NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[0] is for L0 predictors and NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[1] is for L1 predictors."]
+    #[doc = "This client must also set NV_ENC_INITIALIZE_PARAMS::enableExternalMEHints to 1."]
+    pub maxMEHintCountsPerBlock: [NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE; 2usize],
+    #[doc = "< [in]: Tuning Info of NVENC encoding(TuningInfo is not applicable to H264 and HEVC meonly mode)."]
+    pub tuningInfo: NV_ENC_TUNING_INFO,
+    #[doc = "< [in]: Input buffer format. Used only when DX12 interface type is used"]
+    pub bufferFormat: NV_ENC_BUFFER_FORMAT,
+    #[doc = "< [in]: Number of state buffers to allocate to save encoder state. Set this to value greater than zero to enable encoding without advancing the encoder state."]
+    pub numStateBuffers: u32,
+    #[doc = "< [in]: Specifies the level for encoded frame output stats, when NV_ENC_INITIALIZE_PARAMS::enableOutputStats is set to 1."]
+    #[doc = "Client should allocate buffer of size equal to number of blocks multiplied by the size of NV_ENC_OUTPUT_STATS_BLOCK struct"]
+    #[doc = "if NV_ENC_INITIALIZE_PARAMS::outputStatsLevel is set to NV_ENC_OUTPUT_STATS_BLOCK or number of rows multiplied by the size of"]
+    #[doc = "NV_ENC_OUTPUT_STATS_ROW struct if NV_ENC_INITIALIZE_PARAMS::outputStatsLevel is set to NV_ENC_OUTPUT_STATS_ROW"]
+    #[doc = "in system memory and assign to NV_ENC_LOCK_BITSTREAM::encodedOutputStatsPtr to receive the encoded frame output stats."]
+    pub outputStatsLevel: NV_ENC_OUTPUT_STATS_LEVEL,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 284usize],
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_INITIALIZE_PARAMS() {
+    // Verifies that the Rust definition of `_NV_ENC_INITIALIZE_PARAMS` has the
+    // size, alignment and field offsets recorded when the bindings were
+    // generated.
+    //
+    // Offsets are measured on an uninitialised (but real, non-null) instance
+    // with `addr_of!`. Projecting a field through `ptr::null()` and taking a
+    // reference to it, as this test previously did, is undefined behaviour and
+    // is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_INITIALIZE_PARAMS>::uninit();
+    let base = uninit.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `base` points at `uninit`, which is live for the whole
+                // test; `addr_of!` only computes the field address and never
+                // reads the uninitialised memory.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_INITIALIZE_PARAMS),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_INITIALIZE_PARAMS>(),
+        1800usize,
+        concat!("Size of: ", stringify!(_NV_ENC_INITIALIZE_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_INITIALIZE_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_INITIALIZE_PARAMS))
+    );
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(encodeGUID, 4usize);
+    assert_field_offset!(presetGUID, 20usize);
+    assert_field_offset!(encodeWidth, 36usize);
+    assert_field_offset!(encodeHeight, 40usize);
+    assert_field_offset!(darWidth, 44usize);
+    assert_field_offset!(darHeight, 48usize);
+    assert_field_offset!(frameRateNum, 52usize);
+    assert_field_offset!(frameRateDen, 56usize);
+    assert_field_offset!(enableEncodeAsync, 60usize);
+    assert_field_offset!(enablePTD, 64usize);
+    // Bytes 68..72 hold the packed `_bitfield_1` flags, which have no offset
+    // assertion of their own.
+    assert_field_offset!(privDataSize, 72usize);
+    assert_field_offset!(reserved, 76usize);
+    assert_field_offset!(privData, 80usize);
+    assert_field_offset!(encodeConfig, 88usize);
+    assert_field_offset!(maxEncodeWidth, 96usize);
+    assert_field_offset!(maxEncodeHeight, 100usize);
+    assert_field_offset!(maxMEHintCountsPerBlock, 104usize);
+    assert_field_offset!(tuningInfo, 136usize);
+    assert_field_offset!(bufferFormat, 140usize);
+    assert_field_offset!(numStateBuffers, 144usize);
+    assert_field_offset!(outputStatsLevel, 148usize);
+    assert_field_offset!(reserved1, 152usize);
+    assert_field_offset!(reserved2, 1288usize);
+}
+impl Default for _NV_ENC_INITIALIZE_PARAMS {
+    /// Builds a `_NV_ENC_INITIALIZE_PARAMS` in which every byte is zero, so
+    /// integer fields are 0 and the pointer fields are null.
+    ///
+    /// NOTE(review): this relies on the all-zero bit pattern being a valid
+    /// value for every field of the struct -- confirm for enum-typed fields.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: the storage has been completely written (with zero bytes);
+        // validity of that pattern is the assumption noted above.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_INITIALIZE_PARAMS {
+    /// Reads `reportSliceOffsets`: the 1-bit field at bit offset 0 of `_bitfield_1`.
+    #[inline]
+    pub fn reportSliceOffsets(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` to `reportSliceOffsets` (bit offset 0, width 1).
+    #[inline]
+    pub fn set_reportSliceOffsets(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableSubFrameWrite`: the 1-bit field at bit offset 1 of `_bitfield_1`.
+    #[inline]
+    pub fn enableSubFrameWrite(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableSubFrameWrite` (bit offset 1, width 1).
+    #[inline]
+    pub fn set_enableSubFrameWrite(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableExternalMEHints`: the 1-bit field at bit offset 2 of `_bitfield_1`.
+    #[inline]
+    pub fn enableExternalMEHints(&self) -> u32 {
+        self._bitfield_1.get(2usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableExternalMEHints` (bit offset 2, width 1).
+    #[inline]
+    pub fn set_enableExternalMEHints(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableMEOnlyMode`: the 1-bit field at bit offset 3 of `_bitfield_1`.
+    #[inline]
+    pub fn enableMEOnlyMode(&self) -> u32 {
+        self._bitfield_1.get(3usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableMEOnlyMode` (bit offset 3, width 1).
+    #[inline]
+    pub fn set_enableMEOnlyMode(&mut self, val: u32) {
+        self._bitfield_1.set(3usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableWeightedPrediction`: the 1-bit field at bit offset 4 of `_bitfield_1`.
+    #[inline]
+    pub fn enableWeightedPrediction(&self) -> u32 {
+        self._bitfield_1.get(4usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableWeightedPrediction` (bit offset 4, width 1).
+    #[inline]
+    pub fn set_enableWeightedPrediction(&mut self, val: u32) {
+        self._bitfield_1.set(4usize, 1u8, u64::from(val))
+    }
+    /// Reads `splitEncodeMode`: the 4-bit field at bit offset 5 of `_bitfield_1`.
+    #[inline]
+    pub fn splitEncodeMode(&self) -> u32 {
+        self._bitfield_1.get(5usize, 4u8) as u32
+    }
+    /// Writes `val` to `splitEncodeMode` (bit offset 5, width 4).
+    #[inline]
+    pub fn set_splitEncodeMode(&mut self, val: u32) {
+        self._bitfield_1.set(5usize, 4u8, u64::from(val))
+    }
+    /// Reads `enableOutputInVidmem`: the 1-bit field at bit offset 9 of `_bitfield_1`.
+    #[inline]
+    pub fn enableOutputInVidmem(&self) -> u32 {
+        self._bitfield_1.get(9usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableOutputInVidmem` (bit offset 9, width 1).
+    #[inline]
+    pub fn set_enableOutputInVidmem(&mut self, val: u32) {
+        self._bitfield_1.set(9usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableReconFrameOutput`: the 1-bit field at bit offset 10 of `_bitfield_1`.
+    #[inline]
+    pub fn enableReconFrameOutput(&self) -> u32 {
+        self._bitfield_1.get(10usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableReconFrameOutput` (bit offset 10, width 1).
+    #[inline]
+    pub fn set_enableReconFrameOutput(&mut self, val: u32) {
+        self._bitfield_1.set(10usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableOutputStats`: the 1-bit field at bit offset 11 of `_bitfield_1`.
+    #[inline]
+    pub fn enableOutputStats(&self) -> u32 {
+        self._bitfield_1.get(11usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableOutputStats` (bit offset 11, width 1).
+    #[inline]
+    pub fn set_enableOutputStats(&mut self, val: u32) {
+        self._bitfield_1.set(11usize, 1u8, u64::from(val))
+    }
+    /// Reads `enableUniDirectionalB`: the 1-bit field at bit offset 12 of `_bitfield_1`.
+    #[inline]
+    pub fn enableUniDirectionalB(&self) -> u32 {
+        self._bitfield_1.get(12usize, 1u8) as u32
+    }
+    /// Writes `val` to `enableUniDirectionalB` (bit offset 12, width 1).
+    #[inline]
+    pub fn set_enableUniDirectionalB(&mut self, val: u32) {
+        self._bitfield_1.set(12usize, 1u8, u64::from(val))
+    }
+    /// Reads `reservedBitFields`: the 19-bit field at bit offset 13 of `_bitfield_1`.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        self._bitfield_1.get(13usize, 19u8) as u32
+    }
+    /// Writes `val` to `reservedBitFields` (bit offset 13, width 19).
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        self._bitfield_1.set(13usize, 19u8, u64::from(val))
+    }
+    /// Packs the individual flag values into a fresh 4-byte bitfield unit.
+    ///
+    /// Each argument is written at the same bit offset and width that the
+    /// accessor of the same name in this `impl` uses.
+    #[inline]
+    pub fn new_bitfield_1(
+        reportSliceOffsets: u32,
+        enableSubFrameWrite: u32,
+        enableExternalMEHints: u32,
+        enableMEOnlyMode: u32,
+        enableWeightedPrediction: u32,
+        splitEncodeMode: u32,
+        enableOutputInVidmem: u32,
+        enableReconFrameOutput: u32,
+        enableOutputStats: u32,
+        enableUniDirectionalB: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        // (bit offset, bit width, value), listed in the order they are written.
+        let fields: [(usize, u8, u32); 11] = [
+            (0usize, 1u8, reportSliceOffsets),
+            (1usize, 1u8, enableSubFrameWrite),
+            (2usize, 1u8, enableExternalMEHints),
+            (3usize, 1u8, enableMEOnlyMode),
+            (4usize, 1u8, enableWeightedPrediction),
+            (5usize, 4u8, splitEncodeMode),
+            (9usize, 1u8, enableOutputInVidmem),
+            (10usize, 1u8, enableReconFrameOutput),
+            (11usize, 1u8, enableOutputStats),
+            (12usize, 1u8, enableUniDirectionalB),
+            (13usize, 19u8, reservedBitFields),
+        ];
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        for &(bit_offset, bit_width, value) in fields.iter() {
+            unit.set(bit_offset, bit_width, u64::from(value));
+        }
+        unit
+    }
+}
+// Alias exposing the `_NV_ENC_INITIALIZE_PARAMS` struct under the un-prefixed
+// name `NV_ENC_INITIALIZE_PARAMS`; both names refer to the same type.
+#[doc = " \\struct _NV_ENC_INITIALIZE_PARAMS"]
+#[doc = " Encode Session Initialization parameters."]
+pub type NV_ENC_INITIALIZE_PARAMS = _NV_ENC_INITIALIZE_PARAMS;
+// `#[repr(C)]` mirror of the NVENC session-reconfiguration parameters. It
+// embeds a complete `NV_ENC_INITIALIZE_PARAMS` by value; the accompanying
+// layout test pins the struct at 1816 bytes with 8-byte alignment.
+#[doc = " \\struct _NV_ENC_RECONFIGURE_PARAMS"]
+#[doc = " Encode Session Reconfigured parameters."]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_RECONFIGURE_PARAMS {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_RECONFIGURE_PARAMS_VER."]
+    pub version: u32,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32,
+    #[doc = "< [in]: Encoder session re-initialization parameters."]
+    #[doc = "If reInitEncodeParams.encodeConfig is NULL and"]
+    #[doc = "reInitEncodeParams.presetGUID is the same as the preset"]
+    #[doc = "GUID specified on the call to NvEncInitializeEncoder(),"]
+    #[doc = "EncodeAPI will continue to use the existing encode"]
+    #[doc = "configuration."]
+    #[doc = "If reInitEncodeParams.encodeConfig is NULL and"]
+    #[doc = "reInitEncodeParams.presetGUID is different from the preset"]
+    #[doc = "GUID specified on the call to NvEncInitializeEncoder(),"]
+    #[doc = "EncodeAPI will try to use the default configuration for"]
+    #[doc = "the preset specified by reInitEncodeParams.presetGUID."]
+    #[doc = "In this case, reconfiguration may fail if the new"]
+    #[doc = "configuration is incompatible with the existing"]
+    #[doc = "configuration (e.g. the new configuration results in"]
+    #[doc = "a change in the GOP structure)."]
+    pub reInitEncodeParams: NV_ENC_INITIALIZE_PARAMS,
+    // Zero-length `u32` array: occupies no bytes. Presumably present to give the
+    // packed flag word below `u32` alignment -- TODO confirm against bindgen docs.
+    pub _bitfield_align_1: [u32; 0],
+    // Four bytes of packed flags, read and written through the generated
+    // accessor methods on this type (bit offsets as used by those accessors):
+    //   bit 0      resetEncoder
+    //   bit 1      forceIDR
+    //   bits 2-31  reserved1
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved2: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_RECONFIGURE_PARAMS() {
+    // Verifies that the Rust definition of `_NV_ENC_RECONFIGURE_PARAMS` has the
+    // size, alignment and field offsets recorded when the bindings were
+    // generated.
+    //
+    // Offsets are measured on an uninitialised (but real, non-null) instance
+    // with `addr_of!`. Projecting a field through `ptr::null()` and taking a
+    // reference to it, as this test previously did, is undefined behaviour and
+    // is flagged by rustc's `deref_nullptr` lint.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_RECONFIGURE_PARAMS>::uninit();
+    let base = uninit.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `base` points at `uninit`, which is live for the whole
+                // test; `addr_of!` only computes the field address and never
+                // reads the uninitialised memory.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_RECONFIGURE_PARAMS),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_RECONFIGURE_PARAMS>(),
+        1816usize,
+        concat!("Size of: ", stringify!(_NV_ENC_RECONFIGURE_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_RECONFIGURE_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_RECONFIGURE_PARAMS))
+    );
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(reserved, 4usize);
+    assert_field_offset!(reInitEncodeParams, 8usize);
+    // Bytes 1808..1812 hold the packed `_bitfield_1` flags, which have no
+    // offset assertion of their own.
+    assert_field_offset!(reserved2, 1812usize);
+}
+impl Default for _NV_ENC_RECONFIGURE_PARAMS {
+    /// Builds a `_NV_ENC_RECONFIGURE_PARAMS` in which every byte is zero,
+    /// including the embedded `reInitEncodeParams`.
+    ///
+    /// NOTE(review): this relies on the all-zero bit pattern being a valid
+    /// value for every field of the struct -- confirm for enum-typed fields.
+    fn default() -> Self {
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: the storage has been completely written (with zero bytes);
+        // validity of that pattern is the assumption noted above.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_RECONFIGURE_PARAMS {
+    /// Reads `resetEncoder`: the 1-bit field at bit offset 0 of `_bitfield_1`.
+    #[inline]
+    pub fn resetEncoder(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` to `resetEncoder` (bit offset 0, width 1).
+    #[inline]
+    pub fn set_resetEncoder(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `forceIDR`: the 1-bit field at bit offset 1 of `_bitfield_1`.
+    #[inline]
+    pub fn forceIDR(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` to `forceIDR` (bit offset 1, width 1).
+    #[inline]
+    pub fn set_forceIDR(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `reserved1`: the 30-bit field at bit offset 2 of `_bitfield_1`.
+    #[inline]
+    pub fn reserved1(&self) -> u32 {
+        self._bitfield_1.get(2usize, 30u8) as u32
+    }
+    /// Writes `val` to `reserved1` (bit offset 2, width 30).
+    #[inline]
+    pub fn set_reserved1(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 30u8, u64::from(val))
+    }
+    /// Packs the individual flag values into a fresh 4-byte bitfield unit.
+    ///
+    /// Each argument is written at the same bit offset and width that the
+    /// accessor of the same name in this `impl` uses.
+    #[inline]
+    pub fn new_bitfield_1(
+        resetEncoder: u32,
+        forceIDR: u32,
+        reserved1: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        // (bit offset, bit width, value), listed in the order they are written.
+        let fields: [(usize, u8, u32); 3] = [
+            (0usize, 1u8, resetEncoder),
+            (1usize, 1u8, forceIDR),
+            (2usize, 30u8, reserved1),
+        ];
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        for &(bit_offset, bit_width, value) in fields.iter() {
+            unit.set(bit_offset, bit_width, u64::from(value));
+        }
+        unit
+    }
+}
+/// Alias for [`_NV_ENC_RECONFIGURE_PARAMS`]: encode session reconfiguration parameters.
+pub type NV_ENC_RECONFIGURE_PARAMS = _NV_ENC_RECONFIGURE_PARAMS;
+/// Encoder preset config.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct _NV_ENC_PRESET_CONFIG {
+    /// \[in\]: Struct version. Must be set to `NV_ENC_PRESET_CONFIG_VER`.
+    pub version: u32,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved: u32,
+    /// \[out\]: Preset config returned by the Nvidia Video Encoder interface.
+    pub presetCfg: NV_ENC_CONFIG,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved1: [u32; 256usize],
+    /// \[in\]: Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+/// Checks the size, alignment and field offsets of `_NV_ENC_PRESET_CONFIG`
+/// against the values recorded when the bindings were generated.
+#[test]
+fn bindgen_test_layout__NV_ENC_PRESET_CONFIG() {
+    // Field offsets are measured on uninitialized storage through raw pointers.
+    // Forming `&(*null).field` dereferences a null pointer, which is undefined
+    // behavior (rustc's `deref_nullptr` lint flags it); `addr_of!` never
+    // creates a reference and never reads the memory.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_PRESET_CONFIG>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $offset:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $offset,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_PRESET_CONFIG),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PRESET_CONFIG>(),
+        5128usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PRESET_CONFIG))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PRESET_CONFIG>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PRESET_CONFIG))
+    );
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(reserved, 4usize);
+    assert_field_offset!(presetCfg, 8usize);
+    assert_field_offset!(reserved1, 3592usize);
+    assert_field_offset!(reserved2, 4616usize);
+}
+impl Default for _NV_ENC_PRESET_CONFIG {
+    /// Returns a value whose storage was filled with zero bytes.
+    fn default() -> Self {
+        // Ask for zero-filled storage up front rather than zeroing it afterwards.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: assumes the all-zero bit pattern is a valid value of `Self`.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+/// Alias for [`_NV_ENC_PRESET_CONFIG`]: encoder preset config.
+pub type NV_ENC_PRESET_CONFIG = _NV_ENC_PRESET_CONFIG;
+/// MVC-specific parameters to be sent on a per-frame basis.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PIC_PARAMS_MVC {
+    /// \[in\]: Struct version. Must be set to `NV_ENC_PIC_PARAMS_MVC_VER`.
+    pub version: u32,
+    /// \[in\]: Specifies the view ID associated with the current input view.
+    pub viewID: u32,
+    /// \[in\]: Specifies the temporal ID associated with the current input view.
+    pub temporalID: u32,
+    /// \[in\]: Specifies the priority ID associated with the current input view.
+    /// Reserved and ignored by the NvEncodeAPI interface.
+    pub priorityID: u32,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved1: [u32; 12usize],
+    /// \[in\]: Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 8usize],
+}
+/// Checks the size, alignment and field offsets of `_NV_ENC_PIC_PARAMS_MVC`
+/// against the values recorded when the bindings were generated.
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS_MVC() {
+    // Field offsets are measured on uninitialized storage through raw pointers.
+    // Forming `&(*null).field` dereferences a null pointer, which is undefined
+    // behavior (rustc's `deref_nullptr` lint flags it); `addr_of!` never
+    // creates a reference and never reads the memory.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS_MVC>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $offset:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $offset,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_PIC_PARAMS_MVC),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS_MVC>(),
+        128usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS_MVC))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS_MVC>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS_MVC))
+    );
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(viewID, 4usize);
+    assert_field_offset!(temporalID, 8usize);
+    assert_field_offset!(priorityID, 12usize);
+    assert_field_offset!(reserved1, 16usize);
+    assert_field_offset!(reserved2, 64usize);
+}
+impl Default for _NV_ENC_PIC_PARAMS_MVC {
+    /// Returns a value whose storage was filled with zero bytes.
+    fn default() -> Self {
+        // Ask for zero-filled storage up front rather than zeroing it afterwards.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: assumes the all-zero bit pattern is a valid value of `Self`.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+/// Alias for [`_NV_ENC_PIC_PARAMS_MVC`]: MVC-specific parameters to be sent on a per-frame basis.
+pub type NV_ENC_PIC_PARAMS_MVC = _NV_ENC_PIC_PARAMS_MVC;
+/// H264 extension picture parameters.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union _NV_ENC_PIC_PARAMS_H264_EXT {
+    /// \[in\]: Specifies the MVC picture parameters.
+    pub mvcPicParams: NV_ENC_PIC_PARAMS_MVC,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved1: [u32; 32usize],
+}
+/// Checks the size, alignment and member offsets of the
+/// `_NV_ENC_PIC_PARAMS_H264_EXT` union against the values recorded when the
+/// bindings were generated.
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS_H264_EXT() {
+    // Member offsets are measured on uninitialized storage through raw pointers.
+    // Forming `&(*null).field` dereferences a null pointer, which is undefined
+    // behavior (rustc's `deref_nullptr` lint flags it); `addr_of!` never
+    // creates a reference and never reads the memory.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS_H264_EXT>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $offset:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $offset,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_PIC_PARAMS_H264_EXT),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS_H264_EXT>(),
+        128usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS_H264_EXT))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS_H264_EXT>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS_H264_EXT))
+    );
+    assert_field_offset!(mvcPicParams, 0usize);
+    assert_field_offset!(reserved1, 0usize);
+}
+impl Default for _NV_ENC_PIC_PARAMS_H264_EXT {
+    /// Returns a value whose storage was filled with zero bytes.
+    fn default() -> Self {
+        // Ask for zero-filled storage up front rather than zeroing it afterwards.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: assumes the all-zero bit pattern is a valid value of `Self`.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+/// Alias for [`_NV_ENC_PIC_PARAMS_H264_EXT`]: H264 extension picture parameters.
+pub type NV_ENC_PIC_PARAMS_H264_EXT = _NV_ENC_PIC_PARAMS_H264_EXT;
+/// User SEI message.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_SEI_PAYLOAD {
+    /// \[in\] SEI payload size in bytes. SEI payload must be byte aligned, as described in Annex D.
+    pub payloadSize: u32,
+    /// \[in\] SEI payload types and syntax can be found in Annex D of the H.264 Specification.
+    pub payloadType: u32,
+    /// \[in\] Pointer to user data.
+    pub payload: *mut u8,
+}
+/// Checks the size, alignment and field offsets of `_NV_ENC_SEI_PAYLOAD`
+/// against the values recorded when the bindings were generated.
+#[test]
+fn bindgen_test_layout__NV_ENC_SEI_PAYLOAD() {
+    // Field offsets are measured on uninitialized storage through raw pointers.
+    // Forming `&(*null).field` dereferences a null pointer, which is undefined
+    // behavior (rustc's `deref_nullptr` lint flags it); `addr_of!` never
+    // creates a reference and never reads the memory.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_SEI_PAYLOAD>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! assert_field_offset {
+        ($field:ident, $offset:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $offset,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_SEI_PAYLOAD),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_SEI_PAYLOAD>(),
+        16usize,
+        concat!("Size of: ", stringify!(_NV_ENC_SEI_PAYLOAD))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_SEI_PAYLOAD>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_SEI_PAYLOAD))
+    );
+    assert_field_offset!(payloadSize, 0usize);
+    assert_field_offset!(payloadType, 4usize);
+    assert_field_offset!(payload, 8usize);
+}
+impl Default for _NV_ENC_SEI_PAYLOAD {
+    /// Returns a value whose storage was filled with zero bytes.
+    fn default() -> Self {
+        // Ask for zero-filled storage up front rather than zeroing it afterwards.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: assumes the all-zero bit pattern is a valid value of `Self`.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+/// Alias for [`_NV_ENC_SEI_PAYLOAD`]: user SEI message.
+pub type NV_ENC_SEI_PAYLOAD = _NV_ENC_SEI_PAYLOAD;
+/// H264-specific encode picture parameters, sent on a per-frame basis.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct _NV_ENC_PIC_PARAMS_H264 {
+    /// \[in\]: Specifies the display POC syntax. This is required to be set if
+    /// the client is handling the picture type decision.
+    pub displayPOCSyntax: u32,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved3: u32,
+    /// \[in\]: Set to 1 for a reference picture. This is ignored if
+    /// `NV_ENC_INITIALIZE_PARAMS::enablePTD` is set to 1.
+    pub refPicFlag: u32,
+    /// \[in\]: Specifies the colour plane ID associated with the current input.
+    pub colourPlaneId: u32,
+    /// \[in\]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
+    /// When outputRecoveryPointSEI is set this value is used for
+    /// recovery_frame_cnt in the recovery point SEI message.
+    /// forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the
+    /// GOP structure specified.
+    pub forceIntraRefreshWithFrameCnt: u32,
+    /// Zero-length member placed directly before the bit-field storage.
+    pub _bitfield_align_1: [u32; 0],
+    /// Storage for the bit-fields read and written by the accessor methods
+    /// (`constrainedFrame`, `sliceModeDataUpdate`, `ltrMarkFrame`,
+    /// `ltrUseFrames`, `reservedBitFields`).
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// \[in\]: Deprecated.
+    pub sliceTypeData: *mut u8,
+    /// \[in\]: Deprecated.
+    pub sliceTypeArrayCnt: u32,
+    /// \[in\]: Specifies the number of elements allocated in the seiPayloadArray array.
+    pub seiPayloadArrayCnt: u32,
+    /// \[in\]: Array of SEI payloads which will be inserted for this frame.
+    pub seiPayloadArray: *mut NV_ENC_SEI_PAYLOAD,
+    /// \[in\]: This parameter in conjunction with sliceModeData specifies the
+    /// way in which the picture is divided into slices:
+    /// sliceMode = 0 MB based slices, sliceMode = 1 Byte based slices,
+    /// sliceMode = 2 MB row based slices, sliceMode = 3 numSlices in Picture.
+    /// When forceIntraRefreshWithFrameCnt is set it will have priority over the
+    /// sliceMode setting.
+    /// When sliceMode == 0 and sliceModeData == 0 the whole picture will be
+    /// coded with one slice.
+    pub sliceMode: u32,
+    /// \[in\]: Specifies the parameter needed for sliceMode. For:
+    /// sliceMode = 0, sliceModeData specifies # of MBs in each slice (except last slice);
+    /// sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice);
+    /// sliceMode = 2, sliceModeData specifies # of MB rows in each slice (except last slice);
+    /// sliceMode = 3, sliceModeData specifies number of slices in the picture.
+    /// Driver will divide picture into slices optimally.
+    pub sliceModeData: u32,
+    /// \[in\]: Specifies the long term reference frame index to use for marking
+    /// this frame as LTR.
+    pub ltrMarkFrameIdx: u32,
+    /// \[in\]: Specifies the associated bitmap of LTR frame indices to use when
+    /// encoding this frame.
+    pub ltrUseFrameBitmap: u32,
+    /// \[in\]: Not supported. Reserved for future use and must be set to 0.
+    pub ltrUsageMode: u32,
+    /// \[in\]: Specifies the number of slices to be forced to Intra in the
+    /// current picture. This option along with the forceIntraSliceIdx\[\] array
+    /// needs to be used with sliceMode = 3 only.
+    pub forceIntraSliceCount: u32,
+    /// \[in\]: Slice indices to be forced to intra in the current picture. Each
+    /// slice index should be <= num_slices_in_picture - 1. Index starts from 0
+    /// for the first slice. The number of entries in this array should be equal
+    /// to forceIntraSliceCount.
+    pub forceIntraSliceIdx: *mut u32,
+    /// \[in\]: Specifies the H264 extension config parameters using this config.
+    pub h264ExtPicParams: NV_ENC_PIC_PARAMS_H264_EXT,
+    /// \[in\]: Specifies the clock timestamp sets used in picture timing SEI.
+    /// Applicable only when `NV_ENC_CONFIG_H264::enableTimeCode` is set to 1.
+    pub timeCode: NV_ENC_TIME_CODE,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved: [u32; 202usize],
+    /// \[in\]: Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 61usize],
+}
+/// Checks the size, alignment and field offsets of `_NV_ENC_PIC_PARAMS_H264`
+/// against the values recorded when the bindings were generated.
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS_H264() {
+    // Field offsets are measured on uninitialized storage through raw pointers.
+    // Forming `&(*null).field` dereferences a null pointer, which is undefined
+    // behavior (rustc's `deref_nullptr` lint flags it); `addr_of!` never
+    // creates a reference and never reads the memory.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS_H264>::uninit();
+    let base = storage.as_ptr();
+    // One assertion per field; the failure message matches the text the
+    // hand-expanded assertions produced.
+    macro_rules! assert_field_offset {
+        ($field:ident, $offset:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $offset,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_PIC_PARAMS_H264),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS_H264>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS_H264))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS_H264>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS_H264))
+    );
+    assert_field_offset!(displayPOCSyntax, 0usize);
+    assert_field_offset!(reserved3, 4usize);
+    assert_field_offset!(refPicFlag, 8usize);
+    assert_field_offset!(colourPlaneId, 12usize);
+    assert_field_offset!(forceIntraRefreshWithFrameCnt, 16usize);
+    assert_field_offset!(sliceTypeData, 24usize);
+    assert_field_offset!(sliceTypeArrayCnt, 32usize);
+    assert_field_offset!(seiPayloadArrayCnt, 36usize);
+    assert_field_offset!(seiPayloadArray, 40usize);
+    assert_field_offset!(sliceMode, 48usize);
+    assert_field_offset!(sliceModeData, 52usize);
+    assert_field_offset!(ltrMarkFrameIdx, 56usize);
+    assert_field_offset!(ltrUseFrameBitmap, 60usize);
+    assert_field_offset!(ltrUsageMode, 64usize);
+    assert_field_offset!(forceIntraSliceCount, 68usize);
+    assert_field_offset!(forceIntraSliceIdx, 72usize);
+    assert_field_offset!(h264ExtPicParams, 80usize);
+    assert_field_offset!(timeCode, 208usize);
+    assert_field_offset!(reserved, 240usize);
+    assert_field_offset!(reserved2, 1048usize);
+}
+impl Default for _NV_ENC_PIC_PARAMS_H264 {
+    /// Returns a value whose storage was filled with zero bytes.
+    fn default() -> Self {
+        // Ask for zero-filled storage up front rather than zeroing it afterwards.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: assumes the all-zero bit pattern is a valid value of `Self`.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+impl _NV_ENC_PIC_PARAMS_H264 {
+    /// Reads `constrainedFrame`: 1 bit at bit offset 0 of `_bitfield_1`.
+    #[inline]
+    pub fn constrainedFrame(&self) -> u32 {
+        let raw = self._bitfield_1.get(0usize, 1u8);
+        raw as u32
+    }
+    /// Writes `val` to `constrainedFrame`: 1 bit at bit offset 0 of `_bitfield_1`.
+    #[inline]
+    pub fn set_constrainedFrame(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `sliceModeDataUpdate`: 1 bit at bit offset 1 of `_bitfield_1`.
+    #[inline]
+    pub fn sliceModeDataUpdate(&self) -> u32 {
+        let raw = self._bitfield_1.get(1usize, 1u8);
+        raw as u32
+    }
+    /// Writes `val` to `sliceModeDataUpdate`: 1 bit at bit offset 1 of `_bitfield_1`.
+    #[inline]
+    pub fn set_sliceModeDataUpdate(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads `ltrMarkFrame`: 1 bit at bit offset 2 of `_bitfield_1`.
+    #[inline]
+    pub fn ltrMarkFrame(&self) -> u32 {
+        let raw = self._bitfield_1.get(2usize, 1u8);
+        raw as u32
+    }
+    /// Writes `val` to `ltrMarkFrame`: 1 bit at bit offset 2 of `_bitfield_1`.
+    #[inline]
+    pub fn set_ltrMarkFrame(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 1u8, u64::from(val))
+    }
+    /// Reads `ltrUseFrames`: 1 bit at bit offset 3 of `_bitfield_1`.
+    #[inline]
+    pub fn ltrUseFrames(&self) -> u32 {
+        let raw = self._bitfield_1.get(3usize, 1u8);
+        raw as u32
+    }
+    /// Writes `val` to `ltrUseFrames`: 1 bit at bit offset 3 of `_bitfield_1`.
+    #[inline]
+    pub fn set_ltrUseFrames(&mut self, val: u32) {
+        self._bitfield_1.set(3usize, 1u8, u64::from(val))
+    }
+    /// Reads `reservedBitFields`: 28 bits starting at bit offset 4 of `_bitfield_1`.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        let raw = self._bitfield_1.get(4usize, 28u8);
+        raw as u32
+    }
+    /// Writes `val` to `reservedBitFields`: 28 bits starting at bit offset 4 of `_bitfield_1`.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        self._bitfield_1.set(4usize, 28u8, u64::from(val))
+    }
+    /// Builds a 4-byte bit-field unit holding the five given field values.
+    #[inline]
+    pub fn new_bitfield_1(
+        constrainedFrame: u32,
+        sliceModeDataUpdate: u32,
+        ltrMarkFrame: u32,
+        ltrUseFrames: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        // (bit offset, bit width, value) for every field, in declaration order.
+        let fields: [(usize, u8, u32); 5] = [
+            (0usize, 1u8, constrainedFrame),
+            (1usize, 1u8, sliceModeDataUpdate),
+            (2usize, 1u8, ltrMarkFrame),
+            (3usize, 1u8, ltrUseFrames),
+            (4usize, 28u8, reservedBitFields),
+        ];
+        for &(bit_offset, bit_width, value) in fields.iter() {
+            unit.set(bit_offset, bit_width, u64::from(value));
+        }
+        unit
+    }
+}
+/// Alias for [`_NV_ENC_PIC_PARAMS_H264`]: H264-specific encode picture parameters, sent on a per-frame basis.
+pub type NV_ENC_PIC_PARAMS_H264 = _NV_ENC_PIC_PARAMS_H264;
+/// HEVC-specific encode picture parameters, sent on a per-frame basis.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PIC_PARAMS_HEVC {
+    /// \[in\]: Specifies the display POC syntax. This is required to be set if
+    /// the client is handling the picture type decision.
+    pub displayPOCSyntax: u32,
+    /// \[in\]: Set to 1 for a reference picture. This is ignored if
+    /// `NV_ENC_INITIALIZE_PARAMS::enablePTD` is set to 1.
+    pub refPicFlag: u32,
+    /// \[in\]: Specifies the temporal id of the picture.
+    pub temporalId: u32,
+    /// \[in\]: Forces an intra refresh with duration equal to intraRefreshFrameCnt.
+    /// When outputRecoveryPointSEI is set this value is used for
+    /// recovery_frame_cnt in the recovery point SEI message.
+    /// forceIntraRefreshWithFrameCnt cannot be used if B frames are used in the
+    /// GOP structure specified.
+    pub forceIntraRefreshWithFrameCnt: u32,
+    /// Zero-length member placed directly before the bit-field storage.
+    pub _bitfield_align_1: [u32; 0],
+    /// Storage unit for this struct's bit-fields.
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved1: u32,
+    /// \[in\]: Array which specifies the slice type used to force intra slice
+    /// for a particular slice. Currently supported only for
+    /// `NV_ENC_CONFIG_H264::sliceMode` == 3.
+    /// Client should allocate array of size sliceModeData where sliceModeData
+    /// is specified in field of `_NV_ENC_CONFIG_H264`.
+    /// Array element with index n corresponds to nth slice. To force a
+    /// particular slice to intra client should set corresponding array element
+    /// to NV_ENC_SLICE_TYPE_I; all other array elements should be set to
+    /// NV_ENC_SLICE_TYPE_DEFAULT.
+    pub sliceTypeData: *mut u8,
+    /// \[in\]: Client should set this to the number of elements allocated in
+    /// the sliceTypeData array. If sliceTypeData is NULL then this should be
+    /// set to 0.
+    pub sliceTypeArrayCnt: u32,
+    /// \[in\]: This parameter in conjunction with sliceModeData specifies the
+    /// way in which the picture is divided into slices:
+    /// sliceMode = 0 CTU based slices, sliceMode = 1 Byte based slices,
+    /// sliceMode = 2 CTU row based slices, sliceMode = 3 numSlices in Picture.
+    /// When forceIntraRefreshWithFrameCnt is set it will have priority over the
+    /// sliceMode setting.
+    /// When sliceMode == 0 and sliceModeData == 0 the whole picture will be
+    /// coded with one slice.
+    pub sliceMode: u32,
+    /// \[in\]: Specifies the parameter needed for sliceMode. For:
+    /// sliceMode = 0, sliceModeData specifies # of CTUs in each slice (except last slice);
+    /// sliceMode = 1, sliceModeData specifies maximum # of bytes in each slice (except last slice);
+    /// sliceMode = 2, sliceModeData specifies # of CTU rows in each slice (except last slice);
+    /// sliceMode = 3, sliceModeData specifies number of slices in the picture.
+    /// Driver will divide picture into slices optimally.
+    pub sliceModeData: u32,
+    /// \[in\]: Specifies the long term reference frame index to use for marking
+    /// this frame as LTR.
+    pub ltrMarkFrameIdx: u32,
+    /// \[in\]: Specifies the associated bitmap of LTR frame indices to use when
+    /// encoding this frame.
+    pub ltrUseFrameBitmap: u32,
+    /// \[in\]: Not supported. Reserved for future use and must be set to 0.
+    pub ltrUsageMode: u32,
+    /// \[in\]: Specifies the number of elements allocated in the seiPayloadArray array.
+    pub seiPayloadArrayCnt: u32,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved: u32,
+    /// \[in\]: Array of SEI payloads which will be inserted for this frame.
+    pub seiPayloadArray: *mut NV_ENC_SEI_PAYLOAD,
+    /// \[in\]: Specifies the clock timestamp sets used in time code SEI.
+    /// Applicable only when `NV_ENC_CONFIG_HEVC::enableTimeCodeSEI` is set to 1.
+    pub timeCode: NV_ENC_TIME_CODE,
+    /// \[in\]: Specifies the number of temporal layers to be used for
+    /// hierarchical coding. The set only takes place when
+    /// temporalConfigUpdate == 1.
+    pub numTemporalLayers: u32,
+    /// \[in\]: Specifies the view id of the picture.
+    pub viewId: u32,
+    /// \[in\]: Specifies the 3D reference displays information SEI message.
+    /// Applicable only when
+    /// `NV_ENC_CONFIG_HEVC::outputHevc3DReferenceDisplayInfo` is set to 1.
+    pub p3DReferenceDisplayInfo: *mut HEVC_3D_REFERENCE_DISPLAY_INFO,
+    /// \[in\]: Specifies the Content light level information SEI syntax.
+    pub pMaxCll: *mut CONTENT_LIGHT_LEVEL,
+    /// \[in\]: Specifies the Mastering display colour volume SEI syntax.
+    pub pMasteringDisplay: *mut MASTERING_DISPLAY_INFO,
+    /// \[in\]: Reserved and must be set to 0.
+    pub reserved2: [u32; 234usize],
+    /// \[in\]: Reserved and must be set to NULL.
+    pub reserved3: [*mut ::std::os::raw::c_void; 58usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS_HEVC() {
+    // Layout check for `_NV_ENC_PIC_PARAMS_HEVC`: asserts the struct's size, alignment and the
+    // byte offset of each named field against the constants recorded in this file.
+    // Offsets are measured with `addr_of!` on a `MaybeUninit` place rather than through a
+    // null pointer: dereferencing null is undefined behaviour (rustc's `deref_nullptr`
+    // lint), whereas `addr_of!` only computes an address and never forms a reference.
+    // The uninitialised value is used purely as an address and is never read.
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS_HEVC>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS_HEVC>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS_HEVC))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS_HEVC>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS_HEVC))
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).displayPOCSyntax) as usize - ptr as usize
+        },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(displayPOCSyntax)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).refPicFlag) as usize - ptr as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(refPicFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).temporalId) as usize - ptr as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(temporalId)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).forceIntraRefreshWithFrameCnt) as usize - ptr as usize
+        },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(forceIntraRefreshWithFrameCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize
+        },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).sliceTypeData) as usize - ptr as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(sliceTypeData)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).sliceTypeArrayCnt) as usize - ptr as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(sliceTypeArrayCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).sliceMode) as usize - ptr as usize
+        },
+        36usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(sliceMode)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).sliceModeData) as usize - ptr as usize
+        },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(sliceModeData)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).ltrMarkFrameIdx) as usize - ptr as usize
+        },
+        44usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(ltrMarkFrameIdx)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).ltrUseFrameBitmap) as usize - ptr as usize
+        },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(ltrUseFrameBitmap)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).ltrUsageMode) as usize - ptr as usize
+        },
+        52usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(ltrUsageMode)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).seiPayloadArrayCnt) as usize - ptr as usize
+        },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(seiPayloadArrayCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize
+        },
+        60usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).seiPayloadArray) as usize - ptr as usize
+        },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(seiPayloadArray)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).timeCode) as usize - ptr as usize
+        },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(timeCode)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).numTemporalLayers) as usize - ptr as usize
+        },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(numTemporalLayers)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).viewId) as usize - ptr as usize },
+        108usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(viewId)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).p3DReferenceDisplayInfo) as usize - ptr as usize
+        },
+        112usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(p3DReferenceDisplayInfo)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).pMaxCll) as usize - ptr as usize },
+        120usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(pMaxCll)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).pMasteringDisplay) as usize - ptr as usize
+        },
+        128usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(pMasteringDisplay)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize
+        },
+        136usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved3) as usize - ptr as usize
+        },
+        1072usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_HEVC),
+            "::",
+            stringify!(reserved3)
+        )
+    );
+}
+impl Default for _NV_ENC_PIC_PARAMS_HEVC {
+    /// Returns an instance in which every byte of the struct is zero.
+    fn default() -> Self {
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every field.
+        // NOTE(review): field types declared elsewhere in this file are not checked here - confirm.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+impl _NV_ENC_PIC_PARAMS_HEVC {
+    /// Returns `constrainedFrame`: the 1-bit field at bit offset 0 of `_bitfield_1`.
+    #[inline]
+    pub fn constrainedFrame(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Stores `val` into `constrainedFrame` (bit offset 0, width 1) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_constrainedFrame(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, val as u64)
+    }
+    /// Returns `sliceModeDataUpdate`: the 1-bit field at bit offset 1 of `_bitfield_1`.
+    #[inline]
+    pub fn sliceModeDataUpdate(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Stores `val` into `sliceModeDataUpdate` (bit offset 1, width 1) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_sliceModeDataUpdate(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, val as u64)
+    }
+    /// Returns `ltrMarkFrame`: the 1-bit field at bit offset 2 of `_bitfield_1`.
+    #[inline]
+    pub fn ltrMarkFrame(&self) -> u32 {
+        self._bitfield_1.get(2usize, 1u8) as u32
+    }
+    /// Stores `val` into `ltrMarkFrame` (bit offset 2, width 1) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_ltrMarkFrame(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 1u8, val as u64)
+    }
+    /// Returns `ltrUseFrames`: the 1-bit field at bit offset 3 of `_bitfield_1`.
+    #[inline]
+    pub fn ltrUseFrames(&self) -> u32 {
+        self._bitfield_1.get(3usize, 1u8) as u32
+    }
+    /// Stores `val` into `ltrUseFrames` (bit offset 3, width 1) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_ltrUseFrames(&mut self, val: u32) {
+        self._bitfield_1.set(3usize, 1u8, val as u64)
+    }
+    /// Returns `temporalConfigUpdate`: the 1-bit field at bit offset 4 of `_bitfield_1`.
+    #[inline]
+    pub fn temporalConfigUpdate(&self) -> u32 {
+        self._bitfield_1.get(4usize, 1u8) as u32
+    }
+    /// Stores `val` into `temporalConfigUpdate` (bit offset 4, width 1) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_temporalConfigUpdate(&mut self, val: u32) {
+        self._bitfield_1.set(4usize, 1u8, val as u64)
+    }
+    /// Returns `reservedBitFields`: the 27-bit field at bit offset 5 of `_bitfield_1`.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        self._bitfield_1.get(5usize, 27u8) as u32
+    }
+    /// Stores `val` into `reservedBitFields` (bit offset 5, width 27) of `_bitfield_1`.
+    /// NOTE(review): bits of `val` above the field width are presumably discarded - confirm.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        self._bitfield_1.set(5usize, 27u8, val as u64)
+    }
+    /// Packs the given field values into a fresh storage unit of the type used by `_bitfield_1`.
+    ///
+    /// Each argument is written with `__BindgenBitfieldUnit::set`, in the order listed, at the
+    /// position shown (bit offset, width in bits):
+    /// * `constrainedFrame` - offset 0, width 1
+    /// * `sliceModeDataUpdate` - offset 1, width 1
+    /// * `ltrMarkFrame` - offset 2, width 1
+    /// * `ltrUseFrames` - offset 3, width 1
+    /// * `temporalConfigUpdate` - offset 4, width 1
+    /// * `reservedBitFields` - offset 5, width 27
+    ///
+    /// Returns the populated unit; there is no receiver, so no existing struct is modified.
+    #[inline]
+    pub fn new_bitfield_1(
+        constrainedFrame: u32,
+        sliceModeDataUpdate: u32,
+        ltrMarkFrame: u32,
+        ltrUseFrames: u32,
+        temporalConfigUpdate: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        // One `(bit offset, bit width, value)` entry per field, lowest bit offset first.
+        let fields: [(usize, u8, u32); 6] = [
+            (0usize, 1u8, constrainedFrame),
+            (1usize, 1u8, sliceModeDataUpdate),
+            (2usize, 1u8, ltrMarkFrame),
+            (3usize, 1u8, ltrUseFrames),
+            (4usize, 1u8, temporalConfigUpdate),
+            (5usize, 27u8, reservedBitFields),
+        ];
+        for &(bit_offset, bit_width, value) in fields.iter() {
+            unit.set(bit_offset, bit_width, value as u64);
+        }
+        unit
+    }
+}
+#[doc = " \\struct _NV_ENC_PIC_PARAMS_HEVC"]
+#[doc = " HEVC specific enc pic params. sent on a per frame basis."]
+pub type NV_ENC_PIC_PARAMS_HEVC = _NV_ENC_PIC_PARAMS_HEVC; // Same type, nameable without the leading underscore.
+/// AV1-specific per-picture encode parameters, sent on a per-frame basis.
+/// Mirrors the C `struct _NV_ENC_PIC_PARAMS_AV1`; `#[repr(C)]` gives it C field layout.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_PIC_PARAMS_AV1 {
+    /// Input: the display POC syntax. Must be set if the client is handling the picture type decision.
+    pub displayPOCSyntax: u32,
+    /// Input: set to 1 for a reference picture. Ignored if `NV_ENC_INITIALIZE_PARAMS::enablePTD` is set to 1.
+    pub refPicFlag: u32,
+    /// Input: the temporal id of the picture.
+    pub temporalId: u32,
+    /// Input: forces an intra refresh with duration equal to `intraRefreshFrameCnt`.
+    /// `forceIntraRefreshWithFrameCnt` cannot be used if B frames are used in the GOP structure specified.
+    pub forceIntraRefreshWithFrameCnt: u32,
+    /// Zero-length `u32` array: occupies no bytes but carries `u32` alignment.
+    pub _bitfield_align_1: [u32; 0],
+    /// Four bytes of packed bit flags (`goldenFrameFlag` through `temporalConfigUpdate`, then `reservedBitFields`); use the accessor methods on this type.
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// Input: together with the `enableCustomTileConfig` flag and the `tileWidths` array, specifies how the picture is divided into tile columns.
+    /// When `enableCustomTileConfig == 0`, the picture is uniformly divided into `numTileColumns` tile columns; a value that is not a power of 2 is rounded down to the next power of 2, and 0 codes the picture with the smallest number of vertical tiles allowed by the standard.
+    /// When `enableCustomTileConfig == 1`, `numTileColumns` must be > 0 and <= `NV_MAX_TILE_COLS_AV1`, and `tileWidths` must point to a valid array of `numTileColumns` entries.
+    /// Entry i specifies the width, in 64x64 CTU units, of tile column i. The sum of all the entries should equal the picture width in 64x64 CTU units.
+    pub numTileColumns: u32,
+    /// Input: together with the `enableCustomTileConfig` flag and the `tileHeights` array, specifies how the picture is divided into tile rows.
+    /// When `enableCustomTileConfig == 0`, the picture is uniformly divided into `numTileRows` tile rows; a value that is not a power of 2 is rounded down to the next power of 2, and 0 codes the picture with the smallest number of horizontal tiles allowed by the standard.
+    /// When `enableCustomTileConfig == 1`, `numTileRows` must be > 0 and <= `NV_MAX_TILE_ROWS_AV1`, and `tileHeights` must point to a valid array of `numTileRows` entries.
+    /// Entry i specifies the height, in 64x64 CTU units, of tile row i. The sum of all the entries should equal the picture height in 64x64 CTU units.
+    pub numTileRows: u32,
+    /// Input: reserved and must be set to 0.
+    pub reserved: u32,
+    /// Input: if `enableCustomTileConfig == 1`, `tileWidths[i]` specifies the width of tile column i in 64x64 CTU units, with 0 <= i <= `numTileColumns` - 1.
+    pub tileWidths: *mut u32,
+    /// Input: if `enableCustomTileConfig == 1`, `tileHeights[i]` specifies the height of tile row i in 64x64 CTU units, with 0 <= i <= `numTileRows` - 1.
+    pub tileHeights: *mut u32,
+    /// Input: the number of elements allocated in the `obuPayloadArray` array.
+    pub obuPayloadArrayCnt: u32,
+    /// Input: reserved and must be set to 0.
+    pub reserved1: u32,
+    /// Input: array of OBU payloads which will be inserted for this frame.
+    pub obuPayloadArray: *mut NV_ENC_SEI_PAYLOAD,
+    /// Input: if `filmGrainParamsUpdate == 1`, must point to a valid `NV_ENC_FILM_GRAIN_PARAMS_AV1` structure.
+    pub filmGrainParams: *mut NV_ENC_FILM_GRAIN_PARAMS_AV1,
+    /// Input: the long term reference frame index to use for marking this frame as LTR.
+    pub ltrMarkFrameIdx: u32,
+    /// Input: the associated bitmap of LTR frame indices to use when encoding this frame.
+    pub ltrUseFrameBitmap: u32,
+    /// Input: the number of temporal layers to be used for hierarchical coding. Only takes effect when `temporalConfigUpdate == 1`.
+    pub numTemporalLayers: u32,
+    /// Input: reserved and must be set to 0.
+    pub reserved4: u32,
+    /// Input: the Content light level metadata syntax.
+    pub pMaxCll: *mut CONTENT_LIGHT_LEVEL,
+    /// Input: the Mastering display colour volume metadata syntax.
+    pub pMasteringDisplay: *mut MASTERING_DISPLAY_INFO,
+    /// Input: reserved and must be set to 0.
+    pub reserved2: [u32; 242usize],
+    /// Input: reserved and must be set to NULL.
+    pub reserved3: [*mut ::std::os::raw::c_void; 59usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS_AV1() {
+    // Checks size, alignment and each field offset of `_NV_ENC_PIC_PARAMS_AV1` against fixed values.
+    // Offsets come from `addr_of!` on a never-read `MaybeUninit` place; going through a null
+    // pointer instead would be undefined behaviour (rustc's `deref_nullptr` lint).
+    let uninit = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS_AV1>::uninit();
+    let ptr = uninit.as_ptr();
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS_AV1>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS_AV1))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS_AV1>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS_AV1))
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).displayPOCSyntax) as usize - ptr as usize
+        },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(displayPOCSyntax)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).refPicFlag) as usize - ptr as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(refPicFlag)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).temporalId) as usize - ptr as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(temporalId)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).forceIntraRefreshWithFrameCnt) as usize - ptr as usize
+        },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(forceIntraRefreshWithFrameCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).numTileColumns) as usize - ptr as usize
+        },
+        20usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(numTileColumns)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).numTileRows) as usize - ptr as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(numTileRows)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).reserved) as usize - ptr as usize },
+        28usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).tileWidths) as usize - ptr as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(tileWidths)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).tileHeights) as usize - ptr as usize
+        },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(tileHeights)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).obuPayloadArrayCnt) as usize - ptr as usize
+        },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(obuPayloadArrayCnt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved1) as usize - ptr as usize
+        },
+        52usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).obuPayloadArray) as usize - ptr as usize
+        },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(obuPayloadArray)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).filmGrainParams) as usize - ptr as usize
+        },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(filmGrainParams)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).ltrMarkFrameIdx) as usize - ptr as usize
+        },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(ltrMarkFrameIdx)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).ltrUseFrameBitmap) as usize - ptr as usize
+        },
+        76usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(ltrUseFrameBitmap)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).numTemporalLayers) as usize - ptr as usize
+        },
+        80usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(numTemporalLayers)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved4) as usize - ptr as usize
+        },
+        84usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(reserved4)
+        )
+    );
+    assert_eq!(
+        unsafe { ::std::ptr::addr_of!((*ptr).pMaxCll) as usize - ptr as usize },
+        88usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(pMaxCll)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).pMasteringDisplay) as usize - ptr as usize
+        },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(pMasteringDisplay)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved2) as usize - ptr as usize
+        },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            ::std::ptr::addr_of!((*ptr).reserved3) as usize - ptr as usize
+        },
+        1072usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_PIC_PARAMS_AV1),
+            "::",
+            stringify!(reserved3)
+        )
+    );
+}
+impl Default for _NV_ENC_PIC_PARAMS_AV1 {
+    /// Returns an instance in which every byte of the struct is zero.
+    fn default() -> Self {
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every field.
+        // NOTE(review): `__BindgenBitfieldUnit` is declared elsewhere in this file - confirm.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+impl _NV_ENC_PIC_PARAMS_AV1 {
+    #[inline]
+    pub fn goldenFrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(0usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_goldenFrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(0usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn arfFrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(1usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_arfFrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(1usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn arf2FrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(2usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_arf2FrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(2usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn bwdFrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(3usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_bwdFrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(3usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn overlayFrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(4usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_overlayFrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(4usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn showExistingFrameFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(5usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_showExistingFrameFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(5usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn errorResilientModeFlag(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(6usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_errorResilientModeFlag(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(6usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn tileConfigUpdate(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(7usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_tileConfigUpdate(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(7usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn enableCustomTileConfig(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(8usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_enableCustomTileConfig(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(8usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn filmGrainParamsUpdate(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(9usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_filmGrainParamsUpdate(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(9usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn ltrMarkFrame(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(10usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_ltrMarkFrame(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(10usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn ltrUseFrames(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(11usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_ltrUseFrames(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(11usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn temporalConfigUpdate(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(12usize, 1u8) as u32) }
+    }
+    #[inline]
+    pub fn set_temporalConfigUpdate(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(12usize, 1u8, val as u64)
+        }
+    }
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        unsafe { ::std::mem::transmute(self._bitfield_1.get(13usize, 19u8) as u32) }
+    }
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        unsafe {
+            let val: u32 = ::std::mem::transmute(val);
+            self._bitfield_1.set(13usize, 19u8, val as u64)
+        }
+    }
+    /// Builds the 4-byte storage unit for `_bitfield_1` from individual field values.
+    ///
+    /// The thirteen single-bit flags occupy bits 0 through 12 in parameter order, and
+    /// `reservedBitFields` occupies the 19 bits starting at bit 13. Every value is written
+    /// with `__BindgenBitfieldUnit::set` at its own offset and width.
+    #[inline]
+    pub fn new_bitfield_1(
+        goldenFrameFlag: u32,
+        arfFrameFlag: u32,
+        arf2FrameFlag: u32,
+        bwdFrameFlag: u32,
+        overlayFrameFlag: u32,
+        showExistingFrameFlag: u32,
+        errorResilientModeFlag: u32,
+        tileConfigUpdate: u32,
+        enableCustomTileConfig: u32,
+        filmGrainParamsUpdate: u32,
+        ltrMarkFrame: u32,
+        ltrUseFrames: u32,
+        temporalConfigUpdate: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        // (bit offset, bit width, value) for every field, in declaration order.
+        let layout: [(usize, u8, u32); 14] = [
+            (0usize, 1u8, goldenFrameFlag),
+            (1usize, 1u8, arfFrameFlag),
+            (2usize, 1u8, arf2FrameFlag),
+            (3usize, 1u8, bwdFrameFlag),
+            (4usize, 1u8, overlayFrameFlag),
+            (5usize, 1u8, showExistingFrameFlag),
+            (6usize, 1u8, errorResilientModeFlag),
+            (7usize, 1u8, tileConfigUpdate),
+            (8usize, 1u8, enableCustomTileConfig),
+            (9usize, 1u8, filmGrainParamsUpdate),
+            (10usize, 1u8, ltrMarkFrame),
+            (11usize, 1u8, ltrUseFrames),
+            (12usize, 1u8, temporalConfigUpdate),
+            (13usize, 19u8, reservedBitFields),
+        ];
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        for &(bit_offset, bit_width, value) in layout.iter() {
+            // `u32 -> u64` is a lossless widening cast; no `transmute`/`unsafe` is needed.
+            unit.set(bit_offset, bit_width, value as u64);
+        }
+        unit
+    }
+}
+/// \struct _NV_ENC_PIC_PARAMS_AV1
+/// AV1-specific encode picture parameters, sent on a per-frame basis.
+pub type NV_ENC_PIC_PARAMS_AV1 = _NV_ENC_PIC_PARAMS_AV1;
+/// Codec specific per-picture encoding parameters.
+///
+/// Every member overlays the same storage starting at offset 0; the layout test that
+/// follows pins the union at 1544 bytes with 8-byte alignment.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub union _NV_ENC_CODEC_PIC_PARAMS {
+    /// [in]: H264 encode picture params.
+    pub h264PicParams: NV_ENC_PIC_PARAMS_H264,
+    /// [in]: HEVC encode picture params.
+    pub hevcPicParams: NV_ENC_PIC_PARAMS_HEVC,
+    /// [in]: AV1 encode picture params.
+    pub av1PicParams: NV_ENC_PIC_PARAMS_AV1,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved: [u32; 256usize],
+}
+/// Checks that `_NV_ENC_CODEC_PIC_PARAMS` has the expected size, alignment and member offsets.
+#[test]
+fn bindgen_test_layout__NV_ENC_CODEC_PIC_PARAMS() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_CODEC_PIC_PARAMS>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_CODEC_PIC_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_CODEC_PIC_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_CODEC_PIC_PARAMS))
+    );
+
+    // Offsets are measured against real (uninitialized) storage with `addr_of!`, which
+    // forms raw member pointers without creating references. Projecting through a null
+    // pointer (`&(*null).field`) is undefined behavior and must not be used for this.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_CODEC_PIC_PARAMS>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! offset_of_field {
+        ($field:ident) => {
+            // SAFETY: `base` points at live storage for the whole value and `addr_of!`
+            // only computes an address; nothing is read and no reference is formed.
+            unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize }
+        };
+    }
+
+    // (member name, measured offset, expected offset), checked in declaration order.
+    let expected = [
+        ("h264PicParams", offset_of_field!(h264PicParams), 0usize),
+        ("hevcPicParams", offset_of_field!(hevcPicParams), 0usize),
+        ("av1PicParams", offset_of_field!(av1PicParams), 0usize),
+        ("reserved", offset_of_field!(reserved), 0usize),
+    ];
+    for &(field, actual, wanted) in expected.iter() {
+        assert_eq!(
+            actual, wanted,
+            "Offset of field: _NV_ENC_CODEC_PIC_PARAMS::{}",
+            field
+        );
+    }
+}
+impl Default for _NV_ENC_CODEC_PIC_PARAMS {
+    /// Returns a union whose every byte is zero.
+    fn default() -> Self {
+        let mut zeroed = ::std::mem::MaybeUninit::<Self>::uninit();
+        let dst = zeroed.as_mut_ptr();
+        // Overwrite the whole value (one element of `Self`) with zero bytes before
+        // declaring it initialized.
+        unsafe { ::std::ptr::write_bytes(dst, 0, 1) };
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Codec specific per-picture encoding parameters (alias of `_NV_ENC_CODEC_PIC_PARAMS`).
+pub type NV_ENC_CODEC_PIC_PARAMS = _NV_ENC_CODEC_PIC_PARAMS;
+/// \struct _NV_ENC_PIC_PARAMS
+/// Encoding parameters that need to be sent on a per frame basis.
+///
+/// Laid out with `#[repr(C)]`; the layout test that follows pins the struct at 3360 bytes
+/// with 8-byte alignment.
+#[repr(C)]
+#[derive(Copy, Clone)]
+pub struct _NV_ENC_PIC_PARAMS {
+    /// [in]: Struct version. Must be set to ::NV_ENC_PIC_PARAMS_VER.
+    pub version: u32,
+    /// [in]: Input frame width.
+    pub inputWidth: u32,
+    /// [in]: Input frame height.
+    pub inputHeight: u32,
+    /// [in]: Input buffer pitch. If the pitch value is not known, set this to inputWidth.
+    pub inputPitch: u32,
+    /// [in]: Bit-wise OR of encode picture flags. See ::NV_ENC_PIC_FLAGS enum.
+    pub encodePicFlags: u32,
+    /// [in]: Frame index associated with the input frame. It must be passed as a monotonically increasing value starting at 0 when lookaheadLevel, UHQ Tuning Info,
+    /// or the "encoding same frames multiple times without advancing encoder state" feature is enabled.
+    pub frameIdx: u32,
+    /// [in]: Opaque data which is associated with the encoded frame, but not actually encoded in the output bitstream.
+    /// This opaque data can be used later to uniquely refer to the corresponding encoded frame. For example, it can be used
+    /// for identifying the frame to be invalidated in the reference picture buffer, if lost at the client.
+    pub inputTimeStamp: u64,
+    /// [in]: Duration of the input picture.
+    pub inputDuration: u64,
+    /// [in]: Input buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.
+    pub inputBuffer: NV_ENC_INPUT_PTR,
+    /// [in]: Output buffer pointer.
+    /// If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, this is the pointer to the output buffer. Client should use a pointer obtained from ::NvEncCreateBitstreamBuffer() API.
+    /// If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate a buffer in video memory for the NV_ENC_ENCODE_OUT_PARAMS struct and encoded bitstream data. Client
+    /// should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this output buffer, and assign it to NV_ENC_PIC_PARAMS::outputBitstream.
+    /// The first 256 bytes of this buffer should be interpreted as the NV_ENC_ENCODE_OUT_PARAMS struct, followed by encoded bitstream data. Recommended size for the output buffer is the sum of the size of
+    /// the NV_ENC_ENCODE_OUT_PARAMS struct and twice the input frame size for lower resolutions (e.g. CIF), or 1.5 times the input frame size for higher resolutions. If the encoded bitstream size is
+    /// greater than the allocated buffer size for the encoded bitstream, then the output buffer will hold encoded bitstream data equal to the buffer size. All CUDA operations on this buffer must use
+    /// the default stream.
+    pub outputBitstream: NV_ENC_OUTPUT_PTR,
+    /// [in]: Event to be signaled on completion of encoding of this frame [only if operating in Asynchronous mode]. Each output buffer should be associated with a distinct event pointer.
+    pub completionEvent: *mut ::std::os::raw::c_void,
+    /// [in]: Input buffer format.
+    pub bufferFmt: NV_ENC_BUFFER_FORMAT,
+    /// [in]: Structure of the input picture.
+    pub pictureStruct: NV_ENC_PIC_STRUCT,
+    /// [in]: Input picture type. Must be set explicitly by the client if the client has not set NV_ENC_INITIALIZE_PARAMS::enablePTD to 1 while calling NvInitializeEncoder.
+    pub pictureType: NV_ENC_PIC_TYPE,
+    /// [in]: Codec specific per-picture encoding parameters.
+    pub codecPicParams: NV_ENC_CODEC_PIC_PARAMS,
+    /// [in]: For H264 and Hevc, the number of hint candidates per block per direction for the current frame. meHintCountsPerBlock[0] is for L0 predictors and meHintCountsPerBlock[1] is for L1 predictors.
+    /// The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder initialization.
+    pub meHintCountsPerBlock: [NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE; 2usize],
+    /// [in]: For H264 and Hevc, pointer to ME external hints for the current frame. The size of the ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
+    /// The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
+    /// + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME, the total number of candidates for a single macroblock is the sum of the total number of candidates per MB for each direction (L0 and L1).
+    /// NOTE(review): the 8x8 term appears twice in the formula as written; one occurrence is presumably a different block size. Confirm against the NVENC header.
+    pub meExternalHints: *mut NVENC_EXTERNAL_ME_HINT,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved2: [u32; 7usize],
+    /// [in]: Reserved and must be set to NULL.
+    pub reserved5: [*mut ::std::os::raw::c_void; 2usize],
+    /// [in]: Pointer to a signed byte array containing a value per MB for H264, per CTB for HEVC and per SB for AV1, in raster scan order for the current picture, which will be interpreted depending on NV_ENC_RC_PARAMS::qpMapMode.
+    /// If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DELTA, qpDeltaMap specifies the QP modifier per MB for H264, per CTB for HEVC and per SB for AV1. This QP modifier will be applied on top of the QP chosen by rate control.
+    /// If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_EMPHASIS, qpDeltaMap specifies the Emphasis Level Map per MB for H264. This level value, along with the QP chosen by rate control, is used to
+    /// compute the QP modifier, which in turn is applied on top of the QP chosen by rate control.
+    /// If NV_ENC_RC_PARAMS::qpMapMode is NV_ENC_QP_MAP_DISABLED, the value in qpDeltaMap will be ignored.
+    pub qpDeltaMap: *mut i8,
+    /// [in]: Size in bytes of the qpDeltaMap surface allocated by the client and pointed to by NV_ENC_PIC_PARAMS::qpDeltaMap. The surface (array) should be picWidthInMbs * picHeightInMbs for H264, picWidthInCtbs * picHeightInCtbs for HEVC and
+    /// picWidthInSbs * picHeightInSbs for AV1.
+    pub qpDeltaMapSize: u32,
+    /// [in]: Reserved bitfields and must be set to 0.
+    pub reservedBitFields: u32,
+    /// [in]: Temporal distance for the reference picture (NVENC_EXTERNAL_ME_HINT::refidx = 0) used during external ME with NV_ENC_INITIALIZE_PARAMS::enablePTD = 1. meHintRefPicDist[0] is for L0 hints and meHintRefPicDist[1] is for L1 hints.
+    /// If not set, a distance of 1 is inferred internally. Ignored for NV_ENC_INITIALIZE_PARAMS::enablePTD = 0.
+    pub meHintRefPicDist: [u16; 2usize],
+    /// [in]: Reserved and must be set to 0.
+    pub reserved4: u32,
+    /// [in]: Input alpha buffer pointer. Client must use a pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource() APIs.
+    /// Applicable only when encoding hevc with alpha layer is enabled.
+    pub alphaBuffer: NV_ENC_INPUT_PTR,
+    /// [in]: For AV1, pointer to ME external SB hints for the current frame. The size of the ME hint buffer should be equal to meSbHintsCount.
+    pub meExternalSbHints: *mut NVENC_EXTERNAL_ME_SB_HINT,
+    /// [in]: For AV1, the total number of external ME SB hint candidates for the frame.
+    /// NV_ENC_PIC_PARAMS::meSbHintsCount must never exceed the total number of SBs in frame * the max number of candidates per SB provided during encoder initialization.
+    /// The max number of candidates per SB is maxMeHintCountsPerBlock[0].numCandsPerSb + maxMeHintCountsPerBlock[1].numCandsPerSb.
+    pub meSbHintsCount: u32,
+    /// [in]: Buffer index in which the encoder state will be saved for the current frame encode. It must be in the
+    /// range 0 to NV_ENC_INITIALIZE_PARAMS::numStateBuffers - 1.
+    pub stateBufferIdx: u32,
+    /// [in]: Reconstructed frame buffer pointer to output the reconstructed frame, if enabled by setting NV_ENC_INITIALIZE_PARAMS::enableReconFrameOutput.
+    /// Client must allocate buffers for writing the reconstructed frames and register them with the Nvidia Video Encoder Interface with NV_ENC_REGISTER_RESOURCE::bufferUsage
+    /// set to NV_ENC_OUTPUT_RECON.
+    /// Client must use the pointer obtained from ::NvEncMapInputResource() API and assign it to NV_ENC_PIC_PARAMS::outputReconBuffer.
+    /// Reconstructed output will be in NV_ENC_BUFFER_FORMAT_NV12 format when chromaFormatIDC is set to 1.
+    /// chromaFormatIDC = 3 is not supported.
+    pub outputReconBuffer: NV_ENC_OUTPUT_PTR,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved3: [u32; 284usize],
+    /// [in]: Reserved and must be set to NULL.
+    pub reserved6: [*mut ::std::os::raw::c_void; 57usize],
+}
+/// Checks that `_NV_ENC_PIC_PARAMS` has the expected size, alignment and field offsets.
+#[test]
+fn bindgen_test_layout__NV_ENC_PIC_PARAMS() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_PIC_PARAMS>(),
+        3360usize,
+        concat!("Size of: ", stringify!(_NV_ENC_PIC_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_PIC_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_PIC_PARAMS))
+    );
+
+    // Offsets are measured against real (uninitialized) storage with `addr_of!`, which
+    // forms raw field pointers without creating references. Projecting through a null
+    // pointer (`&(*null).field`) is undefined behavior and must not be used for this.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_PIC_PARAMS>::uninit();
+    let base = storage.as_ptr();
+    macro_rules! offset_of_field {
+        ($field:ident) => {
+            // SAFETY: `base` points at live storage for the whole value and `addr_of!`
+            // only computes an address; nothing is read and no reference is formed.
+            unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize }
+        };
+    }
+
+    // (field name, measured offset, expected offset), checked in declaration order.
+    let expected = [
+        ("version", offset_of_field!(version), 0usize),
+        ("inputWidth", offset_of_field!(inputWidth), 4usize),
+        ("inputHeight", offset_of_field!(inputHeight), 8usize),
+        ("inputPitch", offset_of_field!(inputPitch), 12usize),
+        ("encodePicFlags", offset_of_field!(encodePicFlags), 16usize),
+        ("frameIdx", offset_of_field!(frameIdx), 20usize),
+        ("inputTimeStamp", offset_of_field!(inputTimeStamp), 24usize),
+        ("inputDuration", offset_of_field!(inputDuration), 32usize),
+        ("inputBuffer", offset_of_field!(inputBuffer), 40usize),
+        ("outputBitstream", offset_of_field!(outputBitstream), 48usize),
+        ("completionEvent", offset_of_field!(completionEvent), 56usize),
+        ("bufferFmt", offset_of_field!(bufferFmt), 64usize),
+        ("pictureStruct", offset_of_field!(pictureStruct), 68usize),
+        ("pictureType", offset_of_field!(pictureType), 72usize),
+        ("codecPicParams", offset_of_field!(codecPicParams), 80usize),
+        ("meHintCountsPerBlock", offset_of_field!(meHintCountsPerBlock), 1624usize),
+        ("meExternalHints", offset_of_field!(meExternalHints), 1656usize),
+        ("reserved2", offset_of_field!(reserved2), 1664usize),
+        ("reserved5", offset_of_field!(reserved5), 1696usize),
+        ("qpDeltaMap", offset_of_field!(qpDeltaMap), 1712usize),
+        ("qpDeltaMapSize", offset_of_field!(qpDeltaMapSize), 1720usize),
+        ("reservedBitFields", offset_of_field!(reservedBitFields), 1724usize),
+        ("meHintRefPicDist", offset_of_field!(meHintRefPicDist), 1728usize),
+        ("reserved4", offset_of_field!(reserved4), 1732usize),
+        ("alphaBuffer", offset_of_field!(alphaBuffer), 1736usize),
+        ("meExternalSbHints", offset_of_field!(meExternalSbHints), 1744usize),
+        ("meSbHintsCount", offset_of_field!(meSbHintsCount), 1752usize),
+        ("stateBufferIdx", offset_of_field!(stateBufferIdx), 1756usize),
+        ("outputReconBuffer", offset_of_field!(outputReconBuffer), 1760usize),
+        ("reserved3", offset_of_field!(reserved3), 1768usize),
+        ("reserved6", offset_of_field!(reserved6), 2904usize),
+    ];
+    for &(field, actual, wanted) in expected.iter() {
+        assert_eq!(
+            actual, wanted,
+            "Offset of field: _NV_ENC_PIC_PARAMS::{}",
+            field
+        );
+    }
+}
+impl Default for _NV_ENC_PIC_PARAMS {
+    /// Returns a struct whose every byte is zero.
+    fn default() -> Self {
+        let mut zeroed = ::std::mem::MaybeUninit::<Self>::uninit();
+        let dst = zeroed.as_mut_ptr();
+        // Overwrite the whole value (one element of `Self`) with zero bytes before
+        // declaring it initialized.
+        // NOTE(review): this relies on all-zero bytes being a valid value for every field,
+        // including the `NV_ENC_BUFFER_FORMAT` / `NV_ENC_PIC_STRUCT` / `NV_ENC_PIC_TYPE`
+        // members; confirm against how those types are declared.
+        unsafe { ::std::ptr::write_bytes(dst, 0, 1) };
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// \struct _NV_ENC_PIC_PARAMS
+/// Encoding parameters that need to be sent on a per frame basis (alias of `_NV_ENC_PIC_PARAMS`).
+pub type NV_ENC_PIC_PARAMS = _NV_ENC_PIC_PARAMS;
+/// \struct _NV_ENC_MEONLY_PARAMS
+/// MEOnly parameters that need to be sent on a per motion estimation basis.
+/// NV_ENC_MEONLY_PARAMS::meExternalHints is supported for H264 only.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_MEONLY_PARAMS {
+    /// [in]: Struct version. Must be set to NV_ENC_MEONLY_PARAMS_VER.
+    pub version: u32,
+    /// [in]: Input frame width.
+    pub inputWidth: u32,
+    /// [in]: Input frame height.
+    pub inputHeight: u32,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved: u32,
+    /// [in]: Input buffer pointer. Client must use a pointer obtained from NvEncCreateInputBuffer() or NvEncMapInputResource() APIs.
+    pub inputBuffer: NV_ENC_INPUT_PTR,
+    /// [in]: Reference frame pointer.
+    pub referenceFrame: NV_ENC_INPUT_PTR,
+    /// [in]: Output buffer pointer.
+    /// If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 0, this is the pointer to the motion vector data buffer allocated by NvEncCreateMVBuffer.
+    /// Client must lock mvBuffer using ::NvEncLockBitstream() API to get the motion vector data.
+    /// If NV_ENC_INITIALIZE_PARAMS::enableOutputInVidmem is set to 1, client should allocate a buffer in video memory for storing the motion vector data. The size of this buffer must
+    /// be equal to the total number of macroblocks multiplied by the size of the NV_ENC_H264_MV_DATA struct. Client should use a pointer obtained from ::NvEncMapInputResource() API, when mapping this
+    /// output buffer, and assign it to NV_ENC_MEONLY_PARAMS::mvBuffer. All CUDA operations on this buffer must use the default stream.
+    pub mvBuffer: NV_ENC_OUTPUT_PTR,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved2: u32,
+    /// [in]: Input buffer format.
+    pub bufferFmt: NV_ENC_BUFFER_FORMAT,
+    /// [in]: Event to be signaled on completion of motion estimation
+    /// of this frame [only if operating in Asynchronous mode].
+    /// Each output buffer should be associated with a distinct event pointer.
+    pub completionEvent: *mut ::std::os::raw::c_void,
+    /// [in]: Left or right viewID if NV_ENC_CONFIG_H264_MEONLY::bStereoEnable is set.
+    /// viewID can be 0,1 if bStereoEnable is set, 0 otherwise.
+    pub viewID: u32,
+    /// [in]: Number of hint candidates per block for the current frame. meHintCountsPerBlock[0] is for L0 predictors.
+    /// The candidate count in NV_ENC_PIC_PARAMS::meHintCountsPerBlock[lx] must never exceed NV_ENC_INITIALIZE_PARAMS::maxMEHintCountsPerBlock[lx] provided during encoder initialization.
+    pub meHintCountsPerBlock: [NVENC_EXTERNAL_ME_HINT_COUNTS_PER_BLOCKTYPE; 2usize],
+    /// [in]: Pointer to ME external hints for the current frame. The size of the ME hint buffer should be equal to number of macroblocks * the total number of candidates per macroblock.
+    /// The total number of candidates per MB per direction = 1*meHintCountsPerBlock[Lx].numCandsPerBlk16x16 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk16x8 + 2*meHintCountsPerBlock[Lx].numCandsPerBlk8x8
+    /// + 4*meHintCountsPerBlock[Lx].numCandsPerBlk8x8. For frames using bidirectional ME, the total number of candidates for a single macroblock is the sum of the total number of candidates per MB for each direction (L0 and L1).
+    /// NOTE(review): the 8x8 term appears twice in the formula as written; one occurrence is presumably a different block size. Confirm against the NVENC header.
+    pub meExternalHints: *mut NVENC_EXTERNAL_ME_HINT,
+    /// [in]: Reserved and must be set to 0.
+    pub reserved1: [u32; 241usize],
+    /// [in]: Reserved and must be set to NULL.
+    pub reserved3: [*mut ::std::os::raw::c_void; 59usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_MEONLY_PARAMS() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_MEONLY_PARAMS>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_MEONLY_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_MEONLY_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_MEONLY_PARAMS))
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).version as *const _ as usize },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).inputWidth as *const _ as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(inputWidth)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).inputHeight as *const _ as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(inputHeight)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).reserved as *const _ as usize },
+        12usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).inputBuffer as *const _ as usize
+        },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(inputBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).referenceFrame as *const _ as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(referenceFrame)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).mvBuffer as *const _ as usize },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(mvBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).reserved2 as *const _ as usize },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).bufferFmt as *const _ as usize },
+        44usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(bufferFmt)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).completionEvent as *const _ as usize
+        },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(completionEvent)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).viewID as *const _ as usize },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(viewID)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).meHintCountsPerBlock as *const _
+                as usize
+        },
+        60usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(meHintCountsPerBlock)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).meExternalHints as *const _ as usize
+        },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(meExternalHints)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).reserved1 as *const _ as usize },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe { &(*(::std::ptr::null::<_NV_ENC_MEONLY_PARAMS>())).reserved3 as *const _ as usize },
+        1072usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENC_MEONLY_PARAMS),
+            "::",
+            stringify!(reserved3)
+        )
+    );
+}
+impl Default for _NV_ENC_MEONLY_PARAMS {
+    /// Returns a value in which every byte is zero.
+    fn default() -> Self {
+        // Request zero-filled storage directly rather than clearing it in a second step.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every
+        // field of `Self`. NOTE(review): the struct definition is outside this view;
+        // confirm this holds for all of its field types.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+// Alias exposing `_NV_ENC_MEONLY_PARAMS` under its un-prefixed name.
+#[doc = " \\struct _NV_ENC_MEONLY_PARAMS"]
+#[doc = " MEOnly parameters that need to be sent on a per motion estimation basis."]
+#[doc = " NV_ENC_MEONLY_PARAMS::meExternalHints is supported for H264 only."]
+pub type NV_ENC_MEONLY_PARAMS = _NV_ENC_MEONLY_PARAMS;
+// C-layout (`#[repr(C)]`) parameter block for locking a bitstream buffer.
+// `bindgen_test_layout__NV_ENC_LOCK_BITSTREAM` pins its size to 1544 bytes, its
+// alignment to 8 and the offset of every named field, so field order and types
+// must not be changed by hand.
+#[doc = " \\struct _NV_ENC_LOCK_BITSTREAM"]
+#[doc = " Bitstream buffer lock parameters."]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_LOCK_BITSTREAM {
+    #[doc = "< [in]: Struct version. Must be set to ::NV_ENC_LOCK_BITSTREAM_VER."]
+    pub version: u32,
+    // Zero-length array: occupies no space. Presumably present only to give the
+    // bitfield storage below `u32` alignment (bindgen convention -- TODO confirm).
+    pub _bitfield_align_1: [u32; 0],
+    // Four bytes of packed bitfield storage. Read and written through the
+    // `doNotWait`, `ltrFrame`, `getRCStats` and `reservedBitFields` accessors in
+    // `impl _NV_ENC_LOCK_BITSTREAM`.
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    #[doc = "< [in]: Pointer to the bitstream buffer being locked."]
+    pub outputBitstream: *mut ::std::os::raw::c_void,
+    #[doc = "< [in, out]: Array which receives the slice (H264/HEVC) or tile (AV1) offsets. This is not supported if NV_ENC_CONFIG_H264::sliceMode is 1 on Kepler GPUs. Array size must be equal to size of frame in MBs."]
+    pub sliceOffsets: *mut u32,
+    #[doc = "< [out]: Frame no. for which the bitstream is being retrieved."]
+    pub frameIdx: u32,
+    #[doc = "< [out]: The NvEncodeAPI interface status for the locked picture."]
+    pub hwEncodeStatus: u32,
+    #[doc = "< [out]: Number of slices (H264/HEVC) or tiles (AV1) in the encoded picture. Will be reported only if NV_ENC_INITIALIZE_PARAMS::reportSliceOffsets set to 1."]
+    pub numSlices: u32,
+    #[doc = "< [out]: Actual number of bytes generated and copied to the memory pointed by bitstreamBufferPtr."]
+    #[doc = "When HEVC alpha layer encoding is enabled, this field reports the total encoded size in bytes i.e it is the encoded size of the base plus the alpha layer."]
+    #[doc = "For AV1 when enablePTD is set, this field reports the total encoded size in bytes of all the encoded frames packed into the current output surface i.e. show frame plus all preceding no-show frames"]
+    pub bitstreamSizeInBytes: u32,
+    #[doc = "< [out]: Presentation timestamp associated with the encoded output."]
+    pub outputTimeStamp: u64,
+    #[doc = "< [out]: Presentation duration associates with the encoded output."]
+    pub outputDuration: u64,
+    #[doc = "< [out]: Pointer to the generated output bitstream."]
+    #[doc = "For MEOnly mode _NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr should be typecast to"]
+    #[doc = "NV_ENC_H264_MV_DATA/NV_ENC_HEVC_MV_DATA pointer respectively for H264/HEVC"]
+    pub bitstreamBufferPtr: *mut ::std::os::raw::c_void,
+    #[doc = "< [out]: Picture type of the encoded picture."]
+    pub pictureType: NV_ENC_PIC_TYPE,
+    #[doc = "< [out]: Structure of the generated output picture."]
+    pub pictureStruct: NV_ENC_PIC_STRUCT,
+    #[doc = "< [out]: Average QP of the frame."]
+    pub frameAvgQP: u32,
+    #[doc = "< [out]: Total SATD cost for whole frame."]
+    pub frameSatd: u32,
+    #[doc = "< [out]: Frame index associated with this LTR frame."]
+    pub ltrFrameIdx: u32,
+    #[doc = "< [out]: Bitmap of LTR frames indices which were used for encoding this frame. Value of 0 if no LTR frames were used."]
+    pub ltrFrameBitmap: u32,
+    #[doc = "< [out]: TemporalId value of the frame when using temporalSVC encoding"]
+    pub temporalId: u32,
+    #[doc = "< [out]: For H264, Number of Intra MBs in the encoded frame. For HEVC, Number of Intra CTBs in the encoded frame. For AV1, Number of Intra SBs in the encoded show frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1."]
+    pub intraMBCount: u32,
+    #[doc = "< [out]: For H264, Number of Inter MBs in the encoded frame, includes skip MBs. For HEVC, Number of Inter CTBs in the encoded frame. For AV1, Number of Inter SBs in the encoded show frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1."]
+    pub interMBCount: u32,
+    #[doc = "< [out]: Average Motion Vector in X direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1."]
+    pub averageMVX: i32,
+    #[doc = "< [out]: Average Motion Vector in y direction for the encoded frame. Supported only if _NV_ENC_LOCK_BITSTREAM::getRCStats set to 1."]
+    pub averageMVY: i32,
+    #[doc = "< [out]: Number of bytes generated for the alpha layer in the encoded output. Applicable only when HEVC with alpha encoding is enabled."]
+    pub alphaLayerSizeInBytes: u32,
+    #[doc = "< [in]: Size of the buffer pointed by NV_ENC_LOCK_BITSTREAM::outputStatsPtr."]
+    pub outputStatsPtrSize: u32,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved: u32,
+    #[doc = "< [in, out]: Buffer which receives the encoded frame output stats, if NV_ENC_INITIALIZE_PARAMS::enableOutputStats is set to 1."]
+    pub outputStatsPtr: *mut ::std::os::raw::c_void,
+    #[doc = "< [out]: Frame index in display order"]
+    pub frameIdxDisplay: u32,
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reserved1: [u32; 219usize],
+    #[doc = "< [in]: Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 63usize],
+    #[doc = "< [in]: Reserved and must be set to 0"]
+    pub reservedInternal: [u32; 8usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_LOCK_BITSTREAM() {
+    // Verifies that `_NV_ENC_LOCK_BITSTREAM` has the expected size, alignment and
+    // field offsets.
+    //
+    // Offsets are measured inside real (uninitialized) storage with `addr_of!`,
+    // which computes a field address without creating a reference or reading
+    // memory. Forming `&(*null_ptr).field` instead would dereference a null
+    // pointer, which is undefined behaviour and trips rustc's `deref_nullptr` lint.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_LOCK_BITSTREAM>::uninit();
+    let base = storage.as_ptr();
+    // Asserts that `$field` starts `$expected` bytes into the struct.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `base` points at `storage`, which is alive for the whole
+                // function; only an address is computed, no field is read.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_LOCK_BITSTREAM),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_LOCK_BITSTREAM>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_LOCK_BITSTREAM))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_LOCK_BITSTREAM>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_LOCK_BITSTREAM))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(outputBitstream, 8usize);
+    assert_offset!(sliceOffsets, 16usize);
+    assert_offset!(frameIdx, 24usize);
+    assert_offset!(hwEncodeStatus, 28usize);
+    assert_offset!(numSlices, 32usize);
+    assert_offset!(bitstreamSizeInBytes, 36usize);
+    assert_offset!(outputTimeStamp, 40usize);
+    assert_offset!(outputDuration, 48usize);
+    assert_offset!(bitstreamBufferPtr, 56usize);
+    assert_offset!(pictureType, 64usize);
+    assert_offset!(pictureStruct, 68usize);
+    assert_offset!(frameAvgQP, 72usize);
+    assert_offset!(frameSatd, 76usize);
+    assert_offset!(ltrFrameIdx, 80usize);
+    assert_offset!(ltrFrameBitmap, 84usize);
+    assert_offset!(temporalId, 88usize);
+    assert_offset!(intraMBCount, 92usize);
+    assert_offset!(interMBCount, 96usize);
+    assert_offset!(averageMVX, 100usize);
+    assert_offset!(averageMVY, 104usize);
+    assert_offset!(alphaLayerSizeInBytes, 108usize);
+    assert_offset!(outputStatsPtrSize, 112usize);
+    assert_offset!(reserved, 116usize);
+    assert_offset!(outputStatsPtr, 120usize);
+    assert_offset!(frameIdxDisplay, 128usize);
+    assert_offset!(reserved1, 132usize);
+    assert_offset!(reserved2, 1008usize);
+    assert_offset!(reservedInternal, 1512usize);
+}
+impl Default for _NV_ENC_LOCK_BITSTREAM {
+    /// Returns a value in which every byte is zero.
+    fn default() -> Self {
+        // Request zero-filled storage directly rather than clearing it in a second step.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every
+        // field. That holds for the integers, raw pointers and arrays;
+        // NOTE(review): `NV_ENC_PIC_TYPE` and `NV_ENC_PIC_STRUCT` are declared
+        // outside this view -- confirm zero is a valid value for both.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+// Accessors for the values packed into `_bitfield_1`.
+//
+// Each accessor forwards to `__BindgenBitfieldUnit::get`/`set` with a fixed pair of
+// leading arguments: (0, 1) for `doNotWait`, (1, 1) for `ltrFrame`, (2, 1) for
+// `getRCStats` and (3, 29) for `reservedBitFields`. The pair is presumably
+// (bit offset, bit width); `__BindgenBitfieldUnit` is defined outside this view.
+impl _NV_ENC_LOCK_BITSTREAM {
+    /// Reads `doNotWait` from the packed storage.
+    #[inline]
+    pub fn doNotWait(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(0usize, 1u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `doNotWait` slot of the packed storage.
+    #[inline]
+    pub fn set_doNotWait(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(0usize, 1u8, bits as u64)
+    }
+    /// Reads `ltrFrame` from the packed storage.
+    #[inline]
+    pub fn ltrFrame(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(1usize, 1u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `ltrFrame` slot of the packed storage.
+    #[inline]
+    pub fn set_ltrFrame(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(1usize, 1u8, bits as u64)
+    }
+    /// Reads `getRCStats` from the packed storage.
+    #[inline]
+    pub fn getRCStats(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(2usize, 1u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `getRCStats` slot of the packed storage.
+    #[inline]
+    pub fn set_getRCStats(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(2usize, 1u8, bits as u64)
+    }
+    /// Reads `reservedBitFields` from the packed storage.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(3usize, 29u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `reservedBitFields` slot of the packed storage.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(3usize, 29u8, bits as u64)
+    }
+    /// Builds a fresh storage unit holding the four given values, written in the
+    /// order `doNotWait`, `ltrFrame`, `getRCStats`, `reservedBitFields`.
+    #[inline]
+    pub fn new_bitfield_1(
+        doNotWait: u32,
+        ltrFrame: u32,
+        getRCStats: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+
+        let do_not_wait_bits: u32 = unsafe { ::std::mem::transmute(doNotWait) };
+        unit.set(0usize, 1u8, do_not_wait_bits as u64);
+
+        let ltr_frame_bits: u32 = unsafe { ::std::mem::transmute(ltrFrame) };
+        unit.set(1usize, 1u8, ltr_frame_bits as u64);
+
+        let rc_stats_bits: u32 = unsafe { ::std::mem::transmute(getRCStats) };
+        unit.set(2usize, 1u8, rc_stats_bits as u64);
+
+        let reserved_bits: u32 = unsafe { ::std::mem::transmute(reservedBitFields) };
+        unit.set(3usize, 29u8, reserved_bits as u64);
+
+        unit
+    }
+}
+// Alias exposing `_NV_ENC_LOCK_BITSTREAM` under its un-prefixed name.
+#[doc = " \\struct _NV_ENC_LOCK_BITSTREAM"]
+#[doc = " Bitstream buffer lock parameters."]
+pub type NV_ENC_LOCK_BITSTREAM = _NV_ENC_LOCK_BITSTREAM;
+// C-layout (`#[repr(C)]`) parameter block for locking an uncompressed input buffer.
+// `bindgen_test_layout__NV_ENC_LOCK_INPUT_BUFFER` pins its size to 1544 bytes, its
+// alignment to 8 and the offset of every named field, so field order and types
+// must not be changed by hand.
+#[doc = " \\struct _NV_ENC_LOCK_INPUT_BUFFER"]
+#[doc = " Uncompressed Input Buffer lock parameters."]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_LOCK_INPUT_BUFFER {
+    #[doc = "< [in]:  Struct version. Must be set to ::NV_ENC_LOCK_INPUT_BUFFER_VER."]
+    pub version: u32,
+    // Zero-length array: occupies no space. Presumably present only to give the
+    // bitfield storage below `u32` alignment (bindgen convention -- TODO confirm).
+    pub _bitfield_align_1: [u32; 0],
+    // Four bytes of packed bitfield storage. Read and written through the
+    // `doNotWait` and `reservedBitFields` accessors in
+    // `impl _NV_ENC_LOCK_INPUT_BUFFER`.
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    #[doc = "< [in]:  Pointer to the input buffer to be locked, client should pass the pointer obtained from ::NvEncCreateInputBuffer() or ::NvEncMapInputResource API."]
+    pub inputBuffer: NV_ENC_INPUT_PTR,
+    #[doc = "< [out]: Pointed to the locked input buffer data. Client can only access input buffer using the \\p bufferDataPtr."]
+    pub bufferDataPtr: *mut ::std::os::raw::c_void,
+    #[doc = "< [out]: Pitch of the locked input buffer."]
+    pub pitch: u32,
+    #[doc = "< [in]:  Reserved and must be set to 0"]
+    pub reserved1: [u32; 251usize],
+    #[doc = "< [in]:  Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_LOCK_INPUT_BUFFER() {
+    // Verifies that `_NV_ENC_LOCK_INPUT_BUFFER` has the expected size, alignment
+    // and field offsets.
+    //
+    // Offsets are measured inside real (uninitialized) storage with `addr_of!`,
+    // which computes a field address without creating a reference or reading
+    // memory. Forming `&(*null_ptr).field` instead would dereference a null
+    // pointer, which is undefined behaviour and trips rustc's `deref_nullptr` lint.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_LOCK_INPUT_BUFFER>::uninit();
+    let base = storage.as_ptr();
+    // Asserts that `$field` starts `$expected` bytes into the struct.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `base` points at `storage`, which is alive for the whole
+                // function; only an address is computed, no field is read.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_LOCK_INPUT_BUFFER),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_LOCK_INPUT_BUFFER>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_LOCK_INPUT_BUFFER))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_LOCK_INPUT_BUFFER>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_LOCK_INPUT_BUFFER))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(inputBuffer, 8usize);
+    assert_offset!(bufferDataPtr, 16usize);
+    assert_offset!(pitch, 24usize);
+    assert_offset!(reserved1, 28usize);
+    assert_offset!(reserved2, 1032usize);
+}
+impl Default for _NV_ENC_LOCK_INPUT_BUFFER {
+    /// Returns a value in which every byte is zero.
+    fn default() -> Self {
+        // Request zero-filled storage directly rather than clearing it in a second step.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every
+        // field. That holds for the integers, raw pointers and arrays;
+        // NOTE(review): `NV_ENC_INPUT_PTR` is declared outside this view --
+        // confirm zero is a valid value for it.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+// Accessors for the values packed into `_bitfield_1`.
+//
+// Each accessor forwards to `__BindgenBitfieldUnit::get`/`set` with a fixed pair of
+// leading arguments: (0, 1) for `doNotWait` and (1, 31) for `reservedBitFields`.
+// The pair is presumably (bit offset, bit width); `__BindgenBitfieldUnit` is
+// defined outside this view.
+impl _NV_ENC_LOCK_INPUT_BUFFER {
+    /// Reads `doNotWait` from the packed storage.
+    #[inline]
+    pub fn doNotWait(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(0usize, 1u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `doNotWait` slot of the packed storage.
+    #[inline]
+    pub fn set_doNotWait(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(0usize, 1u8, bits as u64)
+    }
+    /// Reads `reservedBitFields` from the packed storage.
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        unsafe {
+            let raw = self._bitfield_1.get(1usize, 31u8) as u32;
+            ::std::mem::transmute(raw)
+        }
+    }
+    /// Writes `val` into the `reservedBitFields` slot of the packed storage.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        let bits: u32 = unsafe { ::std::mem::transmute(val) };
+        self._bitfield_1.set(1usize, 31u8, bits as u64)
+    }
+    /// Builds a fresh storage unit holding the two given values, written in the
+    /// order `doNotWait`, `reservedBitFields`.
+    #[inline]
+    pub fn new_bitfield_1(
+        doNotWait: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+
+        let do_not_wait_bits: u32 = unsafe { ::std::mem::transmute(doNotWait) };
+        unit.set(0usize, 1u8, do_not_wait_bits as u64);
+
+        let reserved_bits: u32 = unsafe { ::std::mem::transmute(reservedBitFields) };
+        unit.set(1usize, 31u8, reserved_bits as u64);
+
+        unit
+    }
+}
+// Alias exposing `_NV_ENC_LOCK_INPUT_BUFFER` under its un-prefixed name.
+#[doc = " \\struct _NV_ENC_LOCK_INPUT_BUFFER"]
+#[doc = " Uncompressed Input Buffer lock parameters."]
+pub type NV_ENC_LOCK_INPUT_BUFFER = _NV_ENC_LOCK_INPUT_BUFFER;
+// C-layout (`#[repr(C)]`) parameter block for mapping a registered input resource.
+// `bindgen_test_layout__NV_ENC_MAP_INPUT_RESOURCE` pins its size to 1544 bytes, its
+// alignment to 8 and the offset of every field, so field order and types must
+// not be changed by hand. `Default` is not derived here; a separate hand-written
+// impl returns an all-zero value.
+#[doc = " \\struct _NV_ENC_MAP_INPUT_RESOURCE"]
+#[doc = " Map an input resource to a Nvidia Encoder Input Buffer"]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_MAP_INPUT_RESOURCE {
+    #[doc = "< [in]:  Struct version. Must be set to ::NV_ENC_MAP_INPUT_RESOURCE_VER."]
+    pub version: u32,
+    #[doc = "< [in]:  Deprecated. Do not use."]
+    pub subResourceIndex: u32,
+    #[doc = "< [in]:  Deprecated. Do not use."]
+    pub inputResource: *mut ::std::os::raw::c_void,
+    #[doc = "< [in]:  The Registered resource handle obtained by calling NvEncRegisterInputResource."]
+    pub registeredResource: NV_ENC_REGISTERED_PTR,
+    #[doc = "< [out]: Mapped pointer corresponding to the registeredResource. This pointer must be used in NV_ENC_PIC_PARAMS::inputBuffer parameter in ::NvEncEncodePicture() API."]
+    pub mappedResource: NV_ENC_INPUT_PTR,
+    #[doc = "< [out]: Buffer format of the outputResource. This buffer format must be used in NV_ENC_PIC_PARAMS::bufferFmt if client using the above mapped resource pointer."]
+    pub mappedBufferFmt: NV_ENC_BUFFER_FORMAT,
+    #[doc = "< [in]:  Reserved and must be set to 0."]
+    pub reserved1: [u32; 251usize],
+    #[doc = "< [in]:  Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 63usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_MAP_INPUT_RESOURCE() {
+    // Verifies that `_NV_ENC_MAP_INPUT_RESOURCE` has the expected size, alignment
+    // and field offsets.
+    //
+    // Offsets are measured inside real (uninitialized) storage with `addr_of!`,
+    // which computes a field address without creating a reference or reading
+    // memory. Forming `&(*null_ptr).field` instead would dereference a null
+    // pointer, which is undefined behaviour and trips rustc's `deref_nullptr` lint.
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_MAP_INPUT_RESOURCE>::uninit();
+    let base = storage.as_ptr();
+    // Asserts that `$field` starts `$expected` bytes into the struct.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                // SAFETY: `base` points at `storage`, which is alive for the whole
+                // function; only an address is computed, no field is read.
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_MAP_INPUT_RESOURCE),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_MAP_INPUT_RESOURCE>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_MAP_INPUT_RESOURCE))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_MAP_INPUT_RESOURCE>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_MAP_INPUT_RESOURCE))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(subResourceIndex, 4usize);
+    assert_offset!(inputResource, 8usize);
+    assert_offset!(registeredResource, 16usize);
+    assert_offset!(mappedResource, 24usize);
+    assert_offset!(mappedBufferFmt, 32usize);
+    assert_offset!(reserved1, 36usize);
+    assert_offset!(reserved2, 1040usize);
+}
+impl Default for _NV_ENC_MAP_INPUT_RESOURCE {
+    /// Returns a value in which every byte is zero.
+    fn default() -> Self {
+        // Request zero-filled storage directly rather than clearing it in a second step.
+        let zero_filled = ::std::mem::MaybeUninit::<Self>::zeroed();
+        // SAFETY: relies on the all-zero bit pattern being a valid value for every
+        // field. That holds for the integers, raw pointers and arrays;
+        // NOTE(review): `NV_ENC_REGISTERED_PTR`, `NV_ENC_INPUT_PTR` and
+        // `NV_ENC_BUFFER_FORMAT` are declared outside this view -- confirm zero
+        // is a valid value for each.
+        unsafe { zero_filled.assume_init() }
+    }
+}
+// Alias exposing `_NV_ENC_MAP_INPUT_RESOURCE` under its un-prefixed name.
+#[doc = " \\struct _NV_ENC_MAP_INPUT_RESOURCE"]
+#[doc = " Map an input resource to a Nvidia Encoder Input Buffer"]
+pub type NV_ENC_MAP_INPUT_RESOURCE = _NV_ENC_MAP_INPUT_RESOURCE;
+// C-layout (`#[repr(C)]`) pair of `u32` values describing an OpenGL texture.
+// `bindgen_test_layout__NV_ENC_INPUT_RESOURCE_OPENGL_TEX` pins its size to 8 bytes,
+// its alignment to 4 and the field offsets to 0 and 4. `Default` is derived, so
+// the default value has both fields set to 0.
+#[doc = " \\struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX"]
+#[doc = " NV_ENC_REGISTER_RESOURCE::resourceToRegister must be a pointer to a variable of this type,"]
+#[doc = " when NV_ENC_REGISTER_RESOURCE::resourceType is NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX"]
+#[repr(C)]
+#[derive(Debug, Default, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_INPUT_RESOURCE_OPENGL_TEX {
+    #[doc = "< [in]: The name of the texture to be used."]
+    pub texture: u32,
+    #[doc = "< [in]: Accepted values are GL_TEXTURE_RECTANGLE and GL_TEXTURE_2D."]
+    pub target: u32,
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_INPUT_RESOURCE_OPENGL_TEX() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_INPUT_RESOURCE_OPENGL_TEX> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_INPUT_RESOURCE_OPENGL_TEX),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_INPUT_RESOURCE_OPENGL_TEX>(),
+        8usize,
+        concat!("Size of: ", stringify!(_NV_ENC_INPUT_RESOURCE_OPENGL_TEX))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_INPUT_RESOURCE_OPENGL_TEX>(),
+        4usize,
+        concat!(
+            "Alignment of ",
+            stringify!(_NV_ENC_INPUT_RESOURCE_OPENGL_TEX)
+        )
+    );
+    assert_offset!(texture, 0usize);
+    assert_offset!(target, 4usize);
+}
+/// Unprefixed name for [`_NV_ENC_INPUT_RESOURCE_OPENGL_TEX`].
+///
+/// `NV_ENC_REGISTER_RESOURCE::resourceToRegister` must be a pointer to a variable of
+/// this type when `NV_ENC_REGISTER_RESOURCE::resourceType` is
+/// `NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX`.
+pub type NV_ENC_INPUT_RESOURCE_OPENGL_TEX = _NV_ENC_INPUT_RESOURCE_OPENGL_TEX;
+/// Fence and fence value for synchronization.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct _NV_ENC_FENCE_POINT_D3D12 {
+    /// (in) Struct version. Must be set to `NV_ENC_FENCE_POINT_D3D12_VER`.
+    pub version: u32,
+    /// (in) Reserved and must be set to 0.
+    pub reserved: u32,
+    /// (in) Pointer to `ID3D12Fence`. This fence object is used for synchronization.
+    pub pFence: *mut ::std::os::raw::c_void,
+    /// (in) Fence value to reach or exceed before the GPU operation.
+    pub waitValue: u64,
+    /// (in) Fence value to set the fence to, after the GPU operation.
+    pub signalValue: u64,
+    /// Zero-length `u32` array: takes no space, only contributes `u32` alignment.
+    pub _bitfield_align_1: [u32; 0],
+    /// Four bytes of packed bitfield storage; read and written through the
+    /// `bWait`, `bSignal` and `reservedBitField` accessors on this type.
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// (in) Reserved and must be set to 0.
+    pub reserved1: [u32; 7usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_FENCE_POINT_D3D12() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_FENCE_POINT_D3D12> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_FENCE_POINT_D3D12),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_FENCE_POINT_D3D12>(),
+        64usize,
+        concat!("Size of: ", stringify!(_NV_ENC_FENCE_POINT_D3D12))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_FENCE_POINT_D3D12>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_FENCE_POINT_D3D12))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(reserved, 4usize);
+    assert_offset!(pFence, 8usize);
+    assert_offset!(waitValue, 16usize);
+    assert_offset!(signalValue, 24usize);
+    assert_offset!(reserved1, 36usize);
+}
+impl Default for _NV_ENC_FENCE_POINT_D3D12 {
+    /// Returns a value in which every byte of the struct is zero.
+    fn default() -> Self {
+        // NOTE(review): relies on the all-zero bit pattern being valid for every
+        // field, including the bitfield storage type — confirm.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+impl _NV_ENC_FENCE_POINT_D3D12 {
+    /// Reads the 1-bit `bWait` field (bit offset 0) from the bitfield storage.
+    #[inline]
+    pub fn bWait(&self) -> u32 {
+        self._bitfield_1.get(0usize, 1u8) as u32
+    }
+    /// Writes `val` into the 1-bit `bWait` field (bit offset 0).
+    #[inline]
+    pub fn set_bWait(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads the 1-bit `bSignal` field (bit offset 1) from the bitfield storage.
+    #[inline]
+    pub fn bSignal(&self) -> u32 {
+        self._bitfield_1.get(1usize, 1u8) as u32
+    }
+    /// Writes `val` into the 1-bit `bSignal` field (bit offset 1).
+    #[inline]
+    pub fn set_bSignal(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 1u8, u64::from(val))
+    }
+    /// Reads the 30-bit `reservedBitField` field (bit offset 2).
+    #[inline]
+    pub fn reservedBitField(&self) -> u32 {
+        self._bitfield_1.get(2usize, 30u8) as u32
+    }
+    /// Writes `val` into the 30-bit `reservedBitField` field (bit offset 2).
+    #[inline]
+    pub fn set_reservedBitField(&mut self, val: u32) {
+        self._bitfield_1.set(2usize, 30u8, u64::from(val))
+    }
+    /// Builds a bitfield storage unit from the three field values.
+    ///
+    /// Starts from the default unit and stores `bWait` at bit 0 (width 1),
+    /// `bSignal` at bit 1 (width 1) and `reservedBitField` at bit 2 (width 30).
+    #[inline]
+    pub fn new_bitfield_1(
+        bWait: u32,
+        bSignal: u32,
+        reservedBitField: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        unit.set(0usize, 1u8, u64::from(bWait));
+        unit.set(1usize, 1u8, u64::from(bSignal));
+        unit.set(2usize, 30u8, u64::from(reservedBitField));
+        unit
+    }
+}
+/// Fence and fence value for synchronization.
+///
+/// Unprefixed name for [`_NV_ENC_FENCE_POINT_D3D12`].
+pub type NV_ENC_FENCE_POINT_D3D12 = _NV_ENC_FENCE_POINT_D3D12;
+/// Input resource descriptor for the D3D12 interface.
+///
+/// `NV_ENC_PIC_PARAMS::inputBuffer` and `NV_ENC_PIC_PARAMS::alphaBuffer` must be a
+/// pointer to a struct of this type when the D3D12 interface is used.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct _NV_ENC_INPUT_RESOURCE_D3D12 {
+    /// (in) Struct version. Must be set to `NV_ENC_INPUT_RESOURCE_D3D12_VER`.
+    pub version: u32,
+    /// (in) Reserved and must be set to 0.
+    pub reserved: u32,
+    /// (in) Specifies the input surface pointer. Client must use a pointer obtained
+    /// from `NvEncMapInputResource()` in `NV_ENC_MAP_INPUT_RESOURCE::mappedResource`
+    /// when mapping the input surface.
+    pub pInputBuffer: NV_ENC_INPUT_PTR,
+    /// (in) Specifies the fence and corresponding fence values to do GPU wait and signal.
+    pub inputFencePoint: NV_ENC_FENCE_POINT_D3D12,
+    /// (in) Reserved and must be set to 0.
+    pub reserved1: [u32; 16usize],
+    /// (in) Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 16usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_INPUT_RESOURCE_D3D12() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_INPUT_RESOURCE_D3D12> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_INPUT_RESOURCE_D3D12),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_INPUT_RESOURCE_D3D12>(),
+        272usize,
+        concat!("Size of: ", stringify!(_NV_ENC_INPUT_RESOURCE_D3D12))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_INPUT_RESOURCE_D3D12>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_INPUT_RESOURCE_D3D12))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(reserved, 4usize);
+    assert_offset!(pInputBuffer, 8usize);
+    assert_offset!(inputFencePoint, 16usize);
+    assert_offset!(reserved1, 80usize);
+    assert_offset!(reserved2, 144usize);
+}
+impl Default for _NV_ENC_INPUT_RESOURCE_D3D12 {
+    /// Returns a value in which every byte of the struct is zero.
+    fn default() -> Self {
+        // NOTE(review): relies on the all-zero bit pattern being valid for every
+        // field, including the project-defined pointer and fence types — confirm.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+/// Unprefixed name for [`_NV_ENC_INPUT_RESOURCE_D3D12`].
+///
+/// `NV_ENC_PIC_PARAMS::inputBuffer` and `NV_ENC_PIC_PARAMS::alphaBuffer` must be a
+/// pointer to a struct of this type when the D3D12 interface is used.
+pub type NV_ENC_INPUT_RESOURCE_D3D12 = _NV_ENC_INPUT_RESOURCE_D3D12;
+/// Output resource descriptor for the D3D12 interface.
+///
+/// `NV_ENC_PIC_PARAMS::outputBitstream` and `NV_ENC_LOCK_BITSTREAM::outputBitstream`
+/// must be a pointer to a struct of this type when the D3D12 interface is used.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct _NV_ENC_OUTPUT_RESOURCE_D3D12 {
+    /// (in) Struct version. Must be set to `NV_ENC_OUTPUT_RESOURCE_D3D12_VER`.
+    pub version: u32,
+    /// (in) Reserved and must be set to 0.
+    pub reserved: u32,
+    /// (in) Specifies the output buffer pointer. Client must use a pointer obtained
+    /// from `NvEncMapInputResource()` in `NV_ENC_MAP_INPUT_RESOURCE::mappedResource`
+    /// when mapping output bitstream buffer.
+    pub pOutputBuffer: NV_ENC_INPUT_PTR,
+    /// (in) Specifies the fence and corresponding fence values to do GPU wait and signal.
+    pub outputFencePoint: NV_ENC_FENCE_POINT_D3D12,
+    /// (in) Reserved and must be set to 0.
+    pub reserved1: [u32; 16usize],
+    /// (in) Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 16usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_OUTPUT_RESOURCE_D3D12() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_OUTPUT_RESOURCE_D3D12> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_OUTPUT_RESOURCE_D3D12),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_OUTPUT_RESOURCE_D3D12>(),
+        272usize,
+        concat!("Size of: ", stringify!(_NV_ENC_OUTPUT_RESOURCE_D3D12))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_OUTPUT_RESOURCE_D3D12>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_OUTPUT_RESOURCE_D3D12))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(reserved, 4usize);
+    assert_offset!(pOutputBuffer, 8usize);
+    assert_offset!(outputFencePoint, 16usize);
+    assert_offset!(reserved1, 80usize);
+    assert_offset!(reserved2, 144usize);
+}
+impl Default for _NV_ENC_OUTPUT_RESOURCE_D3D12 {
+    /// Returns a value in which every byte of the struct is zero.
+    fn default() -> Self {
+        // NOTE(review): relies on the all-zero bit pattern being valid for every
+        // field, including the project-defined pointer and fence types — confirm.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+/// Unprefixed name for [`_NV_ENC_OUTPUT_RESOURCE_D3D12`].
+///
+/// `NV_ENC_PIC_PARAMS::outputBitstream` and `NV_ENC_LOCK_BITSTREAM::outputBitstream`
+/// must be a pointer to a struct of this type when the D3D12 interface is used.
+pub type NV_ENC_OUTPUT_RESOURCE_D3D12 = _NV_ENC_OUTPUT_RESOURCE_D3D12;
+/// Register a resource for future use with the Nvidia Video Encoder Interface.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct _NV_ENC_REGISTER_RESOURCE {
+    /// (in) Struct version. Must be set to `NV_ENC_REGISTER_RESOURCE_VER`.
+    pub version: u32,
+    /// (in) Specifies the type of resource to be registered.
+    ///
+    /// Supported values are `NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX`,
+    /// `NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR`,
+    /// `NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX`.
+    pub resourceType: NV_ENC_INPUT_RESOURCE_TYPE,
+    /// (in) Input frame width.
+    pub width: u32,
+    /// (in) Input frame height.
+    pub height: u32,
+    /// (in) Input buffer pitch.
+    ///
+    /// - For `NV_ENC_INPUT_RESOURCE_TYPE_DIRECTX` resources, set this to 0.
+    /// - For `NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR` resources, set this to the
+    ///   pitch as obtained from `cuMemAllocPitch()`, or to the width in bytes (if
+    ///   this resource was created by using `cuMemAlloc()`). This value must be a
+    ///   multiple of 4.
+    /// - For `NV_ENC_INPUT_RESOURCE_TYPE_CUDAARRAY` resources, set this to the width
+    ///   of the allocation in bytes (i.e.
+    ///   `CUDA_ARRAY3D_DESCRIPTOR::Width * CUDA_ARRAY3D_DESCRIPTOR::NumChannels`).
+    /// - For `NV_ENC_INPUT_RESOURCE_TYPE_OPENGL_TEX` resources, set this to the
+    ///   texture width multiplied by the number of components in the texture format.
+    pub pitch: u32,
+    /// (in) Subresource Index of the DirectX resource to be registered. Should be
+    /// set to 0 for other interfaces.
+    pub subResourceIndex: u32,
+    /// (in) Handle to the resource that is being registered.
+    pub resourceToRegister: *mut ::std::os::raw::c_void,
+    /// (out) Registered resource handle. This should be used in future interactions
+    /// with the Nvidia Video Encoder Interface.
+    pub registeredResource: NV_ENC_REGISTERED_PTR,
+    /// (in) Buffer format of resource to be registered.
+    pub bufferFormat: NV_ENC_BUFFER_FORMAT,
+    /// (in) Usage of resource to be registered.
+    pub bufferUsage: NV_ENC_BUFFER_USAGE,
+    /// (in) Specifies the input fence and corresponding fence values to do GPU wait
+    /// and signal.
+    ///
+    /// To be used only when `NV_ENC_REGISTER_RESOURCE::resourceToRegister` represents
+    /// D3D12 surface and `NV_ENC_BUFFER_USAGE::bufferUsage` is `NV_ENC_INPUT_IMAGE`.
+    /// The fence `NV_ENC_FENCE_POINT_D3D12::pFence` and
+    /// `NV_ENC_FENCE_POINT_D3D12::waitValue` will be used to do GPU wait before
+    /// starting GPU operation, if `NV_ENC_FENCE_POINT_D3D12::bWait` is set.
+    /// The fence `NV_ENC_FENCE_POINT_D3D12::pFence` and
+    /// `NV_ENC_FENCE_POINT_D3D12::signalValue` will be used to do GPU signal when
+    /// GPU operation finishes, if `NV_ENC_FENCE_POINT_D3D12::bSignal` is set.
+    pub pInputFencePoint: *mut NV_ENC_FENCE_POINT_D3D12,
+    /// (out) Chroma offset for the reconstructed output buffer when
+    /// `NV_ENC_BUFFER_USAGE::bufferUsage` is set to `NV_ENC_OUTPUT_RECON` and D3D11
+    /// interface is used.
+    ///
+    /// When chroma components are interleaved, `chromaOffset[0]` will contain chroma
+    /// offset. `chromaOffset[1]` is reserved for future use.
+    pub chromaOffset: [u32; 2usize],
+    /// (in) Chroma offset for input buffer when `NV_ENC_BUFFER_USAGE::bufferUsage`
+    /// is set to `NV_ENC_INPUT_IMAGE` and NVCUVID interface is used. This is required
+    /// only when luma and chroma allocations are not continuous, and the planes are
+    /// padded.
+    pub chromaOffsetIn: [u32; 2usize],
+    /// (in) Reserved and must be set to 0.
+    pub reserved1: [u32; 244usize],
+    /// (in) Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 61usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_REGISTER_RESOURCE() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_REGISTER_RESOURCE> =
+        ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_REGISTER_RESOURCE),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_REGISTER_RESOURCE>(),
+        1536usize,
+        concat!("Size of: ", stringify!(_NV_ENC_REGISTER_RESOURCE))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_REGISTER_RESOURCE>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_REGISTER_RESOURCE))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(resourceType, 4usize);
+    assert_offset!(width, 8usize);
+    assert_offset!(height, 12usize);
+    assert_offset!(pitch, 16usize);
+    assert_offset!(subResourceIndex, 20usize);
+    assert_offset!(resourceToRegister, 24usize);
+    assert_offset!(registeredResource, 32usize);
+    assert_offset!(bufferFormat, 40usize);
+    assert_offset!(bufferUsage, 44usize);
+    assert_offset!(pInputFencePoint, 48usize);
+    assert_offset!(chromaOffset, 56usize);
+    assert_offset!(chromaOffsetIn, 64usize);
+    assert_offset!(reserved1, 72usize);
+    assert_offset!(reserved2, 1048usize);
+}
+impl Default for _NV_ENC_REGISTER_RESOURCE {
+    /// Returns a value in which every byte of the struct is zero.
+    fn default() -> Self {
+        // NOTE(review): relies on the all-zero bit pattern being valid for every
+        // field, including the project-defined enum-like types (`resourceType`,
+        // `bufferFormat`, `bufferUsage`) — confirm.
+        unsafe { ::std::mem::MaybeUninit::<Self>::zeroed().assume_init() }
+    }
+}
+/// Register a resource for future use with the Nvidia Video Encoder Interface.
+///
+/// Unprefixed name for [`_NV_ENC_REGISTER_RESOURCE`].
+pub type NV_ENC_REGISTER_RESOURCE = _NV_ENC_REGISTER_RESOURCE;
+/// Encode Stats structure.
+#[repr(C)]
+#[derive(Clone, Copy, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)]
+pub struct _NV_ENC_STAT {
+    /// (in) Struct version. Must be set to `NV_ENC_STAT_VER`.
+    pub version: u32,
+    /// (in) Reserved and must be set to 0.
+    pub reserved: u32,
+    /// (in) Specifies the pointer to output bitstream.
+    pub outputBitStream: NV_ENC_OUTPUT_PTR,
+    /// (out) Size of generated bitstream in bytes.
+    pub bitStreamSize: u32,
+    /// (out) Picture type of encoded picture. See `NV_ENC_PIC_TYPE`.
+    pub picType: u32,
+    /// (out) Offset of last valid bytes of completed bitstream.
+    pub lastValidByteOffset: u32,
+    /// (out) Offsets of each slice.
+    pub sliceOffsets: [u32; 16usize],
+    /// (out) Picture number.
+    pub picIdx: u32,
+    /// (out) Average QP of the frame.
+    pub frameAvgQP: u32,
+    /// Zero-length `u32` array: takes no space, only contributes `u32` alignment.
+    pub _bitfield_align_1: [u32; 0],
+    /// Four bytes of packed bitfield storage; read and written through the
+    /// accessor methods on this type (for example `ltrFrame`).
+    pub _bitfield_1: __BindgenBitfieldUnit<[u8; 4usize]>,
+    /// (out) Frame index associated with this LTR frame.
+    pub ltrFrameIdx: u32,
+    /// (out) For H264, Number of Intra MBs in the encoded frame. For HEVC, Number of
+    /// Intra CTBs in the encoded frame.
+    pub intraMBCount: u32,
+    /// (out) For H264, Number of Inter MBs in the encoded frame, includes skip MBs.
+    /// For HEVC, Number of Inter CTBs in the encoded frame.
+    pub interMBCount: u32,
+    /// (out) Average Motion Vector in X direction for the encoded frame.
+    pub averageMVX: i32,
+    /// (out) Average Motion Vector in y direction for the encoded frame.
+    pub averageMVY: i32,
+    /// (in) Reserved and must be set to 0.
+    pub reserved1: [u32; 227usize],
+    /// (in) Reserved and must be set to NULL.
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENC_STAT() {
+    // Checks size, alignment and field offsets against the expected C layout values.
+    // Offsets are taken with `addr_of!` relative to an uninitialized value, so no
+    // null pointer is dereferenced and no reference is ever created.
+    const UNINIT: ::std::mem::MaybeUninit<_NV_ENC_STAT> = ::std::mem::MaybeUninit::uninit();
+    let ptr = UNINIT.as_ptr();
+    // Asserts the byte offset of one field, keeping the original failure message.
+    macro_rules! assert_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*ptr).$field) as usize - ptr as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_STAT),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_STAT>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_STAT))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_STAT>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_STAT))
+    );
+    assert_offset!(version, 0usize);
+    assert_offset!(reserved, 4usize);
+    assert_offset!(outputBitStream, 8usize);
+    assert_offset!(bitStreamSize, 16usize);
+    assert_offset!(picType, 20usize);
+    assert_offset!(lastValidByteOffset, 24usize);
+    assert_offset!(sliceOffsets, 28usize);
+    assert_offset!(picIdx, 92usize);
+    assert_offset!(frameAvgQP, 96usize);
+    assert_offset!(ltrFrameIdx, 104usize);
+    assert_offset!(intraMBCount, 108usize);
+    assert_offset!(interMBCount, 112usize);
+    assert_offset!(averageMVX, 116usize);
+    assert_offset!(averageMVY, 120usize);
+    assert_offset!(reserved1, 124usize);
+    assert_offset!(reserved2, 1032usize);
+}
+impl Default for _NV_ENC_STAT {
+    /// Returns an instance whose fields are all zero-initialised.
+    fn default() -> Self {
+        // Assumes the all-zero bit pattern is a valid value for every field,
+        // which is the same assumption an explicit zero-fill makes.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+impl _NV_ENC_STAT {
+    /// Reads `ltrFrame` from `_bitfield_1` (arguments `0, 1` — presumably bit
+    /// offset and bit width; confirm against `__BindgenBitfieldUnit`).
+    #[inline]
+    pub fn ltrFrame(&self) -> u32 {
+        let raw = self._bitfield_1.get(0usize, 1u8);
+        raw as u32
+    }
+    /// Writes `val` into the `ltrFrame` slot of `_bitfield_1`.
+    #[inline]
+    pub fn set_ltrFrame(&mut self, val: u32) {
+        self._bitfield_1.set(0usize, 1u8, u64::from(val))
+    }
+    /// Reads `reservedBitFields` from `_bitfield_1` (arguments `1, 31`).
+    #[inline]
+    pub fn reservedBitFields(&self) -> u32 {
+        let raw = self._bitfield_1.get(1usize, 31u8);
+        raw as u32
+    }
+    /// Writes `val` into the `reservedBitFields` slot of `_bitfield_1`.
+    #[inline]
+    pub fn set_reservedBitFields(&mut self, val: u32) {
+        self._bitfield_1.set(1usize, 31u8, u64::from(val))
+    }
+    /// Builds a fresh 4-byte bitfield unit holding `ltrFrame` and
+    /// `reservedBitFields`, starting from the unit's `Default` value.
+    #[inline]
+    pub fn new_bitfield_1(
+        ltrFrame: u32,
+        reservedBitFields: u32,
+    ) -> __BindgenBitfieldUnit<[u8; 4usize]> {
+        let mut unit: __BindgenBitfieldUnit<[u8; 4usize]> = Default::default();
+        unit.set(0usize, 1u8, u64::from(ltrFrame));
+        unit.set(1usize, 31u8, u64::from(reservedBitFields));
+        unit
+    }
+}
+/// \struct _NV_ENC_STAT
+/// Encode Stats structure.
+pub type NV_ENC_STAT = _NV_ENC_STAT;
+/// \struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD
+/// Sequence and picture parameters payload.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD {
+    // NOTE(review): the doc text below names NV_ENC_INITIALIZE_PARAMS_VER; for
+    // this struct NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER looks like the intended
+    // constant — confirm against nvEncodeAPI.h before relying on it.
+    ///< [in]:  Struct version. Must be set to ::NV_ENC_INITIALIZE_PARAMS_VER.
+    pub version: u32,
+    ///< [in]:  Specifies the size of the spsppsBuffer provided by the client
+    pub inBufferSize: u32,
+    ///< [in]:  Specifies the SPS id to be used in sequence header. Default value is 0.
+    pub spsId: u32,
+    ///< [in]:  Specifies the PPS id to be used in picture header. Default value is 0.
+    pub ppsId: u32,
+    ///< [in]:  Specifies bitstream header pointer of size NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize.
+    ///It is the client's responsibility to manage this memory.
+    pub spsppsBuffer: *mut ::std::os::raw::c_void,
+    ///< [out]: Size of the sequence and picture header in bytes.
+    pub outSPSPPSPayloadSize: *mut u32,
+    ///< [in]:  Reserved and must be set to 0
+    pub reserved: [u32; 250usize],
+    ///< [in]:  Reserved and must be set to NULL
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+/// Layout regression test for `_NV_ENC_SEQUENCE_PARAM_PAYLOAD`.
+///
+/// Asserts the struct's size, alignment and the byte offset of every field
+/// against the values recorded in the generated bindings.
+#[test]
+fn bindgen_test_layout__NV_ENC_SEQUENCE_PARAM_PAYLOAD() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_SEQUENCE_PARAM_PAYLOAD>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_SEQUENCE_PARAM_PAYLOAD))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_SEQUENCE_PARAM_PAYLOAD>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_SEQUENCE_PARAM_PAYLOAD))
+    );
+    // Offsets are measured on uninitialized storage through raw pointers only:
+    // `addr_of!` never materialises a reference, whereas a `&(*null).field`
+    // expression would create a reference through a null pointer (UB).
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_SEQUENCE_PARAM_PAYLOAD>::uninit();
+    let base = storage.as_ptr();
+    // Checks one field offset; the failure message text matches the
+    // "Offset of field: <struct>::<field>" form used by bindgen.
+    macro_rules! assert_field_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_SEQUENCE_PARAM_PAYLOAD),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(inBufferSize, 4usize);
+    assert_field_offset!(spsId, 8usize);
+    assert_field_offset!(ppsId, 12usize);
+    assert_field_offset!(spsppsBuffer, 16usize);
+    assert_field_offset!(outSPSPPSPayloadSize, 24usize);
+    assert_field_offset!(reserved, 32usize);
+    assert_field_offset!(reserved2, 1032usize);
+}
+impl Default for _NV_ENC_SEQUENCE_PARAM_PAYLOAD {
+    /// Returns an instance whose fields are all zero (integers `0`, pointers null).
+    fn default() -> Self {
+        // All fields are integers, raw pointers or arrays of them, so the
+        // all-zero bit pattern is a valid value.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// \struct _NV_ENC_SEQUENCE_PARAM_PAYLOAD
+/// Sequence and picture parameters payload.
+pub type NV_ENC_SEQUENCE_PARAM_PAYLOAD = _NV_ENC_SEQUENCE_PARAM_PAYLOAD;
+/// Event registration/unregistration parameters.
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_EVENT_PARAMS {
+    ///< [in]: Struct version. Must be set to ::NV_ENC_EVENT_PARAMS_VER.
+    pub version: u32,
+    ///< [in]: Reserved and must be set to 0
+    pub reserved: u32,
+    ///< [in]: Handle to event to be registered/unregistered with the NvEncodeAPI interface.
+    pub completionEvent: *mut ::std::os::raw::c_void,
+    ///< [in]: Reserved and must be set to 0
+    pub reserved1: [u32; 254usize],
+    ///< [in]: Reserved and must be set to NULL
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+/// Layout regression test for `_NV_ENC_EVENT_PARAMS`.
+///
+/// Asserts the struct's size, alignment and the byte offset of every field
+/// against the values recorded in the generated bindings.
+#[test]
+fn bindgen_test_layout__NV_ENC_EVENT_PARAMS() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_EVENT_PARAMS>(),
+        1544usize,
+        concat!("Size of: ", stringify!(_NV_ENC_EVENT_PARAMS))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_EVENT_PARAMS>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENC_EVENT_PARAMS))
+    );
+    // Offsets are measured on uninitialized storage through raw pointers only:
+    // `addr_of!` never materialises a reference, whereas a `&(*null).field`
+    // expression would create a reference through a null pointer (UB).
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_EVENT_PARAMS>::uninit();
+    let base = storage.as_ptr();
+    // Checks one field offset; the failure message text matches the
+    // "Offset of field: <struct>::<field>" form used by bindgen.
+    macro_rules! assert_field_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_EVENT_PARAMS),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(reserved, 4usize);
+    assert_field_offset!(completionEvent, 8usize);
+    assert_field_offset!(reserved1, 16usize);
+    assert_field_offset!(reserved2, 1032usize);
+}
+impl Default for _NV_ENC_EVENT_PARAMS {
+    /// Returns an instance whose fields are all zero (integers `0`, pointers null).
+    fn default() -> Self {
+        // All fields are integers, raw pointers or arrays of them, so the
+        // all-zero bit pattern is a valid value.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Event registration/unregistration parameters.
+pub type NV_ENC_EVENT_PARAMS = _NV_ENC_EVENT_PARAMS;
+/// Encoder Session Creation parameters
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS {
+    ///< [in]: Struct version. Must be set to ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER.
+    pub version: u32,
+    ///< [in]: Specified the device Type
+    pub deviceType: NV_ENC_DEVICE_TYPE,
+    ///< [in]: Pointer to client device.
+    pub device: *mut ::std::os::raw::c_void,
+    ///< [in]: Reserved and must be set to 0.
+    pub reserved: *mut ::std::os::raw::c_void,
+    ///< [in]: API version. Should be set to NVENCAPI_VERSION.
+    pub apiVersion: u32,
+    ///< [in]: Reserved and must be set to 0
+    pub reserved1: [u32; 253usize],
+    ///< [in]: Reserved and must be set to NULL
+    pub reserved2: [*mut ::std::os::raw::c_void; 64usize],
+}
+/// Layout regression test for `_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS`.
+///
+/// Asserts the struct's size, alignment and the byte offset of every field
+/// against the values recorded in the generated bindings.
+#[test]
+fn bindgen_test_layout__NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS>(),
+        1552usize,
+        concat!(
+            "Size of: ",
+            stringify!(_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS)
+        )
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS>(),
+        8usize,
+        concat!(
+            "Alignment of ",
+            stringify!(_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS)
+        )
+    );
+    // Offsets are measured on uninitialized storage through raw pointers only:
+    // `addr_of!` never materialises a reference, whereas a `&(*null).field`
+    // expression would create a reference through a null pointer (UB).
+    let storage = ::std::mem::MaybeUninit::<_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS>::uninit();
+    let base = storage.as_ptr();
+    // Checks one field offset; the failure message text matches the
+    // "Offset of field: <struct>::<field>" form used by bindgen.
+    macro_rules! assert_field_offset {
+        ($field:ident, $expected:expr) => {
+            assert_eq!(
+                unsafe { ::std::ptr::addr_of!((*base).$field) as usize - base as usize },
+                $expected,
+                concat!(
+                    "Offset of field: ",
+                    stringify!(_NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS),
+                    "::",
+                    stringify!($field)
+                )
+            );
+        };
+    }
+    assert_field_offset!(version, 0usize);
+    assert_field_offset!(deviceType, 4usize);
+    assert_field_offset!(device, 8usize);
+    assert_field_offset!(reserved, 16usize);
+    assert_field_offset!(apiVersion, 24usize);
+    assert_field_offset!(reserved1, 28usize);
+    assert_field_offset!(reserved2, 1040usize);
+}
+impl Default for _NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS {
+    /// Returns an instance whose fields are all zero-initialised.
+    fn default() -> Self {
+        // Assumes the all-zero bit pattern is valid for every field, including
+        // `deviceType` — the same assumption an explicit zero-fill makes.
+        let zeroed = ::std::mem::MaybeUninit::<Self>::zeroed();
+        unsafe { zeroed.assume_init() }
+    }
+}
+/// Encoder Session Creation parameters
+pub type NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS = _NV_ENC_OPEN_ENCODE_SESSIONEX_PARAMS;
+extern "C" {
+    /// \brief Opens an encoding session.
+    ///
+    /// Deprecated.
+    ///
+    /// \return
+    /// ::NV_ENC_ERR_INVALID_CALL\n
+    ///
+    pub fn NvEncOpenEncodeSession(
+        device: *mut ::std::os::raw::c_void,
+        deviceType: u32,
+        encoder: *mut *mut ::std::os::raw::c_void,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves the number of supported encode GUIDs.
+    ///
+    /// The function returns the number of codec GUIDs supported by the NvEncodeAPI
+    /// interface.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [out] encodeGUIDCount
+    ///   Number of supported encode GUIDs.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodeGUIDCount(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves an array of supported encoder codec GUIDs.
+    ///
+    /// The function returns an array of codec GUIDs supported by the NvEncodeAPI interface.
+    /// The client must allocate an array where the NvEncodeAPI interface can
+    /// fill the supported GUIDs and pass the pointer in \p *GUIDs parameter.
+    /// The size of the array can be determined by using ::NvEncGetEncodeGUIDCount() API.
+    /// The Nvidia Encoding interface returns the number of codec GUIDs it has actually
+    /// filled in the GUID array in the \p GUIDCount parameter.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] guidArraySize
+    ///   Number of GUIDs to be retrieved. Should be set to the number retrieved using
+    ///   ::NvEncGetEncodeGUIDCount.
+    /// \param [out] GUIDs
+    ///   Array of supported Encode GUIDs.
+    /// \param [out] GUIDCount
+    ///   Number of supported Encode GUIDs.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodeGUIDs(
+        encoder: *mut ::std::os::raw::c_void,
+        GUIDs: *mut GUID,
+        guidArraySize: u32,
+        GUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves the number of supported profile GUIDs.
+    ///
+    /// The function returns the number of profile GUIDs supported for a given codec.
+    /// The client must first enumerate the codec GUIDs supported by the NvEncodeAPI
+    /// interface. After determining the codec GUID, it can query the NvEncodeAPI
+    /// interface to determine the number of profile GUIDs supported for a particular
+    /// codec GUID.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   The codec GUID for which the profile GUIDs are being enumerated.
+    /// \param [out] encodeProfileGUIDCount
+    ///   Number of encode profiles supported for the given encodeGUID.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodeProfileGUIDCount(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        encodeProfileGUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves an array of supported encode profile GUIDs.
+    ///
+    /// The function returns an array of supported profile GUIDs for a particular
+    /// codec GUID. The client must allocate an array where the NvEncodeAPI interface
+    /// can populate the profile GUIDs. The client can determine the array size using
+    /// ::NvEncGetEncodeProfileGUIDCount() API. The client must also validate that the
+    /// NvEncodeAPI interface supports the GUID the client wants to pass as \p encodeGUID
+    /// parameter.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   The encode GUID whose profile GUIDs are being enumerated.
+    /// \param [in] guidArraySize
+    ///   Number of GUIDs to be retrieved. Should be set to the number retrieved using
+    ///   ::NvEncGetEncodeProfileGUIDCount.
+    /// \param [out] profileGUIDs
+    ///   Array of supported Encode Profile GUIDs
+    /// \param [out] GUIDCount
+    ///   Number of valid encode profile GUIDs in \p profileGUIDs array.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodeProfileGUIDs(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        profileGUIDs: *mut GUID,
+        guidArraySize: u32,
+        GUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieve the number of supported Input formats.
+    ///
+    /// The function returns the number of supported input formats. The client must
+    /// query the NvEncodeAPI interface to determine the supported input formats
+    /// before creating the input surfaces.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the number of supported input formats
+    ///   is to be retrieved.
+    /// \param [out] inputFmtCount
+    ///   Number of input formats supported for specified Encode GUID.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    pub fn NvEncGetInputFormatCount(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        inputFmtCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves an array of supported Input formats
+    ///
+    /// Returns an array of supported input formats. The client must use the input
+    /// format to create input surface using ::NvEncCreateInputBuffer() API.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the number of supported input formats
+    ///   is to be retrieved.
+    /// \param [in] inputFmtArraySize
+    ///   Size input format count array passed in \p inputFmts.
+    /// \param [out] inputFmts
+    ///   Array of input formats supported for this Encode GUID.
+    /// \param [out] inputFmtCount
+    ///   The number of valid input format types returned by the NvEncodeAPI
+    ///   interface in \p inputFmts array.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetInputFormats(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        inputFmts: *mut NV_ENC_BUFFER_FORMAT,
+        inputFmtArraySize: u32,
+        inputFmtCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves the capability value for a specified encoder attribute.
+    ///
+    /// The function returns the capability value for a given encoder attribute. The
+    /// client must validate the encodeGUID using ::NvEncGetEncodeGUIDs() API before
+    /// calling this function. The encoder attribute being queried are enumerated in
+    /// ::NV_ENC_CAPS_PARAM enum.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the capability attribute is to be retrieved.
+    /// \param [in] capsParam
+    ///   Used to specify attribute being queried. Refer ::NV_ENC_CAPS_PARAM for  more
+    /// details.
+    /// \param [out] capsVal
+    ///   The value corresponding to the capability attribute being queried.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    pub fn NvEncGetEncodeCaps(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        capsParam: *mut NV_ENC_CAPS_PARAM,
+        capsVal: *mut ::std::os::raw::c_int,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Retrieves the number of supported preset GUIDs.
+    ///
+    /// The function returns the number of preset GUIDs available for a given codec.
+    /// The client must validate the codec GUID using ::NvEncGetEncodeGUIDs() API
+    /// before calling this function.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the number of supported presets is to
+    ///   be retrieved.
+    /// \param [out] encodePresetGUIDCount
+    ///   Receives the number of supported preset GUIDs.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodePresetCount(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        encodePresetGUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Receives an array of supported encoder preset GUIDs.
+    ///
+    /// The function returns an array of encode preset GUIDs available for a given codec.
+    /// The client can directly use one of the preset GUIDs based upon the use case
+    /// or target device. The preset GUID chosen can be directly used in
+    /// NV_ENC_INITIALIZE_PARAMS::presetGUID parameter to ::NvEncEncodePicture() API.
+    /// Alternately client can  also use the preset GUID to retrieve the encoding config
+    /// parameters being used by NvEncodeAPI interface for that given preset, using
+    /// ::NvEncGetEncodePresetConfig() API. It can then modify preset config parameters
+    /// as per its use case and send it to NvEncodeAPI interface as part of
+    /// NV_ENC_INITIALIZE_PARAMS::encodeConfig parameter for NvEncInitializeEncoder()
+    /// API.
+    ///
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the list of supported presets is to be
+    ///   retrieved.
+    /// \param [in] guidArraySize
+    ///   Size of array of preset GUIDs passed in \p preset GUIDs
+    /// \param [out] presetGUIDs
+    ///   Array of supported Encode preset GUIDs from the NvEncodeAPI interface
+    ///   to client.
+    /// \param [out] encodePresetGUIDCount
+    ///   Receives the number of preset GUIDs returned by the NvEncodeAPI
+    ///   interface.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodePresetGUIDs(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUIDs: *mut GUID,
+        guidArraySize: u32,
+        encodePresetGUIDCount: *mut u32,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Returns a preset config structure supported for given preset GUID.
+    ///
+    /// The function returns a preset config structure for a given preset GUID.
+    /// NvEncGetEncodePresetConfig() API is not applicable to AV1.
+    /// Before using this function the client must enumerate the preset GUIDs available for
+    /// a given codec. The preset config structure can be modified by the client depending
+    /// upon its use case and can be then used to initialize the encoder using
+    /// ::NvEncInitializeEncoder() API. The client can use this function only if it
+    /// wants to modify the NvEncodeAPI preset configuration, otherwise it can
+    /// directly use the preset GUID.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the list of supported presets is to be
+    ///   retrieved.
+    /// \param [in] presetGUID
+    ///   Preset GUID, corresponding to which the Encoding configurations is to be
+    ///   retrieved.
+    /// \param [out] presetConfig
+    ///   The requested Preset Encoder Attribute set. Refer ::_NV_ENC_CONFIG for
+    ///    more details.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodePresetConfig(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUID: GUID,
+        presetConfig: *mut NV_ENC_PRESET_CONFIG,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Returns a preset config structure supported for given preset GUID.
+    ///
+    /// The function returns a preset config structure for a given preset GUID and tuning info.
+    /// NvEncGetEncodePresetConfigEx() API is not applicable to H264 and HEVC meonly mode.
+    /// Before using this function the client must enumerate the preset GUIDs available for
+    /// a given codec. The preset config structure can be modified by the client depending
+    /// upon its use case and can then be used to initialize the encoder using the
+    /// ::NvEncInitializeEncoder() API. The client can use this function only if it
+    /// wants to modify the NvEncodeAPI preset configuration, otherwise it can
+    /// directly use the preset GUID.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] encodeGUID
+    ///   Encode GUID, corresponding to which the list of supported presets is to be
+    ///   retrieved.
+    /// \param [in] presetGUID
+    ///   Preset GUID, corresponding to which the encoding configuration is to be
+    ///   retrieved.
+    /// \param [in] tuningInfo
+    ///   Tuning info, corresponding to which the encoding configuration is to be
+    ///   retrieved.
+    /// \param [out] presetConfig
+    ///   The requested Preset Encoder Attribute set. Refer ::_NV_ENC_CONFIG for
+    ///   more details.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncGetEncodePresetConfigEx(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUID: GUID,
+        tuningInfo: NV_ENC_TUNING_INFO,
+        presetConfig: *mut NV_ENC_PRESET_CONFIG,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Initialize the encoder.
+    ///
+    /// This API must be used to initialize the encoder. The initialization parameter
+    /// is passed using \p *createEncodeParams. The client must send the following
+    /// fields of the _NV_ENC_INITIALIZE_PARAMS structure with a valid value.
+    /// - NV_ENC_INITIALIZE_PARAMS::encodeGUID
+    /// - NV_ENC_INITIALIZE_PARAMS::encodeWidth
+    /// - NV_ENC_INITIALIZE_PARAMS::encodeHeight
+    ///
+    /// The client can pass a preset GUID directly to the NvEncodeAPI interface using
+    /// NV_ENC_INITIALIZE_PARAMS::presetGUID field. If the client doesn't pass
+    /// NV_ENC_INITIALIZE_PARAMS::encodeConfig structure, the codec specific parameters
+    /// will be selected based on the preset GUID. The preset GUID must have been
+    /// validated by the client using ::NvEncGetEncodePresetGUIDs() API.
+    /// If the client passes a custom ::_NV_ENC_CONFIG structure through
+    /// NV_ENC_INITIALIZE_PARAMS::encodeConfig, it will override the codec specific parameters
+    /// based on the preset GUID. It is recommended that even if the client passes a custom config,
+    /// it should also send a preset GUID. In this case, the preset GUID passed by the client
+    /// will not override any of the custom config parameters programmed by the client,
+    /// it is only used as a hint by the NvEncodeAPI interface to determine certain encoder parameters
+    /// which are not exposed to the client.
+    ///
+    /// There are two modes of operation for the encoder namely:
+    /// - Asynchronous mode
+    /// - Synchronous mode
+    ///
+    /// The client can select asynchronous or synchronous mode by setting the \p
+    /// enableEncodeAsync field in ::_NV_ENC_INITIALIZE_PARAMS to 1 or 0 respectively.
+    /// \par Asynchronous mode of operation:
+    /// The Asynchronous mode can be enabled by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1.
+    /// The client operating in asynchronous mode must allocate completion event object
+    /// for each output buffer and pass the completion event object in the
+    /// ::NvEncEncodePicture() API. The client can create another thread and wait on
+    /// the event object to be signaled by NvEncodeAPI interface on completion of the
+    /// encoding process for the output frame. This should unblock the main thread from
+    /// submitting work to the encoder. When the event is signaled the client can call
+    /// NvEncodeAPI interfaces to copy the bitstream data using ::NvEncLockBitstream()
+    /// API. This is the preferred mode of operation.
+    ///
+    /// NOTE: Asynchronous mode is not supported on Linux.
+    ///
+    /// \par Synchronous mode of operation:
+    /// The client can select synchronous mode by setting NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0.
+    /// The client working in synchronous mode can work in a single threaded or multi
+    /// threaded mode. The client need not allocate any event objects. The client can
+    /// only lock the bitstream data after NvEncodeAPI interface has returned
+    /// ::NV_ENC_SUCCESS from encode picture. The NvEncodeAPI interface can return
+    /// ::NV_ENC_ERR_NEED_MORE_INPUT error code from ::NvEncEncodePicture() API. The
+    /// client must not lock the output buffer in such case but should send the next
+    /// frame for encoding. The client must keep on calling ::NvEncEncodePicture() API
+    /// until it returns ::NV_ENC_SUCCESS. \n
+    /// The client must always lock the bitstream data in order in which it has submitted.
+    /// This is true for both asynchronous and synchronous mode.
+    ///
+    /// \par Picture type decision:
+    /// If the client is taking the picture type decision, it must disable the picture
+    /// type decision module in NvEncodeAPI by setting NV_ENC_INITIALIZE_PARAMS::enablePTD
+    /// to 0. In this case the client is required to send the picture in encoding
+    /// order to NvEncodeAPI by doing the re-ordering for B frames. \n
+    /// If the client doesn't want to take the picture type decision it can enable
+    /// picture type decision module in the NvEncodeAPI interface by setting
+    /// NV_ENC_INITIALIZE_PARAMS::enablePTD to 1 and send the input pictures in display
+    /// order.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] createEncodeParams
+    ///   Refer ::_NV_ENC_INITIALIZE_PARAMS for details.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncInitializeEncoder(
+        encoder: *mut ::std::os::raw::c_void,
+        createEncodeParams: *mut NV_ENC_INITIALIZE_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Allocates Input buffer.
+    ///
+    /// This function is used to allocate an input buffer. The client must enumerate
+    /// the input buffer format before allocating the input buffer resources. The
+    /// NV_ENC_INPUT_PTR returned by the NvEncodeAPI interface in the
+    /// NV_ENC_CREATE_INPUT_BUFFER::inputBuffer field can be directly used in
+    /// ::NvEncEncodePicture() API. The number of input buffers to be allocated by the
+    /// client must be at least 4 more than the number of B frames being used for encoding.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in,out] createInputBufferParams
+    ///   Pointer to the ::NV_ENC_CREATE_INPUT_BUFFER structure.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncCreateInputBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        createInputBufferParams: *mut NV_ENC_CREATE_INPUT_BUFFER,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Release an input buffer.
+    ///
+    /// This function is used to free an input buffer. If the client has allocated
+    /// any input buffer using ::NvEncCreateInputBuffer() API, it must free those
+    /// input buffers by calling this function. The client must release the input
+    /// buffers before destroying the encoder using ::NvEncDestroyEncoder() API.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] inputBuffer
+    ///   Pointer to the input buffer to be released.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncDestroyInputBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        inputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Set input and output CUDA stream for specified encoder attribute.
+    ///
+    /// Encoding may involve CUDA pre-processing on the input and post-processing on encoded output.
+    /// This function is used to set input and output CUDA streams to pipeline the CUDA pre-processing
+    /// and post-processing tasks. Clients should call this function before the call to
+    /// NvEncUnlockInputBuffer(). If this function is not called, the default CUDA stream is used for
+    /// input and output processing. After a successful call to this function, the streams specified
+    /// in that call will replace the previously-used streams.
+    /// This API is supported for NVCUVID interface only.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] inputStream
+    ///   Pointer to CUstream which is used to process ::NV_ENC_PIC_PARAMS::inputFrame for encode.
+    ///   In case of ME-only mode, inputStream is used to process ::NV_ENC_MEONLY_PARAMS::inputBuffer and
+    ///   ::NV_ENC_MEONLY_PARAMS::referenceFrame
+    /// \param [in] outputStream
+    ///   Pointer to CUstream which is used to process ::NV_ENC_PIC_PARAMS::outputBuffer for encode.
+    ///   In case of ME-only mode, outputStream is used to process ::NV_ENC_MEONLY_PARAMS::mvBuffer
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    pub fn NvEncSetIOCudaStreams(
+        encoder: *mut ::std::os::raw::c_void,
+        inputStream: NV_ENC_CUSTREAM_PTR,
+        outputStream: NV_ENC_CUSTREAM_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Allocates an output bitstream buffer
+    ///
+    /// This function is used to allocate an output bitstream buffer and returns a
+    /// NV_ENC_OUTPUT_PTR to bitstream buffer to the client in the
+    /// NV_ENC_CREATE_BITSTREAM_BUFFER::bitstreamBuffer field.
+    /// The client can only call this function after the encoder session has been
+    /// initialized using ::NvEncInitializeEncoder() API. The minimum number of output
+    /// buffers allocated by the client must be at least 4 more than the number of
+    /// B frames being used for encoding. The client can only access the output
+    /// bitstream data by locking the \p bitstreamBuffer using the ::NvEncLockBitstream()
+    /// function.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in,out] createBitstreamBufferParams
+    ///   Pointer to the ::NV_ENC_CREATE_BITSTREAM_BUFFER structure.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncCreateBitstreamBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        createBitstreamBufferParams: *mut NV_ENC_CREATE_BITSTREAM_BUFFER,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Release a bitstream buffer.
+    ///
+    /// This function is used to release the output bitstream buffer allocated using
+    /// the ::NvEncCreateBitstreamBuffer() function. The client must release the output
+    /// bitstreamBuffer using this function before destroying the encoder session.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in] bitstreamBuffer
+    ///   Pointer to the bitstream buffer being released.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncDestroyBitstreamBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        bitstreamBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Submit an input picture for encoding.
+    ///
+    /// This function is used to submit an input picture buffer for encoding. The
+    /// encoding parameters are passed using \p *encodePicParams which is a pointer
+    /// to the ::_NV_ENC_PIC_PARAMS structure.
+    ///
+    /// If the client has set NV_ENC_INITIALIZE_PARAMS::enablePTD to 0, then it must
+    /// send a valid value for the following fields.
+    /// - NV_ENC_PIC_PARAMS::pictureType
+    /// - NV_ENC_PIC_PARAMS_H264::displayPOCSyntax (H264 only)
+    /// - NV_ENC_PIC_PARAMS_H264::frameNumSyntax (H264 only)
+    /// - NV_ENC_PIC_PARAMS_H264::refPicFlag (H264 only)
+    ///
+    /// \par MVC Encoding:
+    /// For MVC encoding the client must call encode picture API for each view separately
+    /// and must pass valid view id in NV_ENC_PIC_PARAMS_MVC::viewID field. Currently
+    /// NvEncodeAPI only supports stereo MVC so client must send viewID as 0 for base
+    /// view and view ID as 1 for dependent view.
+    ///
+    /// \par Asynchronous Encoding
+    /// If the client has enabled asynchronous mode of encoding by setting
+    /// NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 1 in the ::NvEncInitializeEncoder()
+    /// API, then the client must send a valid NV_ENC_PIC_PARAMS::completionEvent.
+    /// In case of asynchronous mode of operation, client can queue the ::NvEncEncodePicture()
+    /// API commands from the main thread and then queue output buffers to be processed
+    /// to a secondary worker thread. Before locking the output buffers in the
+    /// secondary thread, the client must wait on NV_ENC_PIC_PARAMS::completionEvent
+    /// it has queued in ::NvEncEncodePicture() API call. The client must always process
+    /// completion event and the output buffer in the same order in which they have been
+    /// submitted for encoding. The NvEncodeAPI interface is responsible for any
+    /// re-ordering required for B frames and will always ensure that encoded bitstream
+    /// data is written in the same order in which output buffer is submitted.
+    /// The NvEncodeAPI interface may return ::NV_ENC_ERR_NEED_MORE_INPUT error code for
+    /// some ::NvEncEncodePicture() API calls but the client must not treat it as a fatal error.
+    /// The NvEncodeAPI interface might not be able to submit an input picture buffer for encoding
+    /// immediately due to re-ordering for B frames.
+    /// \code
+    /// The below example shows how asynchronous encoding works in case of 1 B frame
+    /// ------------------------------------------------------------------------
+    /// Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..)
+    /// and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to
+    /// keep a copy of the input buffers for re-ordering and it allocates following
+    /// internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI
+    /// and the client is not responsible for allocating or freeing the memory of
+    /// the internal buffers.
+    ///
+    /// a) The client main thread will queue the following encode frame calls.
+    /// Note the picture type is unknown to the client, the decision is being taken by
+    /// NvEncodeAPI interface. The client should pass ::_NV_ENC_PIC_PARAMS parameter
+    /// consisting of allocated input buffer, output buffer and output events in successive
+    /// ::NvEncEncodePicture() API calls along with other required encode picture params.
+    /// For example:
+    /// 1st EncodePicture parameters - (I1, O1, E1)
+    /// 2nd EncodePicture parameters - (I2, O2, E2)
+    /// 3rd EncodePicture parameters - (I3, O3, E3)
+    ///
+    /// b) NvEncodeAPI SW will receive the following encode Commands from the client.
+    /// The left side shows input from client in the form (Input buffer, Output Buffer,
+    /// Output Event). The right hand side shows a possible picture type decision taken by
+    /// the NvEncodeAPI interface.
+    /// (I1, O1, E1)    ---P1 Frame
+    /// (I2, O2, E2)    ---B2 Frame
+    /// (I3, O3, E3)    ---P3 Frame
+    ///
+    /// c) NvEncodeAPI interface will make a copy of the input buffers to its internal
+    /// buffers for re-ordering. These copies are done as part of NvEncEncodePicture
+    /// function call from the client and NvEncodeAPI interface is responsible for
+    /// synchronization of copy operation with the actual encoding operation.
+    /// I1 --> NvI1
+    /// I2 --> NvI2
+    /// I3 --> NvI3
+    ///
+    /// d) The NvEncodeAPI encodes I1 as P frame and submits I1 to encoder HW and returns ::NV_ENC_SUCCESS.
+    /// The NvEncodeAPI tries to encode I2 as B frame and fails with ::NV_ENC_ERR_NEED_MORE_INPUT error code.
+    /// The error is not fatal and it notifies client that I2 is not submitted to encoder immediately.
+    /// The NvEncodeAPI encodes I3 as P frame and submits I3 for encoding which will be used as backward
+    /// reference frame for I2. The NvEncodeAPI then submits I2 for encoding and returns ::NV_ENC_SUCCESS.
+    /// Both the submissions are part of the same ::NvEncEncodePicture() function call.
+    ///
+    /// e) After returning from ::NvEncEncodePicture() call, the client must queue the output
+    /// bitstream processing work to the secondary thread. The output bitstream processing
+    /// for asynchronous mode consists of first waiting on completion event(E1, E2..)
+    /// and then locking the output bitstream buffer(O1, O2..) for reading the encoded
+    /// data. The work queued to the secondary thread by the client is in the following order
+    /// (I1, O1, E1)
+    /// (I2, O2, E2)
+    /// (I3, O3, E3)
+    /// Note they are in the same order in which client calls ::NvEncEncodePicture() API
+    /// in \p step a).
+    ///
+    /// f) NvEncodeAPI interface will do the re-ordering such that Encoder HW will receive
+    /// the following encode commands:
+    /// (NvI1, O1, E1)   ---P1 Frame
+    /// (NvI3, O2, E2)   ---P3 Frame
+    /// (NvI2, O3, E3)   ---B2 frame
+    ///
+    /// g) After the encoding operations are completed, the events will be signaled
+    /// by NvEncodeAPI interface in the following order :
+    /// (O1, E1) ---P1 Frame, output bitstream copied to O1 and event E1 signaled.
+    /// (O2, E2) ---P3 Frame, output bitstream copied to O2 and event E2 signaled.
+    /// (O3, E3) ---B2 Frame, output bitstream copied to O3 and event E3 signaled.
+    ///
+    /// h) The client must lock the bitstream data using ::NvEncLockBitstream() API in
+    /// the order O1,O2,O3 to read the encoded data, after waiting for the events
+    /// to be signaled in the same order i.e. E1, E2 and E3. The output processing is
+    /// done in the secondary thread in the following order:
+    /// Waits on E1, copies encoded bitstream from O1
+    /// Waits on E2, copies encoded bitstream from O2
+    /// Waits on E3, copies encoded bitstream from O3
+    ///
+    /// -Note the client will receive the events signaling and output buffer in the
+    /// same order in which they have submitted for encoding.
+    /// -Note the LockBitstream will have picture type field which will notify the
+    /// output picture type to the clients.
+    /// -Note the input, output buffer and the output completion event are free to be
+    /// reused once NvEncodeAPI interface has signaled the event and the client has
+    /// copied the data from the output buffer.
+    ///
+    /// \endcode
+    ///
+    /// \par Synchronous Encoding
+    /// The client can enable synchronous mode of encoding by setting
+    /// NV_ENC_INITIALIZE_PARAMS::enableEncodeAsync to 0 in ::NvEncInitializeEncoder() API.
+    /// The NvEncodeAPI interface may return ::NV_ENC_ERR_NEED_MORE_INPUT error code for
+    /// some ::NvEncEncodePicture() API calls when NV_ENC_INITIALIZE_PARAMS::enablePTD
+    /// is set to 1, but the client must not treat it as a fatal error. The NvEncodeAPI
+    /// interface might not be able to submit an input picture buffer for encoding
+    /// immediately due to re-ordering for B frames. The NvEncodeAPI interface cannot
+    /// submit the input picture which is decided to be encoded as B frame as it waits
+    /// for backward reference from temporally subsequent frames. This input picture
+    /// is buffered internally and waits for more input picture to arrive. The client
+    /// must not call ::NvEncLockBitstream() API on the output buffers whose
+    /// ::NvEncEncodePicture() API returns ::NV_ENC_ERR_NEED_MORE_INPUT. The client must
+    /// wait for the NvEncodeAPI interface to return ::NV_ENC_SUCCESS before locking the
+    /// output bitstreams to read the encoded bitstream data. The following example
+    /// explains the scenario with synchronous encoding with 1 B frame.
+    /// \code
+    /// The below example shows how synchronous encoding works in case of 1 B frame
+    /// -----------------------------------------------------------------------------
+    /// Suppose the client allocated 4 input buffers(I1,I2..), 4 output buffers(O1,O2..)
+    /// and 4 completion events(E1, E2, ...). The NvEncodeAPI interface will need to
+    /// keep a copy of the input buffers for re-ordering and it allocates following
+    /// internal buffers (NvI1, NvI2...). These internal buffers are managed by NvEncodeAPI
+    /// and the client is not responsible for allocating or freeing the memory of
+    /// the internal buffers.
+    ///
+    /// The client calls ::NvEncEncodePicture() API with input buffer I1 and output buffer O1.
+    /// The NvEncodeAPI decides to encode I1 as P frame and submits it to encoder
+    /// HW and returns ::NV_ENC_SUCCESS.
+    /// The client can now read the encoded data by locking the output O1 by calling
+    /// NvEncLockBitstream API.
+    ///
+    /// The client calls ::NvEncEncodePicture() API with input buffer I2 and output buffer O2.
+    /// The NvEncodeAPI decides to encode I2 as B frame and buffers I2 by copying it
+    /// to internal buffer and returns ::NV_ENC_ERR_NEED_MORE_INPUT.
+    /// The error is not fatal and it notifies client that it cannot read the encoded
+    /// data by locking the output O2 by calling ::NvEncLockBitstream() API without submitting
+    /// more work to the NvEncodeAPI interface.
+    ///
+    /// The client calls ::NvEncEncodePicture() with input buffer I3 and output buffer O3.
+    /// The NvEncodeAPI decides to encode I3 as P frame and it first submits I3 for
+    /// encoding which will be used as backward reference frame for I2.
+    /// The NvEncodeAPI then submits I2 for encoding and returns ::NV_ENC_SUCCESS. Both
+    /// the submissions are part of the same ::NvEncEncodePicture() function call.
+    /// The client can now read the encoded data for both the frames by locking the output
+    /// O2 followed by O3, by calling ::NvEncLockBitstream() API.
+    ///
+    /// The client must always lock the output in the same order in which it has submitted
+    /// to receive the encoded bitstream in correct encoding order.
+    ///
+    /// \endcode
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in,out] encodePicParams
+    ///   Pointer to the ::_NV_ENC_PIC_PARAMS structure.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_ENCODER_BUSY \n
+    /// ::NV_ENC_ERR_NEED_MORE_INPUT \n
+    /// ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncEncodePicture(
+        encoder: *mut ::std::os::raw::c_void,
+        encodePicParams: *mut NV_ENC_PIC_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    /// \brief Lock output bitstream buffer
+    ///
+    /// This function is used to lock the bitstream buffer to read the encoded data.
+    /// The client can only access the encoded data by calling this function.
+    /// The pointer to client accessible encoded data is returned in the
+    /// NV_ENC_LOCK_BITSTREAM::bitstreamBufferPtr field. The size of the encoded data
+    /// in the output buffer is returned in the NV_ENC_LOCK_BITSTREAM::bitstreamSizeInBytes field.
+    /// The NvEncodeAPI interface also returns the output picture type and picture structure
+    /// of the encoded frame in NV_ENC_LOCK_BITSTREAM::pictureType and
+    /// NV_ENC_LOCK_BITSTREAM::pictureStruct fields respectively. If the client has
+    /// set NV_ENC_LOCK_BITSTREAM::doNotWait to 1, the function might return
+    /// ::NV_ENC_ERR_LOCK_BUSY if client is operating in synchronous mode. This is not
+    /// a fatal failure if NV_ENC_LOCK_BITSTREAM::doNotWait is set to 1. In the above case the client can
+    /// retry the function after a few milliseconds.
+    ///
+    /// \param [in] encoder
+    ///   Pointer to the NvEncodeAPI interface.
+    /// \param [in,out] lockBitstreamBufferParams
+    ///   Pointer to the ::_NV_ENC_LOCK_BITSTREAM structure.
+    ///
+    /// \return
+    /// ::NV_ENC_SUCCESS \n
+    /// ::NV_ENC_ERR_INVALID_PTR \n
+    /// ::NV_ENC_ERR_INVALID_ENCODERDEVICE \n
+    /// ::NV_ENC_ERR_DEVICE_NOT_EXIST \n
+    /// ::NV_ENC_ERR_UNSUPPORTED_PARAM \n
+    /// ::NV_ENC_ERR_OUT_OF_MEMORY \n
+    /// ::NV_ENC_ERR_INVALID_PARAM \n
+    /// ::NV_ENC_ERR_INVALID_VERSION \n
+    /// ::NV_ENC_ERR_LOCK_BUSY \n
+    /// ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \n
+    /// ::NV_ENC_ERR_GENERIC \n
+    ///
+    pub fn NvEncLockBitstream(
+        encoder: *mut ::std::os::raw::c_void,
+        lockBitstreamBufferParams: *mut NV_ENC_LOCK_BITSTREAM,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Unlock the output bitstream buffer"]
+    #[doc = ""]
+    #[doc = " This function is used to unlock the output bitstream buffer after the client"]
+    #[doc = " has read the encoded data from output buffer. The client must call this function"]
+    #[doc = " to unlock the output buffer which it has previously locked using ::NvEncLockBitstream()"]
+    #[doc = " function. Using a locked bitstream buffer in ::NvEncEncodePicture() API will cause"]
+    #[doc = " the function to fail."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] bitstreamBuffer"]
+    #[doc = "   bitstream buffer pointer being unlocked"]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncUnlockBitstream(
+        encoder: *mut ::std::os::raw::c_void,
+        bitstreamBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Restore state of encoder"]
+    #[doc = ""]
+    #[doc = " This function is used to restore the state of encoder with state saved internally in"]
+    #[doc = " state buffer corresponding to index equal to 'NV_ENC_RESTORE_ENCODER_STATE_PARAMS::bfrIndex'."]
+    #[doc = " Client can specify the state type to be updated by specifying appropriate value in"]
+    #[doc = " 'NV_ENC_RESTORE_ENCODER_STATE_PARAMS::state'. The client must call this"]
+    #[doc = " function after all previous encodes have finished."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] restoreState"]
+    #[doc = "   Pointer to the ::_NV_ENC_RESTORE_ENCODER_STATE_PARAMS structure"]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncRestoreEncoderState(
+        encoder: *mut ::std::os::raw::c_void,
+        restoreState: *mut NV_ENC_RESTORE_ENCODER_STATE_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Locks an input buffer"]
+    #[doc = ""]
+    #[doc = " This function is used to lock the input buffer to load the uncompressed YUV"]
+    #[doc = " pixel data into input buffer memory. The client must pass the NV_ENC_INPUT_PTR"]
+    #[doc = " it had previously allocated using ::NvEncCreateInputBuffer() in the"]
+    #[doc = " NV_ENC_LOCK_INPUT_BUFFER::inputBuffer field."]
+    #[doc = " The NvEncodeAPI interface returns pointer to client accessible input buffer"]
+    #[doc = " memory in NV_ENC_LOCK_INPUT_BUFFER::bufferDataPtr field."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] lockInputBufferParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_LOCK_INPUT_BUFFER structure"]
+    #[doc = ""]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_LOCK_BUSY \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncLockInputBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        lockInputBufferParams: *mut NV_ENC_LOCK_INPUT_BUFFER,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Unlocks the input buffer"]
+    #[doc = ""]
+    #[doc = " This function is used to unlock the input buffer memory previously locked for"]
+    #[doc = " uploading YUV pixel data. The input buffer must be unlocked before being used"]
+    #[doc = " again for encoding, otherwise NvEncodeAPI will fail the ::NvEncEncodePicture() call."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] inputBuffer"]
+    #[doc = "   Pointer to the input buffer that is being unlocked."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    #[doc = ""]
+    pub fn NvEncUnlockInputBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        inputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Get encoding statistics."]
+    #[doc = ""]
+    #[doc = " This function is used to retrieve the encoding statistics."]
+    #[doc = " This API is not supported when encode device type is CUDA."]
+    #[doc = " Note that this API will be removed in future Video Codec SDK release."]
+    #[doc = " Clients should use NvEncLockBitstream() API to retrieve the encoding statistics."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] encodeStats"]
+    #[doc = "   Pointer to the ::_NV_ENC_STAT structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncGetEncodeStats(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeStats: *mut NV_ENC_STAT,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Get encoded sequence and picture header."]
+    #[doc = ""]
+    #[doc = " This function can be used to retrieve the sequence and picture header out of"]
+    #[doc = " band. The client must call this function only after the encoder has been"]
+    #[doc = " initialized using ::NvEncInitializeEncoder() function. The client must"]
+    #[doc = " allocate the memory where the NvEncodeAPI interface can copy the bitstream"]
+    #[doc = " header and pass the pointer to the memory in NV_ENC_SEQUENCE_PARAM_PAYLOAD::spsppsBuffer."]
+    #[doc = " The size of buffer is passed in the field  NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize."]
+    #[doc = " The NvEncodeAPI interface will copy the bitstream header payload and returns"]
+    #[doc = " the actual size of the bitstream header in the field"]
+    #[doc = " NV_ENC_SEQUENCE_PARAM_PAYLOAD::outSPSPPSPayloadSize."]
+    #[doc = " The client must call  ::NvEncGetSequenceParams() function from the same thread which is"]
+    #[doc = " being used to call ::NvEncEncodePicture() function."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] sequenceParamPayload"]
+    #[doc = "   Pointer to the ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncGetSequenceParams(
+        encoder: *mut ::std::os::raw::c_void,
+        sequenceParamPayload: *mut NV_ENC_SEQUENCE_PARAM_PAYLOAD,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Get sequence and picture header."]
+    #[doc = ""]
+    #[doc = " This function can be used to retrieve the sequence and picture header out of band, even when"]
+    #[doc = " encoder has not been initialized using ::NvEncInitializeEncoder() function."]
+    #[doc = " The client must allocate the memory where the NvEncodeAPI interface can copy the bitstream"]
+    #[doc = " header and pass the pointer to the memory in NV_ENC_SEQUENCE_PARAM_PAYLOAD::spsppsBuffer."]
+    #[doc = " The size of buffer is passed in the field  NV_ENC_SEQUENCE_PARAM_PAYLOAD::inBufferSize."]
+    #[doc = " If encoder has not been initialized using ::NvEncInitializeEncoder() function, client must"]
+    #[doc = " send NV_ENC_INITIALIZE_PARAMS as input. The NV_ENC_INITIALIZE_PARAMS passed must be same as the"]
+    #[doc = " one which will be used for initializing encoder using ::NvEncInitializeEncoder() function later."]
+    #[doc = " If encoder is already initialized using ::NvEncInitializeEncoder() function, the provided"]
+    #[doc = " NV_ENC_INITIALIZE_PARAMS structure is ignored. The NvEncodeAPI interface will copy the bitstream"]
+    #[doc = " header payload and returns the actual size of the bitstream header in the field"]
+    #[doc = " NV_ENC_SEQUENCE_PARAM_PAYLOAD::outSPSPPSPayloadSize. The client must call ::NvEncGetSequenceParamEx()"]
+    #[doc = " function from the same thread which is being used to call ::NvEncEncodePicture() function."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] encInitParams"]
+    #[doc = "   Pointer to the _NV_ENC_INITIALIZE_PARAMS structure."]
+    #[doc = " \\param [in,out] sequenceParamPayload"]
+    #[doc = "   Pointer to the ::_NV_ENC_SEQUENCE_PARAM_PAYLOAD structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncGetSequenceParamEx(
+        encoder: *mut ::std::os::raw::c_void,
+        encInitParams: *mut NV_ENC_INITIALIZE_PARAMS,
+        sequenceParamPayload: *mut NV_ENC_SEQUENCE_PARAM_PAYLOAD,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Register event for notification to encoding completion."]
+    #[doc = ""]
+    #[doc = " This function is used to register the completion event with NvEncodeAPI"]
+    #[doc = " interface. The event is required when the client has configured the encoder to"]
+    #[doc = " work in asynchronous mode. In this mode the client needs to send a completion"]
+    #[doc = " event with every output buffer. The NvEncodeAPI interface will signal the"]
+    #[doc = " completion of the encoding process using this event. Only after the event is"]
+    #[doc = " signaled the client can get the encoded data using ::NvEncLockBitstream() function."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] eventParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_EVENT_PARAMS structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncRegisterAsyncEvent(
+        encoder: *mut ::std::os::raw::c_void,
+        eventParams: *mut NV_ENC_EVENT_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Unregister completion event."]
+    #[doc = ""]
+    #[doc = " This function is used to unregister completion event which has been previously"]
+    #[doc = " registered using ::NvEncRegisterAsyncEvent() function. The client must unregister"]
+    #[doc = " all events before destroying the encoder using ::NvEncDestroyEncoder() function."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] eventParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_EVENT_PARAMS structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncUnregisterAsyncEvent(
+        encoder: *mut ::std::os::raw::c_void,
+        eventParams: *mut NV_ENC_EVENT_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Map an externally created input resource pointer for encoding."]
+    #[doc = ""]
+    #[doc = " Maps an externally allocated input resource and returns an NV_ENC_INPUT_PTR"]
+    #[doc = " which can be used for encoding in the ::NvEncEncodePicture() function. The"]
+    #[doc = " mapped resource is returned in the field NV_ENC_MAP_INPUT_RESOURCE::outputResourcePtr."]
+    #[doc = " The NvEncodeAPI interface also returns the buffer format of the mapped resource"]
+    #[doc = " in the field NV_ENC_MAP_INPUT_RESOURCE::outbufferFmt."]
+    #[doc = " This function provides synchronization guarantee that any graphics work submitted"]
+    #[doc = " on the input buffer is completed before the buffer is used for encoding. This is"]
+    #[doc = " also true for compute (i.e. CUDA) work, provided that the previous workload using"]
+    #[doc = " the input resource was submitted to the default stream."]
+    #[doc = " The client should not access any input buffer while they are mapped by the encoder."]
+    #[doc = " For D3D12 interface type, this function does not provide synchronization guarantee."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] mapInputResParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_MAP_INPUT_RESOURCE structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \\n"]
+    #[doc = " ::NV_ENC_ERR_MAP_FAILED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncMapInputResource(
+        encoder: *mut ::std::os::raw::c_void,
+        mapInputResParams: *mut NV_ENC_MAP_INPUT_RESOURCE,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief  UnMaps a NV_ENC_INPUT_PTR  which was mapped for encoding"]
+    #[doc = ""]
+    #[doc = ""]
+    #[doc = " UnMaps an input buffer which was previously mapped using ::NvEncMapInputResource()"]
+    #[doc = " API. The mapping created using ::NvEncMapInputResource() should be invalidated"]
+    #[doc = " using this API before the external resource is destroyed by the client. The client"]
+    #[doc = " must unmap the buffer after ::NvEncLockBitstream() API returns successfully for encode"]
+    #[doc = " work submitted using the mapped input buffer."]
+    #[doc = ""]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] mappedInputBuffer"]
+    #[doc = "   Pointer to the NV_ENC_INPUT_PTR"]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \\n"]
+    #[doc = " ::NV_ENC_ERR_RESOURCE_NOT_MAPPED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncUnmapInputResource(
+        encoder: *mut ::std::os::raw::c_void,
+        mappedInputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Destroy Encoding Session"]
+    #[doc = ""]
+    #[doc = " Destroys the encoder session previously created using ::NvEncOpenEncodeSession()"]
+    #[doc = " function. The client must flush the encoder before freeing any resources. In order"]
+    #[doc = " to flush the encoder the client must pass a NULL encode picture packet and either"]
+    #[doc = " wait for the ::NvEncEncodePicture() function to return in synchronous mode or wait"]
+    #[doc = " for the flush event to be signaled by the encoder in asynchronous mode."]
+    #[doc = " The client must free all the input and output resources created using the"]
+    #[doc = " NvEncodeAPI interface before destroying the encoder. If the client is operating"]
+    #[doc = " in asynchronous mode, it must also unregister the completion events previously"]
+    #[doc = " registered."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncDestroyEncoder(encoder: *mut ::std::os::raw::c_void) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Invalidate reference frames"]
+    #[doc = ""]
+    #[doc = " Invalidates reference frame based on the time stamp provided by the client."]
+    #[doc = " The encoder marks any reference frames or any frames which have been reconstructed"]
+    #[doc = " using the corrupt frame as invalid for motion estimation and uses older reference"]
+    #[doc = " frames for motion estimation. The encoder forces the current frame to be encoded"]
+    #[doc = " as an intra frame if no reference frames are left after invalidation process."]
+    #[doc = " This is useful for low latency application for error resiliency. The client"]
+    #[doc = " is recommended to set NV_ENC_CONFIG_H264::maxNumRefFrames to a large value so"]
+    #[doc = " that encoder can keep a backup of older reference frames in the DPB and can use them"]
+    #[doc = " for motion estimation when the newer reference frames have been invalidated."]
+    #[doc = " This API can be called multiple times."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] invalidRefFrameTimeStamp"]
+    #[doc = "   Timestamp of the invalid reference frames which needs to be invalidated."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncInvalidateRefFrames(
+        encoder: *mut ::std::os::raw::c_void,
+        invalidRefFrameTimeStamp: u64,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Opens an encoding session."]
+    #[doc = ""]
+    #[doc = " Opens an encoding session and returns a pointer to the encoder interface in"]
+    #[doc = " the \\p **encoder parameter. The client should start encoding process by calling"]
+    #[doc = " this API first."]
+    #[doc = " The client must pass a pointer to IDirect3DDevice9 device or CUDA context in the \\p *device parameter."]
+    #[doc = " For the OpenGL interface, \\p device must be NULL. An OpenGL context must be current when"]
+    #[doc = " calling all NvEncodeAPI functions."]
+    #[doc = " If the creation of encoder session fails, the client must call ::NvEncDestroyEncoder API"]
+    #[doc = " before exiting."]
+    #[doc = ""]
+    #[doc = " \\param [in] openSessionExParams"]
+    #[doc = "    Pointer to a ::NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS structure."]
+    #[doc = " \\param [out] encoder"]
+    #[doc = "    Encode Session pointer to the NvEncodeAPI interface."]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_NO_ENCODE_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncOpenEncodeSessionEx(
+        openSessionExParams: *mut NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS,
+        encoder: *mut *mut ::std::os::raw::c_void,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Registers a resource with the Nvidia Video Encoder Interface."]
+    #[doc = ""]
+    #[doc = " Registers a resource with the Nvidia Video Encoder Interface for book keeping."]
+    #[doc = " The client is expected to pass the registered resource handle as well, while calling ::NvEncMapInputResource API."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NVEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\param [in] registerResParams"]
+    #[doc = "   Pointer to a ::_NV_ENC_REGISTER_RESOURCE structure"]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_RESOURCE_REGISTER_FAILED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = " ::NV_ENC_ERR_UNIMPLEMENTED \\n"]
+    #[doc = ""]
+    pub fn NvEncRegisterResource(
+        encoder: *mut ::std::os::raw::c_void,
+        registerResParams: *mut NV_ENC_REGISTER_RESOURCE,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Unregisters a resource previously registered with the Nvidia Video Encoder Interface."]
+    #[doc = ""]
+    #[doc = " Unregisters a resource previously registered with the Nvidia Video Encoder Interface."]
+    #[doc = " The client is expected to unregister any resource that it has registered with the"]
+    #[doc = " Nvidia Video Encoder Interface before destroying the resource."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NVEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\param [in] registeredResource"]
+    #[doc = "   The registered resource pointer that was returned in ::NvEncRegisterResource."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_RESOURCE_NOT_REGISTERED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = " ::NV_ENC_ERR_UNIMPLEMENTED \\n"]
+    #[doc = ""]
+    pub fn NvEncUnregisterResource(
+        encoder: *mut ::std::os::raw::c_void,
+        registeredResource: NV_ENC_REGISTERED_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Reconfigure an existing encoding session."]
+    #[doc = ""]
+    #[doc = " Reconfigure an existing encoding session."]
+    #[doc = " The client should call this API to change/reconfigure the parameter passed during"]
+    #[doc = " NvEncInitializeEncoder API call."]
+    #[doc = " Currently Reconfiguration of following are not supported."]
+    #[doc = " Change in GOP structure."]
+    #[doc = " Change in sync-Async mode."]
+    #[doc = " Change in MaxWidth & MaxHeight."]
+    #[doc = " Change in PTD mode."]
+    #[doc = ""]
+    #[doc = " Resolution change is possible only if maxEncodeWidth & maxEncodeHeight of NV_ENC_INITIALIZE_PARAMS"]
+    #[doc = " is set while creating encoder session."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NVEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\param [in] reInitEncodeParams"]
+    #[doc = "    Pointer to a ::NV_ENC_RECONFIGURE_PARAMS structure."]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_NO_ENCODE_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_DEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = ""]
+    pub fn NvEncReconfigureEncoder(
+        encoder: *mut ::std::os::raw::c_void,
+        reInitEncodeParams: *mut NV_ENC_RECONFIGURE_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Allocates output MV buffer for ME only mode."]
+    #[doc = ""]
+    #[doc = " This function is used to allocate an output MV buffer. The size of the mvBuffer is"]
+    #[doc = " dependent on the frame height and width of the last ::NvEncCreateInputBuffer() call."]
+    #[doc = " The NV_ENC_OUTPUT_PTR returned by the NvEncodeAPI interface in the"]
+    #[doc = " ::NV_ENC_CREATE_MV_BUFFER::mvBuffer field should be used in"]
+    #[doc = " ::NvEncRunMotionEstimationOnly() API."]
+    #[doc = " Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API to get the motion vector data."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in,out] createMVBufferParams"]
+    #[doc = "  Pointer to the ::NV_ENC_CREATE_MV_BUFFER structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    pub fn NvEncCreateMVBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        createMVBufferParams: *mut NV_ENC_CREATE_MV_BUFFER,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Release an output MV buffer for ME only mode."]
+    #[doc = ""]
+    #[doc = " This function is used to release the output MV buffer allocated using"]
+    #[doc = " the ::NvEncCreateMVBuffer() function. The client must release the output"]
+    #[doc = " mvBuffer using this function before destroying the encoder session."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] mvBuffer"]
+    #[doc = "   Pointer to the mvBuffer being released."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    pub fn NvEncDestroyMVBuffer(
+        encoder: *mut ::std::os::raw::c_void,
+        mvBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Submit an input picture and reference frame for motion estimation in ME only mode."]
+    #[doc = ""]
+    #[doc = " This function is used to submit the input frame and reference frame for motion"]
+    #[doc = " estimation. The ME parameters are passed using *meOnlyParams which is a pointer"]
+    #[doc = " to ::_NV_ENC_MEONLY_PARAMS structure."]
+    #[doc = " Client must lock ::NV_ENC_CREATE_MV_BUFFER::mvBuffer using ::NvEncLockBitstream() API"]
+    #[doc = " to get the motion vector data."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] meOnlyParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_MEONLY_PARAMS structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    #[doc = " ::NV_ENC_ERR_NEED_MORE_INPUT \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    pub fn NvEncRunMotionEstimationOnly(
+        encoder: *mut ::std::os::raw::c_void,
+        meOnlyParams: *mut NV_ENC_MEONLY_PARAMS,
+    ) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Get the largest NvEncodeAPI version supported by the driver."]
+    #[doc = ""]
+    #[doc = " This function can be used by clients to determine if the driver supports"]
+    #[doc = " the NvEncodeAPI header the application was compiled with."]
+    #[doc = ""]
+    #[doc = " \\param [out] version"]
+    #[doc = "   Pointer to the requested value. The 4 least significant bits in the returned"]
+    #[doc = "   value indicate the minor version and the rest of the bits indicate the major"]
+    #[doc = "   version of the largest supported version."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    pub fn NvEncodeAPIGetMaxSupportedVersion(version: *mut u32) -> NVENCSTATUS;
+}
+extern "C" {
+    #[doc = " \\brief Get the description of the last error reported by the API."]
+    #[doc = ""]
+    #[doc = " This function returns a null-terminated string that can be used by clients to better understand the reason"]
+    #[doc = " for failure of a previous API call."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = "   Pointer to buffer containing the details of the last error encountered by the API."]
+    pub fn NvEncGetLastErrorString( // same parameter and return types as the PNVENCGETLASTERROR function-pointer alias
+        encoder: *mut ::std::os::raw::c_void,
+    ) -> *const ::std::os::raw::c_char;
+}
+extern "C" {
+    #[doc = " \\brief Submit an input picture for lookahead."]
+    #[doc = ""]
+    #[doc = " This function can be used by clients to submit an input frame for lookahead. The client could call this function for"]
+    #[doc = " NV_ENC_INITIALIZE_PARAMS::lookaheadDepth plus one frames before calling NvEncEncodePicture() for the first frame."]
+    #[doc = ""]
+    #[doc = " \\param [in] encoder"]
+    #[doc = "   Pointer to the NvEncodeAPI interface."]
+    #[doc = " \\param [in] lookaheadParams"]
+    #[doc = "   Pointer to the ::_NV_ENC_LOOKAHEAD_PIC_PARAMS structure."]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS \\n"]
+    #[doc = " ::NV_ENC_ERR_NEED_MORE_INPUT \\n  (open question upstream: should this error be returned if the lookahead queue is not full?)"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR \\n"]
+    #[doc = " ::NV_ENC_ERR_ENCODER_NOT_INITIALIZED \\n"]
+    #[doc = " ::NV_ENC_ERR_GENERIC \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_ENCODERDEVICE \\n"]
+    #[doc = " ::NV_ENC_ERR_DEVICE_NOT_EXIST \\n"]
+    #[doc = " ::NV_ENC_ERR_UNSUPPORTED_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_OUT_OF_MEMORY \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PARAM \\n"]
+    #[doc = " ::NV_ENC_ERR_INVALID_VERSION \\n"]
+    pub fn NvEncLookaheadPicture(
+        encoder: *mut ::std::os::raw::c_void,
+        lookaheadParams: *mut NV_ENC_LOOKAHEAD_PIC_PARAMS,
+    ) -> NVENCSTATUS;
+}
+#[doc = " \\cond API PFN"] // Doxygen section marker (presumably carried over from the C header); rustdoc attaches it to the alias below
+pub type PNVENCOPENENCODESESSION = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncOpenEncodeSession
+    unsafe extern "C" fn(
+        device: *mut ::std::os::raw::c_void,
+        deviceType: u32,
+        encoder: *mut *mut ::std::os::raw::c_void,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEGUIDCOUNT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeGUIDCount
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEGUIDS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeGUIDs
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        GUIDs: *mut GUID,
+        guidArraySize: u32,
+        GUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPROFILEGUIDCOUNT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeProfileGUIDCount
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        encodeProfileGUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPROFILEGUIDS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeProfileGUIDs
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        profileGUIDs: *mut GUID,
+        guidArraySize: u32,
+        GUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETINPUTFORMATCOUNT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetInputFormatCount
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        inputFmtCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETINPUTFORMATS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetInputFormats
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        inputFmts: *mut NV_ENC_BUFFER_FORMAT,
+        inputFmtArraySize: u32,
+        inputFmtCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODECAPS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeCaps
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        capsParam: *mut NV_ENC_CAPS_PARAM,
+        capsVal: *mut ::std::os::raw::c_int,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPRESETCOUNT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodePresetCount
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        encodePresetGUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPRESETGUIDS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodePresetGUIDs
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUIDs: *mut GUID,
+        guidArraySize: u32,
+        encodePresetGUIDCount: *mut u32,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPRESETCONFIG = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodePresetConfig
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUID: GUID,
+        presetConfig: *mut NV_ENC_PRESET_CONFIG,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODEPRESETCONFIGEX = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodePresetConfigEx
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeGUID: GUID,
+        presetGUID: GUID,
+        tuningInfo: NV_ENC_TUNING_INFO,
+        presetConfig: *mut NV_ENC_PRESET_CONFIG,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCINITIALIZEENCODER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncInitializeEncoder
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        createEncodeParams: *mut NV_ENC_INITIALIZE_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCCREATEINPUTBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncCreateInputBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        createInputBufferParams: *mut NV_ENC_CREATE_INPUT_BUFFER,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCDESTROYINPUTBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncDestroyInputBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        inputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCCREATEBITSTREAMBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncCreateBitstreamBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        createBitstreamBufferParams: *mut NV_ENC_CREATE_BITSTREAM_BUFFER,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCDESTROYBITSTREAMBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncDestroyBitstreamBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        bitstreamBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCENCODEPICTURE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncEncodePicture
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodePicParams: *mut NV_ENC_PIC_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCLOCKBITSTREAM = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncLockBitstream
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        lockBitstreamBufferParams: *mut NV_ENC_LOCK_BITSTREAM,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCUNLOCKBITSTREAM = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncUnlockBitstream
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        bitstreamBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCLOCKINPUTBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncLockInputBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        lockInputBufferParams: *mut NV_ENC_LOCK_INPUT_BUFFER,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCUNLOCKINPUTBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncUnlockInputBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        inputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETENCODESTATS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetEncodeStats
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encodeStats: *mut NV_ENC_STAT,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETSEQUENCEPARAMS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetSequenceParams
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        sequenceParamPayload: *mut NV_ENC_SEQUENCE_PARAM_PAYLOAD,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCREGISTERASYNCEVENT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncRegisterAsyncEvent
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        eventParams: *mut NV_ENC_EVENT_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCUNREGISTERASYNCEVENT = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncUnregisterAsyncEvent
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        eventParams: *mut NV_ENC_EVENT_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCMAPINPUTRESOURCE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncMapInputResource
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        mapInputResParams: *mut NV_ENC_MAP_INPUT_RESOURCE,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCUNMAPINPUTRESOURCE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncUnmapInputResource
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        mappedInputBuffer: NV_ENC_INPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCDESTROYENCODER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncDestroyEncoder
+    unsafe extern "C" fn(encoder: *mut ::std::os::raw::c_void) -> NVENCSTATUS,
+>;
+pub type PNVENCINVALIDATEREFFRAMES = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncInvalidateRefFrames
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        invalidRefFrameTimeStamp: u64,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCOPENENCODESESSIONEX = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncOpenEncodeSessionEx
+    unsafe extern "C" fn(
+        openSessionExParams: *mut NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS,
+        encoder: *mut *mut ::std::os::raw::c_void,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCREGISTERRESOURCE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncRegisterResource
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        registerResParams: *mut NV_ENC_REGISTER_RESOURCE,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCUNREGISTERRESOURCE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncUnregisterResource
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        registeredRes: NV_ENC_REGISTERED_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCRECONFIGUREENCODER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncReconfigureEncoder
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        reInitEncodeParams: *mut NV_ENC_RECONFIGURE_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCCREATEMVBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncCreateMVBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        createMVBufferParams: *mut NV_ENC_CREATE_MV_BUFFER,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCDESTROYMVBUFFER = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncDestroyMVBuffer
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        mvBuffer: NV_ENC_OUTPUT_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCRUNMOTIONESTIMATIONONLY = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncRunMotionEstimationOnly
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        meOnlyParams: *mut NV_ENC_MEONLY_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETLASTERROR = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetLastErrorString
+    unsafe extern "C" fn(encoder: *mut ::std::os::raw::c_void) -> *const ::std::os::raw::c_char,
+>;
+pub type PNVENCSETIOCUDASTREAMS = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncSetIOCudaStreams
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        inputStream: NV_ENC_CUSTREAM_PTR,
+        outputStream: NV_ENC_CUSTREAM_PTR,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCGETSEQUENCEPARAMEX = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncGetSequenceParamEx
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        encInitParams: *mut NV_ENC_INITIALIZE_PARAMS,
+        sequenceParamPayload: *mut NV_ENC_SEQUENCE_PARAM_PAYLOAD,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCRESTOREENCODERSTATE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncRestoreEncoderState
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        restoreState: *mut NV_ENC_RESTORE_ENCODER_STATE_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+pub type PNVENCLOOKAHEADPICTURE = ::std::option::Option< // field type of _NV_ENCODE_API_FUNCTION_LIST::nvEncLookaheadPicture
+    unsafe extern "C" fn(
+        encoder: *mut ::std::os::raw::c_void,
+        lookaheadParams: *mut NV_ENC_LOOKAHEAD_PIC_PARAMS,
+    ) -> NVENCSTATUS,
+>;
+#[doc = " \\ingroup ENCODER_STRUCTURE"]
+#[doc = " NV_ENCODE_API_FUNCTION_LIST: table of NvEncodeAPI entry points. Fields marked [in] are set by the client; fields marked [out] hold the function pointers the client calls through."]
+#[repr(C)]
+#[derive(Debug, Copy, Clone, Hash, PartialOrd, Ord, PartialEq, Eq)]
+pub struct _NV_ENCODE_API_FUNCTION_LIST {
+    #[doc = "< [in]: Client should pass NV_ENCODE_API_FUNCTION_LIST_VER."]
+    pub version: u32,
+    #[doc = "< [in]: Reserved and should be set to 0."]
+    pub reserved: u32,
+    #[doc = "< [out]: Client should access ::NvEncOpenEncodeSession() API through this pointer."]
+    pub nvEncOpenEncodeSession: PNVENCOPENENCODESESSION,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeGUIDCount() API through this pointer."]
+    pub nvEncGetEncodeGUIDCount: PNVENCGETENCODEGUIDCOUNT,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeProfileGUIDCount() API through this pointer."]
+    pub nvEncGetEncodeProfileGUIDCount: PNVENCGETENCODEPROFILEGUIDCOUNT,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeProfileGUIDs() API through this pointer."]
+    pub nvEncGetEncodeProfileGUIDs: PNVENCGETENCODEPROFILEGUIDS,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeGUIDs() API through this pointer."]
+    pub nvEncGetEncodeGUIDs: PNVENCGETENCODEGUIDS,
+    #[doc = "< [out]: Client should access ::NvEncGetInputFormatCount() API through this pointer."]
+    pub nvEncGetInputFormatCount: PNVENCGETINPUTFORMATCOUNT,
+    #[doc = "< [out]: Client should access ::NvEncGetInputFormats() API through this pointer."]
+    pub nvEncGetInputFormats: PNVENCGETINPUTFORMATS,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeCaps() API through this pointer."]
+    pub nvEncGetEncodeCaps: PNVENCGETENCODECAPS,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodePresetCount() API through this pointer."]
+    pub nvEncGetEncodePresetCount: PNVENCGETENCODEPRESETCOUNT,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodePresetGUIDs() API through this pointer."]
+    pub nvEncGetEncodePresetGUIDs: PNVENCGETENCODEPRESETGUIDS,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodePresetConfig() API through this pointer."]
+    pub nvEncGetEncodePresetConfig: PNVENCGETENCODEPRESETCONFIG,
+    #[doc = "< [out]: Client should access ::NvEncInitializeEncoder() API through this pointer."]
+    pub nvEncInitializeEncoder: PNVENCINITIALIZEENCODER,
+    #[doc = "< [out]: Client should access ::NvEncCreateInputBuffer() API through this pointer."]
+    pub nvEncCreateInputBuffer: PNVENCCREATEINPUTBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncDestroyInputBuffer() API through this pointer."]
+    pub nvEncDestroyInputBuffer: PNVENCDESTROYINPUTBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncCreateBitstreamBuffer() API through this pointer."]
+    pub nvEncCreateBitstreamBuffer: PNVENCCREATEBITSTREAMBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncDestroyBitstreamBuffer() API through this pointer."]
+    pub nvEncDestroyBitstreamBuffer: PNVENCDESTROYBITSTREAMBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncEncodePicture() API through this pointer."]
+    pub nvEncEncodePicture: PNVENCENCODEPICTURE,
+    #[doc = "< [out]: Client should access ::NvEncLockBitstream() API through this pointer."]
+    pub nvEncLockBitstream: PNVENCLOCKBITSTREAM,
+    #[doc = "< [out]: Client should access ::NvEncUnlockBitstream() API through this pointer."]
+    pub nvEncUnlockBitstream: PNVENCUNLOCKBITSTREAM,
+    #[doc = "< [out]: Client should access ::NvEncLockInputBuffer() API through this pointer."]
+    pub nvEncLockInputBuffer: PNVENCLOCKINPUTBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncUnlockInputBuffer() API through this pointer."]
+    pub nvEncUnlockInputBuffer: PNVENCUNLOCKINPUTBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodeStats() API through this pointer."]
+    pub nvEncGetEncodeStats: PNVENCGETENCODESTATS,
+    #[doc = "< [out]: Client should access ::NvEncGetSequenceParams() API through this pointer."]
+    pub nvEncGetSequenceParams: PNVENCGETSEQUENCEPARAMS,
+    #[doc = "< [out]: Client should access ::NvEncRegisterAsyncEvent() API through this pointer."]
+    pub nvEncRegisterAsyncEvent: PNVENCREGISTERASYNCEVENT,
+    #[doc = "< [out]: Client should access ::NvEncUnregisterAsyncEvent() API through this pointer."]
+    pub nvEncUnregisterAsyncEvent: PNVENCUNREGISTERASYNCEVENT,
+    #[doc = "< [out]: Client should access ::NvEncMapInputResource() API through this pointer."]
+    pub nvEncMapInputResource: PNVENCMAPINPUTRESOURCE,
+    #[doc = "< [out]: Client should access ::NvEncUnmapInputResource() API through this pointer."]
+    pub nvEncUnmapInputResource: PNVENCUNMAPINPUTRESOURCE,
+    #[doc = "< [out]: Client should access ::NvEncDestroyEncoder() API through this pointer."]
+    pub nvEncDestroyEncoder: PNVENCDESTROYENCODER,
+    #[doc = "< [out]: Client should access ::NvEncInvalidateRefFrames() API through this pointer."]
+    pub nvEncInvalidateRefFrames: PNVENCINVALIDATEREFFRAMES,
+    #[doc = "< [out]: Client should access ::NvEncOpenEncodeSessionEx() API through this pointer."]
+    pub nvEncOpenEncodeSessionEx: PNVENCOPENENCODESESSIONEX,
+    #[doc = "< [out]: Client should access ::NvEncRegisterResource() API through this pointer."]
+    pub nvEncRegisterResource: PNVENCREGISTERRESOURCE,
+    #[doc = "< [out]: Client should access ::NvEncUnregisterResource() API through this pointer."]
+    pub nvEncUnregisterResource: PNVENCUNREGISTERRESOURCE,
+    #[doc = "< [out]: Client should access ::NvEncReconfigureEncoder() API through this pointer."]
+    pub nvEncReconfigureEncoder: PNVENCRECONFIGUREENCODER,
+    pub reserved1: *mut ::std::os::raw::c_void, // no description in the bindings; pointer-sized slot between nvEncReconfigureEncoder and nvEncCreateMVBuffer
+    #[doc = "< [out]: Client should access ::NvEncCreateMVBuffer() API through this pointer."]
+    pub nvEncCreateMVBuffer: PNVENCCREATEMVBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncDestroyMVBuffer() API through this pointer."]
+    pub nvEncDestroyMVBuffer: PNVENCDESTROYMVBUFFER,
+    #[doc = "< [out]: Client should access ::NvEncRunMotionEstimationOnly() API through this pointer."]
+    pub nvEncRunMotionEstimationOnly: PNVENCRUNMOTIONESTIMATIONONLY,
+    #[doc = "< [out]: Client should access ::NvEncGetLastErrorString() API through this pointer."]
+    pub nvEncGetLastErrorString: PNVENCGETLASTERROR,
+    #[doc = "< [out]: Client should access ::nvEncSetIOCudaStreams API through this pointer."]
+    pub nvEncSetIOCudaStreams: PNVENCSETIOCUDASTREAMS,
+    #[doc = "< [out]: Client should access ::NvEncGetEncodePresetConfigEx() API through this pointer."]
+    pub nvEncGetEncodePresetConfigEx: PNVENCGETENCODEPRESETCONFIGEX,
+    #[doc = "< [out]: Client should access ::NvEncGetSequenceParamEx() API through this pointer."]
+    pub nvEncGetSequenceParamEx: PNVENCGETSEQUENCEPARAMEX,
+    #[doc = "< [out]: Client should access ::NvEncRestoreEncoderState() API through this pointer."]
+    pub nvEncRestoreEncoderState: PNVENCRESTOREENCODERSTATE,
+    #[doc = "< [out]: Client should access ::NvEncLookaheadPicture() API through this pointer."]
+    pub nvEncLookaheadPicture: PNVENCLOOKAHEADPICTURE,
+    #[doc = "< [in]:  Reserved and must be set to NULL"]
+    pub reserved2: [*mut ::std::os::raw::c_void; 275usize],
+}
+#[test]
+fn bindgen_test_layout__NV_ENCODE_API_FUNCTION_LIST() {
+    assert_eq!(
+        ::std::mem::size_of::<_NV_ENCODE_API_FUNCTION_LIST>(),
+        2552usize,
+        concat!("Size of: ", stringify!(_NV_ENCODE_API_FUNCTION_LIST))
+    );
+    assert_eq!(
+        ::std::mem::align_of::<_NV_ENCODE_API_FUNCTION_LIST>(),
+        8usize,
+        concat!("Alignment of ", stringify!(_NV_ENCODE_API_FUNCTION_LIST))
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).version as *const _ as usize
+        },
+        0usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(version)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).reserved as *const _ as usize
+        },
+        4usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(reserved)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncOpenEncodeSession
+                as *const _ as usize
+        },
+        8usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncOpenEncodeSession)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeGUIDCount
+                as *const _ as usize
+        },
+        16usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeGUIDCount)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeProfileGUIDCount
+                as *const _ as usize
+        },
+        24usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeProfileGUIDCount)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeProfileGUIDs
+                as *const _ as usize
+        },
+        32usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeProfileGUIDs)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeGUIDs as *const _
+                as usize
+        },
+        40usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeGUIDs)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetInputFormatCount
+                as *const _ as usize
+        },
+        48usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetInputFormatCount)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetInputFormats
+                as *const _ as usize
+        },
+        56usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetInputFormats)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeCaps as *const _
+                as usize
+        },
+        64usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeCaps)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodePresetCount
+                as *const _ as usize
+        },
+        72usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodePresetCount)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodePresetGUIDs
+                as *const _ as usize
+        },
+        80usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodePresetGUIDs)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodePresetConfig
+                as *const _ as usize
+        },
+        88usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodePresetConfig)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncInitializeEncoder
+                as *const _ as usize
+        },
+        96usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncInitializeEncoder)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncCreateInputBuffer
+                as *const _ as usize
+        },
+        104usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncCreateInputBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncDestroyInputBuffer
+                as *const _ as usize
+        },
+        112usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncDestroyInputBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncCreateBitstreamBuffer
+                as *const _ as usize
+        },
+        120usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncCreateBitstreamBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncDestroyBitstreamBuffer
+                as *const _ as usize
+        },
+        128usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncDestroyBitstreamBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncEncodePicture as *const _
+                as usize
+        },
+        136usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncEncodePicture)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncLockBitstream as *const _
+                as usize
+        },
+        144usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncLockBitstream)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncUnlockBitstream
+                as *const _ as usize
+        },
+        152usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncUnlockBitstream)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncLockInputBuffer
+                as *const _ as usize
+        },
+        160usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncLockInputBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncUnlockInputBuffer
+                as *const _ as usize
+        },
+        168usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncUnlockInputBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodeStats as *const _
+                as usize
+        },
+        176usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodeStats)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetSequenceParams
+                as *const _ as usize
+        },
+        184usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetSequenceParams)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncRegisterAsyncEvent
+                as *const _ as usize
+        },
+        192usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncRegisterAsyncEvent)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncUnregisterAsyncEvent
+                as *const _ as usize
+        },
+        200usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncUnregisterAsyncEvent)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncMapInputResource
+                as *const _ as usize
+        },
+        208usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncMapInputResource)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncUnmapInputResource
+                as *const _ as usize
+        },
+        216usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncUnmapInputResource)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncDestroyEncoder as *const _
+                as usize
+        },
+        224usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncDestroyEncoder)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncInvalidateRefFrames
+                as *const _ as usize
+        },
+        232usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncInvalidateRefFrames)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncOpenEncodeSessionEx
+                as *const _ as usize
+        },
+        240usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncOpenEncodeSessionEx)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncRegisterResource
+                as *const _ as usize
+        },
+        248usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncRegisterResource)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncUnregisterResource
+                as *const _ as usize
+        },
+        256usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncUnregisterResource)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncReconfigureEncoder
+                as *const _ as usize
+        },
+        264usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncReconfigureEncoder)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).reserved1 as *const _ as usize
+        },
+        272usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(reserved1)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncCreateMVBuffer as *const _
+                as usize
+        },
+        280usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncCreateMVBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncDestroyMVBuffer
+                as *const _ as usize
+        },
+        288usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncDestroyMVBuffer)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncRunMotionEstimationOnly
+                as *const _ as usize
+        },
+        296usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncRunMotionEstimationOnly)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetLastErrorString
+                as *const _ as usize
+        },
+        304usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetLastErrorString)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncSetIOCudaStreams
+                as *const _ as usize
+        },
+        312usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncSetIOCudaStreams)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetEncodePresetConfigEx
+                as *const _ as usize
+        },
+        320usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetEncodePresetConfigEx)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncGetSequenceParamEx
+                as *const _ as usize
+        },
+        328usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncGetSequenceParamEx)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncRestoreEncoderState
+                as *const _ as usize
+        },
+        336usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncRestoreEncoderState)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).nvEncLookaheadPicture
+                as *const _ as usize
+        },
+        344usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(nvEncLookaheadPicture)
+        )
+    );
+    assert_eq!(
+        unsafe {
+            &(*(::std::ptr::null::<_NV_ENCODE_API_FUNCTION_LIST>())).reserved2 as *const _ as usize
+        },
+        352usize,
+        concat!(
+            "Offset of field: ",
+            stringify!(_NV_ENCODE_API_FUNCTION_LIST),
+            "::",
+            stringify!(reserved2)
+        )
+    );
+}
+impl Default for _NV_ENCODE_API_FUNCTION_LIST {
+    fn default() -> Self {
+        let mut s = ::std::mem::MaybeUninit::<Self>::uninit();
+        unsafe {
+            ::std::ptr::write_bytes(s.as_mut_ptr(), 0, 1);
+            s.assume_init()
+        }
+    }
+}
+#[doc = " \\ingroup ENCODER_STRUCTURE"]
+#[doc = " NV_ENCODE_API_FUNCTION_LIST"]
+pub type NV_ENCODE_API_FUNCTION_LIST = _NV_ENCODE_API_FUNCTION_LIST;
+extern "C" {
+    #[doc = " \\ingroup ENCODE_FUNC"]
+    #[doc = " Entry Point to the NvEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " Creates an instance of the NvEncodeAPI interface, and populates the"]
+    #[doc = " pFunctionList with function pointers to the API routines implemented by the"]
+    #[doc = " NvEncodeAPI interface."]
+    #[doc = ""]
+    #[doc = " \\param [out] functionList"]
+    #[doc = ""]
+    #[doc = " \\return"]
+    #[doc = " ::NV_ENC_SUCCESS"]
+    #[doc = " ::NV_ENC_ERR_INVALID_PTR"]
+    pub fn NvEncodeAPICreateInstance(functionList: *mut NV_ENCODE_API_FUNCTION_LIST)
+        -> NVENCSTATUS;
+}
+
+const fn nv_struct_version(ver: u32) -> u32 {
+    NVENCAPI_VERSION | ((ver) << 16) | (0x7 << 28)
+}
+pub const NV_ENC_CAPS_PARAM_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_RESTORE_ENCODER_STATE_PARAMS_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_OUTPUT_STATS_BLOCK_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_OUTPUT_STATS_ROW_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_ENCODE_OUT_PARAMS_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_LOOKAHEAD_PIC_PARAMS_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_CREATE_INPUT_BUFFER_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_CREATE_BITSTREAM_BUFFER_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_CREATE_MV_BUFFER_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_RC_PARAMS_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_CONFIG_VER: u32 = nv_struct_version(9) | (1u32 << 31);
+pub const NV_ENC_INITIALIZE_PARAMS_VER: u32 = nv_struct_version(7) | (1u32 << 31);
+pub const NV_ENC_RECONFIGURE_PARAMS_VER: u32 = nv_struct_version(2) | (1u32 << 31);
+pub const NV_ENC_PRESET_CONFIG_VER: u32 = nv_struct_version(5) | (1u32 << 31);
+pub const NV_ENC_PIC_PARAMS_MVC_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_PIC_PARAMS_VER: u32 = nv_struct_version(7) | (1u32 << 31);
+pub const NV_ENC_MEONLY_PARAMS_VER: u32 = nv_struct_version(4);
+pub const NV_ENC_LOCK_BITSTREAM_VER: u32 = nv_struct_version(2) | (1u32 << 31);
+pub const NV_ENC_LOCK_INPUT_BUFFER_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_MAP_INPUT_RESOURCE_VER: u32 = nv_struct_version(4);
+pub const NV_ENC_REGISTER_RESOURCE_VER: u32 = nv_struct_version(5);
+pub const NV_ENC_STAT_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_SEQUENCE_PARAM_PAYLOAD_VER: u32 = nv_struct_version(1);
+pub const NV_ENC_EVENT_PARAMS_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER: u32 = nv_struct_version(1);
+pub const NV_ENCODE_API_FUNCTION_LIST_VER: u32 = nv_struct_version(2);
+pub const NV_ENC_CODEC_H264_GUID: GUID = GUID {
+    Data1: 0x6bc82762,
+    Data2: 0x4e63,
+    Data3: 0x4ca4,
+    Data4: [0xaa, 0x85, 0x1e, 0x50, 0xf3, 0x21, 0xf6, 0xbf],
+};
+pub const NV_ENC_CODEC_HEVC_GUID: GUID = GUID {
+    Data1: 0x790cdc88,
+    Data2: 0x4522,
+    Data3: 0x4d7b,
+    Data4: [0x94, 0x25, 0xbd, 0xa9, 0x97, 0x5f, 0x76, 0x3],
+};
+pub const NV_ENC_CODEC_AV1_GUID: GUID = GUID {
+    Data1: 0x0a352289,
+    Data2: 0x0aa7,
+    Data3: 0x4759,
+    Data4: [0x86, 0x2d, 0x5d, 0x15, 0xcd, 0x16, 0xd2, 0x54],
+};
+pub const NV_ENC_CODEC_PROFILE_AUTOSELECT_GUID: GUID = GUID {
+    Data1: 0xbfd6f8e7,
+    Data2: 0x233c,
+    Data3: 0x4341,
+    Data4: [0x8b, 0x3e, 0x48, 0x18, 0x52, 0x38, 0x3, 0xf4],
+};
+pub const NV_ENC_H264_PROFILE_BASELINE_GUID: GUID = GUID {
+    Data1: 0x727bcaa,
+    Data2: 0x78c4,
+    Data3: 0x4c83,
+    Data4: [0x8c, 0x2f, 0xef, 0x3d, 0xff, 0x26, 0x7c, 0x6a],
+};
+pub const NV_ENC_H264_PROFILE_MAIN_GUID: GUID = GUID {
+    Data1: 0x60b5c1d4,
+    Data2: 0x67fe,
+    Data3: 0x4790,
+    Data4: [0x94, 0xd5, 0xc4, 0x72, 0x6d, 0x7b, 0x6e, 0x6d],
+};
+pub const NV_ENC_H264_PROFILE_HIGH_GUID: GUID = GUID {
+    Data1: 0xe7cbc309,
+    Data2: 0x4f7a,
+    Data3: 0x4b89,
+    Data4: [0xaf, 0x2a, 0xd5, 0x37, 0xc9, 0x2b, 0xe3, 0x10],
+};
+pub const NV_ENC_H264_PROFILE_HIGH_10_GUID: GUID = GUID {
+    Data1: 0x8f0c337e,
+    Data2: 0x186c,
+    Data3: 0x48e9,
+    Data4: [0xa6, 0x9d, 0x7a, 0x83, 0x34, 0x08, 0x97, 0x58],
+};
+pub const NV_ENC_H264_PROFILE_HIGH_422_GUID: GUID = GUID {
+    Data1: 0xff3242e9,
+    Data2: 0x613c,
+    Data3: 0x4295,
+    Data4: [0xa1, 0xe8, 0x2a, 0x7f, 0xe9, 0x4d, 0x81, 0x33],
+};
+pub const NV_ENC_H264_PROFILE_HIGH_444_GUID: GUID = GUID {
+    Data1: 0x7ac663cb,
+    Data2: 0xa598,
+    Data3: 0x4960,
+    Data4: [0xb8, 0x44, 0x33, 0x9b, 0x26, 0x1a, 0x7d, 0x52],
+};
+pub const NV_ENC_H264_PROFILE_STEREO_GUID: GUID = GUID {
+    Data1: 0x40847bf5,
+    Data2: 0x33f7,
+    Data3: 0x4601,
+    Data4: [0x90, 0x84, 0xe8, 0xfe, 0x3c, 0x1d, 0xb8, 0xb7],
+};
+pub const NV_ENC_H264_PROFILE_PROGRESSIVE_HIGH_GUID: GUID = GUID {
+    Data1: 0xb405afac,
+    Data2: 0xf32b,
+    Data3: 0x417b,
+    Data4: [0x89, 0xc4, 0x9a, 0xbe, 0xed, 0x3e, 0x59, 0x78],
+};
+pub const NV_ENC_H264_PROFILE_CONSTRAINED_HIGH_GUID: GUID = GUID {
+    Data1: 0xaec1bd87,
+    Data2: 0xe85b,
+    Data3: 0x48f2,
+    Data4: [0x84, 0xc3, 0x98, 0xbc, 0xa6, 0x28, 0x50, 0x72],
+};
+pub const NV_ENC_HEVC_PROFILE_MAIN_GUID: GUID = GUID {
+    Data1: 0xb514c39a,
+    Data2: 0xb55b,
+    Data3: 0x40fa,
+    Data4: [0x87, 0x8f, 0xf1, 0x25, 0x3b, 0x4d, 0xfd, 0xec],
+};
+pub const NV_ENC_HEVC_PROFILE_MAIN10_GUID: GUID = GUID {
+    Data1: 0xfa4d2b6c,
+    Data2: 0x3a5b,
+    Data3: 0x411a,
+    Data4: [0x80, 0x18, 0x0a, 0x3f, 0x5e, 0x3c, 0x9b, 0xe5],
+};
+pub const NV_ENC_HEVC_PROFILE_FREXT_GUID: GUID = GUID {
+    Data1: 0x51ec32b5,
+    Data2: 0x1b4c,
+    Data3: 0x453c,
+    Data4: [0x9c, 0xbd, 0xb6, 0x16, 0xbd, 0x62, 0x13, 0x41],
+};
+pub const NV_ENC_AV1_PROFILE_MAIN_GUID: GUID = GUID {
+    Data1: 0x5f2a39f5,
+    Data2: 0xf14e,
+    Data3: 0x4f95,
+    Data4: [0x9a, 0x9e, 0xb7, 0x6d, 0x56, 0x8f, 0xcf, 0x97],
+};
+pub const NV_ENC_PRESET_P1_GUID: GUID = GUID {
+    Data1: 0xfc0a8d3e,
+    Data2: 0x45f8,
+    Data3: 0x4cf8,
+    Data4: [0x80, 0xc7, 0x29, 0x88, 0x71, 0x59, 0xe, 0xbf],
+};
+pub const NV_ENC_PRESET_P2_GUID: GUID = GUID {
+    Data1: 0xf581cfb8,
+    Data2: 0x88d6,
+    Data3: 0x4381,
+    Data4: [0x93, 0xf0, 0xdf, 0x13, 0xf9, 0xc2, 0x7d, 0xab],
+};
+pub const NV_ENC_PRESET_P3_GUID: GUID = GUID {
+    Data1: 0x36850110,
+    Data2: 0x3a07,
+    Data3: 0x441f,
+    Data4: [0x94, 0xd5, 0x36, 0x70, 0x63, 0x1f, 0x91, 0xf6],
+};
+pub const NV_ENC_PRESET_P4_GUID: GUID = GUID {
+    Data1: 0x90a7b826,
+    Data2: 0xdf06,
+    Data3: 0x4862,
+    Data4: [0xb9, 0xd2, 0xcd, 0x6d, 0x73, 0xa0, 0x86, 0x81],
+};
+pub const NV_ENC_PRESET_P5_GUID: GUID = GUID {
+    Data1: 0x21c6e6b4,
+    Data2: 0x297a,
+    Data3: 0x4cba,
+    Data4: [0x99, 0x8f, 0xb6, 0xcb, 0xde, 0x72, 0xad, 0xe3],
+};
+pub const NV_ENC_PRESET_P6_GUID: GUID = GUID {
+    Data1: 0x8e75c279,
+    Data2: 0x6299,
+    Data3: 0x4ab6,
+    Data4: [0x83, 0x2, 0xb, 0x21, 0x5a, 0x33, 0x5c, 0xf5],
+};
+pub const NV_ENC_PRESET_P7_GUID: GUID = GUID {
+    Data1: 0x84848c12,
+    Data2: 0x6f71,
+    Data3: 0x4c13,
+    Data4: [0x93, 0x1b, 0x53, 0xe2, 0x83, 0xf5, 0x79, 0x74],
+};
diff --git a/pixelflux/nvcodec-sys/src/cuda.rs b/pixelflux/nvcodec-sys/src/cuda.rs
new file mode 100644
index 0000000..02f4bc8
--- /dev/null
+++ b/pixelflux/nvcodec-sys/src/cuda.rs
@@ -0,0 +1,8 @@
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+#![allow(non_upper_case_globals)]
+#![allow(deref_nullptr)]
+#![allow(clippy::all)]
+
+// CUDA driver bindings (the subset NVENC needs). Regenerated in place by build.rs under `regen`.
+include!("bindgen/cuda.rs");
diff --git a/pixelflux/nvcodec-sys/src/lib.rs b/pixelflux/nvcodec-sys/src/lib.rs
new file mode 100644
index 0000000..1084609
--- /dev/null
+++ b/pixelflux/nvcodec-sys/src/lib.rs
@@ -0,0 +1,13 @@
+//! FFI bindings for the NVIDIA Video Codec SDK (NVENC) and the CUDA driver subset pixelflux uses.
+//!
+//! The NVENC bindings are generated from the bundled `headers/nvEncodeAPI.h` (NVENCAPI 13.0); the
+//! CUDA bindings are generated from the CUDA toolkit's `$CUDA_PATH/include/cuda.h` (not bundled --
+//! it's large and ships with the toolkit). Both are committed under `src/bindgen/`, so a normal
+//! build needs no libclang or CUDA toolkit. Regenerate after a header bump with
+//! `cargo build --features regen` (NVENC needs libclang; CUDA also needs CUDA_PATH).
+
+mod nvenc;
+pub use nvenc::*;
+
+#[cfg(feature = "cuda")]
+pub mod cuda;
diff --git a/pixelflux/nvcodec-sys/src/nvenc.rs b/pixelflux/nvcodec-sys/src/nvenc.rs
new file mode 100644
index 0000000..1e732fc
--- /dev/null
+++ b/pixelflux/nvcodec-sys/src/nvenc.rs
@@ -0,0 +1,28 @@
+#![allow(non_camel_case_types)]
+#![allow(non_snake_case)]
+#![allow(non_upper_case_globals)]
+#![allow(deref_nullptr)]
+#![allow(clippy::all)]
+#![allow(unknown_lints)]
+#![allow(unpredictable_function_pointer_comparisons)]
+#![allow(unnecessary_transmutes)]
+
+// The bindgen output (types/enums/functions + the regex-extracted NV_ENC_*_VER constants and
+// codec/preset GUIDs). Regenerated in place by build.rs under the `regen` feature.
+include!("bindgen/nvenc.rs");
+
+// Loader entry points bindgen doesn't emit: the dlsym symbol names + their signatures, and the
+// runtime library name. NvEncodeAPIGetMaxSupportedVersion drives the API-version negotiation.
+pub const NV_ENCODE_API_GET_MAX_SUPPORTED_VERSION_FN_NAME: &[u8] =
+    b"NvEncodeAPIGetMaxSupportedVersion\0";
+pub type NvEncodeApiGetMaxSupportedVersionFn =
+    unsafe extern "C" fn(version: *mut u32) -> NVENCSTATUS;
+
+pub const NV_ENCODE_API_CREATE_INSTANCE_FN_NAME: &[u8] = b"NvEncodeAPICreateInstance\0";
+pub type NvEncodeApiCreateInstanceFn =
+    unsafe extern "C" fn(functionList: *mut NV_ENCODE_API_FUNCTION_LIST) -> NVENCSTATUS;
+
+#[cfg(not(windows))]
+pub const NVENC_DLL_NAME: &str = "libnvidia-encode.so.1";
+#[cfg(windows)]
+pub const NVENC_DLL_NAME: &str = "nvEncodeAPI64.dll";
diff --git a/pixelflux/screen_capture_module.cpp b/pixelflux/screen_capture_module.cpp
deleted file mode 100644
index bdcf067..0000000
--- a/pixelflux/screen_capture_module.cpp
+++ /dev/null
@@ -1,3928 +0,0 @@
-/*
- * This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at https://mozilla.org/MPL/2.0/.
- */
-
-/*
-  ▘    ▜ ▐▘▜     
-▛▌▌▚▘█▌▐ ▜▘▐ ▌▌▚▘
-▙▌▌▞▖▙▖▐▖▐ ▐▖▙▌▞▖
-▌                
-*/
-
-#include <atomic>
-#include <chrono>
-#include <condition_variable>
-#include <cstdio>
-#include <cstdlib>
-#include <cstring>
-#include <future>
-#include <iomanip>
-#include <iostream>
-#include <list>
-#include <memory>
-#include <mutex>
-#include <numeric>
-#include <queue>
-#include <sstream>
-#include <stdexcept>
-#include <thread>
-#include <vector>
-#include <algorithm>
-#include <X11/Xlib.h>
-#include <X11/extensions/XShm.h>
-#include <X11/extensions/Xfixes.h>
-#include <X11/Xutil.h>
-#include <jpeglib.h>
-#include <netinet/in.h>
-#include <sys/ipc.h>
-#include <sys/shm.h>
-#define XXH_STATIC_LINKING_ONLY
-#include "xxhash.h"
-#include <libyuv/convert.h>
-#include <libyuv/convert_from.h>
-#include <libyuv/convert_from_argb.h>
-#include <libyuv/planar_functions.h>
-#include <x264.h>
-#include <string>
-#include <cmath>
-#include <dlfcn.h>
-#include "nvEncodeAPI.h"
-#ifndef STB_IMAGE_IMPLEMENTATION_DEFINED
-#include <fcntl.h>
-#include <unistd.h>
-#include <dirent.h>
-#include <va/va.h>
-#include <va/va_drm.h>
-#include <va/va_enc_h264.h>
-#define VA_ENC_PACKED_HEADER_DATA (VA_ENC_PACKED_HEADER_SEQUENCE | VA_ENC_PACKED_HEADER_PICTURE)
-#define STB_IMAGE_IMPLEMENTATION_DEFINED
-#define STB_IMAGE_IMPLEMENTATION
-#endif
-#include "stb_image.h"
-extern "C" {
-#include <libavcodec/avcodec.h>
-#include <libavutil/hwcontext.h>
-#include <libavutil/opt.h>
-#include <libavutil/pixdesc.h>
-}
-
-typedef enum CUresult_enum { CUDA_SUCCESS = 0 } CUresult;
-typedef int CUdevice;
-typedef struct CUctx_st* CUcontext;
-typedef CUresult (*tcuInit)(unsigned int);
-typedef CUresult (*tcuDeviceGet)(CUdevice*, int);
-typedef CUresult (*tcuCtxCreate)(CUcontext*, unsigned int, CUdevice);
-typedef CUresult (*tcuCtxDestroy)(CUcontext);
-
-/**
- * @brief Holds function pointers for the CUDA driver API.
- * This struct is populated by `LoadCudaApi` using `dlsym` to allow for
- * dynamic loading of the CUDA library (`libcuda.so`), avoiding a hard
- * link-time dependency.
- */
-struct CudaFunctions {
-  tcuInit pfn_cuInit = nullptr;
-  tcuDeviceGet pfn_cuDeviceGet = nullptr;
-  tcuCtxCreate pfn_cuCtxCreate = nullptr;
-  tcuCtxDestroy pfn_cuCtxDestroy = nullptr;
-};
-
-CudaFunctions g_cuda_funcs;
-static void* g_cuda_lib_handle = nullptr;
-
-/**
- * @brief Manages the state of an NVENC H.264 encoder session.
- * This struct encapsulates all the necessary handles, parameters, and buffer
- * pools for a single NVENC encoding pipeline. It maintains the CUDA context,
- * the encoder session, configuration details, and pools of input/output
- * buffers to facilitate asynchronous encoding.
- */
-struct NvencEncoderState {
-  NV_ENCODE_API_FUNCTION_LIST nvenc_funcs = {0};
-  void* encoder_session = nullptr;
-  NV_ENC_INITIALIZE_PARAMS init_params = {0};
-  NV_ENC_CONFIG encode_config = {0};
-  std::vector<NV_ENC_INPUT_PTR> input_buffers;
-  std::vector<NV_ENC_OUTPUT_PTR> output_buffers;
-  uint32_t current_input_buffer_idx = 0;
-  uint32_t current_output_buffer_idx = 0;
-  int buffer_pool_size = 4;
-  bool initialized = false;
-  int initialized_width = 0;
-  int initialized_height = 0;
-  int initialized_qp = -1;
-  bool cbr_mode = false;
-  int initialized_bitrate_kbps = 0;
-  NV_ENC_BUFFER_FORMAT initialized_buffer_format = NV_ENC_BUFFER_FORMAT_UNDEFINED;
-  CUcontext cuda_context = nullptr;
-
-  NvencEncoderState() {
-    nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
-    init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
-  }
-};
-
-static void* g_nvenc_lib_handle = nullptr;
-typedef NVENCSTATUS(NVENCAPI* PFN_NvEncodeAPICreateInstance)(
-  NV_ENCODE_API_FUNCTION_LIST*);
-
-/**
- * @brief Manages the state of a VA-API H.264 encoder session using libavcodec.
- * This struct encapsulates all necessary libav objects for a VA-API hardware-
- * accelerated encoding pipeline. This includes the hardware device context,
- * hardware frame context for surface allocation, the codec context for the
- * h264_vaapi encoder, and reusable frame/packet objects.
- */
-struct VaapiEncoderState {
-    AVBufferRef *hw_device_ctx = nullptr;
-    AVBufferRef *hw_frames_ctx = nullptr;
-    AVCodecContext *codec_ctx = nullptr;
-    AVFrame *sw_frame = nullptr;
-    AVFrame *hw_frame = nullptr;
-    AVPacket *packet = nullptr;
-    bool initialized = false;
-    int initialized_width = 0;
-    int initialized_height = 0;
-    int initialized_qp = -1;
-    bool initialized_is_444 = false;
-    unsigned int frame_count = 0;
-    bool initialized_cbr = false;
-    int initialized_bitrate_kbps = 0;
-};
-
-
-/**
- * @brief Custom X11 error handler specifically for the XShmAttach call.
- * This function is temporarily installed as the X11 error handler. It catches
- * any error, sets the g_shm_attach_failed flag to true, and returns 0 to
- * signal that the error has been "handled," preventing program termination.
- */
-static bool g_shm_attach_failed = false;
-static int shm_attach_error_handler(Display* dpy, XErrorEvent* ev) {
-    g_shm_attach_failed = true;
-    return 0;
-}
-
-/**
- * @brief Manages a pool of H.264 encoders and associated picture buffers.
- * This struct provides thread-safe storage and management for x264 encoder
- * instances, input pictures, and their initialization states. This allows
- * different threads to use separate encoder instances, particularly for
- * encoding different stripes of a video frame concurrently.
- */
-struct MinimalEncoderStore {
-  std::vector<x264_t*> encoders;
-  std::vector<bool> initialized_flags;
-  std::vector<int> initialized_widths;
-  std::vector<int> initialized_heights;
-  std::vector<int> initialized_crfs;
-  std::vector<int> initialized_csps;
-  std::vector<int> initialized_colorspaces;
-  std::vector<bool> initialized_full_range_flags;
-  std::vector<bool> force_idr_flags;
-  std::vector<bool> initialized_cbr_flags;
-  std::vector<int> initialized_bitrates;
-  std::mutex store_mutex;
-
-  /**
-   * @brief Ensures that the internal vectors are large enough for the given thread_id.
-   * If thread_id is out of bounds, resizes all vectors to accommodate it,
-   * initializing new elements to default values.
-   * @param thread_id The ID of the thread, used as an index.
-   */
-  void ensure_size(int thread_id) {
-    if (thread_id >= static_cast<int>(encoders.size())) {
-      size_t new_size = static_cast<size_t>(thread_id) + 1;
-      encoders.resize(new_size, nullptr);
-      initialized_flags.resize(new_size, false);
-      initialized_widths.resize(new_size, 0);
-      initialized_heights.resize(new_size, 0);
-      initialized_crfs.resize(new_size, -1);
-      initialized_csps.resize(new_size, X264_CSP_NONE);
-      initialized_colorspaces.resize(new_size, 0);
-      initialized_full_range_flags.resize(new_size, false);
-      force_idr_flags.resize(new_size, false);
-      initialized_cbr_flags.resize(new_size, false);
-      initialized_bitrates.resize(new_size, 0);
-    }
-  }
-
-  /**
-   * @brief Resets the store by closing all encoders and freeing resources.
-   * Clears all internal vectors, ensuring a clean state. This should be called
-   * when encoder settings change significantly (e.g., resolution) or when
-   * the capture module is stopped.
-   */
-  void reset() {
-    std::lock_guard<std::mutex> lock(store_mutex);
-    for (size_t i = 0; i < encoders.size(); ++i) {
-      if (encoders[i]) {
-        x264_encoder_close(encoders[i]);
-        encoders[i] = nullptr;
-      }
-    }
-    encoders.clear();
-    initialized_flags.clear();
-    initialized_widths.clear();
-    initialized_heights.clear();
-    initialized_crfs.clear();
-    initialized_csps.clear();
-    initialized_colorspaces.clear();
-    initialized_full_range_flags.clear();
-    force_idr_flags.clear();
-    initialized_cbr_flags.clear();
-    initialized_bitrates.clear();
-  }
-
-  /**
-   * @brief Destructor for MinimalEncoderStore.
-   * Calls reset() to ensure all resources are released upon destruction.
-   */
-  ~MinimalEncoderStore() {
-    reset();
-  }
-};
-
-/**
- * @brief Enumerates the possible output modes for encoding.
- */
-enum class OutputMode : int {
-  JPEG = 0, /**< Output frames as JPEG images. */
-  H264 = 1  /**< Output frames as H.264 video. */
-};
-
-/**
- * @brief Enumerates the data types for encoded stripes.
- */
-enum class StripeDataType {
-  UNKNOWN = 0, /**< Unknown or uninitialized data type. */
-  JPEG    = 1, /**< Data is JPEG encoded. */
-  H264    = 2  /**< Data is H.264 encoded. */
-};
-
-/**
- * @brief Enumerates the watermark location identifiers
- */
-enum class WatermarkLocation : int {
-  NONE = 0, TL = 1, TR = 2, BL = 3, BR = 4, MI = 5, AN = 6
-};
-
-/**
- * @brief Holds settings for screen capture and encoding.
- * This struct aggregates all configurable parameters for the capture process,
- * including dimensions, frame rate, quality settings, and output mode.
- */
-struct CaptureSettings {
-  int capture_width;
-  int capture_height;
-  double scale;
-  int capture_x;
-  int capture_y;
-  double target_fps;
-  int jpeg_quality;
-  int paint_over_jpeg_quality;
-  bool use_paint_over_quality;
-  int paint_over_trigger_frames;
-  int damage_block_threshold;
-  int damage_block_duration;
-  OutputMode output_mode;
-  int h264_crf;
-  int h264_paintover_crf;
-  int h264_paintover_burst_frames;
-  bool h264_fullcolor;
-  bool h264_fullframe;
-  bool h264_streaming_mode;
-  bool capture_cursor;
-  const char* watermark_path;
-  WatermarkLocation watermark_location_enum;
-  int vaapi_render_node_index;
-  bool use_cpu;
-  bool debug_logging;
-  bool h264_cbr_mode;
-  int h264_bitrate_kbps;
-  int h264_vbv_buffer_size_kb;
-  bool auto_adjust_screen_capture_size;
-
-  /**
-   * @brief Default constructor for CaptureSettings.
-   * Initializes settings with common default values.
-   */
-  CaptureSettings()
-    : capture_width(1920),
-      capture_height(1080),
-      scale(1.0),
-      capture_x(0),
-      capture_y(0),
-      target_fps(60.0),
-      jpeg_quality(85),
-      paint_over_jpeg_quality(95),
-      use_paint_over_quality(false),
-      paint_over_trigger_frames(10),
-      damage_block_threshold(15),
-      damage_block_duration(30),
-      output_mode(OutputMode::JPEG),
-      h264_crf(25),
-      h264_paintover_crf(18),
-      h264_paintover_burst_frames(5),
-      h264_fullcolor(false),
-      h264_fullframe(false),
-      h264_streaming_mode(false),
-      capture_cursor(false),
-      watermark_path(nullptr),
-      watermark_location_enum(WatermarkLocation::NONE),
-      vaapi_render_node_index(-1),
-      use_cpu(false),
-      debug_logging(false),
-      h264_cbr_mode(false),
-      h264_bitrate_kbps(4000),
-      h264_vbv_buffer_size_kb(0),
-      auto_adjust_screen_capture_size(false) {}
-
-  /**
-   * @brief Parameterized constructor for CaptureSettings.
-   * Allows initializing all settings with specific values.
-   * @param cw Capture width.
-   * @param ch Capture height.
-   * @param cx Capture X offset.
-   * @param cy Capture Y offset.
-   * @param fps Target frames per second.
-   * @param jq JPEG quality.
-   * @param pojq Paint-over JPEG quality.
-   * @param upoq Use paint-over quality flag.
-   * @param potf Paint-over trigger frames.
-   * @param dbt Damage block threshold.
-   * @param dbd Damage block duration.
-   * @param om Output mode (JPEG or H.264).
-   * @param crf H.264 Constant Rate Factor.
-   * @param h264_po_crf H.264 paint-over CRF.
-   * @param h264_po_burst H.264 paint-over burst frames.
-   * @param h264_fc H.264 full color (I444) flag.
-   * @param h264_ff H.264 full frame encoding flag.
-   * @param h264_sm H.264 streaming mode flag.
-   * @param capture_cursor Capture cursor flag.
-   * @param wm_path Watermark image file path.
-   * @param wm_loc Watermark location enum.
-   * @param vaapi_idx VA-API render node index.
-   * @param use_cpu_flag Force CPU encoding flag.
-   * @param debug_log Enable debug logging flag.
-   * @param cbr_mode H.264 CBR mode flag.
-   * @param bitrate_kbps H.264 CBR bitrate in kbps.
-   * @param vbv_buffer_size_kb H.264 VBV buffer size in kb.
-   * @param adjust_size Auto-adjust screen capture size flag.
-   */
-  CaptureSettings(int cw, int ch, int cx, int cy, double fps, int jq,
-                  int pojq, bool upoq, int potf, int dbt, int dbd,
-                  OutputMode om = OutputMode::JPEG, int crf = 25, int h264_po_crf = 18, int h264_po_burst = 5,
-                  bool h264_fc = false, bool h264_ff = false, bool h264_sm = false,
-                  bool capture_cursor = false,
-                  const char* wm_path = nullptr,
-                  WatermarkLocation wm_loc = WatermarkLocation::NONE,
-                  int vaapi_idx = -1, bool use_cpu_flag = false, bool debug_log = false,
-                  bool cbr_mode = false, int bitrate_kbps = 4000, int vbv_buffer_size_kb = 0, bool adjust_size = false)
-    : capture_width(cw),
-      capture_height(ch),
-      capture_x(cx),
-      capture_y(cy),
-      target_fps(fps),
-      jpeg_quality(jq),
-      paint_over_jpeg_quality(pojq),
-      use_paint_over_quality(upoq),
-      paint_over_trigger_frames(potf),
-      damage_block_threshold(dbt),
-      damage_block_duration(dbd),
-      output_mode(om),
-      h264_crf(crf),
-      h264_paintover_crf(h264_po_crf),
-      h264_paintover_burst_frames(h264_po_burst),
-      h264_fullcolor(h264_fc),
-      h264_fullframe(h264_ff),
-      h264_streaming_mode(h264_sm),
-      capture_cursor(capture_cursor),
-      watermark_path(wm_path),
-      watermark_location_enum(wm_loc),
-      vaapi_render_node_index(vaapi_idx),
-      use_cpu(use_cpu_flag),
-      debug_logging(debug_log),
-      h264_cbr_mode(cbr_mode),
-      h264_bitrate_kbps(bitrate_kbps),
-      h264_vbv_buffer_size_kb(vbv_buffer_size_kb),
-      auto_adjust_screen_capture_size(adjust_size) {}
-};
-
-/**
- * @brief Represents the result of encoding a single stripe of a frame.
- * Contains the encoded data, its type, dimensions, and frame identifier.
- * This struct uses move semantics for efficient data transfer.
- */
-struct StripeEncodeResult {
-  StripeDataType type;
-  int stripe_y_start;
-  int stripe_height;
-  int size;
-  unsigned char* data;
-  int frame_id;
-
-  /**
-   * @brief Default constructor for StripeEncodeResult.
-   * Initializes members to default/null values.
-   */
-  StripeEncodeResult()
-    : type(StripeDataType::UNKNOWN),
-      stripe_y_start(0),
-      stripe_height(0),
-      size(0),
-      data(nullptr),
-      frame_id(-1) {}
-
-  /**
-   * @brief Move constructor for StripeEncodeResult.
-   * Transfers ownership of data from the 'other' object.
-   * @param other The StripeEncodeResult to move from.
-   */
-  StripeEncodeResult(StripeEncodeResult&& other) noexcept;
-
-  /**
-   * @brief Move assignment operator for StripeEncodeResult.
-   * Transfers ownership of data from the 'other' object, freeing existing data.
-   * @param other The StripeEncodeResult to move assign from.
-   * @return Reference to this object.
-   */
-  StripeEncodeResult& operator=(StripeEncodeResult&& other) noexcept;
-
-private:
-  StripeEncodeResult(const StripeEncodeResult&) = delete;
-  StripeEncodeResult& operator=(const StripeEncodeResult&) = delete;
-};
-
-/**
- * @brief Move constructor implementation for StripeEncodeResult.
- * @param other The StripeEncodeResult to move data from.
- */
-StripeEncodeResult::StripeEncodeResult(StripeEncodeResult&& other) noexcept
-  : type(other.type),
-    stripe_y_start(other.stripe_y_start),
-    stripe_height(other.stripe_height),
-    size(other.size),
-    data(other.data),
-    frame_id(other.frame_id) {
-  other.type = StripeDataType::UNKNOWN;
-  other.stripe_y_start = 0;
-  other.stripe_height = 0;
-  other.size = 0;
-  other.data = nullptr;
-  other.frame_id = -1;
-}
-
-/**
- * @brief Move assignment operator implementation for StripeEncodeResult.
- * @param other The StripeEncodeResult to move data from.
- * @return A reference to this StripeEncodeResult.
- */
-StripeEncodeResult& StripeEncodeResult::operator=(StripeEncodeResult&& other) noexcept {
-  if (this != &other) {
-    if (data) {
-      delete[] data;
-      data = nullptr;
-    }
-    type = other.type;
-    stripe_y_start = other.stripe_y_start;
-    stripe_height = other.stripe_height;
-    size = other.size;
-    data = other.data;
-    frame_id = other.frame_id;
-
-    other.type = StripeDataType::UNKNOWN;
-    other.stripe_y_start = 0;
-    other.stripe_height = 0;
-    other.size = 0;
-    other.data = nullptr;
-    other.frame_id = -1;
-  }
-  return *this;
-}
-
-/**
- * @brief Dynamically loads the CUDA driver library and resolves required function pointers.
- *
- * This function checks if the library is already loaded. If not, it uses `dlopen`
- * to load `libcuda.so` and `dlsym` to find the addresses of `cuInit`, `cuDeviceGet`,
- * `cuCtxCreate`, and `cuCtxDestroy`. The function pointers are stored in the
- * global `g_cuda_funcs` struct. This must be successful before any NVENC
- * operations that use a CUDA context can be performed.
- *
- * @return true if the library was loaded and all required function pointers were
- *         successfully resolved, false otherwise.
- */
-bool LoadCudaApi() {
-    if (g_cuda_lib_handle) {
-        return true;
-    }
-
-    g_cuda_lib_handle = dlopen("libcuda.so", RTLD_LAZY);
-    if (!g_cuda_lib_handle) {
-        std::cerr << "CUDA_API_LOAD: dlopen failed for libcuda.so" << std::endl;
-        return false;
-    }
-
-    g_cuda_funcs.pfn_cuInit = (tcuInit)dlsym(g_cuda_lib_handle, "cuInit");
-    g_cuda_funcs.pfn_cuDeviceGet = (tcuDeviceGet)dlsym(g_cuda_lib_handle, "cuDeviceGet");
-    g_cuda_funcs.pfn_cuCtxCreate = (tcuCtxCreate)dlsym(g_cuda_lib_handle, "cuCtxCreate");
-    g_cuda_funcs.pfn_cuCtxDestroy = (tcuCtxDestroy)dlsym(g_cuda_lib_handle, "cuCtxDestroy");
-
-    if (!g_cuda_funcs.pfn_cuInit || !g_cuda_funcs.pfn_cuDeviceGet || !g_cuda_funcs.pfn_cuCtxCreate || !g_cuda_funcs.pfn_cuCtxDestroy) {
-        std::cerr << "CUDA_API_LOAD: dlsym failed for one or more CUDA functions." << std::endl;
-        dlclose(g_cuda_lib_handle);
-        g_cuda_lib_handle = nullptr;
-        memset(&g_cuda_funcs, 0, sizeof(CudaFunctions));
-        return false;
-    }
-    return true;
-}
-
-/**
- * @brief Unloads the CUDA driver library if it was previously loaded.
- *
- * This function calls `dlclose` on the CUDA library handle and clears the global
- * function pointer struct to ensure a clean state. It should be called when
- * CUDA functionality is no longer needed.
- */
-void UnloadCudaApi() {
-    if (g_cuda_lib_handle) {
-        dlclose(g_cuda_lib_handle);
-        g_cuda_lib_handle = nullptr;
-        memset(&g_cuda_funcs, 0, sizeof(CudaFunctions));
-    }
-}
-
-/**
- * @brief Dynamically loads the NVIDIA Encoder (NVENC) library and initializes the API function list.
- *
- * This function checks if the API is already loaded. If not, it attempts to load
- * `libnvidia-encode.so.1` or `libnvidia-encode.so` using `dlopen`. It then uses
- * `dlsym` to get the `NvEncodeAPICreateInstance` function and calls it to populate
- * the provided function list struct.
- *
- * @return true if the library was loaded and the function list was successfully
- *         populated, false otherwise.
- */
-bool LoadNvencApi(NV_ENCODE_API_FUNCTION_LIST& nvenc_funcs) {
-  if (nvenc_funcs.nvEncOpenEncodeSessionEx != nullptr) {
-    return true;
-  }
-  if (!g_nvenc_lib_handle) {
-      const char* lib_names[] = {"libnvidia-encode.so.1", "libnvidia-encode.so"};
-      for (const char* name : lib_names) {
-        g_nvenc_lib_handle = dlopen(name, RTLD_LAZY | RTLD_GLOBAL);
-        if (g_nvenc_lib_handle) {
-          break;
-        }
-      }
-  }
-
-  if (!g_nvenc_lib_handle) {
-    return false;
-  }
-
-  memset(&nvenc_funcs, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
-  nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
-
-  PFN_NvEncodeAPICreateInstance NvEncodeAPICreateInstance_func_ptr =
-    (PFN_NvEncodeAPICreateInstance)dlsym(g_nvenc_lib_handle, "NvEncodeAPICreateInstance");
-
-  if (!NvEncodeAPICreateInstance_func_ptr) {
-    return false;
-  }
-
-  NVENCSTATUS status = NvEncodeAPICreateInstance_func_ptr(&nvenc_funcs);
-  if (status != NV_ENC_SUCCESS) {
-    memset(&nvenc_funcs, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
-    nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
-    return false;
-  }
-  if (!nvenc_funcs.nvEncOpenEncodeSessionEx) {
-    memset(&nvenc_funcs, 0, sizeof(NV_ENCODE_API_FUNCTION_LIST));
-    nvenc_funcs.version = NV_ENCODE_API_FUNCTION_LIST_VER;
-    return false;
-  }
-  return true;
-}
-
-/**
- * @brief Scans the system for available VA-API compatible DRM render nodes.
- * This function searches the `/dev/dri/` directory for device files named
- * `renderD*`, which represent GPU render nodes that can be used for
- * hardware-accelerated computation like video encoding without needing a
- * graphical display server.
- * @return A sorted vector of strings, where each string is the full path to a
- *         found render node (e.g., "/dev/dri/renderD128").
- */
-std::vector<std::string> find_vaapi_render_nodes() {
-    std::vector<std::string> nodes;
-    const char* drm_dir_path = "/dev/dri/";
-    DIR *dir = opendir(drm_dir_path);
-    if (!dir) {
-        return nodes;
-    }
-
-    struct dirent *entry;
-    while ((entry = readdir(dir)) != nullptr) {
-        if (strncmp(entry->d_name, "renderD", 7) == 0) {
-            nodes.push_back(std::string(drm_dir_path) + entry->d_name);
-        }
-    }
-    closedir(dir);
-    std::sort(nodes.begin(), nodes.end());
-    return nodes;
-}
-
-/**
- * @brief Callback function type for processing encoded stripes.
- * @param result Pointer to the StripeEncodeResult containing the encoded data.
- * @param user_data User-defined data passed to the callback.
- */
-typedef void (*StripeCallback)(StripeEncodeResult* result, void* user_data);
-
-/**
- * @brief Encodes a horizontal stripe of an image from shared memory into JPEG format.
- * @param thread_id Identifier for the calling thread, used for managing encoder resources.
- * @param stripe_y_start The Y-coordinate of the top of the stripe within the full image.
- * @param stripe_height The height of the stripe to encode.
- * @param capture_width_actual The actual width of the stripe (and full image).
- * @param shm_data_base Pointer to the beginning of the full image data in shared memory.
- * @param shm_stride_bytes The stride (bytes per row) of the shared memory image.
- * @param shm_bytes_per_pixel The number of bytes per pixel in the shared memory image (e.g., 4 for BGRX).
- * @param jpeg_quality The JPEG quality setting (0-100).
- * @param frame_counter The identifier of the current frame.
- * @return A StripeEncodeResult containing the JPEG data, or an empty result on failure.
- *         The result data includes a custom 4-byte header: frame_id (uint16_t network byte order)
- *         and stripe_y_start (uint16_t network byte order).
- */
-StripeEncodeResult encode_stripe_jpeg(
-  int thread_id,
-  int stripe_y_start,
-  int stripe_height,
-  int capture_width_actual,
-  const unsigned char* shm_data_base,
-  int shm_stride_bytes,
-  int shm_bytes_per_pixel,
-  int jpeg_quality,
-  int frame_counter);
-
-/**
- * @brief Encodes a horizontal stripe of YUV data into H.264 format using x264.
- * @param thread_id Identifier for the calling thread, used for managing encoder resources.
- * @param stripe_y_start The Y-coordinate of the top of the stripe.
- * @param stripe_height The height of the stripe to encode (must be even).
- * @param capture_width_actual The width of the stripe (must be even).
- * @param y_plane_stripe_start Pointer to the start of the Y plane data for this stripe.
- * @param y_stride Stride of the Y plane.
- * @param u_plane_stripe_start Pointer to the start of the U plane data for this stripe.
- * @param u_stride Stride of the U plane.
- * @param v_plane_stripe_start Pointer to the start of the V plane data for this stripe.
- * @param v_stride Stride of the V plane.
- * @param is_i444_input True if the input YUV data is I444, false if I420.
- * @param frame_counter The identifier of the current frame.
- * @param current_crf_setting The H.264 CRF (Constant Rate Factor) to use for encoding.
- * @param colorspace_setting An integer indicating input YUV format (420 for I420, 444 for I444).
- * @param use_full_range True if full range color should be signaled in VUI, false for limited range.
- * @param h264_streaming_mode True to enable streaming mode optimizations.
- * @param force_idr True to force the encoder to generate an IDR (key) frame.
- * @param is_cbr True to enable Constant Bitrate (CBR) mode, false for CRF mode.
- * @param bitrate_kbps Target bitrate in kbps when CBR mode is enabled.
- * @param vbv_buffer_size_kb VBV buffer size in kb for CBR mode (0 for auto/default).
- * @return A StripeEncodeResult containing the H.264 NAL units, or an empty result on failure.
- *         The result data includes a custom 10-byte header: type tag (0x04), frame type,
- *         frame_id (uint16_t), stripe_y_start (uint16_t), width (uint16_t), height (uint16_t),
- *         all multi-byte fields in network byte order.
- */
-StripeEncodeResult encode_stripe_h264(
-  MinimalEncoderStore& h264_minimal_store,
-  int thread_id,
-  int stripe_y_start,
-  int stripe_height,
-  int capture_width_actual,
-  const uint8_t* y_plane_stripe_start, int y_stride,
-  const uint8_t* u_plane_stripe_start, int u_stride,
-  const uint8_t* v_plane_stripe_start, int v_stride,
-  bool is_i444_input,
-  int frame_counter,
-  int current_crf_setting,
-  int colorspace_setting,
-  bool use_full_range,
-  bool h264_streaming_mode,
-  bool force_id,
-  bool is_cbr,
-  int bitrate_kbps,
-  int vbv_buffer_size_kb);
-
-/**
- * @brief Calculates a 64-bit XXH3 hash for a stripe of YUV data.
- * @param y_plane_stripe_start Pointer to the Y plane data for the stripe.
- * @param y_stride Stride of the Y plane.
- * @param u_plane_stripe_start Pointer to the U plane data for the stripe.
- * @param u_stride Stride of the U plane.
- * @param v_plane_stripe_start Pointer to the V plane data for the stripe.
- * @param v_stride Stride of the V plane.
- * @param width Width of the stripe.
- * @param height Height of the stripe.
- * @param is_i420 True if the YUV format is I420 (chroma planes are half width/height),
- *                false if I444 (chroma planes are full width/height).
- * @param use_fullframe_hashing True to use full-frame hashing (samples every 12th row),
- *                              false to hash every row.
- * @return A 64-bit hash value of the stripe data, or 0 on error.
- */
-uint64_t calculate_yuv_stripe_hash(const uint8_t* y_plane_stripe_start, int y_stride,
-                                   const uint8_t* u_plane_stripe_start, int u_stride,
-                                   const uint8_t* v_plane_stripe_start, int v_stride,
-                                   int width, int height, bool is_i420, bool use_fullframe_hashing);
-
-/**
- * @brief Calculates a hash for a stripe of BGR(X) data directly from shared memory.
- * Extracts BGR components for hashing.
- * @param shm_stripe_physical_start Pointer to the start of the stripe data in shared memory.
- * @param shm_stride_bytes Stride (bytes per row) of the shared memory image.
- * @param stripe_width Width of the stripe.
- * @param stripe_height Height of the stripe.
- * @param shm_bytes_per_pixel Bytes per pixel in the shared memory (e.g., 3 for BGR, 4 for BGRX).
- * @return A 64-bit hash value of the BGR data in the stripe, or 0 on error.
- */
-uint64_t calculate_bgr_stripe_hash_from_shm(const unsigned char* shm_stripe_physical_start,
-                                            int shm_stride_bytes,
-                                            int stripe_width, int stripe_height,
-                                            int shm_bytes_per_pixel);
-
-/**
- * @brief Manages the screen capture process, including settings and threading.
- * This class encapsulates the logic for capturing screen content using XShm,
- * dividing it into stripes, encoding these stripes (JPEG or H.264) based on
- * damage detection and other heuristics, and invoking a callback with the encoded data.
- * It supports dynamic modification of capture settings.
- */
-class ScreenCaptureModule {
-public:
-  int capture_width = 1024;
-  int capture_height = 768;
-  int capture_x = 0;
-  int capture_y = 0;
-  double target_fps = 60.0;
-  int jpeg_quality = 85;
-  int paint_over_jpeg_quality = 95;
-  bool use_paint_over_quality = false;
-  int paint_over_trigger_frames = 10;
-  int damage_block_threshold = 15;
-  int damage_block_duration = 30;
-  int h264_crf = 25;
-  int h264_paintover_crf = 18;
-  int h264_paintover_burst_frames = 5;
-  bool h264_fullcolor = false;
-  bool h264_fullframe = false;
-  bool h264_streaming_mode = false;
-  bool capture_cursor = false;
-  OutputMode output_mode = OutputMode::H264;
-  std::string watermark_path_internal;
-  WatermarkLocation watermark_location_internal;
-  bool use_cpu = false;
-  bool debug_logging = false;
-  bool h264_cbr_mode = false;
-  int h264_bitrate_kbps = 4000;
-  int h264_vbv_buffer_size_kb = 0;
-  bool auto_adjust_screen_capture_size = false;
-
-  std::atomic<bool> stop_requested;
-  std::thread capture_thread;
-  StripeCallback stripe_callback = nullptr;
-  void* user_data = nullptr;
-  int frame_counter = 0;
-  int encoded_frame_count = 0;
-  int total_stripes_encoded_this_interval = 0;
-  mutable std::mutex settings_mutex;
-  bool is_nvidia_system_detected = false;
-  bool nvenc_operational = false;
-  int vaapi_render_node_index = -1;
-  bool vaapi_operational = false;
-
-private:
-    MinimalEncoderStore h264_minimal_store_;
-    NvencEncoderState nvenc_state_;
-    std::mutex nvenc_mutex_;
-    std::atomic<bool> nvenc_force_next_idr_{true};
-    VaapiEncoderState vaapi_state_;
-    std::mutex vaapi_mutex_;
-    std::atomic<bool> vaapi_force_next_idr_{true};
-    std::atomic<bool> force_next_idr_{false};
-
-    std::vector<uint8_t> full_frame_y_plane_;
-    std::vector<uint8_t> full_frame_u_plane_;
-    std::vector<uint8_t> full_frame_v_plane_;
-    int full_frame_y_stride_;
-    int full_frame_u_stride_;
-    int full_frame_v_stride_;
-    bool yuv_planes_are_i444_;
-    std::vector<uint32_t> watermark_image_data_;
-    int watermark_width_;
-    int watermark_height_;
-    bool watermark_loaded_;
-    int watermark_current_x_;
-    int watermark_current_y_;
-    int watermark_dx_;
-    int watermark_dy_;
-    mutable std::mutex watermark_data_mutex_;
-
-    void reset_nvenc_encoder();
-    bool initialize_nvenc_encoder(int width, int height, int target_qp, double fps, bool use_yuv444, bool is_cbr, int bitrate_kbps, int vbv_buffer_size_kb);
-    StripeEncodeResult encode_fullframe_nvenc(int width, int height, const uint8_t* y_plane, int y_stride, const uint8_t* u_plane, int u_stride, const uint8_t* v_plane, int v_stride, bool is_i444, int frame_counter, bool force_idr_frame);
-    void reset_vaapi_encoder();
-    bool initialize_vaapi_encoder(int render_node_idx, int width, int height, int qp, bool use_yuv444, bool is_cbr, int bitrate_kbps, int vbv_buffer_size_kb);
-    StripeEncodeResult encode_fullframe_vaapi(int width, int height, double fps, const uint8_t* y_plane, int y_stride, const uint8_t* u_plane, int u_stride, const uint8_t* v_plane, int v_stride, bool is_i444, int frame_counter, bool force_idr_frame);
-
-    void load_watermark_image();
-    void capture_loop();
-    void overlay_image(int image_height, int image_width, const uint32_t *image_ptr,
-                     int image_x, int image_y, int frame_height, int frame_width,
-                     unsigned char *frame_ptr, int frame_stride_bytes, int frame_bytes_per_pixel);
-
-public:
-  /**
-   * @brief Default constructor for ScreenCaptureModule.
-   * Initializes stop_requested to false and YUV plane strides to 0.
-   */
-  ScreenCaptureModule() : watermark_path_internal(""),
-                          watermark_location_internal(WatermarkLocation::NONE),
-                          stop_requested(false),
-                          stripe_callback(nullptr),
-                          full_frame_y_stride_(0), full_frame_u_stride_(0), full_frame_v_stride_(0),
-                          yuv_planes_are_i444_(false),
-                          watermark_width_(0),
-                          watermark_height_(0),
-                          watermark_loaded_(false),
-                          watermark_current_x_(0),
-                          watermark_current_y_(0),
-                          watermark_dx_(2),
-                          watermark_dy_(2) {}
-
-  /**
-   * @brief Destructor for ScreenCaptureModule.
-   * Ensures that the capture process is stopped and resources are released.
-   * Calls stop_capture().
-   */
-  ~ScreenCaptureModule() {
-    stop_capture();
-  }
-
-  /**
-   * @brief Starts the screen capture process in a new thread.
-   * If a capture thread is already running, it is stopped first.
-   * Resets encoder stores and frame counters. The actual settings used by
-   * the capture loop are read from member variables which should be set
-   * via modify_settings() before calling start_capture().
-   */
-  void start_capture();
-
-  /**
-   * @brief Stops the screen capture process.
-   * Sets the stop_requested flag and waits for the capture thread to join.
-   * This is a blocking call.
-   */
-  void stop_capture();
-
-  /**
-   * @brief Modifies the capture and encoding settings.
-   * This function is thread-safe. The new settings will be picked up by
-   * the capture loop at the beginning of its next iteration.
-   * If dimensions or H.264 color format change, XShm and encoders may be reinitialized.
-   * @param new_settings A CaptureSettings struct containing the new settings.
-   */
-  void modify_settings(const CaptureSettings& new_settings) {
-    std::lock_guard<std::mutex> lock(settings_mutex);
-    capture_width = new_settings.capture_width;
-    capture_height = new_settings.capture_height;
-    capture_x = new_settings.capture_x;
-    capture_y = new_settings.capture_y;
-    target_fps = new_settings.target_fps;
-    jpeg_quality = new_settings.jpeg_quality;
-    paint_over_jpeg_quality = new_settings.paint_over_jpeg_quality;
-    use_paint_over_quality = new_settings.use_paint_over_quality;
-    paint_over_trigger_frames = new_settings.paint_over_trigger_frames;
-    damage_block_threshold = new_settings.damage_block_threshold;
-    damage_block_duration = new_settings.damage_block_duration;
-    output_mode = new_settings.output_mode;
-    h264_crf = new_settings.h264_crf;
-    h264_paintover_crf = new_settings.h264_paintover_crf;
-    h264_paintover_burst_frames = new_settings.h264_paintover_burst_frames;
-    h264_fullcolor = new_settings.h264_fullcolor;
-    h264_fullframe = new_settings.h264_fullframe;
-    h264_streaming_mode = new_settings.h264_streaming_mode;
-    capture_cursor = new_settings.capture_cursor;
-    vaapi_render_node_index = new_settings.vaapi_render_node_index;
-    use_cpu = new_settings.use_cpu;
-    debug_logging = new_settings.debug_logging;
-    std::string new_wm_path_str = new_settings.watermark_path ? new_settings.watermark_path : "";
-    bool path_actually_changed_in_settings = (watermark_path_internal != new_wm_path_str);
-  
-    watermark_path_internal = new_wm_path_str;
-    watermark_location_internal = new_settings.watermark_location_enum;
-
-    if (path_actually_changed_in_settings) {
-        std::lock_guard<std::mutex> data_lock(watermark_data_mutex_);
-        watermark_loaded_ = false;
-    }
-
-    h264_cbr_mode = new_settings.h264_cbr_mode;
-    h264_bitrate_kbps = new_settings.h264_bitrate_kbps;
-    h264_vbv_buffer_size_kb = new_settings.h264_vbv_buffer_size_kb;
-    auto_adjust_screen_capture_size = new_settings.auto_adjust_screen_capture_size;
-  }
-
-  /**
-   * @brief Retrieves the current capture and encoding settings.
-   * This function is thread-safe.
-   * @return A CaptureSettings struct containing the current settings as known
-   *         to the module (may not yet be active in the capture loop if recently modified).
-   */
-  CaptureSettings get_current_settings() const {
-    std::lock_guard<std::mutex> lock(settings_mutex);
-    return CaptureSettings(
-      capture_width, capture_height, capture_x, capture_y, target_fps,
-      jpeg_quality, paint_over_jpeg_quality, use_paint_over_quality,
-      paint_over_trigger_frames, damage_block_threshold,
-      damage_block_duration, output_mode, h264_crf, h264_paintover_crf,
-      h264_paintover_burst_frames, h264_fullcolor, h264_fullframe, h264_streaming_mode,
-      capture_cursor, watermark_path_internal.c_str(), watermark_location_internal,
-      vaapi_render_node_index, use_cpu, debug_logging, h264_cbr_mode, h264_bitrate_kbps,
-      h264_vbv_buffer_size_kb, auto_adjust_screen_capture_size);
-  }
-
-  /**
-   * @brief Requests the next encoded frame to be an IDR (key) frame.
-   * This function is thread-safe. It sets flags that will cause the next
-   * encoded frame to be an IDR frame in the appropriate encoder backend.
-   */
-  void request_idr() {
-    if (debug_logging) {
-      const char* backend = use_cpu ? "CPU" : (nvenc_operational ? "NVENC" : (vaapi_operational ? "VAAPI" : "None"));
-      std::cout << "[pixelflux] Request IDR -> " << backend << std::endl;
-    }
-
-    if (use_cpu) force_next_idr_ = true;
-    else if (nvenc_operational) nvenc_force_next_idr_ = true;
-    else if (vaapi_operational) vaapi_force_next_idr_ = true;
-  }
-
-  /**
-   * @brief Updates the target video bitrate for H.264 CBR mode.
-   * This function is thread-safe. If CBR mode is not enabled, it does nothing.
-   * @param bitrate The new target bitrate in kbps.
-   */
-  void update_video_bitrate(int bitrate) {
-    if (!h264_cbr_mode) return;
-
-    if (debug_logging) {
-      std::cout << "[pixelflux] Updating video bitrate from " << h264_bitrate_kbps << " to " << bitrate << std::endl;
-    }
-     
-    std::lock_guard<std::mutex> lock(settings_mutex);
-    h264_bitrate_kbps = static_cast<int>(std::abs(bitrate));
-  }
-
-  /**
-   * @brief Updates the VBV buffer size for H.264 CBR mode.
-   * This function is thread-safe. If CBR mode is not enabled, it does nothing.
-   * @param vbv_buffer_size_kb The new VBV buffer size in kb
-   */
-  void update_vbv_buffer_size(int vbv_buffer_size_kb) {
-    if (!h264_cbr_mode) return;
-
-    if (debug_logging) {
-      std::cout << "[pixelflux] Updating VBV buffer size from " << h264_vbv_buffer_size_kb << " to " << vbv_buffer_size_kb << std::endl;
-    }
-     
-    std::lock_guard<std::mutex> lock(settings_mutex);
-    h264_vbv_buffer_size_kb = static_cast<int>(std::abs(vbv_buffer_size_kb));
-  }
-
-  /**
-   * @brief Updates the target framerate for video encoding.
-   * This function is thread-safe.
-   * @param fps The new target frames per second.
-   */
-  void update_framerate(double fps) {
-    if (debug_logging) {
-      std::cout << "[pixelflux] Updating video framerate from " << target_fps << " to " << fps << std::endl;
-    }
-     
-    std::lock_guard<std::mutex> lock(settings_mutex);
-    target_fps = static_cast<double>(std::abs(fps));
-  }
-};
-
-/**
- * @brief Starts the screen capture process in a new thread.
- * If a capture thread is already running, this function will stop it first.
- * It resets all encoder states (CPU, NVENC, VAAPI) and frame counters to
- * ensure a clean start. It also probes for hardware encoder availability and
- * pre-loads any configured watermark image. The capture itself runs in the
- * background. The settings for the capture must be set via `modify_settings`
- * prior to calling this function.
- */
-void ScreenCaptureModule::start_capture() {
-    if (capture_thread.joinable()) {
-      stop_capture();
-    }
-    if (LoadNvencApi(nvenc_state_.nvenc_funcs)) {
-      is_nvidia_system_detected = true;
-    } else {
-      is_nvidia_system_detected = false;
-    }
-    h264_minimal_store_.reset();
-    nvenc_operational = false;
-    nvenc_force_next_idr_ = true;
-    vaapi_operational = false;
-    vaapi_force_next_idr_ = true;
-    stop_requested = false;
-    frame_counter = 0;
-    encoded_frame_count = 0;
-    total_stripes_encoded_this_interval = 0;
-    if (!watermark_path_internal.empty() && watermark_location_internal != WatermarkLocation::NONE) {
-      load_watermark_image();
-    }
-    capture_thread = std::thread(&ScreenCaptureModule::capture_loop, this);
-}
-
-/**
- * @brief Stops the screen capture process and releases resources.
- * This function signals the background capture thread to stop. It is a
- * blocking call that waits for the thread to finish its current work and
- * join. After the thread has terminated, it cleans up any active hardware
- * encoder sessions (NVENC, VAAPI) and unloads associated dynamic libraries.
- */
-void ScreenCaptureModule::stop_capture() {
-    stop_requested = true;
-    if (capture_thread.joinable()) {
-      capture_thread.join();
-    }
-    if (nvenc_state_.initialized) {
-      reset_nvenc_encoder();
-    }
-    if (vaapi_state_.initialized) {
-      reset_vaapi_encoder();
-    }
-    UnloadCudaApi();
-}
-
-/**
- * @brief Resets and tears down the current NVENC encoder session.
- * This function is thread-safe. It destroys all allocated input and output
- * buffers, destroys the encoder session, and releases the CUDA context.
- * It ensures that all GPU resources associated with the encoder are freed.
- */
-void ScreenCaptureModule::reset_nvenc_encoder() {
-  std::lock_guard<std::mutex> lock(nvenc_mutex_);
-
-  if (!nvenc_state_.initialized) {
-    return;
-  }
-
-  if (nvenc_state_.encoder_session && nvenc_state_.nvenc_funcs.nvEncDestroyEncoder) {
-    for (NV_ENC_INPUT_PTR& ptr : nvenc_state_.input_buffers) {
-        if (ptr && nvenc_state_.nvenc_funcs.nvEncDestroyInputBuffer)
-            nvenc_state_.nvenc_funcs.nvEncDestroyInputBuffer(nvenc_state_.encoder_session, ptr);
-        ptr = nullptr;
-    }
-    nvenc_state_.input_buffers.clear();
-
-    for (NV_ENC_OUTPUT_PTR& ptr : nvenc_state_.output_buffers) {
-        if (ptr && nvenc_state_.nvenc_funcs.nvEncDestroyBitstreamBuffer)
-            nvenc_state_.nvenc_funcs.nvEncDestroyBitstreamBuffer(nvenc_state_.encoder_session, ptr);
-        ptr = nullptr;
-    }
-    nvenc_state_.output_buffers.clear();
-
-    nvenc_state_.nvenc_funcs.nvEncDestroyEncoder(nvenc_state_.encoder_session);
-    nvenc_state_.encoder_session = nullptr;
-  }
-
-  if (nvenc_state_.cuda_context && g_cuda_funcs.pfn_cuCtxDestroy) {
-    g_cuda_funcs.pfn_cuCtxDestroy(nvenc_state_.cuda_context);
-    nvenc_state_.cuda_context = nullptr;
-  }
-
-  nvenc_state_.initialized = false;
-}
-
-/**
- * @brief Initializes or reconfigures the NVENC H.264 encoder.
- * This function is thread-safe. It sets up a new encoder session if one
- * is not already active. If an active session exists with the correct
- * dimensions and color format, it attempts a lightweight reconfiguration for
- * the target QP (quality) or bitrate. If dimensions or color format have changed, it
- * performs a full teardown and re-initialization.
- * @param width The target encoding width.
- * @param height The target encoding height.
- * @param target_qp The target Quantization Parameter (lower is higher quality) for CRF mode.
- * @param fps The target frames per second, used for rate control hints.
- * @param use_yuv444 True to configure for YUV 4:4:4, false for NV12 (4:2:0).
- * @param is_cbr True to enable Constant Bitrate (CBR) mode, false for CRF mode.
- * @param bitrate_kbps Target bitrate in kbps when CBR mode is enabled.
- * @param vbv_buffer_size_kb VBV buffer size in kb for CBR mode (0 for auto/default).
- * @return True if the encoder is successfully initialized or reconfigured, false otherwise.
- */
-bool ScreenCaptureModule::initialize_nvenc_encoder(int width,
-                              int height,
-                              int target_qp,
-                              double fps,
-                              bool use_yuv444,
-                              bool is_cbr,
-                              int bitrate_kbps,
-                              int vbv_buffer_size_kb) {
-  std::lock_guard<std::mutex> lock(nvenc_mutex_);
-
-  NV_ENC_BUFFER_FORMAT target_buffer_format =
-    use_yuv444 ? NV_ENC_BUFFER_FORMAT_YUV444 : NV_ENC_BUFFER_FORMAT_NV12;
-
-  if (nvenc_state_.initialized && nvenc_state_.initialized_width == width &&
-      nvenc_state_.initialized_height == height &&
-      nvenc_state_.initialized_qp == target_qp &&
-      nvenc_state_.initialized_buffer_format == target_buffer_format &&
-      nvenc_state_.cbr_mode == is_cbr &&
-      nvenc_state_.initialized_bitrate_kbps == bitrate_kbps) {
-    return true;
-  }
-
-  if (nvenc_state_.initialized && nvenc_state_.initialized_width == width &&
-      nvenc_state_.initialized_height == height &&
-      nvenc_state_.initialized_buffer_format == target_buffer_format &&
-      nvenc_state_.cbr_mode == is_cbr) {
-
-    bool reconfig_needed = false;
-    if (is_cbr) {
-        if (nvenc_state_.initialized_bitrate_kbps != bitrate_kbps) reconfig_needed = true;
-    } else {
-        if (nvenc_state_.initialized_qp != target_qp) reconfig_needed = true;
-    }
-
-    if (reconfig_needed) {
-      NV_ENC_RECONFIGURE_PARAMS reconfigure_params = {0};
-      NV_ENC_CONFIG new_config = nvenc_state_.encode_config;
-
-      reconfigure_params.version = NV_ENC_RECONFIGURE_PARAMS_VER;
-      reconfigure_params.reInitEncodeParams = nvenc_state_.init_params;
-      reconfigure_params.reInitEncodeParams.encodeConfig = &new_config;
-
-      if (is_cbr) {
-        new_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
-        uint32_t bps = static_cast<uint32_t>(bitrate_kbps * 1000);
-        new_config.rcParams.averageBitRate = bps;
-        new_config.rcParams.maxBitRate = bps;
-        if (vbv_buffer_size_kb > 0) {
-          new_config.rcParams.vbvBufferSize = static_cast<uint32_t>(vbv_buffer_size_kb * 1000);
-        } else {
-          new_config.rcParams.vbvBufferSize = (bps + 9) / 10;
-        }
-        reconfigure_params.forceIDR = false;
-      } else {
-        new_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
-        new_config.rcParams.constQP.qpInterP = target_qp;
-        new_config.rcParams.constQP.qpIntra = target_qp;
-        new_config.rcParams.constQP.qpInterB = target_qp;
-        
-        bool is_quality_increasing = (target_qp < nvenc_state_.initialized_qp);
-        reconfigure_params.forceIDR = is_quality_increasing;
-      }
-
-      NVENCSTATUS status = nvenc_state_.nvenc_funcs.nvEncReconfigureEncoder(
-        nvenc_state_.encoder_session, &reconfigure_params);
-
-      if (status == NV_ENC_SUCCESS) {
-        nvenc_state_.initialized_qp = target_qp;
-        nvenc_state_.cbr_mode = is_cbr;
-        nvenc_state_.initialized_bitrate_kbps = bitrate_kbps;
-        nvenc_state_.encode_config = new_config;
-        return true;
-      }
-    } else {
-      return true;
-    }
-  }
-
-  if (nvenc_state_.initialized) {
-    // Manually unlock before recursive call to reset, then re-lock.
-    nvenc_mutex_.unlock();
-    reset_nvenc_encoder();
-    nvenc_mutex_.lock();
-  }
-
-  if (!LoadCudaApi()) {
-    std::cerr << "NVENC_INIT_FATAL: Failed to load CUDA driver API." << std::endl;
-    return false;
-  }
-  
-  if (!LoadNvencApi(nvenc_state_.nvenc_funcs)) {
-      nvenc_state_.initialized = false;
-      return false;
-  }
-
-  if (!nvenc_state_.nvenc_funcs.nvEncOpenEncodeSessionEx) {
-    nvenc_state_.initialized = false;
-    return false;
-  }
-
-  CUresult cu_status = g_cuda_funcs.pfn_cuInit(0);
-  if (cu_status != CUDA_SUCCESS) {
-      std::cerr << "NVENC_INIT_ERROR: cuInit failed with code " << cu_status << std::endl;
-      return false;
-  }
-  CUdevice cu_device;
-  cu_status = g_cuda_funcs.pfn_cuDeviceGet(&cu_device, 0);
-  if (cu_status != CUDA_SUCCESS) {
-      std::cerr << "NVENC_INIT_ERROR: cuDeviceGet failed with code " << cu_status << std::endl;
-      return false;
-  }
-  cu_status = g_cuda_funcs.pfn_cuCtxCreate(&nvenc_state_.cuda_context, 0, cu_device);
-  if (cu_status != CUDA_SUCCESS) {
-      std::cerr << "NVENC_INIT_ERROR: cuCtxCreate failed with code " << cu_status << std::endl;
-      return false;
-  }
-
-  NVENCSTATUS status;
-  NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS session_params = {0};
-  session_params.version = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER;
-  session_params.deviceType = NV_ENC_DEVICE_TYPE_CUDA;
-  session_params.device = nvenc_state_.cuda_context;
-  session_params.apiVersion = NVENCAPI_VERSION;
-
-  status = nvenc_state_.nvenc_funcs.nvEncOpenEncodeSessionEx(
-    &session_params, &nvenc_state_.encoder_session);
-
-  if (status != NV_ENC_SUCCESS) {
-    std::string error_str = "NVENC_INIT_ERROR: nvEncOpenEncodeSessionEx (CUDA Path) FAILED: " + std::to_string(status);
-    std::cerr << error_str << std::endl;
-    nvenc_state_.encoder_session = nullptr;
-    nvenc_mutex_.unlock();
-    reset_nvenc_encoder();
-    nvenc_mutex_.lock();
-    return false;
-  }
-  if (!nvenc_state_.encoder_session) {
-    nvenc_mutex_.unlock();
-    reset_nvenc_encoder();
-    nvenc_mutex_.lock();
-    return false;
-  }
-
-  memset(&nvenc_state_.init_params, 0, sizeof(nvenc_state_.init_params));
-  nvenc_state_.init_params.version = NV_ENC_INITIALIZE_PARAMS_VER;
-  nvenc_state_.init_params.encodeGUID = NV_ENC_CODEC_H264_GUID;
-  nvenc_state_.init_params.presetGUID = NV_ENC_PRESET_P1_GUID;
-  nvenc_state_.init_params.tuningInfo = NV_ENC_TUNING_INFO_ULTRA_LOW_LATENCY;
-  nvenc_state_.init_params.encodeWidth = width;
-  nvenc_state_.init_params.encodeHeight = height;
-  nvenc_state_.init_params.darWidth = width;
-  nvenc_state_.init_params.darHeight = height;
-  nvenc_state_.init_params.frameRateNum = static_cast<uint32_t>(fps < 1.0 ? 30 : fps);
-  nvenc_state_.init_params.frameRateDen = 1;
-  nvenc_state_.init_params.enablePTD = 1;
-
-  NV_ENC_PRESET_CONFIG preset_config = {0};
-  preset_config.version = NV_ENC_PRESET_CONFIG_VER;
-  preset_config.presetCfg.version = NV_ENC_CONFIG_VER;
-
-  if (nvenc_state_.nvenc_funcs.nvEncGetEncodePresetConfigEx) {
-    status = nvenc_state_.nvenc_funcs.nvEncGetEncodePresetConfigEx(
-      nvenc_state_.encoder_session,
-      nvenc_state_.init_params.encodeGUID,
-      nvenc_state_.init_params.presetGUID,
-      nvenc_state_.init_params.tuningInfo,
-      &preset_config);
-
-    if (status != NV_ENC_SUCCESS) {
-      std::cerr << "NVENC_INIT_WARN: nvEncGetEncodePresetConfigEx FAILED: " << status
-                << ". Falling back to manual config." << std::endl;
-      memset(&nvenc_state_.encode_config, 0, sizeof(nvenc_state_.encode_config));
-      nvenc_state_.encode_config.version = NV_ENC_CONFIG_VER;
-    } else {
-      nvenc_state_.encode_config = preset_config.presetCfg;
-      nvenc_state_.encode_config.version = NV_ENC_CONFIG_VER;
-    }
-  } else {
-    std::cerr << "NVENC_INIT_WARN: nvEncGetEncodePresetConfigEx not available. Using manual "
-                 "config."
-              << std::endl;
-    memset(&nvenc_state_.encode_config, 0, sizeof(nvenc_state_.encode_config));
-    nvenc_state_.encode_config.version = NV_ENC_CONFIG_VER;
-  }
-
-  nvenc_state_.encode_config.profileGUID =
-    use_yuv444 ? NV_ENC_H264_PROFILE_HIGH_444_GUID : NV_ENC_H264_PROFILE_HIGH_GUID;
-  
-  if (is_cbr) {
-     nvenc_state_.encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CBR;
-     uint32_t bps = static_cast<uint32_t>(bitrate_kbps * 1000);
-     nvenc_state_.encode_config.rcParams.averageBitRate = bps;
-     nvenc_state_.encode_config.rcParams.maxBitRate = bps;
-     if (vbv_buffer_size_kb > 0) {
-       nvenc_state_.encode_config.rcParams.vbvBufferSize = static_cast<uint32_t>(vbv_buffer_size_kb * 1000);
-     } else {
-       nvenc_state_.encode_config.rcParams.vbvBufferSize = bps * 0.1;
-     }
-  } else {
-      nvenc_state_.encode_config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_CONSTQP;
-      nvenc_state_.encode_config.rcParams.constQP.qpInterP = target_qp;
-      nvenc_state_.encode_config.rcParams.constQP.qpIntra = target_qp;
-      nvenc_state_.encode_config.rcParams.constQP.qpInterB = target_qp;
-  }
-  nvenc_state_.encode_config.gopLength = NVENC_INFINITE_GOPLENGTH;
-  nvenc_state_.encode_config.frameIntervalP = 1;
-
-  NV_ENC_CONFIG_H264* h264_cfg = &nvenc_state_.encode_config.encodeCodecConfig.h264Config;
-  h264_cfg->chromaFormatIDC = use_yuv444 ? 3 : 1;
-  h264_cfg->h264VUIParameters.videoFullRangeFlag = use_yuv444 ? 1 : 0;
-  h264_cfg->repeatSPSPPS = 1;
-  nvenc_state_.init_params.encodeConfig = &nvenc_state_.encode_config;
-
-  status = nvenc_state_.nvenc_funcs.nvEncInitializeEncoder(nvenc_state_.encoder_session,
-                                                            &nvenc_state_.init_params);
-  if (status != NV_ENC_SUCCESS) {
-    std::string error_str =
-      "NVENC_INIT_ERROR: nvEncInitializeEncoder FAILED: " + std::to_string(status);
-    if (nvenc_state_.nvenc_funcs.nvEncGetLastErrorString) {
-      const char* api_err =
-        nvenc_state_.nvenc_funcs.nvEncGetLastErrorString(nvenc_state_.encoder_session);
-      if (api_err)
-        error_str += " - API Error: " + std::string(api_err);
-    }
-    std::cerr << error_str << std::endl;
-
-    nvenc_mutex_.unlock();
-    reset_nvenc_encoder();
-    nvenc_mutex_.lock();
-    return false;
-  }
-
-  nvenc_state_.input_buffers.resize(nvenc_state_.buffer_pool_size);
-  nvenc_state_.output_buffers.resize(nvenc_state_.buffer_pool_size);
-  for (int i = 0; i < nvenc_state_.buffer_pool_size; ++i) {
-    NV_ENC_CREATE_INPUT_BUFFER icp = {0};
-    icp.version = NV_ENC_CREATE_INPUT_BUFFER_VER;
-    icp.width = width;
-    icp.height = height;
-    icp.bufferFmt = target_buffer_format;
-    status = nvenc_state_.nvenc_funcs.nvEncCreateInputBuffer(nvenc_state_.encoder_session,
-                                                              &icp);
-    if (status != NV_ENC_SUCCESS) {
-      nvenc_mutex_.unlock();
-      reset_nvenc_encoder();
-      nvenc_mutex_.lock();
-      return false;
-    }
-    nvenc_state_.input_buffers[i] = icp.inputBuffer;
-    NV_ENC_CREATE_BITSTREAM_BUFFER ocp = {0};
-    ocp.version = NV_ENC_CREATE_BITSTREAM_BUFFER_VER;
-    status = nvenc_state_.nvenc_funcs.nvEncCreateBitstreamBuffer(
-      nvenc_state_.encoder_session, &ocp);
-    if (status != NV_ENC_SUCCESS) {
-      nvenc_mutex_.unlock();
-      reset_nvenc_encoder();
-      nvenc_mutex_.lock();
-      return false;
-    }
-    nvenc_state_.output_buffers[i] = ocp.bitstreamBuffer;
-  }
-  nvenc_state_.initialized_width = width;
-  nvenc_state_.initialized_height = height;
-  nvenc_state_.initialized_qp = target_qp;
-  nvenc_state_.initialized_buffer_format = target_buffer_format;
-  nvenc_state_.cbr_mode = is_cbr;
-  nvenc_state_.initialized_bitrate_kbps = bitrate_kbps;
-  nvenc_state_.initialized = true;
-  return true;
-}
-
-/**
- * @brief Encodes a full YUV frame using the initialized NVENC session.
- * This function is thread-safe. It takes YUV plane data, copies it into a
- * locked NVENC input buffer, and submits it for encoding. It then retrieves
- * the resulting H.264 bitstream, prepends a custom 10-byte header, and
- * returns it.
- * @param width The width of the frame.
- * @param height The height of the frame.
- * @param y_plane Pointer to the Y plane data.
- * @param y_stride Stride of the Y plane.
- * @param u_plane Pointer to the U plane (or interleaved UV plane for NV12).
- * @param u_stride Stride of the U/UV plane.
- * @param v_plane Pointer to the V plane (used for I444, null for NV12).
- * @param v_stride Stride of the V plane.
- * @param is_i444 True if the input is YUV 4:4:4, false if NV12.
- * @param frame_counter The current frame ID.
- * @param force_idr_frame True to force the encoder to generate an IDR (key) frame.
- * @return A StripeEncodeResult containing the encoded H.264 data.
- * @throws std::runtime_error if any NVENC API call fails during the process.
- */
-StripeEncodeResult ScreenCaptureModule::encode_fullframe_nvenc(int width,
-                                          int height,
-                                          const uint8_t* y_plane, int y_stride,
-                                          const uint8_t* u_plane, int u_stride,
-                                          const uint8_t* v_plane, int v_stride,
-                                          bool is_i444,
-                                          int frame_counter,
-                                          bool force_idr_frame) {
-  StripeEncodeResult result;
-  result.type = StripeDataType::H264;
-  result.stripe_y_start = 0;
-  result.stripe_height = height;
-  result.frame_id = frame_counter;
-
-  std::lock_guard<std::mutex> lock(nvenc_mutex_);
-
-  if (!nvenc_state_.initialized) {
-    throw std::runtime_error("NVENC_ENCODE_FATAL: Not initialized.");
-  }
-
-  NV_ENC_INPUT_PTR in_ptr =
-    nvenc_state_.input_buffers[nvenc_state_.current_input_buffer_idx];
-  NV_ENC_OUTPUT_PTR out_ptr =
-    nvenc_state_.output_buffers[nvenc_state_.current_output_buffer_idx];
-
-  NV_ENC_LOCK_INPUT_BUFFER lip = {0};
-  lip.version = NV_ENC_LOCK_INPUT_BUFFER_VER;
-  lip.inputBuffer = in_ptr;
-  NVENCSTATUS status =
-    nvenc_state_.nvenc_funcs.nvEncLockInputBuffer(nvenc_state_.encoder_session, &lip);
-  if (status != NV_ENC_SUCCESS)
-    throw std::runtime_error("NVENC_ENCODE_ERROR: nvEncLockInputBuffer FAILED: " +
-                             std::to_string(status));
-
-  unsigned char* locked_buffer = static_cast<unsigned char*>(lip.bufferDataPtr);
-  int locked_pitch = lip.pitch;
-
-  uint8_t* y_dst = locked_buffer;
-  uint8_t* uv_or_u_dst = locked_buffer + static_cast<size_t>(locked_pitch) * height;
-
-  if (is_i444) {
-    uint8_t* v_dst = uv_or_u_dst + static_cast<size_t>(locked_pitch) * height;
-    libyuv::CopyPlane(y_plane, y_stride, y_dst, locked_pitch, width, height);
-    libyuv::CopyPlane(u_plane, u_stride, uv_or_u_dst, locked_pitch, width, height);
-    libyuv::CopyPlane(v_plane, v_stride, v_dst, locked_pitch, width, height);
-  } else {
-    if (v_plane) {
-        libyuv::I420ToNV12(y_plane, y_stride, u_plane, u_stride, v_plane, v_stride,
-                            y_dst, locked_pitch, uv_or_u_dst, locked_pitch, width, height);
-    } else {
-        libyuv::CopyPlane(y_plane, y_stride, y_dst, locked_pitch, width, height);
-        libyuv::CopyPlane(u_plane, u_stride, uv_or_u_dst, locked_pitch, width, height / 2);
-    }
-  }
-
-  nvenc_state_.nvenc_funcs.nvEncUnlockInputBuffer(nvenc_state_.encoder_session, in_ptr);
-
-  NV_ENC_PIC_PARAMS pp = {0};
-  pp.version = NV_ENC_PIC_PARAMS_VER;
-  pp.inputBuffer = in_ptr;
-  pp.outputBitstream = out_ptr;
-  pp.bufferFmt = nvenc_state_.initialized_buffer_format;
-  pp.inputWidth = width;
-  pp.inputHeight = height;
-  pp.inputPitch = locked_pitch;
-  pp.pictureStruct = NV_ENC_PIC_STRUCT_FRAME;
-  pp.inputTimeStamp = frame_counter;
-  pp.frameIdx = frame_counter;
-  if (force_idr_frame) {
-    pp.encodePicFlags = NV_ENC_PIC_FLAG_FORCEIDR;
-  }
-
-  status =
-    nvenc_state_.nvenc_funcs.nvEncEncodePicture(nvenc_state_.encoder_session, &pp);
-  if (status != NV_ENC_SUCCESS) {
-    std::string err_msg = "NVENC_ENCODE_ERROR: nvEncEncodePicture FAILED: " + std::to_string(status);
-    throw std::runtime_error(err_msg);
-  }
-
-  NV_ENC_LOCK_BITSTREAM lbs = {0};
-  lbs.version = NV_ENC_LOCK_BITSTREAM_VER;
-  lbs.outputBitstream = out_ptr;
-  status =
-    nvenc_state_.nvenc_funcs.nvEncLockBitstream(nvenc_state_.encoder_session, &lbs);
-  if (status != NV_ENC_SUCCESS) {
-    throw std::runtime_error("NVENC_ENCODE_ERROR: nvEncLockBitstream FAILED: " + std::to_string(status));
-  }
-
-  if (lbs.bitstreamSizeInBytes > 0) {
-    const unsigned char TAG = 0x04;
-    unsigned char type_hdr = 0x00;
-    if (lbs.pictureType == NV_ENC_PIC_TYPE_IDR) type_hdr = 0x01;
-    else if (lbs.pictureType == NV_ENC_PIC_TYPE_I) type_hdr = 0x02;
-
-    int header_sz = 10;
-    result.data = new unsigned char[lbs.bitstreamSizeInBytes + header_sz];
-    result.size = lbs.bitstreamSizeInBytes + header_sz;
-    result.data[0] = TAG;
-    result.data[1] = type_hdr;
-    uint16_t net_val = htons(static_cast<uint16_t>(result.frame_id % 65536));
-    std::memcpy(result.data + 2, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(result.stripe_y_start));
-    std::memcpy(result.data + 4, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(width));
-    std::memcpy(result.data + 6, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(height));
-    std::memcpy(result.data + 8, &net_val, 2);
-    std::memcpy(result.data + header_sz, lbs.bitstreamBufferPtr, lbs.bitstreamSizeInBytes);
-  } else {
-    result.size = 0;
-    result.data = nullptr;
-  }
-
-  nvenc_state_.nvenc_funcs.nvEncUnlockBitstream(nvenc_state_.encoder_session, out_ptr);
-
-  nvenc_state_.current_input_buffer_idx = (nvenc_state_.current_input_buffer_idx + 1) % nvenc_state_.buffer_pool_size;
-  nvenc_state_.current_output_buffer_idx = (nvenc_state_.current_output_buffer_idx + 1) % nvenc_state_.buffer_pool_size;
-
-  return result;
-}
-
-/**
- * @brief Releases all resources associated with the VA-API encoder session.
- * This function is thread-safe. It frees all allocated libav objects,
- * including the codec context, hardware device and frame contexts, and reusable
- * frame and packet structures. It resets the state to uninitialized.
- */
-void ScreenCaptureModule::reset_vaapi_encoder() {
-    std::lock_guard<std::mutex> lock(vaapi_mutex_);
-    if (!vaapi_state_.initialized) {
-        return;
-    }
-    if (vaapi_state_.codec_ctx) {
-        avcodec_free_context(&vaapi_state_.codec_ctx);
-    }
-    if (vaapi_state_.hw_frames_ctx) {
-        av_buffer_unref(&vaapi_state_.hw_frames_ctx);
-    }
-    if (vaapi_state_.hw_device_ctx) {
-        av_buffer_unref(&vaapi_state_.hw_device_ctx);
-    }
-    if (vaapi_state_.sw_frame) {
-        av_frame_free(&vaapi_state_.sw_frame);
-    }
-    if (vaapi_state_.hw_frame) {
-        av_frame_free(&vaapi_state_.hw_frame);
-    }
-    if (vaapi_state_.packet) {
-        av_packet_free(&vaapi_state_.packet);
-    }
-    vaapi_state_ = {};
-    if (debug_logging) {
-        std::cout << "VAAPI: Encoder resources released." << std::endl;
-    }
-}
-
-/**
- * @brief Initializes a VA-API H.264 hardware encoder using libavcodec.
- * This function is thread-safe. It configures and opens the 'h264_vaapi'
- * encoder. This involves creating a VA-API hardware device context for a
- * specific DRM render node, setting up a hardware frame context for GPU
- * surface management, and configuring the encoder with the specified
- * dimensions, quality (QP), and pixel format.
- * @param render_node_idx The index of the /dev/dri/renderD node to use.
- * @param width The target encoding width.
- * @param height The target encoding height.
- * @param qp The target Quantization Parameter for Constant QP (CQP) rate control.
- * @param use_yuv444 If true, configures the encoder for YUV 4:4:4 input;
- *                   otherwise, configures for YUV 4:2:0 (NV12).
- * @param is_cbr True to enable Constant Bitrate (CBR) mode, false for CQP mode.
- * @param bitrate_kbps Target bitrate in kbps when CBR mode is enabled.
- * @param vbv_buffer_size_kb VBV buffer size in kb for CBR mode (0 for auto/default).
- * @return True if the encoder was successfully initialized, false otherwise.
- */
-bool ScreenCaptureModule::initialize_vaapi_encoder(int render_node_idx, int width, int height, int qp, bool use_yuv444, bool is_cbr, int bitrate_kbps, int vbv_buffer_size_kb) {
-    std::unique_lock<std::mutex> lock(vaapi_mutex_);
-    if (vaapi_state_.initialized && vaapi_state_.initialized_width == width &&
-        vaapi_state_.initialized_height == height &&
-        vaapi_state_.initialized_is_444 == use_yuv444 &&
-        vaapi_state_.initialized_cbr == is_cbr) {
-        if (is_cbr) {
-            if (vaapi_state_.initialized_bitrate_kbps == bitrate_kbps) return true;
-        } else {
-            if (vaapi_state_.initialized_qp == qp) return true;
-        }
-    }
-    if (vaapi_state_.initialized) {
-        lock.unlock();
-        reset_vaapi_encoder();
-        lock.lock();
-    }
-    int ret = 0;
-    const AVCodec *codec = avcodec_find_encoder_by_name("h264_vaapi");
-    if (!codec) {
-        std::cerr << "VAAPI_INIT: Codec 'h264_vaapi' not found." << std::endl;
-        return false;
-    }
-    std::vector<std::string> nodes = find_vaapi_render_nodes();
-    if (nodes.empty()) {
-        std::cerr << "VAAPI_INIT: No /dev/dri/renderD nodes found." << std::endl;
-        return false;
-    }
-    std::string node_to_use = (render_node_idx >= 0 && render_node_idx < (int)nodes.size()) ? nodes[render_node_idx] : nodes[0];
-    if (debug_logging) {
-        std::cout << "VAAPI_INIT: Using render node: " << node_to_use << std::endl;
-    }
-    ret = av_hwdevice_ctx_create(&vaapi_state_.hw_device_ctx, AV_HWDEVICE_TYPE_VAAPI, node_to_use.c_str(), NULL, 0);
-    if (ret < 0) {
-        std::cerr << "VAAPI_INIT: Failed to create VAAPI hardware device context: " << ret << std::endl;
-        return false;
-    }
-    vaapi_state_.codec_ctx = avcodec_alloc_context3(codec);
-    if (!vaapi_state_.codec_ctx) {
-        std::cerr << "VAAPI_INIT: Failed to allocate codec context." << std::endl;
-        return false;
-    }
-    vaapi_state_.codec_ctx->width = width;
-    vaapi_state_.codec_ctx->height = height;
-    vaapi_state_.codec_ctx->time_base = {1, (int)target_fps};
-    vaapi_state_.codec_ctx->framerate = {(int)target_fps, 1};
-    vaapi_state_.codec_ctx->pix_fmt = AV_PIX_FMT_VAAPI;
-    vaapi_state_.codec_ctx->gop_size = INT_MAX;
-    vaapi_state_.codec_ctx->max_b_frames = 0;
-    av_opt_set(vaapi_state_.codec_ctx->priv_data, "tune", "zerolatency", 0);
-    av_opt_set(vaapi_state_.codec_ctx->priv_data, "preset", "ultrafast", 0);
-    if (use_yuv444) {
-        av_opt_set_int(vaapi_state_.codec_ctx, "profile", AV_PROFILE_H264_HIGH_444_PREDICTIVE, 0);
-    } else {
-        av_opt_set_int(vaapi_state_.codec_ctx, "profile", AV_PROFILE_H264_HIGH, 0);
-    }
-
-     if (is_cbr) {
-        av_opt_set(vaapi_state_.codec_ctx->priv_data, "rc_mode", "CBR", 0);
-        int64_t bps = static_cast<int64_t>(bitrate_kbps) * 1000;
-        vaapi_state_.codec_ctx->bit_rate = bps;
-        vaapi_state_.codec_ctx->rc_max_rate = bps;
-        if (vbv_buffer_size_kb > 0) {
-          vaapi_state_.codec_ctx->rc_buffer_size = static_cast<int64_t>(vbv_buffer_size_kb) * 1000;
-        } else {
-          vaapi_state_.codec_ctx->rc_buffer_size = (bps + 9) / 10;
-        }
-        vaapi_state_.codec_ctx->rc_min_rate = bps;
-     } else {
-         av_opt_set(vaapi_state_.codec_ctx->priv_data, "rc_mode", "CQP", 0);
-         av_opt_set_int(vaapi_state_.codec_ctx->priv_data, "qp", qp, 0);
-      }
-
-    vaapi_state_.hw_frames_ctx = av_hwframe_ctx_alloc(vaapi_state_.hw_device_ctx);
-    if (!vaapi_state_.hw_frames_ctx) {
-        std::cerr << "VAAPI_INIT: Failed to create hardware frames context." << std::endl;
-        return false;
-    }
-    AVHWFramesContext *frames_ctx = (AVHWFramesContext *)(vaapi_state_.hw_frames_ctx->data);
-    frames_ctx->format = AV_PIX_FMT_VAAPI;
-    frames_ctx->sw_format = use_yuv444 ? AV_PIX_FMT_YUV444P : AV_PIX_FMT_NV12;
-    frames_ctx->width = width;
-    frames_ctx->height = height;
-    frames_ctx->initial_pool_size = 20;
-    ret = av_hwframe_ctx_init(vaapi_state_.hw_frames_ctx);
-    if (ret < 0) {
-        std::cerr << "VAAPI_INIT: Failed to initialize hardware frames context: " << ret << std::endl;
-        return false;
-    }
-    vaapi_state_.codec_ctx->hw_frames_ctx = av_buffer_ref(vaapi_state_.hw_frames_ctx);
-    if (!vaapi_state_.codec_ctx->hw_frames_ctx) {
-        std::cerr << "VAAPI_INIT: Failed to link hardware frames context." << std::endl;
-        return false;
-    }
-    ret = avcodec_open2(vaapi_state_.codec_ctx, codec, NULL);
-    if (ret < 0) {
-        std::cerr << "VAAPI_INIT: Failed to open codec: " << ret << std::endl;
-        return false;
-    }
-    vaapi_state_.sw_frame = av_frame_alloc();
-    vaapi_state_.hw_frame = av_frame_alloc();
-    vaapi_state_.packet = av_packet_alloc();
-    if (!vaapi_state_.sw_frame || !vaapi_state_.hw_frame || !vaapi_state_.packet) {
-        std::cerr << "VAAPI_INIT: Failed to allocate reusable frame/packet objects." << std::endl;
-        return false;
-    }
-    vaapi_state_.initialized = true;
-    vaapi_state_.initialized_width = width;
-    vaapi_state_.initialized_height = height;
-    vaapi_state_.initialized_qp = qp;
-    vaapi_state_.initialized_is_444 = use_yuv444;
-    vaapi_state_.frame_count = 0;
-    vaapi_state_.initialized_cbr = is_cbr;
-    vaapi_state_.initialized_bitrate_kbps = bitrate_kbps;
-    if (debug_logging) {
-        std::cout << "VAAPI_INIT: Encoder initialized successfully via FFmpeg for "
-                  << width << "x" << height << " " << (use_yuv444 ? "YUV444P" : "NV12")
-                  << (is_cbr ? "with CBR:" + std::to_string(bitrate_kbps) : " with QP: " + std::to_string(qp)) << "." << std::endl;
-    }
-    return true;
-}
-
-/**
- * @brief Encodes a full YUV frame using the initialized VA-API session.
- * This function is thread-safe. It takes YUV plane data, transfers it from
- * system memory to a hardware surface on the GPU, submits it to the encoder,
- * and retrieves the resulting H.264 bitstream packet. The encoded data is
- * packaged into a StripeEncodeResult with a prepended 10-byte custom header.
- * @param width The width of the input frame.
- * @param height The height of the input frame.
- * @param fps The target frames per second (used for PTS calculation).
- * @param y_plane Pointer to the start of the Y plane data.
- * @param y_stride Stride in bytes for the Y plane.
- * @param u_plane Pointer to the start of the U plane (for I444) or interleaved
- *                UV plane (for NV12).
- * @param u_stride Stride in bytes for the U or UV plane.
- * @param v_plane Pointer to the start of the V plane (for I444); should be
- *                nullptr for NV12.
- * @param v_stride Stride in bytes for the V plane.
- * @param is_i444 True if the input format is YUV444P, false for NV12.
- * @param frame_counter The unique identifier for the current frame.
- * @param force_idr_frame If true, flags the frame as a keyframe (IDR).
- * @return A StripeEncodeResult containing the encoded H.264 data. On failure
- *         or if no packet is output, the result may be empty.
- * @throws std::runtime_error if a critical libav API call fails.
- */
-StripeEncodeResult ScreenCaptureModule::encode_fullframe_vaapi(int width, int height, double fps,
-                                          const uint8_t* y_plane, int y_stride,
-                                          const uint8_t* u_plane, int u_stride,
-                                          const uint8_t* v_plane, int v_stride,
-                                          bool is_i444,
-                                          int frame_counter,
-                                          bool force_idr_frame) {
-    std::lock_guard<std::mutex> lock(vaapi_mutex_);
-    if (!vaapi_state_.initialized) {
-        throw std::runtime_error("VAAPI_ENCODE_FATAL: Not initialized.");
-    }
-    int ret = av_hwframe_get_buffer(vaapi_state_.hw_frames_ctx, vaapi_state_.hw_frame, 0);
-    if (ret < 0) {
-        throw std::runtime_error("VAAPI_ENCODE_ERROR: Failed to get hardware frame from pool: " + std::to_string(ret));
-    }
-    AVFrame *tmp_sw_frame = av_frame_alloc();
-    if (!tmp_sw_frame) {
-        av_frame_unref(vaapi_state_.hw_frame);
-        throw std::runtime_error("VAAPI_ENCODE_ERROR: Failed to allocate temporary mapping frame.");
-    }
-    ret = av_hwframe_map(tmp_sw_frame, vaapi_state_.hw_frame, AV_HWFRAME_MAP_WRITE);
-    if (ret < 0) {
-        av_frame_free(&tmp_sw_frame);
-        av_frame_unref(vaapi_state_.hw_frame);
-        throw std::runtime_error("VAAPI_ENCODE_ERROR: Failed to map hardware frame for writing: " + std::to_string(ret));
-    }
-    if (is_i444) {
-        libyuv::CopyPlane(y_plane, y_stride, tmp_sw_frame->data[0], tmp_sw_frame->linesize[0], width, height);
-        libyuv::CopyPlane(u_plane, u_stride, tmp_sw_frame->data[1], tmp_sw_frame->linesize[1], width, height);
-        libyuv::CopyPlane(v_plane, v_stride, tmp_sw_frame->data[2], tmp_sw_frame->linesize[2], width, height);
-    } else {
-        libyuv::CopyPlane(y_plane, y_stride, tmp_sw_frame->data[0], tmp_sw_frame->linesize[0], width, height);
-        libyuv::CopyPlane(u_plane, u_stride, tmp_sw_frame->data[1], tmp_sw_frame->linesize[1], width, height / 2);
-    }
-    av_frame_unref(tmp_sw_frame);
-    av_frame_free(&tmp_sw_frame);
-    vaapi_state_.hw_frame->pts = vaapi_state_.frame_count++;
-    if (force_idr_frame) {
-        vaapi_state_.hw_frame->pict_type = AV_PICTURE_TYPE_I;
-    } else {
-        vaapi_state_.hw_frame->pict_type = AV_PICTURE_TYPE_NONE;
-    }
-    ret = avcodec_send_frame(vaapi_state_.codec_ctx, vaapi_state_.hw_frame);
-    av_frame_unref(vaapi_state_.hw_frame);
-    if (ret < 0) {
-        throw std::runtime_error("VAAPI_ENCODE_ERROR: Failed to send frame to encoder: " + std::to_string(ret));
-    }
-    while (true) {
-        ret = avcodec_receive_packet(vaapi_state_.codec_ctx, vaapi_state_.packet);
-        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) {
-            return {};
-        } else if (ret < 0) {
-            throw std::runtime_error("VAAPI_ENCODE_ERROR: Failed to receive packet from encoder: " + std::to_string(ret));
-        }
-        StripeEncodeResult result;
-        result.type = StripeDataType::H264;
-        result.stripe_y_start = 0;
-        result.stripe_height = height;
-        result.frame_id = frame_counter;
-        if (vaapi_state_.packet->size > 0) {
-            const unsigned char TAG = 0x04;
-            unsigned char type_hdr = (vaapi_state_.packet->flags & AV_PKT_FLAG_KEY) ? 0x01 : 0x00;
-            int header_sz = 10;
-            result.data = new unsigned char[vaapi_state_.packet->size + header_sz];
-            result.size = vaapi_state_.packet->size + header_sz;
-            result.data[0] = TAG;
-            result.data[1] = type_hdr;
-            uint16_t net_val = htons(static_cast<uint16_t>(result.frame_id % 65536));
-            std::memcpy(result.data + 2, &net_val, 2);
-            net_val = htons(static_cast<uint16_t>(result.stripe_y_start));
-            std::memcpy(result.data + 4, &net_val, 2);
-            net_val = htons(static_cast<uint16_t>(width));
-            std::memcpy(result.data + 6, &net_val, 2);
-            net_val = htons(static_cast<uint16_t>(height));
-            std::memcpy(result.data + 8, &net_val, 2);
-            std::memcpy(result.data + header_sz, vaapi_state_.packet->data, vaapi_state_.packet->size);
-        }
-        av_packet_unref(vaapi_state_.packet);
-        return result;
-    }
-}
-
-/**
- * @brief Loads a watermark image from disk into memory.
- * This function is thread-safe. It reads the image file specified by the
- * internal watermark path setting using the stb_image library. It then
- * converts the pixel data to a 32-bit ARGB format suitable for fast
- * alpha blending in the `overlay_image` function.
- */
-void ScreenCaptureModule::load_watermark_image() {
-    std::string path_for_this_load;
-    WatermarkLocation location_for_this_load;
-
-    {
-      std::lock_guard<std::mutex> settings_lock(settings_mutex);
-      path_for_this_load = watermark_path_internal;
-      location_for_this_load = watermark_location_internal;
-    }
-
-    if (path_for_this_load.empty() || location_for_this_load == WatermarkLocation::NONE) {
-      std::lock_guard<std::mutex> data_lock(watermark_data_mutex_);
-      if (watermark_loaded_) {
-        std::cout << "Watermark cleared or not configured." << std::endl;
-      }
-      watermark_loaded_ = false;
-      watermark_image_data_.clear();
-      watermark_width_ = 0;
-      watermark_height_ = 0;
-      return;
-    }
-    int temp_w = 0, temp_h = 0, temp_channels = 0;
-    unsigned char* stbi_img_data = stbi_load(path_for_this_load.c_str(), &temp_w, &temp_h, &temp_channels, 4);
-    std::vector<uint32_t> temp_image_data_argb;
-    bool temp_loaded_successfully = false;
-
-    if (stbi_img_data) {
-      if (temp_w > 0 && temp_h > 0) {
-        temp_image_data_argb.resize(static_cast<size_t>(temp_w) * temp_h);
-        for (int y_idx = 0; y_idx < temp_h; ++y_idx) {
-          for (int x_idx = 0; x_idx < temp_w; ++x_idx) {
-            size_t src_pixel_idx = (static_cast<size_t>(y_idx) * temp_w + x_idx) * 4;
-            uint8_t r_val = stbi_img_data[src_pixel_idx + 0];
-            uint8_t g_val = stbi_img_data[src_pixel_idx + 1];
-            uint8_t b_val = stbi_img_data[src_pixel_idx + 2];
-            uint8_t a_val = stbi_img_data[src_pixel_idx + 3];
-            temp_image_data_argb[static_cast<size_t>(y_idx) * temp_w + x_idx] =
-                (static_cast<uint32_t>(a_val) << 24) |
-                (static_cast<uint32_t>(r_val) << 16) |
-                (static_cast<uint32_t>(g_val) << 8)  |
-                static_cast<uint32_t>(b_val);
-          }
-        }
-        temp_loaded_successfully = true;
-      } else {
-         std::cerr << "Watermark image loaded with invalid dimensions: " << path_for_this_load
-                   << " (" << temp_w << "x" << temp_h << ")" << std::endl;
-      }
-      stbi_image_free(stbi_img_data);
-    } else {
-      std::cerr << "Error loading watermark image: " << path_for_this_load
-                << " - " << stbi_failure_reason() << std::endl;
-    }
-    std::lock_guard<std::mutex> data_lock(watermark_data_mutex_);
-    if (temp_loaded_successfully) {
-      watermark_image_data_ = std::move(temp_image_data_argb);
-      watermark_width_ = temp_w;
-      watermark_height_ = temp_h;
-      watermark_loaded_ = true;
-      std::cout << "Watermark loaded: " << path_for_this_load
-                << " (" << watermark_width_ << "x" << watermark_height_ << ")" << std::endl;
-
-      if (location_for_this_load == WatermarkLocation::AN) {
-        watermark_current_x_ = 0;
-        watermark_current_y_ = 0;
-        watermark_dx_ = (watermark_dx_ != 0) ? std::abs(watermark_dx_) : 2;
-        watermark_dy_ = (watermark_dy_ != 0) ? std::abs(watermark_dy_) : 2;
-      }
-    } else {
-      watermark_loaded_ = false;
-      watermark_image_data_.clear();
-      watermark_width_ = 0;
-      watermark_height_ = 0;
-    }
-}
-
-/**
- * @brief Overlays a source image onto a destination frame with alpha blending.
- *
- * The source rectangle is clipped against the destination frame once, up front,
- * so only the visible part of the image is visited. Each visible source pixel is
- * then composited onto the frame according to its alpha channel: fully opaque
- * pixels (alpha 255) replace the destination, fully transparent pixels (alpha 0)
- * leave it untouched, and everything in between is blended with integer
- * arithmetic (the division by 255 truncates).
- *
- * Only the first three bytes of each destination pixel are written, in B, G, R
- * order; any further byte (e.g. the X of BGRX) is left as is. The call is a
- * no-op when either pointer is null, when any dimension is not positive, or
- * when the destination has fewer than three bytes per pixel.
- *
- * All index and offset computations are done in 64-bit arithmetic so that large
- * images, large strides or extreme placement offsets cannot overflow `int`.
- *
- * @param image_height Height of the source image to overlay.
- * @param image_width Width of the source image to overlay.
- * @param image_ptr Pointer to the source image data (32-bit ARGB format, tightly
- *                  packed: one row is exactly image_width pixels).
- * @param image_x The X-coordinate on the destination frame to place the top-left of the source image.
- * @param image_y The Y-coordinate on the destination frame to place the top-left of the source image.
- * @param frame_height Height of the destination frame buffer.
- * @param frame_width Width of the destination frame buffer.
- * @param frame_ptr Pointer to the destination frame buffer data (BGR or BGRX format).
- * @param frame_stride_bytes The stride (bytes per row) of the destination frame.
- * @param frame_bytes_per_pixel The bytes per pixel of the destination frame.
- */
-void ScreenCaptureModule::overlay_image(int image_height, int image_width, const uint32_t *image_ptr,
-                     int image_x, int image_y, int frame_height, int frame_width,
-                     unsigned char *frame_ptr, int frame_stride_bytes, int frame_bytes_per_pixel) {
-    // Nothing to draw or nothing to draw onto. Three bytes are written per
-    // destination pixel, so a narrower pixel format would be written out of bounds.
-    if (image_ptr == nullptr || frame_ptr == nullptr ||
-        image_width <= 0 || image_height <= 0 ||
-        frame_width <= 0 || frame_height <= 0 ||
-        frame_bytes_per_pixel < 3) {
-      return;
-    }
-
-    // Clip the source rectangle to the part that lands inside the frame.
-    // Source column c maps to frame column image_x + c, so visibility requires
-    // 0 <= image_x + c < frame_width (and likewise for rows).
-    const int64_t first_col = std::max<int64_t>(0, -static_cast<int64_t>(image_x));
-    const int64_t last_col = std::min<int64_t>(image_width, static_cast<int64_t>(frame_width) - image_x);
-    const int64_t first_row = std::max<int64_t>(0, -static_cast<int64_t>(image_y));
-    const int64_t last_row = std::min<int64_t>(image_height, static_cast<int64_t>(frame_height) - image_y);
-
-    for (int64_t row = first_row; row < last_row; ++row) {
-      const uint32_t *src_row = image_ptr + row * image_width;
-      unsigned char *dst_row = frame_ptr + (image_y + row) * frame_stride_bytes;
-
-      for (int64_t col = first_col; col < last_col; ++col) {
-        const uint32_t src_pixel = src_row[col];
-        const uint32_t alpha = (src_pixel >> 24) & 0xFF;
-        if (alpha == 0) {
-          continue;  // Fully transparent: destination stays unchanged.
-        }
-
-        const uint32_t red = (src_pixel >> 16) & 0xFF;
-        const uint32_t green = (src_pixel >> 8) & 0xFF;
-        const uint32_t blue = src_pixel & 0xFF;
-
-        unsigned char *dst_pixel = dst_row + (image_x + col) * frame_bytes_per_pixel;
-
-        if (alpha == 255) {
-          // Fully opaque: plain copy in B, G, R order.
-          dst_pixel[0] = static_cast<unsigned char>(blue);
-          dst_pixel[1] = static_cast<unsigned char>(green);
-          dst_pixel[2] = static_cast<unsigned char>(red);
-        } else {
-          // Partial transparency: weighted average of source and destination.
-          const uint32_t inverse_alpha = 255 - alpha;
-          dst_pixel[0] = static_cast<unsigned char>((blue * alpha + dst_pixel[0] * inverse_alpha) / 255);
-          dst_pixel[1] = static_cast<unsigned char>((green * alpha + dst_pixel[1] * inverse_alpha) / 255);
-          dst_pixel[2] = static_cast<unsigned char>((red * alpha + dst_pixel[2] * inverse_alpha) / 255);
-        }
-      }
-    }
-}
-
-/**
- * @brief The main function for the screen capture thread.
- * This loop continuously captures the screen at the target FPS. It handles
- * settings changes, screen capture via XShm, optional cursor and watermark
- * overlaying, color space conversion (BGRX to YUV), damage detection via
- * hashing, and dispatching encoding tasks to a thread pool. It then collects
- * the encoded results and invokes the user-provided callback.
- */
-void ScreenCaptureModule::capture_loop() {
-    auto start_time_loop = std::chrono::high_resolution_clock::now();
-    int frame_count_loop = 0;
-
-    int local_capture_width_actual;
-    int local_capture_height_actual;
-    int local_capture_x_offset;
-    int local_capture_y_offset;
-    double local_current_target_fps;
-    int local_current_jpeg_quality;
-    int local_current_paint_over_jpeg_quality;
-    bool local_current_use_paint_over_quality;
-    int local_current_paint_over_trigger_frames;
-    int local_current_damage_block_threshold;
-    int local_current_damage_block_duration;
-    int local_current_h264_crf;
-    int local_current_h264_paintover_crf;
-    int local_current_h264_paintover_burst_frames;
-    bool local_current_h264_fullcolor;
-    bool local_current_h264_fullframe;
-    bool local_current_h264_streaming_mode;
-    OutputMode local_current_output_mode;
-    bool local_current_capture_cursor;
-    int local_vaapi_render_node_index;
-    int xfixes_event_base = 0;
-    int xfixes_error_base = 0;
-    std::string local_watermark_path_setting;
-    WatermarkLocation local_watermark_location_setting;
-    bool local_use_cpu;
-    bool local_debug_logging;
-    bool local_current_h264_cbr_mode;
-    int local_current_h264_bitrate_kbps;
-    int local_current_h264_vbv_buffer_size_kb;
-    bool local_current_auto_adjust_screen_capture_size;
-
-    {
-      std::lock_guard<std::mutex> lock(settings_mutex);
-      local_capture_width_actual = capture_width;
-      local_capture_height_actual = capture_height;
-      local_capture_x_offset = capture_x;
-      local_capture_y_offset = capture_y;
-      local_current_target_fps = target_fps;
-      local_current_jpeg_quality = jpeg_quality;
-      local_current_paint_over_jpeg_quality = paint_over_jpeg_quality;
-      local_current_use_paint_over_quality = use_paint_over_quality;
-      local_current_paint_over_trigger_frames = paint_over_trigger_frames;
-      local_current_damage_block_threshold = damage_block_threshold;
-      local_current_damage_block_duration = damage_block_duration;
-      local_current_output_mode = output_mode;
-      local_current_h264_crf = h264_crf;
-      local_current_h264_paintover_crf = h264_paintover_crf;
-      local_current_h264_paintover_burst_frames = h264_paintover_burst_frames;
-      local_current_h264_fullcolor = h264_fullcolor;
-      local_current_h264_fullframe = h264_fullframe;
-      local_current_h264_streaming_mode = h264_streaming_mode;
-      local_current_capture_cursor = capture_cursor;
-      local_vaapi_render_node_index = vaapi_render_node_index;
-      local_use_cpu = use_cpu;
-      local_debug_logging = debug_logging;
-      local_watermark_path_setting = watermark_path_internal;
-      local_watermark_location_setting = watermark_location_internal;
-      local_current_h264_cbr_mode = h264_cbr_mode;
-      local_current_h264_bitrate_kbps = h264_bitrate_kbps;
-      local_current_h264_vbv_buffer_size_kb = h264_vbv_buffer_size_kb;
-      local_current_auto_adjust_screen_capture_size = auto_adjust_screen_capture_size;
-    }
-    if (local_current_output_mode == OutputMode::H264) {
-      if (local_capture_width_actual % 2 != 0 && local_capture_width_actual > 0) {
-        local_capture_width_actual--;
-      }
-      if (local_capture_height_actual % 2 != 0 && local_capture_height_actual > 0) {
-        local_capture_height_actual--;
-      }
-    }
-    if (local_capture_width_actual <=0 || local_capture_height_actual <=0) {
-        std::cerr << "Error: Invalid capture dimensions after initial adjustment." << std::endl;
-        return;
-    }
-
-    this->vaapi_operational = false;
-    this->nvenc_operational = false;
-
-    if (!local_use_cpu && local_vaapi_render_node_index >= 0 &&
-        local_current_output_mode == OutputMode::H264 && local_current_h264_fullframe) {
-        if (this->initialize_vaapi_encoder(local_vaapi_render_node_index,
-                                      local_capture_width_actual,
-                                      local_capture_height_actual,
-                                      local_current_h264_crf,
-                                      local_current_h264_fullcolor,
-                                      local_current_h264_cbr_mode,
-                                      local_current_h264_bitrate_kbps,
-                                      local_current_h264_vbv_buffer_size_kb)) {
-            this->vaapi_operational = true;
-            this->vaapi_force_next_idr_ = true;
-            std::cout << "VAAPI Encoder Initialized successfully." << std::endl;
-        } else {
-            std::cerr << "VAAPI Encoder initialization failed. Falling back to CPU." << std::endl;
-            local_use_cpu = true;
-            std::lock_guard<std::mutex> lock(settings_mutex);
-            this->use_cpu = true;
-        }
-    } else {
-      if (!local_use_cpu && this->is_nvidia_system_detected &&
-          local_current_output_mode == OutputMode::H264 && local_current_h264_fullframe) {
-        if (this->initialize_nvenc_encoder(local_capture_width_actual,
-                                     local_capture_height_actual,
-                                     local_current_h264_crf,
-                                     local_current_target_fps,
-                                     local_current_h264_fullcolor,
-                                     local_current_h264_cbr_mode,
-                                     local_current_h264_bitrate_kbps,
-                                     local_current_h264_vbv_buffer_size_kb)) {
-          this->nvenc_operational = true;
-          this->nvenc_force_next_idr_ = true;
-          std::cout << "NVENC Encoder Initialized successfully." << std::endl;
-        } else {
-          std::cerr << "NVENC Encoder initialization failed. Falling back to x264." << std::endl;
-          local_use_cpu = true;
-          std::lock_guard<std::mutex> lock(settings_mutex);
-          this->use_cpu = true;
-        }
-      } else {
-          if (!this->nvenc_operational && this->nvenc_state_.initialized) {
-            this->reset_nvenc_encoder();
-          }
-      }
-    }
-
-    std::chrono::duration < double > target_frame_duration_seconds =
-      std::chrono::duration < double > (1.0 / local_current_target_fps);
-
-    auto next_frame_time =
-      std::chrono::high_resolution_clock::now() + target_frame_duration_seconds;
-
-    const int MAX_ATTACH_ATTEMPTS = 5;
-    const int RETRY_BACKOFF_MS = 500;
-    char* display_env = std::getenv("DISPLAY");
-    const char* display_name = display_env ? display_env : ":0";
-    Display* display = XOpenDisplay(display_name);
-
-    if (!display) {
-      std::cerr << "Error: Failed to open X display " << display_name << std::endl;
-      return;
-    }
-
-    Window root_window = DefaultRootWindow(display);
-    int screen = DefaultScreen(display);
-    XWindowAttributes attributes;
-
-    if (XGetWindowAttributes(display, root_window, &attributes)) {
-      if (local_current_auto_adjust_screen_capture_size) {
-          std::cout << "[pixelflux] auto_adjust_screen_capture_size is enabled, ignoring requested capture size "
-                    << local_capture_width_actual << "x" << local_capture_height_actual
-                    << " and resetting x and y offset to 0" << std::endl;
-          
-          local_capture_width_actual = attributes.width;
-          local_capture_height_actual = attributes.height;
-          local_capture_x_offset = 0;
-          local_capture_y_offset = 0;
-
-          std::lock_guard<std::mutex> lock(settings_mutex);
-          this->capture_width = attributes.width;
-          this->capture_height = attributes.height;
-      } else {
-          if (local_capture_width_actual > attributes.width) {
-          local_capture_width_actual = attributes.width;
-          local_capture_x_offset = 0;
-        }
-        if (local_capture_height_actual > attributes.height) {
-            local_capture_height_actual = attributes.height;
-            local_capture_y_offset = 0;
-        }
-        if (local_capture_x_offset + local_capture_width_actual > attributes.width) {
-            local_capture_x_offset = attributes.width - local_capture_width_actual;
-        }
-        if (local_capture_y_offset + local_capture_height_actual > attributes.height) {
-            local_capture_y_offset = attributes.height - local_capture_height_actual;
-        }
-        if (local_capture_x_offset < 0) local_capture_x_offset = 0;
-        if (local_capture_y_offset < 0) local_capture_y_offset = 0;
-      }
-    }
-
-    this->yuv_planes_are_i444_ = local_current_h264_fullcolor;
-    if (local_current_output_mode == OutputMode::H264) {
-        bool use_nv12_planes = !local_use_cpu && local_current_h264_fullframe && !local_current_h264_fullcolor &&
-                       ((this->is_nvidia_system_detected && local_vaapi_render_node_index < 0) || (local_vaapi_render_node_index >= 0));
-        size_t y_plane_size = static_cast<size_t>(local_capture_width_actual) *
-                              local_capture_height_actual;
-        full_frame_y_plane_.assign(y_plane_size, 0);
-        full_frame_y_stride_ = local_capture_width_actual;
-
-        if (this->yuv_planes_are_i444_) {
-            full_frame_u_plane_.assign(y_plane_size, 0);
-            full_frame_v_plane_.assign(y_plane_size, 0);
-            full_frame_u_stride_ = local_capture_width_actual;
-            full_frame_v_stride_ = local_capture_width_actual;
-        } else if (use_nv12_planes) { 
-            size_t uv_plane_size = static_cast<size_t>(local_capture_width_actual) * (static_cast<size_t>(local_capture_height_actual) / 2);
-            full_frame_u_plane_.assign(uv_plane_size, 0);
-            full_frame_u_stride_ = local_capture_width_actual;
-            full_frame_v_plane_.clear();
-            full_frame_v_stride_ = 0;
-        } else {
-            size_t chroma_plane_size =
-                (static_cast<size_t>(local_capture_width_actual) / 2) *
-                (static_cast<size_t>(local_capture_height_actual) / 2);
-            full_frame_u_plane_.assign(chroma_plane_size, 0);
-            full_frame_v_plane_.assign(chroma_plane_size, 0);
-            full_frame_u_stride_ = local_capture_width_actual / 2;
-            full_frame_v_stride_ = local_capture_width_actual / 2;
-        }
-    } else {
-        full_frame_y_plane_.clear();
-        full_frame_u_plane_.clear();
-        full_frame_v_plane_.clear();
-    }
-
-    if (!local_watermark_path_setting.empty() && local_watermark_location_setting != WatermarkLocation::NONE) {
-        load_watermark_image();
-    }
-
-    if (!XShmQueryExtension(display)) {
-      std::cerr << "Error: X Shared Memory Extension not available!" << std::endl;
-      XCloseDisplay(display);
-      return;
-    }
-
-    std::cout << "X Shared Memory Extension available." << std::endl;
-
-    if (local_current_capture_cursor) {
-      if (!XFixesQueryExtension(display, &xfixes_event_base, &xfixes_error_base)) {
-        std::cerr << "Error: XFixes extension not available!" << std::endl;
-        XCloseDisplay(display);
-        return;
-      }
-      std::cout << "XFixes Extension available." << std::endl;
-    }
-
-    XShmSegmentInfo shminfo;
-    XImage* shm_image = nullptr;
-    bool shm_setup_complete = false;
-
-    for (int attempt = 1; attempt <= MAX_ATTACH_ATTEMPTS; ++attempt) {
-        memset(&shminfo, 0, sizeof(shminfo));
-        shm_image = XShmCreateImage(display, DefaultVisual(display, screen), DefaultDepth(display, screen),
-                                    ZPixmap, nullptr, &shminfo, local_capture_width_actual,
-                                    local_capture_height_actual);
-        if (!shm_image) {
-            std::cerr << "Attempt " << attempt << ": XShmCreateImage failed." << std::endl;
-            if (attempt < MAX_ATTACH_ATTEMPTS) std::this_thread::sleep_for(std::chrono::milliseconds(RETRY_BACKOFF_MS));
-            continue;
-        }
-
-        shminfo.shmid = shmget(IPC_PRIVATE, static_cast<size_t>(shm_image->bytes_per_line) * shm_image->height, IPC_CREAT | 0600);
-        if (shminfo.shmid < 0) {
-            perror("shmget");
-            XDestroyImage(shm_image);
-            if (attempt < MAX_ATTACH_ATTEMPTS) std::this_thread::sleep_for(std::chrono::milliseconds(RETRY_BACKOFF_MS));
-            continue;
-        }
-
-        shminfo.shmaddr = (char*)shmat(shminfo.shmid, nullptr, 0);
-        if (shminfo.shmaddr == (char*)-1) {
-            perror("shmat");
-            shmctl(shminfo.shmid, IPC_RMID, 0);
-            XDestroyImage(shm_image);
-            if (attempt < MAX_ATTACH_ATTEMPTS) std::this_thread::sleep_for(std::chrono::milliseconds(RETRY_BACKOFF_MS));
-            continue;
-        }
-
-        shminfo.readOnly = False;
-        shm_image->data = shminfo.shmaddr;
-        g_shm_attach_failed = false;
-        XErrorHandler old_handler = XSetErrorHandler(shm_attach_error_handler);
-        XShmAttach(display, &shminfo);
-        XSync(display, False);
-        XSetErrorHandler(old_handler);
-
-        if (g_shm_attach_failed) {
-            std::cerr << "Attempt " << attempt << "/" << MAX_ATTACH_ATTEMPTS << ": XShmAttach failed with an X server error." << std::endl;
-            shmdt(shminfo.shmaddr);
-            shmctl(shminfo.shmid, IPC_RMID, 0);
-            XDestroyImage(shm_image);
-            if (attempt < MAX_ATTACH_ATTEMPTS) {
-                std::this_thread::sleep_for(std::chrono::milliseconds(RETRY_BACKOFF_MS));
-            }
-            continue;
-        }
-        
-        shm_setup_complete = true;
-        break;
-    }
-
-    if (!shm_setup_complete) {
-        std::cerr << "ERROR: Failed to set up XShm after " << MAX_ATTACH_ATTEMPTS << " attempts. Exiting capture thread." << std::endl;
-        if (display) {
-            XCloseDisplay(display);
-            display = nullptr;
-        }
-        return;
-    }
-
-    std::cout << "XShm setup complete for " << local_capture_width_actual
-              << "x" << local_capture_height_actual << "." << std::endl;
-
-    int num_cores = std::max(1, (int)std::thread::hardware_concurrency());
-    std::cout << "CPU cores available: " << num_cores << std::endl;
-    int num_stripes_config = num_cores;
-
-    int N_processing_stripes;
-    if (local_capture_height_actual <= 0) {
-      N_processing_stripes = 0;
-    } else {
-      if (local_current_output_mode == OutputMode::H264) {
-        if (local_current_h264_fullframe) {
-          N_processing_stripes = 1;
-        } else {
-          const int MIN_H264_STRIPE_HEIGHT_PX = 64;
-          if (local_capture_height_actual < MIN_H264_STRIPE_HEIGHT_PX) {
-            N_processing_stripes = 1;
-          } else {
-            int max_stripes_by_min_height =
-              local_capture_height_actual / MIN_H264_STRIPE_HEIGHT_PX;
-            N_processing_stripes =
-              std::min(num_stripes_config, max_stripes_by_min_height);
-            if (N_processing_stripes == 0) N_processing_stripes = 1;
-          }
-        }
-      } else {
-        N_processing_stripes =
-          std::min(num_stripes_config, local_capture_height_actual);
-        if (N_processing_stripes == 0 && local_capture_height_actual > 0) {
-          N_processing_stripes = 1;
-        }
-      }
-    }
-    if (N_processing_stripes == 0 && local_capture_height_actual > 0) {
-       N_processing_stripes = 1;
-    }
-    std::stringstream settings_ss;
-    settings_ss << "Stream settings active -> Res: " << local_capture_width_actual << "x"
-                << local_capture_height_actual
-                << " | FPS: " << std::fixed << std::setprecision(1) << local_current_target_fps
-                << " | Stripes: " << N_processing_stripes;
-    if (local_current_output_mode == OutputMode::JPEG) {
-        settings_ss << " | Mode: JPEG";
-        settings_ss << " | Quality: " << local_current_jpeg_quality;
-        if (local_current_use_paint_over_quality) {
-            settings_ss << " | PaintOver Q: " << local_current_paint_over_jpeg_quality
-                        << " (Trigger: " << local_current_paint_over_trigger_frames << "f)";
-        }
-    } else {
-        std::string encoder_type = "CPU";
-        if (this->vaapi_operational) encoder_type = "VAAPI";
-        else if (this->nvenc_operational) encoder_type = "NVENC";
-        settings_ss << " | Mode: H264 (" << encoder_type << ")";
-        settings_ss << (local_current_h264_fullframe ? " FullFrame" : " Striped");
-        if (local_current_h264_streaming_mode) settings_ss << " Streaming";
-        if (!local_current_h264_cbr_mode) settings_ss << " | CRF: " << local_current_h264_crf;
-        else settings_ss << " | CBR: " << local_current_h264_bitrate_kbps;
-        if (local_current_use_paint_over_quality) {
-            settings_ss << " | PaintOver CRF: " << local_current_h264_paintover_crf
-                        << " (Burst: " << local_current_h264_paintover_burst_frames << "f)";
-        }
-        settings_ss << " | Colorspace: " << (local_current_h264_fullcolor ? "I444 (Full Range)" : "I420 (Limited Range)");
-    }
-    settings_ss << " | Damage Thresh: " << local_current_damage_block_threshold << "f"
-                << " | Damage Dur: " << local_current_damage_block_duration << "f";
-    std::cout << settings_ss.str() << std::endl;
-
-    std::vector<uint64_t> previous_hashes(num_stripes_config, 0);
-    std::vector<int> no_motion_frame_counts(num_stripes_config, 0);
-    std::vector<bool> paint_over_sent(num_stripes_config, false);
-    std::vector<int> current_jpeg_qualities(num_stripes_config);
-    std::vector<int> consecutive_stripe_changes(num_stripes_config, 0);
-    std::vector<bool> stripe_is_in_damage_block(num_stripes_config, false);
-    std::vector<int> stripe_damage_block_frames_remaining(num_stripes_config, 0);
-    std::vector<uint64_t> stripe_hash_at_damage_block_start(num_stripes_config, 0);
-    std::vector<int> h264_paintover_burst_frames_remaining(num_stripes_config, 0);
-
-    for (int i = 0; i < num_stripes_config; ++i) {
-      current_jpeg_qualities[i] =
-        local_current_use_paint_over_quality
-          ? local_current_paint_over_jpeg_quality
-          : local_current_jpeg_quality;
-    }
-
-    auto last_output_time = std::chrono::high_resolution_clock::now();
-
-    while (!stop_requested) {
-      auto current_loop_iter_start_time = std::chrono::high_resolution_clock::now();
-
-      if (current_loop_iter_start_time < next_frame_time) {
-        auto time_to_sleep = next_frame_time - current_loop_iter_start_time;
-        if (time_to_sleep > std::chrono::milliseconds(0)) {
-          std::this_thread::sleep_for(time_to_sleep);
-        }
-      }
-      auto intended_current_frame_time = next_frame_time;
-      next_frame_time += target_frame_duration_seconds;
-
-      int old_w = local_capture_width_actual;
-      int old_h = local_capture_height_actual;
-      bool yuv_config_changed = false;
-      std::string previous_watermark_path_in_loop = local_watermark_path_setting;
-      WatermarkLocation previous_watermark_location_in_loop = local_watermark_location_setting;
-      {
-        std::lock_guard<std::mutex> lock(settings_mutex);
-        local_capture_width_actual = capture_width;
-        local_capture_height_actual = capture_height;
-        local_capture_x_offset = capture_x;
-        local_capture_y_offset = capture_y;
-
-        if (local_current_target_fps != target_fps) {
-          local_current_target_fps = target_fps;
-          target_frame_duration_seconds = std::chrono::duration < double > (1.0 / local_current_target_fps);
-          next_frame_time = intended_current_frame_time + target_frame_duration_seconds;
-        }
-        local_current_jpeg_quality = jpeg_quality;
-        local_current_paint_over_jpeg_quality = paint_over_jpeg_quality;
-        local_current_use_paint_over_quality = use_paint_over_quality;
-        local_current_paint_over_trigger_frames = paint_over_trigger_frames;
-        local_current_damage_block_threshold = damage_block_threshold;
-        local_current_damage_block_duration = damage_block_duration;
-
-        if (local_current_output_mode != output_mode ||
-            local_current_h264_fullcolor != h264_fullcolor) {
-            yuv_config_changed = true;
-        }
-        local_current_output_mode = output_mode;
-        local_current_h264_crf = h264_crf;
-        local_current_h264_paintover_crf = h264_paintover_crf;
-        local_current_h264_paintover_burst_frames = h264_paintover_burst_frames;
-        local_current_h264_fullcolor = h264_fullcolor;
-        local_current_h264_fullframe = h264_fullframe;
-        local_current_h264_streaming_mode = h264_streaming_mode;
-        local_current_capture_cursor = capture_cursor;
-        local_vaapi_render_node_index = vaapi_render_node_index;
-        local_watermark_path_setting = watermark_path_internal;
-        local_watermark_location_setting = watermark_location_internal;
-        local_use_cpu = use_cpu;
-        local_debug_logging = debug_logging;
-        local_current_h264_cbr_mode = h264_cbr_mode;
-        local_current_h264_bitrate_kbps = h264_bitrate_kbps;
-        local_current_h264_vbv_buffer_size_kb = h264_vbv_buffer_size_kb;
-        local_current_auto_adjust_screen_capture_size = auto_adjust_screen_capture_size;
-      }
-
-      bool current_watermark_is_actually_loaded_in_loop;
-      {
-        std::lock_guard<std::mutex> data_lock(watermark_data_mutex_);
-        current_watermark_is_actually_loaded_in_loop = watermark_loaded_;
-      }
-
-      bool path_setting_changed_from_last_loop_iter = (local_watermark_path_setting != previous_watermark_path_in_loop);
-      bool location_setting_changed_from_last_loop_iter = (local_watermark_location_setting != previous_watermark_location_in_loop);
-      bool needs_load_due_to_state = (local_watermark_location_setting != WatermarkLocation::NONE &&
-                                      !local_watermark_path_setting.empty() &&
-                                      !current_watermark_is_actually_loaded_in_loop);
-      bool needs_clear_due_to_state = ( (local_watermark_location_setting == WatermarkLocation::NONE || local_watermark_path_setting.empty()) &&
-                                       current_watermark_is_actually_loaded_in_loop);
-
-      if (path_setting_changed_from_last_loop_iter ||
-          location_setting_changed_from_last_loop_iter ||
-          needs_load_due_to_state ||
-          needs_clear_due_to_state ||
-          (local_watermark_location_setting == WatermarkLocation::AN && previous_watermark_location_in_loop != WatermarkLocation::AN)
-          ) {
-          load_watermark_image();
-          previous_watermark_path_in_loop = local_watermark_path_setting;
-          previous_watermark_location_in_loop = local_watermark_location_setting;
-      }
-
-      if (local_current_output_mode == OutputMode::H264) {
-        if (local_capture_width_actual % 2 != 0 && local_capture_width_actual > 0) {
-          local_capture_width_actual--;
-        }
-        if (local_capture_height_actual % 2 != 0 && local_capture_height_actual > 0) {
-          local_capture_height_actual--;
-        }
-      }
-      if (local_capture_width_actual <=0 || local_capture_height_actual <=0) {
-          std::this_thread::sleep_for(std::chrono::milliseconds(10));
-          continue;
-      }
-
-      if (local_current_auto_adjust_screen_capture_size) {
-        XWindowAttributes attributes;
-        if (XGetWindowAttributes(display, root_window, &attributes)) {
-          if (local_capture_width_actual != attributes.width ||
-              local_capture_height_actual != attributes.height) {
-                if (debug_logging) {
-                    std::cout << "[pixelflux] Auto-adjusting capture size from "
-                              << local_capture_width_actual << "x"
-                              << local_capture_height_actual << " to "
-                              << attributes.width << "x"
-                              << attributes.height << std::endl;
-                }
-            local_capture_width_actual = attributes.width;
-            local_capture_height_actual = attributes.height;
-            local_capture_x_offset = 0;
-            local_capture_y_offset = 0;
-
-            std::lock_guard<std::mutex> lock(settings_mutex);
-            capture_width = local_capture_width_actual;
-            capture_height = local_capture_height_actual;
-            capture_x = 0;
-            capture_y = 0;
-          }
-        }
-      }
-
-      if (old_w != local_capture_width_actual || old_h != local_capture_height_actual ||
-          yuv_config_changed) {
-        std::cout << "Capture parameters changed. Re-initializing XShm and YUV planes."
-                  << std::endl;
-
-        if (shm_image) {
-            if (shminfo.shmaddr && shminfo.shmaddr != (char*)-1) {
-                XShmDetach(display, &shminfo);
-                shmdt(shminfo.shmaddr);
-                shminfo.shmaddr = (char*)-1;
-            }
-            if (shminfo.shmid != -1 && shminfo.shmid != 0) {
-                shmctl(shminfo.shmid, IPC_RMID, 0);
-                shminfo.shmid = -1;
-            }
-            XDestroyImage(shm_image);
-            shm_image = nullptr;
-            memset(&shminfo, 0, sizeof(shminfo));
-        }
-
-        shm_image = XShmCreateImage(
-          display, DefaultVisual(display, screen), DefaultDepth(display, screen),
-          ZPixmap, nullptr, &shminfo, local_capture_width_actual,
-          local_capture_height_actual);
-        if (!shm_image) {
-          std::cerr << "Error: XShmCreateImage failed during re-init." << std::endl;
-          if(display) { XCloseDisplay(display); } display = nullptr; return;
-        }
-        shminfo.shmid = shmget(
-          IPC_PRIVATE, static_cast<size_t>(shm_image->bytes_per_line) * shm_image->height,
-          IPC_CREAT | 0600);
-        if (shminfo.shmid < 0) {
-          perror("shmget re-init"); if(shm_image) { XDestroyImage(shm_image); } shm_image = nullptr;
-          if(display) { XCloseDisplay(display); } display = nullptr; return;
-        }
-        shminfo.shmaddr = (char*)shmat(shminfo.shmid, nullptr, 0);
-        if (shminfo.shmaddr == (char*)-1) {
-          perror("shmat re-init"); 
-          if(shminfo.shmid != -1) { shmctl(shminfo.shmid, IPC_RMID, 0); } shminfo.shmid = -1;
-          if(shm_image) { XDestroyImage(shm_image); } shm_image = nullptr;
-          if(display) { XCloseDisplay(display); } display = nullptr; return;
-        }
-        shminfo.readOnly = False;
-        shm_image->data = shminfo.shmaddr;
-        if (!XShmAttach(display, &shminfo)) {
-          if(shminfo.shmaddr != (char*)-1) { shmdt(shminfo.shmaddr); } shminfo.shmaddr = (char*)-1;
-          if(shminfo.shmid != -1) { shmctl(shminfo.shmid, IPC_RMID, 0); } shminfo.shmid = -1;
-          if(shm_image) { XDestroyImage(shm_image); } shm_image = nullptr;
-          if(display) { XCloseDisplay(display); } display = nullptr; return;
-        }
-
-        this->yuv_planes_are_i444_ = local_current_h264_fullcolor;
-        if (local_current_output_mode == OutputMode::H264) {
-            bool use_nv12_planes = !local_use_cpu && this->is_nvidia_system_detected && local_current_h264_fullframe && !local_current_h264_fullcolor;
-            
-            size_t y_plane_size = static_cast<size_t>(local_capture_width_actual) *
-                                  local_capture_height_actual;
-            full_frame_y_plane_.assign(y_plane_size, 0);
-            full_frame_y_stride_ = local_capture_width_actual;
-
-            if (this->yuv_planes_are_i444_) {
-                full_frame_u_plane_.assign(y_plane_size, 0);
-                full_frame_v_plane_.assign(y_plane_size, 0);
-                full_frame_u_stride_ = local_capture_width_actual;
-                full_frame_v_stride_ = local_capture_width_actual;
-            } else if (use_nv12_planes) {
-                size_t uv_plane_size = static_cast<size_t>(local_capture_width_actual) * (static_cast<size_t>(local_capture_height_actual) / 2);
-                full_frame_u_plane_.assign(uv_plane_size, 0);
-                full_frame_u_stride_ = local_capture_width_actual;
-                full_frame_v_plane_.clear();
-                full_frame_v_stride_ = 0;
-            } else {
-                size_t chroma_plane_size =
-                    (static_cast<size_t>(local_capture_width_actual) / 2) *
-                    (static_cast<size_t>(local_capture_height_actual) / 2);
-                full_frame_u_plane_.assign(chroma_plane_size, 0);
-                full_frame_v_plane_.assign(chroma_plane_size, 0);
-                full_frame_u_stride_ = local_capture_width_actual / 2;
-                full_frame_v_stride_ = local_capture_width_actual / 2;
-            }
-        } else {
-            full_frame_y_plane_.clear();
-            full_frame_u_plane_.clear();
-            full_frame_v_plane_.clear();
-        }
-
-        std::cout << "XShm and YUV planes re-initialization complete." << std::endl;
-        h264_minimal_store_.reset();
-      }
-
-      if (XShmGetImage(display, root_window, shm_image, local_capture_x_offset, local_capture_y_offset, AllPlanes)) {
-        unsigned char* shm_data_ptr = (unsigned char*)shm_image->data;
-        int shm_stride_bytes = shm_image->bytes_per_line;
-        int shm_bytes_per_pixel = shm_image->bits_per_pixel / 8;
-        if (local_current_capture_cursor) {
-          XFixesCursorImage *cursor_image = XFixesGetCursorImage(display);
-          if (cursor_image) {
-            std::vector<uint32_t> converted_cursor_pixels;
-            if (cursor_image->width > 0 && cursor_image->height > 0) {
-                converted_cursor_pixels.resize(static_cast<size_t>(cursor_image->width) * cursor_image->height);
-                for (int r = 0; r < cursor_image->height; ++r) {
-                    for (int c = 0; c < cursor_image->width; ++c) {
-                        unsigned long raw_pixel = cursor_image->pixels[static_cast<size_t>(r) * cursor_image->width + c];
-                        converted_cursor_pixels[static_cast<size_t>(r) * cursor_image->width + c] = static_cast<uint32_t>(raw_pixel);
-                    }
-                }
-            }
-
-            if (!converted_cursor_pixels.empty()) {
-                overlay_image(cursor_image->height, cursor_image->width, 
-                              converted_cursor_pixels.data(),
-                              cursor_image->x - local_capture_x_offset,
-                              cursor_image->y - local_capture_y_offset,
-                              local_capture_height_actual, local_capture_width_actual, 
-                              shm_data_ptr, shm_stride_bytes, shm_bytes_per_pixel);
-            }
-            XFree(cursor_image);
-          }
-        }
-
-        bool should_overlay_watermark_this_frame = false;
-        int overlay_wm_x = 0;
-        int overlay_wm_y = 0;
-        int temp_wm_w = 0;
-        int temp_wm_h = 0;
-        std::vector<uint32_t> local_watermark_data_copy;
-
-        {
-          std::lock_guard<std::mutex> data_lock(watermark_data_mutex_);
-          if (watermark_loaded_ && local_watermark_location_setting != WatermarkLocation::NONE &&
-              !watermark_image_data_.empty() && watermark_width_ > 0 && watermark_height_ > 0) {
-            
-            should_overlay_watermark_this_frame = true;
-            temp_wm_w = watermark_width_;
-            temp_wm_h = watermark_height_;
-            local_watermark_data_copy = watermark_image_data_;
-
-            if (local_watermark_location_setting == WatermarkLocation::AN) {
-              watermark_current_x_ += watermark_dx_;
-              watermark_current_y_ += watermark_dy_;
-  
-              if (watermark_current_x_ + watermark_width_ > local_capture_width_actual) {
-                watermark_current_x_ = local_capture_width_actual - watermark_width_;
-                if (watermark_current_x_ < 0) {
-                  watermark_current_x_ = 0;
-                }
-                watermark_dx_ *= -1;
-              } else if (watermark_current_x_ < 0) {
-                watermark_current_x_ = 0;
-                watermark_dx_ *= -1;
-              }
-  
-              if (watermark_current_y_ + watermark_height_ > local_capture_height_actual) {
-                watermark_current_y_ = local_capture_height_actual - watermark_height_;
-                if (watermark_current_y_ < 0) {
-                  watermark_current_y_ = 0;
-                }
-                watermark_dy_ *= -1;
-              } else if (watermark_current_y_ < 0) {
-                watermark_current_y_ = 0;
-                watermark_dy_ *= -1;
-              }
-              overlay_wm_x = watermark_current_x_;
-              overlay_wm_y = watermark_current_y_;
-            }
-          }
-        }
-
-        if (should_overlay_watermark_this_frame) {
-          if (local_watermark_location_setting != WatermarkLocation::AN) { 
-            switch (local_watermark_location_setting) {
-              case WatermarkLocation::TL:
-                overlay_wm_x = 0;
-                overlay_wm_y = 0;
-                break;
-              case WatermarkLocation::TR:
-                overlay_wm_x = local_capture_width_actual - temp_wm_w;
-                overlay_wm_y = 0;
-                break;
-              case WatermarkLocation::BL:
-                overlay_wm_x = 0;
-                overlay_wm_y = local_capture_height_actual - temp_wm_h;
-                break;
-              case WatermarkLocation::BR:
-                overlay_wm_x = local_capture_width_actual - temp_wm_w;
-                overlay_wm_y = local_capture_height_actual - temp_wm_h;
-                break;
-              case WatermarkLocation::MI:
-                overlay_wm_x = (local_capture_width_actual - temp_wm_w) / 2;
-                overlay_wm_y = (local_capture_height_actual - temp_wm_h) / 2;
-                break;
-              default:
-                should_overlay_watermark_this_frame = false;
-                break; 
-            }
-          }
-
-          if (should_overlay_watermark_this_frame) { 
-            if (overlay_wm_x < 0) {
-              overlay_wm_x = 0;
-            }
-            if (overlay_wm_y < 0) {
-              overlay_wm_y = 0;
-            }
-            
-            overlay_image(temp_wm_h, temp_wm_w,
-                          local_watermark_data_copy.data(), 
-                          overlay_wm_x, overlay_wm_y,
-                          local_capture_height_actual, local_capture_width_actual,
-                          shm_data_ptr, shm_stride_bytes, shm_bytes_per_pixel);
-          }
-        }
-
-        if (local_current_output_mode == OutputMode::H264) {
-            bool use_nv12_for_hw_encoder = (this->nvenc_operational || this->vaapi_operational) && !this->yuv_planes_are_i444_;
-
-            if (use_nv12_for_hw_encoder) {
-                libyuv::ARGBToNV12(shm_data_ptr, shm_stride_bytes,
-                                   full_frame_y_plane_.data(), full_frame_y_stride_,
-                                   full_frame_u_plane_.data(), full_frame_u_stride_,
-                                   local_capture_width_actual, local_capture_height_actual);
-            } else if (this->yuv_planes_are_i444_) {
-                libyuv::ARGBToI444(shm_data_ptr, shm_stride_bytes,
-                                   full_frame_y_plane_.data(), full_frame_y_stride_,
-                                   full_frame_u_plane_.data(), full_frame_u_stride_,
-                                   full_frame_v_plane_.data(), full_frame_v_stride_,
-                                   local_capture_width_actual, local_capture_height_actual);
-            } else {
-                libyuv::ARGBToI420(shm_data_ptr, shm_stride_bytes,
-                                   full_frame_y_plane_.data(), full_frame_y_stride_,
-                                   full_frame_u_plane_.data(), full_frame_u_stride_,
-                                   full_frame_v_plane_.data(), full_frame_v_stride_,
-                                   local_capture_width_actual, local_capture_height_actual);
-            }
-        }
-
-        std::vector<std::future<StripeEncodeResult>> futures;
-        std::vector<std::thread> threads;
-
-        int N_processing_stripes;
-        if (local_capture_height_actual <= 0) {
-          N_processing_stripes = 0;
-        } else {
-          if (local_current_output_mode == OutputMode::H264) {
-            if (local_current_h264_fullframe) {
-              N_processing_stripes = 1;
-            } else {
-              const int MIN_H264_STRIPE_HEIGHT_PX = 64;
-              if (local_capture_height_actual < MIN_H264_STRIPE_HEIGHT_PX) {
-                N_processing_stripes = 1;
-              } else {
-                int max_stripes_by_min_height =
-                  local_capture_height_actual / MIN_H264_STRIPE_HEIGHT_PX;
-                N_processing_stripes =
-                  std::min(num_stripes_config, max_stripes_by_min_height);
-                if (N_processing_stripes == 0) N_processing_stripes = 1;
-              }
-            }
-          } else {
-            N_processing_stripes =
-              std::min(num_stripes_config, local_capture_height_actual);
-            if (N_processing_stripes == 0 && local_capture_height_actual > 0) {
-              N_processing_stripes = 1;
-            }
-          }
-        }
-        if (N_processing_stripes == 0 && local_capture_height_actual > 0) {
-           N_processing_stripes = 1;
-        }
-
-        if (static_cast<int>(previous_hashes.size()) != N_processing_stripes) {
-            previous_hashes.assign(N_processing_stripes, 0);
-            no_motion_frame_counts.assign(N_processing_stripes, 0);
-            paint_over_sent.assign(N_processing_stripes, false);
-            current_jpeg_qualities.resize(N_processing_stripes);
-            consecutive_stripe_changes.assign(N_processing_stripes, 0);
-            stripe_is_in_damage_block.assign(N_processing_stripes, false);
-            stripe_damage_block_frames_remaining.assign(N_processing_stripes, 0);
-            stripe_hash_at_damage_block_start.assign(N_processing_stripes, 0);
-            h264_paintover_burst_frames_remaining.assign(N_processing_stripes, 0);
-
-            for(int k=0; k < N_processing_stripes; ++k) {
-                 current_jpeg_qualities[k] = local_current_use_paint_over_quality ?
-                                             local_current_paint_over_jpeg_quality :
-                                             local_current_jpeg_quality;
-            }
-        }
-
-        int h264_base_even_height = 0;
-        int h264_num_stripes_with_extra_pair = 0;
-        int current_y_start_for_stripe = 0;
-
-        if (local_current_output_mode == OutputMode::H264 && !local_current_h264_fullframe &&
-            N_processing_stripes > 0 && local_capture_height_actual > 0) {
-          int H = local_capture_height_actual;
-          int N = N_processing_stripes;
-          int base_h = H / N;
-          h264_base_even_height = (base_h > 0) ? (base_h - (base_h % 2)) : 0;
-          if (h264_base_even_height == 0 && H >= 2) {
-            h264_base_even_height = 2;
-          } else if (h264_base_even_height == 0 && H > 0 && N == 1) {
-             h264_base_even_height = H - (H % 2);
-             if (h264_base_even_height == 0 && H >= 2) h264_base_even_height = 2;
-          } else if (h264_base_even_height == 0 && H > 0) {
-             N_processing_stripes = 0;
-          }
-
-          if (h264_base_even_height > 0) {
-            int H_base_covered = h264_base_even_height * N;
-            int H_remaining = H - H_base_covered;
-            if (H_remaining < 0) H_remaining = 0;
-            h264_num_stripes_with_extra_pair = H_remaining / 2;
-            h264_num_stripes_with_extra_pair =
-              std::min(h264_num_stripes_with_extra_pair, N);
-          } else if (H > 0 && N_processing_stripes > 0) {
-             N_processing_stripes = 0;
-          }
-        }
-        bool any_stripe_encoded_this_frame = false;
-
-        int derived_h264_colorspace_setting;
-        bool derived_h264_use_full_range;
-        if (local_current_h264_fullcolor) {
-          derived_h264_colorspace_setting = 444;
-          derived_h264_use_full_range = true;
-        } else {
-          derived_h264_colorspace_setting = 420;
-          derived_h264_use_full_range = false;
-        }
-
-        for (int i = 0; i < N_processing_stripes; ++i) {
-          int start_y = 0;
-          int current_stripe_height = 0;
-
-          if (local_current_output_mode == OutputMode::H264) {
-            if (local_current_h264_fullframe) {
-                start_y = 0;
-                current_stripe_height = local_capture_height_actual;
-            } else {
-                start_y = current_y_start_for_stripe;
-                if (h264_base_even_height > 0) {
-                    current_stripe_height = h264_base_even_height;
-                    if (i < h264_num_stripes_with_extra_pair) {
-                        current_stripe_height += 2;
-                    }
-                } else if (N_processing_stripes == 1) {
-                    current_stripe_height = local_capture_height_actual -
-                                            (local_capture_height_actual % 2);
-                    if (current_stripe_height == 0 && local_capture_height_actual >=2)
-                        current_stripe_height = 2;
-                } else {
-                    current_stripe_height = 0;
-                }
-            }
-          } else {
-            if (N_processing_stripes > 0) {
-                int base_stripe_height_jpeg = local_capture_height_actual / N_processing_stripes;
-                int remainder_height_jpeg = local_capture_height_actual % N_processing_stripes;
-                start_y = i * base_stripe_height_jpeg + std::min(i, remainder_height_jpeg);
-                current_stripe_height = base_stripe_height_jpeg +
-                                        (i < remainder_height_jpeg ? 1 : 0);
-            } else {
-                current_stripe_height = 0;
-            }
-          }
-
-          if (current_stripe_height <= 0) {
-            continue;
-          }
-
-          if (start_y + current_stripe_height > local_capture_height_actual) {
-             current_stripe_height = local_capture_height_actual - start_y;
-             if (current_stripe_height <= 0) continue;
-             if (local_current_output_mode == OutputMode::H264 && !local_current_h264_fullframe &&
-                 current_stripe_height % 2 != 0 && current_stripe_height > 0) {
-                 current_stripe_height--;
-             }
-             if (current_stripe_height <= 0) continue;
-          }
-
-          if (local_current_output_mode == OutputMode::H264 && !local_current_h264_fullframe) {
-            current_y_start_for_stripe += current_stripe_height;
-          }
-
-          auto calculate_current_hash = [&]() {
-              if (local_current_output_mode == OutputMode::H264) {
-                  const uint8_t* y_plane_stripe_ptr = full_frame_y_plane_.data() +
-                      static_cast<size_t>(start_y) * full_frame_y_stride_;
-                  const uint8_t* u_plane_stripe_ptr = full_frame_u_plane_.data() +
-                      (static_cast<size_t>(this->yuv_planes_are_i444_ ?
-                      start_y : (start_y / 2)) * full_frame_u_stride_);
-
-                  bool use_nv12_path = this->nvenc_operational && !this->yuv_planes_are_i444_;
-
-                  const uint8_t* v_plane_stripe_ptr = use_nv12_path ? nullptr :
-                      (full_frame_v_plane_.empty() ? nullptr : full_frame_v_plane_.data() +
-                      (static_cast<size_t>(this->yuv_planes_are_i444_ ?
-                      start_y : (start_y / 2)) * full_frame_v_stride_));
-
-                  int v_stride = use_nv12_path ? 0 : full_frame_v_stride_;
-
-                  return calculate_yuv_stripe_hash(
-                      y_plane_stripe_ptr, full_frame_y_stride_,
-                      u_plane_stripe_ptr, full_frame_u_stride_,
-                      v_plane_stripe_ptr, v_stride,
-                      local_capture_width_actual, current_stripe_height,
-                      !this->yuv_planes_are_i444_, local_current_h264_fullframe);
-              } else {
-                  const unsigned char* shm_stripe_start_ptr = shm_data_ptr +
-                      static_cast<size_t>(start_y) * shm_stride_bytes;
-                  return calculate_bgr_stripe_hash_from_shm(
-                      shm_stripe_start_ptr, shm_stride_bytes,
-                      local_capture_width_actual, current_stripe_height,
-                      shm_bytes_per_pixel);
-              }
-          };
-
-          uint64_t current_hash = 0;
-          bool hash_calculated_this_iteration = false;
-          bool send_this_stripe = false;
-          bool force_idr_for_paintover = false;
-          int crf_for_encode = local_current_h264_crf;
-          if (local_current_output_mode == OutputMode::H264 && h264_paintover_burst_frames_remaining[i] > 0) {
-              send_this_stripe = true;
-              crf_for_encode = local_current_h264_paintover_crf;
-              h264_paintover_burst_frames_remaining[i]--;
-              current_hash = calculate_current_hash();
-              hash_calculated_this_iteration = true;
-              if (current_hash != previous_hashes[i]) {
-                  h264_paintover_burst_frames_remaining[i] = 0;
-                  paint_over_sent[i] = false;
-                  crf_for_encode = local_current_h264_crf;
-                  consecutive_stripe_changes[i] = 1;
-              }
-          }
-          else if (local_current_output_mode == OutputMode::H264 && local_current_h264_streaming_mode) {
-              send_this_stripe = true;
-          }
-          else if (stripe_is_in_damage_block[i]) {
-              send_this_stripe = true;
-              stripe_damage_block_frames_remaining[i]--;
-              if (stripe_damage_block_frames_remaining[i] <= 0) {
-                  current_hash = calculate_current_hash();
-                  hash_calculated_this_iteration = true;
-
-                  if (current_hash != stripe_hash_at_damage_block_start[i]) {
-                      stripe_damage_block_frames_remaining[i] = local_current_damage_block_duration;
-                      stripe_hash_at_damage_block_start[i] = current_hash;
-                  } else {
-                      stripe_is_in_damage_block[i] = false;
-                      consecutive_stripe_changes[i] = 0;
-                      no_motion_frame_counts[i] = 1;
-                  }
-              }
-          }
-          else {
-              current_hash = calculate_current_hash();
-              hash_calculated_this_iteration = true;
-              if (current_hash != previous_hashes[i]) {
-                  send_this_stripe = true;
-                  no_motion_frame_counts[i] = 0;
-                  paint_over_sent[i] = false;
-                  consecutive_stripe_changes[i]++;
-                  current_jpeg_qualities[i] = local_current_jpeg_quality;
-                  h264_paintover_burst_frames_remaining[i] = 0;
-                  if (consecutive_stripe_changes[i] >= local_current_damage_block_threshold) {
-                      stripe_is_in_damage_block[i] = true;
-                      stripe_damage_block_frames_remaining[i] = local_current_damage_block_duration;
-                      stripe_hash_at_damage_block_start[i] = current_hash;
-                  }
-              } else {
-                  send_this_stripe = false;
-                  consecutive_stripe_changes[i] = 0;
-                  no_motion_frame_counts[i]++;
-                  if (no_motion_frame_counts[i] >= local_current_paint_over_trigger_frames && !paint_over_sent[i]) {
-                      if (local_current_output_mode == OutputMode::JPEG && 
-                          local_current_use_paint_over_quality &&
-                          local_current_paint_over_jpeg_quality > local_current_jpeg_quality) {
-                          send_this_stripe = true;
-                          current_jpeg_qualities[i] = local_current_paint_over_jpeg_quality;
-                          paint_over_sent[i] = true;
-                      } else if (local_current_output_mode == OutputMode::H264) {
-                          if (local_current_use_paint_over_quality && local_current_h264_paintover_crf < local_current_h264_crf) {
-                              send_this_stripe = true;
-                              paint_over_sent[i] = true;
-                              if (this->nvenc_operational) {
-                                  this->nvenc_force_next_idr_ = true;
-                                  h264_paintover_burst_frames_remaining[i] = local_current_h264_paintover_burst_frames - 1;
-                              } else if (this->vaapi_operational) {
-                                  this->vaapi_force_next_idr_ = true;
-                                  h264_paintover_burst_frames_remaining[i] = 0;
-                              } else {
-                                  force_idr_for_paintover = true;
-                                  crf_for_encode = local_current_h264_paintover_crf;
-                                  h264_paintover_burst_frames_remaining[i] = local_current_h264_paintover_burst_frames - 1;
-                              }
-                          }
-                      }
-                  }
-              }
-          }
-
-          if (hash_calculated_this_iteration) {
-              previous_hashes[i] = current_hash;
-          }
-
-          if (send_this_stripe) {
-            any_stripe_encoded_this_frame = true;
-            total_stripes_encoded_this_interval++;
-            if (local_current_output_mode == OutputMode::JPEG) {
-              int quality_to_use = current_jpeg_qualities[i];
-              if (paint_over_sent[i] && local_current_use_paint_over_quality &&
-                  no_motion_frame_counts[i] >= local_current_paint_over_trigger_frames) {
-                   quality_to_use = local_current_paint_over_jpeg_quality;
-              }
-
-              std::packaged_task<StripeEncodeResult(
-                int, int, int, int, const unsigned char*, int, int, int, int)>
-                task(encode_stripe_jpeg);
-              futures.push_back(task.get_future());
-              threads.push_back(std::thread(
-                std::move(task), i, start_y, current_stripe_height,
-                local_capture_width_actual,
-                shm_data_ptr,
-                shm_stride_bytes,
-                shm_bytes_per_pixel,
-                quality_to_use,
-                this->frame_counter));
-            } else {
-              if (this->vaapi_operational) {
-                std::packaged_task<StripeEncodeResult()> task([=, this]() {
-                    bool force_idr = this->vaapi_force_next_idr_.exchange(false);
-                    return this->encode_fullframe_vaapi(
-                        local_capture_width_actual, local_capture_height_actual, local_current_target_fps,
-                        full_frame_y_plane_.data(), full_frame_y_stride_,
-                        full_frame_u_plane_.data(), full_frame_u_stride_,
-                        this->yuv_planes_are_i444_ ? full_frame_v_plane_.data() : nullptr,
-                        this->yuv_planes_are_i444_ ? full_frame_v_stride_ : 0,
-                        this->yuv_planes_are_i444_,
-                        this->frame_counter, force_idr
-                    );
-                });
-                futures.push_back(task.get_future());
-                threads.push_back(std::thread(std::move(task)));
-              } else if (this->nvenc_operational) {
-                int target_qp_for_frame = crf_for_encode;
-                if (local_current_use_paint_over_quality && this->nvenc_force_next_idr_) {
-                    target_qp_for_frame = local_current_h264_paintover_crf;
-                }
-                if (!this->initialize_nvenc_encoder(local_capture_width_actual,
-                                              local_capture_height_actual,
-                                              target_qp_for_frame,
-                                              local_current_target_fps,
-                                              local_current_h264_fullcolor,
-                                              local_current_h264_cbr_mode,
-                                              local_current_h264_bitrate_kbps,
-                                              local_current_h264_vbv_buffer_size_kb)) {
-                    std::cerr << "NVENC: Re-initialization for QP change failed. Disabling NVENC." << std::endl;
-                    this->nvenc_operational = false;
-                    this->reset_nvenc_encoder();
-                    continue;
-                }
-                std::packaged_task<StripeEncodeResult()> task([=, this]() {
-                    bool force_idr = this->nvenc_force_next_idr_.exchange(false);
-                    return this->encode_fullframe_nvenc(
-                        local_capture_width_actual, local_capture_height_actual,
-                        full_frame_y_plane_.data(), full_frame_y_stride_,
-                        full_frame_u_plane_.data(), full_frame_u_stride_,
-                        this->yuv_planes_are_i444_ ? full_frame_v_plane_.data() : nullptr,
-                        this->yuv_planes_are_i444_ ? full_frame_v_stride_ : 0,
-                        this->yuv_planes_are_i444_, this->frame_counter, force_idr
-                    );
-                });
-                futures.push_back(task.get_future());
-                threads.push_back(std::thread(std::move(task)));
-              } else {
-                if (force_idr_for_paintover) {
-                  std::lock_guard<std::mutex> lock(h264_minimal_store_.store_mutex);
-                  h264_minimal_store_.ensure_size(i);
-                  if (i < static_cast<int>(h264_minimal_store_.force_idr_flags.size())) {
-                      h264_minimal_store_.force_idr_flags[i] = true;
-                  }
-                }
-
-                const uint8_t* y_plane_for_thread = full_frame_y_plane_.data() +
-                    static_cast<size_t>(start_y) * full_frame_y_stride_;
-                const uint8_t* u_plane_for_thread = full_frame_u_plane_.data() +
-                    (static_cast<size_t>(this->yuv_planes_are_i444_ ?
-                     start_y : (start_y / 2)) * full_frame_u_stride_);
-                const uint8_t* v_plane_for_thread = full_frame_v_plane_.data() +
-                    (static_cast<size_t>(this->yuv_planes_are_i444_ ?
-                     start_y : (start_y / 2)) * full_frame_v_stride_);
-                
-                bool force_idr = this->force_next_idr_.exchange(false);
-                std::packaged_task<StripeEncodeResult(
-                  MinimalEncoderStore&, int, int, int, int, const uint8_t*, int,
-                  const uint8_t*, int, const uint8_t*, int, bool, int, int, int,
-                  bool, int, bool, bool, int, int)>
-                  task(encode_stripe_h264);
-                futures.push_back(task.get_future());
-                threads.push_back(std::thread(
-                  std::move(task), std::ref(h264_minimal_store_), i, start_y, current_stripe_height,
-                  local_capture_width_actual,
-                  y_plane_for_thread, full_frame_y_stride_,
-                  u_plane_for_thread, full_frame_u_stride_,
-                  v_plane_for_thread, full_frame_v_stride_,
-                  this->yuv_planes_are_i444_,
-                  this->frame_counter,
-                  crf_for_encode,
-                  derived_h264_colorspace_setting,
-                  derived_h264_use_full_range,
-                  local_current_h264_streaming_mode,
-                  force_idr,
-                  local_current_h264_cbr_mode,
-                  local_current_h264_bitrate_kbps,
-                  local_current_h264_vbv_buffer_size_kb
-                  ));
-              }
-            }
-          }
-        }
-
-
-        std::vector<StripeEncodeResult> stripe_results;
-        stripe_results.reserve(futures.size());
-        for (auto& future : futures) {
-          try {
-            stripe_results.push_back(future.get());
-          } catch (const std::runtime_error& e) {
-            if (std::string(e.what()).find("NVENC_") != std::string::npos) {
-                std::cerr << "ENCODE_THREAD_ERROR: " << e.what() << std::endl;
-                std::cerr << "Disabling NVENC for this session due to runtime error." << std::endl;
-                this->nvenc_operational = false;
-                this->reset_nvenc_encoder();
-                this->nvenc_force_next_idr_ = true;
-            } else if (std::string(e.what()).find("VAAPI_") != std::string::npos) {
-                std::cerr << "ENCODE_THREAD_ERROR: " << e.what() << std::endl;
-                std::cerr << "Disabling VAAPI for this session due to runtime error." << std::endl;
-                this->vaapi_operational = false;
-                this->reset_vaapi_encoder();
-                this->vaapi_force_next_idr_ = true;
-            } else {
-                std::cerr << "ENCODE_THREAD_ERROR: " << e.what() << std::endl;
-            }
-            stripe_results.push_back({});
-          }
-        }
-        futures.clear();
-
-        for (StripeEncodeResult& result : stripe_results) {
-          if (stripe_callback != nullptr && result.data != nullptr && result.size > 0) {
-            stripe_callback(&result, user_data);
-          }
-        }
-        stripe_results.clear();
-
-        for (auto& thread : threads) {
-          if (thread.joinable()) {
-            thread.join();
-          }
-        }
-        threads.clear();
-
-        this->frame_counter++;
-        if (any_stripe_encoded_this_frame) {
-          encoded_frame_count++;
-        }
-        frame_count_loop++;
-
-        auto current_time_for_fps_log = std::chrono::high_resolution_clock::now();
-        auto elapsed_time_for_fps_log =
-          std::chrono::duration_cast<std::chrono::seconds>(
-            current_time_for_fps_log - start_time_loop);
-
-        if (elapsed_time_for_fps_log.count() >= 1) {
-          frame_count_loop = 0;
-          start_time_loop = std::chrono::high_resolution_clock::now();
-        }
-
-        auto current_output_time_log = std::chrono::high_resolution_clock::now();
-        auto output_elapsed_time_log =
-          std::chrono::duration_cast<std::chrono::seconds>(
-            current_output_time_log - last_output_time);
-
-        if (local_debug_logging && output_elapsed_time_log.count() >= 1) {
-          double actual_fps_val =
-            (encoded_frame_count > 0 && output_elapsed_time_log.count() > 0)
-            ? static_cast<double>(encoded_frame_count) / output_elapsed_time_log.count()
-            : 0.0;
-          double total_stripes_per_second_val =
-            (total_stripes_encoded_this_interval > 0 && output_elapsed_time_log.count() > 0)
-            ? static_cast<double>(total_stripes_encoded_this_interval) /
-              output_elapsed_time_log.count()
-            : 0.0;
-
-          std::cout << "Res: " << local_capture_width_actual << "x"
-                    << local_capture_height_actual
-                    << " Mode: "
-                    << (local_current_output_mode == OutputMode::JPEG ? "JPEG" : (this->vaapi_operational ? "H264 (VAAPI)" : (this->nvenc_operational ? "H264 (NVENC)" : "H264 (CPU)")))
-                    << (local_current_output_mode == OutputMode::H264
-                        ? (std::string(local_current_h264_fullcolor ?
-                                       " CS_IN:I444" : " CS_IN:I420") +
-                           (derived_h264_use_full_range ? " FR" : " LR") +
-                           (local_current_h264_fullframe ? " FF" : " Striped"))
-                        : std::string(""))
-                    << " Stripes: " << N_processing_stripes
-                    << (local_current_output_mode == OutputMode::H264 && local_current_h264_cbr_mode ?
-                        " CBR:" + std::to_string(local_current_h264_bitrate_kbps): 
-                        (local_current_output_mode == OutputMode::H264
-                        ? " CRF" + std::to_string(local_current_h264_crf)
-                        : " Q:" + std::to_string(local_current_jpeg_quality)))
-                    << " EncFPS: " << std::fixed << std::setprecision(2) << actual_fps_val
-                    << " EncStripes/s: " << std::fixed << std::setprecision(2)
-                    << total_stripes_per_second_val
-                    << std::endl;
-
-          encoded_frame_count = 0;
-          total_stripes_encoded_this_interval = 0;
-          last_output_time = std::chrono::high_resolution_clock::now();
-        }
-
-      } else {
-        std::cerr << "Failed to capture XImage using XShmGetImage" << std::endl;
-        std::this_thread::sleep_for(std::chrono::milliseconds(100));
-      }
-    }
-
-    if (display) {
-        if (shm_image) {
-            if (shminfo.shmaddr && shminfo.shmaddr != (char*)-1) {
-                 XShmDetach(display, &shminfo);
-                 shmdt(shminfo.shmaddr);
-                 shminfo.shmaddr = (char*)-1;
-            }
-            if (shminfo.shmid != -1 && shminfo.shmid != 0) {
-                 shmctl(shminfo.shmid, IPC_RMID, 0);
-                 shminfo.shmid = -1;
-            }
-            XDestroyImage(shm_image);
-            shm_image = nullptr;
-        }
-        XSync(display, False);
-        XCloseDisplay(display);
-        display = nullptr;
-    }
-    std::cout << "Capture loop stopped. X resources released." << std::endl;
-}
-
-/**
- * @brief Encodes a horizontal stripe of an image from shared memory into JPEG format.
- *
- * This function takes a segment of raw image data (assumed to be in BGRX or similar format
- * where BGR components are accessible) from a shared memory buffer, converts it to RGB,
- * and then compresses it into a JPEG image. The resulting JPEG data is prepended with a
- * custom 4-byte header containing the frame ID and stripe's Y-offset.
- *
- * @param thread_id Identifier for the calling thread, primarily for logging purposes.
- * @param stripe_y_start The Y-coordinate of the top edge of the stripe within the full source image.
- * @param stripe_height The height of the stripe in pixels.
- * @param capture_width_actual The width of the stripe in pixels.
- * @param shm_data_base Pointer to the beginning of the *full* source image data in shared memory.
- *                      The function calculates the offset to the stripe using stripe_y_start.
- * @param shm_stride_bytes The number of bytes from the start of one row of the source image
- *                         to the start of the next row (pitch).
- * @param shm_bytes_per_pixel The number of bytes per pixel in the source shared memory image
- *                            (e.g., 4 for BGRX, 3 for BGR).
- * @param jpeg_quality The desired JPEG quality, ranging from 0 (lowest) to 100 (highest).
- * @param frame_counter An identifier for the current frame, included in the output header.
- * @return A StripeEncodeResult struct.
- *         - If successful, `type` is `StripeDataType::JPEG`, `data` points to the
- *           encoded JPEG (including a 4-byte custom header: frame_id (uint16_t MSB)
- *           and stripe_y_start (uint16_t MSB)), and `size` is the total size of `data`.
- *         - On failure (e.g., invalid input, memory allocation error), `type` is
- *           `StripeDataType::UNKNOWN`, and `data` is `nullptr`.
- *         The caller is responsible for freeing `result.data` using
- *         `free_stripe_encode_result_data` or `delete[]`.
- */
-StripeEncodeResult encode_stripe_jpeg(
-  int thread_id,
-  int stripe_y_start,
-  int stripe_height,
-  int capture_width_actual,
-  const unsigned char* shm_data_base,
-  int shm_stride_bytes,
-  int shm_bytes_per_pixel,
-  int jpeg_quality,
-  int frame_counter) {
-  StripeEncodeResult result;
-  result.type = StripeDataType::JPEG;
-  result.stripe_y_start = stripe_y_start;
-  result.stripe_height = stripe_height;
-  result.frame_id = frame_counter;
-
-  if (!shm_data_base || stripe_height <= 0 || capture_width_actual <= 0 ||
-      shm_bytes_per_pixel <= 0) {
-    std::cerr << "JPEG T" << thread_id
-              << ": Invalid input for JPEG encoding from SHM." << std::endl;
-    result.type = StripeDataType::UNKNOWN;
-    return result;
-  }
-
-  jpeg_compress_struct cinfo;
-  jpeg_error_mgr jerr;
-  cinfo.err = jpeg_std_error(&jerr);
-  jpeg_create_compress(&cinfo);
-
-  cinfo.image_width = capture_width_actual;
-  cinfo.image_height = stripe_height;
-
-  if (shm_bytes_per_pixel == 4) {
-    cinfo.input_components = 4;
-    cinfo.in_color_space = JCS_EXT_BGRX;
-  } else {
-    cinfo.input_components = 3;
-    cinfo.in_color_space = JCS_EXT_BGR;
-  }
-
-  jpeg_set_defaults(&cinfo);
-  jpeg_set_quality(&cinfo, jpeg_quality, TRUE);
-
-  unsigned char* jpeg_buffer = nullptr;
-  unsigned long jpeg_size_temp = 0;
-  jpeg_mem_dest(&cinfo, &jpeg_buffer, &jpeg_size_temp);
-
-  jpeg_start_compress(&cinfo, TRUE);
-
-  JSAMPROW row_pointer[1];
-  for (int y_in_stripe = 0; y_in_stripe < stripe_height; ++y_in_stripe) {
-    const unsigned char* shm_current_row_in_full_frame_ptr =
-        shm_data_base + static_cast<size_t>(stripe_y_start + y_in_stripe) * shm_stride_bytes;
-    row_pointer[0] = (JSAMPROW)shm_current_row_in_full_frame_ptr;
-    jpeg_write_scanlines(&cinfo, row_pointer, 1);
-  }
-
-  jpeg_finish_compress(&cinfo);
-
-  if (jpeg_size_temp > 0 && jpeg_buffer) {
-    int padding_size = 4;
-    result.data = new (std::nothrow) unsigned char[jpeg_size_temp + padding_size];
-    if (!result.data) {
-      std::cerr << "JPEG T" << thread_id
-                << ": Failed to allocate memory for JPEG output." << std::endl;
-      jpeg_destroy_compress(&cinfo);
-      if (jpeg_buffer)
-        free(jpeg_buffer);
-      result.type = StripeDataType::UNKNOWN;
-      return result;
-    }
-
-    uint16_t frame_counter_net = htons(static_cast<uint16_t>(frame_counter % 65536));
-    uint16_t stripe_y_start_net = htons(static_cast<uint16_t>(stripe_y_start));
-
-    std::memcpy(result.data, &frame_counter_net, 2);
-    std::memcpy(result.data + 2, &stripe_y_start_net, 2);
-    std::memcpy(result.data + padding_size, jpeg_buffer, jpeg_size_temp);
-    result.size = static_cast<int>(jpeg_size_temp) + padding_size;
-  } else {
-    result.size = 0;
-    result.data = nullptr;
-  }
-
-  jpeg_destroy_compress(&cinfo);
-  if (jpeg_buffer) {
-    free(jpeg_buffer);
-  }
-  return result;
-}
-
-/**
- * @brief Encodes a horizontal YUV stripe into an H.264 bitstream using x264.
- *
- * Manages a thread-specific x264 encoder instance from the provided encoder store.
- * The encoder is re-initialized if input parameters
- * such as resolution or colorspace change. The CRF or bitrate can be reconfigured
- * between frames without a full re-initialization.
- *
- * The output NAL units are packaged into a StripeEncodeResult with a custom
- * 10-byte header.
- *
- * @param h264_minimal_store A reference to the encoder store for this instance.
- * @param thread_id         Identifier for the calling thread, used to select a
- *                          dedicated encoder instance.
- * @param stripe_y_start    The Y-coordinate of the stripe's top edge.
- * @param stripe_height     Height of the stripe in pixels. Must be an even value.
- * @param capture_width_actual Width of the stripe in pixels. Must be an even value.
- * @param y_plane_stripe_start Pointer to the start of the Y plane data for this stripe.
- * @param y_stride          Stride in bytes for the Y plane.
- * @param u_plane_stripe_start Pointer to the start of the U plane data for this stripe.
- * @param u_stride          Stride in bytes for the U plane.
- * @param v_plane_stripe_start Pointer to the start of the V plane data for this stripe.
- * @param v_stride          Stride in bytes for the V plane.
- * @param is_i444_input     `true` for I444 colorspace, `false` for I420.
- * @param frame_counter     The frame number, used to set the picture's PTS.
- * @param current_crf_setting The target Constant Rate Factor (CRF) for CRF mode.
- * @param colorspace_setting Integer representing the colorspace (444 or 420).
- * @param use_full_range    If `true`, signals full-range color in the VUI.
- * @param h264_streaming_mode If `true`, enables streaming mode optimizations.
- * @param force_idr         If `true`, forces the encoder to generate an IDR frame.
- * @param is_cbr            If `true`, enables Constant Bitrate (CBR) mode.
- * @param h264_bitrate_kbps Target bitrate in kbps for CBR mode.
- * @param vbv_buffer_size_kb VBV buffer size in kb for CBR mode (0 for auto/default).
- * @return                  A `StripeEncodeResult` containing the encoded bitstream.
- *                          The `data` buffer is dynamically allocated and must be
- *                          freed by the caller.
- */
-StripeEncodeResult encode_stripe_h264(
-  MinimalEncoderStore& h264_minimal_store,
-  int thread_id,
-  int stripe_y_start,
-  int stripe_height,
-  int capture_width_actual,
-  const uint8_t* y_plane_stripe_start, int y_stride,
-  const uint8_t* u_plane_stripe_start, int u_stride,
-  const uint8_t* v_plane_stripe_start, int v_stride,
-  bool is_i444_input,
-  int frame_counter,
-  int current_crf_setting,
-  int colorspace_setting,
-  bool use_full_range,
-  bool h264_streaming_mode,
-  bool force_idr,
-  bool is_cbr,
-  int h264_bitrate_kbps,
-  int vbv_buffer_size_kb) {
-
-  StripeEncodeResult result;
-  result.type = StripeDataType::H264;
-  result.stripe_y_start = stripe_y_start;
-  result.stripe_height = stripe_height;
-  result.frame_id = frame_counter;
-  result.data = nullptr;
-  result.size = 0;
-
-  if (!y_plane_stripe_start || !u_plane_stripe_start || !v_plane_stripe_start) {
-    std::cerr << "H264 T" << thread_id << ": Error - null YUV plane data for stripe Y"
-              << stripe_y_start << std::endl;
-    result.type = StripeDataType::UNKNOWN;
-    return result;
-  }
-  if (stripe_height <= 0 || capture_width_actual <= 0) {
-    std::cerr << "H264 T" << thread_id << ": Invalid dimensions ("
-              << capture_width_actual << "x" << stripe_height
-              << ") for stripe Y" << stripe_y_start << std::endl;
-    result.type = StripeDataType::UNKNOWN;
-    return result;
-  }
-  if (capture_width_actual % 2 != 0 || stripe_height % 2 != 0) {
-    std::cerr << "H264 T" << thread_id << ": Warning - Odd dimensions ("
-              << capture_width_actual << "x" << stripe_height
-              << ") for stripe Y" << stripe_y_start
-              << ". Encoder might behave unexpectedly or fail." << std::endl;
-  }
-
-  x264_t* current_encoder = nullptr;
-  int target_x264_csp;
-  switch (colorspace_setting) {
-    case 444:
-      target_x264_csp = X264_CSP_I444;
-      break;
-    case 420:
-    default:
-      target_x264_csp = X264_CSP_I420;
-      break;
-  }
-
-  {
-    std::lock_guard<std::mutex> lock(h264_minimal_store.store_mutex);
-    h264_minimal_store.ensure_size(thread_id);
-
-    bool is_first_init = !h264_minimal_store.initialized_flags[thread_id];
-    bool dims_changed = !is_first_init &&
-                        (h264_minimal_store.initialized_widths[thread_id] !=
-                            capture_width_actual ||
-                         h264_minimal_store.initialized_heights[thread_id] !=
-                            stripe_height);
-    bool cs_or_fr_changed = !is_first_init &&
-                            (h264_minimal_store.initialized_csps[thread_id] !=
-                                target_x264_csp ||
-                             h264_minimal_store.initialized_colorspaces[thread_id] !=
-                                colorspace_setting ||
-                             h264_minimal_store.initialized_full_range_flags[thread_id] !=
-                                use_full_range);
-    bool rc_changed = !is_first_init && h264_minimal_store.initialized_cbr_flags[thread_id] != is_cbr;
-
-
-    bool needs_crf_reinit = false;
-    if (!is_first_init &&
-        h264_minimal_store.initialized_crfs[thread_id] != current_crf_setting) {
-        needs_crf_reinit = true;
-    }
-    
-    bool needs_cbr_reinit = false;
-    if (!is_first_init && is_cbr && h264_minimal_store.initialized_bitrates[thread_id] != h264_bitrate_kbps) {
-        needs_cbr_reinit = true;
-    }
-
-    bool perform_full_reinit = is_first_init || dims_changed || cs_or_fr_changed || rc_changed;
-
-    if (perform_full_reinit) {
-      if (h264_minimal_store.encoders[thread_id]) {
-        x264_encoder_close(h264_minimal_store.encoders[thread_id]);
-        h264_minimal_store.encoders[thread_id] = nullptr;
-      }
-      h264_minimal_store.initialized_flags[thread_id] = false;
-
-      x264_param_t param;
-      if (x264_param_default_preset(&param, "ultrafast", "zerolatency") < 0) {
-        std::cerr << "H264 T" << thread_id
-                  << ": x264_param_default_preset FAILED." << std::endl;
-        result.type = StripeDataType::UNKNOWN;
-      } else {
-        param.i_width = capture_width_actual;
-        param.i_height = stripe_height;
-        param.i_csp = target_x264_csp;
-        param.i_fps_num = 60;
-        param.i_fps_den = 1;
-        param.i_keyint_max = X264_KEYINT_MAX_INFINITE;
-        param.b_repeat_headers = 1;
-        param.b_annexb = 1;
-        param.i_sync_lookahead = 0;
-        param.i_bframe = 0;
-        param.i_threads = h264_streaming_mode ? 0 : 1;
-        param.i_log_level = X264_LOG_ERROR;
-        param.vui.b_fullrange = 0;
-        param.vui.i_sar_width = 1;
-        param.vui.i_sar_height = 1;
-        if (is_cbr) {
-            param.rc.i_rc_method = X264_RC_ABR;
-            int abs_bitrate = static_cast<int>(std::abs(h264_bitrate_kbps));
-            param.rc.i_bitrate = abs_bitrate;
-            param.rc.i_vbv_max_bitrate = abs_bitrate;
-            if (vbv_buffer_size_kb > 0) {
-              param.rc.i_vbv_buffer_size = vbv_buffer_size_kb;
-            } else {
-              param.rc.i_vbv_buffer_size = (abs_bitrate + 9) / 10;
-            }
-            param.rc.b_filler = 0;
-        } else {
-            param.rc.i_rc_method = X264_RC_CRF;
-            param.rc.f_rf_constant = static_cast<float>(std::max(0, std::min(51, current_crf_setting)));
-        }
-        if (param.i_csp == X264_CSP_I444) {
-             param.vui.i_colorprim = 1;
-             param.vui.i_transfer = 1;
-             param.vui.i_colmatrix = 6;
-             x264_param_apply_profile(&param, "high444");
-        } else {
-           param.vui.i_colorprim = 1;
-           param.vui.i_transfer  = 1;
-           param.vui.i_colmatrix = 6;
-           x264_param_apply_profile(&param, "baseline");
-        }
-        param.b_aud = 0;
-
-        h264_minimal_store.encoders[thread_id] = x264_encoder_open(&param);
-        if (!h264_minimal_store.encoders[thread_id]) {
-          std::cerr << "H264 T" << thread_id << ": x264_encoder_open FAILED." << std::endl;
-          result.type = StripeDataType::UNKNOWN;
-        } else {
-          h264_minimal_store.initialized_flags[thread_id] = true;
-          h264_minimal_store.initialized_widths[thread_id] = param.i_width;
-          h264_minimal_store.initialized_heights[thread_id] = param.i_height;
-          h264_minimal_store.initialized_crfs[thread_id] = current_crf_setting;
-          h264_minimal_store.initialized_csps[thread_id] = param.i_csp;
-          h264_minimal_store.initialized_colorspaces[thread_id] = colorspace_setting;
-          h264_minimal_store.initialized_full_range_flags[thread_id] = use_full_range;
-          h264_minimal_store.force_idr_flags[thread_id] = true;
-          h264_minimal_store.initialized_cbr_flags[thread_id] = is_cbr;
-          h264_minimal_store.initialized_bitrates[thread_id] = static_cast<int>(std::abs(h264_bitrate_kbps));
-        }
-      }
-    } else if (needs_crf_reinit) {
-      x264_t* encoder_to_reconfig = h264_minimal_store.encoders[thread_id];
-      if (encoder_to_reconfig) {
-        x264_param_t params_for_reconfig;
-        x264_encoder_parameters(encoder_to_reconfig, &params_for_reconfig);
-        params_for_reconfig.rc.f_rf_constant =
-          static_cast<float>(std::max(0, std::min(51, current_crf_setting)));
-        if (x264_encoder_reconfig(encoder_to_reconfig, &params_for_reconfig) == 0) {
-          h264_minimal_store.initialized_crfs[thread_id] = current_crf_setting;
-        } else {
-          std::cerr << "H264 T" << thread_id
-                    << ": x264_encoder_reconfig for CRF FAILED. Old CRF "
-                    << h264_minimal_store.initialized_crfs[thread_id]
-                    << " may persist." << std::endl;
-        }
-      }
-    } else if (needs_cbr_reinit) {
-      x264_t* encoder_to_reconfig = h264_minimal_store.encoders[thread_id];
-      if (encoder_to_reconfig) {
-        x264_param_t params_for_reconfig;
-        x264_encoder_parameters(encoder_to_reconfig, &params_for_reconfig);
-        int abs_bitrate = static_cast<int>(std::abs(h264_bitrate_kbps));
-        params_for_reconfig.rc.i_bitrate = abs_bitrate;
-        params_for_reconfig.rc.i_vbv_max_bitrate = abs_bitrate;
-        if (vbv_buffer_size_kb > 0) {
-          params_for_reconfig.rc.i_vbv_buffer_size = vbv_buffer_size_kb;
-        } else {
-          params_for_reconfig.rc.i_vbv_buffer_size = static_cast<int>(abs_bitrate * 0.1);
-        }
-        if (x264_encoder_reconfig(encoder_to_reconfig, &params_for_reconfig) == 0) {
-          h264_minimal_store.initialized_bitrates[thread_id] = abs_bitrate;
-        } else {
-          std::cerr << "H264 T" << thread_id
-                    << ": x264_encoder_reconfig for CBR FAILED. Old CBR "
-                    << h264_minimal_store.initialized_bitrates[thread_id]
-                    << " may persist." << std::endl;
-        }
-      }
-    }
-
-    if (h264_minimal_store.initialized_flags[thread_id]) {
-      current_encoder = h264_minimal_store.encoders[thread_id];
-    }
-  }
-
-  if (result.type == StripeDataType::UNKNOWN) return result;
-  if (!current_encoder) {
-    std::cerr << "H264 T" << thread_id << ": Encoder not ready post-init for Y"
-              << stripe_y_start << "." << std::endl;
-    result.type = StripeDataType::UNKNOWN; return result;
-  }
-
-  x264_picture_t pic_in;
-  x264_picture_init(&pic_in);
-  pic_in.i_pts = static_cast<int64_t>(frame_counter);
-  pic_in.img.i_csp = target_x264_csp;
-
-  pic_in.img.plane[0] = (uint8_t*)y_plane_stripe_start;
-  pic_in.img.plane[1] = (uint8_t*)u_plane_stripe_start;
-  pic_in.img.plane[2] = (uint8_t*)v_plane_stripe_start;
-  pic_in.img.i_stride[0] = y_stride;
-  pic_in.img.i_stride[1] = u_stride;
-  pic_in.img.i_stride[2] = v_stride;
-
-  bool force_idr_now = force_idr ? true :  false;
-  {
-    std::lock_guard<std::mutex> lock(h264_minimal_store.store_mutex);
-    h264_minimal_store.ensure_size(thread_id);
-    if (h264_minimal_store.initialized_flags[thread_id] &&
-        thread_id < static_cast<int>(h264_minimal_store.force_idr_flags.size()) &&
-        h264_minimal_store.force_idr_flags[thread_id]) {
-      force_idr_now = true;
-    }
-  }
-  pic_in.i_type = force_idr_now ? X264_TYPE_IDR : X264_TYPE_AUTO;
-
-  x264_nal_t* nals = nullptr;
-  int i_nals = 0;
-  x264_picture_t pic_out;
-  x264_picture_init(&pic_out);
-
-  int frame_size = x264_encoder_encode(current_encoder, &nals, &i_nals,
-                                       &pic_in, &pic_out);
-
-  if (frame_size < 0) {
-    std::cerr << "H264 T" << thread_id << ": x264_encoder_encode FAILED: " << frame_size
-              << " (Y" << stripe_y_start << ")" << std::endl;
-    result.type = StripeDataType::UNKNOWN; return result;
-  }
-
-  if (frame_size > 0) {
-    if (force_idr_now && pic_out.b_keyframe &&
-        (pic_out.i_type == X264_TYPE_IDR || pic_out.i_type == X264_TYPE_I)) {
-      std::lock_guard<std::mutex> lock(h264_minimal_store.store_mutex);
-      if (thread_id < static_cast<int>(h264_minimal_store.force_idr_flags.size())) {
-        h264_minimal_store.force_idr_flags[thread_id] = false;
-      }
-    }
-
-    const unsigned char DATA_TYPE_H264_STRIPED_TAG = 0x04;
-    unsigned char frame_type_header_byte = 0x00;
-    if (pic_out.i_type == X264_TYPE_IDR) frame_type_header_byte = 0x01;
-    else if (pic_out.i_type == X264_TYPE_I) frame_type_header_byte = 0x02;
-
-    int header_sz = 10;
-    int total_sz = frame_size + header_sz;
-    result.data = new (std::nothrow) unsigned char[total_sz];
-    if (!result.data) {
-      std::cerr << "H264 T" << thread_id << ": new result.data FAILED (Y"
-                << stripe_y_start << ")" << std::endl;
-      result.type = StripeDataType::UNKNOWN; return result;
-    }
-
-    result.data[0] = DATA_TYPE_H264_STRIPED_TAG;
-    result.data[1] = frame_type_header_byte;
-    uint16_t net_val;
-    net_val = htons(static_cast<uint16_t>(result.frame_id % 65536));
-    std::memcpy(result.data + 2, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(result.stripe_y_start));
-    std::memcpy(result.data + 4, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(capture_width_actual));
-    std::memcpy(result.data + 6, &net_val, 2);
-    net_val = htons(static_cast<uint16_t>(result.stripe_height));
-    std::memcpy(result.data + 8, &net_val, 2);
-
-    unsigned char* payload_ptr = result.data + header_sz;
-    size_t bytes_copied = 0;
-    for (int k = 0; k < i_nals; ++k) {
-      if (bytes_copied + nals[k].i_payload > static_cast<size_t>(frame_size)) {
-        std::cerr << "H264 T" << thread_id
-                  << ": NAL copy overflow detected (Y" << stripe_y_start << ")" << std::endl;
-        delete[] result.data; result.data = nullptr; result.size = 0;
-        result.type = StripeDataType::UNKNOWN; return result;
-      }
-      std::memcpy(payload_ptr + bytes_copied, nals[k].p_payload, nals[k].i_payload);
-      bytes_copied += nals[k].i_payload;
-    }
-    result.size = total_sz;
-  } else {
-    result.data = nullptr;
-    result.size = 0;
-  }
-  return result;
-}
-
-/**
- * @brief Calculates a 64-bit XXH3 hash for a stripe of YUV data.
- *
- * This function processes the Y, U, and V planes of a given YUV image stripe
- * to compute a single hash value. This is typically used for damage detection
- * by comparing hashes of the same stripe across consecutive frames.
- *
- * @param y_plane_stripe_start Pointer to the beginning of the Y (luma) plane data
- *                             for the stripe.
- * @param y_stride The stride (bytes per row) of the Y plane.
- * @param u_plane_stripe_start Pointer to the beginning of the U (chroma) plane data
- *                             for the stripe.
- * @param u_stride The stride (bytes per row) of the U plane.
- * @param v_plane_stripe_start Pointer to the beginning of the V (chroma) plane data
- *                             for the stripe.
- * @param v_stride The stride (bytes per row) of the V plane.
- * @param width The width of the luma plane of the stripe in pixels.
- * @param height The height of the luma plane of the stripe in pixels.
- * @param is_i420 True if the YUV format is I420 (chroma planes are half width and
- *                half height of the luma plane). False if I444 (chroma planes have
- *                the same dimensions as the luma plane).
- * @param use_fullframe_hashing True to use full-frame hashing (samples every 12th row),
- *                              false to hash every row.
- * @return A 64-bit hash value representing the content of the YUV stripe.
- *         Returns 0 if input parameters are invalid (e.g., null pointers,
- *         non-positive dimensions).
- */
-uint64_t calculate_yuv_stripe_hash(const uint8_t* y_plane_stripe_start, int y_stride,
-                                   const uint8_t* u_plane_stripe_start, int u_stride,
-                                   const uint8_t* v_plane_stripe_start, int v_stride,
-                                   int width, int height, bool is_i420, bool use_fullframe_hashing) {
-    if (!y_plane_stripe_start || !u_plane_stripe_start || width <= 0 || height <= 0) {
-        return 0;
-    }
-
-    const int row_step = use_fullframe_hashing ? 12 : 1;
-    XXH3_state_t hash_state;
-    XXH3_64bits_reset(&hash_state);
-
-    for (int r = 0; r < height; r += row_step) {
-        XXH3_64bits_update(&hash_state, y_plane_stripe_start +
-                           static_cast<size_t>(r) * y_stride, width);
-    }
-
-    if (v_plane_stripe_start) {
-        int chroma_width = is_i420 ? (width / 2) : width;
-        int chroma_height = is_i420 ? (height / 2) : height;
-
-        if (chroma_width > 0 && chroma_height > 0) {
-            for (int r = 0; r < chroma_height; r += row_step) {
-                XXH3_64bits_update(&hash_state, u_plane_stripe_start +
-                                   static_cast<size_t>(r) * u_stride, chroma_width);
-            }
-            for (int r = 0; r < chroma_height; r += row_step) {
-                XXH3_64bits_update(&hash_state, v_plane_stripe_start +
-                                   static_cast<size_t>(r) * v_stride, chroma_width);
-            }
-        }
-    } else {
-        int uv_plane_height = height / 2;
-        int uv_plane_width_bytes = width;
-
-        if (uv_plane_height > 0) {
-             for (int r = 0; r < uv_plane_height; r += row_step) {
-                XXH3_64bits_update(&hash_state, u_plane_stripe_start +
-                                   static_cast<size_t>(r) * u_stride, uv_plane_width_bytes);
-            }
-        }
-    }
-    
-    return XXH3_64bits_digest(&hash_state);
-}
-
-/**
- * @brief Calculates a 64-bit XXH3 hash for a stripe of BGR(X) image data from shared memory.
- *
- * This function reads pixel data row by row from the provided shared memory buffer,
- * extracts the B, G, and R components (assuming BGR ordering, e.g., BGRX or BGR),
- * and computes a hash of this BGR data. This is useful for damage detection on
- * image data that is natively in a BGR-like format.
- *
- * @param shm_stripe_physical_start Pointer to the beginning of the stripe's pixel data
- *                                  within the shared memory buffer.
- * @param shm_stride_bytes The stride (bytes per row) of the image in shared memory.
- * @param stripe_width The width of the stripe in pixels.
- * @param stripe_height The height of the stripe in pixels.
- * @param shm_bytes_per_pixel The number of bytes per pixel in the shared memory image
- *                            (e.g., 4 for BGRX, 3 for BGR). It's assumed that the
- *                            blue component is at offset 0, green at 1, and red at 2
- *                            within each pixel.
- * @return A 64-bit hash value representing the BGR content of the stripe.
- *         Returns 0 if input parameters are invalid (e.g., null pointer,
- *         non-positive dimensions, insufficient bytes per pixel).
- */
-uint64_t calculate_bgr_stripe_hash_from_shm(const unsigned char* shm_start_ptr,
-                                            int stride_bytes,
-                                            int width, int height,
-                                            int bytes_per_pixel) {
-    XXH64_state_t* const state = XXH64_createState();
-    if (state==NULL) abort();
-    XXH64_reset(state, 0);
-
-    for (int y = 0; y < height; ++y) {
-        const unsigned char* row_ptr = shm_start_ptr + static_cast<size_t>(y) * stride_bytes;
-        XXH64_update(state, row_ptr, static_cast<size_t>(width) * bytes_per_pixel);
-    }
-
-    uint64_t const hash = XXH64_digest(state);
-    XXH64_freeState(state);
-    return hash;
-}
-
-extern "C" {
-
-  typedef void* ScreenCaptureModuleHandle;
-
-  /**
-   * @brief Creates a new instance of the ScreenCaptureModule.
-   * @return A handle to the created ScreenCaptureModule instance.
-   */
-  ScreenCaptureModuleHandle create_screen_capture_module() {
-    return static_cast<ScreenCaptureModuleHandle>(new ScreenCaptureModule());
-  }
-
-  /**
-   * @brief Destroys a ScreenCaptureModule instance.
-   * @param module_handle Handle to the ScreenCaptureModule instance to destroy.
-   */
-  void destroy_screen_capture_module(ScreenCaptureModuleHandle module_handle) {
-    if (module_handle) {
-      delete static_cast<ScreenCaptureModule*>(module_handle);
-    }
-  }
-
-  /**
-   * @brief Starts the screen capture process with the given settings and callback.
-   * @param module_handle Handle to the ScreenCaptureModule instance.
-   * @param settings The initial capture and encoding settings.
-   * @param callback A function pointer to be called when an encoded stripe is ready.
-   * @param user_data User-defined data to be passed to the callback function.
-   */
-  void start_screen_capture(ScreenCaptureModuleHandle module_handle,
-                            CaptureSettings settings,
-                            StripeCallback callback,
-                            void* user_data) {
-    if (module_handle) {
-      ScreenCaptureModule* module = static_cast<ScreenCaptureModule*>(module_handle);
-      module->modify_settings(settings);
-
-      {
-        std::lock_guard<std::mutex> lock(module->settings_mutex);
-        module->stripe_callback = callback;
-        module->user_data = user_data;
-      }
-
-      module->start_capture();
-    }
-  }
-
-  /**
-   * @brief Requests an IDR frame from an encoder
-   * @param moduel_handle Handle to the ScreenCaptureModule instance.
-  */
-  void request_idr(ScreenCaptureModuleHandle module_handle) {
-		if (module_handle) {
-			ScreenCaptureModule* module = static_cast<ScreenCaptureModule*>(module_handle);
-			if (module) {
-				module->request_idr();
-			}
-		}
-  }
-
-  /**
-   * @brief Update video bitrate of an encoder
-   * @param moduel_handle Handle to the ScreenCaptureModule instance.
-   * @param bitrate video bitrate to set 
-   */
-  void update_video_bitrate(ScreenCaptureModuleHandle module_handle, int bitrate) {
-    if (module_handle) {
-      ScreenCaptureModule* module = static_cast<ScreenCaptureModule*>(module_handle);
-      if (module) module->update_video_bitrate(bitrate);
-    }
-  }
-
-  /**
-   * @brief Updates the framerate of the screen capture process.
-   * @param module_handle Handle to the ScreenCaptureModule instance.
-   * @param fps The new framerate to set.
-   */
-  void update_framerate(ScreenCaptureModuleHandle module_handle, double fps) {
-    if (module_handle) {
-      ScreenCaptureModule* module = static_cast<ScreenCaptureModule*>(module_handle);
-      if (module) module->update_framerate(fps);
-    }
-  }
-
-    /**
-   * @brief Updates the VBV buffer size for H.264 CBR mode.
-   * @param module_handle Handle to the ScreenCaptureModule instance.
-   * @param vbv_buffer_size_kbps The new VBV buffer size in kb
-   */
-  void update_vbv_buffer_size(ScreenCaptureModuleHandle module_handle, int vbv_buffer_size_kb) {
-    if (module_handle) {
-      ScreenCaptureModule* module = static_cast<ScreenCaptureModule*>(module_handle);
-      if (module) module->update_vbv_buffer_size(vbv_buffer_size_kb);
-    }
-  }
-
-  /**
-   * @brief Stops the screen capture process.
-   * @param module_handle Handle to the ScreenCaptureModule instance.
-   */
-  void stop_screen_capture(ScreenCaptureModuleHandle module_handle) {
-    if (module_handle) {
-      static_cast<ScreenCaptureModule*>(module_handle)->stop_capture();
-    }
-  }
-
-  /**
-   * @brief Frees the data buffer within a StripeEncodeResult.
-   * This is called from Python via ctypes to prevent memory leaks.
-   * @param result Pointer to the StripeEncodeResult whose data needs freeing.
-   */
-  void free_stripe_encode_result_data(StripeEncodeResult* result) {
-    if (result && result->data) {
-      delete[] result->data;
-      result->data = nullptr;
-    }
-  }
-
-}
diff --git a/pixelflux_wayland/src/encoders/mod.rs b/pixelflux/src/encoders/mod.rs
similarity index 81%
rename from pixelflux_wayland/src/encoders/mod.rs
rename to pixelflux/src/encoders/mod.rs
index 234db46..9be74c7 100644
--- a/pixelflux_wayland/src/encoders/mod.rs
+++ b/pixelflux/src/encoders/mod.rs
@@ -1,4 +1,5 @@
 pub mod software;
 pub mod nvenc;
 pub mod vaapi;
+pub mod oh264;
 pub mod overlay;
diff --git a/pixelflux_wayland/src/encoders/nvenc.rs b/pixelflux/src/encoders/nvenc.rs
similarity index 57%
rename from pixelflux_wayland/src/encoders/nvenc.rs
rename to pixelflux/src/encoders/nvenc.rs
index 3f64b50..6b2cb67 100644
--- a/pixelflux_wayland/src/encoders/nvenc.rs
+++ b/pixelflux/src/encoders/nvenc.rs
@@ -12,8 +12,8 @@ use smithay::backend::allocator::{dmabuf::Dmabuf, Buffer};
 
 use crate::recording_sink::RecordingSink;
 use crate::RustCaptureSettings;
-use nvenc_sys::cuda::*;
-use nvenc_sys::*;
+use nvcodec_sys::cuda::*;
+use nvcodec_sys::*;
 
 /// @brief EGL constants and type definitions for C interop.
 type EGLDisplay = *const c_void;
@@ -109,6 +109,8 @@ struct CudaFunctions {
         ByteCount: usize,
     ) -> CUresult,
     cuMemcpy2D_v2: unsafe extern "C" fn(pCopy: *const CUDA_MEMCPY2D) -> CUresult,
+    cuMemHostRegister_v2: unsafe extern "C" fn(p: *mut c_void, bytesize: usize, flags: u32) -> CUresult,
+    cuMemHostUnregister: unsafe extern "C" fn(p: *mut c_void) -> CUresult,
     cuGraphicsEGLRegisterImage: unsafe extern "C" fn(
         pCudaResource: *mut CUgraphicsResource,
         image: EGLImageKHR,
@@ -133,6 +135,160 @@ struct NvencLibrary {
     create_instance: unsafe extern "C" fn(
         functionList: *mut NV_ENCODE_API_FUNCTION_LIST,
     ) -> NVENCSTATUS,
+    // Optional: lets us cap probing at the driver's max API version (absent on very old drivers).
+    get_max_version: Option<unsafe extern "C" fn(*mut u32) -> NVENCSTATUS>,
+}
+
+/// Negotiated NVENC API (major, minor), written once by `nvenc_negotiate`; `get()` is None before.
+static NVENC_NEG_VER: std::sync::OnceLock<(u32, u32)> = std::sync::OnceLock::new();
+
+/// The NVENC structs this encoder version-tags. Struct revisions (bits 16-23) and the 1<<31
+/// flag changed across SDKs, so a down-negotiated session must send exactly the words the
+/// negotiated SDK defined -- older drivers reject anything else with NV_ENC_ERR_INVALID_VERSION.
+#[derive(Clone, Copy, Debug)]
+enum NvStruct {
+    FunctionList, // tags NV_ENCODE_API_FUNCTION_LIST
+    OpenSessionExParams, // tags NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS
+    Config, // tags NV_ENC_CONFIG
+    RcParams, // presumably NV_ENC_RC_PARAMS; no call site visible here stamps it -- TODO confirm
+    PresetConfig, // tags NV_ENC_PRESET_CONFIG
+    InitializeParams, // tags NV_ENC_INITIALIZE_PARAMS
+    ReconfigureParams, // tags NV_ENC_RECONFIGURE_PARAMS
+    RegisterResource, // tags NV_ENC_REGISTER_RESOURCE
+    MapInputResource, // tags NV_ENC_MAP_INPUT_RESOURCE
+    CreateBitstreamBuffer, // tags NV_ENC_CREATE_BITSTREAM_BUFFER
+    PicParams, // tags NV_ENC_PIC_PARAMS
+    LockBitstream, // tags NV_ENC_LOCK_BITSTREAM
+}
+
+impl NvStruct {
+    /// (struct revision, 1<<31 flag) as defined by the SDK with packed version `api`
+    /// ((major<<4)|minor, so 0xC0 = 12.0, 0xC1 = 12.1, 0xC2 = 12.2). Sourced from nvEncodeAPI.h
+    /// at FFmpeg nv-codec-headers tags n10.0.26.2, n11.0.10.3, n11.1.5.3, n12.0.16.1, n12.1.14.0,
+    /// n12.2.72.0, n13.0.19.0; 10.0 is the negotiation floor, so each `_` arm also covers below it.
+    fn rev(self, api: u32) -> (u32, bool) {
+        match self {
+            NvStruct::FunctionList => (2, false),
+            NvStruct::OpenSessionExParams => (1, false),
+            NvStruct::Config => match api {
+                0xC2.. => (9, true), // 12.2 and newer
+                0xC0..=0xC1 => (8, true), // 12.0 - 12.1
+                _ => (7, true), // anything older than 12.0
+            },
+            NvStruct::RcParams => (1, false),
+            NvStruct::PresetConfig => (if api >= 0xC2 { 5 } else { 4 }, true), // changes at 12.2
+            NvStruct::InitializeParams => match api {
+                0xC2.. => (7, true), // 12.2 and newer
+                0xC1 => (6, true), // 12.1 only
+                _ => (5, true), // 12.0 and older
+            },
+            NvStruct::ReconfigureParams => (if api >= 0xC2 { 2 } else { 1 }, true), // changes at 12.2
+            NvStruct::RegisterResource => match api {
+                0xC2.. => (5, false), // 12.2 and newer
+                0xC0..=0xC1 => (4, false), // 12.0 - 12.1
+                _ => (3, false), // anything older than 12.0
+            },
+            NvStruct::MapInputResource => (4, false),
+            NvStruct::CreateBitstreamBuffer => (1, false),
+            NvStruct::PicParams => match api {
+                0xC2.. => (7, true), // 12.2 and newer
+                0xC0..=0xC1 => (6, true), // 12.0 - 12.1
+                _ => (4, true), // anything older than 12.0
+            },
+            NvStruct::LockBitstream => match api {
+                0xC2.. => (2, true), // 12.2 and newer
+                0xC1 => (1, true), // 12.1 only
+                0xC0 => (2, false), // 12.0 only
+                _ => (1, false), // anything older than 12.0
+            },
+        }
+    }
+}
+
+/// Builds the NVENCAPI_STRUCT_VERSION word for `s` at API (major, minor). Layout: API major in
+/// bits 0-7, that SDK's struct revision from bit 16, minor in bits 24-27, magic 0x7 in bits
+/// 28-30 and that SDK's flag in bit 31. For the pinned nvcodec-sys version this reproduces the
+/// compile-time NV_ENC_*_VER constants exactly, so a current driver is byte-for-byte unchanged.
+fn nvenc_struct_ver(s: NvStruct, maj: u32, min: u32) -> u32 {
+    let (revision, flagged) = s.rev((maj << 4) | (min & 0xF));
+    (u32::from(flagged) << 31) | (0x7 << 28) | ((min & 0xF) << 24) | (revision << 16) | (maj & 0xFF)
+}
+
+#[inline]
+fn nvenc_cur_ver() -> (u32, u32) {
+    match NVENC_NEG_VER.get() {
+        Some(&negotiated) => negotiated,
+        None => (NVENCAPI_VERSION & 0xFF, (NVENCAPI_VERSION >> 24) & 0xFF), // compile-time version
+    }
+}
+
+/// Version word for NVENC struct `s`, stamped with the API version currently in effect.
+#[inline]
+fn sv(s: NvStruct) -> u32 {
+    let (major, minor) = nvenc_cur_ver();
+    nvenc_struct_ver(s, major, minor)
+}
+
+/// Raw `apiVersion` value (major | minor<<24) handed to NvEncOpenEncodeSessionEx.
+#[inline]
+fn neg_api() -> u32 {
+    let (major, minor) = nvenc_cur_ver();
+    (minor << 24) | major
+}
+
+/// Probe NVENC API versions newest-first against the driver and remember the highest accepted:
+/// the bundled headers are NVENC 13.0 (`pinned`), so a current driver negotiates 13.0 natively
+/// while older drivers down-negotiate through 12.x/11.x to 10.0 (~R445). The struct-version words
+/// are derived per negotiated version from the revision table, so the 13.0-layout structs are
+/// stamped with the exact word each older SDK defined. Set-once per process; falls back to `pinned`.
+fn nvenc_negotiate(lib: &NvencLibrary) {
+    NVENC_NEG_VER.get_or_init(|| {
+        let pinned = (NVENCAPI_VERSION & 0xFF, (NVENCAPI_VERSION >> 24) & 0xFF);
+        let mut drv_max: u32 = 0; // (major<<4)|minor; 0 = unknown -> rely on createInstance probing
+        if let Some(get_max) = lib.get_max_version {
+            let mut m: u32 = 0;
+            if unsafe { get_max(&mut m) } == NVENCSTATUS::NV_ENC_SUCCESS {
+                drv_max = m;
+            }
+        }
+        // Optional cap for testing/pinning a lower version, e.g. PIXELFLUX_NVENC_MAX_API="11.0".
+        // The cap can only lower the ceiling; malformed or "0.0" values are ignored.
+        if let Ok(cap) = std::env::var("PIXELFLUX_NVENC_MAX_API") {
+            let mut it = cap.split('.');
+            if let (Some(a), Some(b)) = (it.next(), it.next()) {
+                if let (Ok(cm), Ok(cn)) = (a.parse::<u32>(), b.parse::<u32>()) {
+                    let capv = (cm << 4) | (cn & 0xF);
+                    if capv != 0 && (drv_max == 0 || capv < drv_max) {
+                        drv_max = capv;
+                    }
+                }
+            }
+        }
+        // NOTE(review): 12.2 is never probed although the revision table has 12.2 arms -- confirm.
+        let candidates = [pinned, (12, 1), (12, 0), (11, 1), (11, 0), (10, 0)];
+        for (maj, min) in candidates {
+            let vv = (maj << 4) | min;
+            if drv_max != 0 && vv > drv_max {
+                continue; // driver can't support this version; skip
+            }
+            let mut probe = NV_ENCODE_API_FUNCTION_LIST {
+                version: nvenc_struct_ver(NvStruct::FunctionList, maj, min),
+                ..Default::default()
+            };
+            let st = unsafe { (lib.create_instance)(&mut probe) };
+            // Require every entry point the encode path unwrap()s, not just the session opener:
+            // a driver may accept the function-list word yet leave newer entries null.
+            if st == NVENCSTATUS::NV_ENC_SUCCESS
+                && probe.nvEncOpenEncodeSessionEx.is_some()
+                && probe.nvEncInitializeEncoder.is_some()
+                && probe.nvEncGetEncodePresetConfigEx.is_some()
+                && probe.nvEncEncodePicture.is_some()
+                && probe.nvEncLockBitstream.is_some()
+            {
+                eprintln!("[pixelflux] NVENC API version negotiated: {}.{}", maj, min);
+                return (maj, min);
+            }
+        }
+        pinned // no candidate was accepted: keep the compile-time version
+    });
 }
 
 /// @brief Cache entry for repeated DMABuf imports.
@@ -180,11 +336,14 @@ pub struct NvencEncoder {
     bitstream_buffers: Vec<NV_ENC_OUTPUT_PTR>,
     current_buffer_idx: usize,
     dmabuf_cache: HashMap<i32, CachedDmaBuf>,
+    // Page-locked host upload sources: base ptr -> registered len (0 = registration failed).
+    pinned_hosts: HashMap<usize, usize>,
     cuda: Arc<CudaFunctions>,
     egl: Arc<EglFunctions>,
     _nvenc_lib: Arc<NvencLibrary>,
     nvenc_funcs: NV_ENCODE_API_FUNCTION_LIST,
     recording_sink: Option<Arc<RecordingSink>>,
+    omit_stripe_headers: bool,
 }
 
 unsafe impl Send for NvencEncoder {}
@@ -242,6 +401,12 @@ impl Drop for NvencEncoder {
                 (self.egl.eglDestroyImageKHR)(self.egl_display, cache.egl_image);
             }
 
+            for (base, len) in &self.pinned_hosts {
+                if *len > 0 {
+                    (self.cuda.cuMemHostUnregister)(*base as *mut c_void);
+                }
+            }
+
             if !self.encoder_session.is_null() {
                 (self.nvenc_funcs.nvEncDestroyEncoder.unwrap())(self.encoder_session);
             }
@@ -252,6 +417,34 @@ impl Drop for NvencEncoder {
     }
 }
 
+/// Returns the lowest H.264 level, as the nvcodec_sys numeric value (52/60/61/62),
+/// whose Annex-A Table A-1 limits cover this stream: MaxFS bounds the frame size in
+/// macroblocks and MaxMBPS bounds the macroblock rate (frame size x fps).
+/// The table deliberately begins at 5.2 (there is no 5.1 row), so every resolution up
+/// to 4K resolves to one deterministic High@5.2. A flat hardcoded 5.2 is not enough:
+/// its MaxFS of 36864 MBs (~= 4096x2304) makes NVENC init fail above 4K, so only >4K
+/// steps UP to 6.0/6.1/6.2. The level is thus fixed from frame 1 (profile stays High)
+/// with no mid-stream bump; inputs beyond 6.2's limits still get 62 as a best effort.
+fn min_h264_level(width: u32, height: u32, fps: u32) -> u32 {
+    // Rows are (numeric level, MaxFS, MaxMBPS) in ascending order, starting at the 5.2 floor.
+    const LEVELS: [(u32, u64, u64); 4] = [
+        (52, 36864, 2073600),   // 5.2 (floor)
+        (60, 139264, 4177920),  // 6.0
+        (61, 139264, 8355840),  // 6.1
+        (62, 139264, 16711680), // 6.2
+    ];
+    // Round each dimension up to whole 16x16 macroblocks.
+    let mb_cols = (width as u64 + 15) / 16;
+    let mb_rows = (height as u64 + 15) / 16;
+    let frame_mbs = mb_cols * mb_rows;
+    let mb_rate = frame_mbs * fps.max(1) as u64; // an fps of 0 is treated as 1
+    LEVELS
+        .iter()
+        .find(|&&(_, max_fs, max_mbps)| frame_mbs <= max_fs && mb_rate <= max_mbps)
+        .map(|&(level, _, _)| level)
+        .unwrap_or(62) // Above 6.2's limits NVENC has no higher level; best effort.
+}
+
 impl NvencEncoder {
     /// @brief Loads the EGL library and required extensions.
     /// @return Result containing the loaded EGL function table.
@@ -328,6 +521,8 @@ impl NvencEncoder {
                 cuMemcpyHtoD_v2: load!(lib, b"cuMemcpyHtoD_v2\0"),
                 cuMemcpyDtoH_v2: load!(lib, b"cuMemcpyDtoH_v2\0"),
                 cuMemcpy2D_v2: load!(lib, b"cuMemcpy2D_v2\0"),
+                cuMemHostRegister_v2: load!(lib, b"cuMemHostRegister_v2\0"),
+                cuMemHostUnregister: load!(lib, b"cuMemHostUnregister\0"),
                 cuGraphicsEGLRegisterImage: load!(lib, b"cuGraphicsEGLRegisterImage\0"),
                 cuGraphicsUnregisterResource: load!(lib, b"cuGraphicsUnregisterResource\0"),
                 cuGraphicsResourceGetMappedEglFrame: load!(
@@ -347,18 +542,22 @@ impl NvencEncoder {
     /// @return Result containing the loaded NVENC library wrapper.
     fn load_nvenc() -> Result<NvencLibrary, String> {
         unsafe {
-            let lib_name = if cfg!(windows) {
-                "nvEncodeAPI64.dll"
-            } else {
-                "libnvidia-encode.so.1"
-            };
+            let lib_name = NVENC_DLL_NAME;
             let lib = Library::new(lib_name)
                 .map_err(|e| format!("Could not load NVENC library ({}): {}", lib_name, e))?;
 
+            let create_instance = *lib
+                .get(NV_ENCODE_API_CREATE_INSTANCE_FN_NAME)
+                .map_err(|e| e.to_string())?;
+            let get_max_version = lib
+                .get::<NvEncodeApiGetMaxSupportedVersionFn>(
+                    NV_ENCODE_API_GET_MAX_SUPPORTED_VERSION_FN_NAME,
+                )
+                .map(|s| *s)
+                .ok();
             Ok(NvencLibrary {
-                create_instance: *lib
-                    .get(b"NvEncodeAPICreateInstance\0")
-                    .map_err(|e| e.to_string())?,
+                create_instance,
+                get_max_version,
                 _lib: lib,
             })
         }
@@ -370,7 +569,7 @@ impl NvencEncoder {
         if (cuda.cuGetErrorName)(err, &mut p_str) == CUresult::CUDA_SUCCESS && !p_str.is_null() {
             CStr::from_ptr(p_str).to_string_lossy().into_owned()
         } else {
-            format!("Unknown CUDA Error ({})", err as u32)
+            format!("Unknown CUDA Error ({})", err.0)
         }
     }
 
@@ -418,6 +617,13 @@ impl NvencEncoder {
         let egl = Arc::new(Self::load_egl()?);
         let cuda = Arc::new(Self::load_cuda()?);
         let nvenc_lib = Arc::new(Self::load_nvenc()?);
+        // Negotiate the NVENC API version against the driver (set-once) before tagging structs.
+        nvenc_negotiate(&nvenc_lib);
+
+        // libcuda + libnvidia-encode are now loaded: install the multi-GPU GET_ATTACHED_IDS ioctl
+        // filter before cuInit enumerates devices (a no-op unless a host GPU is hidden from this
+        // container). Must run AFTER the NVIDIA libs are dlopened so their GOTs can be patched.
+        crate::nvgpufilter::install();
 
         static LEAK_ONCE: std::sync::Once = std::sync::Once::new();
         LEAK_ONCE.call_once(|| {
@@ -479,22 +685,29 @@ impl NvencEncoder {
             }
 
             let mut function_list = NV_ENCODE_API_FUNCTION_LIST {
-                version: NV_ENCODE_API_FUNCTION_LIST_VER,
+                version: sv(NvStruct::FunctionList),
                 ..Default::default()
             };
-            (nvenc_lib.create_instance)(&mut function_list);
+            if (nvenc_lib.create_instance)(&mut function_list) != NVENCSTATUS::NV_ENC_SUCCESS {
+                (cuda.cuMemFree_v2)(input_device_ptr);
+                (cuda.cuCtxDestroy_v2)(cu_context);
+                return Err("NvEncodeAPICreateInstance failed".into());
+            }
 
             let mut session_params = NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS {
-                version: NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER,
+                version: sv(NvStruct::OpenSessionExParams),
                 deviceType: NV_ENC_DEVICE_TYPE::NV_ENC_DEVICE_TYPE_CUDA,
                 device: cu_context as *mut c_void,
-                apiVersion: NVENCAPI_VERSION,
+                apiVersion: neg_api(),
                 ..Default::default()
             };
 
             let mut encoder_session: *mut c_void = ptr::null_mut();
             let open_fn = function_list.nvEncOpenEncodeSessionEx.unwrap();
             if open_fn(&mut session_params, &mut encoder_session) != NVENCSTATUS::NV_ENC_SUCCESS {
+                // Free the already-allocated ARGB buffer + CUDA context on init failure.
+                (cuda.cuMemFree_v2)(input_device_ptr);
+                (cuda.cuCtxDestroy_v2)(cu_context);
                 return Err("Failed to open NVENC session".into());
             }
 
@@ -506,38 +719,86 @@ impl NvencEncoder {
             };
 
             let mut config = NV_ENC_CONFIG {
-                version: NV_ENC_CONFIG_VER,
+                version: sv(NvStruct::Config),
                 ..Default::default()
             };
             let mut preset_config = NV_ENC_PRESET_CONFIG {
-                version: NV_ENC_PRESET_CONFIG_VER,
+                version: sv(NvStruct::PresetConfig),
                 presetCfg: config,
                 ..Default::default()
             };
 
             let get_preset_ex = function_list.nvEncGetEncodePresetConfigEx.unwrap();
-            get_preset_ex(
+            let preset_status = get_preset_ex(
                 encoder_session,
                 NV_ENC_CODEC_H264_GUID,
                 NV_ENC_PRESET_P3_GUID,
                 NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_LOW_LATENCY,
                 &mut preset_config,
             );
+            if preset_status != NVENCSTATUS::NV_ENC_SUCCESS {
+                // Proceed with the (zeroed) default config rather than aborting, but surface WHY
+                // the preset lookup failed instead of silently encoding with an empty preset.
+                let detail = function_list.nvEncGetLastErrorString.and_then(|f| {
+                    let p = f(encoder_session);
+                    if p.is_null() {
+                        None
+                    } else {
+                        Some(CStr::from_ptr(p).to_string_lossy().into_owned())
+                    }
+                });
+                eprintln!(
+                    "[NVENC] nvEncGetEncodePresetConfigEx failed ({preset_status:?}): {}",
+                    detail.as_deref().unwrap_or("no error string")
+                );
+            }
 
             config = preset_config.presetCfg;
+            // Version the NV_ENC_CONFIG; the embedded rcParams is left with the version the preset
+            // fill returns (libnvidia-encode's own clients don't stamp it separately).
+            config.version = sv(NvStruct::Config);
             config.profileGUID = profile_guid;
-            config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CONSTQP;
-            config.rcParams.constQP.qpInterP = settings.h264_crf as u32;
-            config.rcParams.constQP.qpInterB = settings.h264_crf as u32;
-            config.rcParams.constQP.qpIntra = settings.h264_crf as u32;
-            config.frameIntervalP = 1; 
+            if settings.h264_cbr_mode {
+                // CBR: average==max bitrate; VBV = explicit kb, or ~100ms of bitrate when unset.
+                let bps = (settings.h264_bitrate_kbps.max(0) as u32).saturating_mul(1000);
+                config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR;
+                config.rcParams.averageBitRate = bps;
+                config.rcParams.maxBitRate = bps;
+                config.rcParams.vbvBufferSize = if settings.h264_vbv_buffer_size_kb > 0 {
+                    (settings.h264_vbv_buffer_size_kb as u32).saturating_mul(1000)
+                } else {
+                    (bps + 9) / 10
+                };
+            } else {
+                config.rcParams.rateControlMode = NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CONSTQP;
+                config.rcParams.constQP.qpInterP = settings.h264_crf as u32;
+                config.rcParams.constQP.qpInterB = settings.h264_crf as u32;
+                config.rcParams.constQP.qpIntra = settings.h264_crf as u32;
+            }
+            config.frameIntervalP = 1;
             config.gopLength = 0xFFFFFFFF;
+            // Pin an explicit H.264 level + idrPeriod so the very first access unit already
+            // declares the final High profile at a deterministic level. Leaving level at
+            // AUTOSELECT lets early frames advertise a lower level; when NVENC later bumps it
+            // mid-stream, Windows Chromium's D3D11VideoDecoder (and WebCodecs) must re-init and
+            // drops frames. Compute the minimum Annex-A level for this resolution+fps, floored
+            // at 5.2: <=4K stays High@5.2; >4K needs 6.0/6.1/6.2 (else NVENC init fails).
+            // idrPeriod matches the infinite GOP set above.
+            config.encodeCodecConfig.h264Config.level =
+                min_h264_level(width, height, settings.target_fps as u32);
+            config.encodeCodecConfig.h264Config.idrPeriod = 0xFFFFFFFF;
             config.encodeCodecConfig.h264Config.h264VUIParameters.videoSignalTypePresentFlag = 1;
-            config.encodeCodecConfig.h264Config.h264VUIParameters.videoFormat = 5;
+            // Signal the colorimetry in the VUI: video format unspecified, BT.709 primaries/
+            // transfer/matrix, matching the BT.709 the encoder produces.
+            config.encodeCodecConfig.h264Config.h264VUIParameters.videoFormat =
+                NV_ENC_VUI_VIDEO_FORMAT::NV_ENC_VUI_VIDEO_FORMAT_UNSPECIFIED;
             config.encodeCodecConfig.h264Config.h264VUIParameters.colourDescriptionPresentFlag = 1;
-            config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries = 1;
-            config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics = 1;
-            config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix = 1;
+            config.encodeCodecConfig.h264Config.h264VUIParameters.colourPrimaries =
+                NV_ENC_VUI_COLOR_PRIMARIES::NV_ENC_VUI_COLOR_PRIMARIES_BT709;
+            config.encodeCodecConfig.h264Config.h264VUIParameters.transferCharacteristics =
+                NV_ENC_VUI_TRANSFER_CHARACTERISTIC::NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
+            config.encodeCodecConfig.h264Config.h264VUIParameters.colourMatrix =
+                NV_ENC_VUI_MATRIX_COEFFS::NV_ENC_VUI_MATRIX_COEFFS_BT709;
             config.encodeCodecConfig.h264Config.chromaFormatIDC = if is_444 { 3 } else { 1 };
             config.encodeCodecConfig.h264Config.h264VUIParameters.videoFullRangeFlag =
                 if is_444 { 1 } else { 0 };
@@ -545,7 +806,7 @@ impl NvencEncoder {
             config.encodeCodecConfig.h264Config.set_outputAUD(1);
 
             let mut init_params = NV_ENC_INITIALIZE_PARAMS {
-                version: NV_ENC_INITIALIZE_PARAMS_VER,
+                version: sv(NvStruct::InitializeParams),
                 encodeGUID: NV_ENC_CODEC_H264_GUID,
                 presetGUID: NV_ENC_PRESET_P3_GUID,
                 tuningInfo: NV_ENC_TUNING_INFO::NV_ENC_TUNING_INFO_LOW_LATENCY,
@@ -562,11 +823,18 @@ impl NvencEncoder {
 
             let init_fn = function_list.nvEncInitializeEncoder.unwrap();
             if init_fn(encoder_session, &mut init_params) != NVENCSTATUS::NV_ENC_SUCCESS {
+                // Tear down session + ARGB buffer + CUDA context on init failure.
+                (function_list.nvEncDestroyEncoder.unwrap())(encoder_session);
+                (cuda.cuMemFree_v2)(input_device_ptr);
+                (cuda.cuCtxDestroy_v2)(cu_context);
                 return Err("Failed to initialize encoder".into());
             }
 
+            // null the pointer to the soon-to-be-moved local `config`; reconfigure repoints it.
+            init_params.encodeConfig = ptr::null_mut();
+
             let mut reg_res = NV_ENC_REGISTER_RESOURCE {
-                version: NV_ENC_REGISTER_RESOURCE_VER,
+                version: sv(NvStruct::RegisterResource),
                 resourceType: NV_ENC_INPUT_RESOURCE_TYPE::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
                 width: width,
                 height: height,
@@ -579,16 +847,28 @@ impl NvencEncoder {
 
             let register_fn = function_list.nvEncRegisterResource.unwrap();
             if register_fn(encoder_session, &mut reg_res) != NVENCSTATUS::NV_ENC_SUCCESS {
+                // Registration failed (nothing to unregister): tear down session + buffer + context.
+                (function_list.nvEncDestroyEncoder.unwrap())(encoder_session);
+                (cuda.cuMemFree_v2)(input_device_ptr);
+                (cuda.cuCtxDestroy_v2)(cu_context);
                 return Err("Failed to register input buffer".into());
             }
 
             let mut map_params = NV_ENC_MAP_INPUT_RESOURCE {
-                version: NV_ENC_MAP_INPUT_RESOURCE_VER,
+                version: sv(NvStruct::MapInputResource),
                 registeredResource: reg_res.registeredResource,
                 ..Default::default()
             };
             let map_fn = function_list.nvEncMapInputResource.unwrap();
             if map_fn(encoder_session, &mut map_params) != NVENCSTATUS::NV_ENC_SUCCESS {
+                // Map failed: unregister the resource, then tear down session + buffer + context.
+                (function_list.nvEncUnregisterResource.unwrap())(
+                    encoder_session,
+                    reg_res.registeredResource,
+                );
+                (function_list.nvEncDestroyEncoder.unwrap())(encoder_session);
+                (cuda.cuMemFree_v2)(input_device_ptr);
+                (cuda.cuCtxDestroy_v2)(cu_context);
                 return Err("Failed to map input buffer".into());
             }
 
@@ -596,12 +876,28 @@ impl NvencEncoder {
             let create_bs_fn = function_list.nvEncCreateBitstreamBuffer.unwrap();
             for _ in 0..4 {
                 let mut bitstream_params = NV_ENC_CREATE_BITSTREAM_BUFFER {
-                    version: NV_ENC_CREATE_BITSTREAM_BUFFER_VER,
+                    version: sv(NvStruct::CreateBitstreamBuffer),
                     ..Default::default()
                 };
                 if create_bs_fn(encoder_session, &mut bitstream_params)
                     != NVENCSTATUS::NV_ENC_SUCCESS
                 {
+                    // Destroy any bitstream buffers made so far, unmap + unregister the input,
+                    // then tear down session + ARGB buffer + context.
+                    for &bs in &bitstream_buffers {
+                        (function_list.nvEncDestroyBitstreamBuffer.unwrap())(encoder_session, bs);
+                    }
+                    (function_list.nvEncUnmapInputResource.unwrap())(
+                        encoder_session,
+                        map_params.mappedResource,
+                    );
+                    (function_list.nvEncUnregisterResource.unwrap())(
+                        encoder_session,
+                        reg_res.registeredResource,
+                    );
+                    (function_list.nvEncDestroyEncoder.unwrap())(encoder_session);
+                    (cuda.cuMemFree_v2)(input_device_ptr);
+                    (cuda.cuCtxDestroy_v2)(cu_context);
                     return Err("Failed to create bitstream buffer".into());
                 }
                 bitstream_buffers.push(bitstream_params.bitstreamBuffer);
@@ -629,11 +925,13 @@ impl NvencEncoder {
                 bitstream_buffers,
                 current_buffer_idx: 0,
                 dmabuf_cache: HashMap::new(),
+                pinned_hosts: HashMap::new(),
                 cuda,
                 egl,
                 _nvenc_lib: nvenc_lib,
                 nvenc_funcs: function_list,
                 recording_sink,
+                omit_stripe_headers: settings.omit_stripe_headers,
             })
         }
     }
@@ -642,6 +940,12 @@ impl NvencEncoder {
     /// @input target_qp: The new desired QP value.
     /// @return bool: True if reconfiguration occurred, false otherwise.
     unsafe fn reconfigure_if_needed(&mut self, target_qp: u32) -> bool {
+        // CBR is bitrate-controlled, so QP-based paint-over reconfigures don't apply.
+        if self.encode_config.rcParams.rateControlMode
+            == NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR
+        {
+            return false;
+        }
         if self.current_qp != target_qp {
             self.encode_config.rcParams.constQP.qpInterP = target_qp;
             self.encode_config.rcParams.constQP.qpInterB = target_qp;
@@ -649,7 +953,7 @@ impl NvencEncoder {
             self.init_params.encodeConfig = &mut self.encode_config;
 
             let mut reconfig_params = NV_ENC_RECONFIGURE_PARAMS {
-                version: NV_ENC_RECONFIGURE_PARAMS_VER,
+                version: sv(NvStruct::ReconfigureParams),
                 reInitEncodeParams: self.init_params,
                 ..Default::default()
             };
@@ -671,6 +975,55 @@ impl NvencEncoder {
         false
     }
 
+    /// Apply a runtime rate-control / framerate change to the live session: the CBR target
+    /// bitrate + VBV (ignored unless CBR is active) and the target fps. Reconfigures only when
+    /// something actually changed, so it is cheap to call every frame. Failure is logged only.
+    pub fn reconfigure_rate(&mut self, settings: &RustCaptureSettings) {
+        unsafe {
+            let mut changed = false;
+            if self.encode_config.rcParams.rateControlMode
+                == NV_ENC_PARAMS_RC_MODE::NV_ENC_PARAMS_RC_CBR
+            {
+                let bps = (settings.h264_bitrate_kbps.max(0) as u32).saturating_mul(1000);
+                let vbv = if settings.h264_vbv_buffer_size_kb > 0 {
+                    (settings.h264_vbv_buffer_size_kb as u32).saturating_mul(1000)
+                } else {
+                    bps.saturating_add(9) / 10 // ~100ms of bitrate; no u32 overflow if bps saturated
+                };
+                if self.encode_config.rcParams.averageBitRate != bps
+                    || self.encode_config.rcParams.maxBitRate != bps
+                    || self.encode_config.rcParams.vbvBufferSize != vbv
+                {
+                    self.encode_config.rcParams.averageBitRate = bps;
+                    self.encode_config.rcParams.maxBitRate = bps; // CBR: average == max
+                    self.encode_config.rcParams.vbvBufferSize = vbv;
+                    changed = true;
+                }
+            }
+            let fps = (settings.target_fps.max(1.0)) as u32; // clamp to >= 1, then truncate
+            if self.init_params.frameRateNum != fps {
+                self.init_params.frameRateNum = fps;
+                self.init_params.frameRateDen = 1;
+                changed = true;
+            }
+            if !changed {
+                return;
+            }
+            self.init_params.encodeConfig = &mut self.encode_config; // repoint at the stored config
+            let mut reconfig_params = NV_ENC_RECONFIGURE_PARAMS {
+                version: sv(NvStruct::ReconfigureParams),
+                reInitEncodeParams: self.init_params,
+                ..Default::default()
+            };
+            let reconfig_fn = self.nvenc_funcs.nvEncReconfigureEncoder.unwrap();
+            if reconfig_fn(self.encoder_session, &mut reconfig_params)
+                != NVENCSTATUS::NV_ENC_SUCCESS
+            {
+                eprintln!("[NVENC] Rate reconfigure failed.");
+            }
+        }
+    }
+
     /// @brief Submits a frame to NVENC, locks the output bitstream, and retrieves the encoded data.
     /// @input mapped_buffer: The CUDA-mapped input resource containing the image.
     /// @input frame_number: Monotonically increasing frame index.
@@ -679,6 +1032,7 @@ impl NvencEncoder {
     unsafe fn submit_frame(
         &mut self,
         mapped_buffer: NV_ENC_INPUT_PTR,
+        buffer_format: NV_ENC_BUFFER_FORMAT,
         frame_number: u64,
         force_idr: bool,
     ) -> Result<Vec<u8>, String> {
@@ -686,12 +1040,12 @@ impl NvencEncoder {
         self.current_buffer_idx = (self.current_buffer_idx + 1) % self.bitstream_buffers.len();
 
         let mut pic_params = NV_ENC_PIC_PARAMS {
-            version: NV_ENC_PIC_PARAMS_VER,
+            version: sv(NvStruct::PicParams),
             inputWidth: self.width,
             inputHeight: self.height,
             inputBuffer: mapped_buffer,
             outputBitstream: output_bitstream,
-            bufferFmt: NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
+            bufferFmt: buffer_format,
             pictureStruct: NV_ENC_PIC_STRUCT::NV_ENC_PIC_STRUCT_FRAME,
             encodePicFlags: if force_idr {
                 NV_ENC_PIC_FLAGS::NV_ENC_PIC_FLAG_FORCEIDR as u32
@@ -701,14 +1055,6 @@ impl NvencEncoder {
             ..Default::default()
         };
 
-        if mapped_buffer == self.nv12_mapped_buffer.unwrap_or(ptr::null_mut()) {
-            if self.encode_config.encodeCodecConfig.h264Config.chromaFormatIDC == 3 {
-                pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_YUV444;
-            } else {
-                pic_params.bufferFmt = NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12;
-            }
-        }
-
         let encode_fn = self.nvenc_funcs.nvEncEncodePicture.unwrap();
         let res = encode_fn(self.encoder_session, &mut pic_params);
         if res != NVENCSTATUS::NV_ENC_SUCCESS {
@@ -716,7 +1062,7 @@ impl NvencEncoder {
         }
 
         let mut lock_params = NV_ENC_LOCK_BITSTREAM {
-            version: NV_ENC_LOCK_BITSTREAM_VER,
+            version: sv(NvStruct::LockBitstream),
             outputBitstream: output_bitstream,
             ..Default::default()
         };
@@ -729,14 +1075,24 @@ impl NvencEncoder {
 
         let data_ptr = lock_params.bitstreamBufferPtr as *const u8;
         let data_size = lock_params.bitstreamSizeInBytes as usize;
-        let mut output = Vec::with_capacity(10 + data_size);
-
-        output.push(0x04);
-        output.push(if force_idr { 0x01 } else { 0x00 });
-        output.extend_from_slice(&(frame_number as u16).to_be_bytes());
-        output.extend_from_slice(&0u16.to_be_bytes());
-        output.extend_from_slice(&(self.width as u16).to_be_bytes());
-        output.extend_from_slice(&(self.height as u16).to_be_bytes());
+        let header_sz = if self.omit_stripe_headers { 0 } else { 10 };
+        let mut output = Vec::with_capacity(header_sz + data_size);
+
+        if !self.omit_stripe_headers {
+            // Derive the type byte from the ACTUAL encoded picture type
+            // (IDR=0x01, I=0x02, P=0x00), not from the force_idr request.
+            let type_hdr = match lock_params.pictureType {
+                NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_IDR => 0x01u8,
+                NV_ENC_PIC_TYPE::NV_ENC_PIC_TYPE_I => 0x02u8,
+                _ => 0x00u8,
+            };
+            output.push(0x04);
+            output.push(type_hdr);
+            output.extend_from_slice(&(frame_number as u16).to_be_bytes());
+            output.extend_from_slice(&0u16.to_be_bytes());
+            output.extend_from_slice(&(self.width as u16).to_be_bytes());
+            output.extend_from_slice(&(self.height as u16).to_be_bytes());
+        }
 
         if data_size > 0 && !data_ptr.is_null() {
             let slice = std::slice::from_raw_parts(data_ptr, data_size);
@@ -765,8 +1121,10 @@ impl NvencEncoder {
     ) -> Result<Vec<u8>, String> {
         unsafe {
             self.reconfigure_if_needed(target_qp);
-            let _ = (self.cuda.cuCtxPushCurrent_v2)(self.cuda_context);
+            // Extract fd before pushing the context so the `?` can't return with
+            // the context left pushed (stack imbalance).
             let fd = dmabuf.handles().next().ok_or("No handles")?.as_raw_fd();
+            let _ = (self.cuda.cuCtxPushCurrent_v2)(self.cuda_context);
 
             if !self.dmabuf_cache.contains_key(&fd) {
                 let stride = dmabuf.strides().next().unwrap_or(0) as i32;
@@ -870,7 +1228,86 @@ impl NvencEncoder {
                 return Err("Sanitization copy failed".into());
             }
 
-            let result = self.submit_frame(self.mapped_input_buffer, frame_number, force_idr);
+            let result = self.submit_frame(
+                self.mapped_input_buffer,
+                NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
+                frame_number,
+                force_idr,
+            );
+            (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
+            result
+        }
+    }
+
+    /// @brief Encodes a host ARGB frame directly, with no explicit ARGB->NV12 conversion.
+    ///
+    /// Uploads the packed ARGB rows straight into the registered ARGB input surface
+    /// and lets NVENC's hardware CSC produce YUV. Bytes must be in NVENC ARGB order
+    /// (B,G,R,A in memory), i.e. the host BGRA layout an XShm grab produces.
+    /// `src_stride` is the source row stride in bytes (>= width*4).
+    /// @input argb: Host pixel buffer (height rows of width*4 at src_stride).
+    /// @input src_stride: Source row stride in bytes.
+    /// @input frame_number: Frame index.
+    /// @input target_qp: Desired quality parameter.
+    /// @input force_idr: Force keyframe generation.
+    /// @return Result containing encoded byte vector.
+    pub fn encode_cpu_argb(
+        &mut self,
+        argb: &[u8],
+        src_stride: usize,
+        frame_number: u64,
+        target_qp: u32,
+        force_idr: bool,
+    ) -> Result<Vec<u8>, String> {
+        unsafe {
+            self.reconfigure_if_needed(target_qp);
+            let _ = (self.cuda.cuCtxPushCurrent_v2)(self.cuda_context);
+
+            let width_bytes = (self.width * 4) as usize;
+            let rows = self.height as usize;
+            // Source must hold `rows` lines of width*4 bytes at src_stride.
+            let needed = if rows == 0 { 0 } else { src_stride * (rows - 1) + width_bytes };
+            if src_stride < width_bytes || argb.len() < needed {
+                (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
+                return Err(format!(
+                    "ARGB buffer too small: len={} need>={} (stride={}, {}x{})",
+                    argb.len(), needed, src_stride, self.width, self.height
+                ));
+            }
+
+            // Page-lock the persistent, bounded shm source once so cuMemcpy2D becomes a direct
+            // pinned DMA instead of a pageable copy staged through a driver bounce buffer.
+            if std::env::var("PIXELFLUX_NVENC_PIN").as_deref() != Ok("0") {
+                let base = argb.as_ptr() as usize;
+                if let std::collections::hash_map::Entry::Vacant(e) = self.pinned_hosts.entry(base) {
+                    let st = (self.cuda.cuMemHostRegister_v2)(argb.as_ptr() as *mut c_void, argb.len(), 0);
+                    // 0 sentinel: registration failed -> stay pageable, never re-probe.
+                    e.insert(if st == CUresult::CUDA_SUCCESS { argb.len() } else { 0 });
+                }
+            }
+
+            let copy = CUDA_MEMCPY2D {
+                srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
+                srcHost: argb.as_ptr() as *const c_void,
+                srcPitch: src_stride,
+                dstMemoryType: CUmemorytype::CU_MEMORYTYPE_DEVICE,
+                dstDevice: self.input_device_ptr,
+                dstPitch: self.input_pitch,
+                WidthInBytes: width_bytes,
+                Height: rows,
+                ..Default::default()
+            };
+            if (self.cuda.cuMemcpy2D_v2)(&copy) != CUresult::CUDA_SUCCESS {
+                (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
+                return Err("ARGB host->device copy failed".into());
+            }
+
+            let result = self.submit_frame(
+                self.mapped_input_buffer,
+                NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_ARGB,
+                frame_number,
+                force_idr,
+            );
             (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
             result
         }
@@ -924,7 +1361,7 @@ impl NvencEncoder {
                 };
 
                 let mut reg_res = NV_ENC_REGISTER_RESOURCE {
-                    version: NV_ENC_REGISTER_RESOURCE_VER,
+                    version: sv(NvStruct::RegisterResource),
                     resourceType:
                         NV_ENC_INPUT_RESOURCE_TYPE::NV_ENC_INPUT_RESOURCE_TYPE_CUDADEVICEPTR,
                     width: self.width,
@@ -938,17 +1375,23 @@ impl NvencEncoder {
 
                 let register_fn = self.nvenc_funcs.nvEncRegisterResource.unwrap();
                 if register_fn(self.encoder_session, &mut reg_res) != NVENCSTATUS::NV_ENC_SUCCESS {
+                    (self.cuda.cuMemFree_v2)(d_ptr);
                     (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
                     return Err("Failed to register raw input buffer".into());
                 }
 
                 let mut map_params = NV_ENC_MAP_INPUT_RESOURCE {
-                    version: NV_ENC_MAP_INPUT_RESOURCE_VER,
+                    version: sv(NvStruct::MapInputResource),
                     registeredResource: reg_res.registeredResource,
                     ..Default::default()
                 };
                 let map_fn = self.nvenc_funcs.nvEncMapInputResource.unwrap();
                 if map_fn(self.encoder_session, &mut map_params) != NVENCSTATUS::NV_ENC_SUCCESS {
+                    (self.nvenc_funcs.nvEncUnregisterResource.unwrap())(
+                        self.encoder_session,
+                        reg_res.registeredResource,
+                    );
+                    (self.cuda.cuMemFree_v2)(d_ptr);
                     (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
                     return Err("Failed to map raw input buffer".into());
                 }
@@ -966,6 +1409,12 @@ impl NvencEncoder {
 
             if is_444 {
                 let plane_size = width_bytes * height;
+                // The Y copy reads exactly `plane_size` bytes from the host slice; refuse rather
+                // than read out of bounds if the caller handed us a short buffer.
+                if raw_data.len() < plane_size {
+                    (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
+                    return Err("raw frame smaller than the Y plane (444)".into());
+                }
 
                 let copy_y = CUDA_MEMCPY2D {
                     srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
@@ -983,7 +1432,9 @@ impl NvencEncoder {
                     return Err("Failed to copy Y plane (444)".into());
                 }
 
-                if plane_size < raw_data.len() {
+                // Each chroma copy reads a FULL plane_size from its offset: require the whole
+                // plane to be present (not just its start offset) so we never read past the end.
+                if raw_data.len() >= 2 * plane_size {
                     let copy_u = CUDA_MEMCPY2D {
                         srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
                         srcHost: raw_data[plane_size..].as_ptr() as *const c_void,
@@ -1001,7 +1452,7 @@ impl NvencEncoder {
                     }
                 }
 
-                if 2 * plane_size < raw_data.len() {
+                if raw_data.len() >= 3 * plane_size {
                     let copy_v = CUDA_MEMCPY2D {
                         srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
                         srcHost: raw_data[2 * plane_size..].as_ptr() as *const c_void,
@@ -1019,6 +1470,12 @@ impl NvencEncoder {
                     }
                 }
             } else {
+                let y_size = width_bytes * height;
+                // The Y copy reads `y_size` bytes; refuse a short buffer rather than read OOB.
+                if raw_data.len() < y_size {
+                    (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
+                    return Err("raw frame smaller than the Y plane".into());
+                }
                 let copy_y = CUDA_MEMCPY2D {
                     srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
                     srcHost: raw_data.as_ptr() as *const c_void,
@@ -1035,8 +1492,10 @@ impl NvencEncoder {
                     return Err("Failed to copy Y plane".into());
                 }
 
-                let uv_offset = width_bytes * height;
-                if uv_offset < raw_data.len() {
+                let uv_offset = y_size;
+                // The interleaved UV copy reads width_bytes * (height/2) bytes from uv_offset:
+                // require that whole span, not just its start, so we never read past the end.
+                if raw_data.len() >= uv_offset + width_bytes * (height / 2) {
                     let copy_uv = CUDA_MEMCPY2D {
                         srcMemoryType: CUmemorytype::CU_MEMORYTYPE_HOST,
                         srcHost: raw_data[uv_offset..].as_ptr() as *const c_void,
@@ -1055,10 +1514,93 @@ impl NvencEncoder {
                 }
             }
 
+            let raw_format = if self.encode_config.encodeCodecConfig.h264Config.chromaFormatIDC == 3 {
+                NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_YUV444
+            } else {
+                NV_ENC_BUFFER_FORMAT::NV_ENC_BUFFER_FORMAT_NV12
+            };
             let result =
-                self.submit_frame(self.nv12_mapped_buffer.unwrap(), frame_number, force_idr);
+                self.submit_frame(self.nv12_mapped_buffer.unwrap(), raw_format, frame_number, force_idr);
             (self.cuda.cuCtxPopCurrent_v2)(ptr::null_mut());
             result
         }
     }
 }
+
+#[cfg(test)]
+mod version_tests {
+    use super::*;
+
+    /// Each version-tagged struct paired with its pinned compile-time constant, in one fixed order.
+    const ALL: [(NvStruct, u32); 12] = [
+        (NvStruct::FunctionList, NV_ENCODE_API_FUNCTION_LIST_VER),
+        (NvStruct::OpenSessionExParams, NV_ENC_OPEN_ENCODE_SESSION_EX_PARAMS_VER),
+        (NvStruct::Config, NV_ENC_CONFIG_VER),
+        (NvStruct::RcParams, NV_ENC_RC_PARAMS_VER),
+        (NvStruct::PresetConfig, NV_ENC_PRESET_CONFIG_VER),
+        (NvStruct::InitializeParams, NV_ENC_INITIALIZE_PARAMS_VER),
+        (NvStruct::ReconfigureParams, NV_ENC_RECONFIGURE_PARAMS_VER),
+        (NvStruct::RegisterResource, NV_ENC_REGISTER_RESOURCE_VER),
+        (NvStruct::MapInputResource, NV_ENC_MAP_INPUT_RESOURCE_VER),
+        (NvStruct::CreateBitstreamBuffer, NV_ENC_CREATE_BITSTREAM_BUFFER_VER),
+        (NvStruct::PicParams, NV_ENC_PIC_PARAMS_VER),
+        (NvStruct::LockBitstream, NV_ENC_LOCK_BITSTREAM_VER),
+    ];
+
+    // At the pinned nvcodec-sys version the table has to give back every compile-time
+    // NV_ENC_*_VER constant unchanged, so a current driver sees byte-for-byte identical words.
+    // It also fails loudly when the bundled header is bumped without extending the revision table.
+    #[test]
+    fn table_is_identity_for_pinned_version() {
+        let (major, minor) = (NVENCAPI_VERSION & 0xFF, (NVENCAPI_VERSION >> 24) & 0xFF);
+        ALL.iter().for_each(|&(tag, pinned)| {
+            assert_eq!(nvenc_struct_ver(tag, major, minor), pinned, "{:?}", tag);
+        });
+        // Round trip: the decoded pair must reassemble into the packed version constant.
+        assert_eq!(major | (minor << 24), NVENCAPI_VERSION);
+    }
+
+    // The literal NV_ENC_*_VER words of each SDK, hardcoded from nvEncodeAPI.h at the FFmpeg
+    // nv-codec-headers tag named on each row. For every negotiable version the table must
+    // reproduce them, in ALL order. (The n10.0.26.2 header writes the flag as `1<<31` rather
+    // than `1u<<31`; it is the same bit.)
+    #[test]
+    fn table_matches_historical_headers() {
+        #[rustfmt::skip]
+        let per_sdk: [(u32, u32, [u32; 12]); 7] = [
+            // n10.0.26.2 (SDK 10.0)
+            (10, 0, [0x7002000A, 0x7001000A, 0xF007000A, 0x7001000A, 0xF004000A, 0xF005000A, 0xF001000A,
+                     0x7003000A, 0x7004000A, 0x7001000A, 0xF004000A, 0x7001000A]),
+            // n11.0.10.3 (SDK 11.0)
+            (11, 0, [0x7002000B, 0x7001000B, 0xF007000B, 0x7001000B, 0xF004000B, 0xF005000B, 0xF001000B,
+                     0x7003000B, 0x7004000B, 0x7001000B, 0xF004000B, 0x7001000B]),
+            // n11.1.5.3 (SDK 11.1)
+            (11, 1, [0x7102000B, 0x7101000B, 0xF107000B, 0x7101000B, 0xF104000B, 0xF105000B, 0xF101000B,
+                     0x7103000B, 0x7104000B, 0x7101000B, 0xF104000B, 0x7101000B]),
+            // n12.0.16.1 (SDK 12.0)
+            (12, 0, [0x7002000C, 0x7001000C, 0xF008000C, 0x7001000C, 0xF004000C, 0xF005000C, 0xF001000C,
+                     0x7004000C, 0x7004000C, 0x7001000C, 0xF006000C, 0x7002000C]),
+            // n12.1.14.0 (SDK 12.1)
+            (12, 1, [0x7102000C, 0x7101000C, 0xF108000C, 0x7101000C, 0xF104000C, 0xF106000C, 0xF101000C,
+                     0x7104000C, 0x7104000C, 0x7101000C, 0xF106000C, 0xF101000C]),
+            // n12.2.72.0 (SDK 12.2)
+            (12, 2, [0x7202000C, 0x7201000C, 0xF209000C, 0x7201000C, 0xF205000C, 0xF207000C, 0xF202000C,
+                     0x7205000C, 0x7204000C, 0x7201000C, 0xF207000C, 0xF202000C]),
+            // n13.0.19.0 (SDK 13.0)
+            (13, 0, [0x7002000D, 0x7001000D, 0xF009000D, 0x7001000D, 0xF005000D, 0xF007000D, 0xF002000D,
+                     0x7005000D, 0x7004000D, 0x7001000D, 0xF007000D, 0xF002000D]),
+        ];
+        for (major, minor, header_words) in per_sdk {
+            for (tag, word) in ALL.iter().map(|&(tag, _)| tag).zip(header_words) {
+                assert_eq!(
+                    nvenc_struct_ver(tag, major, minor),
+                    word,
+                    "{:?} at {}.{}",
+                    tag,
+                    major,
+                    minor
+                );
+            }
+        }
+    }
+}
diff --git a/pixelflux/src/encoders/oh264.rs b/pixelflux/src/encoders/oh264.rs
new file mode 100644
index 0000000..28da2b2
--- /dev/null
+++ b/pixelflux/src/encoders/oh264.rs
@@ -0,0 +1,450 @@
+//! Software H.264 via Cisco OpenH264 (BSD-licensed). A full-frame, 4:2:0-only,
+//! Annex-B alternative to the x264 stripe path, selected with `use_openh264`.
+//!
+//! Keyframes are driven externally (an effectively infinite intra period plus
+//! on-demand `force_intra_frame`), matching the NVENC/x264 strict-GOP streaming
+//! behavior. Host ARGB is converted to I420 with the same BT.709 path the x264
+//! encoder uses, then fed to OpenH264 as borrowed planes.
+
+use crate::recording_sink::RecordingSink;
+use std::sync::Arc;
+use crate::RustCaptureSettings;
+use openh264::encoder::{
+    BitRate, Complexity, Encoder, EncoderConfig, FrameRate, FrameType, IntraFramePeriod, QpRange,
+    RateControlMode, UsageType,
+};
+use openh264::formats::YUVSlices;
+use openh264::OpenH264API;
+use yuv::{BufferStoreMut, YuvConversionMode, YuvPlanarImageMut, YuvRange, YuvStandardMatrix};
+
+/// Intra period in frames, large enough that OpenH264 effectively never inserts its own periodic
+/// IDR (300_000 frames is ~83 min at 60 fps); keyframes come from the pipeline's force-IDR decision.
+const INFINITE_INTRA_PERIOD: u32 = 300_000;
+
+/// Deliberately oversized bitrate ceiling (well under level 5.2's cap). It is used both as the
+/// CRF-mode bitrate budget and as the max bitrate that live CBR changes raise the session to:
+/// init pins the max to the starting target, so a raise above it would otherwise be rejected.
+const BITRATE_CEILING_BPS: u32 = 100_000_000;
+
+pub struct Openh264Encoder {
+    encoder: Encoder, // the OpenH264 encoder session
+    width: usize, // encoded width in pixels; `new` clamps it to >= 2 and rounds down to even
+    height: usize, // encoded height in pixels; same clamping/rounding as `width`
+    y_buf: Vec<u8>, // reusable I420 Y plane, width * height bytes
+    u_buf: Vec<u8>, // reusable I420 U plane, (width / 2) * (height / 2) bytes
+    v_buf: Vec<u8>, // reusable I420 V plane, (width / 2) * (height / 2) bytes
+    current_bitrate_bps: i32, // bitrate target last recorded as accepted (bits/s); seeded from h264_bitrate_kbps
+    // true = CBR (RC_BITRATE_MODE, target bitrate); false = CRF/CQP (RC_BITRATE_MODE, pinned QP).
+    is_cbr: bool,
+    omit_stripe_headers: bool, // true: output is bare Annex-B, without the 10-byte wire header
+    recording_sink: Option<Arc<RecordingSink>>, // optional sink that receives each frame's raw Annex-B
+}
+
+impl Openh264Encoder {
+    /// Build an OpenH264 encoder from the capture settings. Dimensions are clamped to >= 2 and
+    /// rounded down to even (4:2:0 requires it); a NaN, infinite or sub-1 `target_fps` becomes 30.
+    /// Returns None on init failure so the caller can fall back to the x264 software path. Like the
+    /// NVENC/VAAPI encoders, it writes raw Annex-B to `recording_sink` and prepends the wire header.
+    pub fn new(settings: &RustCaptureSettings, recording_sink: Option<Arc<RecordingSink>>) -> Option<Self> {
+        let width = (settings.width.max(2) as usize) & !1;
+        let height = (settings.height.max(2) as usize) & !1;
+        let bps = (settings.h264_bitrate_kbps.max(1) as u32).saturating_mul(1000);
+        let req_fps = settings.target_fps;
+        // NaN fails every comparison, so a bare `< 1.0` test lets it through: validate positively.
+        let fps = if req_fps.is_finite() && req_fps >= 1.0 { req_fps as f32 } else { 30.0 };
+        let threads = std::thread::available_parallelism()
+            .map_or(1, |n| n.get().saturating_sub(1).clamp(1, 4)) as u16;
+        let is_cbr = settings.h264_cbr_mode;
+        let crf = settings.h264_crf.clamp(0, 51) as u8;
+
+        let base = EncoderConfig::new()
+            .max_frame_rate(FrameRate::from_hz(fps))
+            .usage_type(UsageType::ScreenContentRealTime)
+            .complexity(Complexity::Low)
+            // No encoder-level frame drops; selkies handles congestion.
+            .skip_frames(false)
+            // Auto-disabled for screen content; set explicitly to avoid the init warnings.
+            .adaptive_quantization(false)
+            .background_detection(false)
+            // Silence the library's internal stderr trace (WELS_LOG_QUIET).
+            .debug(false)
+            .intra_frame_period(IntraFramePeriod::from_num_frames(INFINITE_INTRA_PERIOD))
+            .num_threads(threads);
+        let config = if is_cbr {
+            // CBR: bitrate-mode rate control aimed at the requested target; reconfigure_rate moves it live.
+            base.bitrate(BitRate::from_bps(bps)).rate_control_mode(RateControlMode::Bitrate)
+        } else {
+            // CRF (CQP): pin min==max QP to h264_crf under bitrate-mode RC, with a bitrate budget so
+            // large the QP clamp always dominates; every frame then uses QP = h264_crf, matching
+            // NVENC CONSTQP / VAAPI CQP. (Empirically RC_OFF ignores the QP range and RC_QUALITY
+            // rejects a min==max range; bitrate-mode + pinned QP is what holds the QP constant.)
+            base.bitrate(BitRate::from_bps(BITRATE_CEILING_BPS))
+                .rate_control_mode(RateControlMode::Bitrate)
+                .qp(QpRange::new(crf, crf))
+        };
+
+        let encoder = Encoder::with_api_config(OpenH264API::from_source(), config).ok()?;
+        let (cw, ch) = (width / 2, height / 2);
+        Some(Self {
+            encoder,
+            width,
+            height,
+            y_buf: vec![0u8; width * height],
+            u_buf: vec![0u8; cw * ch],
+            v_buf: vec![0u8; cw * ch],
+            current_bitrate_bps: bps as i32,
+            is_cbr,
+            omit_stripe_headers: settings.omit_stripe_headers,
+            recording_sink,
+        })
+    }
+
+    /// Apply a live bitrate (kbps) / framerate change through SetOption, without re-initializing,
+    /// so the reference chain (and infinite GOP) survives -- this lets the web UI's bitrate slider
+    /// take effect mid-stream, like NVENC and x264. An `fps` below 1 is skipped; failures are logged.
+    pub fn reconfigure_rate(&mut self, bitrate_kbps: i32, fps: f64) {
+        // Bitrate applies only in CBR mode; in CRF/CQP the QP is fixed at construction (a CRF
+        // change restarts the capture, rebuilding the encoder — there is no live-QP setter here).
+        if self.is_cbr {
+            let bps = bitrate_kbps.max(1).saturating_mul(1000);
+            if bps != self.current_bitrate_bps {
+                self.set_live_bitrate(bps);
+            }
+        }
+        if fps >= 1.0 {
+            let mut rate = fps as f32;
+            let opt = openh264_sys2::ENCODER_OPTION_FRAME_RATE;
+            // Check the status (0 = success) instead of dropping it, as the bitrate path does.
+            let ret = unsafe { self.encoder.raw_api().set_option(opt, std::ptr::addr_of_mut!(rate).cast()) };
+            if ret != 0 {
+                eprintln!("[openh264] live frame-rate change to {rate} fps failed (code {ret})");
+            }
+        }
+    }
+
+    /// Retarget the CBR bitrate to `bps` on the running session. Init pins OpenH264's max
+    /// bitrate to the construction-time target (EncoderConfig has no separate max) and SetOption
+    /// rejects -- after partially applying -- any target above that max, so the ceiling is lifted
+    /// first, for the overall stream and for layer 0 (verification checks the per-layer value).
+    /// On any failure the previously accepted rate is kept or restored and a message is logged.
+    fn set_live_bitrate(&mut self, bps: i32) {
+        use openh264_sys2::{ENCODER_OPTION_BITRATE, ENCODER_OPTION_MAX_BITRATE};
+        use openh264_sys2::{SPATIAL_LAYER_0, SPATIAL_LAYER_ALL};
+        let previous = self.current_bitrate_bps;
+        let ceiling = BITRATE_CEILING_BPS as i32;
+
+        // Phase 1: raise the max-bitrate ceiling; give up at the first layer that refuses.
+        for layer in [SPATIAL_LAYER_ALL, SPATIAL_LAYER_0] {
+            let ret = self.set_bitrate_option(ENCODER_OPTION_MAX_BITRATE, layer, ceiling);
+            if ret != 0 {
+                eprintln!(
+                    "[openh264] live bitrate change to {bps} bps failed raising the max-bitrate ceiling \
+                     (code {ret}); keeping {} bps",
+                    previous
+                );
+                return;
+            }
+        }
+
+        // Phase 2: move the target itself and record it once the encoder accepts it.
+        let ret = self.set_bitrate_option(ENCODER_OPTION_BITRATE, SPATIAL_LAYER_ALL, bps);
+        if ret == 0 {
+            self.current_bitrate_bps = bps;
+            return;
+        }
+        // The rejected SetOption has already changed the encoder's target, so push the last
+        // accepted rate back to keep the encoder consistent with current_bitrate_bps.
+        self.set_bitrate_option(ENCODER_OPTION_BITRATE, SPATIAL_LAYER_ALL, previous);
+        eprintln!(
+            "[openh264] live bitrate change to {bps} bps failed (code {ret}); keeping {} bps",
+            previous
+        );
+    }
+
+    /// Issue one SetOption call with an SBitrateInfo payload (BITRATE / MAX_BITRATE); 0 means success.
+    fn set_bitrate_option(
+        &mut self,
+        option: openh264_sys2::ENCODER_OPTION,
+        layer: openh264_sys2::LAYER_NUM,
+        bps: i32,
+    ) -> i32 {
+        let mut payload = unsafe { std::mem::zeroed::<openh264_sys2::SBitrateInfo>() };
+        payload.iBitrate = bps;
+        payload.iLayer = layer;
+        unsafe { self.encoder.raw_api().set_option(option, (&mut payload as *mut openh264_sys2::SBitrateInfo).cast()) }
+    }
+
+    /// Convert one host frame to I420 and encode it as H.264. `stride` is the source row pitch
+    /// in bytes. `rgba_input` picks the byte order: false = B,G,R,A (X11 XShm), true = R,G,B,A
+    /// (Wayland GL readback). A set `force_idr` makes this frame an IDR. The result is the
+    /// 10-byte wire header followed by Annex-B (only the Annex-B when `omit_stripe_headers`),
+    /// and the raw Annex-B is additionally passed to the recording sink when one is attached.
+    pub fn encode_host_argb(
+        &mut self,
+        argb: &[u8],
+        stride: usize,
+        frame_number: u64,
+        force_idr: bool,
+        rgba_input: bool,
+    ) -> Result<Vec<u8>, String> {
+        // Request the keyframe up front so the encode below emits this frame as an IDR.
+        if force_idr {
+            self.encoder.force_intra_frame();
+        }
+        let (luma_stride, chroma_stride) = (self.width, self.width / 2);
+
+        // Stage 1: colour-convert into the reusable scratch planes (limited-range BT.709).
+        // The inner scope ends the mutable plane borrows before the encoder reads the planes.
+        {
+            let mut planes = YuvPlanarImageMut {
+                y_plane: BufferStoreMut::Borrowed(&mut self.y_buf),
+                y_stride: luma_stride as u32,
+                u_plane: BufferStoreMut::Borrowed(&mut self.u_buf),
+                u_stride: chroma_stride as u32,
+                v_plane: BufferStoreMut::Borrowed(&mut self.v_buf),
+                v_stride: chroma_stride as u32,
+                width: self.width as u32,
+                height: self.height as u32,
+            };
+            let to_i420 = if rgba_input { yuv::rgba_to_yuv420 } else { yuv::bgra_to_yuv420 };
+            let (range, matrix, mode) = (YuvRange::Limited, YuvStandardMatrix::Bt709, YuvConversionMode::Fast);
+            to_i420(&mut planes, argb, stride as u32, range, matrix, mode)
+                .map_err(|e| format!("rgb-to-yuv420 failed: {e:?}"))?;
+        }
+
+        // Stage 2: hand the planes to OpenH264 as borrowed slices; an encode error ends the call.
+        let slices = YUVSlices::new(
+            (&self.y_buf, &self.u_buf, &self.v_buf),
+            (self.width, self.height),
+            (luma_stride, chroma_stride, chroma_stride),
+        );
+        let bitstream =
+            self.encoder.encode(&slices).map_err(|e| format!("openh264 encode failed: {e:?}"))?;
+
+        // Stage 3: optional 10-byte wire header, shared with the NVENC/VAAPI/x264 full-frame paths.
+        let header_sz = if self.omit_stripe_headers { 0 } else { 10 };
+        let mut out = Vec::with_capacity(header_sz);
+        if !self.omit_stripe_headers {
+            // The type byte reflects the picture type the encoder ACTUALLY produced
+            // (IDR=0x01, I=0x02, anything else=0x00), not the force_idr request.
+            let type_hdr: u8 = match bitstream.frame_type() {
+                FrameType::IDR => 0x01,
+                FrameType::I => 0x02,
+                _ => 0x00,
+            };
+            out.extend_from_slice(&[0x04, type_hdr]);
+            // Then four big-endian u16 fields: frame id, a zero field, width, height.
+            for field in [frame_number as u16, 0u16, self.width as u16, self.height as u16] {
+                out.extend_from_slice(&field.to_be_bytes());
+            }
+        }
+        bitstream.write_vec(&mut out);
+
+        // Nothing beyond the header means no payload (e.g. a skipped frame): emit nothing
+        // rather than a bare header.
+        if out.len() == header_sz {
+            return Ok(Vec::new());
+        }
+        // The recording sink receives the Annex-B payload only, never the wire header.
+        if let Some(sink) = self.recording_sink.as_ref() {
+            sink.write_frame(&out[header_sz..]);
+        }
+        Ok(out)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // Synthetic high-entropy frame whose pattern shifts with `t`, so the rate controller gets
+    // non-trivial P-frames. Per-pixel noise like this is fine under CBR (the RC raises QP to fit)
+    // but pathological at a pinned low QP, which is why the CRF test uses gradient_frame instead.
+    fn busy_frame(w: usize, h: usize, t: usize) -> Vec<u8> {
+        let mut pixels = vec![0u8; w * h * 4];
+        for (idx, px) in pixels.chunks_exact_mut(4).enumerate() {
+            let (x, y) = (idx % w, idx / w);
+            let base = (((x + t) * 7 + (y + t) * 13) & 0xFF) as u8;
+            px.copy_from_slice(&[
+                base,
+                base.wrapping_mul(3),
+                base.wrapping_add((x + t) as u8),
+                255,
+            ]);
+        }
+        pixels
+    }
+
+    // Smooth diagonal gradient that slides with `t` (moderate, screen-like detail): compressible
+    // enough not to overflow at a low pinned QP, yet detailed enough that QP visibly scales size.
+    fn gradient_frame(w: usize, h: usize, t: usize) -> Vec<u8> {
+        let mut pixels = Vec::with_capacity(w * h * 4);
+        for y in 0..h {
+            for x in 0..w {
+                let base = ((x * 3 + y * 5 + t * 4) & 0xFF) as u8;
+                // Four bytes per pixel; the last one is a constant 255.
+                pixels.push(base);
+                pixels.push(base.wrapping_add(40));
+                pixels.push(base.wrapping_add(80));
+                pixels.push(255);
+            }
+        }
+        pixels
+    }
+
+    #[test]
+    fn encodes_annexb_idr_then_p() {
+        let (w, h) = (128usize, 96usize);
+        let mut settings = RustCaptureSettings::default();
+        settings.width = w as i32;
+        settings.height = h as i32;
+        settings.h264_bitrate_kbps = 2000;
+        settings.target_fps = 30.0;
+        let mut encoder = Openh264Encoder::new(&settings, None).expect("openh264 init");
+        let frame = busy_frame(w, h, 0);
+        let keyframe = encoder.encode_host_argb(&frame, w * 4, 0, true, false).expect("encode idr");
+        assert!(keyframe.len() > 10, "IDR frame should produce output");
+        // Wire header layout: 0x04, type, frame_id u16, y_start u16, width u16, height u16.
+        let (header, payload) = keyframe.split_at(10);
+        assert_eq!(header[0], 0x04);
+        assert_eq!(header[1], 0x01, "forced first frame must be typed IDR");
+        assert_eq!(&header[2..6], &[0, 0, 0, 0], "frame_id 0 and y_start 0");
+        assert_eq!(&header[6..10], &[0, 128, 0, 96], "width/height big-endian");
+        let annexb = payload.starts_with(&[0, 0, 0, 1]) || payload.starts_with(&[0, 0, 1]);
+        assert!(annexb, "payload must be Annex-B (start code prefixed)");
+        // Same pixels again: no scene change, so the encoder must answer with a P frame.
+        let delta = encoder.encode_host_argb(&frame, w * 4, 7, false, false).expect("encode p");
+        assert!(delta.len() > 10, "second frame should produce output");
+        assert_eq!(delta[1], 0x00, "unforced static second frame must be typed delta");
+        assert_eq!(&delta[2..4], &[0, 7], "frame_id must come from frame_number");
+    }
+
+    #[test]
+    fn omit_stripe_headers_yields_bare_annexb() {
+        let mut settings = RustCaptureSettings::default();
+        settings.omit_stripe_headers = true;
+        settings.width = 128;
+        settings.height = 96;
+        settings.h264_bitrate_kbps = 2000;
+        settings.target_fps = 30.0;
+        let mut encoder = Openh264Encoder::new(&settings, None).expect("openh264 init");
+        let frame = busy_frame(128, 96, 0);
+        let out = encoder.encode_host_argb(&frame, 128 * 4, 0, true, false).expect("encode");
+        // With headers omitted the very first bytes must already be a 3- or 4-byte start code.
+        let bare = out.starts_with(&[0, 0, 0, 1]) || out.starts_with(&[0, 0, 1]);
+        assert!(bare, "omit_stripe_headers output must be bare Annex-B");
+    }
+
+    #[test]
+    fn lower_bitrate_yields_smaller_output() {
+        let (w, h, stride) = (256usize, 192usize, 256 * 4);
+        // Bytes produced by 23 unforced frames (after a discarded forced first frame) at `kbps`.
+        let delta_bytes_at = |kbps: i32| -> usize {
+            let mut cfg = RustCaptureSettings::default();
+            cfg.width = w as i32;
+            cfg.height = h as i32;
+            cfg.h264_cbr_mode = true; // bitrate only controls output in CBR mode
+            cfg.h264_bitrate_kbps = kbps;
+            cfg.target_fps = 30.0;
+            let mut enc = Openh264Encoder::new(&cfg, None).unwrap();
+            let _ = enc.encode_host_argb(&busy_frame(w, h, 0), stride, 0, true, false).unwrap();
+            (1..24).map(|t| enc.encode_host_argb(&busy_frame(w, h, t), stride, t as u64, false, false).unwrap().len()).sum()
+        };
+        let (high, low) = (delta_bytes_at(8000), delta_bytes_at(200));
+        assert!(low < high, "lower target bitrate should compress harder (low={low}, high={high})");
+    }
+
+    #[test]
+    fn live_reconfigure_rate_takes_effect() {
+        let (w, h, stride) = (256usize, 192usize, 256 * 4);
+        let mut cfg = RustCaptureSettings::default();
+        cfg.width = w as i32;
+        cfg.height = h as i32;
+        cfg.h264_cbr_mode = true; // live bitrate only applies in CBR mode
+        cfg.h264_bitrate_kbps = 8000;
+        cfg.target_fps = 30.0;
+        let mut enc = Openh264Encoder::new(&cfg, None).unwrap();
+        let _ = enc.encode_host_argb(&busy_frame(w, h, 0), stride, 0, true, false).unwrap();
+        let high =
+            (1..24).map(|t| enc.encode_host_argb(&busy_frame(w, h, t), stride, t as u64, false, false).unwrap().len()).sum::<usize>();
+        // Lower the target on the running encoder (as the web UI slider does), then re-measure.
+        enc.reconfigure_rate(200, 30.0);
+        let low =
+            (24..48).map(|t| enc.encode_host_argb(&busy_frame(w, h, t), stride, t as u64, false, false).unwrap().len()).sum::<usize>();
+        assert!(low < high, "live bitrate drop should shrink output (low={low}, high={high})");
+    }
+
+    #[test]
+    fn live_reconfigure_rate_raise_takes_effect() {
+        // Going above the session's starting bitrate means lifting OpenH264's max-bitrate
+        // ceiling, which init pins to the starting target.
+        let (w, h, stride) = (256usize, 192usize, 256 * 4);
+        let mut cfg = RustCaptureSettings::default();
+        cfg.width = w as i32;
+        cfg.height = h as i32;
+        cfg.h264_cbr_mode = true; // live bitrate only applies in CBR mode
+        cfg.h264_bitrate_kbps = 300;
+        cfg.target_fps = 30.0;
+        let mut enc = Openh264Encoder::new(&cfg, None).unwrap();
+        let _ = enc.encode_host_argb(&busy_frame(w, h, 0), stride, 0, true, false).unwrap();
+        let low =
+            (1..24).map(|t| enc.encode_host_argb(&busy_frame(w, h, t), stride, t as u64, false, false).unwrap().len()).sum::<usize>();
+        // Raise the target on the running encoder, far above the session's initial value.
+        enc.reconfigure_rate(8000, 30.0);
+        assert_eq!(enc.current_bitrate_bps, 8_000_000, "live raise must be accepted, not rejected");
+        let high =
+            (24..48).map(|t| enc.encode_host_argb(&busy_frame(w, h, t), stride, t as u64, false, false).unwrap().len()).sum::<usize>();
+        assert!(high > low * 3 / 2, "live bitrate raise should grow output (low={low}, high={high})");
+    }
+
+    #[test]
+    fn crf_mode_qp_scales_output() {
+        // CRF/CQP mode (RC_OFF + QpRange): a higher h264_crf means a higher QP, so the same
+        // content must encode to FEWER bytes than at a lower CRF, proving the QP is applied.
+        let (w, h, stride) = (256usize, 192usize, 256 * 4);
+        let total_bytes_at = |crf: i32| -> usize {
+            let mut cfg = RustCaptureSettings::default();
+            cfg.width = w as i32;
+            cfg.height = h as i32;
+            cfg.h264_cbr_mode = false; // CRF/CQP mode
+            cfg.h264_crf = crf;
+            cfg.target_fps = 30.0;
+            let mut enc = Openh264Encoder::new(&cfg, None).unwrap();
+            // Frame 0 is the forced IDR and is counted too (the QP affects it most); 1..24 unforced.
+            (0..24)
+                .map(|t| {
+                    let frame = gradient_frame(w, h, t);
+                    enc.encode_host_argb(&frame, stride, t as u64, t == 0, false).unwrap().len()
+                })
+                .sum()
+        };
+        let (high_quality, low_quality) = (total_bytes_at(18), total_bytes_at(40));
+        assert!(
+            low_quality < high_quality,
+            "higher CRF must compress harder (crf40={low_quality}, crf18={high_quality})"
+        );
+    }
+
+    #[test]
+    fn rgba_and_bgra_inputs_both_encode() {
+        // Wayland's GLES readback hands over RGBA while X11 hands over BGRA. Either byte order
+        // must yield valid Annex-B (color correctness is exercised end-to-end, not here).
+        let (w, h, stride) = (128usize, 96usize, 128 * 4);
+        let mut cfg = RustCaptureSettings::default();
+        cfg.width = w as i32;
+        cfg.height = h as i32;
+        cfg.h264_bitrate_kbps = 2000;
+        cfg.target_fps = 30.0;
+        let frame = busy_frame(w, h, 0);
+        for rgba in [false, true] {
+            // A fresh encoder per byte order, each fed the same frame as a forced first frame.
+            let mut enc = Openh264Encoder::new(&cfg, None).unwrap();
+            let out = enc.encode_host_argb(&frame, stride, 0, true, rgba).expect("encode");
+            assert!(out.len() > 10, "rgba_input={rgba} must produce output");
+            assert_eq!(out[0], 0x04, "rgba_input={rgba} output must carry the wire header");
+            let payload = &out[10..];
+            let annexb = payload.starts_with(&[0, 0, 0, 1]) || payload.starts_with(&[0, 0, 1]);
+            assert!(annexb, "rgba_input={rgba} payload must be Annex-B");
+        }
+    }
+}
diff --git a/pixelflux_wayland/src/encoders/overlay.rs b/pixelflux/src/encoders/overlay.rs
similarity index 100%
rename from pixelflux_wayland/src/encoders/overlay.rs
rename to pixelflux/src/encoders/overlay.rs
diff --git a/pixelflux_wayland/src/encoders/software.rs b/pixelflux/src/encoders/software.rs
similarity index 51%
rename from pixelflux_wayland/src/encoders/software.rs
rename to pixelflux/src/encoders/software.rs
index 970c43b..51444b2 100644
--- a/pixelflux_wayland/src/encoders/software.rs
+++ b/pixelflux/src/encoders/software.rs
@@ -10,6 +10,19 @@ use yuv::{BufferStoreMut, YuvConversionMode, YuvPlanarImageMut, YuvRange, YuvSta
 /// @brief Maximum number of stripes used for CPU encoding.
 pub const MAX_STRIPE_CAPACITY: usize = 64;
 
+thread_local! {
+    /// One lazily created, reusable libjpeg-turbo compressor per worker thread (rayon striped path
+    /// or the encode thread in single-stripe mode) -- no tjInitCompress/tjDestroy per stripe per frame.
+    static JPEG_COMPRESSOR: std::cell::RefCell<Option<turbojpeg::Compressor>> =
+        std::cell::RefCell::new(None);
+}
+
+/// Process-global lock serializing libx264 encoder open/close. libx264 mutates global state
+/// during x264_encoder_open/close, so concurrent open/close from parallel stripe encoders (or
+/// multiple capture instances in one process) can corrupt the heap. Held only around open/close,
+/// never during encode; lockers recover a poisoned mutex (into_inner) instead of panicking.
+static X264_OPEN_CLOSE_LOCK: std::sync::Mutex<()> = std::sync::Mutex::new(());
+
 /// @brief Wrapper around x264-sys for CPU-based H.264 encoding.
 ///
 /// Manages the raw C pointer to the x264 encoder state and handles
@@ -20,6 +33,10 @@ pub struct H264EncoderWrapper {
     pub height: i32,
     current_crf: i32,
     pub is_i444: bool,
+    is_cbr: bool,
+    current_bitrate: i32,
+    current_vbv: i32,
+    current_fps: u32,
     #[allow(dead_code)]
     full_range: bool,
 }
@@ -29,6 +46,7 @@ unsafe impl Send for H264EncoderWrapper {}
 impl Drop for H264EncoderWrapper {
     fn drop(&mut self) {
         if !self.encoder.is_null() {
+            let _guard = X264_OPEN_CLOSE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
             unsafe { x264_sys::x264_encoder_close(self.encoder) };
             self.encoder = ptr::null_mut();
         }
@@ -43,7 +61,8 @@ impl H264EncoderWrapper {
     /// @input is_i444: True for YUV444 (full color), false for YUV420.
     /// @input fps: The target framerate.
     /// @return Option<Self>: The wrapper instance or None if initialization fails.
-    pub fn new(width: i32, height: i32, crf: i32, is_i444: bool, fps: f64) -> Option<Self> {
+    pub fn new(width: i32, height: i32, crf: i32, is_i444: bool, fps: f64, threads: i32,
+               cbr_mode: bool, bitrate_kbps: i32, vbv_kb: i32) -> Option<Self> {
         unsafe {
             let mut param: x264_sys::x264_param_t = std::mem::zeroed();
             let preset = CString::new("ultrafast").unwrap();
@@ -58,8 +77,18 @@ impl H264EncoderWrapper {
             param.i_fps_num = if fps < 1.0 { 30 } else { fps as u32 };
             param.i_fps_den = 1;
             param.i_keyint_max = x264_sys::X264_KEYINT_MAX_INFINITE as i32;
-            param.rc.i_rc_method = x264_sys::X264_RC_CRF as i32;
-            param.rc.f_rf_constant = crf as f32;
+            if cbr_mode {
+                // ABR with a VBV cap (explicit kb, or ~100ms of bitrate when unset); no filler.
+                let bk = bitrate_kbps.saturating_abs();
+                param.rc.i_rc_method = x264_sys::X264_RC_ABR as i32;
+                param.rc.i_bitrate = bk;
+                param.rc.i_vbv_max_bitrate = bk;
+                param.rc.i_vbv_buffer_size = if vbv_kb > 0 { vbv_kb } else { (bk + 9) / 10 };
+                param.rc.b_filler = 0;
+            } else {
+                param.rc.i_rc_method = x264_sys::X264_RC_CRF as i32;
+                param.rc.f_rf_constant = crf as f32;
+            }
             param.i_csp = if is_i444 {
                 x264_sys::X264_CSP_I444
             } else {
@@ -73,12 +102,15 @@ impl H264EncoderWrapper {
             let profile = CString::new(if is_i444 { "high444" } else { "baseline" }).unwrap();
             x264_sys::x264_param_apply_profile(&mut param, profile.as_ptr());
 
-            param.i_threads = 1;
+            param.i_threads = threads;
             param.b_repeat_headers = 1;
             param.b_annexb = 1;
             param.i_log_level = x264_sys::X264_LOG_NONE as i32;
 
-            let encoder = x264_sys::x264_encoder_open(&mut param);
+            let encoder = {
+                let _guard = X264_OPEN_CLOSE_LOCK.lock().unwrap_or_else(|e| e.into_inner());
+                x264_sys::x264_encoder_open(&mut param)
+            };
             if encoder.is_null() {
                 None
             } else {
@@ -88,6 +120,10 @@ impl H264EncoderWrapper {
                     height,
                     current_crf: crf,
                     is_i444,
+                    is_cbr: cbr_mode,
+                    current_bitrate: bitrate_kbps.saturating_abs(),
+                    current_vbv: vbv_kb,
+                    current_fps: if fps < 1.0 { 30 } else { fps as u32 },
                     full_range: param.vui.b_fullrange == 1,
                 })
             }
@@ -97,8 +133,8 @@ impl H264EncoderWrapper {
     /// @brief Updates the Rate Factor (CRF) dynamically without recreating the encoder.
     /// @input new_crf: The new quality value.
     pub fn reconfigure_crf(&mut self, new_crf: i32) {
-        if self.current_crf == new_crf {
-            return;
+        if self.is_cbr || self.current_crf == new_crf {
+            return; // CBR is bitrate-controlled; CRF reconfig doesn't apply.
         }
         unsafe {
             let mut param: x264_sys::x264_param_t = std::mem::zeroed();
@@ -110,6 +146,41 @@ impl H264EncoderWrapper {
         }
     }
 
+    /// @brief Pushes a live bitrate/VBV change (CBR only) and/or framerate change to the running
+    /// encoder via x264_encoder_reconfig. No-op when nothing changed, so per-frame calls are cheap.
+    pub fn reconfigure_rate(&mut self, bitrate_kbps: i32, vbv_kb: i32, fps: f64) {
+        let target_kbps = bitrate_kbps.saturating_abs();
+        let target_fps = if fps < 1.0 { 30 } else { fps as u32 };
+        let update_rate = self.is_cbr
+            && (target_kbps != self.current_bitrate || vbv_kb != self.current_vbv);
+        let update_fps = target_fps != self.current_fps;
+        if !(update_rate || update_fps) {
+            return;
+        }
+        unsafe {
+            let mut live: x264_sys::x264_param_t = std::mem::zeroed();
+            x264_sys::x264_encoder_parameters(self.encoder, &mut live);
+            if update_rate {
+                live.rc.i_bitrate = target_kbps;
+                live.rc.i_vbv_max_bitrate = target_kbps;
+                live.rc.i_vbv_buffer_size = if vbv_kb > 0 { vbv_kb } else { (target_kbps + 9) / 10 };
+            }
+            if update_fps {
+                // NOTE(review): x264 applies only some fields on reconfig -- confirm fps is one.
+                live.i_fps_num = target_fps;
+                live.i_fps_den = 1;
+            }
+            let accepted = x264_sys::x264_encoder_reconfig(self.encoder, &mut live) == 0;
+            if accepted && update_rate {
+                self.current_bitrate = target_kbps;
+                self.current_vbv = vbv_kb;
+            }
+            if accepted && update_fps {
+                self.current_fps = target_fps;
+            }
+        }
+    }
+
     /// @brief Encodes YUV planes into H.264 NAL units and prepends a custom header.
     /// @input y: Luma plane data.
     /// @input u: Chroma U plane data.
@@ -133,6 +204,7 @@ impl H264EncoderWrapper {
         frame_id: i64,
         force_idr: bool,
         fixed_header: &[u8],
+        omit_headers: bool,
         output_buf: &mut Vec<u8>,
         recording_sink: Option<&Arc<RecordingSink>>,
     ) -> bool {
@@ -172,22 +244,24 @@ impl H264EncoderWrapper {
             );
 
             if frame_size > 0 {
-                let header_len = 2 + fixed_header.len();
+                let header_len = if omit_headers { 0 } else { 2 + fixed_header.len() };
                 let total_len = header_len + frame_size as usize;
 
                 output_buf.clear();
                 output_buf.reserve(total_len);
-                output_buf.push(0x04);
 
-                let type_byte = if pic_out.i_type == x264_sys::X264_TYPE_IDR as i32 {
-                    0x01
-                } else if pic_out.i_type == x264_sys::X264_TYPE_I as i32 {
-                    0x02
-                } else {
-                    0x00
-                };
-                output_buf.push(type_byte);
-                output_buf.extend_from_slice(fixed_header);
+                if !omit_headers {
+                    output_buf.push(0x04);
+                    let type_byte = if pic_out.i_type == x264_sys::X264_TYPE_IDR as i32 {
+                        0x01
+                    } else if pic_out.i_type == x264_sys::X264_TYPE_I as i32 {
+                        0x02
+                    } else {
+                        0x00
+                    };
+                    output_buf.push(type_byte);
+                    output_buf.extend_from_slice(fixed_header);
+                }
 
                 let nal_slice = std::slice::from_raw_parts(nals, i_nals as usize);
                 for nal in nal_slice {
@@ -218,6 +292,103 @@ pub struct StripeState {
     pub u_buf: Vec<u8>,
     pub v_buf: Vec<u8>,
     pub packet_buf: Vec<u8>,
+    // Content-hash damage state, used by sources without external damage (X11).
+    pub last_hash: u64,
+    pub consecutive_changes: u32,
+    pub in_damage_block: bool,
+    pub damage_block_frames_remaining: i32,
+    pub hash_at_block_start: u64,
+}
+
+/// Fast 64-bit content hash for change detection. NOT cryptographic. Used ONLY to compare a
+/// stripe against its previous frame in memory (never persisted or sent on the wire), so the
+/// exact value is irrelevant -- only that identical bytes hash identically. A collision just
+/// delays an update until the periodic recovery keyframe refreshes it.
+/// NOTE(review): XOR-then-multiply only carries differences toward higher bits, so changes
+/// confined to the top bytes of each 8-byte word collide far more often than 2^-64 -- confirm.
+///
+/// Eight independent FNV-1a-style lanes over interleaved 8-byte words break the serial multiply
+/// dependency chain of a single accumulator (the bottleneck at ~6.5 GB/s single-thread), so the
+/// CPU can keep several multiplies in flight; the lanes fold into one 64-bit value at the end.
+fn fast_hash(bytes: &[u8]) -> u64 {
+    const PRIME: u64 = 0x100000001b3;
+    const SEED: u64 = 0xcbf29ce484222325;
+    const LANES: usize = 8;
+    const STRIDE: usize = LANES * 8; // 64 bytes per interleaved block
+
+    // One hashing step: XOR the value into the accumulator, then multiply by the prime.
+    let mix = |acc: u64, v: u64| (acc ^ v).wrapping_mul(PRIME);
+    // Little-endian load of an exactly-8-byte chunk.
+    let word = |b: &[u8]| u64::from_le_bytes(b.try_into().unwrap());
+
+    let mut lanes = [SEED; LANES];
+    let mut blocks = bytes.chunks_exact(STRIDE);
+    for block in &mut blocks {
+        // Word k of the block feeds lane k.
+        for (acc, w) in lanes.iter_mut().zip(block.chunks_exact(8)) {
+            *acc = mix(*acc, word(w));
+        }
+    }
+
+    // Fold the eight lanes into one accumulator.
+    let folded = lanes.iter().fold(SEED, |acc, &lane| mix(acc, lane));
+
+    // Tail (< 64 bytes): whole 8-byte words first, then the trailing bytes one at a time.
+    let mut tail_words = blocks.remainder().chunks_exact(8);
+    let after_words = (&mut tail_words).fold(folded, |acc, w| mix(acc, word(w)));
+    tail_words
+        .remainder()
+        .iter()
+        .fold(after_words, |acc, &byte| mix(acc, byte as u64))
+}
+
+impl StripeState {
+    /// Content-hash damage detection for sources without external damage (X11). Outside a damage
+    /// block it reports whether the hash changed; `threshold` consecutive changes open a block that
+    /// reports dirty for `duration` frames, then re-hashes once to extend or leave the block.
+    pub fn content_dirty(&mut self, bytes: &[u8], threshold: u32, duration: i32) -> bool {
+        if !self.in_damage_block {
+            let digest = fast_hash(bytes);
+            let changed = digest != std::mem::replace(&mut self.last_hash, digest);
+            if !changed {
+                self.consecutive_changes = 0;
+                return false;
+            }
+            self.consecutive_changes += 1;
+            if self.consecutive_changes >= threshold {
+                self.in_damage_block = true;
+                self.damage_block_frames_remaining = duration;
+                self.hash_at_block_start = digest;
+            }
+            return true;
+        }
+        // Inside a damage block: always dirty, hashing only once the countdown expires.
+        self.damage_block_frames_remaining -= 1;
+        if self.damage_block_frames_remaining > 0 {
+            return true;
+        }
+        let digest = fast_hash(bytes);
+        if digest == self.hash_at_block_start {
+            self.in_damage_block = false;
+            self.consecutive_changes = 0;
+        } else {
+            self.damage_block_frames_remaining = duration;
+            self.hash_at_block_start = digest;
+        }
+        self.last_hash = digest;
+        true
+    }
+}
+
+/// Encoded stripe payload plus the metadata the consumer needs as frame attributes
+/// (so it stays available even when the per-stripe header is omitted). data_type is the
+/// wire codec tag: JPEG=1, H.264=2.
+pub struct EncodedStripe {
+    pub data: Vec<u8>, // encoded bytes; carries the per-stripe header unless it is omitted
+    pub data_type: i32, // 1 = JPEG, 2 = H.264
+    pub stripe_y_start: i32, // first row of the stripe within the frame
+    pub stripe_height: i32, // number of rows the stripe covers
+    pub frame_id: i32, // frame counter value the stripe was encoded from
 }
 
 /// @brief Main CPU encoding logic handling threading, striping, and format conversion.
@@ -233,6 +404,8 @@ pub struct StripeState {
 /// @input settings: Capture settings (Quality, FPS, etc).
 /// @input frame_counter: Current frame index.
 /// @input use_gpu: Whether the input buffer came from GPU (affects pixel format).
+/// @input force_idr_all: When true, force a send + IDR on every H.264 stripe this frame
+///        (on-demand request or periodic recovery keyframe). JPEG stripes ignore it.
 /// @return Vec<Vec<u8>>: A collection of encoded packets for the changed stripes.
 pub fn encode_cpu(
     stripes: &mut Vec<StripeState>,
@@ -243,8 +416,12 @@ pub fn encode_cpu(
     settings: &RustCaptureSettings,
     frame_counter: u16,
     use_gpu: bool,
+    // When true (X11, no compositor damage), per-stripe content hashing drives damage
+    // instead of `damage_rects`; see StripeState::content_dirty.
+    hash_damage: bool,
     recording_sink: Option<&Arc<RecordingSink>>,
-) -> Vec<Vec<u8>> {
+    force_idr_all: bool,
+) -> Vec<EncodedStripe> {
     let num_cores = std::thread::available_parallelism()
         .map(|n| n.get())
         .unwrap_or(1);
@@ -264,22 +441,8 @@ pub fn encode_cpu(
         stripes.resize_with(n_processing_stripes, StripeState::default);
     }
 
-    let mut stripe_geometries = Vec::with_capacity(n_processing_stripes);
-    let mut current_y = 0;
-    let h_usize = height as usize;
-    let n = n_processing_stripes;
-    let base_h = (h_usize / n) & !1; 
-    
-    let total_used = base_h * n;
-    let remainder = h_usize - total_used;
-    let stripes_with_extra = remainder / 2;
-
-    for i in 0..n {
-        let extra = if i < stripes_with_extra { 2 } else { 0 };
-        let s_h = base_h + extra;
-        stripe_geometries.push((current_y, s_h));
-        current_y += s_h;
-    }
+    let stripe_geometries =
+        compute_stripe_geometries(height as usize, n_processing_stripes, settings.output_mode);
     let mut stripe_is_dirty = vec![false; n_processing_stripes];
     if !damage_rects.is_empty() {
         for rect in damage_rects {
@@ -308,16 +471,23 @@ pub fn encode_cpu(
     let trigger_frames = settings.paint_over_trigger_frames;
     let use_paint_over = settings.use_paint_over_quality;
     let target_fps = settings.target_fps;
+    let omit_headers = settings.omit_stripe_headers;
+    let damage_block_threshold = settings.damage_block_threshold;
+    let damage_block_duration = settings.damage_block_duration as i32;
+    let h264_cbr = settings.h264_cbr_mode;
+    let h264_bitrate = settings.h264_bitrate_kbps;
+    let h264_vbv = settings.h264_vbv_buffer_size_kb;
+    // Single full-frame stripe: let x264 multi-thread internally (0 = auto; zerolatency uses
+    // sliced-threads, so this adds no latency). Striped mode keeps 1 thread per stripe --
+    // parallelism comes from rayon across stripes.
+    let h264_threads = if n_processing_stripes == 1 { 0 } else { 1 };
     let stripe_sink: Option<Arc<RecordingSink>> = if n_processing_stripes == 1 {
         recording_sink.cloned()
     } else {
         None
     };
 
-    stripes
-        .par_iter_mut()
-        .enumerate()
-        .filter_map(|(i, stripe_state)| {
+    let stripe_body = |(i, stripe_state): (usize, &mut StripeState)| -> Option<EncodedStripe> {
             if i >= stripe_geometries.len() {
                 return None;
             }
@@ -329,7 +499,15 @@ pub fn encode_cpu(
             let mut send_this_stripe = false;
             let mut quality_or_crf = if output_mode == 0 { jpeg_q } else { h264_crf };
             let mut force_idr = false;
-            let is_dirty = stripe_is_dirty[i];
+            let is_dirty = if !hash_damage {
+                stripe_is_dirty[i]
+            } else if output_mode == 1 && h264_streaming {
+                // Streaming H.264 sends every stripe unconditionally below, so the content
+                // hash is unused here — skip it.
+                false
+            } else {
+                stripe_state.content_dirty(stripe_bytes, damage_block_threshold, damage_block_duration)
+            };
 
             if output_mode == 1 && stripe_state.h264_burst_frames_remaining > 0 {
                 send_this_stripe = true;
@@ -374,10 +552,16 @@ pub fn encode_cpu(
                 }
             }
 
+            // Recovery IDR: force a send + IDR even on a static stripe so a reconnecting
+            // client can resume. Deliberately leaves paint_over_sent / no_motion_frame_count
+            // and quality_or_crf untouched so it can't preempt a pending paint-over.
+            if force_idr_all && output_mode == 1 {
+                send_this_stripe = true;
+                force_idr = true;
+            }
+
             if send_this_stripe {
                 if output_mode == 0 {
-                    let mut compressor = turbojpeg::Compressor::new().ok()?;
-                    compressor.set_quality(quality_or_crf).ok()?;
                     let pixel_format = if use_gpu {
                         turbojpeg::PixelFormat::RGBA
                     } else {
@@ -390,20 +574,40 @@ pub fn encode_cpu(
                         height: actual_height,
                         format: pixel_format,
                     };
-                    stripe_state.packet_buf.clear();
-                    stripe_state
-                        .packet_buf
-                        .extend_from_slice(&frame_counter.to_be_bytes());
-                    stripe_state
-                        .packet_buf
-                        .extend_from_slice(&(y_start as u16).to_be_bytes());
-                    match compressor.compress_to_vec(img) {
-                        Ok(jpeg) => {
-                            stripe_state.packet_buf.extend_from_slice(&jpeg);
-                            Some(stripe_state.packet_buf.clone())
+                    // Reuse this worker thread's compressor (created once) instead of a fresh one
+                    // per stripe per frame.
+                    JPEG_COMPRESSOR.with(|cell| -> Option<EncodedStripe> {
+                        let mut slot = cell.borrow_mut();
+                        if slot.is_none() {
+                            *slot = Some(turbojpeg::Compressor::new().ok()?);
                         }
-                        Err(_) => None,
-                    }
+                        let compressor = slot.as_mut().unwrap();
+                        compressor.set_quality(quality_or_crf).ok()?;
+                        let jpeg = compressor.compress_to_vec(img).ok()?;
+                        // Header-less: hand the encoded buffer straight through (drops the extra
+                        // copy into packet_buf). With headers: prepend the 6-byte stripe header.
+                        let data = if omit_headers {
+                            jpeg
+                        } else {
+                            stripe_state.packet_buf.clear();
+                            stripe_state
+                                .packet_buf
+                                .extend_from_slice(&frame_counter.to_be_bytes());
+                            stripe_state
+                                .packet_buf
+                                .extend_from_slice(&(y_start as u16).to_be_bytes());
+                            stripe_state.packet_buf.extend_from_slice(&jpeg);
+                            std::mem::take(&mut stripe_state.packet_buf)
+                        };
+                        Some(EncodedStripe {
+                            data,
+                            data_type: 1,
+                            stripe_y_start: y_start as i32,
+                            // Report the actual stripe height (JPEG carries it as frame metadata too).
+                            stripe_height: actual_height as i32,
+                            frame_id: frame_counter as i32,
+                        })
+                    })
                 } else {
                     let needs_reinit = if let Some(ref enc) = stripe_state.h264_encoder {
                         enc.width != width_usize as i32
@@ -420,10 +624,15 @@ pub fn encode_cpu(
                             quality_or_crf,
                             h264_fullcolor,
                             target_fps,
+                            h264_threads,
+                            h264_cbr,
+                            h264_bitrate,
+                            h264_vbv,
                         );
                         force_idr = true;
                     } else if let Some(ref mut enc) = stripe_state.h264_encoder {
                         enc.reconfigure_crf(quality_or_crf);
+                        enc.reconfigure_rate(h264_bitrate, h264_vbv, target_fps);
                     }
 
                     if let Some(ref mut enc) = stripe_state.h264_encoder {
@@ -457,46 +666,55 @@ pub fn encode_cpu(
                             height: actual_height as u32,
                         };
 
-                        if h264_fullcolor {
+                        let conversion_result = if h264_fullcolor {
                             if use_gpu {
-                                let _ = yuv::rgba_to_yuv444(
+                                yuv::rgba_to_yuv444(
                                     &mut planar_image,
                                     stripe_bytes,
                                     (width_usize * 4) as u32,
                                     YuvRange::Full,
                                     YuvStandardMatrix::Bt709,
-                                    YuvConversionMode::Balanced,
-                                );
+                                    YuvConversionMode::Fast,
+                                )
                             } else {
-                                let _ = yuv::bgra_to_yuv444(
+                                yuv::bgra_to_yuv444(
                                     &mut planar_image,
                                     stripe_bytes,
                                     (width_usize * 4) as u32,
                                     YuvRange::Full,
                                     YuvStandardMatrix::Bt709,
-                                    YuvConversionMode::Balanced,
-                                );
+                                    YuvConversionMode::Fast,
+                                )
                             }
                         } else {
                             if use_gpu {
-                                let _ = yuv::rgba_to_yuv420(
+                                yuv::rgba_to_yuv420(
                                     &mut planar_image,
                                     stripe_bytes,
                                     (width_usize * 4) as u32,
                                     YuvRange::Limited,
                                     YuvStandardMatrix::Bt709,
-                                    YuvConversionMode::Balanced,
-                                );
+                                    YuvConversionMode::Fast,
+                                )
                             } else {
-                                let _ = yuv::bgra_to_yuv420(
+                                yuv::bgra_to_yuv420(
                                     &mut planar_image,
                                     stripe_bytes,
                                     (width_usize * 4) as u32,
                                     YuvRange::Limited,
                                     YuvStandardMatrix::Bt709,
-                                    YuvConversionMode::Balanced,
-                                );
+                                    YuvConversionMode::Fast,
+                                )
                             }
+                        };
+
+                        // Skip the stripe on conversion failure instead of encoding garbage.
+                        if let Err(e) = conversion_result {
+                            eprintln!(
+                                "[software] YUV conversion failed for {}x{} stripe: {:?}; skipping",
+                                width_usize, actual_height, e
+                            );
+                            return None;
                         }
 
                         let mut fixed_header = [0u8; 8];
@@ -520,10 +738,17 @@ pub fn encode_cpu(
                             frame_counter as i64,
                             force_idr || force_idr_for_recording,
                             &fixed_header,
+                            omit_headers,
                             &mut stripe_state.packet_buf,
                             stripe_sink.as_ref(),
                         ) {
-                            Some(stripe_state.packet_buf.clone())
+                            Some(EncodedStripe {
+                                data: std::mem::take(&mut stripe_state.packet_buf),
+                                data_type: 2,
+                                stripe_y_start: y_start as i32,
+                                stripe_height: actual_height as i32,
+                                frame_id: frame_counter as i32,
+                            })
                         } else {
                             None
                         }
@@ -534,6 +759,100 @@ pub fn encode_cpu(
             } else {
                 None
             }
-        })
-        .collect()
+    };
+    // Single full-frame stripe: run inline (sequential) -- empirically faster than a one-element
+    // rayon dispatch. Striped mode keeps rayon to parallelize the encode across stripes.
+    if n_processing_stripes <= 1 {
+        stripes.iter_mut().enumerate().filter_map(&stripe_body).collect()
+    } else {
+        stripes.par_iter_mut().enumerate().filter_map(&stripe_body).collect()
+    }
+}
+
+/// Splits `height` rows into `n` contiguous `(y_start, height)` stripes (none when `n == 0`).
+/// JPEG (output_mode 0) covers every row; H.264 keeps stripe heights even for 4:2:0, handing
+/// leftover rows out in pairs and leaving any trailing odd row uncovered.
+fn compute_stripe_geometries(height: usize, n: usize, output_mode: i32) -> Vec<(usize, usize)> {
+    // Guard the divisions below: zero stripes would otherwise panic with a divide-by-zero.
+    if n == 0 {
+        return Vec::new();
+    }
+    // (base stripe height, number of leading stripes that grow, rows each of those gains).
+    let (base_h, grown, step) = if output_mode == 0 {
+        (height / n, height % n, 1)
+    } else {
+        let even_base = (height / n) & !1;
+        (even_base, (height - even_base * n) / 2, 2)
+    };
+    let mut current_y = 0;
+    (0..n)
+        .map(|i| {
+            let s_h = base_h + if i < grown { step } else { 0 };
+            let y_start = current_y;
+            current_y += s_h;
+            (y_start, s_h)
+        })
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{compute_stripe_geometries, StripeState};
+
+    #[test]
+    fn content_dirty_detects_change_and_damage_block() {
+        let mut st = StripeState::default();
+        let a = vec![1u8; 256];
+        let b = vec![2u8; 256];
+        assert!(st.content_dirty(&a, 2, 3)); // changed vs zero-initialized hash
+        assert!(!st.content_dirty(&a, 2, 3)); // stable
+        assert!(st.content_dirty(&b, 2, 3)); // change #1 (consecutive=1)
+        assert!(st.content_dirty(&a, 2, 3)); // change #2 -> enters damage block
+        assert!(st.in_damage_block);
+        assert!(st.content_dirty(&a, 2, 3)); // block holds dirty, no re-hash (rem 3->2)
+        assert!(st.content_dirty(&a, 2, 3)); // block (2->1)
+        assert!(st.content_dirty(&a, 2, 3)); // block end (1->0): re-hash, unchanged -> exit
+        assert!(!st.in_damage_block);
+        assert!(!st.content_dirty(&a, 2, 3)); // stable again
+    }
+
+    fn covered(geoms: &[(usize, usize)]) -> usize {
+        geoms.iter().map(|&(_, h)| h).sum()
+    }
+
+    fn assert_contiguous(geoms: &[(usize, usize)]) {
+        let mut y = 0;
+        for &(sy, sh) in geoms {
+            assert_eq!(sy, y, "stripes must be contiguous");
+            y += sh;
+        }
+    }
+
+    #[test]
+    fn jpeg_covers_every_row_including_odd() {
+        for &h in &[1usize, 63, 720, 721, 1079, 1080, 1081] {
+            for &n in &[1usize, 2, 3, 8, 16] {
+                let g = compute_stripe_geometries(h, n, 0);
+                assert_eq!(g.len(), n);
+                assert_eq!(covered(&g), h, "JPEG must cover full height h={} n={}", h, n);
+                assert_contiguous(&g);
+            }
+        }
+    }
+
+    #[test]
+    fn h264_stripes_even_and_within_bounds() {
+        for &h in &[64usize, 720, 721, 1080, 1081] {
+            for &n in &[1usize, 2, 8] {
+                let g = compute_stripe_geometries(h, n, 1);
+                assert_eq!(g.len(), n);
+                for &(_, sh) in &g {
+                    assert_eq!(sh % 2, 0, "H.264 stripe heights must be even h={} n={}", h, n);
+                }
+                assert_contiguous(&g);
+                assert!(covered(&g) <= h);
+                assert!(h - covered(&g) <= 1, "at most one trailing odd row uncovered");
+            }
+        }
+    }
 }
diff --git a/pixelflux_wayland/src/encoders/vaapi.rs b/pixelflux/src/encoders/vaapi.rs
similarity index 58%
rename from pixelflux_wayland/src/encoders/vaapi.rs
rename to pixelflux/src/encoders/vaapi.rs
index aa91d07..c5dd4c4 100644
--- a/pixelflux_wayland/src/encoders/vaapi.rs
+++ b/pixelflux/src/encoders/vaapi.rs
@@ -60,11 +60,20 @@ struct DmabufResources {
 }
 
 /// @brief Callback function used by FFmpeg to release custom DRM frames.
-unsafe extern "C" fn release_drm_frame(opaque: *mut c_void, _data: *mut u8) {
-    let resources = Box::from_raw(opaque as *mut DmabufResources);
-    for &fd in &resources.fds {
-        close(fd);
-    }
+unsafe extern "C" fn release_drm_frame(opaque: *mut c_void, data: *mut u8) {
+    // FFmpeg (C) invokes this on buffer teardown: a panic must not unwind across the extern "C"
+    // boundary (the compiler guard would abort the process), so catch it here.
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        let resources = Box::from_raw(opaque as *mut DmabufResources);
+        for &fd in &resources.fds {
+            close(fd);
+        }
+        // Free the descriptor FFmpeg owns now (null only on the error path, where
+        // the caller frees it).
+        if !data.is_null() {
+            ff::av_free(data as *mut c_void);
+        }
+    }));
 }
 
 /// @brief Helper to convert FFmpeg error codes into Rust strings.
@@ -112,11 +121,16 @@ pub struct VaapiEncoder {
     current_qp: u32,
     qp_hysteresis_counter: u32,
 
+    // Live rate-control state, tracked so a reconfigure re-opens the codec only on a real change.
+    cbr_mode: bool,
+    current_bitrate_kbps: i32,
+    current_vbv_kb: i32,
+
     recording_sink: Option<Arc<RecordingSink>>,
+    omit_stripe_headers: bool,
 }
 
 unsafe impl Send for VaapiEncoder {}
-unsafe impl Sync for VaapiEncoder {}
 
 impl Drop for VaapiEncoder {
     fn drop(&mut self) {
@@ -166,9 +180,29 @@ impl VaapiEncoder {
     /// @input settings: Capture configuration (resolution, FPS, QP, render node).
     /// @input recording_sink: Optional Unix socket sink for encoded output.
     /// @return Result containing the new VaapiEncoder instance.
+    ///
+    /// Dmabuf input (Wayland): the source frame is a DRM-PRIME dmabuf mapped into a VAAPI
+    /// surface by the filter graph.
     pub fn new(
         settings: &RustCaptureSettings,
         recording_sink: Option<Arc<RecordingSink>>,
+    ) -> Result<Self, String> {
+        Self::new_impl(settings, recording_sink, false)
+    }
+
+    /// Host-ARGB input (X11): a CPU BGRA frame is uploaded to a VAAPI surface and converted
+    /// ARGB->NV12 by VA-VPP (scale_vaapi) on the GPU (no CPU CSC). Pairs with `encode_host_argb`.
+    pub fn new_host(
+        settings: &RustCaptureSettings,
+        recording_sink: Option<Arc<RecordingSink>>,
+    ) -> Result<Self, String> {
+        Self::new_impl(settings, recording_sink, true)
+    }
+
+    fn new_impl(
+        settings: &RustCaptureSettings,
+        recording_sink: Option<Arc<RecordingSink>>,
+        host_input: bool,
     ) -> Result<Self, String> {
         FF_INIT.call_once(|| {});
 
@@ -211,8 +245,10 @@ impl VaapiEncoder {
                 ));
             }
 
-            let drm_frames_ref = ff::av_hwframe_ctx_alloc(drm_device_ctx);
+            let mut drm_frames_ref = ff::av_hwframe_ctx_alloc(drm_device_ctx);
             if drm_frames_ref.is_null() {
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to alloc DRM frames ctx".into());
             }
 
@@ -224,12 +260,18 @@ impl VaapiEncoder {
             (*drm_frames).initial_pool_size = 0;
 
             if ff::av_hwframe_ctx_init(drm_frames_ref) < 0 {
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to init DRM frames ctx".into());
             }
 
             let codec_name = CString::new("h264_vaapi").unwrap();
             let codec = ff::avcodec_find_encoder_by_name(codec_name.as_ptr());
             if codec.is_null() {
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("h264_vaapi encoder not found".into());
             }
 
@@ -237,6 +279,12 @@ impl VaapiEncoder {
             let aligned_height = (height + 31) & !31;
 
             let mut enc_frames_ref = ff::av_hwframe_ctx_alloc(hw_device_ctx);
+            if enc_frames_ref.is_null() {
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
+                return Err("Failed to allocate encoder frames ctx".into());
+            }
             let enc_frames = (*enc_frames_ref).data as *mut ff::AVHWFramesContext;
             (*enc_frames).format = ff::AVPixelFormat::AV_PIX_FMT_VAAPI;
             (*enc_frames).sw_format = ff::AVPixelFormat::AV_PIX_FMT_NV12;
@@ -245,13 +293,25 @@ impl VaapiEncoder {
             (*enc_frames).initial_pool_size = 20;
 
             if ff::av_hwframe_ctx_init(enc_frames_ref) < 0 {
+                ff::av_buffer_unref(&mut enc_frames_ref);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to init encoder frames ctx".into());
             }
 
             // Keep a reference for restarting the encoder
-            let saved_enc_frames_ctx = ff::av_buffer_ref(enc_frames_ref);
-
-            let encoder_ctx = ff::avcodec_alloc_context3(codec);
+            let mut saved_enc_frames_ctx = ff::av_buffer_ref(enc_frames_ref);
+
+            let mut encoder_ctx = ff::avcodec_alloc_context3(codec);
+            if encoder_ctx.is_null() {
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut enc_frames_ref);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
+                return Err("Failed to allocate encoder context".into());
+            }
             (*encoder_ctx).width = width;
             (*encoder_ctx).height = height;
             (*encoder_ctx).time_base = ff::AVRational { num: 1, den: fps };
@@ -271,19 +331,39 @@ impl VaapiEncoder {
                 ff::av_dict_set(d, ck.as_ptr(), cv.as_ptr(), 0);
             };
 
-            set_opt(&mut opts, "rc_mode", "CQP");
-            set_opt(&mut opts, "qp", &settings.h264_crf.to_string());
+            if settings.h264_cbr_mode {
+                // Constant bitrate: VA-API reads the target rate from the codec context's
+                // bit_rate; the rc_mode opt just selects the algorithm.
+                let bps = (settings.h264_bitrate_kbps.max(0) as i64).saturating_mul(1000);
+                let vbv = if settings.h264_vbv_buffer_size_kb > 0 {
+                    (settings.h264_vbv_buffer_size_kb as i64).saturating_mul(1000)
+                } else {
+                    bps / 10
+                };
+                (*encoder_ctx).bit_rate = bps;
+                (*encoder_ctx).rc_max_rate = bps;
+                (*encoder_ctx).rc_buffer_size = vbv.min(i32::MAX as i64) as i32;
+                set_opt(&mut opts, "rc_mode", "CBR");
+            } else {
+                set_opt(&mut opts, "rc_mode", "CQP");
+                set_opt(&mut opts, "qp", &settings.h264_crf.to_string());
+            }
             set_opt(&mut opts, "async_depth", "1");
             set_opt(&mut opts, "profile", "high");
             set_opt(&mut opts, "level", "4.1");
 
             let ret = ff::avcodec_open2(encoder_ctx, codec, &mut opts);
+            ff::av_dict_free(&mut opts);
             if ret < 0 {
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err(format!("Failed to open encoder: {}", ff_err_str(ret)));
             }
-            ff::av_dict_free(&mut opts);
 
-            let filter_graph = ff::avfilter_graph_alloc();
+            let mut filter_graph = ff::avfilter_graph_alloc();
             let buffersrc = ff::avfilter_get_by_name(CString::new("buffer").unwrap().as_ptr());
             let buffersink =
                 ff::avfilter_get_by_name(CString::new("buffersink").unwrap().as_ptr());
@@ -294,8 +374,22 @@ impl VaapiEncoder {
                 ff::avfilter_graph_alloc_filter(filter_graph, buffersrc, name_in.as_ptr());
 
             let par = ff::av_buffersrc_parameters_alloc();
-            (*par).format = ff::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32;
-            (*par).hw_frames_ctx = ff::av_buffer_ref(drm_frames_ref);
+            if par.is_null() {
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
+                return Err("Failed to alloc buffersrc parameters".into());
+            }
+            if host_input {
+                // Software BGRA in; hwupload (below) stages it onto a VAAPI surface.
+                (*par).format = ff::AVPixelFormat::AV_PIX_FMT_BGRA as i32;
+            } else {
+                (*par).format = ff::AVPixelFormat::AV_PIX_FMT_DRM_PRIME as i32;
+                (*par).hw_frames_ctx = ff::av_buffer_ref(drm_frames_ref);
+            }
             (*par).width = width;
             (*par).height = height;
             (*par).time_base = ff::AVRational { num: 1, den: fps };
@@ -306,6 +400,12 @@ impl VaapiEncoder {
             }
             ff::av_free(par as *mut c_void);
             if ret < 0 {
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err(format!(
                     "Failed to set buffersrc parameters: {}",
                     ff_err_str(ret)
@@ -318,6 +418,12 @@ impl VaapiEncoder {
             );
             let args = CString::new(args_str).unwrap();
             if ff::avfilter_init_str(buffersrc_ctx, args.as_ptr()) < 0 {
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to init buffersrc".into());
             }
 
@@ -331,6 +437,12 @@ impl VaapiEncoder {
                 filter_graph,
             ) < 0
             {
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to create buffersink".into());
             }
 
@@ -345,9 +457,15 @@ impl VaapiEncoder {
             (*outputs).pad_idx = 0;
             (*outputs).next = ptr::null_mut();
 
+            // VA-VPP convert ARGB->NV12 on the GPU BEFORE encode (VA implementations vary, so an
+            // explicit convert is safer than relying on encoder-side RGB CSC). Pin BT.709 limited
+            // to match the NVENC/x264 4:2:0 output so all backends agree on color. Host input
+            // (X11) uploads the CPU BGRA frame to a VAAPI surface (hwupload); dmabuf input
+            // (Wayland) maps the DRM-PRIME buffer (hwmap). Both then run scale_vaapi (VA-VPP).
+            let stage = if host_input { "hwupload" } else { "hwmap" };
             let filters_desc = CString::new(format!(
-                "hwmap,scale_vaapi=w={}:h={}:format=nv12",
-                width, height
+                "{},scale_vaapi=w={}:h={}:format=nv12:out_color_matrix=bt709:out_range=tv",
+                stage, width, height
             ))
             .unwrap();
             if ff::avfilter_graph_parse_ptr(
@@ -358,6 +476,14 @@ impl VaapiEncoder {
                 ptr::null_mut(),
             ) < 0
             {
+                ff::avfilter_inout_free(&mut inputs);
+                ff::avfilter_inout_free(&mut outputs);
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to parse filter graph".into());
             }
 
@@ -369,17 +495,41 @@ impl VaapiEncoder {
             }
 
             if ff::avfilter_graph_config(filter_graph, ptr::null_mut()) < 0 {
+                ff::avfilter_inout_free(&mut inputs);
+                ff::avfilter_inout_free(&mut outputs);
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to config filter graph".into());
             }
 
-            let video_frame = ff::av_frame_alloc();
-            let sw_frame = ff::av_frame_alloc();
-            let hw_frame = ff::av_frame_alloc();
+            let mut video_frame = ff::av_frame_alloc();
+            let mut sw_frame = ff::av_frame_alloc();
+            let mut hw_frame = ff::av_frame_alloc();
 
             if ff::av_hwframe_get_buffer((*encoder_ctx).hw_frames_ctx, hw_frame, 0) < 0 {
+                ff::av_frame_free(&mut hw_frame);
+                ff::av_frame_free(&mut sw_frame);
+                ff::av_frame_free(&mut video_frame);
+                ff::avfilter_inout_free(&mut inputs);
+                ff::avfilter_inout_free(&mut outputs);
+                ff::avfilter_graph_free(&mut filter_graph);
+                ff::avcodec_free_context(&mut encoder_ctx);
+                ff::av_buffer_unref(&mut saved_enc_frames_ctx);
+                ff::av_buffer_unref(&mut drm_frames_ref);
+                ff::av_buffer_unref(&mut hw_device_ctx);
+                ff::av_buffer_unref(&mut drm_device_ctx);
                 return Err("Failed to allocate HW frame for NV12 path".into());
             }
 
+            // Free the AVFilterInOut lists on the success path too (the error paths already
+            // do); the graph is parsed/configured so they're unused. No-op on the parser's NULLs.
+            ff::avfilter_inout_free(&mut inputs);
+            ff::avfilter_inout_free(&mut outputs);
+
             Ok(Self {
                 encoder_ctx,
                 codec,
@@ -399,16 +549,23 @@ impl VaapiEncoder {
                 fps,
                 current_qp: settings.h264_crf as u32,
                 qp_hysteresis_counter: 0,
+                cbr_mode: settings.h264_cbr_mode,
+                current_bitrate_kbps: settings.h264_bitrate_kbps,
+                current_vbv_kb: settings.h264_vbv_buffer_size_kb,
                 recording_sink,
+                omit_stripe_headers: settings.omit_stripe_headers,
             })
         }
     }
 
-    /// @brief Completely restarts the encoder context with a new QP.
+    /// @brief Re-opens the codec context in place, applying the current rate-control state.
     ///
-    /// This is required because VAAPI dynamic QP updates are flaky or unsupported
-    /// on some drivers, necessitating a full stream stop/start to apply changes cleanly.
-    unsafe fn restart_encoder(&mut self, new_qp: u32) -> Result<(), String> {
+    /// The VA device, encoder frames pool and filter graph persist; only the AVCodecContext is
+    /// rebuilt. CBR reprograms bit_rate / rc_max_rate / rc_buffer_size from the tracked bitrate
+    /// and VBV; CQP reprograms the quantizer. A freshly opened context emits an IDR as its first
+    /// frame, so the reference chain self-heals. This replaces any driver-side live reconfigure,
+    /// which is flaky on some drivers.
+    unsafe fn reopen_codec(&mut self, qp: u32) -> Result<(), String> {
         if !self.encoder_ctx.is_null() {
             ff::avcodec_free_context(&mut self.encoder_ctx);
         }
@@ -435,8 +592,21 @@ impl VaapiEncoder {
             ff::av_dict_set(d, ck.as_ptr(), cv.as_ptr(), 0);
         };
 
-        set_opt(&mut opts, "rc_mode", "CQP");
-        set_opt(&mut opts, "qp", &new_qp.to_string());
+        if self.cbr_mode {
+            let bps = (self.current_bitrate_kbps.max(0) as i64).saturating_mul(1000);
+            let vbv = if self.current_vbv_kb > 0 {
+                (self.current_vbv_kb as i64).saturating_mul(1000)
+            } else {
+                bps / 10
+            };
+            (*self.encoder_ctx).bit_rate = bps;
+            (*self.encoder_ctx).rc_max_rate = bps;
+            (*self.encoder_ctx).rc_buffer_size = vbv.min(i32::MAX as i64) as i32;
+            set_opt(&mut opts, "rc_mode", "CBR");
+        } else {
+            set_opt(&mut opts, "rc_mode", "CQP");
+            set_opt(&mut opts, "qp", &qp.to_string());
+        }
         set_opt(&mut opts, "async_depth", "1");
         set_opt(&mut opts, "profile", "high");
         set_opt(&mut opts, "level", "4.1");
@@ -448,35 +618,70 @@ impl VaapiEncoder {
             return Err(format!("Failed to re-open encoder: {}", ff_err_str(ret)));
         }
 
-        self.current_qp = new_qp;
+        self.current_qp = qp;
         Ok(())
     }
 
     /// @brief Updates the quantization parameter (QP) with hysteresis.
     ///
-    /// If QP decreases (higher quality paint-over), it restarts immediately.
+    /// If QP decreases (higher quality paint-over), it re-opens immediately.
     /// If QP increases (lower quality motion), it waits for the hysteresis limit
-    /// to avoid blinking artifacts.
+    /// to avoid blinking artifacts. No-op in CBR, where quality is driven by the
+    /// bitrate target and a per-frame QP change would otherwise re-open in CQP and
+    /// drop the configured bitrate.
     unsafe fn update_qp(&mut self, target_qp: u32) -> Result<(), String> {
+        if self.cbr_mode {
+            return Ok(());
+        }
+
         if target_qp == self.current_qp {
             self.qp_hysteresis_counter = 0;
-            return Ok(()).into();
+            return Ok(());
         }
 
         if target_qp < self.current_qp {
             self.qp_hysteresis_counter = 0;
-            self.restart_encoder(target_qp)?;
+            self.reopen_codec(target_qp)?;
         } else {
             self.qp_hysteresis_counter += 1;
             if self.qp_hysteresis_counter > QP_HYSTERESIS_LIMIT {
                 self.qp_hysteresis_counter = 0;
-                self.restart_encoder(target_qp)?;
+                self.reopen_codec(target_qp)?;
             }
         }
 
         Ok(())
     }
 
+    /// Applies a live rate-control / framerate update taken from `settings`.
+    ///
+    /// The codec is re-opened only when something actually changed: the CBR bitrate or VBV
+    /// size (compared in CBR mode only) or the target fps. That guard makes the call cheap
+    /// enough to issue every frame, like the NVENC path. The re-open is given the current QP
+    /// so a CQP stream keeps its quantizer across an fps change; a failed re-open is logged.
+    pub fn reconfigure_rate(&mut self, settings: &RustCaptureSettings) {
+        // Sub-1 fps targets clamp to 1 before the integer truncation.
+        let new_fps = settings.target_fps.max(1.0) as i32;
+        let bitrate_differs = settings.h264_bitrate_kbps != self.current_bitrate_kbps;
+        let vbv_differs = settings.h264_vbv_buffer_size_kb != self.current_vbv_kb;
+        let rate_changed = self.cbr_mode && (bitrate_differs || vbv_differs);
+        if !rate_changed && new_fps == self.fps {
+            return;
+        }
+
+        // Store the new targets before re-opening; the tracked bitrate/VBV are what
+        // reopen_codec programs into the fresh context in CBR mode.
+        self.fps = new_fps;
+        self.current_bitrate_kbps = settings.h264_bitrate_kbps;
+        self.current_vbv_kb = settings.h264_vbv_buffer_size_kb;
+
+        let qp = self.current_qp;
+        // Best effort: a failed re-open is reported on stderr, not propagated.
+        if let Err(e) = unsafe { self.reopen_codec(qp) } {
+            eprintln!("[vaapi] rate reconfigure failed: {e}");
+        }
+    }
+
     /// @brief Retrieves encoded packets from the encoder and formats them with the custom header.
     unsafe fn collect_packet(&mut self, frame_number: u64, output: &mut Vec<u8>) {
         while ff::avcodec_receive_packet(self.encoder_ctx, self.packet) == 0 {
@@ -484,13 +689,16 @@ impl VaapiEncoder {
             let data = (*self.packet).data;
             let is_key = ((*self.packet).flags & ff::AV_PKT_FLAG_KEY) != 0;
 
-            output.reserve(10 + size);
-            output.push(0x04);
-            output.push(if is_key { 0x01 } else { 0x00 });
-            output.extend_from_slice(&(frame_number as u16).to_be_bytes());
-            output.extend_from_slice(&0u16.to_be_bytes());
-            output.extend_from_slice(&(self.width as u16).to_be_bytes());
-            output.extend_from_slice(&(self.height as u16).to_be_bytes());
+            let header_sz = if self.omit_stripe_headers { 0 } else { 10 };
+            output.reserve(header_sz + size);
+            if !self.omit_stripe_headers {
+                output.push(0x04);
+                output.push(if is_key { 0x01 } else { 0x00 });
+                output.extend_from_slice(&(frame_number as u16).to_be_bytes());
+                output.extend_from_slice(&0u16.to_be_bytes());
+                output.extend_from_slice(&(self.width as u16).to_be_bytes());
+                output.extend_from_slice(&(self.height as u16).to_be_bytes());
+            }
 
             let slice = std::slice::from_raw_parts(data, size);
             output.extend_from_slice(slice);
@@ -536,6 +744,9 @@ impl VaapiEncoder {
             for (i, (handle, _)) in dmabuf.handles().zip(dmabuf.offsets()).enumerate() {
                 let fd = dup(handle.as_raw_fd());
                 if fd < 0 {
+                    for &dup_fd in &resources.fds {
+                        close(dup_fd);
+                    }
                     ff::av_free(desc_ptr as *mut c_void);
                     return Err("Failed to dup fd".into());
                 }
@@ -589,6 +800,11 @@ impl VaapiEncoder {
             (*self.video_frame).hw_frames_ctx = ff::av_buffer_ref(self.drm_frames_ctx);
 
             if ff::av_buffersrc_add_frame(self.buffersrc_ctx, self.video_frame) < 0 {
+                // add_frame (no KEEP_REF) only consumes the frame's refs on success;
+                // on error "the input frame is not touched", so buf[0] is still live
+                // and this unref is required to release it (-> release_drm_frame closes
+                // the dup'd dmabuf fds). Don't add a manual fd close: that double-closes.
+                ff::av_frame_unref(self.video_frame);
                 return Err("Failed to feed filter graph".into());
             }
 
@@ -614,6 +830,81 @@ impl VaapiEncoder {
         }
     }
 
+    /// @brief Encodes one host BGRA frame using GPU postprocessing (X11 path).
+    ///
+    /// The CPU frame is staged into a VAAPI surface (hwupload) and converted ARGB->NV12 by
+    /// VA-VPP (scale_vaapi) on the GPU before encode -- there is no CPU colorspace conversion.
+    /// Only valid on an encoder built with `new_host`.
+    ///
+    /// @input bgra: B,G,R,A bytes, `stride` bytes per row (X11 host layout).
+    /// @input stride: Source row stride in bytes (may include padding).
+    /// @input frame_number: Frame index.
+    /// @input qp: Quality parameter.
+    /// @input force_idr: Force keyframe generation.
+    /// @return Result containing the encoded packet, or an error string on failure.
+    pub fn encode_host_argb(
+        &mut self,
+        bgra: &[u8],
+        stride: usize,
+        frame_number: u64,
+        qp: u32,
+        force_idr: bool,
+    ) -> Result<Vec<u8>, String> {
+        unsafe {
+            self.update_qp(qp)?;
+            let h = self.height as usize;
+            let needed = stride.checked_mul(h).ok_or("stride overflow")?;
+            if bgra.len() < needed {
+                return Err("Input buffer too small".into());
+            }
+
+            // Allocate the sink frame first so a failed allocation never strands a queued frame.
+            let mut filtered_frame = ff::av_frame_alloc();
+            if filtered_frame.is_null() {
+                return Err("Failed to allocate filtered frame".into());
+            }
+
+            // Stage the host rows into a fresh refcounted BGRA frame (data movement, no CSC).
+            ff::av_frame_unref(self.video_frame);
+            (*self.video_frame).width = self.width;
+            (*self.video_frame).height = self.height;
+            (*self.video_frame).format = ff::AVPixelFormat::AV_PIX_FMT_BGRA as i32;
+            if ff::av_frame_get_buffer(self.video_frame, 0) < 0 {
+                ff::av_frame_free(&mut filtered_frame);
+                return Err("Failed to allocate host BGRA frame".into());
+            }
+            let (src, dst) = (bgra.as_ptr(), (*self.video_frame).data[0]);
+            let dst_stride = (*self.video_frame).linesize[0] as usize;
+            let len = ((self.width as usize) * 4).min(stride).min(dst_stride);
+            for row in 0..h {
+                ptr::copy_nonoverlapping(src.add(row * stride), dst.add(row * dst_stride), len);
+            }
+            (*self.video_frame).pts = frame_number as i64;
+
+            if ff::av_buffersrc_add_frame(self.buffersrc_ctx, self.video_frame) < 0 {
+                ff::av_frame_unref(self.video_frame);
+                ff::av_frame_free(&mut filtered_frame);
+                return Err("Failed to feed filter graph".into());
+            }
+
+            let mut output = Vec::new();
+            while ff::av_buffersink_get_frame(self.buffersink_ctx, filtered_frame) >= 0 {
+                if force_idr {
+                    (*filtered_frame).pict_type = ff::AVPictureType::AV_PICTURE_TYPE_I;
+                }
+                if ff::avcodec_send_frame(self.encoder_ctx, filtered_frame) < 0 {
+                    ff::av_frame_free(&mut filtered_frame);
+                    return Err("Failed to send frame to encoder".into());
+                }
+                ff::av_frame_unref(filtered_frame);
+                self.collect_packet(frame_number, &mut output);
+            }
+            ff::av_frame_free(&mut filtered_frame);
+
+            Ok(output)
+        }
+    }
+
     /// @brief Encodes raw NV12 pixel data by uploading it from CPU memory to the GPU.
     ///
     /// @input nv12_pixels: Raw byte slice of NV12 data.
@@ -650,6 +941,8 @@ impl VaapiEncoder {
             (*self.sw_frame).data[1] = nv12_pixels.as_ptr().add(width * height) as *mut u8;
             (*self.sw_frame).linesize[1] = self.width;
 
+            // get_buffer needs an empty frame; without this the prior surface leaks.
+            ff::av_frame_unref(self.hw_frame);
             if ff::av_hwframe_get_buffer((*self.encoder_ctx).hw_frames_ctx, self.hw_frame, 0) < 0 {
                 return Err("Failed to allocate HW frame for NV12 path".into());
             }
@@ -663,12 +956,11 @@ impl VaapiEncoder {
             ff::av_frame_unref(self.sw_frame);
 
             (*self.hw_frame).pts = frame_number as i64;
+            // Force keyframes via pict_type (AV_PKT_FLAG_KEY is a packet flag, not a frame flag).
             if force_idr {
                 (*self.hw_frame).pict_type = ff::AVPictureType::AV_PICTURE_TYPE_I;
-                (*self.hw_frame).flags |= ff::AV_PKT_FLAG_KEY;
             } else {
                 (*self.hw_frame).pict_type = ff::AVPictureType::AV_PICTURE_TYPE_NONE;
-                (*self.hw_frame).flags &= !ff::AV_PKT_FLAG_KEY;
             }
 
             if ff::avcodec_send_frame(self.encoder_ctx, self.hw_frame) < 0 {
diff --git a/pixelflux_wayland/src/lib.rs b/pixelflux/src/lib.rs
similarity index 53%
rename from pixelflux_wayland/src/lib.rs
rename to pixelflux/src/lib.rs
index 05aad57..26f7b82 100644
--- a/pixelflux_wayland/src/lib.rs
+++ b/pixelflux/src/lib.rs
@@ -20,7 +20,7 @@ use std::time::{Duration, Instant};
 
 use gbm::{BufferObject, BufferObjectFlags, Device as RawGbmDevice, Format as GbmFormat};
 use pyo3::prelude::*;
-use pyo3::types::{PyAny, PyBytes, PyModule};
+use pyo3::types::{PyAny, PyModule};
 use yuv::{
     BufferStoreMut, YuvBiPlanarImageMut, YuvConversionMode, YuvRange, YuvStandardMatrix,
 };
@@ -92,6 +92,7 @@ use smithay::{
 
 pub mod encoders {
     pub mod nvenc;
+    pub mod oh264;
     pub mod overlay;
     pub mod software;
     pub mod vaapi;
@@ -99,6 +100,9 @@ pub mod encoders {
 
 pub mod wayland;
 pub mod recording_sink;
+pub mod pipeline;
+pub mod x11;
+pub mod nvgpufilter;
 
 pub use encoders::nvenc;
 pub use encoders::software::StripeState;
@@ -128,11 +132,15 @@ fn get_shm_usage_bytes() -> u64 {
 }
 
 fn calculate_memory_threshold(width: i32, height: i32) -> usize {
-    let frame_size = (width * height * 4) as usize;
-    let base_app_memory = 300 * 1024 * 1024;
-    let buffer_allowance = frame_size * 20;
-    let min_threshold = 1536 * 1024 * 1024;
-    (base_app_memory + buffer_allowance).max(min_threshold)
+    // Budget = 300 MiB base + 20 frames at 4 bytes per pixel, floored at 1536 MiB.
+    // Negative dimensions clamp to zero and every step saturates instead of wrapping.
+    const MIB: usize = 1024 * 1024;
+    let clamped_w = width.max(0) as usize;
+    let clamped_h = height.max(0) as usize;
+    let frame_bytes = clamped_w.saturating_mul(clamped_h).saturating_mul(4);
+    let frames_budget = frame_bytes.saturating_mul(20);
+    let wanted = (300 * MIB).saturating_add(frames_budget);
+    wanted.max(1536 * MIB)
 }
 
 use encoders::nvenc::NvencEncoder;
@@ -198,8 +206,16 @@ pub struct RustCaptureSettings {
     pub watermark_location_enum: i32,
     pub vaapi_render_node_index: i32,
     pub use_cpu: bool,
+    pub use_openh264: bool,
     pub debug_logging: bool,
+    pub auto_adjust_screen_capture_size: bool,
     pub recording_socket: String,
+    // When true, encoders emit the raw payload without the per-stripe header byte block;
+    // stripe metadata is then carried only on the frame attributes.
+    pub omit_stripe_headers: bool,
+    pub h264_cbr_mode: bool,
+    pub h264_bitrate_kbps: i32,
+    pub h264_vbv_buffer_size_kb: i32,
 }
 
 impl Default for RustCaptureSettings {
@@ -229,24 +245,119 @@ impl Default for RustCaptureSettings {
             watermark_location_enum: 0,
             vaapi_render_node_index: -1,
             use_cpu: false,
+            use_openh264: false,
             debug_logging: false,
+            auto_adjust_screen_capture_size: false,
             recording_socket: String::new(),
+            omit_stripe_headers: false,
+            h264_cbr_mode: false,
+            h264_bitrate_kbps: 4000,
+            h264_vbv_buffer_size_kb: 0,
         }
     }
 }
 
+/// Read every capture option off a Python settings object (anything exposing the
+/// `CaptureSettings` attributes) via getattr and assemble a `RustCaptureSettings`.
+/// Shared by the Wayland and X11 capture entry points so both read identical fields.
+pub(crate) fn extract_settings(settings: &Bound<'_, PyAny>) -> PyResult<RustCaptureSettings> {
+    // The attribute itself is mandatory; its value may be str or bytes (decoded lossily
+    // as UTF-8), and any other type degrades to an empty path.
+    let watermark_attr = settings.getattr("watermark_path")?;
+    let watermark_path = match watermark_attr.extract::<String>() {
+        Ok(text) => text,
+        Err(_) => match watermark_attr.extract::<Vec<u8>>() {
+            Ok(raw) => String::from_utf8_lossy(&raw).into_owned(),
+            Err(_) => String::new(),
+        },
+    };
+
+    // Optional: a missing or unextractable `scale` falls back to 1.0.
+    let scale = match settings.getattr("scale") {
+        Ok(attr) => attr.extract().unwrap_or(1.0),
+        Err(_) => 1.0,
+    };
+
+    // Fields read with `?` are mandatory: a missing attribute or a value that fails to
+    // extract aborts here and hands the Python error back to the caller.
+    Ok(RustCaptureSettings {
+        width: settings.getattr("capture_width")?.extract()?,
+        height: settings.getattr("capture_height")?.extract()?,
+        scale,
+        capture_x: settings.getattr("capture_x")?.extract()?,
+        capture_y: settings.getattr("capture_y")?.extract()?,
+        target_fps: settings.getattr("target_fps")?.extract()?,
+        jpeg_quality: settings.getattr("jpeg_quality")?.extract()?,
+        paint_over_jpeg_quality: settings.getattr("paint_over_jpeg_quality")?.extract()?,
+        use_paint_over_quality: settings.getattr("use_paint_over_quality")?.extract()?,
+        paint_over_trigger_frames: settings.getattr("paint_over_trigger_frames")?.extract()?,
+        damage_block_threshold: settings.getattr("damage_block_threshold")?.extract()?,
+        damage_block_duration: settings.getattr("damage_block_duration")?.extract()?,
+        output_mode: settings.getattr("output_mode")?.extract()?,
+        h264_crf: settings.getattr("h264_crf")?.extract()?,
+        h264_paintover_crf: settings.getattr("h264_paintover_crf")?.extract()?,
+        h264_paintover_burst_frames: settings.getattr("h264_paintover_burst_frames")?.extract()?,
+        h264_fullcolor: settings.getattr("h264_fullcolor")?.extract()?,
+        h264_fullframe: settings.getattr("h264_fullframe")?.extract()?,
+        h264_streaming_mode: settings.getattr("h264_streaming_mode")?.extract()?,
+        capture_cursor: settings.getattr("capture_cursor")?.extract()?,
+        watermark_path,
+        watermark_location_enum: settings.getattr("watermark_location_enum")?.extract()?,
+        vaapi_render_node_index: settings.getattr("vaapi_render_node_index")?.extract()?,
+        use_cpu: settings.getattr("use_cpu")?.extract()?,
+        // Optional flag: an absent attribute or a value that is not a bool reads as false
+        // instead of raising.
+        use_openh264: settings
+            .getattr("use_openh264")
+            .map_or(false, |v| v.extract::<bool>().unwrap_or(false)),
+        debug_logging: settings.getattr("debug_logging")?.extract()?,
+        auto_adjust_screen_capture_size: settings
+            .getattr("auto_adjust_screen_capture_size")
+            .map_or(false, |v| v.extract::<bool>().unwrap_or(false)),
+        recording_socket: settings
+            .getattr("recording_socket")
+            .map_or_else(|_| String::new(), |v| v.extract::<String>().unwrap_or_default()),
+        // When true, encoders emit the raw payload without the per-stripe header. Stripe
+        // metadata is still exposed on the frame attributes, so the consumer must read it
+        // from there rather than parsing header bytes when this is set.
+        omit_stripe_headers: settings
+            .getattr("omit_stripe_headers")
+            .map_or(false, |v| v.extract::<bool>().unwrap_or(false)),
+        // Rate-control fields are mandatory, unlike the optional flags above: a settings
+        // object lacking any of them fails extraction.
+        h264_cbr_mode: settings.getattr("h264_cbr_mode")?.extract()?,
+        h264_bitrate_kbps: settings.getattr("h264_bitrate_kbps")?.extract()?,
+        h264_vbv_buffer_size_kb: settings.getattr("h264_vbv_buffer_size_kb")?.extract()?,
+    })
+}
+
 pub enum ThreadCommand {
     StartCapture(Py<PyAny>, RustCaptureSettings),
     StopCapture,
     SetCursorCallback(Py<PyAny>),
     KeyboardKey { scancode: u32, state: u32 },
+    /// Inject a key by X11/XKB keysym; the handler resolves it to a keycode (+ shift level)
+    /// against our own smithay xkb keymap. See the KeyboardKeysym handler.
+    KeyboardKeysym { keysym: u32, state: u32 },
+    /// Ask for the smithay keyboard's keymap as an XKB_KEYMAP_FORMAT_TEXT_V1 string, sent on
+    /// `reply`, so a consumer (selkies) can build its reverse keysym map from the same keymap.
+    GetXkbKeymap { reply: std::sync::mpsc::Sender<String> },
     PointerMotion { x: f64, y: f64 },
     PointerRelativeMotion { dx: f64, dy: f64 },
     PointerButton { btn: u32, state: u32 },
     PointerAxis { x: f64, y: f64 },
     UpdateCursorConfig { render_on_framebuffer: bool },
+    RequestIdr,
+    /// Live rate-control change for the Wayland calloop thread (parity with the X11
+    /// rate_dirty path). A field left as None means that dimension is unchanged.
+    UpdateRate { bitrate_kbps: Option<i32>, vbv_kb: Option<i32>, fps: Option<f64> },
 }
 
+/// Offset between keycode spaces: X11/XKB keycode = Linux evdev keycode + 8. `inject_key`
+/// works in evdev space, so the KeyboardKey handler ADDS this; `inject_keysym` resolves xkb's
+/// already-X11 keycodes (min..=max) and passes them to `KeyboardHandle::input` unchanged.
+const EVDEV_TO_XKB_KEYCODE_OFFSET: u32 = 8;
+
 fn get_gpu_driver(card_index: i32) -> String {
     let path = format!("/sys/class/drm/renderD{}/device/driver", 128 + card_index);
     match std::fs::read_link(&path) {
@@ -255,6 +366,87 @@ fn get_gpu_driver(card_index: i32) -> String {
     }
 }
 
+/// Report whether GPU auto-selection is switched on. SELKIES_AUTO_GPU takes precedence;
+/// AUTO_GPU is read only when that lookup fails. The value must equal "true" ignoring case.
+fn auto_gpu_enabled() -> bool {
+    let flag = match std::env::var("SELKIES_AUTO_GPU") {
+        Ok(value) => value,
+        Err(_) => std::env::var("AUTO_GPU").unwrap_or_default(),
+    };
+    flag.to_lowercase() == "true"
+}
+
+/// Pick a usable `/dev/dri/renderD*` path, or `None` when no render node can be found.
+/// Cards under /sys/class/drm are visited in ascending numeric order, and the first
+/// `renderD*` entry under a card's `device/drm` that also exists in /dev/dri wins. If
+/// none qualifies, the lexicographically smallest `renderD*` name in /dev/dri is used.
+fn auto_select_render_node() -> Option<String> {
+    // An unreadable /sys/class/drm (e.g. a container exposing /dev/dri without /sys)
+    // just leaves this list empty, so the /dev/dri fallback below still runs.
+    let mut numbered_cards: Vec<(u32, std::path::PathBuf)> = Vec::new();
+    if let Ok(sys_entries) = std::fs::read_dir("/sys/class/drm") {
+        for entry in sys_entries.flatten() {
+            let name = entry.file_name().into_string().ok();
+            let index = name.and_then(|n| n.strip_prefix("card")?.parse::<u32>().ok());
+            if let Some(index) = index {
+                numbered_cards.push((index, entry.path()));
+            }
+        }
+    }
+    numbered_cards.sort_by_key(|&(index, _)| index);
+    for (_, card_dir) in &numbered_cards {
+        if let Ok(listing) = std::fs::read_dir(card_dir.join("device/drm")) {
+            for item in listing.flatten() {
+                let node = item.file_name().into_string().unwrap_or_default();
+                let candidate = format!("/dev/dri/{}", node);
+                if node.starts_with("renderD") && std::path::Path::new(&candidate).exists() {
+                    return Some(candidate);
+                }
+            }
+        }
+    }
+    // Fallback: scan /dev/dri directly and keep only the render node names.
+    let mut render_nodes: Vec<String> = Vec::new();
+    for item in std::fs::read_dir("/dev/dri").ok()?.flatten() {
+        match item.file_name().into_string() {
+            Ok(name) if name.starts_with("renderD") => render_nodes.push(name),
+            _ => {}
+        }
+    }
+    render_nodes.sort();
+    render_nodes.first().map(|n| format!("/dev/dri/{}", n))
+}
+
+/// Find the first key in `layout` of smithay's xkb `keymap` that yields `target_keysym`.
+/// Returns `(raw X11 keycode, shift level)` so the caller can synthesize the modifier(s)
+/// that level implies, or `None` if no key produces it at levels 0..=3. Read-only.
+fn resolve_keysym_to_keycode(
+    keymap: &smithay::input::keyboard::xkb::Keymap,
+    layout: smithay::input::keyboard::Layout,
+    target_keysym: u32,
+) -> Option<(u32, u8)> {
+    use smithay::input::keyboard::xkb;
+    let first_kc = keymap.min_keycode().raw();
+    let last_kc = keymap.max_keycode().raw();
+    if first_kc > last_kc {
+        return None;
+    }
+    // Levels are tried in preference order: 0 = unshifted, 1 = Shift, 2 = AltGr /
+    // ISO_Level3_Shift, 3 = Shift+AltGr. Including 2 and 3 lets AltGr-only glyphs on
+    // non-US layouts (e.g. @ € | \ ~ on many EU layouts) resolve instead of being
+    // dropped. Within one level keycodes are scanned in ascending order, so the
+    // lowest matching keycode at the most preferred level wins.
+    (0u32..4).find_map(|level| {
+        (first_kc..=last_kc)
+            .find(|&raw_kc| {
+                let kc = xkb::Keycode::new(raw_kc);
+                let syms = keymap.key_get_syms_by_level(kc, layout.0, level);
+                syms.iter().any(|sym| sym.raw() == target_keysym)
+            })
+            .map(|raw_kc| (raw_kc, level as u8))
+    })
+}
+
 /// @brief The main execution loop of the Wayland backend.
 ///
 /// This function acts as the central nervous system of the application. It runs in its own thread
@@ -277,57 +469,43 @@ fn get_gpu_driver(card_index: i32) -> String {
 ///    A high-frequency timer triggers the frame generation process:
 ///    - **Compositing**: Renders all active windows onto a virtual output framebuffer.
 ///    - **Readback Logic**: Determines if the GPU buffer needs to be copied to CPU memory
-///      (e.g., for software encoding, watermarking, or cross-GPU transfer).
+///      (e.g., for software encoding or cross-GPU transfer).
 ///    - **Encoding**: Passes the frame to the active encoder. This handles the complex
 ///      "Zero-Copy" path (sharing DMABUFs directly with hardware encoders) vs the "Readback"
 ///      path (copying pixels for CPU-based processing/encoding).
 ///    - **Transmission**: Sends the encoded video packets back to the Python layer via callback.
-fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<ThreadCommand>) {
-    let mut width: i32 = 1024;
-    let mut height: i32 = 768;
-
-    if let Ok(res_str) = std::env::var("MAX_RES") {
-        let parts: Vec<&str> = res_str.split('x').collect();
-        if parts.len() == 2 {
-            if let (Ok(w), Ok(h)) = (parts[0].parse::<i32>(), parts[1].parse::<i32>()) {
-                width = w;
-                height = h;
-                println!("[Wayland] Resolution set via MAX_RES: {}x{}", width, height);
-            }
-        }
-    }
-    if let Ok(w_str) = std::env::var("SELKIES_MANUAL_WIDTH") {
-        if let Ok(w) = w_str.parse::<i32>() {
-            width = w;
-            println!("[Wayland] Width override via SELKIES_MANUAL_WIDTH: {}", width);
-        }
-    }
-
-    if let Ok(h_str) = std::env::var("SELKIES_MANUAL_HEIGHT") {
-        if let Ok(h) = h_str.parse::<i32>() {
-            height = h;
-            println!("[Wayland] Height override via SELKIES_MANUAL_HEIGHT: {}", height);
-        }
-    }
+fn run_wayland_thread(
+    command_rx: smithay::reexports::calloop::channel::Channel<ThreadCommand>,
+    initial_width: i32,
+    initial_height: i32,
+    explicit_dri_node: String,
+) {
+    // Initial framebuffer size comes from selkies (the server owns resolution policy
+    // and forwards it via the WaylandBackend constructor); first StartCapture resizes.
+    let width: i32 = if initial_width > 0 { initial_width } else { 1024 };
+    let height: i32 = if initial_height > 0 { initial_height } else { 768 };
 
     let mut event_loop = EventLoop::<AppState>::try_new().expect("Unable to create event_loop");
     let display: Display<AppState> = Display::new().unwrap();
     let dh: DisplayHandle = display.handle();
     dh.set_default_max_buffer_size(10 * 1024 * 1024);
 
-    let auto_gpu = std::env::var("AUTO_GPU").unwrap_or_default().to_lowercase() == "true";
-    let mut dri_node = std::env::var("DRINODE").unwrap_or_default();
-
-    if auto_gpu {
-        if let Ok(entries) = std::fs::read_dir("/dev/dri") {
-            let mut nodes: Vec<String> = entries.flatten()
-                .filter_map(|e| e.file_name().into_string().ok())
-                .filter(|n| n.starts_with("renderD"))
-                .collect();
-            nodes.sort();
-            if let Some(node) = nodes.first() {
-                dri_node = format!("/dev/dri/{}", node);
-                println!("[Wayland] AUTO_GPU enabled. Selected: {}", dri_node);
+    // Explicit node from selkies (via the constructor); fall back to AUTO_GPU
+    // hardware detection (which the device library owns) when none was given.
+    let mut dri_node = explicit_dri_node;
+    if dri_node.is_empty() && auto_gpu_enabled() {
+        if let Some(node) = auto_select_render_node() {
+            dri_node = node;
+            println!("[Wayland] AUTO_GPU enabled. Selected: {}", dri_node);
+        }
+    }
+    // With no explicit node and AUTO_GPU off, honor an operator-set DRINODE before
+    // falling back to the software renderer.
+    if dri_node.is_empty() && !auto_gpu_enabled() {
+        if let Ok(node) = std::env::var("DRINODE") {
+            if !node.is_empty() {
+                dri_node = node;
+                println!("[Wayland] Using DRINODE from environment: {}", dri_node);
             }
         }
     }
@@ -490,8 +668,12 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
         start_time: Instant::now(),
         clock: Clock::new(),
         frame_counter: 0,
+        pending_force_idr: false,
+        synthetic_shift_keysyms: std::collections::HashMap::new(),
+        synthetic_mod_refcounts: std::collections::HashMap::new(),
         use_gpu,
         video_encoder: None,
+        openh264_encoder: None,
         vaapi_state: StripeState::default(),
         cursor_helper: Cursor::load(),
         overlay_state: OverlayState::default(),
@@ -501,6 +683,8 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
         render_cursor_on_framebuffer: false,
         render_node_path,
         recording_sink: None,
+        deliver_tx: None,
+        deliver_join: None,
     };
 
     let output = Output::new(
@@ -536,8 +720,9 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
         .insert_source(command_rx, move |event, _, state| {
             match event {
                 CalloopEvent::Msg(ThreadCommand::StartCapture(cb, mut settings)) => {
-                    let auto_gpu = std::env::var("AUTO_GPU").unwrap_or_default().to_lowercase() == "true";
-                    if auto_gpu {
+                    // AUTO_GPU aims the encoder at the auto-picked render node, but an explicit
+                    // operator choice (vaapi_render_node_index >= 0, e.g. --dri-node) always wins.
+                    if auto_gpu_enabled() && settings.vaapi_render_node_index < 0 {
                         if let Some(idx_str) = state.render_node_path.strip_prefix("/dev/dri/renderD") {
                             if let Ok(idx) = idx_str.parse::<i32>() {
                                 settings.vaapi_render_node_index = idx - 128;
@@ -545,6 +730,12 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                         }
                     }
 
+                    // H.264 4:2:0 needs even dimensions.
+                    if settings.output_mode == 1 {
+                        settings.width &= !1;
+                        settings.height &= !1;
+                    }
+
                     state.recording_sink =
                         crate::recording_sink::RecordingSink::try_bind(&settings.recording_socket);
 
@@ -583,11 +774,6 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
 
                             let pixel_count = (settings.width * settings.height) as usize;
                             state.frame_buffer = vec![0u8; pixel_count * 4];
-                            if settings.h264_fullcolor {
-                                state.nv12_buffer = vec![0u8; pixel_count * 3];
-                            } else {
-                                state.nv12_buffer = vec![0u8; pixel_count * 3 / 2];
-                            }
 
                             if state.use_gpu {
                                 if let Some(gbm) = state.gbm_device.as_mut() {
@@ -606,6 +792,18 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                             }
                         }
 
+                        // Size depends on fullcolor too, so (re)size unconditionally,
+                        // not only on a resolution change.
+                        let nv12_pixel_count = (settings.width * settings.height) as usize;
+                        let nv12_needed = if settings.h264_fullcolor {
+                            nv12_pixel_count * 3
+                        } else {
+                            nv12_pixel_count * 3 / 2
+                        };
+                        if state.nv12_buffer.len() != nv12_needed {
+                            state.nv12_buffer = vec![0u8; nv12_needed];
+                        }
+
                         for window in state.space.elements() {
                             if let Some(surface) = window.wl_surface() {
                                 output.enter(&surface);
@@ -628,9 +826,24 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                     }
 
                     state.video_encoder = None;
+                    state.openh264_encoder = None;
                     let use_cpu_explicit = settings.use_cpu || settings.vaapi_render_node_index == -1;
 
-                    if use_cpu_explicit {
+                    if settings.output_mode == 1 && settings.use_openh264 {
+                        // Explicit OpenH264 software encoder (full-frame, host pixels) — parity with
+                        // the X11 path. video_encoder stays None so the readback keeps frame_buffer as
+                        // host pixels (no NV12 conversion); the encode dispatch feeds it from there.
+                        match crate::encoders::oh264::Openh264Encoder::new(
+                            &settings,
+                            state.recording_sink.clone(),
+                        ) {
+                            Some(e) => {
+                                state.openh264_encoder = Some(e);
+                                println!("[Wayland] OpenH264 software encoder selected.");
+                            }
+                            None => eprintln!("[Wayland] OpenH264 init failed; falling back to x264 software."),
+                        }
+                    } else if use_cpu_explicit {
                         println!("[Wayland] CPU encoding selected (use_cpu=true or vaapi_node=-1).");
                     } else {
                         let encode_driver = get_gpu_driver(settings.vaapi_render_node_index);
@@ -704,7 +917,7 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                     let mut n_stripes = num_cores;
 
                     if settings.output_mode == 1 {
-                        if state.video_encoder.is_some() || settings.h264_fullframe {
+                        if state.video_encoder.is_some() || state.openh264_encoder.is_some() || settings.h264_fullframe {
                             n_stripes = 1;
                         } else {
                             let min_h = 64;
@@ -753,14 +966,18 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                             ));
                         }
                     } else {
-                        let encoder_type = match &state.video_encoder {
-                            Some(GpuEncoder::Nvenc(_)) => "NVENC",
-                            Some(GpuEncoder::Vaapi(_)) => "VAAPI",
-                            None => "CPU",
+                        let encoder_type = if state.openh264_encoder.is_some() {
+                            "OpenH264"
+                        } else {
+                            match &state.video_encoder {
+                                Some(GpuEncoder::Nvenc(_)) => "NVENC",
+                                Some(GpuEncoder::Vaapi(_)) => "VAAPI",
+                                None => "CPU",
+                            }
                         };
                         log_msg.push_str(&format!(" | Mode: H264 ({})", encoder_type));
 
-                        if state.video_encoder.is_some() || settings.h264_fullframe {
+                        if state.video_encoder.is_some() || state.openh264_encoder.is_some() || settings.h264_fullframe {
                             log_msg.push_str(" FullFrame");
                         } else {
                             log_msg.push_str(" Striped");
@@ -779,10 +996,14 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                             ));
                         }
 
-                        let is_actually_444 = match &state.video_encoder {
-                            Some(GpuEncoder::Nvenc(_)) => settings.h264_fullcolor,
-                            Some(_) => false,
-                            None => settings.h264_fullcolor,
+                        let is_actually_444 = if state.openh264_encoder.is_some() {
+                            false // OpenH264 is 4:2:0 only
+                        } else {
+                            match &state.video_encoder {
+                                Some(GpuEncoder::Nvenc(_)) => settings.h264_fullcolor,
+                                Some(_) => false,
+                                None => settings.h264_fullcolor,
+                            }
                         };
 
                         let range_str = if is_actually_444 {
@@ -816,29 +1037,228 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                     state.total_stripes_encoded = 0;
                     state.last_log_time = Instant::now();
                     state.frame_counter = 0;
+                    state.pending_force_idr = false;
                     state.stripes.clear();
                     state.vaapi_state = StripeState::default();
+                    // If a cursor callback is already registered, replay the retained cursor to
+                    // this (re)started capture so the client isn't left cursorless until the next
+                    // compositor cursor event.
+                    if state.cursor_callback.is_some() {
+                        if let Some(icon) = state.current_cursor_icon.clone() {
+                            state.send_cursor_image(&icon);
+                        }
+                    }
+                    // Move the Python callback onto a dedicated delivery thread so it (and the GIL
+                    // it holds) never runs on the calloop thread and can't stall input/control
+                    // dispatch. Tear down any prior thread first (restart without StopCapture).
+                    if let Some(tx) = state.deliver_tx.take() { drop(tx); }
+                    if let Some(j) = state.deliver_join.take() { let _ = j.join(); }
+                    if let Some(cb) = state.callback.take() {
+                        let (tx, rx) = std::sync::mpsc::sync_channel::<Vec<EncodedStripe>>(1);
+                        let join = thread::spawn(move || {
+                            // recv() blocks until a frame arrives; returns Err (exits) when the
+                            // SyncSender is dropped on StopCapture/teardown. One GIL acquisition
+                            // per frame, all stripes batched, mirroring the X11 on_frame closure.
+                            while let Ok(stripes) = rx.recv() {
+                                // Drain without attaching once teardown has begun: attaching to a
+                                // finalizing interpreter aborts the process pre-3.13.
+                                if PY_SHUTDOWN.load(Ordering::Relaxed) { continue; }
+                                Python::attach(|py| {
+                                    for s in stripes {
+                                        match Py::new(py, StripeFrame::new_owned_meta(
+                                            s.data, s.data_type, s.stripe_y_start,
+                                            s.stripe_height, s.frame_id,
+                                        )) {
+                                            Ok(f) => { if let Err(e) = cb.call1(py, (f,)) { e.print(py); } }
+                                            Err(e) => eprintln!("[wayland] frame alloc error: {e:?}"),
+                                        }
+                                    }
+                                });
+                            }
+                        });
+                        state.deliver_tx = Some(tx);
+                        state.deliver_join = Some(join);
+                    }
                 }
                 CalloopEvent::Msg(ThreadCommand::StopCapture) => {
                     println!("[Wayland] Capture loop stopped.");
                     state.is_capturing = false;
                     state.callback = None;
+                    // Drop the cursor callback: this thread outlives the interpreter, so a
+                    // retained one could fire into a finalizing interpreter (the off-GIL drop
+                    // defers the decref). Callers re-register it before the next start.
+                    state.cursor_callback = None;
                     state.video_encoder = None;
+                    if state.settings.debug_logging {
+                        println!(
+                            "[Wayland] Stop: dropping encoder state (openh264={}, stripes={}).",
+                            state.openh264_encoder.is_some(),
+                            state.stripes.len()
+                        );
+                    }
+                    // Release CPU encoder state too (x264 contexts own worker threads; OpenH264
+                    // owns plane buffers) instead of holding it until the next StartCapture.
+                    state.openh264_encoder = None;
+                    state.stripes.clear();
+                    state.vaapi_state = StripeState::default();
                     state.recording_sink = None;
+                    // Drop the sender so the delivery thread's recv() unblocks, then join it. The
+                    // calloop holds no GIL here, so joining while that thread is mid-callback is safe.
+                    // Also reached from the atexit teardown, which sends StopCapture.
+                    if let Some(tx) = state.deliver_tx.take() { drop(tx); }
+                    if let Some(j) = state.deliver_join.take() { let _ = j.join(); }
                 }
                 CalloopEvent::Msg(ThreadCommand::SetCursorCallback(cb)) => {
                     state.cursor_callback = Some(cb);
+                    // Replay the retained cursor so a client that (re)registers its callback AFTER
+                    // the last compositor cursor event still gets the current cursor immediately
+                    // (fixes the cursor-lost-after-tab-sleep symptom), instead of waiting for the
+                    // next cursor event that may never come.
+                    if let Some(icon) = state.current_cursor_icon.clone() {
+                        state.send_cursor_image(&icon);
+                    }
                 }
                 CalloopEvent::Msg(ThreadCommand::KeyboardKey { scancode, state: key_state_val }) => {
                     let key_state = if key_state_val > 0 { KeyState::Pressed } else { KeyState::Released };
                     let serial = next_serial();
                     let time = wayland_time();
                     if let Some(keyboard) = state.seat.get_keyboard() {
-                        keyboard.input(state, Keycode::new(scancode), key_state, serial, time, |_, _, _| {
+                        // scancode is an evdev keycode; xkb/smithay want X11 keycodes (see
+                        // EVDEV_TO_XKB_KEYCODE_OFFSET).
+                        keyboard.input(state, Keycode::new(scancode.saturating_add(EVDEV_TO_XKB_KEYCODE_OFFSET)), key_state, serial, time, |_, _, _| {
                             FilterResult::<()>::Forward
                         });
                     }
                 }
+                CalloopEvent::Msg(ThreadCommand::KeyboardKeysym { keysym, state: key_state_val }) => {
+                    // Inject by keysym against our own live xkb keymap: resolve to an X11 keycode
+                    // (+ Shift/AltGr), then inject, synthesizing the modifier press/release as needed.
+                    let key_state = if key_state_val > 0 { KeyState::Pressed } else { KeyState::Released };
+                    if let Some(keyboard) = state.seat.get_keyboard() {
+                        let inject = |state: &mut AppState, x11_kc: u32, ks: KeyState| {
+                            let serial = next_serial();
+                            let time = wayland_time();
+                            keyboard.input(state, Keycode::new(x11_kc), ks, serial, time, |_, _, _| {
+                                FilterResult::<()>::Forward
+                            });
+                        };
+
+                        match key_state {
+                            KeyState::Pressed => {
+                                // Phase 1: resolve against the live keymap (read-only) -> (target
+                                // keycode, the modifier keycodes that select its shift-level). No
+                                // `.unwrap()` on attacker-supplied data.
+                                let resolved: Option<(u32, Vec<u32>)> =
+                                    keyboard.with_xkb_state(state, |context| {
+                                        let xkb_guard = match context.xkb().lock() {
+                                            Ok(g) => g,
+                                            Err(_) => return None,
+                                        };
+                                        let layout = xkb_guard.active_layout();
+                                        // SAFETY: the &Keymap borrow stays within this guard's scope
+                                        // and is only read; we never store it past the lock.
+                                        let keymap = unsafe { xkb_guard.keymap() };
+                                        let (kc, level) = resolve_keysym_to_keycode(keymap, layout, keysym)?;
+                                        // Resolve a modifier keysym to its keycode, falling back to the
+                                        // conventional X11 keycode when the layout lacks it.
+                                        let resolve_mod = |ks: u32, fallback: u32| {
+                                            resolve_keysym_to_keycode(keymap, layout, ks)
+                                                .map(|(c, _)| c)
+                                                .unwrap_or(fallback)
+                                        };
+                                        // Standard ISO convention: level 1 = Shift, level 2 = AltGr
+                                        // (ISO_Level3_Shift), level 3 = Shift+AltGr.
+                                        let mut mods: Vec<u32> = Vec::new();
+                                        if level == 1 || level == 3 {
+                                            // Shift_L (0xFFE1); fallback evdev KEY_LEFTSHIFT 42 + offset.
+                                            mods.push(resolve_mod(0xFFE1, 42 + EVDEV_TO_XKB_KEYCODE_OFFSET));
+                                        }
+                                        if level == 2 || level == 3 {
+                                            // ISO_Level3_Shift / AltGr (0xFE03); fallback evdev KEY_RIGHTALT 100 + offset.
+                                            mods.push(resolve_mod(0xFE03, 100 + EVDEV_TO_XKB_KEYCODE_OFFSET));
+                                        }
+                                        Some((kc, mods))
+                                    });
+
+                                // Phase 2: inject and RECORD the injected keycodes so the matching
+                                // key-up releases exactly these, regardless of later layout changes.
+                                // Synthetic modifiers are ref-counted so two simultaneously-held
+                                // shifted/AltGr keys don't release each other's modifier early.
+                                if let Some((kc, mods)) = resolved {
+                                    // Auto-repeat (~25Hz) re-presses the same keysym without an
+                                    // intervening release. Only the FIRST press touches modifier
+                                    // refcounts + the held-key map; otherwise the refcount would
+                                    // climb while the single map entry (and its one release)
+                                    // stayed at 1, leaving Shift/AltGr stuck held. Re-presses still
+                                    // re-inject the key-down so auto-repeat key events flow through.
+                                    if !state.synthetic_shift_keysyms.contains_key(&keysym) {
+                                        for &m in &mods {
+                                            let first = {
+                                                let c = state.synthetic_mod_refcounts.entry(m).or_insert(0);
+                                                *c += 1;
+                                                *c == 1
+                                            };
+                                            if first {
+                                                inject(state, m, KeyState::Pressed);
+                                            }
+                                        }
+                                        state.synthetic_shift_keysyms.insert(keysym, (kc, mods));
+                                    }
+                                    inject(state, kc, KeyState::Pressed);
+                                } else {
+                                    eprintln!(
+                                        "[Wayland] inject_keysym: keysym {:#06x} not found in active xkb layout; ignoring",
+                                        keysym
+                                    );
+                                }
+                            }
+                            KeyState::Released => {
+                                // Release the keycodes recorded at PRESS time; do NOT re-resolve
+                                // (the layout may have changed mid-keystroke). A synthetic modifier
+                                // is released only when its last holder is released (ref-counted).
+                                if let Some((kc, mods)) =
+                                    state.synthetic_shift_keysyms.remove(&keysym)
+                                {
+                                    inject(state, kc, KeyState::Released);
+                                    for &m in &mods {
+                                        let last = match state.synthetic_mod_refcounts.get_mut(&m) {
+                                            Some(c) => {
+                                                *c = c.saturating_sub(1);
+                                                *c == 0
+                                            }
+                                            None => false,
+                                        };
+                                        if last {
+                                            state.synthetic_mod_refcounts.remove(&m);
+                                            inject(state, m, KeyState::Released);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                    }
+                }
+                CalloopEvent::Msg(ThreadCommand::GetXkbKeymap { reply }) => {
+                    // Hand back our keymap as an XKB_KEYMAP_FORMAT_TEXT_V1 string so a consumer can
+                    // build its reverse keysym map from the IDENTICAL keymap.
+                    let mut keymap_str = String::new();
+                    if let Some(keyboard) = state.seat.get_keyboard() {
+                        keymap_str = keyboard.with_xkb_state(state, |context| {
+                            match context.xkb().lock() {
+                                Ok(guard) => {
+                                    // SAFETY: read-only use of the &Keymap within the guard scope.
+                                    let keymap = unsafe { guard.keymap() };
+                                    keymap.get_as_string(
+                                        smithay::input::keyboard::xkb::KEYMAP_FORMAT_TEXT_V1,
+                                    )
+                                }
+                                Err(_) => String::new(),
+                            }
+                        });
+                    }
+                    // Best-effort: the caller may have timed out and dropped the receiver.
+                    let _ = reply.send(keymap_str);
+                }
                 CalloopEvent::Msg(ThreadCommand::PointerMotion { x, y }) => {
                     let serial = next_serial();
                     let time = wayland_time();
@@ -956,7 +1376,12 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                 }
                             }
                         }
-                        pointer.button(state, &ButtonEvent { button: btn, state: button_state, serial, time });
+                        // `btn` is already an evdev BTN_ code by contract (the selkies
+                        // consumer sends e.g. 272=BTN_LEFT/273=BTN_RIGHT/274=BTN_MIDDLE,
+                        // and 0x113=BTN_SIDE/0x114=BTN_EXTRA for back/forward), so pass it
+                        // straight through to smithay's pointer.
+                        let button = btn;
+                        pointer.button(state, &ButtonEvent { button, state: button_state, serial, time });
                         pointer.frame(state);
                     }
                 }
@@ -989,6 +1414,29 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                 CalloopEvent::Msg(ThreadCommand::UpdateCursorConfig { render_on_framebuffer }) => {
                     state.render_cursor_on_framebuffer = render_on_framebuffer;
                 }
+                CalloopEvent::Msg(ThreadCommand::RequestIdr) => {
+                    // On-demand keyframe (client reconnect / decoder reset). Consumed on
+                    // the next captured frame; forces a send + IDR even on a static screen.
+                    state.pending_force_idr = true;
+                }
+                CalloopEvent::Msg(ThreadCommand::UpdateRate { bitrate_kbps, vbv_kb, fps }) => {
+                    // Live rate change (web-UI bitrate/fps sliders), parity with the X11 path.
+                    // Update settings so the software x264 path and the frame pacing pick it up on
+                    // the next frame; reconfigure the live encoders immediately (NVENC / OpenH264
+                    // adjust in place, VAAPI re-opens its codec context to apply the new rate).
+                    if let Some(b) = bitrate_kbps { state.settings.h264_bitrate_kbps = b; }
+                    if let Some(v) = vbv_kb { state.settings.h264_vbv_buffer_size_kb = v; }
+                    if let Some(f) = fps { if f > 0.0 { state.settings.target_fps = f; } }
+                    if let Some(GpuEncoder::Nvenc(enc)) = state.video_encoder.as_mut() {
+                        enc.reconfigure_rate(&state.settings);
+                    }
+                    if let Some(GpuEncoder::Vaapi(enc)) = state.video_encoder.as_mut() {
+                        enc.reconfigure_rate(&state.settings);
+                    }
+                    if let Some(enc) = state.openh264_encoder.as_mut() {
+                        enc.reconfigure_rate(state.settings.h264_bitrate_kbps, state.settings.target_fps);
+                    }
+                }
                 CalloopEvent::Closed => {}
             }
         })
@@ -1048,7 +1496,8 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                         let mut different_gpu = false;
 
                         if state.video_encoder.is_some() {
-                            let dri_node = std::env::var("DRINODE").unwrap_or_default();
+                            // Use the node resolved at startup, not a fresh env read.
+                            let dri_node = state.render_node_path.clone();
                             let encode_node_idx = state.settings.vaapi_render_node_index;
                             if !dri_node.is_empty() && encode_node_idx >= 0 {
                                 if !dri_node.contains(&format!("renderD{}", 128 + encode_node_idx)) {
@@ -1060,21 +1509,29 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                         let is_readback = !rendering_gpu || !encoding_gpu_avail || different_gpu;
                         let copy_mode_str = if is_readback { "Readback" } else { "ZeroCopy" };
 
-                        let backend = match &state.video_encoder {
-                            Some(GpuEncoder::Nvenc(_)) => format!("NVENC ({})", copy_mode_str),
-                            Some(GpuEncoder::Vaapi(_)) => format!("VAAPI ({})", copy_mode_str),
-                            None => "CPU".to_string(),
+                        let backend = if state.openh264_encoder.is_some() {
+                            "OpenH264".to_string()
+                        } else {
+                            match &state.video_encoder {
+                                Some(GpuEncoder::Nvenc(_)) => format!("NVENC ({})", copy_mode_str),
+                                Some(GpuEncoder::Vaapi(_)) => format!("VAAPI ({})", copy_mode_str),
+                                None => "CPU".to_string(),
+                            }
                         };
 
-                        let is_actually_444 = match &state.video_encoder {
-                            Some(GpuEncoder::Nvenc(_)) => state.settings.h264_fullcolor,
-                            Some(_) => false,
-                            None => state.settings.h264_fullcolor,
+                        let is_actually_444 = if state.openh264_encoder.is_some() {
+                            false // OpenH264 is 4:2:0 only
+                        } else {
+                            match &state.video_encoder {
+                                Some(GpuEncoder::Nvenc(_)) => state.settings.h264_fullcolor,
+                                Some(_) => false,
+                                None => state.settings.h264_fullcolor,
+                            }
                         };
 
                         let cs_str = if is_actually_444 { "CS_IN:I444" } else { "CS_IN:I420" };
                         let range_str = if is_actually_444 { "FR" } else { "LR" };
-                        let frame_str = if state.video_encoder.is_some() || state.settings.h264_fullframe { "FF" } else { "Striped" };
+                        let frame_str = if state.video_encoder.is_some() || state.openh264_encoder.is_some() || state.settings.h264_fullframe { "FF" } else { "Striped" };
 
                         format!("H264 ({}) {} {} {} CRF:{}", backend, cs_str, range_str, frame_str, state.settings.h264_crf)
                     };
@@ -1112,6 +1569,10 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                 return TimeoutAction::ToDuration(Duration::from_millis(16));
             }
 
+            // READ (don't take) the on-demand IDR request: it's cleared below only where an
+            // encoder actually consumes it, so a request on a skipped frame isn't dropped.
+            let requested_idr = state.pending_force_idr;
+
             state.overlay_state.update_position(
                 state.settings.width,
                 state.settings.height,
@@ -1272,12 +1733,9 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                         Err(e) => eprintln!("Render error: {:?}", e)
                                     }
                                     if needs_readback {
-                                        let (read_w, read_h) = if is_memory_throttling {
-                                            (1, 1)
-                                        } else {
-                                            (width, height)
-                                        };
-                                        
+                                        // Throttling skips readback entirely, so this is always full-size.
+                                        let (read_w, read_h) = (width, height);
+
                                         if !is_memory_throttling {
                                             let _ = renderer.with_context(|gl| unsafe {
                                                 gl.ReadPixels(
@@ -1318,7 +1776,7 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                                          w * 4,
                                                          YuvRange::Full,
                                                          YuvStandardMatrix::Bt709,
-                                                         YuvConversionMode::Balanced
+                                                         YuvConversionMode::Fast
                                                      );
                                                 } else {
                                                     let y_size = (w * h) as usize;
@@ -1339,7 +1797,7 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                                         w * 4,
                                                         YuvRange::Limited,
                                                         YuvStandardMatrix::Bt709,
-                                                        YuvConversionMode::Balanced
+                                                        YuvConversionMode::Fast
                                                     );
                                                 }
                                             }
@@ -1511,7 +1969,7 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                                      w * 4,
                                                      YuvRange::Full,
                                                      YuvStandardMatrix::Bt709,
-                                                     YuvConversionMode::Balanced
+                                                     YuvConversionMode::Fast
                                                  );
                                             } else {
                                                 let y_size = (w * h) as usize;
@@ -1532,7 +1990,7 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                                     w * 4,
                                                     YuvRange::Limited,
                                                     YuvStandardMatrix::Bt709,
-                                                    YuvConversionMode::Balanced
+                                                    YuvConversionMode::Fast
                                                 );
                                             }
                                         }
@@ -1553,61 +2011,22 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
 
                     if is_memory_throttling {
                     } else if let Some(ref mut encoder) = state.video_encoder {
-                        let is_dirty = !damage_rects.is_empty();
+                        // Full-frame H.264 send / paint-over / recovery-IDR decision. The
+                        // dirtiness signal here is compositor damage (non-empty damage_rects);
+                        // the function itself is agnostic to how dirtiness was determined.
                         let is_animated = state.overlay_state.is_animated();
-
-                        let mut send_frame = false;
-                        let mut force_idr = false;
-
-                        let normal_qp = state.settings.h264_crf as u32;
-                        let paint_qp = state.settings.h264_paintover_crf as u32;
-                        let mut target_qp = normal_qp;
-
-                        let trigger_frames = state.settings.paint_over_trigger_frames;
-                        let use_paint_over = state.settings.use_paint_over_quality;
-                        let burst = state.settings.h264_paintover_burst_frames;
-                        let streaming = state.settings.h264_streaming_mode;
-
-                        let st = &mut state.vaapi_state;
-
-                        if st.h264_burst_frames_remaining > 0 {
-                            send_frame = true;
-                            target_qp = paint_qp;
-                            st.h264_burst_frames_remaining -= 1;
-
-                            if is_dirty {
-                                st.h264_burst_frames_remaining = 0;
-                                st.paint_over_sent = false;
-                                target_qp = normal_qp;
-                            }
-                        }
-
-                        if !send_frame && (streaming || is_animated) {
-                            send_frame = true;
-                        }
-
-                        if is_dirty || state.encoded_frame_count == 0 {
-                            send_frame = true;
-                            
-                            if state.encoded_frame_count == 0 {
-                                force_idr = true;
-                            }
-
-                            st.no_motion_frame_count = 0;
-                            st.paint_over_sent = false;
-                            st.h264_burst_frames_remaining = 0;
-                            target_qp = normal_qp;
-                        } else if !send_frame {
-                            st.no_motion_frame_count += 1;
-
-                            if use_paint_over && st.no_motion_frame_count >= trigger_frames && !st.paint_over_sent {
-                                send_frame = true;
-                                st.paint_over_sent = true;
-                                force_idr = true;
-                                target_qp = paint_qp;
-                                st.h264_burst_frames_remaining = burst - 1;
-                            }
-                        }
+                        let frame_counter = state.frame_counter;
+                        let decision = crate::pipeline::decide_hw_fullframe(
+                            &mut state.vaapi_state,
+                            &state.settings,
+                            frame_counter,
+                            !damage_rects.is_empty(),
+                            is_animated,
+                            requested_idr,
+                        );
+                        let send_frame = decision.send;
+                        let force_idr = decision.force_idr;
+                        let target_qp = decision.target_qp;
 
                         if send_frame {
                             let force_idr_for_recording = state
@@ -1645,23 +2064,77 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                                 if !data.is_empty() {
                                     state.encoded_frame_count += 1;
                                     state.total_stripes_encoded += 1;
-                                    if let Some(ref cb) = state.callback {
-                                        #[allow(deprecated)]
-                                        Python::with_gil(|py| {
-                                            let py_bytes = PyBytes::new(py, &data);
-                                            if let Err(e) = cb.call1(py, (py_bytes,)) { eprintln!("Callback error: {:?}", e); }
-                                        });
+                                    // Full-frame H.264 (y_start=0, full height): hand off to the delivery
+                                    // thread. send() blocks the calloop only while the previous frame is
+                                    // still undelivered -- single-slot backpressure, same as X11 publish().
+                                    if let Some(ref tx) = state.deliver_tx {
+                                        let _ = tx.send(vec![EncodedStripe {
+                                            data, data_type: 2, stripe_y_start: 0,
+                                            stripe_height: height, frame_id: state.frame_counter as i32,
+                                        }]);
                                     }
                                 }
                             } else if let Err(e) = result {
                                 eprintln!("HW Encode Error: {}", e);
                             }
                         }
+                    } else if state.openh264_encoder.is_some() {
+                        // OpenH264 full-frame software H.264 on host pixels (frame_buffer) — parity with
+                        // the X11 OpenH264 path and the HW full-frame path. Same decide_hw_fullframe
+                        // paint-over/recovery-IDR logic, and it feeds the recording sink like the HW
+                        // encoders. frame_buffer is RGBA on the GLES path (use_gpu) and BGRA on pixman.
+                        let is_animated = state.overlay_state.is_animated();
+                        let frame_counter = state.frame_counter;
+                        let decision = crate::pipeline::decide_hw_fullframe(
+                            &mut state.vaapi_state,
+                            &state.settings,
+                            frame_counter,
+                            !damage_rects.is_empty(),
+                            is_animated,
+                            requested_idr,
+                        );
+                        if decision.send {
+                            let force_idr_for_recording = state
+                                .recording_sink
+                                .as_ref()
+                                .map(|s| s.should_force_idr())
+                                .unwrap_or(false);
+                            let force_idr = decision.force_idr || force_idr_for_recording;
+                            let stride = (width * 4) as usize;
+                            let result = state.openh264_encoder.as_mut().unwrap().encode_host_argb(
+                                &state.frame_buffer, stride, state.frame_counter as u64,
+                                force_idr, state.use_gpu,
+                            );
+                            match result {
+                                Ok(data) if !data.is_empty() => {
+                                    state.encoded_frame_count += 1;
+                                    state.total_stripes_encoded += 1;
+                                    if let Some(ref tx) = state.deliver_tx {
+                                        let _ = tx.send(vec![EncodedStripe {
+                                            data, data_type: 2, stripe_y_start: 0,
+                                            stripe_height: height, frame_id: state.frame_counter as i32,
+                                        }]);
+                                    }
+                                }
+                                Ok(_) => {}
+                                Err(e) => eprintln!("OpenH264 Encode Error: {}", e),
+                            }
+                        }
                     } else {
                         if state.overlay_state.is_animated() {
                              damage_rects.push(Rectangle::new((0,0).into(), (width, height).into()));
                         }
 
+                        // IDR triggers for the software H.264 path: an explicit request_idr
+                        // plus a ~2s periodic recovery keyframe. Clamp fps before the cast:
+                        // target_fps<=0 would make kf_interval 1, forcing an IDR every frame.
+                        let safe_fps = state.settings.target_fps.max(1.0);
+                        let kf_interval = ((safe_fps * 2.0).round() as u64).max(1);
+                        let periodic_idr = (state.frame_counter as u64 % kf_interval) == 0;
+                        // Only meaningful for H.264 (output_mode 1); encode_cpu ignores it for JPEG.
+                        let force_idr_all = state.settings.output_mode == 1
+                            && (state.frame_counter == 0 || periodic_idr || requested_idr);
+
                         let encoded_packets = encoders::software::encode_cpu(
                             &mut state.stripes,
                             &state.frame_buffer,
@@ -1671,28 +2144,32 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
                             &state.settings,
                             state.frame_counter,
                             state.use_gpu,
+                            false, // hash_damage: Wayland gets damage from the compositor
                             state.recording_sink.as_ref(),
+                            force_idr_all,
                         );
 
                         if !encoded_packets.is_empty() {
                             state.encoded_frame_count += 1;
                             state.total_stripes_encoded += encoded_packets.len() as u32;
-                            if let Some(ref cb) = state.callback {
-                                #[allow(deprecated)]
-                                Python::with_gil(|py| {
-                                    for packet in encoded_packets {
-                                        let py_bytes = PyBytes::new(py, &packet);
-                                        if let Err(e) = cb.call1(py, (py_bytes,)) { eprintln!("Callback error: {:?}", e); }
-                                    }
-                                });
+                            // encoded_packets is already Vec<EncodedStripe>; hand the whole frame's
+                            // stripes off in one message so ordering (the H.264 ref chain) is kept.
+                            if let Some(ref tx) = state.deliver_tx {
+                                let _ = tx.send(encoded_packets);
                             }
                         }
                     }
+                    // Consume the on-demand IDR request only now that an encode pass actually ran;
+                    // if throttling, leave it set for the next frame rather than dropping it.
+                    if !is_memory_throttling {
+                        state.pending_force_idr = false;
+                    }
                     state.frame_counter = state.frame_counter.wrapping_add(1);
                 }
             }
             let work_elapsed = loop_start_time.elapsed();
-            let fps = if is_memory_throttling { 5.0 } else { state.settings.target_fps };
+            // Clamp to a positive, finite fps; Duration::from_secs_f64 panics on inf/negative.
+            let fps = (if is_memory_throttling { 5.0 } else { state.settings.target_fps }).max(1.0);
             let target_frame_duration = Duration::from_secs_f64(1.0 / fps);
             let wait_duration = target_frame_duration.saturating_sub(work_elapsed);
             let final_wait = if wait_duration.as_millis() < 1 { Duration::from_millis(1) } else { wait_duration };
@@ -1711,6 +2188,70 @@ fn run_wayland_thread(command_rx: smithay::reexports::calloop::channel::Channel<
     event_loop.run(None, &mut state, |state| { state.dh.flush_clients().unwrap(); }).unwrap();
 }
 
+/// Encoded-frame handoff to Python. Owns the encoded `Vec<u8>` and exposes it
+/// read-only via the buffer protocol: `memoryview(frame)` aliases the Rust buffer
+/// without copying, whereas `bytes(frame)` copies once into a new `bytes` object.
+/// Carries the four stripe-metadata ints as readable/writable Python attributes.
+#[pyclass]
+struct StripeFrame {
+    data: Vec<u8>, // encoded payload; no getter/setter, reached via `__len__` and the buffer protocol
+    #[pyo3(get, set)]
+    data_type: i32,
+    #[pyo3(get, set)]
+    stripe_y_start: i32,
+    #[pyo3(get, set)]
+    stripe_height: i32,
+    #[pyo3(get, set)]
+    frame_id: i32,
+}
+
+impl StripeFrame {
+    /// Builds a frame by taking ownership of `data` (a move, never a copy) and storing the
+    /// four stripe-metadata values next to it, so the consumer reads them as attributes
+    /// instead of parsing a header (required for omit_stripe_headers).
+    fn new_owned_meta(data: Vec<u8>, data_type: i32, stripe_y_start: i32, stripe_height: i32, frame_id: i32) -> Self {
+        StripeFrame { data, data_type, stripe_y_start, stripe_height, frame_id }
+    }
+}
+
+#[pymethods]
+impl StripeFrame {
+    // Python-side constructor, kept for symmetry/testability: the incoming bytes-like is
+    // copied into the owned Vec. The hot path uses `new_owned_meta` (a move) instead.
+    #[new]
+    #[pyo3(signature = (data, data_type = 0, stripe_y_start = 0, stripe_height = 0, frame_id = 0))]
+    fn new(data: Vec<u8>, data_type: i32, stripe_y_start: i32, stripe_height: i32, frame_id: i32) -> Self {
+        Self { data, data_type, stripe_y_start, stripe_height, frame_id }
+    }
+
+    fn __len__(&self) -> usize { // Python `len(frame)`: payload length in bytes
+        self.data.len()
+    }
+
+    // Buffer export. On success PyBuffer_FillInfo stores a new reference to `slf` in
+    // view->obj, keeping the Vec alive until every view is released, even past `frame`.
+    unsafe fn __getbuffer__(
+        slf: PyRefMut<'_, Self>, // NOTE(review): exclusive borrow for a read-only export; a shared borrow may suffice
+        view: *mut pyo3::ffi::Py_buffer,
+        flags: std::os::raw::c_int,
+    ) -> PyResult<()> {
+        let r = pyo3::ffi::PyBuffer_FillInfo(
+            view,
+            slf.as_ptr(),
+            slf.data.as_ptr() as *mut std::os::raw::c_void,
+            slf.data.len() as pyo3::ffi::Py_ssize_t,
+            1, // readonly
+            flags,
+        );
+        if r != 0 { // non-zero means FillInfo failed and left a Python exception set
+            return Err(PyErr::fetch(slf.py()));
+        }
+        Ok(())
+    }
+
+    unsafe fn __releasebuffer__(&self, _view: *mut pyo3::ffi::Py_buffer) {} // nothing to free per view
+}
+
 /// @brief Python interface class.
 ///
 /// This class is exposed to Python and spawns the Wayland thread upon instantiation.
@@ -1723,124 +2264,843 @@ struct WaylandBackend {
 #[pymethods]
 impl WaylandBackend {
     #[new]
-    fn new() -> Self {
+    fn new(width: i32, height: i32, dri_node: String) -> Self {
         let (tx, rx) = smithay::reexports::calloop::channel::channel();
         thread::spawn(move || {
-            run_wayland_thread(rx);
+            run_wayland_thread(rx, width, height, dri_node);
         });
         WaylandBackend { tx }
     }
 
-    fn start_capture(&mut self, callback: Py<PyAny>, settings: &Bound<'_, PyAny>) -> PyResult<()> {
-        let watermark_path_obj = settings.getattr("watermark_path")?;
-        let watermark_path = if let Ok(s) = watermark_path_obj.extract::<String>() {
-            s
-        } else if let Ok(b) = watermark_path_obj.extract::<Vec<u8>>() {
-            String::from_utf8_lossy(&b).into_owned()
-        } else {
-            String::new()
-        };
-
-        let scale = settings.getattr("scale").ok().and_then(|x| x.extract().ok()).unwrap_or(1.0);
-
-        let rust_settings = RustCaptureSettings {
-            width: settings.getattr("capture_width")?.extract()?,
-            height: settings.getattr("capture_height")?.extract()?,
-            scale,
-            capture_x: settings.getattr("capture_x")?.extract()?,
-            capture_y: settings.getattr("capture_y")?.extract()?,
-            target_fps: settings.getattr("target_fps")?.extract()?,
-            jpeg_quality: settings.getattr("jpeg_quality")?.extract()?,
-            paint_over_jpeg_quality: settings.getattr("paint_over_jpeg_quality")?.extract()?,
-            use_paint_over_quality: settings.getattr("use_paint_over_quality")?.extract()?,
-            paint_over_trigger_frames: settings.getattr("paint_over_trigger_frames")?.extract()?,
-            damage_block_threshold: settings.getattr("damage_block_threshold")?.extract()?,
-            damage_block_duration: settings.getattr("damage_block_duration")?.extract()?,
-            output_mode: settings.getattr("output_mode")?.extract()?,
-            h264_crf: settings.getattr("h264_crf")?.extract()?,
-            h264_paintover_crf: settings.getattr("h264_paintover_crf")?.extract()?,
-            h264_paintover_burst_frames: settings.getattr("h264_paintover_burst_frames")?.extract()?,
-            h264_fullcolor: settings.getattr("h264_fullcolor")?.extract()?,
-            h264_fullframe: settings.getattr("h264_fullframe")?.extract()?,
-            h264_streaming_mode: settings.getattr("h264_streaming_mode")?.extract()?,
-            capture_cursor: settings.getattr("capture_cursor")?.extract()?,
-            watermark_path,
-            watermark_location_enum: settings.getattr("watermark_location_enum")?.extract()?,
-            vaapi_render_node_index: settings.getattr("vaapi_render_node_index")?.extract()?,
-            use_cpu: settings.getattr("use_cpu")?.extract()?,
-            debug_logging: settings.getattr("debug_logging")?.extract()?,
-            recording_socket: settings
-                .getattr("recording_socket")
-                .ok()
-                .and_then(|v| v.extract::<String>().ok())
-                .unwrap_or_default(),
-        };
+    fn start_capture(&self, callback: Py<PyAny>, settings: &Bound<'_, PyAny>) -> PyResult<()> {
+        let rust_settings = extract_settings(settings)?;
 
+        // Starting from Python proves the interpreter is live again after a manual sweep.
+        PY_SHUTDOWN.store(false, Ordering::Relaxed);
         self.tx
             .send(ThreadCommand::StartCapture(callback, rust_settings))
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to send start command: {}", e)))?;
         Ok(())
     }
 
-    fn stop_capture(&mut self) -> PyResult<()> {
+    fn stop_capture(&self) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::StopCapture)
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to send stop command: {}", e)))?;
         Ok(())
     }
 
-    fn set_cursor_callback(&mut self, callback: Py<PyAny>) -> PyResult<()> {
+    fn set_cursor_callback(&self, callback: Py<PyAny>) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::SetCursorCallback(callback))
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to set cursor callback: {}", e)))?;
         Ok(())
     }
 
-    fn inject_key(&mut self, scancode: u32, state: u32) -> PyResult<()> {
+    fn inject_key(&self, scancode: u32, state: u32) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::KeyboardKey { scancode, state })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject key: {}", e)))?;
         Ok(())
     }
 
-    fn inject_mouse_move(&mut self, x: f64, y: f64) -> PyResult<()> {
+    /// Queue a key event given as an X11/XKB keysym (e.g. 0x41 'A', 0xFF0D Return); it is resolved
+    /// against our own xkb keymap, and a shifted keysym gets a synthetic Shift press/release.
+    /// Prefer this over `inject_key` when a keysym is at hand. `state`: 1 = press, 0 = release.
+    fn inject_keysym(&self, keysym: u32, state: u32) -> PyResult<()> {
+        let command = ThreadCommand::KeyboardKeysym { keysym, state };
+        self.tx.send(command).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject keysym: {}", e))
+        })
+    }
+
+    /// Fetch the active xkb keymap as XKB_KEYMAP_FORMAT_TEXT_V1 text, so a consumer can derive a
+    /// reverse keysym->keycode map from the identical keymap. Returns "" when no reply arrives.
+    fn get_xkb_keymap_string(&self, py: Python<'_>) -> PyResult<String> {
+        let (reply_tx, reply_rx) = std::sync::mpsc::channel::<String>();
+        let request = ThreadCommand::GetXkbKeymap { reply: reply_tx };
+        self.tx.send(request).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to request keymap: {}", e))
+        })?;
+        // Wait with the GIL released: the wayland thread can call back into Python and would
+        // deadlock against us otherwise. The owned Receiver is Send, so it moves into the
+        // closure; the 2-second bound keeps a stalled compositor from hanging the caller.
+        let keymap = py.detach(move || reply_rx.recv_timeout(Duration::from_secs(2)));
+        // A timeout and a dropped reply sender both degrade to the empty string.
+        Ok(keymap.unwrap_or_default())
+    }
+
+    fn inject_mouse_move(&self, x: f64, y: f64) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::PointerMotion { x, y })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject motion: {}", e)))?;
         Ok(())
     }
 
-    fn inject_relative_mouse_move(&mut self, dx: f64, dy: f64) -> PyResult<()> {
+    fn inject_relative_mouse_move(&self, dx: f64, dy: f64) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::PointerRelativeMotion { dx, dy })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject relative motion: {}", e)))?;
         Ok(())
     }
 
-    fn inject_mouse_button(&mut self, btn: u32, state: u32) -> PyResult<()> {
+    fn inject_mouse_button(&self, btn: u32, state: u32) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::PointerButton { btn, state })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject button: {}", e)))?;
         Ok(())
     }
 
-    fn inject_mouse_scroll(&mut self, x: f64, y: f64) -> PyResult<()> {
+    fn inject_mouse_scroll(&self, x: f64, y: f64) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::PointerAxis { x, y })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to inject axis: {}", e)))?;
         Ok(())
     }
 
-    fn set_cursor_rendering(&mut self, enabled: bool) -> PyResult<()> {
+    fn set_cursor_rendering(&self, enabled: bool) -> PyResult<()> {
         self.tx
             .send(ThreadCommand::UpdateCursorConfig { render_on_framebuffer: enabled })
             .map_err(|e| PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to set cursor config: {}", e)))?;
         Ok(())
     }
+
+    /// Ask for an IDR/keyframe on the next captured frame, so a (re)connecting client or a
+    /// reset decoder can resume at once instead of waiting for the periodic recovery keyframe.
+    /// On the JPEG path the request has no effect (keyframes do not apply there).
+    fn request_idr_frame(&self) -> PyResult<()> {
+        let outcome = self.tx.send(ThreadCommand::RequestIdr);
+        outcome.map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to request IDR: {}", e))
+        })
+    }
+
+    /// Queue a live bitrate (kbps) / VBV (kb) / framerate change for the running capture.
+    fn update_rate(&self, bitrate_kbps: Option<i32>, vbv_kb: Option<i32>, fps: Option<f64>) -> PyResult<()> {
+        let change = ThreadCommand::UpdateRate { bitrate_kbps, vbv_kb, fps };
+        self.tx.send(change).map_err(|e| {
+            PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(format!("Failed to update rate: {}", e))
+        })
+    }
+}
+
+use std::sync::atomic::{AtomicBool, AtomicU64, Ordering};
+use std::sync::{Mutex, OnceLock};
+
+use crate::encoders::software::EncodedStripe;
+
+/// Module-level constructor: wraps already-encoded `data` (copied into an owned `Vec<u8>` on
+/// the way in) plus optional stripe metadata in a new `StripeFrame`. Metadata defaults to 0.
+#[pyfunction]
+#[pyo3(signature = (data, data_type = 0, stripe_y_start = 0, stripe_height = 0, frame_id = 0))]
+fn stripe_frame_from_buffer(
+    data: Vec<u8>,
+    data_type: i32,
+    stripe_y_start: i32,
+    stripe_height: i32,
+    frame_id: i32,
+) -> StripeFrame {
+    StripeFrame { data, data_type, stripe_y_start, stripe_height, frame_id }
+}
+
+/// Capture configuration handed to `start_capture`. `extract_settings` reads each field by
+/// attribute name, so the field names below must match exactly. Declared `dict` so callers
+/// can stash extra attributes (e.g. `recording_socket`) that are not listed here.
+#[pyclass(dict)]
+struct CaptureSettings {
+    #[pyo3(get, set)] capture_width: i32,
+    #[pyo3(get, set)] capture_height: i32,
+    #[pyo3(get, set)] scale: f64,
+    #[pyo3(get, set)] capture_x: i32,
+    #[pyo3(get, set)] capture_y: i32,
+    #[pyo3(get, set)] target_fps: f64, // the Wayland loop clamps this to >= 1.0 before pacing
+    #[pyo3(get, set)] jpeg_quality: i32,
+    #[pyo3(get, set)] paint_over_jpeg_quality: i32,
+    #[pyo3(get, set)] use_paint_over_quality: bool,
+    #[pyo3(get, set)] paint_over_trigger_frames: i32,
+    #[pyo3(get, set)] damage_block_threshold: i32,
+    #[pyo3(get, set)] damage_block_duration: i32,
+    #[pyo3(get, set)] output_mode: i32, // 1 = H.264 on the software encode path; other values: TODO confirm
+    #[pyo3(get, set)] h264_crf: i32,
+    #[pyo3(get, set)] h264_paintover_crf: i32,
+    #[pyo3(get, set)] h264_paintover_burst_frames: i32,
+    #[pyo3(get, set)] h264_fullcolor: bool,
+    #[pyo3(get, set)] h264_fullframe: bool,
+    #[pyo3(get, set)] h264_streaming_mode: bool,
+    #[pyo3(get, set)] capture_cursor: bool,
+    #[pyo3(get, set)] watermark_path: Py<PyAny>, // NOTE(review): presumably str or bytes — confirm in extract_settings
+    #[pyo3(get, set)] watermark_location_enum: i32,
+    #[pyo3(get, set)] vaapi_render_node_index: i32,
+    #[pyo3(get, set)] use_cpu: bool,
+    #[pyo3(get, set)] use_openh264: bool,
+    #[pyo3(get, set)] debug_logging: bool,
+    #[pyo3(get, set)] h264_cbr_mode: bool,
+    #[pyo3(get, set)] h264_bitrate_kbps: i32,
+    #[pyo3(get, set)] h264_vbv_buffer_size_kb: i32,
+    #[pyo3(get, set)] auto_adjust_screen_capture_size: bool,
+    #[pyo3(get, set)] omit_stripe_headers: bool,
+    #[pyo3(get, set)] deferred_free: bool,
+    #[pyo3(get, set)] vaapi_render_node_path: Py<PyAny>, // str or utf-8 bytes; ScreenCapture::start_capture accepts both
+}
+
+#[pymethods]
+impl CaptureSettings {
+    // Defaults: 1920x1080 at 60 fps, JPEG quality 85, every boolean switch off, both paths None.
+    #[new]
+    fn new(py: Python<'_>) -> Self {
+        CaptureSettings {
+            capture_width: 1920, capture_height: 1080, capture_x: 0, capture_y: 0, scale: 1.0,
+            target_fps: 60.0, output_mode: 0, capture_cursor: false, debug_logging: false,
+            jpeg_quality: 85, paint_over_jpeg_quality: 95, use_paint_over_quality: false,
+            paint_over_trigger_frames: 10, damage_block_threshold: 15, damage_block_duration: 30,
+            h264_crf: 25, h264_paintover_crf: 18, h264_paintover_burst_frames: 5,
+            h264_fullcolor: false, h264_fullframe: false, h264_streaming_mode: false,
+            h264_cbr_mode: false, h264_bitrate_kbps: 4000, h264_vbv_buffer_size_kb: 0,
+            use_cpu: false, use_openh264: false, vaapi_render_node_index: -1,
+            watermark_path: py.None(), watermark_location_enum: 0, vaapi_render_node_path: py.None(),
+            auto_adjust_screen_capture_size: false, omit_stripe_headers: false, deferred_free: false,
+        }
+    }
+}
+
+// ---- Unified backend glue: process-wide Wayland singleton, its owner, and the atexit sweep ----
+
+/// Process-wide Wayland backend shared by input and capture (ONE compositor); stays `None` until `ensure_wayland_backend` creates it.
+static WAYLAND_BACKEND: OnceLock<Mutex<Option<Py<WaylandBackend>>>> = OnceLock::new();
+/// Cursor callback registered before the backend exists (selkies registers it pre-start).
+/// `ensure_wayland_backend` takes and applies it when it creates the backend; creation is
+/// deferred to capture start so the real render node (not a placeholder) reaches the compositor.
+static PENDING_CURSOR_CALLBACK: Mutex<Option<Py<PyAny>>> = Mutex::new(None);
+/// Interpreter-teardown gate, set by the atexit sweep: the detached compositor and delivery
+/// threads must never attach to a finalizing interpreter (aborts the process pre-3.13).
+/// Cleared again by `WaylandBackend::start_capture` (only a live interpreter can start one).
+pub(crate) static PY_SHUTDOWN: AtomicBool = AtomicBool::new(false);
+/// ScreenCapture id that owns the active shared Wayland capture (0 = none). Only the owner may
+/// stop it, so an input-only or stale instance can't tear down a live capture.
+static WAYLAND_OWNER: AtomicU64 = AtomicU64::new(0);
+/// Monotonic ScreenCapture id source; starts at 1 because 0 means "no owner" in WAYLAND_OWNER.
+static NEXT_CAPTURE_ID: AtomicU64 = AtomicU64::new(1);
+/// Controls of live X11 captures (pushed on start, removed by `stop_internal`), for the atexit sweep.
+static LIVE_X11: OnceLock<Mutex<Vec<Arc<crate::x11::Controls>>>> = OnceLock::new();
+
+fn live_x11() -> &'static Mutex<Vec<Arc<crate::x11::Controls>>> {
+    LIVE_X11.get_or_init(Mutex::default) // first call installs the empty registry
+}
+
+/// Relay a live rate change to the shared Wayland backend; does nothing when none exists.
+fn wayland_update_rate(py: Python<'_>, bitrate_kbps: Option<i32>, vbv_kb: Option<i32>, fps: Option<f64>) {
+    let Some(slot) = WAYLAND_BACKEND.get() else { return };
+    let guard = slot.lock().unwrap();
+    if let Some(backend) = guard.as_ref() {
+        let _ = backend.bind(py).borrow().update_rate(bitrate_kbps, vbv_kb, fps);
+    }
+}
+
+/// Return the singleton Wayland backend, creating it on first use (idempotent: the first
+/// caller's dims + render node win; capture resizes). Reserved for capture start, the one
+/// entry point that knows the operator's real render node, which creation fixes for good.
+fn ensure_wayland_backend(
+    py: Python<'_>,
+    width: i32,
+    height: i32,
+    node: String,
+) -> PyResult<Py<WaylandBackend>> {
+    let mut guard = WAYLAND_BACKEND.get_or_init(|| Mutex::new(None)).lock().unwrap();
+    if let Some(existing) = guard.as_ref() {
+        return Ok(existing.clone_ref(py));
+    }
+    let backend = Py::new(py, WaylandBackend::new(width, height, node))?;
+    if let Some(cb) = PENDING_CURSOR_CALLBACK.lock().unwrap().take() {
+        let _ = backend.bind(py).borrow().set_cursor_callback(cb); // registered pre-creation
+    }
+    let handle = backend.clone_ref(py);
+    *guard = Some(backend);
+    Ok(handle)
+}
+
+/// Look up the live Wayland backend without ever creating one. The pre-capture entry
+/// points (input injection, cursor/config setters) go through this so they cannot lock in
+/// a backend with a placeholder render node.
+fn wayland_backend_running(py: Python<'_>) -> Option<Py<WaylandBackend>> {
+    let guard = WAYLAND_BACKEND.get()?.lock().unwrap();
+    let backend = guard.as_ref()?;
+    Some(backend.clone_ref(py))
+}
+
+/// Backend selector: Wayland when PIXELFLUX_WAYLAND is truthy (1/true/yes/on, any case) or
+/// WAYLAND_DISPLAY is non-empty; X11 otherwise.
+fn want_wayland() -> bool {
+    let forced = std::env::var("PIXELFLUX_WAYLAND")
+        .map(|raw| matches!(raw.to_ascii_lowercase().as_str(), "1" | "true" | "yes" | "on"))
+        .unwrap_or(false);
+    // Closure keeps the WAYLAND_DISPLAY lookup lazy: it only runs when not already forced.
+    let has_display = || std::env::var("WAYLAND_DISPLAY").map_or(false, |d| !d.is_empty());
+    forced || has_display()
+}
+
+struct ScState { // mutable per-instance capture state, kept behind `ScreenCapture::inner`
+    backend: u8, // 0 = idle, 1 = X11, 2 = Wayland
+    controls: Option<Arc<crate::x11::Controls>>, // X11 path: stop flag + live controls shared with the capture thread
+    handle: Option<thread::JoinHandle<()>>, // X11 capture thread; joined (or detached) by `stop_internal`
+    cap_thread_id: Option<thread::ThreadId>, // capture thread's id, for re-entrant stop detection
+    // The internal encode+deliver thread's id, so a re-entrant stop from inside the delivery
+    // callback (which runs on that thread) is detected and doesn't try to self-join.
+    encode_thread_id: Option<thread::ThreadId>,
+}
+
+/// Unified capture handle exposed to Python. Drives the X11 capture directly or delegates to the
+/// shared Wayland backend, chosen at `start_capture` time. Exposes start_capture / stop_capture /
+/// request_idr_frame / update_* / is_capturing, plus the Wayland input-injection methods.
+#[pyclass]
+struct ScreenCapture {
+    id: u64, // unique per instance (drawn from NEXT_CAPTURE_ID); matched against WAYLAND_OWNER on stop
+    inner: Mutex<ScState>, // backend kind plus the X11 controls/thread bookkeeping
+}
+
+impl ScreenCapture {
+    /// Stop whatever this instance is running and reset it to idle. A no-op when already idle
+    /// (start_capture calls it first); also callable from the capture/encode threads themselves.
+    fn stop_internal(&self, py: Python<'_>) -> PyResult<()> {
+        let (handle, same_thread, backend, controls) = {
+            let mut st = self.inner.lock().unwrap();
+            if let Some(c) = &st.controls {
+                c.stop.store(true, Ordering::Relaxed);
+            }
+            let cur = Some(thread::current().id());
+            // Re-entrant stop from our own capture OR encode/deliver thread: can't join self.
+            let same = st.cap_thread_id == cur || st.encode_thread_id == cur;
+            let controls = st.controls.take();
+            let handle = st.handle.take();
+            let backend = st.backend;
+            st.backend = 0;
+            st.cap_thread_id = None;
+            st.encode_thread_id = None;
+            (handle, same, backend, controls)
+        }; // state lock released here, before any registry update or join below
+        if let Some(c) = &controls {
+            live_x11().lock().unwrap().retain(|x| !Arc::ptr_eq(x, c));
+        }
+        if backend == 2 {
+            // Wayland: only the owner may stop the shared capture. compare_exchange claims and clears
+            // ownership in one step, so a stale stop can't tear down another instance's new capture.
+            if WAYLAND_OWNER
+                .compare_exchange(self.id, 0, Ordering::AcqRel, Ordering::Relaxed)
+                .is_ok()
+            {
+                if let Some(slot) = WAYLAND_BACKEND.get() {
+                    if let Some(be) = slot.lock().unwrap().as_ref() {
+                        let _ = be.bind(py).borrow().stop_capture();
+                    }
+                }
+            }
+        } else if let Some(h) = handle {
+            // Joining the capture thread also joins the encode thread (run_capture joins it first).
+            // The encode thread runs the Python callback, so joining with the GIL held would deadlock.
+            if same_thread {
+                // Re-entrant stop from the capture or encode thread: can't join self; the threads
+                // exit on the stop flag. Detach.
+                drop(h);
+            } else {
+                py.detach(|| {
+                    let _ = h.join();
+                });
+            }
+        }
+        Ok(())
+    }
+}
+
+#[pymethods]
+impl ScreenCapture {
+    #[new]
+    fn new() -> Self {
+        let id = NEXT_CAPTURE_ID.fetch_add(1, Ordering::Relaxed);
+        // A fresh handle starts idle: no backend chosen, no threads recorded.
+        let idle = ScState {
+            backend: 0,
+            controls: None,
+            handle: None,
+            cap_thread_id: None,
+            encode_thread_id: None,
+        };
+        Self { id, inner: Mutex::new(idle) }
+    }
+
+    /// Begin capture. `callback(frame)` is invoked per encoded stripe with a `StripeFrame`.
+    fn start_capture(
+        &self,
+        py: Python<'_>,
+        settings: &Bound<'_, PyAny>,
+        callback: Py<PyAny>,
+    ) -> PyResult<()> {
+        self.stop_internal(py)?;
+        let rs = extract_settings(settings)?;
+
+        if want_wayland() {
+            // selkies forwards --dri-node as utf-8 bytes; accept str too.
+            let node = settings
+                .getattr("vaapi_render_node_path")
+                .ok()
+                .and_then(|o| {
+                    o.extract::<String>()
+                        .or_else(|_| {
+                            o.extract::<Vec<u8>>()
+                                .map(|b| String::from_utf8_lossy(&b).into_owned())
+                        })
+                        .ok()
+                })
+                .unwrap_or_default();
+            let be = ensure_wayland_backend(py, rs.width, rs.height, node)?;
+            be.bind(py).borrow().start_capture(callback, settings)?;
+            WAYLAND_OWNER.store(self.id, Ordering::Relaxed);
+            self.inner.lock().unwrap().backend = 2;
+            return Ok(());
+        }
+
+        // X11 capture: this spawned thread runs the capture loop; run_capture internally spawns an
+        // encode+deliver thread. `controls` carries live request_idr / rate / fps. The delivery
+        // callback runs on the encode thread.
+        let controls = Arc::new(crate::x11::Controls::new(&rs));
+        live_x11().lock().unwrap().push(controls.clone());
+        let c2 = controls.clone();
+        let c3 = controls.clone(); // flag stop when the capture thread exits (Ok OR Err)
+        let cb = callback;
+        // Captures run_capture's error so a dead start (bad DISPLAY, shm failure) surfaces as a
+        // PyErr below instead of a silent, forever-"capturing" lie.
+        let err_slot: Arc<Mutex<Option<String>>> = Arc::new(Mutex::new(None));
+        let err_slot2 = err_slot.clone();
+
+        // Delivery closure (runs on the encode thread): ONE GIL acquisition per FRAME (all stripes
+        // batched) -> StripeFrame -> callback, to cut GIL churn. Errors are printed, never
+        // propagated -- nothing must unwind into the encode loop.
+        let on_frame = move |frame: Vec<EncodedStripe>| {
+            Python::attach(|py| {
+                for s in frame {
+                    match Py::new(
+                        py,
+                        StripeFrame::new_owned_meta(
+                            s.data,
+                            s.data_type,
+                            s.stripe_y_start,
+                            s.stripe_height,
+                            s.frame_id,
+                        ),
+                    ) {
+                        Ok(f) => {
+                            if let Err(e) = cb.call1(py, (f,)) {
+                                e.print(py);
+                            }
+                        }
+                        Err(e) => eprintln!("[x11] frame alloc error: {e:?}"),
+                    }
+                }
+            });
+        };
+
+        let (tid_tx, tid_rx) = std::sync::mpsc::channel(); // capture thread id
+        let (etid_tx, etid_rx) = std::sync::mpsc::channel(); // encode thread id (from run_capture)
+        let handle = thread::spawn(move || {
+            let _ = tid_tx.send(thread::current().id());
+            let res = crate::x11::run_capture(rs, c2, etid_tx, on_frame);
+            // Whether run_capture returned Ok (external stop) or Err (setup / mid-run failure),
+            // the capture is dead: mark it stopped so is_capturing reports the truth instead of
+            // lying True forever.
+            c3.stop.store(true, Ordering::Release);
+            if let Err(e) = res {
+                let msg = format!("{e}");
+                eprintln!("[x11] capture error: {msg}");
+                if let Ok(mut g) = err_slot2.lock() {
+                    *g = Some(msg);
+                }
+            }
+        });
+        // The capture thread sends its id first; the encode id arrives once run_capture spawns it
+        // (bounded wait). Release the GIL across the wait: the encode thread runs the Python
+        // callback, so holding it here could deadlock, and this can block up to ~2s.
+        let (tid, etid_res) = py.detach(move || {
+            let tid = tid_rx.recv().ok();
+            let etid_res = etid_rx.recv_timeout(std::time::Duration::from_secs(2));
+            (tid, etid_res)
+        });
+        // A live encode thread sends its id as its very first action, so Ok(tid) => it started.
+        // Disconnected => the sender was dropped WITHOUT a send: run_capture returned Err during
+        // setup (bad DISPLAY, shm/xfixes/geometry failure) before spawning the encode thread. That
+        // is a definitive start failure -- join the (finishing) capture thread and raise its
+        // captured error instead of registering a capture that would report is_capturing == True
+        // forever. Only a plain Timeout (thread alive but slow to spawn the encoder) falls through.
+        let etid = match etid_res {
+            Ok(id) => Some(id),
+            Err(std::sync::mpsc::RecvTimeoutError::Disconnected) => {
+                let _ = handle.join();
+                live_x11().lock().unwrap().retain(|x| !Arc::ptr_eq(x, &controls));
+                let msg = err_slot
+                    .lock()
+                    .ok()
+                    .and_then(|g| g.clone())
+                    .unwrap_or_else(|| "X11 capture thread exited during start".to_string());
+                return Err(PyErr::new::<pyo3::exceptions::PyRuntimeError, _>(msg));
+            }
+            Err(std::sync::mpsc::RecvTimeoutError::Timeout) => None,
+        };
+        let mut st = self.inner.lock().unwrap();
+        st.backend = 1;
+        st.controls = Some(controls);
+        st.handle = Some(handle);
+        st.cap_thread_id = tid;
+        st.encode_thread_id = etid;
+        Ok(())
+    }
+
+    fn stop_capture(&self, py: Python<'_>) -> PyResult<()> {
+        self.stop_internal(py)
+    }
+
+    fn request_idr_frame(&self, py: Python<'_>) -> PyResult<()> {
+        let (backend, controls) = {
+            let st = self.inner.lock().unwrap();
+            (st.backend, st.controls.clone())
+        };
+        match backend {
+            1 => {
+                if let Some(c) = controls {
+                    c.force_idr.store(true, Ordering::Relaxed);
+                }
+            }
+            2 => {
+                if let Some(slot) = WAYLAND_BACKEND.get() {
+                    if let Some(be) = slot.lock().unwrap().as_ref() {
+                        let _ = be.bind(py).borrow().request_idr_frame();
+                    }
+                }
+            }
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn update_video_bitrate(&self, py: Python<'_>, kbps: i32) -> PyResult<()> {
+        let (backend, controls) = {
+            let st = self.inner.lock().unwrap();
+            (st.backend, st.controls.clone())
+        };
+        match backend {
+            1 => {
+                if let Some(c) = &controls {
+                    // Release-publish the dirty flag AFTER the payload store so the encode thread's
+                    // Acquire read can't observe the flag set with a stale bitrate.
+                    c.bitrate_kbps.store(kbps, Ordering::Relaxed);
+                    c.rate_dirty.store(true, Ordering::Release);
+                }
+            }
+            2 => wayland_update_rate(py, Some(kbps), None, None),
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn update_framerate(&self, py: Python<'_>, fps: f64) -> PyResult<()> {
+        let (backend, controls) = {
+            let st = self.inner.lock().unwrap();
+            (st.backend, st.controls.clone())
+        };
+        match backend {
+            1 => {
+                if let Some(c) = &controls {
+                    c.fps_milli.store((fps.max(1.0) * 1000.0) as u64, Ordering::Relaxed);
+                    c.rate_dirty.store(true, Ordering::Release);
+                }
+            }
+            2 => wayland_update_rate(py, None, None, Some(fps)),
+            _ => {}
+        }
+        Ok(())
+    }
+
+    fn update_vbv_buffer_size(&self, py: Python<'_>, kb: i32) -> PyResult<()> {
+        let (backend, controls) = {
+            let st = self.inner.lock().unwrap();
+            (st.backend, st.controls.clone())
+        };
+        match backend {
+            1 => {
+                if let Some(c) = &controls {
+                    c.vbv_kb.store(kb, Ordering::Relaxed);
+                    c.rate_dirty.store(true, Ordering::Release);
+                }
+            }
+            2 => wayland_update_rate(py, None, Some(kb), None),
+            _ => {}
+        }
+        Ok(())
+    }
+
+    #[getter]
+    fn is_capturing(&self) -> bool {
+        let st = self.inner.lock().unwrap();
+        match st.backend {
+            1 => st
+                .controls
+                .as_ref()
+                .map(|c| !c.stop.load(Ordering::Relaxed))
+                .unwrap_or(false),
+            2 => WAYLAND_OWNER.load(Ordering::Relaxed) == self.id,
+            _ => false,
+        }
+    }
+
+    // ---- Input injection: via the shared Wayland backend (X11 input lives elsewhere). Dispatch
+    // is tied to an active session, so none of these create the backend (creation fixes the
+    // render node and belongs to start_capture); with no backend they are no-ops. ----
+    fn inject_key(&self, py: Python<'_>, scancode: u32, state: u32) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_key(scancode, state))
+    }
+    fn inject_keysym(&self, py: Python<'_>, keysym: u32, state: u32) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_keysym(keysym, state))
+    }
+    fn inject_mouse_move(&self, py: Python<'_>, x: f64, y: f64) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_mouse_move(x, y))
+    }
+    fn inject_relative_mouse_move(&self, py: Python<'_>, dx: f64, dy: f64) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_relative_mouse_move(dx, dy))
+    }
+    fn inject_mouse_button(&self, py: Python<'_>, btn: u32, state: u32) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_mouse_button(btn, state))
+    }
+    fn inject_mouse_scroll(&self, py: Python<'_>, x: f64, y: f64) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().inject_mouse_scroll(x, y))
+    }
+    fn set_cursor_rendering(&self, py: Python<'_>, enabled: bool) -> PyResult<()> {
+        wayland_backend_running(py).map_or(Ok(()), |be| be.bind(py).borrow().set_cursor_rendering(enabled))
+    }
+    fn set_cursor_callback(&self, py: Python<'_>, callback: Py<PyAny>) -> PyResult<()> {
+        // Hold the slot lock across the check so a concurrent creation can't miss the stash.
+        let slot = WAYLAND_BACKEND.get_or_init(|| Mutex::new(None));
+        let g = slot.lock().unwrap();
+        match g.as_ref() {
+            Some(be) => be.bind(py).borrow().set_cursor_callback(callback),
+            // Pre-start: stash it; ensure_wayland_backend applies it at creation.
+            None => {
+                *PENDING_CURSOR_CALLBACK.lock().unwrap() = Some(callback);
+                Ok(())
+            }
+        }
+    }
+    fn get_xkb_keymap_string(&self, py: Python<'_>) -> PyResult<String> {
+        wayland_backend_running(py)
+            .map_or(Ok(String::new()), |be| be.bind(py).borrow().get_xkb_keymap_string(py))
+    }
+}
+
+impl Drop for ScreenCapture {
+    fn drop(&mut self) {
+        // Best-effort: signal the capture thread to exit. Joining needs the GIL (the thread calls
+        // back into Python), which Drop can't safely take, so just flag it; explicit
+        // stop_capture() / the atexit sweep do the joining.
+        if let Ok(st) = self.inner.lock() {
+            if let Some(c) = &st.controls {
+                c.stop.store(true, Ordering::Relaxed);
+            }
+        }
+    }
+}
+
+/// Stop every live capture (registered with atexit): set each X11 stop flag, ask the Wayland
+/// backend to stop, then sleep 50 ms as a grace period before interpreter finalization.
+#[pyfunction]
+fn _stop_all_captures(py: Python<'_>) {
+    // Gate first: from here the interpreter may finalize at any point and no detached thread
+    // may attach to it. Also drop a never-applied cursor stash while the GIL is held.
+    PY_SHUTDOWN.store(true, Ordering::Relaxed);
+    *PENDING_CURSOR_CALLBACK.lock().unwrap() = None;
+    if let Some(slot) = LIVE_X11.get() {
+        for c in slot.lock().unwrap().iter() {
+            c.stop.store(true, Ordering::Relaxed);
+        }
+    }
+    // A live Wayland capture runs on an unjoined compositor thread that calls the Python frame
+    // callback; stop it (the wayland thread clears its callback + encoder on StopCapture) so it
+    // can't call into a finalizing interpreter and abort/segfault. Best-effort, async via the
+    // command channel; the grace sleep below lets it be observed before finalization.
+    if let Some(slot) = WAYLAND_BACKEND.get() {
+        if let Some(be) = slot.lock().unwrap().as_ref() {
+            let _ = be.bind(py).borrow().stop_capture();
+        }
+    }
+    WAYLAND_OWNER.store(0, Ordering::Relaxed);
+    py.detach(|| std::thread::sleep(Duration::from_millis(50)));
 }
 
 #[pymodule]
-fn pixelflux_wayland(m: &Bound<'_, PyModule>) -> PyResult<()> {
+fn pixelflux(m: &Bound<'_, PyModule>) -> PyResult<()> {
     m.add_class::<WaylandBackend>()?;
+    m.add_class::<StripeFrame>()?;
+    m.add_class::<CaptureSettings>()?;
+    m.add_class::<ScreenCapture>()?;
+    m.add_function(wrap_pyfunction!(stripe_frame_from_buffer, m)?)?;
+    m.add_function(wrap_pyfunction!(_stop_all_captures, m)?)?;
+    // Register _stop_all_captures with atexit so every live capture is stopped before
+    // interpreter shutdown, ensuring no capture thread calls into Python during finalization.
+    if let Ok(atexit) = m.py().import("atexit") {
+        let _ = atexit.call_method1("register", (m.getattr("_stop_all_captures")?,));
+    }
     Ok(())
 }
+
+#[cfg(test)]
+mod keysym_release_replay_tests {
+    //! Invariant under test: `inject_keysym` records the keycodes injected at PRESS time into
+    //! `synthetic_shift_keysyms` (a `HashMap<u32, (u32, u32)>`) so the matching key-up releases
+    //! the SAME physical keycodes, even if the active xkb layout changed mid-keystroke. The
+    //! release path must read the recorded mapping and must NOT re-resolve the keysym against
+    //! the (possibly different) live layout.
+    //!
+    //! The production record/replay lives inside the calloop loop in `run_wayland_thread` and
+    //! resolves against a live `xkb::Keymap`, which needs a connected keyboard. These tests
+    //! model the identical state machine with the same map type and a deterministic 2-layout
+    //! resolver so the invariant is provable in isolation.
+
+    use std::collections::HashMap;
+
+    /// Simulated key-up actions emitted by the release path, in order.
+    /// Mirrors the real `inject(state, kc, KeyState::Released)` calls.
+    #[derive(Debug, PartialEq, Eq)]
+    enum Release {
+        Key(u32),
+        Shift(u32),
+    }
+
+    /// Deterministic stand-in for `resolve_keysym_to_keycode`, parameterized by layout.
+    /// Returns (keycode, needs_shift). Two layouts deliberately map the same keysym to
+    /// DIFFERENT keycodes / shift-requirements to model a layout switch mid-keystroke.
+    ///
+    /// Layout 0 ("us"):    keysym 0x41 ('A') -> kc 38, needs_shift=true ; Shift_L -> kc 50
+    /// Layout 1 ("de"):    keysym 0x41 ('A') -> kc 24, needs_shift=false (different key!) ; Shift_L -> kc 62
+    fn resolve(layout: u32, keysym: u32) -> Option<(u32, bool)> {
+        match (layout, keysym) {
+            (0, 0x41) => Some((38, true)),
+            (0, 0xFFE1) => Some((50, false)), // Shift_L on layout 0
+            (1, 0x41) => Some((24, false)),
+            (1, 0xFFE1) => Some((62, false)), // Shift_L on layout 1: deliberately a different kc than layout 0
+            (_, 0xFF0D) => Some((36, false)), // Return: same on both, never shifted
+            _ => None,
+        }
+    }
+
+    const SHIFT_L: u32 = 0xFFE1;
+
+    /// Press-side model: resolve the keysym against the layout active at press time and store
+    /// (kc, shift_kc_or_0) under that keysym. Returns false when the keysym does not resolve.
+    fn press(map: &mut HashMap<u32, (u32, u32)>, current_layout: u32, keysym: u32) -> bool {
+        let Some((key_kc, shifted)) = resolve(current_layout, keysym) else {
+            return false;
+        };
+        let recorded_shift = if shifted {
+            resolve(current_layout, SHIFT_L).map_or(50, |(shift_kc, _)| shift_kc)
+        } else {
+            0 // sentinel: no synthetic Shift to release
+        };
+        map.insert(keysym, (key_kc, recorded_shift));
+        true
+    }
+
+    /// Models the correct release branch: read the recorded keycodes and release exactly
+    /// those; key first, then synthetic Shift if shift_kc != 0.
+    fn release_fixed(map: &mut HashMap<u32, (u32, u32)>, keysym: u32) -> Vec<Release> {
+        let mut out = Vec::new();
+        if let Some((kc, shift_kc)) = map.remove(&keysym) {
+            out.push(Release::Key(kc));
+            if shift_kc != 0 {
+                out.push(Release::Shift(shift_kc));
+            }
+        }
+        out
+    }
+
+    /// Models the incorrect alternative: ignore the record and RE-RESOLVE against the live
+    /// (possibly changed) layout. Kept only to prove the correct path diverges from it
+    /// exactly when the layout changes mid-keystroke.
+    fn release_reresolve(current_layout: u32, keysym: u32) -> Vec<Release> {
+        let mut out = Vec::new();
+        if let Some((kc, needs_shift)) = resolve(current_layout, keysym) {
+            out.push(Release::Key(kc));
+            if needs_shift {
+                let shift_kc = resolve(current_layout, SHIFT_L).map(|(kc, _)| kc).unwrap_or(50);
+                out.push(Release::Shift(shift_kc));
+            }
+        }
+        out
+    }
+
+    #[test]
+    fn press_records_keycode_and_shift() {
+        // 'A' on layout 0 needs Shift -> must record both the key kc and the Shift_L kc.
+        let mut map = HashMap::new();
+        assert!(press(&mut map, 0, 0x41));
+        assert_eq!(map.get(&0x41), Some(&(38u32, 50u32)));
+    }
+
+    #[test]
+    fn press_records_zero_shift_when_unshifted() {
+        // Return is unshifted -> shift_kc sentinel must be 0 so release skips synthetic Shift.
+        let mut map = HashMap::new();
+        assert!(press(&mut map, 0, 0xFF0D));
+        assert_eq!(map.get(&0xFF0D), Some(&(36u32, 0u32)));
+    }
+
+    #[test]
+    fn unresolved_keysym_records_nothing() {
+        let mut map = HashMap::new();
+        assert!(!press(&mut map, 0, 0xDEAD));
+        assert!(map.is_empty());
+    }
+
+    #[test]
+    fn release_replays_recorded_keycodes_not_a_reresolve() {
+        // Press 'A' on layout 0 (kc 38 + Shift 50). THEN the layout switches to 1 mid-keystroke.
+        let mut map = HashMap::new();
+        assert!(press(&mut map, 0, 0x41));
+
+        let layout_at_release = 1; // user/compositor switched layout after press
+
+        let fixed = release_fixed(&mut map, 0x41);
+        let buggy = release_reresolve(layout_at_release, 0x41);
+
+        // The recorded-replay path releases exactly what was pressed: kc 38 then Shift 50.
+        assert_eq!(fixed, vec![Release::Key(38), Release::Shift(50)]);
+
+        // A re-resolve would release kc 24 (layout 1's 'A') and NO shift -> kc 38 and
+        // Shift 50 stay logically held down. Prove the two paths diverge here.
+        assert_eq!(buggy, vec![Release::Key(24)]);
+        assert_ne!(fixed, buggy, "recorded replay must NOT match the re-resolve path under a layout switch");
+    }
+
+    #[test]
+    fn release_consumes_the_record_no_double_release() {
+        // remove() must take the entry so a duplicate key-up is a no-op (no phantom release).
+        let mut map = HashMap::new();
+        assert!(press(&mut map, 0, 0x41));
+        let first = release_fixed(&mut map, 0x41);
+        assert_eq!(first, vec![Release::Key(38), Release::Shift(50)]);
+        assert!(map.is_empty());
+        let second = release_fixed(&mut map, 0x41);
+        assert!(second.is_empty(), "second key-up must release nothing");
+    }
+
+    #[test]
+    fn release_without_prior_press_is_noop() {
+        // A stray key-up that was never recorded must not inject anything.
+        let mut map = HashMap::new();
+        assert!(release_fixed(&mut map, 0x41).is_empty());
+    }
+
+    #[test]
+    fn no_layout_change_fix_and_reresolve_agree() {
+        // Sanity: when the layout is stable, the recorded-replay path and a re-resolve must
+        // agree, so recording the keycodes does not change behavior in the common case.
+        let mut map = HashMap::new();
+        assert!(press(&mut map, 0, 0x41));
+        let fixed = release_fixed(&mut map, 0x41);
+        let same_layout = release_reresolve(0, 0x41);
+        assert_eq!(fixed, same_layout);
+    }
+}
diff --git a/pixelflux/src/nvgpufilter.rs b/pixelflux/src/nvgpufilter.rs
new file mode 100644
index 0000000..1950737
--- /dev/null
+++ b/pixelflux/src/nvgpufilter.rs
@@ -0,0 +1,527 @@
+//! Multi-GPU NVENC GET_ATTACHED_IDS / GET_PROBED_IDS ioctl filter.
+//!
+//! Works across driver versions: on 570-595 (where RM enumeration wrongly returns every host GPU)
+//! it drops the unreachable GPUs so the session opens; on 565-or-before / 610-or-later (where
+//! enumeration is correct) the strict-subset guard makes it a no-op.
+//!
+//! On NVIDIA driver 570-595, `libnvidia-encode`/`libcuda`/`libnvcuvid` enumerate every host
+//! GPU via the RM `GET_ATTACHED_IDS` ioctl and peer-init each; a GPU whose `/dev/nvidiaX` is
+//! absent from the container makes `nvEncOpenEncodeSessionEx` fail with UNSUPPORTED_DEVICE. We
+//! GOT-patch `ioctl` in those NVIDIA libraries only (no LD_PRELOAD object; our own GOT is left
+//! untouched so the inner real `ioctl` reaches libc) and drop the unreachable GPUs from the
+//! response. A strict no-op unless at least one host GPU is hidden from the container.
+
+use libc::{c_char, c_int, c_long, c_ulong, c_void};
+use std::ffi::CStr;
+use std::sync::atomic::{AtomicPtr, AtomicU64, Ordering};
+use std::sync::Once;
+
+#[allow(dead_code)] // NR byte of NV_RM_CONTROL_REQUEST; kept for documentation + the NR unit test.
+const NV_ESC_RM_CONTROL: c_ulong = 0x2A; // ioctl NR for NV_ESC_RM_CONTROL
+// Canonical ioctl request for NV_ESC_RM_CONTROL: _IOWR('F'=0x46, 0x2A, sizeof(NVOS54_PARAMETERS)=32).
+// We match by DIR|TYPE|NR but deliberately IGNORE the encoded _IOC_SIZE (via ioc_no_size), so a
+// driver whose NVOS54_PARAMETERS has a different sizeof still matches -- the true per-cmd param
+// layout is validated separately by ctrl.params_size below. Matching the NR alone is too loose;
+// pinning the exact size is too tight.
+const NV_RM_CONTROL_REQUEST: c_ulong = 0xC020_462A;
+// _IOC_SIZE field (asm-generic/ioctl.h): a 14-bit size at bit 16. Masked out to compare requests
+// size-agnostically.
+const IOC_SIZESHIFT: u32 = 16;
+const IOC_SIZEMASK: c_ulong = 0x3FFF;
+const GPU_GET_ATTACHED_IDS: u32 = 0x0201; // NV0000_CTRL_CMD_GPU_GET_ATTACHED_IDS
+const GPU_GET_PROBED_IDS: u32 = 0x0214; // NV0000_CTRL_CMD_GPU_GET_PROBED_IDS
+const MAX_ATTACHED_GPUS: usize = 32;
+const INVALID_GPU_ID: u32 = 0xFFFF_FFFF;
+// Exact param-struct sizes: ATTACHED = gpuIds[32]; PROBED = gpuIds[32] + excludedGpuIds[32].
+// Both begin with gpuIds[32], so we filter that leading array for either; the size disambiguates.
+const ATTACHED_PARAMS_SIZE: usize = 4 * MAX_ATTACHED_GPUS;
+const PROBED_PARAMS_SIZE: usize = 4 * MAX_ATTACHED_GPUS * 2;
+
+/// NVOS54_PARAMETERS (32 bytes) — the RM control ioctl parameter struct.
+#[repr(C)]
+struct NvRmControlParams {
+    h_client: u32,
+    h_object: u32,
+    cmd: u32,
+    flags: u32,
+    params: u64,
+    params_size: u32,
+    status: u32,
+}
+
+// ELF definitions the libc crate lacks (it does provide Elf64_Sym / Elf64_Phdr / dl_phdr_info / PT_DYNAMIC).
+const DT_NULL: i64 = 0;
+const DT_PLTRELSZ: i64 = 2;
+const DT_RELA: i64 = 7;
+const DT_RELASZ: i64 = 8;
+const DT_STRTAB: i64 = 5;
+const DT_SYMTAB: i64 = 6;
+const DT_JMPREL: i64 = 23;
+
+// x86-64 relocation types that name a GOT slot holding a function pointer we can repoint:
+// JUMP_SLOT (lazy PLT) and GLOB_DAT (eager / -fno-plt builds).
+const R_X86_64_GLOB_DAT: u32 = 6;
+const R_X86_64_JUMP_SLOT: u32 = 7;
+
+/// rdev of /dev/nvidiactl, cached once at install() so the ioctl hook only rewrites responses
+/// that actually came from that char device (0 = not resolved -> identity gate skipped).
+static NVIDIACTL_RDEV: AtomicU64 = AtomicU64::new(0);
+
+#[repr(C)]
+struct Elf64Dyn {
+    d_tag: i64,
+    d_un: u64, // d_val / d_ptr union (both 64-bit)
+}
+
+#[repr(C)]
+struct Elf64Rela {
+    r_offset: u64,
+    r_info: u64,
+    r_addend: i64,
+}
+
+#[inline]
+fn elf64_r_sym(info: u64) -> u64 {
+    info >> 32
+}
+
+#[inline]
+fn elf64_r_type(info: u64) -> u32 {
+    (info & 0xffff_ffff) as u32
+}
+
+#[allow(dead_code)] // NR extractor; kept for documentation + the NR unit test.
+#[inline]
+fn ioc_nr(req: c_ulong) -> c_ulong {
+    // _IOC_NR: bits 0-7 of the ioctl request.
+    req & 0xFF
+}
+
+/// The ioctl request with its _IOC_SIZE field zeroed, leaving DIR|TYPE|NR. The gate matches on
+/// this so it stays bound to the exact RM control command without coupling to one param-struct size.
+#[inline]
+fn ioc_no_size(req: c_ulong) -> c_ulong {
+    req & !(IOC_SIZEMASK << IOC_SIZESHIFT)
+}
+
+/// True when `/dev/nvidiaN` exists.
+fn node_present(minor: u32) -> bool {
+    let path = format!("/dev/nvidia{}\0", minor);
+    unsafe { libc::access(path.as_ptr() as *const c_char, libc::F_OK) == 0 }
+}
+
+/// Resolve a gpuId to its `/dev/nvidia` minor via /proc (the PCI bus is encoded in
+/// `gpuId >> 8`). Returns -1 when no match is found. Matches a /proc entry on either the bus
+/// byte alone or the combined domain:bus (`gpuId >> 8`), since larger gpuIds fold the domain in.
+fn gpuid_to_minor(gpu_id: u32) -> i32 {
+    let want_bus = (gpu_id >> 8) & 0xFF;
+    let want_full = gpu_id >> 8;
+    let dir = match std::fs::read_dir("/proc/driver/nvidia/gpus") {
+        Ok(d) => d,
+        Err(_) => return -1,
+    };
+    for ent in dir.flatten() {
+        let name = ent.file_name();
+        let name = match name.to_str() {
+            Some(s) => s,
+            None => continue,
+        };
+        // Parse a PCI address "domain:bus:slot.func" (hex).
+        let parts: Vec<&str> = name.split([':', '.']).collect();
+        if parts.len() != 4 {
+            continue;
+        }
+        let dom = u32::from_str_radix(parts[0], 16);
+        let bus = u32::from_str_radix(parts[1], 16);
+        let (dom, bus) = match (dom, bus) {
+            (Ok(d), Ok(b)) => (d, b),
+            _ => continue,
+        };
+        if bus != want_bus && ((dom << 8) | bus) != want_full {
+            continue;
+        }
+        // Read "Device Minor: N" from the information file.
+        let info = format!("/proc/driver/nvidia/gpus/{}/information", name);
+        if let Ok(text) = std::fs::read_to_string(&info) {
+            for line in text.lines() {
+                if let Some(rest) = line.strip_prefix("Device Minor:") {
+                    if let Ok(m) = rest.trim().parse::<i32>() {
+                        return m;
+                    }
+                }
+            }
+        }
+        break;
+    }
+    -1
+}
+
+/// Rewrites a GPU id list in place, keeping only ids accepted by `keep`; the list ends at the
+/// first `INVALID_GPU_ID`. Fail-safe: the array is touched only when a strict subset survives
+/// (`1 <= kept < total`); if none or all are kept it is left as-is. Pure, so unit-testable.
+fn filter_ids(ids: &mut [u32; MAX_ATTACHED_GPUS], keep: impl Fn(u32) -> bool) {
+    // Number of live entries ahead of the terminating sentinel (or the whole array if none).
+    let total = ids
+        .iter()
+        .position(|&id| id == INVALID_GPU_ID)
+        .unwrap_or(MAX_ATTACHED_GPUS);
+    // Compact the survivors to the front of a sentinel-filled scratch array, preserving order.
+    let mut survivors = [INVALID_GPU_ID; MAX_ATTACHED_GPUS];
+    let mut nkept = 0usize;
+    for &id in &ids[..total] {
+        if keep(id) {
+            survivors[nkept] = id;
+            nkept += 1;
+        }
+    }
+    // Commit only a strict subset; the scratch tail already carries the sentinel padding.
+    if (1..total).contains(&nkept) {
+        *ids = survivors;
+    }
+}
+
+/// ioctl wrapper installed into the NVIDIA libraries' GOT. Our own object's GOT is left
+/// untouched, so this inner `libc::ioctl` reaches libc normally (no recursion).
+unsafe extern "C" fn filtered_ioctl(fd: c_int, req: c_ulong, arg: *mut c_void) -> c_int {
+    let rc = libc::ioctl(fd, req, arg);
+    // Preserve the real ioctl's errno across our /proc + /dev lookups below (a caller may read
+    // errno after the syscall). Also: a panic in the filtering logic must NOT unwind across this
+    // extern "C" boundary (the compiler guard would abort the whole process), so catch it and
+    // fall back to the unmodified real result.
+    let saved_errno = *libc::__errno_location();
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        rewrite_attached_ids(fd, rc, req, arg);
+    }));
+    *libc::__errno_location() = saved_errno;
+    rc
+}
+
+/// In-place rewrite of a successful GET_ATTACHED_IDS / GET_PROBED_IDS response. Split from
+/// `filtered_ioctl` so it can run under catch_unwind. Only rewrites when the request is the
+/// RM_CONTROL ioctl (matched by DIR|TYPE|NR, size-agnostic) on the real /dev/nvidiactl char device.
+unsafe fn rewrite_attached_ids(fd: c_int, rc: c_int, req: c_ulong, arg: *mut c_void) {
+    if rc != 0 || ioc_no_size(req) != ioc_no_size(NV_RM_CONTROL_REQUEST) || arg.is_null() {
+        return;
+    }
+    // fd identity: only rewrite responses from the real /dev/nvidiactl char device. Skip the gate
+    // (fall back to the request/size checks) only if we never resolved nvidiactl's rdev.
+    let cached = NVIDIACTL_RDEV.load(Ordering::Relaxed);
+    if cached != 0 {
+        let mut st: libc::stat = std::mem::zeroed();
+        if libc::fstat(fd, &mut st) != 0 {
+            return; // pass through on fstat failure
+        }
+        if (st.st_mode & libc::S_IFMT) != libc::S_IFCHR || st.st_rdev as u64 != cached {
+            return;
+        }
+    }
+    let ctrl = &mut *(arg as *mut NvRmControlParams);
+    if ctrl.status != 0 || ctrl.params == 0 {
+        return;
+    }
+    // Filter BOTH enumeration APIs so behavior holds across driver versions: 570-595 (buggy) AND
+    // 565-or-before / 610-or-later (correct, where this is a strict-subset no-op). Require the
+    // EXACT params size for the cmd so a lying size can't make us rewrite under a different layout.
+    let which = match ctrl.cmd {
+        GPU_GET_ATTACHED_IDS if ctrl.params_size as usize == ATTACHED_PARAMS_SIZE => "ATTACHED",
+        GPU_GET_PROBED_IDS if ctrl.params_size as usize == PROBED_PARAMS_SIZE => "PROBED",
+        _ => return,
+    };
+    // Both params structs start with gpuIds[MAX_ATTACHED_GPUS]; filter that leading array. For
+    // PROBED the trailing excludedGpuIds[] is left untouched (it lists GPUs to exclude, not use).
+    let ids = &mut *(ctrl.params as *mut [u32; MAX_ATTACHED_GPUS]);
+    let debug = std::env::var_os("PIXELFLUX_GPU_FILTER_DEBUG").is_some();
+    let before = ids.iter().take_while(|&&id| id != INVALID_GPU_ID).count();
+    filter_ids(ids, |id| {
+        let minor = gpuid_to_minor(id);
+        minor >= 0 && node_present(minor as u32)
+    });
+    if debug {
+        let after = ids.iter().take_while(|&&id| id != INVALID_GPU_ID).count();
+        eprintln!("[pixelflux] GET_{which}_IDS intercepted: {before} host GPU(s) -> {after} kept");
+    }
+}
+
+/// Protection bits (PROT_READ / PROT_WRITE / PROT_EXEC) of the mapping containing `addr`, decoded
+/// from the permission column of /proc/self/maps. Yields -1 when the maps file cannot be read
+/// or when no mapping covers `addr`.
+fn page_prot(addr: usize) -> i32 {
+    let maps = match std::fs::read_to_string("/proc/self/maps") {
+        Ok(contents) => contents,
+        Err(_) => return -1,
+    };
+    // The permission column reads e.g. "r-xp": the expected letter at a position grants that
+    // access, anything else withholds it. Only the first three positions are examined.
+    let decode = |flags: &str| -> i32 {
+        let f = flags.as_bytes();
+        [(b'r', libc::PROT_READ), (b'w', libc::PROT_WRITE), (b'x', libc::PROT_EXEC)]
+            .iter()
+            .enumerate()
+            .filter(|(pos, (ch, _))| f.get(*pos) == Some(ch))
+            .fold(0, |acc, (_, (_, bit))| acc | *bit)
+    };
+    let parse_hex = |s: &str| usize::from_str_radix(s, 16).ok();
+    for entry in maps.lines() {
+        // Entry layout: "lo-hi perms ..."; only the first two whitespace-separated fields are
+        // needed, and entries missing either one are skipped.
+        let mut fields = entry.split_whitespace();
+        let (span, flags) = match (fields.next(), fields.next()) {
+            (Some(span), Some(flags)) => (span, flags),
+            _ => continue,
+        };
+        // Bounds are bare hexadecimal; an entry whose bounds fail to parse is skipped as well.
+        let mut bounds = span.split('-');
+        let lo = bounds.next().and_then(parse_hex);
+        let hi = bounds.next().and_then(parse_hex);
+        match (lo, hi) {
+            // Half-open interval: `hi` is the first address past the mapping.
+            (Some(lo), Some(hi)) if (lo..hi).contains(&addr) => return decode(flags),
+            _ => {}
+        }
+    }
+    -1
+}
+
+/// Turn a DT_* `d_ptr` value into an absolute address. glibc rewrites these entries to absolute
+/// addresses while musl leaves them file-relative, so this guesses from the magnitude: a value
+/// below `base` is treated as an offset from `base`, anything else as already absolute.
+#[inline]
+fn dyn_addr(base: usize, v: u64) -> usize {
+    let raw = v as usize;
+    match raw < base {
+        true => base + raw,
+        false => raw,
+    }
+}
+
+/// Read one loaded object's dynamic section (`dynp`, DT_NULL-terminated) and hand its PLT and
+/// general relocation tables to `patch_reloc_table` so slots naming `ioctl` get repointed.
+unsafe fn patch_ioctl_got(base: usize, dynp: *const Elf64Dyn) {
+    let mut symtab: *const libc::Elf64_Sym = std::ptr::null();
+    let mut strtab: *const c_char = std::ptr::null();
+    let mut jmprel: *const Elf64Rela = std::ptr::null();
+    let mut pltrelsz: usize = 0;
+    let mut rela: *const Elf64Rela = std::ptr::null();
+    let mut relasz: usize = 0;
+    let mut d = dynp;
+    while (*d).d_tag != DT_NULL {
+        match (*d).d_tag {
+            DT_SYMTAB => symtab = dyn_addr(base, (*d).d_un) as *const libc::Elf64_Sym,
+            DT_STRTAB => strtab = dyn_addr(base, (*d).d_un) as *const c_char,
+            DT_JMPREL => jmprel = dyn_addr(base, (*d).d_un) as *const Elf64Rela,
+            DT_PLTRELSZ => pltrelsz = (*d).d_un as usize, // a byte count, so no dyn_addr
+            DT_RELA => rela = dyn_addr(base, (*d).d_un) as *const Elf64Rela,
+            DT_RELASZ => relasz = (*d).d_un as usize, // a byte count, so no dyn_addr
+            _ => {}
+        }
+        d = d.add(1);
+    }
+    if symtab.is_null() || strtab.is_null() {
+        return;
+    }
+    // Bail when a resolved table is not readable: a bad relative/absolute guess would fault.
+    let readable = |p: usize| {
+        let prot = page_prot(p);
+        prot < 0 || (prot & libc::PROT_READ) != 0 // unknown protection (-1) counts as readable
+    };
+    if !readable(symtab as usize) || !readable(strtab as usize) {
+        return;
+    }
+
+    let page = unsafe { libc::sysconf(libc::_SC_PAGESIZE) } as c_long;
+    if page <= 0 {
+        return;
+    }
+    let page = page as usize;
+    let ent = std::mem::size_of::<Elf64Rela>();
+    // PLT relocations (lazy-bound JUMP_SLOT: the classic path).
+    if !jmprel.is_null() && pltrelsz >= ent && readable(jmprel as usize) {
+        patch_reloc_table(base, symtab, strtab, jmprel, pltrelsz / ent, page);
+    }
+    // General relocations (GLOB_DAT): NVIDIA libs built with -fno-plt bind `ioctl` through a GOT
+    // slot named by an R_X86_64_GLOB_DAT entry in .rela.dyn rather than .rela.plt.
+    if !rela.is_null() && relasz >= ent && readable(rela as usize) {
+        patch_reloc_table(base, symtab, strtab, rela, relasz / ent, page);
+    }
+}
+
+/// Scan `count` entries of one relocation table and repoint every GOT slot naming `ioctl`
+/// (JUMP_SLOT or GLOB_DAT) to `filtered_ioctl`. `page` is the page size used for mprotect.
+unsafe fn patch_reloc_table(
+    base: usize,
+    symtab: *const libc::Elf64_Sym,
+    strtab: *const c_char,
+    rela: *const Elf64Rela,
+    count: usize,
+    page: usize,
+) {
+    for i in 0..count {
+        let r = rela.add(i);
+        let rtype = elf64_r_type((*r).r_info);
+        if rtype != R_X86_64_JUMP_SLOT && rtype != R_X86_64_GLOB_DAT {
+            continue;
+        }
+        let sym_idx = elf64_r_sym((*r).r_info) as usize;
+        if sym_idx == 0 {
+            continue; // index 0: no symbol attached to this relocation
+        }
+        let name_off = (*symtab.add(sym_idx)).st_name as usize;
+        let name = CStr::from_ptr(strtab.add(name_off));
+        if name.to_bytes() != b"ioctl" {
+            continue;
+        }
+        let slot = (base + (*r).r_offset as usize) as *mut *mut c_void;
+        let pg = (slot as usize & !(page - 1)) as *mut c_void; // start of the page holding the slot
+        // Restore the slot's original protection (writable under partial RELRO) rather than a
+        // hardcoded read-only: these libs lazily bind through this page, so read-only faults the
+        // next resolve. Default to writable if maps is unreadable.
+        let mut orig = page_prot(slot as usize);
+        if orig < 0 {
+            orig = libc::PROT_READ | libc::PROT_WRITE;
+        }
+        if libc::mprotect(pg, page, libc::PROT_READ | libc::PROT_WRITE) == 0 {
+            // Atomic pointer store: another thread may be dispatching through this GOT slot
+            // concurrently, so publish the new pointer without tearing.
+            let ap = &*(slot as *const AtomicPtr<c_void>);
+            ap.store(filtered_ioctl as *mut c_void, Ordering::Release);
+            libc::mprotect(pg, page, orig); // best-effort restore; the result is not checked
+        }
+    }
+}
+
+/// dl_iterate_phdr callback: patch the `ioctl` GOT slots of each matching NVIDIA library.
+unsafe extern "C" fn patch_phdr_cb(
+    info: *mut libc::dl_phdr_info,
+    _size: libc::size_t,
+    _data: *mut c_void,
+) -> c_int {
+    // A panic must not unwind across this extern "C" boundary (dl_iterate_phdr is C; the guard
+    // would abort the process): catch it and keep iterating.
+    let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
+        let info = &*info;
+        if info.dlpi_name.is_null() || *info.dlpi_name == 0 {
+            return; // null or empty name: nothing to match against
+        }
+        let name = CStr::from_ptr(info.dlpi_name).to_string_lossy();
+        // Patch only libcuda, libnvcuvid and libnvidia-encode (which calls through libnvcuvid),
+        // the issuers of GET_ATTACHED_IDS; libcudart, libnvidia-ml, libnvidia-glcore, ... are skipped.
+        if !name.contains("libnvcuvid")
+            && !name.contains("libnvidia-encode")
+            && !name.contains("libcuda.so")
+        {
+            return;
+        }
+        let base = info.dlpi_addr as usize;
+        for i in 0..info.dlpi_phnum as isize {
+            let ph = &*info.dlpi_phdr.offset(i);
+            if ph.p_type == libc::PT_DYNAMIC {
+                patch_ioctl_got(base, (base + ph.p_vaddr as usize) as *const Elf64Dyn);
+            }
+        }
+    }));
+    0 // zero return: iteration continues with the next loaded object
+}
+
+/// Reports whether a host GPU is hidden from the container: /proc/driver/nvidia/gpus lists more
+/// entries (dot-names ignored) than minors pass `node_present`, and at least one minor passes.
+fn has_hidden_gpus() -> bool {
+    let host_count = std::fs::read_dir("/proc/driver/nvidia/gpus").map_or(0, |dir| {
+        dir.flatten().filter(|ent| !ent.file_name().to_string_lossy().starts_with('.')).count()
+    });
+    let visible_count = (0..MAX_ATTACHED_GPUS as u32).filter(|&minor| node_present(minor)).count();
+    visible_count > 0 && host_count > visible_count
+}
+
+/// Install the ioctl GOT filter (GET_ATTACHED_IDS / GET_PROBED_IDS) at most once, and only when
+/// a host GPU is hidden from the container. Safe to call before every NVENC session open.
+pub fn install() {
+    static ONCE: Once = Once::new(); // later calls return without repeating any of the work
+    ONCE.call_once(|| {
+        // Escape hatch to skip the GOT patch entirely (also handy for A/B testing it).
+        if std::env::var_os("PIXELFLUX_DISABLE_GPU_FILTER").is_some() {
+            eprintln!("[pixelflux] multi-GPU NVENC filter disabled via PIXELFLUX_DISABLE_GPU_FILTER");
+            return;
+        }
+        // Cache /dev/nvidiactl's rdev so the ioctl hook can verify the fd identity before it ever
+        // rewrites a response (0 stays cached on failure -> the identity gate is skipped).
+        unsafe {
+            let mut st: libc::stat = std::mem::zeroed();
+            if libc::stat(b"/dev/nvidiactl\0".as_ptr() as *const c_char, &mut st) == 0 {
+                NVIDIACTL_RDEV.store(st.st_rdev as u64, Ordering::Relaxed);
+            }
+        }
+        if has_hidden_gpus() {
+            unsafe { // NOTE(review): visits only already-loaded objects
+                libc::dl_iterate_phdr(Some(patch_phdr_cb), std::ptr::null_mut());
+            }
+            eprintln!("[pixelflux] multi-GPU NVENC ioctl filter installed (GET_ATTACHED_IDS/GET_PROBED_IDS)");
+        }
+    });
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn filter_keeps_strict_subset_and_invalidates_rest() {
+        let mut ids = [INVALID_GPU_ID; MAX_ATTACHED_GPUS];
+        ids[0] = 0x100;
+        ids[1] = 0x200; // rejected by the predicate below
+        ids[2] = 0x300;
+        // total=3, keep 0x100 & 0x300 -> strict subset (2<3) => rewrite, kept ids compacted.
+        filter_ids(&mut ids, |id| id == 0x100 || id == 0x300);
+        assert_eq!(ids[0], 0x100);
+        assert_eq!(ids[1], 0x300);
+        assert_eq!(ids[2], INVALID_GPU_ID);
+        assert_eq!(ids[3], INVALID_GPU_ID);
+    }
+
+    #[test]
+    fn filter_noop_when_all_kept() {
+        let mut ids = [INVALID_GPU_ID; MAX_ATTACHED_GPUS];
+        ids[0] = 0xAA;
+        ids[1] = 0xBB;
+        filter_ids(&mut ids, |_| true); // nkept==total => no rewrite
+        assert_eq!(ids[0], 0xAA);
+        assert_eq!(ids[1], 0xBB);
+        assert_eq!(ids[2], INVALID_GPU_ID);
+    }
+
+    #[test]
+    fn filter_noop_when_none_kept() {
+        let mut ids = [INVALID_GPU_ID; MAX_ATTACHED_GPUS];
+        ids[0] = 0xAA;
+        ids[1] = 0xBB;
+        filter_ids(&mut ids, |_| false); // nkept==0 => fail-safe, leave untouched
+        assert_eq!(ids[0], 0xAA);
+        assert_eq!(ids[1], 0xBB);
+    }
+
+    #[test]
+    fn ioc_nr_extracts_low_byte() {
+        assert_eq!(ioc_nr(0xC020462A), NV_ESC_RM_CONTROL); // low byte of the request is 0x2A
+    }
+
+    #[test]
+    fn request_match_ignores_param_size() {
+        // A request sharing DIR|TYPE|NR but encoding a DIFFERENT _IOC_SIZE must still match, so a
+        // driver whose NVOS54_PARAMETERS has a different sizeof isn't rejected by the request gate.
+        let base = ioc_no_size(NV_RM_CONTROL_REQUEST);
+        let other_size = base | (0x30 << IOC_SIZESHIFT); // size 0x30 instead of the canonical 0x20
+        assert_ne!(other_size, NV_RM_CONTROL_REQUEST);
+        assert_eq!(ioc_no_size(other_size), base);
+        // NR precision is preserved: a different NR does NOT match.
+        let other_nr = (NV_RM_CONTROL_REQUEST & !0xFF) | 0x2B;
+        assert_ne!(ioc_no_size(other_nr), base);
+    }
+
+    #[test]
+    fn param_struct_sizes_match_nvidia_layout() {
+        // ATTACHED = gpuIds[32]; PROBED = gpuIds[32] + excludedGpuIds[32]. Both lead with the
+        // gpuIds[32] we rewrite, so the exact-size guards disambiguate the two cmds.
+        assert_eq!(ATTACHED_PARAMS_SIZE, 128); // 32 ids * 4 bytes
+        assert_eq!(PROBED_PARAMS_SIZE, 256); // two 32-entry u32 arrays
+        assert_eq!(std::mem::size_of::<NvRmControlParams>(), 32);
+    }
+}
diff --git a/pixelflux/src/pipeline.rs b/pixelflux/src/pipeline.rs
new file mode 100644
index 0000000..e85d884
--- /dev/null
+++ b/pixelflux/src/pipeline.rs
@@ -0,0 +1,404 @@
+//! Source-agnostic frame-processing logic shared by the Wayland (dmabuf /
+//! compositor-damage) and X11 (host-ARGB / stripe-hash-damage) capture paths, so
+//! both get identical paint-over and recovery-keyframe behavior.
+
+use crate::encoders::nvenc::NvencEncoder;
+use crate::encoders::oh264::Openh264Encoder;
+use crate::encoders::software::{encode_cpu, EncodedStripe, StripeState};
+use crate::encoders::vaapi::VaapiEncoder;
+use crate::recording_sink::RecordingSink;
+use crate::RustCaptureSettings;
+use std::sync::Arc;
+
+/// Outcome of the full-frame H.264 (NVENC/VAAPI) send decision.
+pub struct HwFrameDecision {
+    pub send: bool, // emit a picture this frame
+    pub force_idr: bool, // encode that picture as a forced IDR
+    pub target_qp: u32, // QP to encode at (the normal or the paint-over value)
+}
+
+/// Decide whether to emit a full-frame H.264 picture this frame, at what QP, and
+/// whether to force an IDR -- advancing the paint-over bookkeeping in `st`.
+///
+/// `is_dirty` is the motion signal (compositor damage on Wayland, a stripe-hash
+/// change on X11); `is_animated` forces a send for animated overlays; `requested_idr`
+/// is an on-demand keyframe request. HW encoders run an effectively infinite GOP and
+/// have no in-band IDR channel, so a (re)connecting client can only start decoding on
+/// a forced IDR: recovery keyframes are forced on the first frame, every ~2s, and on an
+/// explicit request. A paint-over trigger also forces one. Frames consumed by a paint-over
+/// burst keep the paint-over QP through the burst's last frame unless motion interrupts it.
+pub fn decide_hw_fullframe(
+    st: &mut StripeState,
+    settings: &RustCaptureSettings,
+    frame_counter: u16,
+    is_dirty: bool,
+    is_animated: bool,
+    requested_idr: bool,
+) -> HwFrameDecision {
+    let normal_qp = settings.h264_crf as u32;
+    let paint_qp = settings.h264_paintover_crf as u32;
+    let trigger_frames = settings.paint_over_trigger_frames;
+    let use_paint_over = settings.use_paint_over_quality;
+    let burst = settings.h264_paintover_burst_frames;
+    let streaming = settings.h264_streaming_mode;
+
+    let mut send_frame = false;
+    let mut force_idr = false;
+    let mut target_qp = normal_qp;
+
+    if st.h264_burst_frames_remaining > 0 {
+        send_frame = true;
+        target_qp = paint_qp;
+        st.h264_burst_frames_remaining -= 1;
+
+        if is_dirty {
+            st.h264_burst_frames_remaining = 0;
+            st.paint_over_sent = false;
+            target_qp = normal_qp;
+        }
+    }
+
+    if !send_frame && (streaming || is_animated) {
+        send_frame = true;
+    }
+
+    // Clamp fps before the cast: target_fps<=0 would make kf_interval 1 (an IDR every
+    // frame). ~2s spacing bounds reconnect latency without keyframing a static screen.
+    let safe_fps = settings.target_fps.max(1.0);
+    let kf_interval = ((safe_fps * 2.0).round() as u64).max(1);
+    let periodic_idr = (frame_counter as u64 % kf_interval) == 0;
+    let recovery_idr = frame_counter == 0 || periodic_idr || requested_idr;
+
+    if is_dirty {
+        // Real motion: full reset of paint-over bookkeeping (the screen changed).
+        send_frame = true;
+        force_idr = recovery_idr;
+        st.no_motion_frame_count = 0;
+        st.paint_over_sent = false;
+        st.h264_burst_frames_remaining = 0;
+        target_qp = normal_qp;
+    } else if recovery_idr {
+        // Recovery keyframe on a STATIC screen. Do NOT reset no_motion_frame_count /
+        // paint_over_sent here -- that restarts the paint-over countdown every ~2s and
+        // can starve it. target_qp is deliberately left alone: it is already paint_qp when
+        // this tick consumed a burst frame (including the burst's final frame, after which
+        // the remaining count reads 0) and normal_qp otherwise.
+        send_frame = true;
+        force_idr = true;
+    } else if !send_frame {
+        st.no_motion_frame_count += 1;
+
+        if use_paint_over
+            && st.no_motion_frame_count >= trigger_frames
+            && !st.paint_over_sent
+            && paint_qp < normal_qp
+        {
+            send_frame = true;
+            st.paint_over_sent = true;
+            force_idr = true;
+            target_qp = paint_qp;
+            st.h264_burst_frames_remaining = burst - 1;
+        }
+    }
+
+    HwFrameDecision { send: send_frame, force_idr, target_qp }
+}
+
+/// Full-frame encoder bound to the X11 (host-ARGB) pipeline: NVENC, VAAPI, or software OpenH264.
+/// Striped software JPEG/x264 keeps its state per stripe inside encode_cpu, so it is `None`.
+enum X11Encoder {
+    None, // software path: no persistent encoder object
+    Nvenc(NvencEncoder),
+    Vaapi(VaapiEncoder),
+    Openh264(Openh264Encoder), // software encoder, but full-frame like the HW ones
+}
+
+/// Persistent per-capture context for the X11 host-ARGB path. The caller hands a borrowed ARGB
+/// frame to `process()` each tick. There is no compositor here, so damage comes from whole-frame
+/// or per-stripe content hashing; full-frame H.264 goes through `decide_hw_fullframe`, while
+/// striped JPEG/x264 goes through `encode_cpu` with `hash_damage=true`.
+pub struct X11Pipeline {
+    settings: RustCaptureSettings,
+    stripes: Vec<StripeState>, // per-stripe state handed to encode_cpu on the software path
+    hw: X11Encoder,
+    hw_state: StripeState, // single whole-frame state driving decide_hw_fullframe
+    frame_counter: u16, // wrapping; advanced once per process() call
+    pending_force_idr: bool, // set by request_idr(), cleared at the end of process()
+    // Optional Unix-socket H.264 fan-out (parity with the Wayland path); None unless
+    // recording_socket is set. HW encoders write to it internally; the CPU/OpenH264 paths
+    // are fed from process().
+    recording_sink: Option<Arc<RecordingSink>>,
+}
+
+impl X11Pipeline {
+    /// Build the context and pick the encoder for H.264 output (`output_mode == 1`): OpenH264 when
+    /// `use_openh264` is set; else, unless `use_cpu`, VAAPI (render node index >= 0, not 4:4:4) or
+    /// NVENC (null EGL display: unused on the CPU-ARGB path). Any init failure falls back to software.
+    ///
+    /// `recording_sink` is bound once per capture and OWNED BY THE CALLER: the pipeline is
+    /// rebuilt on auto-adjust resizes, and re-binding the sink there would tear down the
+    /// socket listener and disconnect attached recorders mid-recording.
+    pub fn new(settings: RustCaptureSettings, recording_sink: Option<Arc<RecordingSink>>) -> Self {
+        let hw = if settings.output_mode == 1 && settings.use_openh264 {
+            // Explicit opt-in to the OpenH264 software encoder (full-frame, like the HW path).
+            match Openh264Encoder::new(&settings, recording_sink.clone()) {
+                Some(e) => X11Encoder::Openh264(e),
+                None => {
+                    eprintln!("[x11] OpenH264 init failed; falling back to software x264");
+                    X11Encoder::None
+                }
+            }
+        } else if settings.output_mode == 1 && !settings.use_cpu {
+            if settings.vaapi_render_node_index >= 0 {
+                if settings.h264_fullcolor {
+                    // VAAPI has no reliable 4:4:4 H.264 profile; the x264 software path does
+                    // (high444), so defer full-color to it instead of a silent CPU fallback.
+                    eprintln!("[x11] 4:4:4 full-color requested; VAAPI lacks it, using software x264");
+                    X11Encoder::None
+                } else {
+                    // VAAPI: upload host ARGB to a VAAPI surface, VA-VPP converts to NV12 on the GPU.
+                    match VaapiEncoder::new_host(&settings, recording_sink.clone()) {
+                        Ok(e) => X11Encoder::Vaapi(e),
+                        Err(err) => {
+                            eprintln!("[x11] VAAPI init failed ({err}); falling back to software");
+                            X11Encoder::None
+                        }
+                    }
+                }
+            } else {
+                match NvencEncoder::new(&settings, std::ptr::null(), recording_sink.clone()) {
+                    Ok(e) => X11Encoder::Nvenc(e),
+                    Err(err) => {
+                        eprintln!("[x11] NVENC init failed ({err}); falling back to software");
+                        X11Encoder::None
+                    }
+                }
+            }
+        } else {
+            X11Encoder::None
+        };
+        Self {
+            settings,
+            stripes: Vec::new(),
+            hw,
+            hw_state: StripeState::default(),
+            frame_counter: 0,
+            pending_force_idr: false,
+            recording_sink,
+        }
+    }
+
+    /// Request an on-demand keyframe on the next processed frame.
+    pub fn request_idr(&mut self) {
+        self.pending_force_idr = true;
+    }
+
+    /// Apply a runtime rate-control / framerate change: the CBR target bitrate + VBV (kbps /
+    /// kb; ignored unless CBR is active) and the target fps (non-positive `fps` keeps the current
+    /// one). NVENC and OpenH264 reconfigure their live session immediately; VAAPI re-opens its codec
+    /// context; all three receive the same effective fps. The x264 software path picks the new
+    /// values up on the next `process()` (encode_cpu reads the settings and reconfigures stripes).
+    pub fn update_rate(&mut self, bitrate_kbps: i32, vbv_kb: i32, fps: f64) {
+        self.settings.h264_bitrate_kbps = bitrate_kbps;
+        self.settings.h264_vbv_buffer_size_kb = vbv_kb;
+        if fps > 0.0 {
+            self.settings.target_fps = fps;
+        }
+        match &mut self.hw {
+            X11Encoder::Nvenc(enc) => enc.reconfigure_rate(&self.settings),
+            X11Encoder::Openh264(enc) => enc.reconfigure_rate(bitrate_kbps, self.settings.target_fps),
+            X11Encoder::Vaapi(enc) => enc.reconfigure_rate(&self.settings),
+            _ => {}
+        }
+    }
+
+    /// Encode one host-ARGB frame (B,G,R,A order; `stride` bytes per row) and return the
+    /// encoded stripes (empty when nothing changed). Borrows `argb` for the call only.
+    pub fn process(&mut self, argb: &[u8], stride: usize) -> Vec<EncodedStripe> {
+        let width = self.settings.width;
+        let height = self.settings.height;
+        let requested = self.pending_force_idr;
+        let threshold = self.settings.damage_block_threshold;
+        let duration = self.settings.damage_block_duration as i32;
+
+        let out = if !matches!(self.hw, X11Encoder::None) {
+            // Full-frame H.264 on a HW encoder (NVENC or VAAPI). Frame-level damage via
+            // whole-frame content hashing; streaming mode sends every frame, so the content
+            // hash is unused there — skip it.
+            let is_dirty = if self.settings.h264_streaming_mode {
+                false
+            } else {
+                self.hw_state.content_dirty(argb, threshold, duration)
+            };
+            let d = decide_hw_fullframe(
+                &mut self.hw_state,
+                &self.settings,
+                self.frame_counter,
+                is_dirty,
+                false,
+                requested,
+            );
+            if d.send {
+                let fc = self.frame_counter as u64;
+                // Recording sink forces an IDR on connect / every N frames so a late recorder
+                // starts on a keyframe (parity with the Wayland path).
+                let force_idr = d.force_idr
+                    || self.recording_sink.as_ref().map(|s| s.should_force_idr()).unwrap_or(false);
+                // Identical decision for both HW encoders; only the submission differs: NVENC
+                // encodes ARGB directly, VAAPI uploads + VA-VPP converts to NV12 on the GPU.
+                let res = match &mut self.hw {
+                    X11Encoder::Nvenc(enc) => {
+                        enc.encode_cpu_argb(argb, stride, fc, d.target_qp, force_idr)
+                    }
+                    X11Encoder::Vaapi(enc) => {
+                        enc.encode_host_argb(argb, stride, fc, d.target_qp, force_idr)
+                    }
+                    // OpenH264 is bitrate-controlled, so the paint-over QP is not applied here.
+                    // X11 host pixels are BGRA (rgba_input=false).
+                    X11Encoder::Openh264(enc) => {
+                        enc.encode_host_argb(argb, stride, fc, force_idr, false)
+                    }
+                    X11Encoder::None => unreachable!(),
+                };
+                match res {
+                    Ok(data) if !data.is_empty() => {
+                        vec![EncodedStripe {
+                            data,
+                            data_type: 2,
+                            stripe_y_start: 0,
+                            stripe_height: height,
+                            frame_id: self.frame_counter as i32,
+                        }]
+                    }
+                    Ok(_) => Vec::new(),
+                    Err(e) => {
+                        eprintln!("[x11] HW encode error: {e}");
+                        Vec::new()
+                    }
+                }
+            } else {
+                Vec::new()
+            }
+        } else {
+            // Invariant: the software path (encode_cpu / content_dirty) indexes rows at width*4;
+            // it does NOT thread the producer stride through. This holds for every current
+            // producer (X11 XShm BGRA and the Wayland readback both deliver tightly-packed
+            // width*4 rows). A future producer with a padded stride would need encode_cpu taught
+            // to honor `stride` -- the HW paths above already pass it through.
+            debug_assert_eq!(
+                stride,
+                width as usize * 4,
+                "software encode path assumes tightly-packed rows (stride == width*4)"
+            );
+            let safe_fps = self.settings.target_fps.max(1.0);
+            let kf_interval = ((safe_fps * 2.0).round() as u64).max(1);
+            let periodic = (self.frame_counter as u64 % kf_interval) == 0;
+            let force_idr_all = self.settings.output_mode == 1
+                && (self.frame_counter == 0 || periodic || requested);
+            encode_cpu(
+                &mut self.stripes,
+                argb,
+                width,
+                height,
+                &[],
+                &self.settings,
+                self.frame_counter,
+                false,
+                true,
+                self.recording_sink.as_ref(),
+                force_idr_all,
+            )
+        };
+
+        self.pending_force_idr = false;
+        self.frame_counter = self.frame_counter.wrapping_add(1);
+        out
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn x11_software_emits_on_change_and_stays_quiet_when_static() {
+        let mut s = RustCaptureSettings::default();
+        s.width = 128;
+        s.height = 128;
+        s.output_mode = 0; // JPEG
+        s.use_cpu = true; // force the software path (no NVENC in the test)
+        s.jpeg_quality = 60;
+        s.use_paint_over_quality = false; // keep the assertion about static frames clean
+        let mut p = X11Pipeline::new(s, None);
+        let stride = 128 * 4; // tightly packed rows, as the software path requires
+        let frame_a = vec![10u8; stride * 128];
+        let mut frame_b = frame_a.clone();
+        for px in frame_b.iter_mut().take(stride * 40) {
+            *px = 200; // change the top rows so some stripes differ
+        }
+        let n1 = p.process(&frame_a, stride).len();
+        let n2 = p.process(&frame_a, stride).len();
+        let n3 = p.process(&frame_b, stride).len();
+        assert!(n1 > 0, "first frame should emit (all stripes dirty vs init)");
+        assert_eq!(n2, 0, "identical static frame should emit nothing");
+        assert!(n3 > 0, "changed frame should emit dirty stripes");
+    }
+
+    fn settings() -> RustCaptureSettings { // shared fixture for the decide_hw_fullframe tests
+        RustCaptureSettings {
+            h264_crf: 25,
+            h264_paintover_crf: 18,
+            paint_over_trigger_frames: 3,
+            use_paint_over_quality: true,
+            h264_paintover_burst_frames: 5,
+            h264_streaming_mode: false,
+            target_fps: 60.0, // kf_interval = 120
+            ..Default::default()
+        }
+    }
+
+    #[test]
+    fn first_frame_forces_idr() {
+        let s = settings();
+        let mut st = StripeState::default();
+        let d = decide_hw_fullframe(&mut st, &s, 0, false, false, false);
+        assert!(d.send && d.force_idr);
+        assert_eq!(d.target_qp, 25); // normal QP (h264_crf), no burst in flight
+    }
+
+    #[test]
+    fn paint_over_fires_after_trigger_then_bursts() {
+        let s = settings();
+        let mut st = StripeState::default();
+        // Static, non-recovery frames accumulate no-motion count.
+        for fc in 1..=2 {
+            let d = decide_hw_fullframe(&mut st, &s, fc, false, false, false);
+            assert!(!d.send, "frame {fc} should stay idle");
+        }
+        // 3rd static frame hits trigger -> paint-over IDR at paint QP, burst armed.
+        let d = decide_hw_fullframe(&mut st, &s, 3, false, false, false);
+        assert!(d.send && d.force_idr);
+        assert_eq!(d.target_qp, 18);
+        assert_eq!(st.h264_burst_frames_remaining, 4); // burst of 5 minus the frame just sent
+        // 4th frame: burst continues at paint QP, no forced IDR.
+        let d = decide_hw_fullframe(&mut st, &s, 4, false, false, false);
+        assert!(d.send && !d.force_idr);
+        assert_eq!(d.target_qp, 18);
+        assert_eq!(st.h264_burst_frames_remaining, 3);
+    }
+
+    #[test]
+    fn motion_resets_paintover_and_uses_normal_qp() {
+        let s = settings();
+        let mut st = StripeState::default();
+        st.paint_over_sent = true;
+        st.h264_burst_frames_remaining = 2;
+        st.no_motion_frame_count = 9;
+        let d = decide_hw_fullframe(&mut st, &s, 7, true, false, false); // dirty, mid-burst
+        assert!(d.send);
+        assert_eq!(d.target_qp, 25);
+        assert!(!st.paint_over_sent);
+        assert_eq!(st.h264_burst_frames_remaining, 0);
+        assert_eq!(st.no_motion_frame_count, 0);
+    }
+}
diff --git a/pixelflux/src/recording_sink.rs b/pixelflux/src/recording_sink.rs
new file mode 100644
index 0000000..c710a8e
--- /dev/null
+++ b/pixelflux/src/recording_sink.rs
@@ -0,0 +1,296 @@
+use std::fs;
+use std::io::{ErrorKind, Write};
+use std::os::unix::net::UnixListener;
+use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
+use std::sync::{Arc, Mutex};
+use std::thread;
+use std::time::Duration;
+
+use crossbeam_channel::{bounded, Sender, TrySendError};
+
+// Out-of-band H.264 recording sink: tunables for the Unix-socket fan-out defined below.
+/// Write timeout set on every accepted client stream.
+const WRITE_TIMEOUT: Duration = Duration::from_millis(100);
+/// Sleep between accept() polls while the non-blocking listener has nothing pending.
+const ACCEPT_POLL_INTERVAL: Duration = Duration::from_millis(50);
+/// Environment variable read for the socket path when the configured path is empty.
+pub const RECORDING_SOCKET_ENV: &str = "PIXELFLUX_RECORDING_SOCKET";
+/// Keyframe interval (in frames) given to every sink created by bind().
+const DEFAULT_KEYINT_FRAMES: u32 = 60;
+
+/// Per-client buffered-frame cap; a client that exceeds it is dropped as too slow.
+const CLIENT_QUEUE_CAP: usize = 256;
+
+/// A connected client; queued frames are drained to its socket by a dedicated writer thread.
+struct ClientHandle {
+    tx: Sender<Arc<Vec<u8>>>, // bounded queue feeding this client's writer thread
+    /// Signals the writer thread to stop promptly (without draining its backlog)
+    /// when the client is dropped from the sink.
+    stop: Arc<AtomicBool>,
+}
+
+pub struct RecordingSink { // fans one encoded stream out to every connected socket client
+    path: String, // socket path; removed again when the sink is dropped
+    clients: Arc<Mutex<Vec<ClientHandle>>>, // shared with the accept thread, which pushes new clients
+    shutdown: Arc<AtomicBool>, // set on drop; ends the accept loop
+    frames_since_idr: Arc<AtomicU32>, // u32::MAX makes the next should_force_idr() report an IDR
+    keyint_frames: u32, // keyframe interval checked by should_force_idr()
+}
+
+impl RecordingSink {
+    /// @brief Chooses the socket path (explicit setting, else environment) and binds it.
+    ///
+    /// @input settings_path: Configured socket path; an empty string defers to the environment.
+    /// @return The shared sink, or None when no path is available or the bind fails.
+    pub fn try_bind(settings_path: &str) -> Option<Arc<Self>> {
+        let path = if settings_path.is_empty() {
+            // Nothing configured: consult the environment, treating an unset,
+            // unreadable or empty variable as "recording disabled".
+            std::env::var(RECORDING_SOCKET_ENV)
+                .ok()
+                .filter(|candidate| !candidate.is_empty())?
+        } else {
+            settings_path.to_string()
+        };
+
+        // A failed bind is logged and reported as None, never propagated as an error.
+        let bound = Self::bind(path);
+        if let Err(e) = &bound {
+            eprintln!("[recording_sink] bind failed: {:?}", e);
+        }
+        bound.ok().map(Arc::new)
+    }
+
+    /// @brief Binds the Unix listener and spawns the polling accept thread.
+    ///
+    /// @input path: The file path to bind the socket to; any existing file there is removed first.
+    /// @return The new RecordingSink, or the I/O error from binding / configuring the listener.
+    fn bind(path: String) -> std::io::Result<Self> {
+        let _ = fs::remove_file(&path); // best-effort unlink; the result is deliberately ignored
+
+        let listener = UnixListener::bind(&path)?;
+        listener.set_nonblocking(true)?; // accept() is polled so the loop can observe `shutdown`
+
+        let clients: Arc<Mutex<Vec<ClientHandle>>> = Arc::new(Mutex::new(Vec::new()));
+        let shutdown = Arc::new(AtomicBool::new(false));
+
+        let frames_since_idr = Arc::new(AtomicU32::new(u32::MAX)); // first should_force_idr() is due
+
+        let clients_acc = clients.clone();
+        let shutdown_acc = shutdown.clone();
+        let frames_since_idr_acc = frames_since_idr.clone();
+        let path_log = path.clone();
+
+        thread::spawn(move || {
+            eprintln!("[recording_sink] listening on {}", path_log);
+            while !shutdown_acc.load(Ordering::Relaxed) {
+                match listener.accept() {
+                    Ok((stream, _)) => {
+                        if let Err(e) = stream.set_write_timeout(Some(WRITE_TIMEOUT)) {
+                            eprintln!("[recording_sink] set_write_timeout failed: {:?}", e);
+                            continue;
+                        }
+
+                        // Writer thread owns the stream; exits when tx drops, the
+                        // stop flag is set, or a hard write error occurs.
+                        let (tx, rx) = bounded::<Arc<Vec<u8>>>(CLIENT_QUEUE_CAP);
+                        let stop = Arc::new(AtomicBool::new(false));
+                        let stop_writer = stop.clone();
+                        thread::spawn(move || {
+                            let mut stream = stream;
+                            for buf in rx.iter() {
+                                if stop_writer.load(Ordering::Relaxed) {
+                                    break;
+                                }
+                                if let Err(e) = write_all_frame(&mut stream, &buf, &stop_writer) {
+                                    // A soft timeout/would-block is retried inside
+                                    // write_all_frame; reaching here means a hard
+                                    // error (or a requested stop). Surface the
+                                    // concrete reason and drop the client.
+                                    eprintln!(
+                                        "[recording_sink] writer thread exiting; write failed: {:?}",
+                                        e
+                                    );
+                                    break;
+                                }
+                            }
+                        });
+
+                        let mut guard = clients_acc.lock().unwrap();
+                        guard.push(ClientHandle { tx, stop });
+
+                        frames_since_idr_acc.store(u32::MAX, Ordering::Relaxed); // IDR for the new client
+                        eprintln!(
+                            "[recording_sink] client connected; total {}; requesting IDR",
+                            guard.len()
+                        );
+                    }
+                    Err(e) if e.kind() == ErrorKind::WouldBlock => {
+                        thread::sleep(ACCEPT_POLL_INTERVAL);
+                    }
+                    Err(e) => {
+                        eprintln!("[recording_sink] accept error: {:?}", e);
+                        thread::sleep(Duration::from_millis(500));
+                    }
+                }
+            }
+            eprintln!("[recording_sink] listener thread exiting");
+        });
+
+        Ok(Self {
+            path,
+            clients,
+            shutdown,
+            frames_since_idr,
+            keyint_frames: DEFAULT_KEYINT_FRAMES,
+        })
+    }
+
+    /// @brief Per-frame advisory hook: counts the frame and says whether to encode an IDR.
+    ///
+    /// @return True when the keyframe interval has elapsed or the counter was reset to force one.
+    pub fn should_force_idr(&self) -> bool {
+        // fetch_add returns the counter value from before this frame was counted.
+        let elapsed = self.frames_since_idr.fetch_add(1, Ordering::Relaxed);
+        let due = idr_due(elapsed, self.keyint_frames);
+        if due {
+            self.frames_since_idr.store(0, Ordering::Relaxed);
+        }
+        due
+    }
+
+    /// @brief Fans encoded bytes out to every client without blocking the caller.
+    ///
+    /// Bytes go to each client's bounded queue; a full (too slow) or disconnected
+    /// client is dropped. A poisoned client-list lock is recovered, not propagated.
+    ///
+    /// @input data: The raw Annex-B H.264 byte slice.
+    pub fn write_frame(&self, data: &[u8]) {
+        if data.is_empty() {
+            return;
+        }
+
+        // Recover a poisoned lock (as Drop tolerates it): this optional tap must not panic the caller.
+        let mut clients = self.clients.lock().unwrap_or_else(|poisoned| poisoned.into_inner());
+        // Skip the per-frame heap alloc + copy entirely when no recorder is attached
+        // (the normal optional-tap case: socket configured but unconnected).
+        if clients.is_empty() {
+            return;
+        }
+
+        // One copy, shared across clients via Arc.
+        let buf = Arc::new(data.to_vec());
+
+        let mut to_remove: Vec<usize> = Vec::new();
+
+        for (idx, client) in clients.iter().enumerate() {
+            match client.tx.try_send(buf.clone()) {
+                Ok(()) => {}
+                Err(TrySendError::Full(_)) => {
+                    eprintln!("[recording_sink] dropping slow client (idx {})", idx);
+                    to_remove.push(idx);
+                }
+                Err(TrySendError::Disconnected(_)) => {
+                    eprintln!("[recording_sink] dropping disconnected client (idx {})", idx);
+                    to_remove.push(idx);
+                }
+            }
+        }
+
+        for idx in to_remove.into_iter().rev() {
+            // Highest index first keeps the remaining indices valid across swap_remove.
+            // Signal the writer to stop promptly rather than draining its backlog.
+            let removed = clients.swap_remove(idx);
+            removed.stop.store(true, Ordering::Relaxed);
+        }
+    }
+}
+
+impl Drop for RecordingSink {
+    fn drop(&mut self) {
+        // Tell the accept thread to wind down on its next poll.
+        self.shutdown.store(true, Ordering::Relaxed);
+        // Raise every writer's stop flag before releasing the senders, so a writer
+        // that is mid-frame quits without draining its backlog and its thread/FD is
+        // reclaimed right away instead of via the accept thread's ~50 ms poll.
+        if let Ok(mut held) = self.clients.lock() {
+            held.iter()
+                .for_each(|client| client.stop.store(true, Ordering::Relaxed));
+            held.clear();
+        }
+        fs::remove_file(&self.path).ok();
+    }
+}
+
+/// Pushes one whole frame to a client, picking up at the unwritten offset after a
+/// soft failure instead of abandoning the connection mid-frame.
+///
+/// With `set_write_timeout` in effect, `write` may fail with `TimedOut`/`WouldBlock`
+/// once only a prefix of the frame has gone out; giving up at that point would hand a
+/// truncated Annex-B NAL to a reader that is slow but healthy. The remainder is
+/// therefore retried until the frame is complete or a hard error occurs. A client that
+/// never recovers is bounded elsewhere: its queue fills, `write_frame` removes it and
+/// sets `stop`, which this loop checks before every write attempt.
+fn write_all_frame<W: Write>(stream: &mut W, buf: &[u8], stop: &AtomicBool) -> std::io::Result<()> {
+    let mut offset = 0usize;
+    while offset < buf.len() {
+        if stop.load(Ordering::Relaxed) {
+            let why = "writer stopped (client dropped)";
+            return Err(std::io::Error::new(ErrorKind::Other, why));
+        }
+        let failure = match stream.write(&buf[offset..]) {
+            Ok(0) => {
+                let why = "failed to write whole frame";
+                return Err(std::io::Error::new(ErrorKind::WriteZero, why));
+            }
+            Ok(sent) => {
+                offset += sent;
+                continue;
+            }
+            Err(e) => e,
+        };
+        // Soft failures leave `offset` where the last successful write put it, so the
+        // next iteration retries the remainder. A blocking write timeout paces the
+        // loop by itself; the (non-default) non-blocking WouldBlock case gets a short
+        // sleep to avoid a hot spin. Anything else is a hard error.
+        match failure.kind() {
+            ErrorKind::TimedOut | ErrorKind::Interrupted => {}
+            ErrorKind::WouldBlock => thread::sleep(Duration::from_millis(5)),
+            _ => return Err(failure),
+        }
+    }
+    Ok(())
+}
+
+/// Reports whether a keyframe is due; saturating_sub keeps a keyint of 0 from underflowing.
+fn idr_due(frames_since_idr: u32, keyint_frames: u32) -> bool {
+    keyint_frames.saturating_sub(1) <= frames_since_idr
+}
+
+#[cfg(test)]
+mod tests {
+    use super::idr_due;
+
+    #[test]
+    fn first_frame_forces_idr() {
+        // The counter starts at u32::MAX, so the very first query must report "due".
+        assert!(idr_due(u32::MAX, 60));
+    }
+
+    #[test]
+    fn cadence_matches_keyint() {
+        // With keyint 60 the keyframe becomes due once 59 frames have elapsed,
+        // and stays due for any larger count.
+        for (elapsed, due) in [(0, false), (58, false), (59, true), (60, true)] {
+            assert_eq!(idr_due(elapsed, 60), due);
+        }
+    }
+
+    #[test]
+    fn keyint_zero_and_one_do_not_underflow() {
+        // keyint 0 and 1 both mean "every frame is due"; neither may underflow.
+        for (elapsed, keyint) in [(0, 0), (u32::MAX, 0), (0, 1), (1, 1)] {
+            assert!(idr_due(elapsed, keyint));
+        }
+    }
+}
diff --git a/pixelflux_wayland/src/wayland/cursor.rs b/pixelflux/src/wayland/cursor.rs
similarity index 94%
rename from pixelflux_wayland/src/wayland/cursor.rs
rename to pixelflux/src/wayland/cursor.rs
index 046d1a9..679c785 100644
--- a/pixelflux_wayland/src/wayland/cursor.rs
+++ b/pixelflux/src/wayland/cursor.rs
@@ -135,5 +135,11 @@ fn load_icon(theme: &CursorTheme, name: &str) -> Result<Vec<Image>, String> {
     cursor_file
         .read_to_end(&mut cursor_data)
         .map_err(|e| e.to_string())?;
-    parse_xcursor(&cursor_data).ok_or("Failed to parse".to_string())
+    // Reject empty parses so nearest_images()'s min_by_key().unwrap() can't panic
+    // on a valid-header-but-zero-image cursor file.
+    let imgs = parse_xcursor(&cursor_data).ok_or("Failed to parse".to_string())?;
+    if imgs.is_empty() {
+        return Err("Cursor file has no images".to_string());
+    }
+    Ok(imgs)
 }
diff --git a/pixelflux_wayland/src/wayland/frontend.rs b/pixelflux/src/wayland/frontend.rs
similarity index 87%
rename from pixelflux_wayland/src/wayland/frontend.rs
rename to pixelflux/src/wayland/frontend.rs
index 54e3945..6a79347 100644
--- a/pixelflux_wayland/src/wayland/frontend.rs
+++ b/pixelflux/src/wayland/frontend.rs
@@ -23,6 +23,7 @@ use smithay::wayland::viewporter::ViewporterState;
 use smithay::delegate_viewporter;
 use smithay::wayland::pointer_warp::{PointerWarpHandler, PointerWarpManager};
 use smithay::reexports::wayland_server::protocol::wl_pointer::WlPointer;
+use smithay::reexports::wayland_server::protocol::wl_shm;
 use smithay::wayland::relative_pointer::RelativePointerManagerState;
 use smithay::wayland::pointer_constraints::{PointerConstraintsHandler, PointerConstraintsState};
 use smithay::input::pointer::PointerHandle;
@@ -197,9 +198,24 @@ pub struct AppState {
     pub clock: Clock<Monotonic>,
 
     pub frame_counter: u16,
+    // Set by ThreadCommand::RequestIdr (client reconnect / decoder reset),
+    // consumed once on the next captured frame to force an immediate keyframe.
+    pub pending_force_idr: bool,
+    // Keycodes injected on key-down by inject_keysym, recorded so the matching key-up
+    // releases the SAME keycodes (and synthetic modifiers) even if the xkb layout changed
+    // mid-keystroke. Maps keysym -> (main keycode, modifier keycodes used: Shift and/or AltGr).
+    pub synthetic_shift_keysyms: std::collections::HashMap<u32, (u32, Vec<u32>)>,
+    // Ref-count of currently-held synthetic modifier keycodes (Shift_L / ISO_Level3_Shift)
+    // so a modifier is released only when the LAST keysym using it is released (releasing one
+    // of two simultaneously-held shifted/AltGr keys must not drop the modifier for the other).
+    pub synthetic_mod_refcounts: std::collections::HashMap<u32, u32>,
     pub use_gpu: bool,
 
     pub video_encoder: Option<GpuEncoder>,
+    // Full-frame software H.264 (OpenH264). Kept separate from `video_encoder` because it
+    // consumes host RGBA (the readback buffer) rather than a GPU surface/NV12; when set,
+    // `video_encoder` stays None so the readback fills `frame_buffer` without NV12 conversion.
+    pub openh264_encoder: Option<crate::encoders::oh264::Openh264Encoder>,
     pub vaapi_state: StripeState,
     pub cursor_helper: Cursor,
 
@@ -216,6 +232,11 @@ pub struct AppState {
     pub pointer_constraints_state: PointerConstraintsState,
     pub render_node_path: String,
     pub recording_sink: Option<Arc<crate::recording_sink::RecordingSink>>,
+    // Encoded frames are handed to a dedicated delivery thread (not the calloop thread) so a slow,
+    // GIL-holding Python callback can't stall input/control dispatch. Capacity-1 rendezvous mirrors
+    // the X11 FramePool single slot: non-dropping, ordered, <=1 frame of blocking backpressure.
+    pub deliver_tx: Option<std::sync::mpsc::SyncSender<Vec<crate::encoders::software::EncodedStripe>>>,
+    pub deliver_join: Option<std::thread::JoinHandle<()>>,
 }
 
 impl PointerConstraintsHandler for AppState {
@@ -427,6 +448,8 @@ impl CompositorHandler for AppState {
                 self.pending_windows.push(window);
             } else {
                 self.space.map_element(window.clone(), (0, 0), true);
+                // Refresh the cached bbox before reading geometry() (avoids a redundant configure).
+                window.on_commit();
 
                 if let Some(output) = self.outputs.first() {
                     output.enter(surface);
@@ -467,7 +490,13 @@ impl AppState {
     /// This method accepts a `CursorImageStatus` (Named, Hidden, or Surface), extracts
     /// the relevant pixel data (checking the hash cache for surfaces to avoid re-encoding),
     /// and outputs the final PNG bytes and hotspot coordinates to the registered Python callback.
-    fn send_cursor_image(&mut self, image: &CursorImageStatus) {
+    // pub(crate): also invoked from the calloop command handlers in lib.rs to replay the retained
+    // cursor when a callback (re)registers or a capture (re)starts.
+    pub(crate) fn send_cursor_image(&mut self, image: &CursorImageStatus) {
+        // Teardown gate: never attach to a finalizing interpreter from this thread.
+        if crate::PY_SHUTDOWN.load(std::sync::atomic::Ordering::Relaxed) {
+            return;
+        }
         if let Some(ref cb) = self.cursor_callback {
             let (msg_type, data, hot_x, hot_y) = match image {
                 CursorImageStatus::Named(icon) => {
@@ -527,29 +556,58 @@ impl AppState {
                         let shm_result = with_buffer_contents(&buffer, |ptr, len, spec| {
                             let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
                             let mut hasher = DefaultHasher::new();
-                            slice.hash(&mut hasher);
+                            // Hash only the cursor's sub-region (and its geometry/format), not the
+                            // whole pool: multiple sprites can share one pool and differ only by
+                            // `offset`, which would otherwise collide to the same cached PNG.
+                            spec.width.hash(&mut hasher);
+                            spec.height.hash(&mut hasher);
+                            spec.stride.hash(&mut hasher);
+                            spec.offset.hash(&mut hasher);
+                            spec.format.hash(&mut hasher);
+                            let start = (spec.offset.max(0) as usize).min(len);
+                            let span = (spec.stride.max(0) as usize)
+                                .saturating_mul(spec.height.max(0) as usize);
+                            let end = start.saturating_add(span).min(len);
+                            slice[start..end].hash(&mut hasher);
                             let hash = hasher.finish();
-                            (hash, spec.width, spec.height, spec.stride, slice.to_vec())
+                            (hash, spec.width, spec.height, spec.stride, spec.format, spec.offset, slice.to_vec())
                         });
 
                         match shm_result {
-                            Ok((hash, width, height, stride, raw_bytes)) => {
+                            Ok((hash, width, height, stride, format, buf_offset, raw_bytes)) => {
                                 if let Some(cached_png) = self.cursor_cache.get(&hash) {
                                     final_png = cached_png.clone();
                                 } else {
                                     if width <= 128 && height <= 128 && !raw_bytes.is_empty() {
                                         let mut img_buf = ImageBuffer::<Rgba<u8>, Vec<u8>>::new(width as u32, height as u32);
-                                        let stride_usize = stride as usize;
-                                        
+                                        // Clamp client-supplied stride/offset to non-negative and use
+                                        // checked arithmetic: a garbage descriptor must skip pixels, not
+                                        // panic (a negative i32 cast to usize wraps; offset+4 can overflow).
+                                        let stride_usize = stride.max(0) as usize;
+                                        let base_offset = buf_offset.max(0) as usize;
+
                                         for y in 0..(height as u32) {
                                             for x in 0..(width as u32) {
-                                                let offset = (y as usize * stride_usize) + (x as usize * 4);
-                                                if offset + 4 <= raw_bytes.len() {
-                                                    img_buf.put_pixel(x, y, Rgba([
-                                                        raw_bytes[offset + 2], 
-                                                        raw_bytes[offset + 1], 
-                                                        raw_bytes[offset], 
+                                                let offset = (y as usize)
+                                                    .checked_mul(stride_usize)
+                                                    .and_then(|row| base_offset.checked_add(row))
+                                                    .and_then(|o| o.checked_add((x as usize) * 4));
+                                                let offset = match offset {
+                                                    Some(o) => o,
+                                                    None => continue,
+                                                };
+                                                if offset.checked_add(4).map_or(false, |end| end <= raw_bytes.len()) {
+                                                    // Xrgb8888 has no alpha; byte 3 is padding.
+                                                    let alpha = if format == wl_shm::Format::Xrgb8888 {
+                                                        255
+                                                    } else {
                                                         raw_bytes[offset + 3]
+                                                    };
+                                                    img_buf.put_pixel(x, y, Rgba([
+                                                        raw_bytes[offset + 2],
+                                                        raw_bytes[offset + 1],
+                                                        raw_bytes[offset],
+                                                        alpha
                                                     ]));
                                                 }
                                             }
@@ -560,7 +618,10 @@ impl AppState {
                                             self.cursor_cache.insert(hash, bytes.clone());
                                             final_png = bytes;
                                             if self.cursor_cache.len() > 100 {
-                                                self.cursor_cache.clear();
+                                                // Evict an arbitrary entry to bound the cache; content-hash
+                                                // memoization means a re-render simply re-inserts if needed.
+                                                let evict = *self.cursor_cache.keys().next().unwrap();
+                                                self.cursor_cache.remove(&evict);
                                             }
                                         }
                                     }
@@ -606,7 +667,7 @@ impl AppState {
                                      } else {
                                          if width <= 128 && height <= 128 && !raw_bytes.is_empty() {
                                              let mut img_buf = ImageBuffer::<Rgba<u8>, Vec<u8>>::new(width as u32, height as u32);
-                                             let stride_usize = (width * 4) as usize;
+                                             let stride_usize = rgba_readback_stride(raw_bytes.len(), height as usize, width as usize);
                                              
                                              for y in 0..(height as u32) {
                                                  for x in 0..(width as u32) {
@@ -627,7 +688,10 @@ impl AppState {
                                                  self.cursor_cache.insert(hash, bytes.clone());
                                                  final_png = bytes;
                                                  if self.cursor_cache.len() > 100 {
-                                                     self.cursor_cache.clear();
+                                                     // Evict an arbitrary entry to bound the cache; content-hash
+                                                     // memoization means a re-render simply re-inserts if needed.
+                                                     let evict = *self.cursor_cache.keys().next().unwrap();
+                                                     self.cursor_cache.remove(&evict);
                                                  }
                                              }
                                          }
@@ -649,8 +713,7 @@ impl AppState {
             };
 
             if !data.is_empty() || msg_type == "hide" || msg_type == "surface" {
-                #[allow(deprecated)]
-                Python::with_gil(|py| {
+                Python::attach(|py| {
                     let py_bytes = PyBytes::new(py, &data);
                     let _ = cb.call1(py, (msg_type, py_bytes, hot_x, hot_y));
                 });
@@ -1359,3 +1422,65 @@ delegate_viewporter!(AppState);
 delegate_presentation!(AppState);
 delegate_xdg_activation!(AppState);
 delegate_primary_selection!(AppState);
+
+/// Bytes per row of an RGBA8 GPU readback, inferred from the mapping length instead of
+/// assuming width*4 so that row padding cannot skew the cursor image. The result is
+/// never smaller than one full row, and a zero height yields exactly one packed row.
+fn rgba_readback_stride(buf_len: usize, height: usize, width: usize) -> usize {
+    let packed_row = width.saturating_mul(4);
+    match buf_len.checked_div(height) {
+        Some(per_row) => per_row.max(packed_row),
+        None => packed_row,
+    }
+}
+
+#[cfg(test)]
+mod stride_tests {
+    use super::rgba_readback_stride;
+
+    #[test]
+    fn packed_readback_is_width_times_four() {
+        let stride_of = |w: usize, h: usize| rgba_readback_stride(w * 4 * h, h, w);
+        assert_eq!((stride_of(64, 48), stride_of(3, 2)), (64 * 4, 12));
+    }
+
+    #[test]
+    fn padded_readback_recovers_real_stride() {
+        let (width, height) = (3usize, 2usize);
+        let padded = width * 4 + 4; // 4 bytes of row padding
+        assert_eq!(rgba_readback_stride(padded * height, height, width), padded);
+    }
+
+    #[test]
+    fn padded_extraction_has_no_skew() {
+        const STRIDE: usize = 16; // width * 4 == 12, padded to 16
+        let (width, height) = (3usize, 2usize);
+        let mut pixels = vec![0u8; STRIDE * height];
+        for (y, row) in pixels.chunks_mut(STRIDE).enumerate() {
+            for x in 0..width {
+                // Tag each pixel with its own coordinates (offset by one).
+                row[x * 4] = x as u8 + 1;
+                row[x * 4 + 1] = y as u8 + 1;
+            }
+        }
+        let derived = rgba_readback_stride(pixels.len(), height, width);
+        assert_eq!(derived, STRIDE);
+        for y in 0..height {
+            for x in 0..width {
+                let at = y * derived + x * 4;
+                assert_eq!(pixels[at], x as u8 + 1);
+                assert_eq!(pixels[at + 1], y as u8 + 1);
+            }
+        }
+    }
+
+    #[test]
+    fn zero_height_no_divide_by_zero() {
+        assert_eq!(rgba_readback_stride(0, 0, 10), 10 * 4); // falls back to one packed row
+    }
+
+    #[test]
+    fn truncated_buffer_keeps_full_row() {
+        assert_eq!(rgba_readback_stride(10, 5, 64), 256); // 10 / 5 == 2 is below one row
+    }
+}
diff --git a/pixelflux_wayland/src/wayland/mod.rs b/pixelflux/src/wayland/mod.rs
similarity index 100%
rename from pixelflux_wayland/src/wayland/mod.rs
rename to pixelflux/src/wayland/mod.rs
diff --git a/pixelflux/src/x11/mod.rs b/pixelflux/src/x11/mod.rs
new file mode 100644
index 0000000..cae7171
--- /dev/null
+++ b/pixelflux/src/x11/mod.rs
@@ -0,0 +1,997 @@
+//! X11 host capture. Grabs the root window into a shared memory segment (XShm via x11rb) as
+//! BGRA, composites the XFixes hardware cursor and the watermark on the CPU, and feeds each
+//! frame to [`X11Pipeline`] which owns damage/stripe/encode.
+//!
+//! Capture and encode run on separate threads: the thread calling [`run_capture`] owns the x11rb
+//! connection and the pooled shm surfaces, and an encode thread it spawns owns the pipeline.
+//! Encoder multi-instance safety is handled inside the encoders (e.g. the libx264 open/close
+//! lock); each capture owns a private xcb connection, so there is no shared X state to serialize.
+
+use std::sync::atomic::{AtomicBool, AtomicI32, AtomicU64, Ordering};
+use std::sync::mpsc::Sender;
+use std::sync::{Arc, Condvar, Mutex};
+use std::thread;
+use std::time::{Duration, Instant};
+
+use x11rb::connection::Connection;
+use x11rb::protocol::shm::ConnectionExt as ShmExt;
+use x11rb::protocol::xfixes::ConnectionExt as XfixesExt;
+use x11rb::protocol::xproto::{ConnectionExt as XprotoExt, ImageFormat};
+use x11rb::rust_connection::RustConnection;
+
+use crate::encoders::overlay::WatermarkLocation;
+use crate::encoders::software::EncodedStripe;
+use crate::pipeline::X11Pipeline;
+use crate::recording_sink::RecordingSink;
+use crate::RustCaptureSettings;
+
+/// Cross-thread controls for a running capture. The owner (the `ScreenCapture` pyclass) flips
+/// these from the Python thread. The capture loop polls `stop` and `fps_milli` every iteration;
+/// `encode_loop` consumes `force_idr` / `rate_dirty` before each encode and applies them to its
+/// `X11Pipeline`. This keeps request_idr / rate / fps changes off the pipeline's thread
+/// boundary: the pipeline and its encoder are only ever touched from the thread that owns them.
+pub struct Controls {
+    pub stop: AtomicBool, // polled by the capture loop; true ends the capture
+    pub force_idr: AtomicBool, // one-shot: the encode loop swaps it back to false
+    pub rate_dirty: AtomicBool, // one-shot: makes the encode loop re-apply bitrate/VBV/fps
+    pub bitrate_kbps: AtomicI32,
+    pub vbv_kb: AtomicI32,
+    /// target fps * 1000 (atomic-friendly; re-read each frame for dynamic pacing + rate control).
+    pub fps_milli: AtomicU64,
+}
+
+impl Controls {
+    /// Seed the rate and fps values from the capture-start settings; all flags start cleared.
+    pub fn new(s: &RustCaptureSettings) -> Self {
+        let cleared = || AtomicBool::new(false);
+        let (stop, force_idr, rate_dirty) = (cleared(), cleared(), cleared());
+        let bitrate_kbps = AtomicI32::new(s.h264_bitrate_kbps);
+        let vbv_kb = AtomicI32::new(s.h264_vbv_buffer_size_kb);
+        // Stored as milli-fps; a target below 1 fps is raised to 1 fps first.
+        let fps_milli = AtomicU64::new((s.target_fps.max(1.0) * 1000.0) as u64);
+        Self { stop, force_idr, rate_dirty, bitrate_kbps, vbv_kb, fps_milli }
+    }
+}
+
+/// A shared-memory image surface: a System V shm segment (`shmget`/`shmat`) attached to both
+/// this process and the X server, into which `shm_get_image` writes one BGRA frame.
+struct ShmSurface {
+    shmseg: u32, // X-side segment id (from `generate_id`), used for shm_attach/shm_detach
+    addr: *mut u8, // local mapping returned by `shmat`; nulled by `destroy`
+    size: usize, // mapping length in bytes: `stride * height`
+    width: u16,
+    height: u16,
+    stride: usize, // bytes per row: `width * 4`
+}
+
+impl ShmSurface {
+    /// Allocate a shm segment of `width*height*4` bytes, attach it locally and to the X server,
+    /// then mark it `IPC_RMID`. Errors are strings; each failure path undoes the earlier steps.
+    fn create(conn: &RustConnection, width: u16, height: u16) -> Result<Self, String> {
+        let stride = width as usize * 4;
+        let size = stride * height as usize;
+        if size == 0 {
+            return Err("zero-sized capture surface".into());
+        }
+        unsafe {
+            let shmid = libc::shmget(libc::IPC_PRIVATE, size, libc::IPC_CREAT | 0o600);
+            if shmid < 0 {
+                return Err("shmget failed".into());
+            }
+            let addr = libc::shmat(shmid, std::ptr::null(), 0);
+            if addr == (-1isize) as *mut libc::c_void { // shmat reports failure as (void *)-1
+                libc::shmctl(shmid, libc::IPC_RMID, std::ptr::null_mut());
+                return Err("shmat failed".into());
+            }
+            let shmseg = match conn.generate_id() {
+                Ok(id) => id,
+                Err(e) => {
+                    libc::shmdt(addr);
+                    libc::shmctl(shmid, libc::IPC_RMID, std::ptr::null_mut());
+                    return Err(format!("generate_id: {e}"));
+                }
+            };
+            // Attach on the server, confirm with a round-trip (`check`), THEN mark for deletion
+            // so the segment survives until both this process and the server detach.
+            let attach = conn
+                .shm_attach(shmseg, shmid as u32, false) // read_only = false: the server writes
+                .map_err(|e| format!("shm_attach: {e}"))
+                .and_then(|c| c.check().map_err(|e| format!("shm_attach check: {e}")));
+            libc::shmctl(shmid, libc::IPC_RMID, std::ptr::null_mut());
+            if let Err(e) = attach {
+                libc::shmdt(addr);
+                return Err(e);
+            }
+            Ok(Self {
+                shmseg,
+                addr: addr as *mut u8,
+                size,
+                width,
+                height,
+                stride,
+            })
+        }
+    }
+
+    fn as_mut_slice(&mut self) -> &mut [u8] { // whole mapping; invalid once `destroy` ran
+        unsafe { std::slice::from_raw_parts_mut(self.addr, self.size) }
+    }
+
+    /// Detach from the server (needs the connection) and locally, best effort. Call before drop.
+    fn destroy(&mut self, conn: &RustConnection) {
+        let _ = conn.shm_detach(self.shmseg);
+        let _ = conn.flush();
+        unsafe {
+            libc::shmdt(self.addr as *mut libc::c_void);
+        }
+        self.addr = std::ptr::null_mut();
+    }
+}
+
+/// Work out the capture size from the settings and the live root geometry. With auto-adjust (or
+/// a non-positive requested width/height) the capture spans everything available from the
+/// capture offset; otherwise the requested size is limited to that area. H.264 needs even
+/// dimensions.
+fn resolve_dims(root_w: u16, root_h: u16, s: &RustCaptureSettings) -> (u16, u16) {
+    let follow_root = s.auto_adjust_screen_capture_size;
+    // `output_mode == 1` is the mode whose dimensions get rounded down to even.
+    let force_even = s.output_mode == 1;
+    // Both axes follow the same recipe, so it is written once. Negative offsets count as 0
+    // (the same way shm_get_image consumes them); saturating math plus the final clamp keep
+    // pathological settings from overflowing or truncating on the cast to u16.
+    let axis = |root: u16, offset: i32, requested: i32| -> u16 {
+        // Space between the offset and the root edge, never reported below 2 px.
+        let avail = i32::from(root).saturating_sub(offset.max(0)).max(2);
+        let wanted = if follow_root || requested <= 0 {
+            avail
+        } else {
+            requested.min(avail)
+        };
+        let bounded = wanted.clamp(2, i32::from(u16::MAX));
+        // Clearing bit 0 rounds down to even; `bounded >= 2`, so the result stays >= 2.
+        let sized = if force_even { bounded & !1 } else { bounded };
+        sized as u16
+    };
+    let width = axis(root_w, s.capture_x, s.width);
+    let height = axis(root_h, s.capture_y, s.height);
+    (width, height)
+}
+
+/// Composite one source pixel (given as separate r,g,b,a) onto a BGRA destination pixel. Full
+/// alpha overwrites B/G/R, zero alpha leaves `dst` alone, anything else blends per channel.
+#[inline]
+fn blend_pixel(dst: &mut [u8], r: u8, g: u8, b: u8, a: u8) {
+    match a {
+        0 => {}
+        255 => dst[..3].copy_from_slice(&[b, g, r]),
+        _ => {
+            let (alpha, inv) = (u32::from(a), 255 - u32::from(a));
+            for (d, s) in dst[..3].iter_mut().zip([b, g, r].iter()) {
+                *d = ((u32::from(*s) * alpha + u32::from(*d) * inv) / 255) as u8;
+            }
+        }
+    }
+}
+
+/// Top-left of the cursor image in frame space. XFixes reports the cursor position at its
+/// HOTSPOT, so each axis is `pos - hot - capture offset`; results may be negative near edges.
+#[inline]
+fn cursor_image_origin(x: i16, y: i16, xhot: u16, yhot: u16, cap_x: i32, cap_y: i32) -> (i32, i32) {
+    let shift = |pos: i16, hot: u16, cap: i32| i32::from(pos) - i32::from(hot) - cap;
+    (shift(x, xhot, cap_x), shift(y, yhot, cap_y))
+}
+
+/// Blend the XFixes cursor image (one ARGB `u32` per pixel, `cur_w` x `cur_h`) into the BGRA
+/// frame with its top-left at `(img_x, img_y)`; pixels outside the frame are skipped.
+fn overlay_cursor(
+    frame: &mut [u8],
+    stride: usize,
+    frame_w: i32,
+    frame_h: i32,
+    cur_w: i32,
+    cur_h: i32,
+    pixels: &[u32],
+    img_x: i32,
+    img_y: i32,
+) {
+    // Frame-space coordinates that fall inside the frame on each axis.
+    let visible_rows = 0..frame_h;
+    let visible_cols = 0..frame_w;
+    for cy in 0..cur_h {
+        let fy = img_y + cy;
+        if !visible_rows.contains(&fy) {
+            continue;
+        }
+        for cx in 0..cur_w {
+            let fx = img_x + cx;
+            if !visible_cols.contains(&fx) {
+                continue;
+            }
+            // Big-endian bytes of the packed ARGB word come out in A, R, G, B order.
+            let [a, r, g, b] = pixels[(cy * cur_w + cx) as usize].to_be_bytes();
+            let start = fy as usize * stride + fx as usize * 4;
+            blend_pixel(&mut frame[start..start + 4], r, g, b, a);
+        }
+    }
+}
+
+/// CPU watermark: holds the raw RGBA pixels in host memory plus the placement/animation state,
+/// for blending directly into the captured BGRA frame on the CPU.
+struct X11Watermark {
+    pixels: Vec<u8>, // RGBA, row-major, w*h*4
+    w: i32, // image width in pixels
+    h: i32, // image height in pixels
+    pos_x: i32, // top-left placement used by `blend_into`
+    pos_y: i32,
+    sub_x: f64, // f64 position tracked for the animated mode, truncated into pos_x/pos_y
+    sub_y: f64,
+    vel_x: f64, // animated-mode step added per `update_position` call
+    vel_y: f64,
+    loaded: bool, // true only when an image with non-zero size was decoded
+}
+
+impl X11Watermark {
+    /// Load the watermark image at `path` as RGBA. An empty path, or an image that cannot be
+    /// opened/decoded, yields an unloaded watermark that the other methods treat as a no-op.
+    fn load(path: &str) -> Self {
+        let mut wm = Self {
+            pixels: Vec::new(),
+            w: 0,
+            h: 0,
+            pos_x: 0,
+            pos_y: 0,
+            sub_x: 0.0,
+            sub_y: 0.0,
+            vel_x: 2.0,
+            vel_y: 2.0,
+            loaded: false,
+        };
+        if path.is_empty() {
+            return wm;
+        }
+        if let Ok(img) = image::open(std::path::Path::new(path)) {
+            let rgba = img.to_rgba8();
+            wm.w = rgba.width() as i32;
+            wm.h = rgba.height() as i32;
+            wm.pixels = rgba.into_vec();
+            wm.loaded = wm.w > 0 && wm.h > 0;
+        }
+        wm
+    }
+
+    /// Set the top-left placement for this frame from the location setting. The animated mode
+    /// bounces off the frame edges; on an axis where the image exceeds the frame it pins to 0.
+    fn update_position(&mut self, frame_w: i32, frame_h: i32, loc_enum: i32) {
+        if !self.loaded {
+            return;
+        }
+        match WatermarkLocation::from(loc_enum) {
+            WatermarkLocation::TL => {
+                self.pos_x = 0;
+                self.pos_y = 0;
+            }
+            WatermarkLocation::TR => {
+                self.pos_x = frame_w - self.w;
+                self.pos_y = 0;
+            }
+            WatermarkLocation::BL => {
+                self.pos_x = 0;
+                self.pos_y = frame_h - self.h;
+            }
+            WatermarkLocation::BR => {
+                self.pos_x = frame_w - self.w;
+                self.pos_y = frame_h - self.h;
+            }
+            WatermarkLocation::MI => {
+                self.pos_x = (frame_w - self.w) / 2;
+                self.pos_y = (frame_h - self.h) / 2;
+            }
+            WatermarkLocation::AN => {
+                self.sub_x += self.vel_x;
+                self.sub_y += self.vel_y;
+                if self.sub_x <= 0.0 {
+                    self.sub_x = 0.0;
+                    self.vel_x = self.vel_x.abs();
+                } else if self.sub_x + self.w as f64 >= frame_w as f64 {
+                    self.sub_x = (frame_w - self.w).max(0) as f64; // never clamp below 0
+                    self.vel_x = -self.vel_x.abs();
+                }
+                if self.sub_y <= 0.0 {
+                    self.sub_y = 0.0;
+                    self.vel_y = self.vel_y.abs();
+                } else if self.sub_y + self.h as f64 >= frame_h as f64 {
+                    self.sub_y = (frame_h - self.h).max(0) as f64; // never clamp below 0
+                    self.vel_y = -self.vel_y.abs();
+                }
+                self.pos_x = self.sub_x as i32;
+                self.pos_y = self.sub_y as i32;
+            }
+            WatermarkLocation::None => return,
+        }
+    }
+
+    /// Alpha-blend the watermark into the BGRA `frame` (rows `stride` bytes apart) at its current
+    /// position, skipping pixels outside `frame_w` x `frame_h`. No-op when no image is loaded.
+    fn blend_into(&self, frame: &mut [u8], stride: usize, frame_w: i32, frame_h: i32) {
+        if !self.loaded {
+            return;
+        }
+        for y in 0..self.h {
+            let ty = self.pos_y + y;
+            if ty < 0 || ty >= frame_h {
+                continue;
+            }
+            for x in 0..self.w {
+                let tx = self.pos_x + x;
+                if tx < 0 || tx >= frame_w {
+                    continue;
+                }
+                // Index in usize: the i32 product `(y * w + x) * 4` can overflow on huge images.
+                let src = (y as usize * self.w as usize + x as usize) * 4;
+                let px = &self.pixels[src..src + 4];
+                let off = ty as usize * stride + tx as usize * 4;
+                blend_pixel(&mut frame[off..off + 4], px[0], px[1], px[2], px[3]);
+            }
+        }
+    }
+}
+
+/// A captured raw BGRA frame held in a pooled shm surface, ready to encode. Carries the surface
+/// pointer + geometry so the encode thread reads it directly (no copy) and can rebuild its pipeline
+/// if the capture size changed (auto-adjust), plus the pool's surface generation so a rebuild also
+/// happens when the surfaces were recreated at the SAME size.
+struct RawFrame {
+    idx: usize, // pool index of the surface; the encode thread passes it to `recycle`
+    ptr: *mut u8, // base address of the surface's local shm mapping
+    len: usize, // mapping length in bytes
+    width: u16,
+    height: u16,
+    stride: usize, // bytes per row
+    generation: u64, // pool surface generation this frame was captured under
+}
+// The pointer addresses a pooled shm surface the pool guarantees is not reused until the encode
+// thread recycles this frame, so the handle is safe to move across the thread boundary.
+unsafe impl Send for RawFrame {}
+
+struct PoolInner {
+    free: Vec<usize>, // surface indices available to `acquire`
+    slot: Option<RawFrame>, // the single published frame waiting for the encode thread
+}
+
+/// Demand-driven capture->encode handoff. The capture thread writes into a pooled surface and
+/// `publish`es it into a single slot; the encode thread `take`s it. Capture stays at most one frame
+/// ahead of encode: `acquire`/`publish` BLOCK (bounded) until the encoder frees a surface / drains
+/// the slot, so capture is throttled to the encode rate. Because X11 capture is pull-based (no
+/// backlog), this throttling -- rather than capturing-then-dropping -- means capture never wastes a
+/// full-resolution shm round-trip on a frame that would be discarded, while still overlapping the
+/// next capture with the current encode (the throughput win). The encoder only ever sees a
+/// contiguous frame stream, so the H.264 reference chain stays valid.
+struct FramePool {
+    inner: Mutex<PoolInner>, // free list + published slot, only touched under this lock
+    cv: Condvar, // notified on publish, recycle and shutdown
+    stop: AtomicBool, // pool shutdown flag: set by `shutdown`, read by `take`/`drain_for_resize`
+    /// Surface generation: bumped by the capture thread each time the shm surfaces backing the
+    /// pool are destroyed and recreated. Published frames carry it so the encode thread rebuilds
+    /// its pipeline -- dropping encoder state keyed to surface base pointers (e.g. NVENC's
+    /// pinned-host registrations) -- even when a resize flap lands back on the old dimensions and
+    /// the recreated segments reuse the old virtual addresses.
+    generation: AtomicU64,
+}
+
+impl FramePool {
+    fn new(n: usize) -> Self { // every surface index in `0..n` starts out free
+        Self {
+            inner: Mutex::new(PoolInner { free: (0..n).collect(), slot: None }),
+            cv: Condvar::new(),
+            stop: AtomicBool::new(false),
+            generation: AtomicU64::new(0),
+        }
+    }
+
+    /// Capture: record that the surfaces were recreated. Only called after `drain_for_resize`
+    /// succeeded, so no frame from the previous generation is still in flight.
+    fn bump_generation(&self) {
+        self.generation.fetch_add(1, Ordering::Relaxed);
+    }
+
+    /// Current surface generation, stamped onto each published frame.
+    fn generation(&self) -> u64 {
+        self.generation.load(Ordering::Relaxed)
+    }
+
+    /// Capture: get a free surface to write the next frame into, BLOCKING (bounded, re-checking
+    /// the caller's `stop` every 20ms) until one is free, which throttles capture to the encode
+    /// rate. A free surface wins over a pending stop; None means stop was set and none was free.
+    fn acquire(&self, stop: &AtomicBool) -> Option<usize> {
+        let mut g = self.inner.lock().unwrap();
+        loop {
+            if let Some(idx) = g.free.pop() {
+                return Some(idx);
+            }
+            if stop.load(Ordering::Relaxed) {
+                return None;
+            }
+            let (gg, _) = self.cv.wait_timeout(g, Duration::from_millis(20)).unwrap();
+            g = gg;
+        }
+    }
+
+    /// Capture: publish the just-captured frame into the single slot, BLOCKING (bounded) until the
+    /// encode thread has taken the previous one -- capture stays at most one frame ahead, never
+    /// dropping. Returns false on stop: the frame is discarded and its surface is not re-freed.
+    fn publish(&self, frame: RawFrame, stop: &AtomicBool) -> bool {
+        let mut g = self.inner.lock().unwrap();
+        loop {
+            if g.slot.is_none() {
+                g.slot = Some(frame);
+                drop(g);
+                self.cv.notify_all();
+                return true;
+            }
+            if stop.load(Ordering::Relaxed) {
+                return false;
+            }
+            let (gg, _) = self.cv.wait_timeout(g, Duration::from_millis(20)).unwrap();
+            g = gg;
+        }
+    }
+
+    /// Encode: block until a frame is available (Some) or the pool is shut down (None); a frame
+    /// already in the slot wins. The wait is bounded (re-checking the pool's `stop` every 20ms)
+    /// as defense-in-depth against a lost wakeup, so a racing shutdown cannot block this forever.
+    fn take(&self) -> Option<RawFrame> {
+        let mut g = self.inner.lock().unwrap();
+        loop {
+            if let Some(f) = g.slot.take() {
+                return Some(f);
+            }
+            if self.stop.load(Ordering::Acquire) {
+                return None;
+            }
+            let (gg, _) = self.cv.wait_timeout(g, Duration::from_millis(20)).unwrap();
+            g = gg;
+        }
+    }
+
+    /// Encode: return a surface to the free list after it has been encoded, then wake waiters.
+    fn recycle(&self, idx: usize) {
+        self.inner.lock().unwrap().free.push(idx);
+        self.cv.notify_all();
+    }
+
+    /// Capture: before recreating surfaces (auto-adjust resize), reclaim the pending slot and wait
+    /// until every surface is back in the free list (the encode thread finished any in-flight
+    /// frame), so no surface is destroyed while the encode thread is reading it. The wait is bounded
+    /// and also breaks on stop (pool shutdown OR the external stop) -- with panic=abort gone, a
+    /// panicked/dead encode thread that never recycles must not wedge the capture thread here
+    /// forever; a requested stop unblocks it. Returns true if it fully drained (safe to recreate
+    /// surfaces), false if it aborted on stop (the caller then tears down, which joins the encode
+    /// thread before destroying surfaces, so the resize-safety guarantee still holds).
+    fn drain_for_resize(&self, n: usize, stop: &AtomicBool) -> bool {
+        let mut g = self.inner.lock().unwrap();
+        if let Some(old) = g.slot.take() {
+            g.free.push(old.idx);
+        }
+        while g.free.len() < n {
+            if self.stop.load(Ordering::Acquire) || stop.load(Ordering::Relaxed) {
+                return false;
+            }
+            let (gg, _) = self.cv.wait_timeout(g, Duration::from_millis(20)).unwrap();
+            g = gg;
+        }
+        true
+    }
+
+    fn shutdown(&self) {
+        // Acquire the inner mutex BEFORE storing stop + notifying: this closes the lost-wakeup
+        // window where take() has checked stop==false but not yet parked -- holding the lock
+        // means take() is either still before its stop-check or already parked (and will get the
+        // notify). Notify after dropping the guard so the woken thread doesn't immediately block
+        // on the lock we hold.
+        let g = self.inner.lock().unwrap();
+        self.stop.store(true, Ordering::Release);
+        drop(g);
+        self.cv.notify_all();
+    }
+}
+
+/// Encode thread body: pull the next captured frame (the pool never drops), (re)build the pipeline
+/// if the size or surface generation changed, apply cross-thread controls, encode, recycle the
+/// surface, then deliver. Recycling before delivery means a slow consumer never holds a surface.
+fn encode_loop<F>(pool: &FramePool, controls: &Controls, settings: &RustCaptureSettings, on_frame: &mut F)
+where
+    F: FnMut(Vec<EncodedStripe>),
+{
+    let mut psettings = settings.clone();
+    // Optional Unix-socket H.264 fan-out (parity with the Wayland path). Bound ONCE per capture
+    // and owned here, outside the pipeline: rebuilds on auto-adjust resizes must keep the socket
+    // listener and any attached recorders alive. Full-frame H.264 only; warn on configurations
+    // that can't produce a single recordable stream.
+    let recording_sink = RecordingSink::try_bind(&settings.recording_socket);
+    if recording_sink.is_some() {
+        if settings.output_mode == 0 {
+            eprintln!("[recording_sink] recording_socket set but output_mode is JPEG; no recordable H.264 stream.");
+        } else if settings.use_cpu && !settings.use_openh264 && !settings.h264_fullframe {
+            eprintln!("[recording_sink] recording_socket set but the CPU encoder is striped; set h264_fullframe=true for a recordable stream.");
+        }
+    }
+    let mut pipeline: Option<X11Pipeline> = None;
+    let (mut pw, mut ph) = (0i32, 0i32); // frame size the current pipeline was built for
+    let mut pgen = 0u64; // surface generation the current pipeline was built for
+
+    while let Some(frame) = pool.take() { // None = pool shut down: the loop ends
+        let (fw, fh) = (frame.width as i32, frame.height as i32);
+        // Rebuild on a size change OR a surface-generation change: recreated shm segments often
+        // reuse the old virtual base addresses, so encoder state keyed to base pointers (NVENC's
+        // pinned-host cache) must be dropped even when the dimensions are identical.
+        if pipeline.is_none() || pw != fw || ph != fh || pgen != frame.generation {
+            psettings.width = fw;
+            psettings.height = fh;
+            // The controls atomics always hold the CURRENT rate values (the update_* setters
+            // store them there), so a rebuild carries live bitrate/VBV/fps changes forward
+            // instead of reverting to the capture-start settings.
+            psettings.h264_bitrate_kbps = controls.bitrate_kbps.load(Ordering::Relaxed);
+            psettings.h264_vbv_buffer_size_kb = controls.vbv_kb.load(Ordering::Relaxed);
+            psettings.target_fps =
+                (controls.fps_milli.load(Ordering::Relaxed).max(1) as f64) / 1000.0;
+            // Drop the old pipeline (and its NVENC/VAAPI session + GPU surfaces) BEFORE building
+            // the new one, so an auto-adjust resize never holds two full encoder allocations at
+            // once (transient 2x GPU memory).
+            drop(pipeline.take());
+            pipeline = Some(X11Pipeline::new(psettings.clone(), recording_sink.clone()));
+            pw = fw;
+            ph = fh;
+            pgen = frame.generation;
+        }
+        let pl = pipeline.as_mut().unwrap(); // always Some here: built above when it was None
+
+        // Cross-thread controls are applied here, on the thread that owns the pipeline. The
+        // Acquire on the rate_dirty swap pairs with the Release store on the update_* setters, so
+        // the payload (bitrate/vbv/fps) is fully visible -- never seen half-applied.
+        if controls.force_idr.swap(false, Ordering::Relaxed) {
+            pl.request_idr();
+        }
+        if controls.rate_dirty.swap(false, Ordering::Acquire) {
+            let b = controls.bitrate_kbps.load(Ordering::Relaxed);
+            let v = controls.vbv_kb.load(Ordering::Relaxed);
+            let fps = (controls.fps_milli.load(Ordering::Relaxed).max(1) as f64) / 1000.0;
+            pl.update_rate(b, v, fps);
+        }
+
+        // SAFETY: the pool guarantees this surface is not reused until we recycle it below.
+        let buf = unsafe { std::slice::from_raw_parts(frame.ptr, frame.len) };
+        let stripes = pl.process(buf, frame.stride);
+        pool.recycle(frame.idx); // `buf` must not be used past this point
+        if !stripes.is_empty() {
+            on_frame(stripes);
+        }
+    }
+}
+
+/// Run the X11 capture pipeline until `stop` is set. Capture (this thread) grabs frames into a pool
+/// of shm surfaces and hands each one to an internal encode+deliver thread; `on_frame(stripes)`
+/// runs on that encode thread once per encoded frame. Splitting capture from encode lets the two
+/// overlap (throughput); capture blocks rather than drops when the encoder falls behind, so the
+/// delivered H.264 stays a valid contiguous reference chain. `encode_tid_tx` reports the encode
+/// thread's id (for the caller's re-entrant-stop handling).
+///
+/// Blocking; intended to run on a dedicated thread. The X connection + shm surfaces live on this
+/// thread; the encoder lives on the encode thread; nothing X-related crosses threads.
+pub fn run_capture<F>(
+    settings: RustCaptureSettings,
+    controls: Arc<Controls>,
+    encode_tid_tx: Sender<thread::ThreadId>,
+    on_frame: F,
+) -> Result<(), String>
+where
+    F: FnMut(Vec<EncodedStripe>) + Send + 'static,
+{
+    let (conn, screen_num) =
+        x11rb::connect(None).map_err(|e| format!("X11 connect failed: {e}"))?;
+    let root = conn.setup().roots[screen_num].root;
+    let root_depth = conn.setup().roots[screen_num].root_depth;
+
+    // The Z-pixmap byte depth must be 4 (BGRA); modern servers use 32bpp for depth 24/32.
+    let bpp = conn
+        .setup()
+        .pixmap_formats
+        .iter()
+        .find(|f| f.depth == root_depth)
+        .map(|f| f.bits_per_pixel)
+        .unwrap_or(32);
+    if bpp != 32 {
+        return Err(format!(
+            "unsupported root depth {root_depth} ({bpp} bpp); only 32-bpp BGRA is supported"
+        ));
+    }
+
+    conn.shm_query_version()
+        .map_err(|e| format!("shm_query_version: {e}"))?
+        .reply()
+        .map_err(|e| format!("XShm unavailable: {e}"))?;
+    if settings.capture_cursor {
+        conn.xfixes_query_version(5, 0)
+            .map_err(|e| format!("xfixes_query_version: {e}"))?
+            .reply()
+            .map_err(|e| format!("XFixes unavailable: {e}"))?;
+    }
+
+    let geo = conn
+        .get_geometry(root)
+        .map_err(|e| format!("get_geometry: {e}"))?
+        .reply()
+        .map_err(|e| format!("get_geometry reply: {e}"))?;
+
+    let (mut cap_w, mut cap_h) = resolve_dims(geo.width, geo.height, &settings);
+    let cap_x = settings.capture_x.max(0) as i16;
+    let cap_y = settings.capture_y.max(0) as i16;
+
+    // Pool of shm surfaces. 3 is the working set (one in-capture, one in-slot, one in-encode); a
+    // demand-driven capture stays one frame ahead and blocks beyond that, so 3 suffices and keeps
+    // the memory cost (3 * W*H*4) bounded -- it matters at 4K.
+    const POOL_N: usize = 3;
+    let mut surfaces: Vec<ShmSurface> = Vec::with_capacity(POOL_N);
+    for _ in 0..POOL_N {
+        surfaces.push(ShmSurface::create(&conn, cap_w, cap_h)?);
+    }
+    let pool = Arc::new(FramePool::new(POOL_N));
+
+    let mut watermark = X11Watermark::load(&settings.watermark_path);
+
+    // Encode + deliver thread: owns the pipeline + the Python callback, consumes raw frames from the
+    // pool. It reports its thread id so the caller can detect a re-entrant stop from the callback.
+    let enc_pool = pool.clone();
+    let enc_controls = controls.clone();
+    let enc_settings = settings.clone();
+    let encode_thread = thread::spawn(move || {
+        let _ = encode_tid_tx.send(thread::current().id());
+        let mut on_frame = on_frame;
+        encode_loop(&enc_pool, &enc_controls, &enc_settings, &mut on_frame);
+    });
+
+    let mut next_frame = Instant::now();
+
+    let result = (|| -> Result<(), String> {
+        while !controls.stop.load(Ordering::Relaxed) {
+            // Dynamic pacing: re-read fps each iteration so update_framerate takes effect live.
+            let fps = (controls.fps_milli.load(Ordering::Relaxed).max(1) as f64) / 1000.0;
+            let frame_dur = Duration::from_secs_f64(1.0 / fps.max(1.0));
+            // Frame pacing: sleep until the next deadline; if already behind, yield so a
+            // concurrent stop / other work can run instead of busy-spinning.
+            let now = Instant::now();
+            if now < next_frame {
+                std::thread::sleep(next_frame - now);
+            } else {
+                std::thread::yield_now();
+            }
+            next_frame += frame_dur;
+            let now = Instant::now();
+            if next_frame < now {
+                next_frame = now;
+            }
+            if controls.stop.load(Ordering::Relaxed) {
+                break;
+            }
+
+            // Auto-adjust: on a geometry change, drain in-flight frames then recreate the surfaces
+            // (the encode thread rebuilds its pipeline when it sees the new size or generation --
+            // the generation covers a flap back to the old size before the encoder saw a frame).
+            if settings.auto_adjust_screen_capture_size {
+                if let Some(g) = conn.get_geometry(root).ok().and_then(|c| c.reply().ok()) {
+                    let (nw, nh) = resolve_dims(g.width, g.height, &settings);
+                    if nw != cap_w || nh != cap_h {
+                        // If a stop races the drain, skip the recreate and fall through to teardown
+                        // (which joins the encode thread before destroying surfaces).
+                        if !pool.drain_for_resize(POOL_N, &controls.stop) {
+                            break;
+                        }
+                        // The drain can outlast ANOTHER geometry change (a fast flap), so
+                        // re-resolve against the current root before recreating: surfaces at a
+                        // stale size would make the next shm_get_image exceed the root. If the
+                        // flap fully reverted, the surfaces are still right -- keep them (and
+                        // their generation: nothing the encoder holds went stale).
+                        let (fw, fh) = conn
+                            .get_geometry(root)
+                            .ok()
+                            .and_then(|c| c.reply().ok())
+                            .map(|g| resolve_dims(g.width, g.height, &settings))
+                            .unwrap_or((nw, nh));
+                        if fw != cap_w || fh != cap_h {
+                            for s in surfaces.iter_mut() {
+                                s.destroy(&conn);
+                            }
+                            surfaces.clear();
+                            cap_w = fw;
+                            cap_h = fh;
+                            for _ in 0..POOL_N {
+                                surfaces.push(ShmSurface::create(&conn, cap_w, cap_h)?);
+                            }
+                            pool.bump_generation();
+                        }
+                    }
+                }
+            }
+
+            // Acquire a pooled surface (blocks until the encoder frees one) and grab the region into
+            // it (synchronous: reply() waits). None means stop was observed while waiting.
+            let idx = match pool.acquire(&controls.stop) {
+                Some(i) => i,
+                None => break,
+            };
+            let surface = &mut surfaces[idx];
+            conn.shm_get_image(
+                root,
+                cap_x,
+                cap_y,
+                cap_w,
+                cap_h,
+                !0u32,
+                ImageFormat::Z_PIXMAP.into(),
+                surface.shmseg,
+                0,
+            )
+            .map_err(|e| format!("shm_get_image: {e}"))?
+            .reply()
+            .map_err(|e| format!("shm_get_image reply: {e}"))?;
+
+            let frame_w = cap_w as i32;
+            let frame_h = cap_h as i32;
+            let stride = surface.stride;
+            let buf = surface.as_mut_slice();
+
+            // Cursor overlay (XFixes reports the hotspot position; draw at pos - hot, offset
+            // by the capture origin).
+            if settings.capture_cursor {
+                if let Some(c) = conn
+                    .xfixes_get_cursor_image()
+                    .ok()
+                    .and_then(|c| c.reply().ok())
+                {
+                    if c.width > 0 && c.height > 0 {
+                        let (img_x, img_y) = cursor_image_origin(
+                            c.x,
+                            c.y,
+                            c.xhot,
+                            c.yhot,
+                            settings.capture_x,
+                            settings.capture_y,
+                        );
+                        overlay_cursor(
+                            buf,
+                            stride,
+                            frame_w,
+                            frame_h,
+                            c.width as i32,
+                            c.height as i32,
+                            &c.cursor_image,
+                            img_x,
+                            img_y,
+                        );
+                    }
+                }
+            }
+
+            // Watermark overlay.
+            if watermark.loaded {
+                watermark.update_position(frame_w, frame_h, settings.watermark_location_enum);
+                watermark.blend_into(buf, stride, frame_w, frame_h);
+            }
+
+            // Hand the finished raw frame to the encode thread (blocks until the slot is free; never
+            // drops). A false return means stop was observed while waiting -- exit the loop.
+            let published = pool.publish(
+                RawFrame {
+                    idx,
+                    ptr: surface.addr,
+                    len: surface.size,
+                    width: cap_w,
+                    height: cap_h,
+                    stride,
+                    generation: pool.generation(),
+                },
+                &controls.stop,
+            );
+            if !published {
+                break;
+            }
+        }
+        Ok(())
+    })();
+
+    // Teardown: stop + join the encode thread BEFORE destroying surfaces it may still be reading.
+    pool.shutdown();
+    let _ = encode_thread.join();
+    for s in surfaces.iter_mut() {
+        s.destroy(&conn);
+    }
+    result
+}
+
+#[cfg(test)]
+mod pool_tests {
+    use super::*;
+
+    fn dummy(idx: usize) -> RawFrame {
+        RawFrame {
+            idx,
+            ptr: std::ptr::null_mut(),
+            len: 0,
+            width: 0,
+            height: 0,
+            stride: 0,
+            generation: 0,
+        }
+    }
+
+    #[test]
+    fn roundtrip_then_recycle_returns_all_surfaces() {
+        let p = FramePool::new(3);
+        let stop = AtomicBool::new(false);
+        let a = p.acquire(&stop).unwrap();
+        assert!(p.publish(dummy(a), &stop));
+        let f = p.take().unwrap();
+        assert_eq!(f.idx, a);
+        p.recycle(f.idx);
+        // All three surfaces are acquirable again and distinct.
+        let (x, y, z) = (
+            p.acquire(&stop).unwrap(),
+            p.acquire(&stop).unwrap(),
+            p.acquire(&stop).unwrap(),
+        );
+        assert!(x != y && y != z && x != z);
+    }
+
+    #[test]
+    fn acquire_returns_none_when_exhausted_and_stopped() {
+        let p = FramePool::new(2);
+        let stop = AtomicBool::new(false);
+        let _a = p.acquire(&stop).unwrap();
+        let _b = p.acquire(&stop).unwrap();
+        stop.store(true, Ordering::Relaxed); // no free left + stop -> None (after the bounded wait)
+        assert!(p.acquire(&stop).is_none());
+    }
+
+    #[test]
+    fn publish_returns_false_when_slot_full_and_stopped() {
+        let p = FramePool::new(3);
+        let stop = AtomicBool::new(false);
+        let a = p.acquire(&stop).unwrap();
+        assert!(p.publish(dummy(a), &stop)); // slot now occupied
+        let b = p.acquire(&stop).unwrap();
+        stop.store(true, Ordering::Relaxed);
+        assert!(!p.publish(dummy(b), &stop)); // slot full + stop -> false (frame discarded)
+    }
+
+    #[test]
+    fn drain_for_resize_waits_until_all_free() {
+        let p = Arc::new(FramePool::new(3));
+        let stop = AtomicBool::new(false);
+        let held = [
+            p.acquire(&stop).unwrap(),
+            p.acquire(&stop).unwrap(),
+            p.acquire(&stop).unwrap(),
+        ];
+        // Another thread recycles everything shortly; drain must block until free == 3.
+        let p2 = p.clone();
+        let t = thread::spawn(move || {
+            thread::sleep(Duration::from_millis(30));
+            for idx in held {
+                p2.recycle(idx);
+            }
+        });
+        assert!(p.drain_for_resize(3, &stop)); // fully drained
+        t.join().unwrap();
+        // Pool is whole again.
+        assert!(p.acquire(&stop).is_some());
+    }
+
+    #[test]
+    fn drain_for_resize_aborts_on_stop() {
+        // A surface is held (never recycled, as a dead encode thread would leave it); drain must not
+        // block forever -- setting stop unblocks it and it reports it did NOT fully drain.
+        let p = FramePool::new(3);
+        let stop = AtomicBool::new(false);
+        let _held = p.acquire(&stop).unwrap();
+        stop.store(true, Ordering::Relaxed);
+        assert!(!p.drain_for_resize(3, &stop)); // aborted on stop (bounded, no hang)
+    }
+
+    #[test]
+    fn resize_flap_reclaim_changes_generation_at_identical_dims() {
+        // W1->W2->W1 flap where the encoder never takes the W2 frame: drain reclaims it
+        // from the slot, the surfaces recreate twice, and the next taken frame lands back
+        // on the ORIGINAL dimensions -- the generation change is then the only rebuild
+        // signal (it invalidates encoder state keyed to the reused surface addresses).
+        let p = FramePool::new(3);
+        let stop = AtomicBool::new(false);
+        let a = p.acquire(&stop).unwrap();
+        assert!(p.publish(
+            RawFrame { generation: p.generation(), width: 1920, height: 1080, ..dummy(a) },
+            &stop
+        ));
+        let f = p.take().unwrap();
+        let (last_gen, last_dims) = (f.generation, (f.width, f.height));
+        p.recycle(f.idx);
+        // Capture sees W2: drain, recreate (gen 1), publish a W2 frame.
+        assert!(p.drain_for_resize(3, &stop));
+        p.bump_generation();
+        let b = p.acquire(&stop).unwrap();
+        assert!(p.publish(
+            RawFrame { generation: p.generation(), width: 2560, height: 1600, ..dummy(b) },
+            &stop
+        ));
+        // W1 returns before the encoder takes: drain reclaims the W2 frame, recreate (gen 2).
+        assert!(p.drain_for_resize(3, &stop));
+        p.bump_generation();
+        let c = p.acquire(&stop).unwrap();
+        assert!(p.publish(
+            RawFrame { generation: p.generation(), width: 1920, height: 1080, ..dummy(c) },
+            &stop
+        ));
+        let g = p.take().unwrap();
+        assert_eq!((g.width, g.height), last_dims, "flap lands on identical dimensions");
+        assert_eq!(g.generation, 2);
+        assert_ne!(g.generation, last_gen, "generation is the only rebuild signal");
+    }
+
+    #[test]
+    fn generation_bumps_on_recreate_and_rides_published_frames() {
+        let p = FramePool::new(3);
+        let stop = AtomicBool::new(false);
+        assert_eq!(p.generation(), 0);
+        let a = p.acquire(&stop).unwrap();
+        assert!(p.publish(RawFrame { generation: p.generation(), ..dummy(a) }, &stop));
+        let f = p.take().unwrap();
+        assert_eq!(f.generation, 0);
+        p.recycle(f.idx);
+        // Surfaces recreated (same dims): frames published afterwards must carry a NEW
+        // generation -- the encode thread's rebuild trigger.
+        p.bump_generation();
+        let b = p.acquire(&stop).unwrap();
+        assert!(p.publish(RawFrame { generation: p.generation(), ..dummy(b) }, &stop));
+        assert_eq!(p.take().unwrap().generation, 1);
+    }
+}
+
+#[cfg(test)]
+mod cursor_tests {
+    use super::*;
+
+    #[test]
+    fn origin_subtracts_hotspot_and_capture_offset() {
+        // Pointer at (100,80), hotspot (4,6): the image top-left is (96,74).
+        assert_eq!(cursor_image_origin(100, 80, 4, 6, 0, 0), (96, 74));
+        // Capture region offset shifts it further.
+        assert_eq!(cursor_image_origin(100, 80, 4, 6, 10, 20), (86, 54));
+        // Hotspot near the frame origin pushes the top-left negative (clipped when drawn).
+        assert_eq!(cursor_image_origin(1, 1, 8, 8, 0, 0), (-7, -7));
+    }
+
+    #[test]
+    fn overlay_blits_at_hotspot_offset_origin() {
+        // 8x8 BGRA frame; 2x2 opaque white cursor, hotspot (1,1), pointer at (4,4):
+        // pixels must land at (3,3)..(4,4), not at the hotspot position (4,4)..(5,5).
+        let stride = 8 * 4;
+        let mut frame = vec![0u8; stride * 8];
+        let pixels = [0xFFFF_FFFFu32; 4];
+        let (ox, oy) = cursor_image_origin(4, 4, 1, 1, 0, 0);
+        overlay_cursor(&mut frame, stride, 8, 8, 2, 2, &pixels, ox, oy);
+        let px = |x: usize, y: usize| frame[y * stride + x * 4];
+        assert_eq!(px(3, 3), 255);
+        assert_eq!(px(4, 4), 255);
+        assert_eq!(px(2, 2), 0);
+        assert_eq!(px(5, 5), 0, "no pixel at the un-offset (hotspot) corner");
+    }
+
+    #[test]
+    fn overlay_clips_negative_origin_at_frame_edge() {
+        // Hotspot at the frame corner: origin (-1,-1); only the in-frame quadrant lands.
+        let stride = 4 * 4;
+        let mut frame = vec![0u8; stride * 4];
+        let pixels = [0xFFFF_FFFFu32; 4];
+        let (ox, oy) = cursor_image_origin(0, 0, 1, 1, 0, 0);
+        overlay_cursor(&mut frame, stride, 4, 4, 2, 2, &pixels, ox, oy);
+        assert_eq!(frame[0], 255); // (0,0) holds the cursor's bottom-right pixel
+        assert!(frame[4..].iter().all(|&b| b == 0), "no writes outside (0,0)");
+    }
+}
diff --git a/pixelflux_wayland/.gitignore b/pixelflux_wayland/.gitignore
deleted file mode 100644
index 2c96eb1..0000000
--- a/pixelflux_wayland/.gitignore
+++ /dev/null
@@ -1,2 +0,0 @@
-target/
-Cargo.lock
diff --git a/pixelflux_wayland/Cargo.toml b/pixelflux_wayland/Cargo.toml
deleted file mode 100644
index de4b6c1..0000000
--- a/pixelflux_wayland/Cargo.toml
+++ /dev/null
@@ -1,58 +0,0 @@
-[package]
-name = "pixelflux_wayland"
-version = "1.6.4"
-edition = "2021"
-
-[lib]
-name = "pixelflux_wayland"
-crate-type = ["cdylib"]
-
-[dependencies]
-pyo3 = { version = "0.27.2", features = ["extension-module"] }
-wayland-server = { version = "0.31.10", features = ["libwayland_1_23"] }
-wayland-protocols = { version = "0.31", features = ["server"] }
-crossbeam-channel = "0.5"
-env_logger = "0.10"
-log = "0.4"
-calloop = "0.12"
-turbojpeg = "1.3" 
-rayon = "1.10"
-x264-sys = "0.2.2"
-libc = "0.2"
-yuv = "0.8.9"
-gbm = "0.14" 
-libloading = "0.8"
-libva-sys = "0.1.2"
-xcursor = "0.3.1"
-image = "=0.25.9"
-ffmpeg-next = "8.0"
-ffmpeg-sys-next = "8.0"
-
-[dependencies.nvenc-sys]
-git = "https://github.com/legion-labs/nvenc-sys"
-rev = "996be4ceac8112e14ae127adcf8c699bcc1618f5"
-features = ["cuda"]
-
-[dependencies.smithay]
-git = "https://github.com/Smithay/smithay"
-rev = "ca932e042fa9ad150605c150a86275b85f9ad5b3"
-default-features = false
-features = [
-    "backend_drm",
-    "backend_egl",
-    "backend_gbm",
-    "backend_libinput",
-    "backend_udev",
-    "renderer_gl", 
-    "renderer_pixman",
-    "use_system_lib",
-    "desktop",
-    "wayland_frontend",
-]
-
-[profile.release]
-opt-level = 3
-lto = "fat"
-codegen-units = 1
-panic = "abort"
-strip = true
diff --git a/pixelflux_wayland/src/recording_sink.rs b/pixelflux_wayland/src/recording_sink.rs
deleted file mode 100644
index 2946b1c..0000000
--- a/pixelflux_wayland/src/recording_sink.rs
+++ /dev/null
@@ -1,152 +0,0 @@
-use std::fs;
-use std::io::{ErrorKind, Write};
-use std::os::unix::net::{UnixListener, UnixStream};
-use std::sync::atomic::{AtomicBool, AtomicU32, Ordering};
-use std::sync::{Arc, Mutex};
-use std::thread;
-use std::time::Duration;
-
-/// Core settings and state for the out-of-band H.264 recording sink.
-///
-/// Defines socket connection timeouts, polling intervals, environment fallbacks,
-/// keyframe cadence, and the primary RecordingSink structure used to multiplex
-/// the elementary stream to connected Unix socket clients.
-const WRITE_TIMEOUT: Duration = Duration::from_millis(100);
-const ACCEPT_POLL_INTERVAL: Duration = Duration::from_millis(50);
-pub const RECORDING_SOCKET_ENV: &str = "PIXELFLUX_RECORDING_SOCKET";
-const DEFAULT_KEYINT_FRAMES: u32 = 60;
-
-pub struct RecordingSink {
-    path: String,
-    clients: Arc<Mutex<Vec<UnixStream>>>,
-    shutdown: Arc<AtomicBool>,
-    frames_since_idr: Arc<AtomicU32>,
-    keyint_frames: u32,
-}
-
-impl RecordingSink {
-    /// @brief Resolves the configured socket path and tries to bind it.
-    ///
-    /// @input settings_path: The configured path for the socket.
-    /// @return Option containing the new RecordingSink instance.
-    pub fn try_bind(settings_path: &str) -> Option<Arc<Self>> {
-        let path = if !settings_path.is_empty() {
-            settings_path.to_string()
-        } else {
-            match std::env::var(RECORDING_SOCKET_ENV) {
-                Ok(p) if !p.is_empty() => p,
-                _ => return None,
-            }
-        };
-
-        match Self::bind(path) {
-            Ok(sink) => Some(Arc::new(sink)),
-            Err(e) => {
-                eprintln!("[recording_sink] bind failed: {:?}", e);
-                None
-            }
-        }
-    }
-
-    /// @brief Binds the Unix listener and spawns the accept thread.
-    ///
-    /// @input path: The file path to bind the socket to.
-    /// @return Result containing the new RecordingSink instance.
-    fn bind(path: String) -> std::io::Result<Self> {
-        let _ = fs::remove_file(&path);
-
-        let listener = UnixListener::bind(&path)?;
-        listener.set_nonblocking(true)?;
-
-        let clients: Arc<Mutex<Vec<UnixStream>>> = Arc::new(Mutex::new(Vec::new()));
-        let shutdown = Arc::new(AtomicBool::new(false));
-
-        let frames_since_idr = Arc::new(AtomicU32::new(u32::MAX));
-
-        let clients_acc = clients.clone();
-        let shutdown_acc = shutdown.clone();
-        let frames_since_idr_acc = frames_since_idr.clone();
-        let path_log = path.clone();
-
-        thread::spawn(move || {
-            eprintln!("[recording_sink] listening on {}", path_log);
-            while !shutdown_acc.load(Ordering::Relaxed) {
-                match listener.accept() {
-                    Ok((stream, _)) => {
-                        if let Err(e) = stream.set_write_timeout(Some(WRITE_TIMEOUT)) {
-                            eprintln!("[recording_sink] set_write_timeout failed: {:?}", e);
-                            continue;
-                        }
-                        let mut guard = clients_acc.lock().unwrap();
-                        guard.push(stream);
-
-                        frames_since_idr_acc.store(u32::MAX, Ordering::Relaxed);
-                        eprintln!(
-                            "[recording_sink] client connected; total {}; requesting IDR",
-                            guard.len()
-                        );
-                    }
-                    Err(e) if e.kind() == ErrorKind::WouldBlock => {
-                        thread::sleep(ACCEPT_POLL_INTERVAL);
-                    }
-                    Err(e) => {
-                        eprintln!("[recording_sink] accept error: {:?}", e);
-                        thread::sleep(Duration::from_millis(500));
-                    }
-                }
-            }
-            eprintln!("[recording_sink] listener thread exiting");
-        });
-
-        Ok(Self {
-            path,
-            clients,
-            shutdown,
-            frames_since_idr,
-            keyint_frames: DEFAULT_KEYINT_FRAMES,
-        })
-    }
-
-    /// @brief Advisory hook indicating if the next encode should be an IDR.
-    ///
-    /// @return True if the next frame should be a keyframe.
-    pub fn should_force_idr(&self) -> bool {
-        let prev = self.frames_since_idr.fetch_add(1, Ordering::Relaxed);
-        if prev >= self.keyint_frames - 1 {
-            self.frames_since_idr.store(0, Ordering::Relaxed);
-            true
-        } else {
-            false
-        }
-    }
-
-    /// @brief Fans a chunk of encoded bytes out to every connected client.
-    ///
-    /// @input data: The raw Annex-B H.264 byte slice.
-    pub fn write_frame(&self, data: &[u8]) {
-        if data.is_empty() {
-            return;
-        }
-
-        let mut clients = self.clients.lock().unwrap();
-        let mut to_remove: Vec<usize> = Vec::new();
-
-        for (idx, client) in clients.iter_mut().enumerate() {
-            if let Err(e) = client.write_all(data) {
-                eprintln!("[recording_sink] dropping client (idx {}): {:?}", idx, e);
-                to_remove.push(idx);
-            }
-        }
-
-        for idx in to_remove.into_iter().rev() {
-            clients.swap_remove(idx);
-        }
-    }
-}
-
-impl Drop for RecordingSink {
-    fn drop(&mut self) {
-        self.shutdown.store(true, Ordering::Relaxed);
-        let _ = fs::remove_file(&self.path);
-    }
-}
diff --git a/pyproject.toml b/pyproject.toml
index aa8d9e8..390373a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,6 +6,11 @@ build-backend = "setuptools.build_meta"
 archs = [ "x86_64", "aarch64" ]
 manylinux-x86_64-image = "manylinux_2_34"
 manylinux-aarch64-image = "manylinux_2_34"
+# Build CPython 3.9-3.14 only. Skip free-threaded (cp3XXt) builds: pixelflux is a
+# full-API (GIL) PyO3 extension, not a free-threaded one. Skip PyPy (no C-API ext).
+# musllinux is kept (the Alpine `before-all` branch below provisions it).
+build = "cp39-* cp310-* cp311-* cp312-* cp313-* cp314-*"
+skip = "*t-* pp*"
 
 [tool.cibuildwheel.environment]
 LD_LIBRARY_PATH = "/usr/local/lib:/usr/local/lib64"
@@ -26,9 +31,6 @@ before-all = """
             pixman-devel \
             libX11-devel \
             libXext-devel \
-            libXcursor-devel \
-            libev-devel \
-            libXcomposite-devel \
             libva-devel \
             libdrm-devel \
             libinput-devel \
@@ -51,18 +53,6 @@ before-all = """
             make -j$(nproc) && \
             make install) && \
         \
-        if [ "$(uname -m)" = "aarch64" ]; then \
-            dnf install -y libyuv-devel; \
-        else \
-            (cd /tmp && \
-                git clone --branch stable --depth 1 https://chromium.googlesource.com/libyuv/libyuv && \
-                cd libyuv && \
-                mkdir build && cd build && \
-                cmake .. -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DCMAKE_INSTALL_PREFIX=/usr/local -DCMAKE_POLICY_VERSION_MINIMUM=3.5 && \
-                make -j$(nproc) && \
-                make install); \
-        fi && \
-        \
         (cd /tmp && \
             git clone --branch 3.1.1 --depth 1 https://github.com/libjpeg-turbo/libjpeg-turbo.git && \
             cd libjpeg-turbo && \
@@ -72,7 +62,7 @@ before-all = """
             make install) && \
         \
         (cd /tmp && \
-            git clone --branch n8.0 --depth 1 https://git.ffmpeg.org/ffmpeg.git && \
+            git clone --branch n8.1 --depth 1 https://git.ffmpeg.org/ffmpeg.git && \
             cd ffmpeg && \
             ./configure \
                 --prefix=/usr/local \
@@ -89,6 +79,8 @@ before-all = """
         ldconfig
 
     # --- ALPINE (APK-based) ---
+    # NOTE: ffmpeg-dev must be >= 8.1 (the Rust ext's ffmpeg-next 8.1.0 needs
+    # FFmpeg 8.1 symbols). Check that the musllinux image's Alpine release ships 8.1+; pin a newer base if not.
     elif command -v apk; then
         apk add --no-cache \
             build-base \
@@ -99,14 +91,9 @@ before-all = """
             pixman-dev \
             libx11-dev \
             libxext-dev \
-            libxcursor-dev \
             libxfixes-dev \
             jpeg-dev \
-            libev-dev \
             x264-dev \
-            libyuv \
-            libyuv-dev \
-            libxcomposite-dev \
             libva-dev \
             libdrm-dev \
             libinput-dev \
diff --git a/setup.py b/setup.py
index aae14e4..45803e4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,73 +1,11 @@
-import os
-import subprocess
-import sys
-import platform
-from pathlib import Path
-import setuptools
 from setuptools import setup
-from setuptools.command.build_ext import build_ext
 from setuptools_rust import Binding, RustExtension, Strip
 
-if "RUSTFLAGS" not in os.environ:
-    machine = platform.machine()
-    if machine == "x86_64":
-        print("Enabling x86-64-v3 optimizations (AVX2/FMA)")
-        os.environ["RUSTFLAGS"] = "-C target-cpu=x86-64-v3"
-
-class BuildCtypesExt(build_ext):
-    def run(self):
-        super().run()
-        self.build_custom_cpp()
-
-    def build_custom_cpp(self):
-        compiler = "g++"
-        if hasattr(self, 'compiler') and self.compiler:
-             if hasattr(self.compiler, 'compiler_cxx'):
-                 compiler = self.compiler.compiler_cxx[0]
-
-        lib_dir = Path(self.build_lib)
-        output_path = lib_dir / "pixelflux" / "screen_capture_module.so"
-        output_path.parent.mkdir(parents=True, exist_ok=True)
-        
-        sources = [
-            'pixelflux/screen_capture_module.cpp',
-            'pixelflux/include/xxhash.c'
-        ]
-        
-        include_dirs = ['pixelflux/include']
-        library_dirs = []
-        
-        if os.environ.get("CIBUILDWHEEL"):
-            include_dirs.append('/usr/local/include')
-            library_dirs.append('/usr/local/lib')
-
-        libraries = ['X11', 'Xext', 'Xfixes', 'jpeg', 'x264', 'yuv', 'dl', 'avcodec', 'avutil']
-        extra_compile_args = ['-std=c++17', '-Wno-unused-function', '-fPIC', '-O3', '-flto', '-shared']
-            
-        command = [compiler] + extra_compile_args + ['-o', str(output_path)]
-        for inc in include_dirs: command.append(f'-I{inc}')
-        for lib in library_dirs: command.append(f'-L{lib}')
-        command.extend(sources)
-        for lib in libraries: command.append(f'-l{lib}')
-            
-        print(f"Building C++ module: {' '.join(command)}")
-        try:
-            subprocess.check_call(command)
-        except subprocess.CalledProcessError as e:
-            print(f"C++ build failed with exit code {e.returncode}")
-            sys.exit(1)
-
 with open("README.md", "r", encoding="utf-8") as fh:
     long_description = fh.read()
 
-install_requires = []
-is_alpine = os.path.exists("/etc/alpine-release")
-if not is_alpine:
-    install_requires.append("nvidia-cuda-nvrtc")
-
 setup(
     name="pixelflux",
-    install_requires=install_requires,
     version="1.6.4",
     author="Linuxserver.io",
     author_email="pypi@linuxserver.io",
@@ -76,30 +14,25 @@ def build_custom_cpp(self):
     long_description_content_type="text/markdown",
     license="MPL-2.0",
     url="https://github.com/linuxserver/pixelflux",
-    packages=setuptools.find_packages(),
-    
+
+    # Single self-contained Rust extension: the top-level `pixelflux` module does X11 (XShm)
+    # and Wayland capture plus all encoding/conversion. No C/C++ sources, no Python package
+    # layer -- `import pixelflux` resolves directly to pixelflux.cpython-*.so.
+    packages=[],
     rust_extensions=[
         RustExtension(
-            "pixelflux.pixelflux_wayland", 
-            "pixelflux_wayland/Cargo.toml",
+            "pixelflux",
+            "pixelflux/Cargo.toml",
             binding=Binding.PyO3,
             debug=False,
             strip=Strip.All
         )
     ],
-    
-    cmdclass={
-       "build_ext": BuildCtypesExt,
-    },
 
-    package_data={
-       "pixelflux": ["screen_capture_module.so"],
-    },
-    
     classifiers=[
         "Programming Language :: Python :: 3",
         "Operating System :: POSIX :: Linux",
     ],
-    python_requires=">=3.6",
+    python_requires=">=3.9",
     zip_safe=False,
 )