Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/sweep-security-state.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
"geotiff": {
"last_inspected": "2026-04-17",
"issue": 1215,
"followup_issues": [1220],
"followup_issues": [1219, 1220],
"severity_max": "HIGH",
"categories_found": [1, 4],
"notes": "Follow-up #1220: GPU predictor=2 kernel over-indexed d_decomp for multi-sample tiled TIFFs (silent OOB GPU write). Fixed by passing width=tile_width instead of tile_width*samples."
Expand Down
7 changes: 6 additions & 1 deletion xrspatial/geotiff/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1068,7 +1068,7 @@ def read_geotiff_gpu(source: str, *,
"Install it with: pip install cupy-cuda12x")

from ._reader import _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT
from ._header import parse_header, parse_all_ifds
from ._header import parse_header, parse_all_ifds, validate_tile_layout
from ._dtypes import tiff_dtype_to_numpy
from ._geotags import extract_geo_info
from ._gpu_decode import gpu_decode_tiles
Expand Down Expand Up @@ -1135,6 +1135,11 @@ def read_geotiff_gpu(source: str, *,
# A single tile's decoded bytes must also fit under the pixel budget.
_check_dimensions(tw, th, samples, max_pixels)

# Reject malformed TIFFs whose declared tile grid exceeds the
# supplied TileOffsets length. The GPU tile-assembly kernel would
# read OOB otherwise. See issue #1219.
validate_tile_layout(ifd)

finally:
src.close()

Expand Down
63 changes: 63 additions & 0 deletions xrspatial/geotiff/_header.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""TIFF/BigTIFF header and IFD parsing."""
from __future__ import annotations

import math
import struct
from dataclasses import dataclass, field
from typing import Any
Expand Down Expand Up @@ -208,6 +209,68 @@ def nodata_str(self) -> str | None:
return str(v).rstrip('\x00')


def validate_tile_layout(ifd: IFD) -> None:
"""Validate that a tiled IFD's TileOffsets covers the declared tile grid.

A well-formed tiled TIFF must supply at least `tiles_across * tiles_down`
TileOffsets entries (times samples_per_pixel for planar config 2). An
adversarial or malformed file can declare larger image dimensions than
its offsets array covers, which causes out-of-bounds reads in
downstream decoders (notably the GPU tile-assembly kernel).

Parameters
----------
ifd : IFD
Parsed IFD. Must be tiled.

Raises
------
ValueError
If TileOffsets or TileByteCounts is missing, if tile width/height
is zero, or if the declared grid exceeds the offsets array length.
"""
if not ifd.is_tiled:
return

offsets = ifd.tile_offsets
byte_counts = ifd.tile_byte_counts
if offsets is None or byte_counts is None:
raise ValueError("Tiled TIFF is missing TileOffsets or TileByteCounts")

tw = ifd.tile_width
th = ifd.tile_height
if tw <= 0 or th <= 0:
raise ValueError(
f"Invalid tile dimensions: tile_width={tw}, tile_height={th}")

width = ifd.width
height = ifd.height
if width <= 0 or height <= 0:
raise ValueError(
f"Invalid image dimensions: width={width}, height={height}")

tiles_across = math.ceil(width / tw)
tiles_down = math.ceil(height / th)
planar = ifd.planar_config
samples = ifd.samples_per_pixel
bands = samples if (planar == 2 and samples > 1) else 1
expected = tiles_across * tiles_down * bands

if len(offsets) < expected:
raise ValueError(
f"Malformed TIFF: declared tile grid requires {expected} tile "
f"offsets ({tiles_across} x {tiles_down}"
f"{f' x {bands} bands' if bands > 1 else ''}), "
f"but TileOffsets has only {len(offsets)} entries"
)
if len(byte_counts) < expected:
raise ValueError(
f"Malformed TIFF: declared tile grid requires {expected} tile "
f"byte counts, but TileByteCounts has only {len(byte_counts)} "
f"entries"
)


def parse_header(data: bytes | memoryview) -> TIFFHeader:
"""Parse a TIFF/BigTIFF file header.

Expand Down
11 changes: 10 additions & 1 deletion xrspatial/geotiff/_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
)
from ._dtypes import SUB_BYTE_BPS, tiff_dtype_to_numpy
from ._geotags import GeoInfo, GeoTransform, extract_geo_info
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header
from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header, validate_tile_layout

# ---------------------------------------------------------------------------
# Allocation guard: reject TIFF dimensions that would exhaust memory
Expand Down Expand Up @@ -501,6 +501,11 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader,

_check_dimensions(out_w, out_h, samples, max_pixels)

# Reject malformed TIFFs whose declared tile grid exceeds the number of
# supplied TileOffsets entries. Silent skipping in the CPU loop below
# would mask the problem, and the GPU path reads OOB. See issue #1219.
validate_tile_layout(ifd)

_alloc = np.zeros if window is not None else np.empty
if samples > 1:
result = _alloc((out_h, out_w, samples), dtype=dtype)
Expand Down Expand Up @@ -664,6 +669,10 @@ def _read_cog_http(url: str, overview_level: int | None = None,
# A single tile's decoded bytes must also fit under the pixel budget.
_check_dimensions(tw, th, samples, max_pixels)

# Reject malformed TIFFs whose declared tile grid exceeds the supplied
# TileOffsets length. See issue #1219.
validate_tile_layout(ifd)

if samples > 1:
result = np.empty((height, width, samples), dtype=dtype)
else:
Expand Down
145 changes: 145 additions & 0 deletions xrspatial/geotiff/tests/test_security.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,3 +372,148 @@ def test_absolute_path_also_canonicalized(self, tmp_path):

assert ".." not in source_path
assert source_path == os.path.realpath("/tmp/../tmp/test.tif")


# ---------------------------------------------------------------------------
# Tile layout validation (issue #1219)
#
# An adversarial TIFF can declare image dimensions that imply more tiles
# than its TileOffsets tag supplies. The CPU path silently skipped the
# missing tiles (zero-padded output) and the GPU tile-assembly kernel
# read past the end of the decompression-offsets array on device.
# ---------------------------------------------------------------------------

def _make_short_offsets_tiff(
width: int,
height: int,
tile_size: int,
declared_offset_count: int,
dtype: np.dtype = np.dtype('float32'),
) -> bytes:
"""Build a tiled TIFF whose TileOffsets tag count is less than what
the image/tile dimensions imply.

The produced bytes are a valid TIFF that rioxarray/GDAL might still
accept with a warning, but our reader should reject them.
"""
import math

# Start from a normal tiled TIFF, then rewrite the TileOffsets IFD
# entry to advertise a smaller count.
data = bytearray(make_minimal_tiff(
width, height, dtype, tiled=True, tile_size=tile_size))

# Parse to locate the TileOffsets entry.
header = parse_header(bytes(data))
ifds = parse_all_ifds(bytes(data), header)
ifd = ifds[0]

offsets = ifd.tile_offsets
assert offsets is not None
tiles_across = math.ceil(width / tile_size)
tiles_down = math.ceil(height / tile_size)
true_count = tiles_across * tiles_down
assert len(offsets) == true_count
assert declared_offset_count < true_count

# Find the IFD entry bytes for tag 324 (TileOffsets) and rewrite its
# count field. TIFF (non-BigTIFF) entries are 12 bytes: HHIi
# (tag, type, count, value_or_ptr).
bo = header.byte_order
ifd_offset = header.first_ifd_offset
num_entries = struct.unpack_from(f'{bo}H', data, ifd_offset)[0]
entry_offset = ifd_offset + 2
for i in range(num_entries):
eo = entry_offset + i * 12
tag = struct.unpack_from(f'{bo}H', data, eo)[0]
if tag == 324: # TileOffsets
# Overwrite the count field at eo+4 (4 bytes, unsigned int).
struct.pack_into(
f'{bo}I', data, eo + 4, declared_offset_count)
break
else:
raise AssertionError("TileOffsets tag not found in IFD")

return bytes(data)


class TestTileLayoutValidation:
"""Regression tests for issue #1219."""

def test_validate_tile_layout_rejects_short_offsets(self):
"""validate_tile_layout raises when offsets count < declared grid."""
from xrspatial.geotiff._header import validate_tile_layout

# 16x16 image with 4x4 tiles = 16 tiles, but only 4 offsets declared.
data = _make_short_offsets_tiff(
width=16, height=16, tile_size=4, declared_offset_count=4)
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd = ifds[0]

with pytest.raises(ValueError, match="Malformed TIFF.*tile offsets"):
validate_tile_layout(ifd)

def test_validate_tile_layout_accepts_well_formed(self):
"""validate_tile_layout accepts a normal tiled TIFF."""
from xrspatial.geotiff._header import validate_tile_layout

data = make_minimal_tiff(
8, 8, np.dtype('float32'), tiled=True, tile_size=4)
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd = ifds[0]

# Should not raise.
validate_tile_layout(ifd)

def test_validate_tile_layout_ignores_stripped(self):
"""validate_tile_layout is a no-op for stripped TIFFs."""
from xrspatial.geotiff._header import validate_tile_layout

data = make_minimal_tiff(4, 4, np.dtype('float32'))
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd = ifds[0]

# Should not raise -- stripped file, not tiled.
validate_tile_layout(ifd)

def test_read_tiles_rejects_short_offsets(self):
"""_read_tiles surfaces the malformed-TIFF error instead of
silently zero-padding missing tiles."""
data = _make_short_offsets_tiff(
width=16, height=16, tile_size=4, declared_offset_count=4)
header = parse_header(data)
ifds = parse_all_ifds(data, header)
ifd = ifds[0]
dtype = tiff_dtype_to_numpy(ifd.bits_per_sample, ifd.sample_format)

with pytest.raises(ValueError, match="Malformed TIFF"):
_read_tiles(data, ifd, header, dtype)

def test_read_to_array_rejects_short_offsets(self, tmp_path):
"""End-to-end: reading a short-offsets TIFF raises a clear
ValueError (not a silent zero output or CUDA crash)."""
data = _make_short_offsets_tiff(
width=16, height=16, tile_size=4, declared_offset_count=4)
path = str(tmp_path / "malformed_1219.tif")
with open(path, 'wb') as f:
f.write(data)

with pytest.raises(ValueError, match="Malformed TIFF"):
read_to_array(path)

def test_boundary_exact_count_ok(self, tmp_path):
"""A TIFF with exactly the required number of offsets reads fine."""
# 8x8 image, 4x4 tiles => 4 tiles exactly.
expected = np.arange(64, dtype=np.float32).reshape(8, 8)
data = make_minimal_tiff(
8, 8, np.dtype('float32'),
pixel_data=expected, tiled=True, tile_size=4)
path = str(tmp_path / "exact_1219.tif")
with open(path, 'wb') as f:
f.write(data)

arr, _ = read_to_array(path)
np.testing.assert_array_equal(arr, expected)
Loading