diff --git a/.claude/sweep-security-state.json b/.claude/sweep-security-state.json index 198b2b43..f78f79e1 100644 --- a/.claude/sweep-security-state.json +++ b/.claude/sweep-security-state.json @@ -9,7 +9,7 @@ "geotiff": { "last_inspected": "2026-04-17", "issue": 1215, - "followup_issues": [1220], + "followup_issues": [1219, 1220], "severity_max": "HIGH", "categories_found": [1, 4], "notes": "Follow-up #1220: GPU predictor=2 kernel over-indexed d_decomp for multi-sample tiled TIFFs (silent OOB GPU write). Fixed by passing width=tile_width instead of tile_width*samples." diff --git a/xrspatial/geotiff/__init__.py b/xrspatial/geotiff/__init__.py index f7f9a268..4080282f 100644 --- a/xrspatial/geotiff/__init__.py +++ b/xrspatial/geotiff/__init__.py @@ -1068,7 +1068,7 @@ def read_geotiff_gpu(source: str, *, "Install it with: pip install cupy-cuda12x") from ._reader import _FileSource, _check_dimensions, MAX_PIXELS_DEFAULT - from ._header import parse_header, parse_all_ifds + from ._header import parse_header, parse_all_ifds, validate_tile_layout from ._dtypes import tiff_dtype_to_numpy from ._geotags import extract_geo_info from ._gpu_decode import gpu_decode_tiles @@ -1135,6 +1135,11 @@ def read_geotiff_gpu(source: str, *, # A single tile's decoded bytes must also fit under the pixel budget. _check_dimensions(tw, th, samples, max_pixels) + # Reject malformed TIFFs whose declared tile grid exceeds the + # supplied TileOffsets length. The GPU tile-assembly kernel would + # read OOB otherwise. See issue #1219. + validate_tile_layout(ifd) + finally: src.close() diff --git a/xrspatial/geotiff/_header.py b/xrspatial/geotiff/_header.py index 8c6ddce8..5f6f6a1b 100644 --- a/xrspatial/geotiff/_header.py +++ b/xrspatial/geotiff/_header.py @@ -1,6 +1,7 @@ """TIFF/BigTIFF header and IFD parsing.""" from __future__ import annotations +import math import struct from dataclasses import dataclass, field from typing import Any @@ -208,6 +209,68 @@ def nodata_str(self) -> str | None: return str(v).rstrip('\x00') +def validate_tile_layout(ifd: IFD) -> None: + """Validate that a tiled IFD's TileOffsets covers the declared tile grid. + + A well-formed tiled TIFF must supply at least `tiles_across * tiles_down` + TileOffsets entries (times samples_per_pixel for planar config 2). An + adversarial or malformed file can declare larger image dimensions than + its offsets array covers, which causes out-of-bounds reads in + downstream decoders (notably the GPU tile-assembly kernel). + + Parameters + ---------- + ifd : IFD + Parsed IFD. Must be tiled. + + Raises + ------ + ValueError + If TileOffsets or TileByteCounts is missing, if tile width/height + is zero, or if the declared grid exceeds the offsets array length. + """ + if not ifd.is_tiled: + return + + offsets = ifd.tile_offsets + byte_counts = ifd.tile_byte_counts + if offsets is None or byte_counts is None: + raise ValueError("Tiled TIFF is missing TileOffsets or TileByteCounts") + + tw = ifd.tile_width + th = ifd.tile_height + if tw <= 0 or th <= 0: + raise ValueError( + f"Invalid tile dimensions: tile_width={tw}, tile_height={th}") + + width = ifd.width + height = ifd.height + if width <= 0 or height <= 0: + raise ValueError( + f"Invalid image dimensions: width={width}, height={height}") + + tiles_across = math.ceil(width / tw) + tiles_down = math.ceil(height / th) + planar = ifd.planar_config + samples = ifd.samples_per_pixel + bands = samples if (planar == 2 and samples > 1) else 1 + expected = tiles_across * tiles_down * bands + + if len(offsets) < expected: + raise ValueError( + f"Malformed TIFF: declared tile grid requires {expected} tile " + f"offsets ({tiles_across} x {tiles_down}" + f"{f' x {bands} bands' if bands > 1 else ''}), " + f"but TileOffsets has only {len(offsets)} entries" + ) + if len(byte_counts) < expected: + raise ValueError( + f"Malformed TIFF: declared tile grid requires {expected} tile " + f"byte counts, but TileByteCounts has only {len(byte_counts)} " + f"entries" + ) + + def parse_header(data: bytes | memoryview) -> TIFFHeader: """Parse a TIFF/BigTIFF file header. diff --git a/xrspatial/geotiff/_reader.py b/xrspatial/geotiff/_reader.py index 669abf03..325f03ec 100644 --- a/xrspatial/geotiff/_reader.py +++ b/xrspatial/geotiff/_reader.py @@ -17,7 +17,7 @@ ) from ._dtypes import SUB_BYTE_BPS, tiff_dtype_to_numpy from ._geotags import GeoInfo, GeoTransform, extract_geo_info -from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header +from ._header import IFD, TIFFHeader, parse_all_ifds, parse_header, validate_tile_layout # --------------------------------------------------------------------------- # Allocation guard: reject TIFF dimensions that would exhaust memory @@ -501,6 +501,11 @@ def _read_tiles(data: bytes, ifd: IFD, header: TIFFHeader, _check_dimensions(out_w, out_h, samples, max_pixels) + # Reject malformed TIFFs whose declared tile grid exceeds the number of + # supplied TileOffsets entries. Silent skipping in the CPU loop below + # would mask the problem, and the GPU path reads OOB. See issue #1219. + validate_tile_layout(ifd) + _alloc = np.zeros if window is not None else np.empty if samples > 1: result = _alloc((out_h, out_w, samples), dtype=dtype) @@ -664,6 +669,10 @@ def _read_cog_http(url: str, overview_level: int | None = None, # A single tile's decoded bytes must also fit under the pixel budget. _check_dimensions(tw, th, samples, max_pixels) + # Reject malformed TIFFs whose declared tile grid exceeds the supplied + # TileOffsets length. See issue #1219. + validate_tile_layout(ifd) + if samples > 1: result = np.empty((height, width, samples), dtype=dtype) else: diff --git a/xrspatial/geotiff/tests/test_security.py b/xrspatial/geotiff/tests/test_security.py index 385714ea..ec020391 100644 --- a/xrspatial/geotiff/tests/test_security.py +++ b/xrspatial/geotiff/tests/test_security.py @@ -372,3 +372,148 @@ def test_absolute_path_also_canonicalized(self, tmp_path): assert ".." not in source_path assert source_path == os.path.realpath("/tmp/../tmp/test.tif") + + +# --------------------------------------------------------------------------- +# Tile layout validation (issue #1219) +# +# An adversarial TIFF can declare image dimensions that imply more tiles +# than its TileOffsets tag supplies. The CPU path silently skipped the +# missing tiles (zero-padded output) and the GPU tile-assembly kernel +# read past the end of the decompression-offsets array on device. +# --------------------------------------------------------------------------- + +def _make_short_offsets_tiff( + width: int, + height: int, + tile_size: int, + declared_offset_count: int, + dtype: np.dtype = np.dtype('float32'), +) -> bytes: + """Build a tiled TIFF whose TileOffsets tag count is less than what + the image/tile dimensions imply. + + The produced bytes are a valid TIFF that rioxarray/GDAL might still + accept with a warning, but our reader should reject them. + """ + import math + + # Start from a normal tiled TIFF, then rewrite the TileOffsets IFD + # entry to advertise a smaller count. + data = bytearray(make_minimal_tiff( + width, height, dtype, tiled=True, tile_size=tile_size)) + + # Parse to locate the TileOffsets entry. + header = parse_header(bytes(data)) + ifds = parse_all_ifds(bytes(data), header) + ifd = ifds[0] + + offsets = ifd.tile_offsets + assert offsets is not None + tiles_across = math.ceil(width / tile_size) + tiles_down = math.ceil(height / tile_size) + true_count = tiles_across * tiles_down + assert len(offsets) == true_count + assert declared_offset_count < true_count + + # Find the IFD entry bytes for tag 324 (TileOffsets) and rewrite its + # count field. TIFF (non-BigTIFF) entries are 12 bytes: HHIi + # (tag, type, count, value_or_ptr). + bo = header.byte_order + ifd_offset = header.first_ifd_offset + num_entries = struct.unpack_from(f'{bo}H', data, ifd_offset)[0] + entry_offset = ifd_offset + 2 + for i in range(num_entries): + eo = entry_offset + i * 12 + tag = struct.unpack_from(f'{bo}H', data, eo)[0] + if tag == 324: # TileOffsets + # Overwrite the count field at eo+4 (4 bytes, unsigned int). + struct.pack_into( + f'{bo}I', data, eo + 4, declared_offset_count) + break + else: + raise AssertionError("TileOffsets tag not found in IFD") + + return bytes(data) + + +class TestTileLayoutValidation: + """Regression tests for issue #1219.""" + + def test_validate_tile_layout_rejects_short_offsets(self): + """validate_tile_layout raises when offsets count < declared grid.""" + from xrspatial.geotiff._header import validate_tile_layout + + # 16x16 image with 4x4 tiles = 16 tiles, but only 4 offsets declared. + data = _make_short_offsets_tiff( + width=16, height=16, tile_size=4, declared_offset_count=4) + header = parse_header(data) + ifds = parse_all_ifds(data, header) + ifd = ifds[0] + + with pytest.raises(ValueError, match="Malformed TIFF.*tile offsets"): + validate_tile_layout(ifd) + + def test_validate_tile_layout_accepts_well_formed(self): + """validate_tile_layout accepts a normal tiled TIFF.""" + from xrspatial.geotiff._header import validate_tile_layout + + data = make_minimal_tiff( + 8, 8, np.dtype('float32'), tiled=True, tile_size=4) + header = parse_header(data) + ifds = parse_all_ifds(data, header) + ifd = ifds[0] + + # Should not raise. + validate_tile_layout(ifd) + + def test_validate_tile_layout_ignores_stripped(self): + """validate_tile_layout is a no-op for stripped TIFFs.""" + from xrspatial.geotiff._header import validate_tile_layout + + data = make_minimal_tiff(4, 4, np.dtype('float32')) + header = parse_header(data) + ifds = parse_all_ifds(data, header) + ifd = ifds[0] + + # Should not raise -- stripped file, not tiled. + validate_tile_layout(ifd) + + def test_read_tiles_rejects_short_offsets(self): + """_read_tiles surfaces the malformed-TIFF error instead of + silently zero-padding missing tiles.""" + data = _make_short_offsets_tiff( + width=16, height=16, tile_size=4, declared_offset_count=4) + header = parse_header(data) + ifds = parse_all_ifds(data, header) + ifd = ifds[0] + dtype = tiff_dtype_to_numpy(ifd.bits_per_sample, ifd.sample_format) + + with pytest.raises(ValueError, match="Malformed TIFF"): + _read_tiles(data, ifd, header, dtype) + + def test_read_to_array_rejects_short_offsets(self, tmp_path): + """End-to-end: reading a short-offsets TIFF raises a clear + ValueError (not a silent zero output or CUDA crash).""" + data = _make_short_offsets_tiff( + width=16, height=16, tile_size=4, declared_offset_count=4) + path = str(tmp_path / "malformed_1219.tif") + with open(path, 'wb') as f: + f.write(data) + + with pytest.raises(ValueError, match="Malformed TIFF"): + read_to_array(path) + + def test_boundary_exact_count_ok(self, tmp_path): + """A TIFF with exactly the required number of offsets reads fine.""" + # 8x8 image, 4x4 tiles => 4 tiles exactly. + expected = np.arange(64, dtype=np.float32).reshape(8, 8) + data = make_minimal_tiff( + 8, 8, np.dtype('float32'), + pixel_data=expected, tiled=True, tile_size=4) + path = str(tmp_path / "exact_1219.tif") + with open(path, 'wb') as f: + f.write(data) + + arr, _ = read_to_array(path) + np.testing.assert_array_equal(arr, expected)