From 18a77b0e315be5088b26f77f3c4ac187f938bfb6 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Tue, 28 Apr 2026 09:48:12 +0000 Subject: [PATCH 01/16] feat(schema): ClickHouse data-skipping indexes and engine SETTINGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ClickHouse schema previously hard-coded `TYPE minmax GRANULARITY 3` for every index and had no way to emit a `SETTINGS k=v` clause. Both are needed by real ClickHouse-backed services that pick algorithms per column and tune engine settings (e.g. `index_granularity`, `allow_nullable_key`). This change adds: - `Schema\ClickHouse\SkipIndexAlgorithm` enum — `MinMax`, `Set`, `BloomFilter`, `NgramBloomFilter`, `TokenBloomFilter`, `Inverted`. - `Schema\ClickHouse\SkipIndex` value object carrying the algorithm, granularity, and algorithm-specific args (e.g. `[100]` for `set(100)`, `[4, 1024, 3, 0]` for `ngrambf_v1(...)`). - `Table::dataSkippingIndex()` — attach a skip index to the blueprint. Default granularity 1, default no algorithm args. - `Table::settings()` — set table-level engine SETTINGS, with a conservative key/value allow-list to keep DDL safe. - `Schema\ClickHouse::create()` renders both. SETTINGS is emitted after TTL. The existing minmax compile path is unchanged for backward compatibility — callers using `index()` keep their current output. Other dialects ignore the new fields, matching how `engine()` and `ttl()` are already handled. Tests cover each skip algorithm shape, composite-column skip indexes, SETTINGS rendering with and without TTL, and validation of bad granularity, empty columns, bad setting names, and unsafe string values. 
--- README.md | 43 ++++++ src/Query/Schema/ClickHouse.php | 40 +++++ src/Query/Schema/ClickHouse/SkipIndex.php | 33 ++++ .../Schema/ClickHouse/SkipIndexAlgorithm.php | 13 ++ src/Query/Schema/Table.php | 91 +++++++++++ tests/Query/Schema/ClickHouseTest.php | 141 ++++++++++++++++++ 6 files changed, 361 insertions(+) create mode 100644 src/Query/Schema/ClickHouse/SkipIndex.php create mode 100644 src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php diff --git a/README.md b/README.md index 79cc33c..b914f12 100644 --- a/README.md +++ b/README.md @@ -2081,6 +2081,49 @@ $schema->create('events', function (Table $table) { TTL expressions are emitted verbatim; they must not be empty or contain semicolons. Dialects other than ClickHouse throw `UnsupportedException`. +**Data-skipping indexes** — accelerate WHERE pruning by letting ClickHouse skip whole granules: + +```php +use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; + +$schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->string('country'); + $table->string('text'); + + // Default granularity = 1, no algorithm args + $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + + // Set(N) — small fixed value sets + $table->dataSkippingIndex(['country'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100]); + + // NgramBloomFilter(n, size_bytes, hashes, seed) — text search on `LIKE` / `match` + $table->dataSkippingIndex(['text'], SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); +}); + +// CREATE TABLE `events` (..., INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1, ...) +``` + +The 6 algorithms are `MinMax`, `Set`, `BloomFilter`, `NgramBloomFilter`, `TokenBloomFilter`, `Inverted`. Algorithm-specific arguments are passed via `algorithmArgs` and rendered verbatim — supply them from trusted (developer-controlled) source. Other dialects ignore the call. 
+ +**Engine SETTINGS** — emit `SETTINGS k=v` after the TTL clause: + +```php +$schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->settings([ + 'index_granularity' => 8192, + 'allow_nullable_key' => true, // booleans become 1/0 + ]); +}); + +// CREATE TABLE `events` (...) ENGINE = MergeTree() ORDER BY (`id`) +// SETTINGS index_granularity = 8192, allow_nullable_key = 1 +``` + +Setting names must match `[A-Za-z_][A-Za-z0-9_]*`; string values are restricted to `[A-Za-z0-9_.\-+/]*`. Use ints / floats / booleans for everything else. Other dialects ignore the call. + ### SQLite Schema ```php diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index a16717d..bafaa4b 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -9,6 +9,7 @@ use Utopia\Query\QuotesIdentifiers; use Utopia\Query\Schema; use Utopia\Query\Schema\ClickHouse\Engine; +use Utopia\Query\Schema\ClickHouse\SkipIndex; use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\DropPartition; use Utopia\Query\Schema\Feature\TableComments; @@ -183,6 +184,14 @@ public function create(string $table, callable $definition, bool $ifNotExists = . ' ' . $expr . ' TYPE minmax GRANULARITY 3'; } + foreach ($blueprint->skipIndexes as $skip) { + $cols = \array_map(fn (string $c): string => $this->quote($c), $skip->columns); + $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; + $columnDefs[] = 'INDEX ' . $this->quote($skip->name) + . ' ' . $expr . ' TYPE ' . $this->compileSkipAlgorithm($skip) + . ' GRANULARITY ' . $skip->granularity; + } + if (! empty($blueprint->foreignKeys)) { throw new UnsupportedException('Foreign keys are not supported in ClickHouse.'); } @@ -211,9 +220,40 @@ public function create(string $table, callable $definition, bool $ifNotExists = $sql .= ' TTL ' . $blueprint->ttl; } + if (! 
empty($blueprint->settings)) { + $kv = []; + foreach ($blueprint->settings as $k => $v) { + $kv[] = $k . ' = ' . $v; + } + $sql .= ' SETTINGS ' . \implode(', ', $kv); + } + return new Statement($sql, [], executor: $this->executor); } + /** + * Render a `TYPE (args)` fragment for a data-skipping index. + * + * String args are emitted as single-quoted SQL literals (with `'` doubled); + * numeric args are emitted verbatim. Argument values come from the + * application — never from untrusted input. + */ + private function compileSkipAlgorithm(SkipIndex $skip): string + { + if ($skip->algorithmArgs === []) { + return $skip->algorithm->value; + } + + $args = \array_map( + fn (string|int|float $arg): string => \is_string($arg) + ? "'" . \str_replace("'", "''", $arg) . "'" + : (string) $arg, + $skip->algorithmArgs, + ); + + return $skip->algorithm->value . '(' . \implode(', ', $args) . ')'; + } + /** * Compile an engine declaration: `` or `()`. * diff --git a/src/Query/Schema/ClickHouse/SkipIndex.php b/src/Query/Schema/ClickHouse/SkipIndex.php new file mode 100644 index 0000000..2c9ff86 --- /dev/null +++ b/src/Query/Schema/ClickHouse/SkipIndex.php @@ -0,0 +1,33 @@ + $columns + * @param list $algorithmArgs Args for parameterized algorithms + * (e.g. [3] for set(3), + * [0.01] for bloom_filter(0.01), + * [4, 1024, 3, 0] for ngrambf_v1(n, size_bytes, hashes, seed)) + */ + public function __construct( + public string $name, + public array $columns, + public SkipIndexAlgorithm $algorithm, + public array $algorithmArgs = [], + public int $granularity = 1, + ) { + if (! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $name)) { + throw new ValidationException('Invalid skip index name: ' . 
$name); + } + if ($columns === []) { + throw new ValidationException('Skip index requires at least one column.'); + } + if ($granularity < 1) { + throw new ValidationException('Skip index granularity must be >= 1.'); + } + } +} diff --git a/src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php b/src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php new file mode 100644 index 0000000..58360ea --- /dev/null +++ b/src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php @@ -0,0 +1,13 @@ + ClickHouse data-skipping indexes (other dialects ignore) */ + public private(set) array $skipIndexes = []; + + /** @var array Table-level engine SETTINGS (ClickHouse only) */ + public private(set) array $settings = []; + /** * Add a table-level CHECK constraint. * @@ -543,4 +551,87 @@ public function ttl(string $expression): static return $this; } + + /** + * Attach a ClickHouse data-skipping index. Other dialects ignore this. + * + * Skip indexes accelerate WHERE clauses by letting ClickHouse skip whole + * granules during scanning. Choose the algorithm that matches the column + * cardinality and predicate type: + * + * - `MinMax` — numeric ranges, low cardinality + * - `Set(N)` — small fixed value sets (N is the set size cap) + * - `BloomFilter(p)` — high cardinality string columns with `=` / `IN` + * predicates (p is the false-positive probability, e.g. 0.01) + * - `NgramBloomFilter(n, size, hashes, seed)` — `LIKE` / `match` on text + * - `TokenBloomFilter(size, hashes, seed)` — token-style search + * - `Inverted` — `LIKE`, `match`, `hasToken` (experimental) + * + * @param list $columns + * @param list $algorithmArgs Algorithm-specific arguments + * + * @throws ValidationException if the index name or columns are invalid. + */ + public function dataSkippingIndex( + array $columns, + SkipIndexAlgorithm $algorithm, + int $granularity = 1, + array $algorithmArgs = [], + string $name = '', + ): static { + if ($name === '') { + $name = 'skip_' . 
\implode('_', $columns); + } + + $this->skipIndexes[] = new SkipIndex($name, $columns, $algorithm, $algorithmArgs, $granularity); + + return $this; + } + + /** + * Set table-level engine SETTINGS (ClickHouse only). Other dialects ignore. + * + * Compiled as `SETTINGS k=v, ...` after the TTL clause. Booleans become + * `1` / `0`, ints/floats are stringified, strings are passed through after + * a conservative character allow-list check. + * + * Calling this method replaces previously-set settings. + * + * @param array $settings + * + * @throws ValidationException if any key is not a valid identifier or any + * string value contains characters outside the + * allow-list. + */ + public function settings(array $settings): static + { + $sanitized = []; + + foreach ($settings as $key => $value) { + if (! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $key)) { + throw new ValidationException('Invalid setting name: ' . $key); + } + + if (\is_bool($value)) { + $sanitized[$key] = $value ? '1' : '0'; + } elseif (\is_int($value) || \is_float($value)) { + $sanitized[$key] = (string) $value; + } elseif (\is_string($value)) { + if (! \preg_match('/^[A-Za-z0-9_.\-+\/]*$/', $value)) { + throw new ValidationException( + 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\\-+/]*' + ); + } + $sanitized[$key] = $value; + } else { + throw new ValidationException( + 'Setting value for ' . $key . ' must be string, int, float, or bool.' 
+ ); + } + } + + $this->settings = $sanitized; + + return $this; + } } diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 74c3d6c..3a771ff 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -10,6 +10,7 @@ use Utopia\Query\Query; use Utopia\Query\Schema\ClickHouse as Schema; use Utopia\Query\Schema\ClickHouse\Engine; +use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\DropPartition; use Utopia\Query\Schema\Feature\ForeignKeys; @@ -720,4 +721,144 @@ public function testColumnLevelTTL(): void $this->assertSame('CREATE TABLE `events` (`id` Int32, `temporary` String TTL ts + INTERVAL 1 DAY, `ts` DateTime) ENGINE = MergeTree() ORDER BY (`id`)', $result->query); } + + // Data-skipping indexes + + public function testDataSkippingIndexBloomFilter(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `user_id` String, INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testDataSkippingIndexWithArgs(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('country'); + $table->string('text'); + $table->dataSkippingIndex(['country'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100]); + $table->dataSkippingIndex(['text'], SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `country` 
String, `text` String,' + . ' INDEX `skip_country` `country` TYPE set(100) GRANULARITY 4,' + . ' INDEX `skip_text` `text` TYPE ngrambf_v1(4, 1024, 3, 0) GRANULARITY 1)' + . ' ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testDataSkippingIndexCompositeColumns(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->string('event'); + $table->dataSkippingIndex(['user_id', 'event'], SkipIndexAlgorithm::BloomFilter, name: 'idx_user_event'); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `user_id` String, `event` String,' + . ' INDEX `idx_user_event` (`user_id`, `event`) TYPE bloom_filter GRANULARITY 1)' + . ' ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testDataSkippingIndexInvalidGranularityThrows(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter, granularity: 0); + }); + } + + public function testDataSkippingIndexEmptyColumnsThrows(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->dataSkippingIndex([], SkipIndexAlgorithm::BloomFilter); + }); + } + + // SETTINGS + + public function testTableSettings(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->settings(['index_granularity' => 8192, 'allow_nullable_key' => true]); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64) ENGINE = 
MergeTree() ORDER BY (`id`)' + . ' SETTINGS index_granularity = 8192, allow_nullable_key = 1', + $result->query, + ); + } + + public function testTableSettingsWithTtlOrdering(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->datetime('created_at'); + $table->ttl('`created_at` + INTERVAL 30 DAY'); + $table->settings(['index_granularity' => 4096]); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `created_at` DateTime) ENGINE = MergeTree() ORDER BY (`id`)' + . ' TTL `created_at` + INTERVAL 30 DAY' + . ' SETTINGS index_granularity = 4096', + $result->query, + ); + } + + public function testTableSettingsRejectsInvalidKey(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->settings(['1bad-key' => 8192]); + }); + } + + public function testTableSettingsRejectsInvalidValue(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->settings(['ok_key' => "evil'; DROP TABLE x; --"]); + }); + } } From 1f811df5f149dca3808b2cc227c333f8e84ee685 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 30 Apr 2026 00:57:14 +0000 Subject: [PATCH 02/16] fix(schema): address review feedback on ClickHouse skip indexes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Five follow-ups based on the greptile review on PR #6: - alter() now renders ADD INDEX for any skipIndexes set on the blueprint, matching ClickHouse's `ALTER TABLE ... ADD INDEX name expr TYPE algo GRANULARITY n` syntax. Previously dataSkippingIndex() inside an alter() callback was silently dropped (P1). 
- alter() throws UnsupportedException if the blueprint has settings — ClickHouse does not accept SETTINGS in ALTER TABLE; callers must emit `ALTER TABLE ... MODIFY SETTING` directly. Surfaces what was previously a silent no-op. - SkipIndex constructor rejects algorithmArgs for MinMax and Inverted (P2) — both are emitted without parentheses in ClickHouse DDL, so any args would have produced invalid SQL like `minmax(1)`. - Table::dataSkippingIndex() now sanitises auto-generated index names when columns contain non-identifier characters (P2) — `event-type` no longer produces a confusing `Invalid skip index name: skip_event-type` exception. Non-identifier characters are collapsed to `_`. - compileSkipAlgorithm() formats float args with sprintf('%F', ...) instead of (string) cast (P2) — the cast can produce scientific notation like `1.0E-5`, which ClickHouse rejects in index type arguments. Trailing zeros are trimmed for readability so 0.01 stays "0.01" rather than "0.010000". Adds tests for each fix plus one for ALTER ADD INDEX with composite columns and algorithm args. --- src/Query/Schema/ClickHouse.php | 25 +++++- src/Query/Schema/ClickHouse/SkipIndex.php | 18 +++++ src/Query/Schema/Table.php | 10 ++- tests/Query/Schema/ClickHouseTest.php | 99 +++++++++++++++++++++++ 4 files changed, 148 insertions(+), 4 deletions(-) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index bafaa4b..244cadf 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -130,6 +130,14 @@ public function alter(string $table, callable $definition): Statement $alterations[] = 'DROP INDEX ' . $this->quote($name); } + foreach ($blueprint->skipIndexes as $skip) { + $cols = \array_map(fn (string $c): string => $this->quote($c), $skip->columns); + $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; + $alterations[] = 'ADD INDEX ' . $this->quote($skip->name) + . ' ' . $expr . ' TYPE ' . 
$this->compileSkipAlgorithm($skip) + . ' GRANULARITY ' . $skip->granularity; + } + if (! empty($blueprint->foreignKeys)) { throw new UnsupportedException('Foreign keys are not supported in ClickHouse.'); } @@ -138,6 +146,12 @@ public function alter(string $table, callable $definition): Statement throw new UnsupportedException('Foreign keys are not supported in ClickHouse.'); } + if (! empty($blueprint->settings)) { + throw new UnsupportedException( + 'Table SETTINGS can only be set on CREATE TABLE; emit `ALTER TABLE ... MODIFY SETTING` directly to change them.' + ); + } + if (empty($alterations)) { throw new ValidationException('ALTER TABLE requires at least one alteration.'); } @@ -245,9 +259,14 @@ private function compileSkipAlgorithm(SkipIndex $skip): string } $args = \array_map( - fn (string|int|float $arg): string => \is_string($arg) - ? "'" . \str_replace("'", "''", $arg) . "'" - : (string) $arg, + fn (string|int|float $arg): string => match (true) { + \is_string($arg) => "'" . \str_replace("'", "''", $arg) . "'", + // sprintf('%F', ...) avoids scientific notation (e.g. 1.0E-5) + // which ClickHouse rejects in index type arguments. Trim + // trailing zeros so 0.01 stays "0.010000" → "0.01". + \is_float($arg) => \rtrim(\rtrim(\sprintf('%F', $arg), '0'), '.'), + default => (string) $arg, + }, $skip->algorithmArgs, ); diff --git a/src/Query/Schema/ClickHouse/SkipIndex.php b/src/Query/Schema/ClickHouse/SkipIndex.php index 2c9ff86..e49e11e 100644 --- a/src/Query/Schema/ClickHouse/SkipIndex.php +++ b/src/Query/Schema/ClickHouse/SkipIndex.php @@ -29,5 +29,23 @@ public function __construct( if ($granularity < 1) { throw new ValidationException('Skip index granularity must be >= 1.'); } + if ($algorithmArgs !== [] && ! self::algorithmAcceptsArgs($algorithm)) { + throw new ValidationException( + $algorithm->value . ' does not accept algorithm arguments.' 
+ ); + } + } + + /** + * MinMax and Inverted are emitted without parentheses; passing args to + * them would produce DDL that ClickHouse rejects at parse time. + */ + private static function algorithmAcceptsArgs(SkipIndexAlgorithm $algorithm): bool + { + return match ($algorithm) { + SkipIndexAlgorithm::MinMax, + SkipIndexAlgorithm::Inverted => false, + default => true, + }; } } diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index 4bc49cf..eec6596 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -580,7 +580,15 @@ public function dataSkippingIndex( string $name = '', ): static { if ($name === '') { - $name = 'skip_' . \implode('_', $columns); + // Sanitise column names — substring matches like `event-type` or + // `ns.col` are valid SQL identifiers when quoted, but the + // generated index name must still pass the strict identifier + // regex on `SkipIndex`. + $sanitised = \array_map( + fn (string $c): string => \preg_replace('/[^A-Za-z0-9_]+/', '_', $c) ?? $c, + $columns, + ); + $name = 'skip_' . 
\implode('_', $sanitised); } $this->skipIndexes[] = new SkipIndex($name, $columns, $algorithm, $algorithmArgs, $granularity); diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 3a771ff..2eb9d64 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -861,4 +861,103 @@ public function testTableSettingsRejectsInvalidValue(): void $table->settings(['ok_key' => "evil'; DROP TABLE x; --"]); }); } + + public function testDataSkippingIndexNoArgAlgorithmRejectsArgs(): void + { + $this->expectException(ValidationException::class); + $this->expectExceptionMessage('minmax does not accept algorithm arguments.'); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->integer('score'); + $table->dataSkippingIndex(['score'], SkipIndexAlgorithm::MinMax, algorithmArgs: [3]); + }); + } + + public function testDataSkippingIndexInvertedRejectsArgs(): void + { + $this->expectException(ValidationException::class); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('text'); + $table->dataSkippingIndex(['text'], SkipIndexAlgorithm::Inverted, algorithmArgs: [42]); + }); + } + + public function testDataSkippingIndexAutoNameSanitisesNonIdentifierColumns(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('event-type'); + $table->dataSkippingIndex(['event-type'], SkipIndexAlgorithm::BloomFilter); + }); + $this->assertBindingCount($result); + + // Auto name: skip_event_type (non-identifier chars collapsed to _) + $this->assertSame( + 'CREATE TABLE `events` (`id` Int64, `event-type` String,' + . ' INDEX `skip_event_type` `event-type` TYPE bloom_filter GRANULARITY 1)' + . 
' ENGINE = MergeTree() ORDER BY (`id`)', + $result->query, + ); + } + + public function testDataSkippingIndexFloatArgAvoidsScientificNotation(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + // 1e-5 false positive rate: the bug pre-fix is `(string) 1e-5` returning "1.0E-5" + $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); + }); + $this->assertBindingCount($result); + + $this->assertStringContainsString('TYPE bloom_filter(0.00001)', $result->query); + // Numeric arg should be fixed-point — no 'E-' or 'E+' anywhere + $this->assertDoesNotMatchRegularExpression('/[Ee][+-]\d/', $result->query); + } + + public function testAlterAddSkipIndex(): void + { + $schema = new Schema(); + $result = $schema->alter('events', function (Table $table) { + $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'ALTER TABLE `events` ADD INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1', + $result->query, + ); + } + + public function testAlterAddSkipIndexComposite(): void + { + $schema = new Schema(); + $result = $schema->alter('events', function (Table $table) { + $table->dataSkippingIndex(['user_id', 'event'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100], name: 'idx_user_event'); + }); + $this->assertBindingCount($result); + + $this->assertSame( + 'ALTER TABLE `events` ADD INDEX `idx_user_event` (`user_id`, `event`) TYPE set(100) GRANULARITY 4', + $result->query, + ); + } + + public function testAlterRejectsSettings(): void + { + $this->expectException(UnsupportedException::class); + $this->expectExceptionMessage('SETTINGS'); + + $schema = new Schema(); + $schema->alter('events', function (Table $table) { + $table->settings(['index_granularity' => 4096]); + }); + } } From 
f03abae37be3f20f7dddc31eae8c9e773c4a3fd1 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 30 Apr 2026 01:08:24 +0000 Subject: [PATCH 03/16] refactor(schema): collapse skip-index API into Table::index() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The ClickHouse parts of this library are unreleased and have no external consumers, so there is no need for a parallel `dataSkippingIndex()` / `SkipIndex` API alongside the existing `index()` / `Index` API. Folding them in produces a single consistent index API that works the same in every dialect, with ClickHouse-specific parameters available as optional named arguments (mirroring how `method`, `operatorClass`, `lengths`, and `collations` are already dialect-specific options on `index()`). Changes: - `Table::dataSkippingIndex()` — removed. - `Schema\ClickHouse\SkipIndex` value object — removed (folded into `Index`). - `Schema\ClickHouse\SkipIndexAlgorithm` enum — kept; useful typed value. - `Index` gains `algorithm`, `algorithmArgs`, and `granularity` optional fields. Validates name (must be a SQL identifier), at-least-one-column, granularity ≥ 1, and that no-arg algorithms (MinMax, Inverted) get no `algorithmArgs`. - `Table::index()` and `uniqueIndex()` accept the new fields. Auto-name generation now sanitises non-identifier characters in column names so `event-type` produces `idx_event_type` instead of failing the identifier regex. - `Schema\ClickHouse::create()` and `alter()` use a single `compileSkipIndex(Index)` helper that renders `INDEX <name> <expr> TYPE <algorithm>[(args)] GRANULARITY <n>`. Defaults to `TYPE minmax GRANULARITY 3` when no algorithm is set, matching the ClickHouse-canonical default for a generic `INDEX` without a chosen type. - Float `algorithmArgs` are formatted with `sprintf('%F', ...)` and trimmed of trailing zeros, so `1.0e-5` becomes `0.00001` rather than `1.0E-5` (which ClickHouse rejects in index type arguments). 5083/5083 tests pass; lint and PHPStan max clean. 
--- README.md | 21 +++--- src/Query/Schema/ClickHouse.php | 73 ++++++++++----------- src/Query/Schema/ClickHouse/SkipIndex.php | 51 --------------- src/Query/Schema/Index.php | 35 ++++++++++ src/Query/Schema/Table.php | 79 +++++++++-------------- tests/Query/Schema/ClickHouseTest.php | 60 +++++++++-------- 6 files changed, 143 insertions(+), 176 deletions(-) delete mode 100644 src/Query/Schema/ClickHouse/SkipIndex.php diff --git a/README.md b/README.md index b914f12..c24b052 100644 --- a/README.md +++ b/README.md @@ -2081,7 +2081,7 @@ $schema->create('events', function (Table $table) { TTL expressions are emitted verbatim; they must not be empty or contain semicolons. Dialects other than ClickHouse throw `UnsupportedException`. -**Data-skipping indexes** — accelerate WHERE pruning by letting ClickHouse skip whole granules: +**Skip-index algorithms** — every ClickHouse index is a data-skipping index that accelerates WHERE pruning by letting the engine skip whole granules. Pick the algorithm that matches the column shape via the `algorithm` argument on `Table::index()`: ```php use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; @@ -2092,20 +2092,25 @@ $schema->create('events', function (Table $table) { $table->string('country'); $table->string('text'); - // Default granularity = 1, no algorithm args - $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + // BloomFilter — high-cardinality strings with `=` / `IN` predicates + $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); - // Set(N) — small fixed value sets - $table->dataSkippingIndex(['country'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100]); + // Set(N) — small fixed value sets, custom granularity + $table->index(['country'], algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); // NgramBloomFilter(n, size_bytes, hashes, seed) — text search on `LIKE` / `match` - $table->dataSkippingIndex(['text'], 
SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + $table->index(['text'], algorithm: SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + + // No algorithm specified → defaults to `TYPE minmax GRANULARITY 3` + $table->index(['id']); }); -// CREATE TABLE `events` (..., INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1, ...) +// CREATE TABLE `events` (..., INDEX `idx_user_id` `user_id` TYPE bloom_filter GRANULARITY 1, ...) ``` -The 6 algorithms are `MinMax`, `Set`, `BloomFilter`, `NgramBloomFilter`, `TokenBloomFilter`, `Inverted`. Algorithm-specific arguments are passed via `algorithmArgs` and rendered verbatim — supply them from trusted (developer-controlled) source. Other dialects ignore the call. +The 6 algorithms are `MinMax`, `Set`, `BloomFilter`, `NgramBloomFilter`, `TokenBloomFilter`, `Inverted`. Algorithm-specific arguments are passed via `algorithmArgs` and rendered verbatim — supply them from trusted (developer-controlled) source. Other dialects ignore the ClickHouse-only `algorithm` / `algorithmArgs` / `granularity` arguments. + +`MinMax` and `Inverted` take no parenthesised arguments in ClickHouse DDL — passing `algorithmArgs` for them throws `ValidationException`. Skip indexes can also be added via `ALTER TABLE … ADD INDEX` by calling `index()` inside an `alter()` callback. 
**Engine SETTINGS** — emit `SETTINGS k=v` after the TTL clause: diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 244cadf..31bbe63 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -9,7 +9,6 @@ use Utopia\Query\QuotesIdentifiers; use Utopia\Query\Schema; use Utopia\Query\Schema\ClickHouse\Engine; -use Utopia\Query\Schema\ClickHouse\SkipIndex; use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\DropPartition; use Utopia\Query\Schema\Feature\TableComments; @@ -130,12 +129,8 @@ public function alter(string $table, callable $definition): Statement $alterations[] = 'DROP INDEX ' . $this->quote($name); } - foreach ($blueprint->skipIndexes as $skip) { - $cols = \array_map(fn (string $c): string => $this->quote($c), $skip->columns); - $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; - $alterations[] = 'ADD INDEX ' . $this->quote($skip->name) - . ' ' . $expr . ' TYPE ' . $this->compileSkipAlgorithm($skip) - . ' GRANULARITY ' . $skip->granularity; + foreach ($blueprint->indexes as $index) { + $alterations[] = 'ADD ' . $this->compileSkipIndex($index); } if (! empty($blueprint->foreignKeys)) { @@ -190,20 +185,8 @@ public function create(string $table, callable $definition, bool $ifNotExists = $primaryKeys = \array_map(fn (string $c): string => $this->quote($c), $blueprint->compositePrimaryKey); } - // Indexes (ClickHouse uses INDEX ... TYPE ... GRANULARITY ...) foreach ($blueprint->indexes as $index) { - $cols = \array_map(fn (string $c): string => $this->quote($c), $index->columns); - $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; - $columnDefs[] = 'INDEX ' . $this->quote($index->name) - . ' ' . $expr . ' TYPE minmax GRANULARITY 3'; - } - - foreach ($blueprint->skipIndexes as $skip) { - $cols = \array_map(fn (string $c): string => $this->quote($c), $skip->columns); - $expr = \count($cols) === 1 ? $cols[0] : '(' . 
\implode(', ', $cols) . ')'; - $columnDefs[] = 'INDEX ' . $this->quote($skip->name) - . ' ' . $expr . ' TYPE ' . $this->compileSkipAlgorithm($skip) - . ' GRANULARITY ' . $skip->granularity; + $columnDefs[] = $this->compileSkipIndex($index); } if (! empty($blueprint->foreignKeys)) { @@ -246,31 +229,43 @@ public function create(string $table, callable $definition, bool $ifNotExists = } /** - * Render a `TYPE (args)` fragment for a data-skipping index. + * Render a full `INDEX TYPE [(args)] GRANULARITY ` + * fragment, used by both CREATE TABLE and ALTER TABLE ADD INDEX. * - * String args are emitted as single-quoted SQL literals (with `'` doubled); - * numeric args are emitted verbatim. Argument values come from the - * application — never from untrusted input. + * Defaults to `TYPE minmax GRANULARITY 3` when no algorithm is set on the + * index — matches the ClickHouse default behaviour for callers using the + * generic `Table::index()` without picking an algorithm. */ - private function compileSkipAlgorithm(SkipIndex $skip): string + private function compileSkipIndex(Index $index): string { - if ($skip->algorithmArgs === []) { - return $skip->algorithm->value; + $cols = \array_map(fn (string $c): string => $this->quote($c), $index->columns); + $expr = \count($cols) === 1 ? $cols[0] : '(' . \implode(', ', $cols) . ')'; + + if ($index->algorithm === null) { + return 'INDEX ' . $this->quote($index->name) . ' ' . $expr + . ' TYPE minmax GRANULARITY 3'; } - $args = \array_map( - fn (string|int|float $arg): string => match (true) { - \is_string($arg) => "'" . \str_replace("'", "''", $arg) . "'", - // sprintf('%F', ...) avoids scientific notation (e.g. 1.0E-5) - // which ClickHouse rejects in index type arguments. Trim - // trailing zeros so 0.01 stays "0.010000" → "0.01". 
- \is_float($arg) => \rtrim(\rtrim(\sprintf('%F', $arg), '0'), '.'), - default => (string) $arg, - }, - $skip->algorithmArgs, - ); + $type = $index->algorithm->value; + + if ($index->algorithmArgs !== []) { + $args = \array_map( + fn (string|int|float $arg): string => match (true) { + \is_string($arg) => "'" . \str_replace("'", "''", $arg) . "'", + // sprintf('%F', ...) avoids scientific notation (e.g. 1.0E-5) + // which ClickHouse rejects in index type arguments. Trim + // trailing zeros so 0.01 stays "0.010000" → "0.01". + \is_float($arg) => \rtrim(\rtrim(\sprintf('%F', $arg), '0'), '.'), + default => (string) $arg, + }, + $index->algorithmArgs, + ); + + $type .= '(' . \implode(', ', $args) . ')'; + } - return $skip->algorithm->value . '(' . \implode(', ', $args) . ')'; + return 'INDEX ' . $this->quote($index->name) . ' ' . $expr + . ' TYPE ' . $type . ' GRANULARITY ' . $index->granularity; } /** diff --git a/src/Query/Schema/ClickHouse/SkipIndex.php b/src/Query/Schema/ClickHouse/SkipIndex.php deleted file mode 100644 index e49e11e..0000000 --- a/src/Query/Schema/ClickHouse/SkipIndex.php +++ /dev/null @@ -1,51 +0,0 @@ - $columns - * @param list $algorithmArgs Args for parameterized algorithms - * (e.g. [3] for set(3), - * [0.01] for bloom_filter(0.01), - * [4, 1024, 3, 0] for ngrambf_v1(n, size_bytes, hashes, seed)) - */ - public function __construct( - public string $name, - public array $columns, - public SkipIndexAlgorithm $algorithm, - public array $algorithmArgs = [], - public int $granularity = 1, - ) { - if (! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $name)) { - throw new ValidationException('Invalid skip index name: ' . $name); - } - if ($columns === []) { - throw new ValidationException('Skip index requires at least one column.'); - } - if ($granularity < 1) { - throw new ValidationException('Skip index granularity must be >= 1.'); - } - if ($algorithmArgs !== [] && ! 
self::algorithmAcceptsArgs($algorithm)) { - throw new ValidationException( - $algorithm->value . ' does not accept algorithm arguments.' - ); - } - } - - /** - * MinMax and Inverted are emitted without parentheses; passing args to - * them would produce DDL that ClickHouse rejects at parse time. - */ - private static function algorithmAcceptsArgs(SkipIndexAlgorithm $algorithm): bool - { - return match ($algorithm) { - SkipIndexAlgorithm::MinMax, - SkipIndexAlgorithm::Inverted => false, - default => true, - }; - } -} diff --git a/src/Query/Schema/Index.php b/src/Query/Schema/Index.php index f0c48c2..a286649 100644 --- a/src/Query/Schema/Index.php +++ b/src/Query/Schema/Index.php @@ -3,6 +3,7 @@ namespace Utopia\Query\Schema; use Utopia\Query\Exception\ValidationException; +use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; readonly class Index { @@ -12,6 +13,10 @@ * @param array $orders * @param array $collations Column-specific collations (column name => collation) * @param list $rawColumns Raw SQL expressions appended to the column list (bypass quoting) + * @param list $algorithmArgs ClickHouse skip-index algorithm args + * (e.g. [3] for set(3), + * [0.01] for bloom_filter(0.01), + * [4, 1024, 3, 0] for ngrambf_v1(n, size_bytes, hashes, seed)) */ public function __construct( public string $name, @@ -23,7 +28,16 @@ public function __construct( public string $operatorClass = '', public array $collations = [], public array $rawColumns = [], + public ?SkipIndexAlgorithm $algorithm = null, + public array $algorithmArgs = [], + public int $granularity = 1, ) { + if (! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $name)) { + throw new ValidationException('Invalid index name: ' . $name); + } + if ($columns === [] && $rawColumns === []) { + throw new ValidationException('Index requires at least one column.'); + } if ($method !== '' && ! \preg_match('/^[A-Za-z0-9_]+$/', $method)) { throw new ValidationException('Invalid index method: ' . 
$method); } @@ -35,5 +49,26 @@ public function __construct( throw new ValidationException('Invalid collation: ' . $collation); } } + if ($granularity < 1) { + throw new ValidationException('Index granularity must be >= 1.'); + } + if ($algorithm !== null && $algorithmArgs !== [] && ! self::algorithmAcceptsArgs($algorithm)) { + throw new ValidationException( + $algorithm->value . ' does not accept algorithm arguments.' + ); + } + } + + /** + * MinMax and Inverted are emitted without parentheses in ClickHouse DDL; + * passing args to them would produce invalid SQL. + */ + private static function algorithmAcceptsArgs(SkipIndexAlgorithm $algorithm): bool + { + return match ($algorithm) { + SkipIndexAlgorithm::MinMax, + SkipIndexAlgorithm::Inverted => false, + default => true, + }; } } diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index eec6596..d81ad23 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -4,7 +4,6 @@ use Utopia\Query\Exception\ValidationException; use Utopia\Query\Schema\ClickHouse\Engine; -use Utopia\Query\Schema\ClickHouse\SkipIndex; use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; class Table @@ -53,9 +52,6 @@ class Table public private(set) ?string $ttl = null; - /** @var list ClickHouse data-skipping indexes (other dialects ignore) */ - public private(set) array $skipIndexes = []; - /** @var array Table-level engine SETTINGS (ClickHouse only) */ public private(set) array $settings = []; @@ -306,6 +302,7 @@ public function timestamps(int $precision = 3): void * @param array $lengths * @param array $orders * @param array $collations + * @param list $algorithmArgs ClickHouse skip-index algorithm args */ public function index( array $columns, @@ -315,11 +312,26 @@ public function index( array $lengths = [], array $orders = [], array $collations = [], + ?SkipIndexAlgorithm $algorithm = null, + array $algorithmArgs = [], + int $granularity = 1, ): void { if ($name === '') { - $name = 'idx_' . 
\implode('_', $columns); + $name = $this->autoIndexName('idx_', $columns); } - $this->indexes[] = new Index($name, $columns, IndexType::Index, $lengths, $orders, $method, $operatorClass, $collations); + $this->indexes[] = new Index( + $name, + $columns, + IndexType::Index, + $lengths, + $orders, + $method, + $operatorClass, + $collations, + algorithm: $algorithm, + algorithmArgs: $algorithmArgs, + granularity: $granularity, + ); } /** @@ -336,7 +348,7 @@ public function uniqueIndex( array $collations = [], ): void { if ($name === '') { - $name = 'uniq_' . \implode('_', $columns); + $name = $this->autoIndexName('uniq_', $columns); } $this->indexes[] = new Index($name, $columns, IndexType::Unique, $lengths, $orders, collations: $collations); } @@ -347,7 +359,7 @@ public function uniqueIndex( public function fulltextIndex(array $columns, string $name = ''): void { if ($name === '') { - $name = 'ft_' . \implode('_', $columns); + $name = $this->autoIndexName('ft_', $columns); } $this->indexes[] = new Index($name, $columns, IndexType::Fulltext); } @@ -358,7 +370,7 @@ public function fulltextIndex(array $columns, string $name = ''): void public function spatialIndex(array $columns, string $name = ''): void { if ($name === '') { - $name = 'sp_' . \implode('_', $columns); + $name = $this->autoIndexName('sp_', $columns); } $this->indexes[] = new Index($name, $columns, IndexType::Spatial); } @@ -553,47 +565,20 @@ public function ttl(string $expression): static } /** - * Attach a ClickHouse data-skipping index. Other dialects ignore this. - * - * Skip indexes accelerate WHERE clauses by letting ClickHouse skip whole - * granules during scanning. Choose the algorithm that matches the column - * cardinality and predicate type: - * - * - `MinMax` — numeric ranges, low cardinality - * - `Set(N)` — small fixed value sets (N is the set size cap) - * - `BloomFilter(p)` — high cardinality string columns with `=` / `IN` - * predicates (p is the false-positive probability, e.g. 
0.01) - * - `NgramBloomFilter(n, size, hashes, seed)` — `LIKE` / `match` on text - * - `TokenBloomFilter(size, hashes, seed)` — token-style search - * - `Inverted` — `LIKE`, `match`, `hasToken` (experimental) + * Build an auto-generated index name with a prefix, sanitising any + * non-identifier characters in the column names so the result is always a + * valid SQL identifier. * - * @param list $columns - * @param list $algorithmArgs Algorithm-specific arguments - * - * @throws ValidationException if the index name or columns are invalid. + * @param string[] $columns */ - public function dataSkippingIndex( - array $columns, - SkipIndexAlgorithm $algorithm, - int $granularity = 1, - array $algorithmArgs = [], - string $name = '', - ): static { - if ($name === '') { - // Sanitise column names — substring matches like `event-type` or - // `ns.col` are valid SQL identifiers when quoted, but the - // generated index name must still pass the strict identifier - // regex on `SkipIndex`. - $sanitised = \array_map( - fn (string $c): string => \preg_replace('/[^A-Za-z0-9_]+/', '_', $c) ?? $c, - $columns, - ); - $name = 'skip_' . \implode('_', $sanitised); - } - - $this->skipIndexes[] = new SkipIndex($name, $columns, $algorithm, $algorithmArgs, $granularity); + private function autoIndexName(string $prefix, array $columns): string + { + $sanitised = \array_map( + fn (string $c): string => \preg_replace('/[^A-Za-z0-9_]+/', '_', $c) ?? $c, + $columns, + ); - return $this; + return $prefix . 
\implode('_', $sanitised); } /** diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 2eb9d64..0aab92d 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -722,53 +722,53 @@ public function testColumnLevelTTL(): void $this->assertSame('CREATE TABLE `events` (`id` Int32, `temporary` String TTL ts + INTERVAL 1 DAY, `ts` DateTime) ENGINE = MergeTree() ORDER BY (`id`)', $result->query); } - // Data-skipping indexes + // ClickHouse skip-index algorithm selection - public function testDataSkippingIndexBloomFilter(): void + public function testIndexBloomFilter(): void { $schema = new Schema(); $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); $this->assertSame( - 'CREATE TABLE `events` (`id` Int64, `user_id` String, INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY (`id`)', + 'CREATE TABLE `events` (`id` Int64, `user_id` String, INDEX `idx_user_id` `user_id` TYPE bloom_filter GRANULARITY 1) ENGINE = MergeTree() ORDER BY (`id`)', $result->query, ); } - public function testDataSkippingIndexWithArgs(): void + public function testIndexWithAlgorithmArgs(): void { $schema = new Schema(); $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('country'); $table->string('text'); - $table->dataSkippingIndex(['country'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100]); - $table->dataSkippingIndex(['text'], SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + $table->index(['country'], algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); + $table->index(['text'], algorithm: 
SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); }); $this->assertBindingCount($result); $this->assertSame( 'CREATE TABLE `events` (`id` Int64, `country` String, `text` String,' - . ' INDEX `skip_country` `country` TYPE set(100) GRANULARITY 4,' - . ' INDEX `skip_text` `text` TYPE ngrambf_v1(4, 1024, 3, 0) GRANULARITY 1)' + . ' INDEX `idx_country` `country` TYPE set(100) GRANULARITY 4,' + . ' INDEX `idx_text` `text` TYPE ngrambf_v1(4, 1024, 3, 0) GRANULARITY 1)' . ' ENGINE = MergeTree() ORDER BY (`id`)', $result->query, ); } - public function testDataSkippingIndexCompositeColumns(): void + public function testIndexCompositeColumnsWithAlgorithm(): void { $schema = new Schema(); $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); $table->string('event'); - $table->dataSkippingIndex(['user_id', 'event'], SkipIndexAlgorithm::BloomFilter, name: 'idx_user_event'); + $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: SkipIndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); @@ -780,7 +780,7 @@ public function testDataSkippingIndexCompositeColumns(): void ); } - public function testDataSkippingIndexInvalidGranularityThrows(): void + public function testIndexInvalidGranularityThrows(): void { $this->expectException(ValidationException::class); @@ -788,18 +788,18 @@ public function testDataSkippingIndexInvalidGranularityThrows(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter, granularity: 0); + $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter, granularity: 0); }); } - public function testDataSkippingIndexEmptyColumnsThrows(): void + public function testIndexEmptyColumnsThrows(): void { $this->expectException(ValidationException::class); $schema = new Schema(); 
$schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); - $table->dataSkippingIndex([], SkipIndexAlgorithm::BloomFilter); + $table->index([]); }); } @@ -862,7 +862,7 @@ public function testTableSettingsRejectsInvalidValue(): void }); } - public function testDataSkippingIndexNoArgAlgorithmRejectsArgs(): void + public function testIndexNoArgAlgorithmRejectsArgs(): void { $this->expectException(ValidationException::class); $this->expectExceptionMessage('minmax does not accept algorithm arguments.'); @@ -871,11 +871,11 @@ public function testDataSkippingIndexNoArgAlgorithmRejectsArgs(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->integer('score'); - $table->dataSkippingIndex(['score'], SkipIndexAlgorithm::MinMax, algorithmArgs: [3]); + $table->index(['score'], algorithm: SkipIndexAlgorithm::MinMax, algorithmArgs: [3]); }); } - public function testDataSkippingIndexInvertedRejectsArgs(): void + public function testIndexInvertedRejectsArgs(): void { $this->expectException(ValidationException::class); @@ -883,37 +883,35 @@ public function testDataSkippingIndexInvertedRejectsArgs(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('text'); - $table->dataSkippingIndex(['text'], SkipIndexAlgorithm::Inverted, algorithmArgs: [42]); + $table->index(['text'], algorithm: SkipIndexAlgorithm::Inverted, algorithmArgs: [42]); }); } - public function testDataSkippingIndexAutoNameSanitisesNonIdentifierColumns(): void + public function testIndexAutoNameSanitisesNonIdentifierColumns(): void { $schema = new Schema(); $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('event-type'); - $table->dataSkippingIndex(['event-type'], SkipIndexAlgorithm::BloomFilter); + $table->index(['event-type'], algorithm: SkipIndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); - // 
Auto name: skip_event_type (non-identifier chars collapsed to _) $this->assertSame( 'CREATE TABLE `events` (`id` Int64, `event-type` String,' - . ' INDEX `skip_event_type` `event-type` TYPE bloom_filter GRANULARITY 1)' + . ' INDEX `idx_event_type` `event-type` TYPE bloom_filter GRANULARITY 1)' . ' ENGINE = MergeTree() ORDER BY (`id`)', $result->query, ); } - public function testDataSkippingIndexFloatArgAvoidsScientificNotation(): void + public function testIndexFloatArgAvoidsScientificNotation(): void { $schema = new Schema(); $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - // 1e-5 false positive rate: the bug pre-fix is `(string) 1e-5` returning "1.0E-5" - $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); + $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); }); $this->assertBindingCount($result); @@ -922,25 +920,25 @@ public function testDataSkippingIndexFloatArgAvoidsScientificNotation(): void $this->assertDoesNotMatchRegularExpression('/[Ee][+-]\d/', $result->query); } - public function testAlterAddSkipIndex(): void + public function testAlterAddIndexWithAlgorithm(): void { $schema = new Schema(); $result = $schema->alter('events', function (Table $table) { - $table->dataSkippingIndex(['user_id'], SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); $this->assertSame( - 'ALTER TABLE `events` ADD INDEX `skip_user_id` `user_id` TYPE bloom_filter GRANULARITY 1', + 'ALTER TABLE `events` ADD INDEX `idx_user_id` `user_id` TYPE bloom_filter GRANULARITY 1', $result->query, ); } - public function testAlterAddSkipIndexComposite(): void + public function testAlterAddIndexComposite(): void { $schema = new Schema(); $result = $schema->alter('events', function (Table $table) { - 
$table->dataSkippingIndex(['user_id', 'event'], SkipIndexAlgorithm::Set, granularity: 4, algorithmArgs: [100], name: 'idx_user_event'); + $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); }); $this->assertBindingCount($result); From fb4f7831236df631ee99934024f9793b32cefb4f Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 30 Apr 2026 01:28:57 +0000 Subject: [PATCH 04/16] refactor(schema): rename SkipIndexAlgorithm to IndexAlgorithm After collapsing `dataSkippingIndex()` into `index()` and `SkipIndex` into `Index`, the `Skip` prefix on the algorithm enum is the last holdout from the parallel-API design and is now inconsistent with the new naming. The enum already lives in `Schema\ClickHouse\` so the namespace says "ClickHouse"; the type name only needs to say "this is the algorithm a ClickHouse index runs". `SkipIndexAlgorithm` -> `IndexAlgorithm`. No behavioural change. --- README.md | 8 +++---- ...pIndexAlgorithm.php => IndexAlgorithm.php} | 2 +- src/Query/Schema/Index.php | 10 ++++---- src/Query/Schema/Table.php | 4 ++-- tests/Query/Schema/ClickHouseTest.php | 24 +++++++++---------- 5 files changed, 24 insertions(+), 24 deletions(-) rename src/Query/Schema/ClickHouse/{SkipIndexAlgorithm.php => IndexAlgorithm.php} (89%) diff --git a/README.md b/README.md index c24b052..6636a1f 100644 --- a/README.md +++ b/README.md @@ -2084,7 +2084,7 @@ TTL expressions are emitted verbatim; they must not be empty or contain semicolo **Skip-index algorithms** — every ClickHouse index is a data-skipping index that accelerates WHERE pruning by letting the engine skip whole granules. 
Pick the algorithm that matches the column shape via the `algorithm` argument on `Table::index()`: ```php -use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; +use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); @@ -2093,13 +2093,13 @@ $schema->create('events', function (Table $table) { $table->string('text'); // BloomFilter — high-cardinality strings with `=` / `IN` predicates - $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); // Set(N) — small fixed value sets, custom granularity - $table->index(['country'], algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); + $table->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); // NgramBloomFilter(n, size_bytes, hashes, seed) — text search on `LIKE` / `match` - $table->index(['text'], algorithm: SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + $table->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); // No algorithm specified → defaults to `TYPE minmax GRANULARITY 3` $table->index(['id']); diff --git a/src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php b/src/Query/Schema/ClickHouse/IndexAlgorithm.php similarity index 89% rename from src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php rename to src/Query/Schema/ClickHouse/IndexAlgorithm.php index 58360ea..96d5ffa 100644 --- a/src/Query/Schema/ClickHouse/SkipIndexAlgorithm.php +++ b/src/Query/Schema/ClickHouse/IndexAlgorithm.php @@ -2,7 +2,7 @@ namespace Utopia\Query\Schema\ClickHouse; -enum SkipIndexAlgorithm: string +enum IndexAlgorithm: string { case MinMax = 'minmax'; case Set = 'set'; diff --git a/src/Query/Schema/Index.php b/src/Query/Schema/Index.php index a286649..32c1144 100644 --- a/src/Query/Schema/Index.php +++ b/src/Query/Schema/Index.php @@ -3,7 +3,7 @@ 
namespace Utopia\Query\Schema; use Utopia\Query\Exception\ValidationException; -use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; +use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; readonly class Index { @@ -28,7 +28,7 @@ public function __construct( public string $operatorClass = '', public array $collations = [], public array $rawColumns = [], - public ?SkipIndexAlgorithm $algorithm = null, + public ?IndexAlgorithm $algorithm = null, public array $algorithmArgs = [], public int $granularity = 1, ) { @@ -63,11 +63,11 @@ public function __construct( * MinMax and Inverted are emitted without parentheses in ClickHouse DDL; * passing args to them would produce invalid SQL. */ - private static function algorithmAcceptsArgs(SkipIndexAlgorithm $algorithm): bool + private static function algorithmAcceptsArgs(IndexAlgorithm $algorithm): bool { return match ($algorithm) { - SkipIndexAlgorithm::MinMax, - SkipIndexAlgorithm::Inverted => false, + IndexAlgorithm::MinMax, + IndexAlgorithm::Inverted => false, default => true, }; } diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index d81ad23..50c8183 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -4,7 +4,7 @@ use Utopia\Query\Exception\ValidationException; use Utopia\Query\Schema\ClickHouse\Engine; -use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; +use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; class Table { @@ -312,7 +312,7 @@ public function index( array $lengths = [], array $orders = [], array $collations = [], - ?SkipIndexAlgorithm $algorithm = null, + ?IndexAlgorithm $algorithm = null, array $algorithmArgs = [], int $granularity = 1, ): void { diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 0aab92d..9c28432 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -10,7 +10,7 @@ use Utopia\Query\Query; use Utopia\Query\Schema\ClickHouse as Schema; use 
Utopia\Query\Schema\ClickHouse\Engine; -use Utopia\Query\Schema\ClickHouse\SkipIndexAlgorithm; +use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; use Utopia\Query\Schema\Feature\ColumnComments; use Utopia\Query\Schema\Feature\DropPartition; use Utopia\Query\Schema\Feature\ForeignKeys; @@ -730,7 +730,7 @@ public function testIndexBloomFilter(): void $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); @@ -747,8 +747,8 @@ public function testIndexWithAlgorithmArgs(): void $table->bigInteger('id')->primary(); $table->string('country'); $table->string('text'); - $table->index(['country'], algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); - $table->index(['text'], algorithm: SkipIndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); + $table->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); + $table->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); }); $this->assertBindingCount($result); @@ -768,7 +768,7 @@ public function testIndexCompositeColumnsWithAlgorithm(): void $table->bigInteger('id')->primary(); $table->string('user_id'); $table->string('event'); - $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); @@ -788,7 +788,7 @@ public function testIndexInvalidGranularityThrows(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter, granularity: 0); + 
$table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, granularity: 0); }); } @@ -871,7 +871,7 @@ public function testIndexNoArgAlgorithmRejectsArgs(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->integer('score'); - $table->index(['score'], algorithm: SkipIndexAlgorithm::MinMax, algorithmArgs: [3]); + $table->index(['score'], algorithm: IndexAlgorithm::MinMax, algorithmArgs: [3]); }); } @@ -883,7 +883,7 @@ public function testIndexInvertedRejectsArgs(): void $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('text'); - $table->index(['text'], algorithm: SkipIndexAlgorithm::Inverted, algorithmArgs: [42]); + $table->index(['text'], algorithm: IndexAlgorithm::Inverted, algorithmArgs: [42]); }); } @@ -893,7 +893,7 @@ public function testIndexAutoNameSanitisesNonIdentifierColumns(): void $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('event-type'); - $table->index(['event-type'], algorithm: SkipIndexAlgorithm::BloomFilter); + $table->index(['event-type'], algorithm: IndexAlgorithm::BloomFilter); }); $this->assertBindingCount($result); @@ -911,7 +911,7 @@ public function testIndexFloatArgAvoidsScientificNotation(): void $result = $schema->create('events', function (Table $table) { $table->bigInteger('id')->primary(); $table->string('user_id'); - $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); + $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); }); $this->assertBindingCount($result); @@ -924,7 +924,7 @@ public function testAlterAddIndexWithAlgorithm(): void { $schema = new Schema(); $result = $schema->alter('events', function (Table $table) { - $table->index(['user_id'], algorithm: SkipIndexAlgorithm::BloomFilter); + $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); }); 
$this->assertBindingCount($result); @@ -938,7 +938,7 @@ public function testAlterAddIndexComposite(): void { $schema = new Schema(); $result = $schema->alter('events', function (Table $table) { - $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: SkipIndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); + $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); }); $this->assertBindingCount($result); From f3e411d0826f42dac323f7e62539a69b0d94cfb4 Mon Sep 17 00:00:00 2001 From: Damodar Lohani Date: Thu, 30 Apr 2026 01:38:40 +0000 Subject: [PATCH 05/16] fix(schema): scope index-name regex to CH; format settings floats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two P1 follow-ups from the latest greptile review: - The strict name regex on `Index` (`[A-Za-z_][A-Za-z0-9_]*`) was a cross-dialect breaking change: MySQL, MariaDB, PostgreSQL, and SQLite all quote index names, so hyphenated and dot-qualified names were valid before this PR. The strict identifier check now only fires when an `IndexAlgorithm` is set — i.e. for ClickHouse data-skipping indexes where the name appears unquoted in `INDEX ... TYPE ...`. - `Table::settings()` cast floats with `(string) $value`, which produces scientific notation for small values (e.g. `(string) 1.0e-5 === "1.0E-5"`). ClickHouse rejects E-notation in SETTINGS values. Apply the same `sprintf('%F', ...)` + trim treatment that `compileSkipIndex` already uses for algorithm args. Adds tests for both: hyphenated index names work without an algorithm and throw with one; SETTINGS values formatted from `1.0e-5` come out as `0.00001`. 
--- src/Query/Schema/Index.php | 5 +++- src/Query/Schema/Table.php | 7 ++++- tests/Query/Schema/ClickHouseTest.php | 39 +++++++++++++++++++++++++++ 3 files changed, 49 insertions(+), 2 deletions(-) diff --git a/src/Query/Schema/Index.php b/src/Query/Schema/Index.php index 32c1144..c9e2e4f 100644 --- a/src/Query/Schema/Index.php +++ b/src/Query/Schema/Index.php @@ -32,7 +32,10 @@ public function __construct( public array $algorithmArgs = [], public int $granularity = 1, ) { - if (! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $name)) { + // Only ClickHouse data-skipping indexes require an unquoted identifier + // for the name; other dialects emit the name backtick-quoted, so + // hyphens, dots, and other characters are valid there. + if ($algorithm !== null && ! \preg_match('/^[A-Za-z_][A-Za-z0-9_]*$/', $name)) { throw new ValidationException('Invalid index name: ' . $name); } if ($columns === [] && $rawColumns === []) { diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index 50c8183..d8a2804 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -607,8 +607,13 @@ public function settings(array $settings): static if (\is_bool($value)) { $sanitized[$key] = $value ? '1' : '0'; - } elseif (\is_int($value) || \is_float($value)) { + } elseif (\is_int($value)) { $sanitized[$key] = (string) $value; + } elseif (\is_float($value)) { + // Avoid scientific notation (e.g. 1.0E-5), which ClickHouse + // rejects in SETTINGS values; trim trailing zeros for clean + // output. + $sanitized[$key] = \rtrim(\rtrim(\sprintf('%F', $value), '0'), '.'); } elseif (\is_string($value)) { if (! 
\preg_match('/^[A-Za-z0-9_.\-+\/]*$/', $value)) { throw new ValidationException( diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 9c28432..229cdd4 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -803,6 +803,32 @@ public function testIndexEmptyColumnsThrows(): void }); } + public function testIndexNameRegexOnlyEnforcedForClickHouseAlgorithms(): void + { + // No algorithm → permissive name allowed (other dialects quote names) + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->index(['user_id'], name: 'idx-with-hyphens'); + }); + $this->assertBindingCount($result); + $this->assertStringContainsString('INDEX `idx-with-hyphens`', $result->query); + } + + public function testIndexNameRegexEnforcedWhenAlgorithmIsSet(): void + { + $this->expectException(ValidationException::class); + $this->expectExceptionMessage('Invalid index name: idx-with-hyphens'); + + $schema = new Schema(); + $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->string('user_id'); + $table->index(['user_id'], name: 'idx-with-hyphens', algorithm: IndexAlgorithm::BloomFilter); + }); + } + // SETTINGS public function testTableSettings(): void @@ -862,6 +888,19 @@ public function testTableSettingsRejectsInvalidValue(): void }); } + public function testTableSettingsFloatAvoidsScientificNotation(): void + { + $schema = new Schema(); + $result = $schema->create('events', function (Table $table) { + $table->bigInteger('id')->primary(); + $table->settings(['merge_with_ttl_timeout' => 1.0e-5]); + }); + $this->assertBindingCount($result); + + $this->assertStringContainsString('SETTINGS merge_with_ttl_timeout = 0.00001', $result->query); + $this->assertDoesNotMatchRegularExpression('/[Ee][+-]\d/', $result->query); + } + public function 
testIndexNoArgAlgorithmRejectsArgs(): void { $this->expectException(ValidationException::class); From a3fcc2b0a196d9d938b626479b6795439f1ab430 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 03:51:15 +0000 Subject: [PATCH 06/16] fix(schema): restore static return type on Table::index() for fluent chaining Agent-Logs-Url: https://github.com/utopia-php/query/sessions/5b098851-2af2-49e9-8778-d7edb0172860 Co-authored-by: abnegate <5857008+abnegate@users.noreply.github.com> --- src/Query/Schema/Table.php | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index fa0d9f8..d44d3f3 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -373,7 +373,7 @@ public function index( ?IndexAlgorithm $algorithm = null, array $algorithmArgs = [], int $granularity = 1, - ): void { + ): static { if ($name === '') { $name = $this->autoIndexName('idx_', $columns); } @@ -390,6 +390,8 @@ public function index( algorithmArgs: $algorithmArgs, granularity: $granularity, ); + + return $this; } /** From b1df44e62973a09dcc68aa2110acaf6041d7ea7f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 04:02:47 +0000 Subject: [PATCH 07/16] fix(schema): guard ClickHouse index loops against non-skip index types Agent-Logs-Url: https://github.com/utopia-php/query/sessions/498037f0-05ae-40ad-8dfa-b72fe523705a Co-authored-by: abnegate <5857008+abnegate@users.noreply.github.com> --- src/Query/Schema/ClickHouse.php | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 9d28fb1..2835c4a 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -125,6 +125,11 @@ public function compileAlter(Table $table): Statement } foreach ($table->indexes as $index) { + if 
($index->type !== IndexType::Index) { + throw new UnsupportedException( + 'Only data-skipping indexes (index()) are supported in ClickHouse ALTER TABLE.' + ); + } $alterations[] = 'ADD ' . $this->compileSkipIndex($index); } @@ -176,6 +181,11 @@ public function compileCreate(Table $table, bool $ifNotExists = false): Statemen } foreach ($table->indexes as $index) { + if ($index->type !== IndexType::Index) { + throw new UnsupportedException( + 'Only data-skipping indexes (index()) are supported in ClickHouse CREATE TABLE.' + ); + } $columnDefs[] = $this->compileSkipIndex($index); } From 1cdeee321bede5f435b7404183ddca3b82de2560 Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 16:18:27 +1200 Subject: [PATCH 08/16] Update src/Query/Schema/Table.php Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/Query/Schema/Table.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index d44d3f3..52210bb 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -728,7 +728,7 @@ public function settings(array $settings): static // output. $sanitized[$key] = \rtrim(\rtrim(\sprintf('%F', $value), '0'), '.'); } elseif (\is_string($value)) { - if (! \preg_match('/^[A-Za-z0-9_.\-+\/]*$/', $value)) { + if (! \preg_match('/^[A-Za-z0-9_.\ -+\/]+$/', $value)) { throw new ValidationException( 'Invalid setting value for ' . $key . 
': must match [A-Za-z0-9_.\\-+/]*' ); From f04596929376e94c0041fbef0b9e017e1119ad2d Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 16:34:47 +1200 Subject: [PATCH 09/16] Update src/Query/Schema/Table.php Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/Query/Schema/Table.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index 52210bb..ad1691f 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -728,7 +728,7 @@ public function settings(array $settings): static // output. $sanitized[$key] = \rtrim(\rtrim(\sprintf('%F', $value), '0'), '.'); } elseif (\is_string($value)) { - if (! \preg_match('/^[A-Za-z0-9_.\ -+\/]+$/', $value)) { + if (! \preg_match('/^[A-Za-z0-9_. +\/\-]+$/', $value)) { throw new ValidationException( 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\\-+/]*' ); From df22cafa74a298ad6ec9698fdc1dc7226566d9c3 Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 16:43:52 +1200 Subject: [PATCH 10/16] Update src/Query/Schema/ClickHouse.php Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/Query/Schema/ClickHouse.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 2835c4a..fa5246c 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -247,7 +247,7 @@ private function compileSkipIndex(Index $index): string if ($index->algorithm === null) { return 'INDEX ' . $this->quote($index->name) . ' ' . $expr - . ' TYPE minmax GRANULARITY 3'; + . ' TYPE minmax GRANULARITY ' . 
$index->granularity; } $type = $index->algorithm->value; From d60af4c308698c08568732a3b7bc363b075e05c4 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 04:47:07 +0000 Subject: [PATCH 11/16] docs(readme): use builder style for skip index and SETTINGS examples Agent-Logs-Url: https://github.com/utopia-php/query/sessions/b1fda15b-7a7c-4a63-a597-31ee644b3c39 Co-authored-by: abnegate <5857008+abnegate@users.noreply.github.com> --- README.md | 36 ++++++++++++++++-------------------- 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index e5bb861..72e2640 100644 --- a/README.md +++ b/README.md @@ -2091,42 +2091,38 @@ TTL expressions are emitted verbatim; they must not be empty or contain semicolo ```php use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; -$schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->string('country'); - $table->string('text'); - +$schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->string('country') + ->string('text') // BloomFilter — high-cardinality strings with `=` / `IN` predicates - $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); - + ->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter) // Set(N) — small fixed value sets, custom granularity - $table->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); - + ->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4) // NgramBloomFilter(n, size_bytes, hashes, seed) — text search on `LIKE` / `match` - $table->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); - + ->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]) // No algorithm specified → defaults to `TYPE minmax GRANULARITY 3` - $table->index(['id']); 
-}); + ->index(['id']) + ->create(); // CREATE TABLE `events` (..., INDEX `idx_user_id` `user_id` TYPE bloom_filter GRANULARITY 1, ...) ``` The 6 algorithms are `MinMax`, `Set`, `BloomFilter`, `NgramBloomFilter`, `TokenBloomFilter`, `Inverted`. Algorithm-specific arguments are passed via `algorithmArgs` and rendered verbatim — supply them from trusted (developer-controlled) source. Other dialects ignore the ClickHouse-only `algorithm` / `algorithmArgs` / `granularity` arguments. -`MinMax` and `Inverted` take no parenthesised arguments in ClickHouse DDL — passing `algorithmArgs` for them throws `ValidationException`. Skip indexes can also be added via `ALTER TABLE … ADD INDEX` by calling `index()` inside an `alter()` callback. +`MinMax` and `Inverted` take no parenthesised arguments in ClickHouse DDL — passing `algorithmArgs` for them throws `ValidationException`. Skip indexes can also be added via `ALTER TABLE … ADD INDEX` by calling `alter()` on the builder. **Engine SETTINGS** — emit `SETTINGS k=v` after the TTL clause: ```php -$schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->settings([ +$schema->table('events') + ->bigInteger('id')->primary() + ->settings([ 'index_granularity' => 8192, 'allow_nullable_key' => true, // booleans become 1/0 - ]); -}); + ]) + ->create(); // CREATE TABLE `events` (...) 
ENGINE = MergeTree() ORDER BY (`id`) // SETTINGS index_granularity = 8192, allow_nullable_key = 1 From 16ddd874aa7d67bb51640e99002c6ae419eadb4e Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 30 Apr 2026 05:32:34 +0000 Subject: [PATCH 12/16] fix(tests): rewrite skip-index and SETTINGS tests to use builder style Agent-Logs-Url: https://github.com/utopia-php/query/sessions/6148863a-df07-4bbf-a771-b4721be69f8f Co-authored-by: abnegate <5857008+abnegate@users.noreply.github.com> --- tests/Query/Schema/ClickHouseTest.php | 170 +++++++++++++------------- 1 file changed, 82 insertions(+), 88 deletions(-) diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 9785101..02b069c 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -712,11 +712,11 @@ public function testColumnLevelTTL(): void public function testIndexBloomFilter(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -728,13 +728,13 @@ public function testIndexBloomFilter(): void public function testIndexWithAlgorithmArgs(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('country'); - $table->string('text'); - $table->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); - $table->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]); - }); + $result = $schema->table('events') + 
->bigInteger('id')->primary() + ->string('country') + ->string('text') + ->index(['country'], algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4) + ->index(['text'], algorithm: IndexAlgorithm::NgramBloomFilter, algorithmArgs: [4, 1024, 3, 0]) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -749,12 +749,12 @@ public function testIndexWithAlgorithmArgs(): void public function testIndexCompositeColumnsWithAlgorithm(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->string('event'); - $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::BloomFilter); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->string('event') + ->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::BloomFilter) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -770,11 +770,10 @@ public function testIndexInvalidGranularityThrows(): void $this->expectException(ValidationException::class); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, granularity: 0); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, granularity: 0); } public function testIndexEmptyColumnsThrows(): void @@ -782,21 +781,20 @@ public function testIndexEmptyColumnsThrows(): void $this->expectException(ValidationException::class); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->index([]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->index([]); } public function 
testIndexNameRegexOnlyEnforcedForClickHouseAlgorithms(): void { // No algorithm → permissive name allowed (other dialects quote names) $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->index(['user_id'], name: 'idx-with-hyphens'); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->index(['user_id'], name: 'idx-with-hyphens') + ->create(); $this->assertBindingCount($result); $this->assertStringContainsString('INDEX `idx-with-hyphens`', $result->query); } @@ -807,11 +805,10 @@ public function testIndexNameRegexEnforcedWhenAlgorithmIsSet(): void $this->expectExceptionMessage('Invalid index name: idx-with-hyphens'); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->index(['user_id'], name: 'idx-with-hyphens', algorithm: IndexAlgorithm::BloomFilter); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->string('user_id') + ->index(['user_id'], name: 'idx-with-hyphens', algorithm: IndexAlgorithm::BloomFilter); } // SETTINGS @@ -819,10 +816,10 @@ public function testIndexNameRegexEnforcedWhenAlgorithmIsSet(): void public function testTableSettings(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->settings(['index_granularity' => 8192, 'allow_nullable_key' => true]); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->settings(['index_granularity' => 8192, 'allow_nullable_key' => true]) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -835,12 +832,12 @@ public function testTableSettings(): void public function testTableSettingsWithTtlOrdering(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - 
$table->bigInteger('id')->primary(); - $table->datetime('created_at'); - $table->ttl('`created_at` + INTERVAL 30 DAY'); - $table->settings(['index_granularity' => 4096]); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->datetime('created_at') + ->ttl('`created_at` + INTERVAL 30 DAY') + ->settings(['index_granularity' => 4096]) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -856,10 +853,9 @@ public function testTableSettingsRejectsInvalidKey(): void $this->expectException(ValidationException::class); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->settings(['1bad-key' => 8192]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->settings(['1bad-key' => 8192]); } public function testTableSettingsRejectsInvalidValue(): void @@ -867,19 +863,18 @@ public function testTableSettingsRejectsInvalidValue(): void $this->expectException(ValidationException::class); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->settings(['ok_key' => "evil'; DROP TABLE x; --"]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->settings(['ok_key' => "evil'; DROP TABLE x; --"]); } public function testTableSettingsFloatAvoidsScientificNotation(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->settings(['merge_with_ttl_timeout' => 1.0e-5]); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->settings(['merge_with_ttl_timeout' => 1.0e-5]) + ->create(); $this->assertBindingCount($result); $this->assertStringContainsString('SETTINGS merge_with_ttl_timeout = 0.00001', $result->query); @@ -892,11 +887,10 @@ public function testIndexNoArgAlgorithmRejectsArgs(): void $this->expectExceptionMessage('minmax does not accept algorithm 
arguments.'); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->integer('score'); - $table->index(['score'], algorithm: IndexAlgorithm::MinMax, algorithmArgs: [3]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->integer('score') + ->index(['score'], algorithm: IndexAlgorithm::MinMax, algorithmArgs: [3]); } public function testIndexInvertedRejectsArgs(): void @@ -904,21 +898,20 @@ public function testIndexInvertedRejectsArgs(): void $this->expectException(ValidationException::class); $schema = new Schema(); - $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('text'); - $table->index(['text'], algorithm: IndexAlgorithm::Inverted, algorithmArgs: [42]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->string('text') + ->index(['text'], algorithm: IndexAlgorithm::Inverted, algorithmArgs: [42]); } public function testIndexAutoNameSanitisesNonIdentifierColumns(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('event-type'); - $table->index(['event-type'], algorithm: IndexAlgorithm::BloomFilter); - }); + $result = $schema->table('events') + ->bigInteger('id')->primary() + ->string('event-type') + ->index(['event-type'], algorithm: IndexAlgorithm::BloomFilter) + ->create(); $this->assertBindingCount($result); $this->assertSame( @@ -932,11 +925,11 @@ public function testIndexAutoNameSanitisesNonIdentifierColumns(): void public function testIndexFloatArgAvoidsScientificNotation(): void { $schema = new Schema(); - $result = $schema->create('events', function (Table $table) { - $table->bigInteger('id')->primary(); - $table->string('user_id'); - $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]); - }); + $result = $schema->table('events') + 
->bigInteger('id')->primary() + ->string('user_id') + ->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter, algorithmArgs: [1.0e-5]) + ->create(); $this->assertBindingCount($result); $this->assertStringContainsString('TYPE bloom_filter(0.00001)', $result->query); @@ -947,9 +940,9 @@ public function testIndexFloatArgAvoidsScientificNotation(): void public function testAlterAddIndexWithAlgorithm(): void { $schema = new Schema(); - $result = $schema->alter('events', function (Table $table) { - $table->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter); - }); + $result = $schema->table('events') + ->index(['user_id'], algorithm: IndexAlgorithm::BloomFilter) + ->alter(); $this->assertBindingCount($result); $this->assertSame( @@ -961,9 +954,9 @@ public function testAlterAddIndexWithAlgorithm(): void public function testAlterAddIndexComposite(): void { $schema = new Schema(); - $result = $schema->alter('events', function (Table $table) { - $table->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4); - }); + $result = $schema->table('events') + ->index(['user_id', 'event'], name: 'idx_user_event', algorithm: IndexAlgorithm::Set, algorithmArgs: [100], granularity: 4) + ->alter(); $this->assertBindingCount($result); $this->assertSame( @@ -978,8 +971,9 @@ public function testAlterRejectsSettings(): void $this->expectExceptionMessage('SETTINGS'); $schema = new Schema(); - $schema->alter('events', function (Table $table) { - $table->settings(['index_granularity' => 4096]); - }); + $schema->table('events') + ->bigInteger('id')->primary() + ->settings(['index_granularity' => 4096]) + ->alter(); } } From a548ffe76cb0f8306d7e68e3c7b9a5dde7a44472 Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 18:28:29 +1200 Subject: [PATCH 13/16] fix(schema): expose Column forwarders for skip-index and SETTINGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Tests in the fluent builder style call ->index(algorithm: ...) and ->settings([...]) on a Column returned by a column-creation method, but Column lacked forwarders for the new ClickHouse skip-index params and the table-level settings(). Add them so the chain compiles and runs. Also hardcode GRANULARITY 3 for the no-algorithm fallback in ClickHouse::compileSkipIndex (matches its docblock and existing test expectations), and rewrite testTableSettingsWithTtlOrdering to call ttl() on the Table directly — Column::ttl() is column-level and would emit an inline TTL on the column, not the table-level TTL the test asserts. Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Query/Schema/ClickHouse.php | 2 +- src/Query/Schema/Column.php | 26 +++++++++++++++++++++++++- tests/Query/Schema/ClickHouseTest.php | 7 ++++--- 3 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index fa5246c..2835c4a 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -247,7 +247,7 @@ private function compileSkipIndex(Index $index): string if ($index->algorithm === null) { return 'INDEX ' . $this->quote($index->name) . ' ' . $expr - . ' TYPE minmax GRANULARITY ' . $index->granularity; + . 
' TYPE minmax GRANULARITY 3'; } $type = $index->algorithm->value; diff --git a/src/Query/Schema/Column.php b/src/Query/Schema/Column.php index 5a81cd1..8439a6f 100644 --- a/src/Query/Schema/Column.php +++ b/src/Query/Schema/Column.php @@ -5,6 +5,7 @@ use Utopia\Query\Builder\Statement; use Utopia\Query\Exception\ValidationException; use Utopia\Query\Schema\ClickHouse\Engine; +use Utopia\Query\Schema\ClickHouse\IndexAlgorithm; class Column { @@ -392,6 +393,7 @@ public function dropColumn(string $name): Table * @param array $lengths * @param array $orders * @param array $collations + * @param list $algorithmArgs ClickHouse skip-index algorithm args */ public function index( array $columns, @@ -401,8 +403,22 @@ public function index( array $lengths = [], array $orders = [], array $collations = [], + ?IndexAlgorithm $algorithm = null, + array $algorithmArgs = [], + int $granularity = 1, ): Table { - return $this->table->index($columns, $name, $method, $operatorClass, $lengths, $orders, $collations); + return $this->table->index( + $columns, + $name, + $method, + $operatorClass, + $lengths, + $orders, + $collations, + $algorithm, + $algorithmArgs, + $granularity, + ); } /** @@ -508,6 +524,14 @@ public function engine(Engine $engine, string ...$args): Table return $this->table->engine($engine, ...$args); } + /** + * @param array $settings + */ + public function settings(array $settings): Table + { + return $this->table->settings($settings); + } + /** * @param list $columns */ diff --git a/tests/Query/Schema/ClickHouseTest.php b/tests/Query/Schema/ClickHouseTest.php index 02b069c..852ad04 100644 --- a/tests/Query/Schema/ClickHouseTest.php +++ b/tests/Query/Schema/ClickHouseTest.php @@ -832,9 +832,10 @@ public function testTableSettings(): void public function testTableSettingsWithTtlOrdering(): void { $schema = new Schema(); - $result = $schema->table('events') - ->bigInteger('id')->primary() - ->datetime('created_at') + $table = $schema->table('events'); + 
$table->bigInteger('id')->primary(); + $table->datetime('created_at'); + $result = $table ->ttl('`created_at` + INTERVAL 30 DAY') ->settings(['index_granularity' => 4096]) ->create(); From 852542f919d1418d7964b63b92764b072589be34 Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 18:40:16 +1200 Subject: [PATCH 14/16] Update src/Query/Schema/Table.php Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- src/Query/Schema/Table.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index ad1691f..f68aafb 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -728,11 +728,10 @@ public function settings(array $settings): static // output. $sanitized[$key] = \rtrim(\rtrim(\sprintf('%F', $value), '0'), '.'); } elseif (\is_string($value)) { - if (! \preg_match('/^[A-Za-z0-9_. +\/\-]+$/', $value)) { + if (! \preg_match('/^[A-Za-z0-9_.\-+\/]+$/', $value)) { throw new ValidationException( - 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\\-+/]*' + 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\.\-+/]+' ); - } $sanitized[$key] = $value; } else { throw new ValidationException( From bb529d88d61797c70631fe48db1f20812cbf9995 Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 18:43:03 +1200 Subject: [PATCH 15/16] fix(schema): make index granularity nullable to preserve user intent The previous "hardcoded 3" fallback in compileSkipIndex discarded any explicit granularity passed for a no-algorithm index. Make the field nullable end-to-end (Index, Table::index, Column::index) and resolve the dialect-specific default at compile time: 3 when no algorithm, 1 when an algorithm is set. Explicit values are now always honoured. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Query/Schema/ClickHouse.php | 4 ++-- src/Query/Schema/Column.php | 2 +- src/Query/Schema/Index.php | 4 ++-- src/Query/Schema/Table.php | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/Query/Schema/ClickHouse.php b/src/Query/Schema/ClickHouse.php index 2835c4a..22d35b0 100644 --- a/src/Query/Schema/ClickHouse.php +++ b/src/Query/Schema/ClickHouse.php @@ -247,7 +247,7 @@ private function compileSkipIndex(Index $index): string if ($index->algorithm === null) { return 'INDEX ' . $this->quote($index->name) . ' ' . $expr - . ' TYPE minmax GRANULARITY 3'; + . ' TYPE minmax GRANULARITY ' . ($index->granularity ?? 3); } $type = $index->algorithm->value; @@ -269,7 +269,7 @@ private function compileSkipIndex(Index $index): string } return 'INDEX ' . $this->quote($index->name) . ' ' . $expr - . ' TYPE ' . $type . ' GRANULARITY ' . $index->granularity; + . ' TYPE ' . $type . ' GRANULARITY ' . ($index->granularity ?? 1); } /** diff --git a/src/Query/Schema/Column.php b/src/Query/Schema/Column.php index 8439a6f..09d4075 100644 --- a/src/Query/Schema/Column.php +++ b/src/Query/Schema/Column.php @@ -405,7 +405,7 @@ public function index( array $collations = [], ?IndexAlgorithm $algorithm = null, array $algorithmArgs = [], - int $granularity = 1, + ?int $granularity = null, ): Table { return $this->table->index( $columns, diff --git a/src/Query/Schema/Index.php b/src/Query/Schema/Index.php index c9e2e4f..154ef26 100644 --- a/src/Query/Schema/Index.php +++ b/src/Query/Schema/Index.php @@ -30,7 +30,7 @@ public function __construct( public array $rawColumns = [], public ?IndexAlgorithm $algorithm = null, public array $algorithmArgs = [], - public int $granularity = 1, + public ?int $granularity = null, ) { // Only ClickHouse data-skipping indexes require an unquoted identifier // for the name; other dialects emit the name backtick-quoted, so @@ -52,7 +52,7 @@ public function __construct( throw new 
ValidationException('Invalid collation: ' . $collation); } } - if ($granularity < 1) { + if ($granularity !== null && $granularity < 1) { throw new ValidationException('Index granularity must be >= 1.'); } if ($algorithm !== null && $algorithmArgs !== [] && ! self::algorithmAcceptsArgs($algorithm)) { diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index f68aafb..a69ac7a 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -372,7 +372,7 @@ public function index( array $collations = [], ?IndexAlgorithm $algorithm = null, array $algorithmArgs = [], - int $granularity = 1, + ?int $granularity = null, ): static { if ($name === '') { $name = $this->autoIndexName('idx_', $columns); From bf3a8f9f58184e0df3eec829ddeba56f6b72f27f Mon Sep 17 00:00:00 2001 From: Jake Barnby Date: Thu, 30 Apr 2026 18:44:14 +1200 Subject: [PATCH 16/16] fix(schema): restore missing brace in settings() string validation Co-Authored-By: Claude Opus 4.7 (1M context) --- src/Query/Schema/Table.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/Query/Schema/Table.php b/src/Query/Schema/Table.php index a69ac7a..b46121a 100644 --- a/src/Query/Schema/Table.php +++ b/src/Query/Schema/Table.php @@ -730,8 +730,9 @@ public function settings(array $settings): static } elseif (\is_string($value)) { if (! \preg_match('/^[A-Za-z0-9_.\-+\/]+$/', $value)) { throw new ValidationException( - 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\.\-+/]+' + 'Invalid setting value for ' . $key . ': must match [A-Za-z0-9_.\-+/]+' ); + } $sanitized[$key] = $value; } else { throw new ValidationException(