diff --git a/ir/normalize.go b/ir/normalize.go index 1ee4db99..f23b1137 100644 --- a/ir/normalize.go +++ b/ir/normalize.go @@ -1382,19 +1382,30 @@ func IsTextLikeType(typeName string) bool { // convertAnyArrayToIn converts PostgreSQL's "column = ANY (ARRAY[...])" format // to the more readable "column IN (...)" format. // -// Type casts are always preserved to ensure: +// Semantically-significant type casts are preserved to ensure: // - Custom types (enums, domains) are properly qualified (e.g., 'value'::public.my_enum) // - Output matches pg_dump's format exactly // - Comparison between desired and current states is accurate // +// The redundant "::text" cast PostgreSQL adds to varchar literals on a catalog +// round-trip is stripped (see stripRedundantTextCast), since it carries no +// meaning and only differs by how the predicate reached the catalog. +// // Example transformations: // - "status = ANY (ARRAY['active'::public.status_type])" → "status IN ('active'::public.status_type)" // - "gender = ANY (ARRAY['M'::text, 'F'::text])" → "gender IN ('M'::text, 'F'::text)" // - "(col = ANY (ARRAY[...])) AND (other)" → "(col IN (...)) AND (other)" // - "col::text = ANY (ARRAY['a'::varchar]::text[])" → "col::text IN ('a'::varchar)" +// +// PostgreSQL stores the same IN-list predicate on a varchar column two equivalent +// ways depending on how it was written, and these must canonicalize identically +// to avoid a perpetual spurious diff (issue #473): +// - array-level cast: "col::text = ANY ((ARRAY['a'::varchar])::text[])" (wrapping paren + array cast) +// - element-level: "col::text = ANY (ARRAY[('a'::varchar)::text])" (cast on each element) +// Both collapse to "col::text IN ('a'::varchar)". func convertAnyArrayToIn(expr string) string { - const marker = " = ANY (ARRAY[" - idx := strings.Index(expr, marker) + const anyMarker = " = ANY (" + idx := strings.Index(expr, anyMarker) if idx == -1 { return expr } @@ -1402,8 +1413,25 @@ func convertAnyArrayToIn(expr string) string { // Extract the part before the marker (column name with possible leading content) prefix := expr[:idx] - // Find the closing "]" for ARRAY[...] starting after the marker - startIdx := idx + len(marker) + // Position right after " = ANY (". + pos := idx + len(anyMarker) + + // The array may be wrapped in an extra paren with a cast applied to the whole + // array, e.g. "(ARRAY[...])::text[]". Skip that paren so the wrapped and + // element-level forms are handled the same way. + arrayWrapped := false + if pos < len(expr) && expr[pos] == '(' { + arrayWrapped = true + pos++ + } + + const arrayPrefix = "ARRAY[" + if !strings.HasPrefix(expr[pos:], arrayPrefix) { + return expr + } + startIdx := pos + len(arrayPrefix) + + // Find the closing "]" for ARRAY[...] arrayEnd := findArrayClose(expr, startIdx) if arrayEnd == -1 { return expr @@ -1412,8 +1440,9 @@ func convertAnyArrayToIn(expr string) string { // Extract array contents arrayContents := expr[startIdx:arrayEnd] - // Find the closing ")" for ANY(...), which may be after an optional type cast like "::text[]" - // Pattern after "]": optional "::type[]" followed by ")" + // Find the closing ")" for ANY(...). Any cast applied to the whole array + // (e.g. "::text[]") sits between "]" and that ")" and is redundant once the + // predicate is rendered as IN (...), so it is dropped. closeParenIdx := arrayEnd + 1 for closeParenIdx < len(expr) && expr[closeParenIdx] != ')' { closeParenIdx++ @@ -1421,22 +1450,52 @@ func convertAnyArrayToIn(expr string) string { if closeParenIdx >= len(expr) { return expr // No closing paren found } + if arrayWrapped { + // The ")" just found closes the wrapping paren, not ANY(...). Skip past + // the optional "::type[]" cast to the real closing ")". + closeParenIdx++ + for closeParenIdx < len(expr) && expr[closeParenIdx] != ')' { + closeParenIdx++ + } + if closeParenIdx >= len(expr) { + return expr + } + } // Everything after the closing ")" is the suffix suffix := expr[closeParenIdx+1:] - // Split values and preserve them as-is, including all type casts + // Split values, stripping the redundant "::text" cast PostgreSQL adds when a + // varchar IN-list round-trips through the catalog. values := strings.Split(arrayContents, ", ") var cleanValues []string for _, val := range values { - val = strings.TrimSpace(val) - cleanValues = append(cleanValues, val) + cleanValues = append(cleanValues, stripRedundantTextCast(strings.TrimSpace(val))) } // Return converted format: "prefix IN (values)suffix" return fmt.Sprintf("%s IN (%s)%s", prefix, strings.Join(cleanValues, ", "), suffix) } +var ( + // parenTextCastRe matches the parenthesized element form a varchar IN-list + // takes after a catalog round-trip: "('x'::character varying)::text". + parenTextCastRe = regexp.MustCompile(`^\((.*::(?:character varying|varchar))\)::text$`) + // chainedTextCastRe matches the chained form: "'x'::character varying::text". + chainedTextCastRe = regexp.MustCompile(`::(character varying|varchar)::text\b`) +) + +// stripRedundantTextCast removes the redundant "::text" cast PostgreSQL adds when +// a varchar IN-list value is round-tripped through the catalog, so the element +// form ("('x'::character varying)::text") and the chained form +// ("'x'::character varying::text") both collapse to "'x'::character varying". +func stripRedundantTextCast(val string) string { + if m := parenTextCastRe.FindStringSubmatch(val); m != nil { + return m[1] + } + return chainedTextCastRe.ReplaceAllString(val, "::$1") +} + // findArrayClose finds the position of the closing "]" for an ARRAY literal, // handling nested brackets and quoted strings properly. // Returns the position of the "]" that closes the ARRAY. diff --git a/ir/normalize_test.go b/ir/normalize_test.go index 17828657..61839ab8 100644 --- a/ir/normalize_test.go +++ b/ir/normalize_test.go @@ -506,3 +506,80 @@ func TestNormalizeCheckClause(t *testing.T) { }) } } + +func TestConvertAnyArrayToIn(t *testing.T) { + tests := []struct { + name string + input string + expected string + }{ + { + name: "element-level cast form (round-tripped through catalog)", + input: "(status)::text = ANY (ARRAY[('new'::character varying)::text, ('acknowledged'::character varying)::text])", + expected: "(status)::text IN ('new'::character varying, 'acknowledged'::character varying)", + }, + { + name: "array-level cast form (from natural IN-list DDL)", + input: "(status)::text = ANY ((ARRAY['new'::character varying, 'acknowledged'::character varying])::text[])", + expected: "(status)::text IN ('new'::character varying, 'acknowledged'::character varying)", + }, + { + name: "non-wrapped array with trailing cast", + input: "col::text = ANY (ARRAY['a'::character varying]::text[])", + expected: "col::text IN ('a'::character varying)", + }, + { + name: "enum cast is preserved", + input: "status = ANY (ARRAY['active'::public.status_type])", + expected: "status IN ('active'::public.status_type)", + }, + { + name: "integer array converted to IN, values unchanged", + input: "n = ANY (ARRAY[1, 2, 3])", + expected: "n IN (1, 2, 3)", + }, + { + name: "no ANY marker returns unchanged", + input: "status = 'active'", + expected: "status = 'active'", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + if result := convertAnyArrayToIn(tt.input); result != tt.expected { + t.Errorf("convertAnyArrayToIn(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } + + // The two equivalent catalog forms must canonicalize to the same string, + // otherwise plan schedules a perpetual spurious index rebuild (issue #473). + arrayLevel := convertAnyArrayToIn("(status)::text = ANY ((ARRAY['new'::character varying])::text[])") + elementLevel := convertAnyArrayToIn("(status)::text = ANY (ARRAY[('new'::character varying)::text])") + if arrayLevel != elementLevel { + t.Errorf("equivalent forms did not converge:\n array-level: %q\n element-level: %q", arrayLevel, elementLevel) + } +} + +func TestStripRedundantTextCast(t *testing.T) { + tests := []struct { + input string + expected string + }{ + {"('new'::character varying)::text", "'new'::character varying"}, + {"('new'::varchar)::text", "'new'::varchar"}, + {"'new'::character varying::text", "'new'::character varying"}, + {"'new'::character varying", "'new'::character varying"}, // already clean + {"'active'::public.status_type", "'active'::public.status_type"}, // custom type untouched + {"42", "42"}, + } + + for _, tt := range tests { + t.Run(tt.input, func(t *testing.T) { + if result := stripRedundantTextCast(tt.input); result != tt.expected { + t.Errorf("stripRedundantTextCast(%q) = %q, want %q", tt.input, result, tt.expected) + } + }) + } +} diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/diff.sql b/testdata/diff/create_index/issue_473_partial_index_in_list/diff.sql new file mode 100644 index 00000000..e69de29b diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/new.sql b/testdata/diff/create_index/issue_473_partial_index_in_list/new.sql new file mode 100644 index 00000000..e1bcccf4 --- /dev/null +++ b/testdata/diff/create_index/issue_473_partial_index_in_list/new.sql @@ -0,0 +1,15 @@ +CREATE TABLE findings ( + id uuid PRIMARY KEY, + org_id uuid NOT NULL, + status varchar(20) NOT NULL, + impact_score int +); + +-- Same partial index, but written in the form that pgschema dump renders +-- (copied from pg_get_indexdef). When re-parsed, PostgreSQL stores it with an +-- element-level cast: "= ANY (ARRAY[('x'::varchar)::text, ...])". This is +-- semantically identical to the IN-list form in old.sql, so the diff must be +-- empty - no spurious concurrent rebuild (issue #473). +CREATE INDEX idx_finding_actionable + ON findings (org_id, status, impact_score DESC) + WHERE (status)::text = ANY ((ARRAY['new'::character varying, 'acknowledged'::character varying])::text[]); diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/old.sql b/testdata/diff/create_index/issue_473_partial_index_in_list/old.sql new file mode 100644 index 00000000..71920bf9 --- /dev/null +++ b/testdata/diff/create_index/issue_473_partial_index_in_list/old.sql @@ -0,0 +1,12 @@ +CREATE TABLE findings ( + id uuid PRIMARY KEY, + org_id uuid NOT NULL, + status varchar(20) NOT NULL, + impact_score int +); + +-- Partial index written with a natural IN-list predicate. PostgreSQL stores +-- this in the catalog with an array-level cast: "= ANY ((ARRAY[...])::text[])". +CREATE INDEX idx_finding_actionable + ON findings (org_id, status, impact_score DESC) + WHERE status IN ('new', 'acknowledged'); diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/plan.json b/testdata/diff/create_index/issue_473_partial_index_in_list/plan.json new file mode 100644 index 00000000..4b2a3d3d --- /dev/null +++ b/testdata/diff/create_index/issue_473_partial_index_in_list/plan.json @@ -0,0 +1,9 @@ +{ + "version": "1.0.0", + "pgschema_version": "1.11.1", + "created_at": "1970-01-01T00:00:00Z", + "source_fingerprint": { + "hash": "ee04b3107ba14c3c5811bc73d23ffc1e8422ceea20a6e5a129c3c3eca7979ef0" + }, + "groups": null +} diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/plan.sql b/testdata/diff/create_index/issue_473_partial_index_in_list/plan.sql new file mode 100644 index 00000000..e69de29b diff --git a/testdata/diff/create_index/issue_473_partial_index_in_list/plan.txt b/testdata/diff/create_index/issue_473_partial_index_in_list/plan.txt new file mode 100644 index 00000000..241994af --- /dev/null +++ b/testdata/diff/create_index/issue_473_partial_index_in_list/plan.txt @@ -0,0 +1 @@ +No changes detected.