Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
79 changes: 69 additions & 10 deletions ir/normalize.go
Original file line number Diff line number Diff line change
Expand Up @@ -1382,28 +1382,56 @@ func IsTextLikeType(typeName string) bool {
// convertAnyArrayToIn converts PostgreSQL's "column = ANY (ARRAY[...])" format
// to the more readable "column IN (...)" format.
//
// Type casts are always preserved to ensure:
// Semantically-significant type casts are preserved to ensure:
// - Custom types (enums, domains) are properly qualified (e.g., 'value'::public.my_enum)
// - Output matches pg_dump's format exactly
// - Comparison between desired and current states is accurate
//
// The redundant "::text" cast PostgreSQL adds to varchar literals on a catalog
// round-trip is stripped (see stripRedundantTextCast), since it carries no
// meaning and only differs by how the predicate reached the catalog.
//
// Example transformations:
// - "status = ANY (ARRAY['active'::public.status_type])" → "status IN ('active'::public.status_type)"
// - "gender = ANY (ARRAY['M'::text, 'F'::text])" → "gender IN ('M'::text, 'F'::text)"
// - "(col = ANY (ARRAY[...])) AND (other)" → "(col IN (...)) AND (other)"
// - "col::text = ANY (ARRAY['a'::varchar]::text[])" → "col::text IN ('a'::varchar)"
//
// PostgreSQL stores the same IN-list predicate on a varchar column two equivalent
// ways depending on how it was written, and these must canonicalize identically
// to avoid a perpetual spurious diff (issue #473):
// - array-level cast: "col::text = ANY ((ARRAY['a'::varchar])::text[])" (wrapping paren + array cast)
// - element-level: "col::text = ANY (ARRAY[('a'::varchar)::text])" (cast on each element)
// Both collapse to "col::text IN ('a'::varchar)".
Comment thread
tianzhou marked this conversation as resolved.
func convertAnyArrayToIn(expr string) string {
const marker = " = ANY (ARRAY["
idx := strings.Index(expr, marker)
const anyMarker = " = ANY ("
idx := strings.Index(expr, anyMarker)
if idx == -1 {
return expr
}

// Extract the part before the marker (column name with possible leading content)
prefix := expr[:idx]

// Find the closing "]" for ARRAY[...] starting after the marker
startIdx := idx + len(marker)
// Position right after " = ANY (".
pos := idx + len(anyMarker)

// The array may be wrapped in an extra paren with a cast applied to the whole
// array, e.g. "(ARRAY[...])::text[]". Skip that paren so the wrapped and
// element-level forms are handled the same way.
arrayWrapped := false
if pos < len(expr) && expr[pos] == '(' {
arrayWrapped = true
pos++
}

const arrayPrefix = "ARRAY["
if !strings.HasPrefix(expr[pos:], arrayPrefix) {
return expr
}
startIdx := pos + len(arrayPrefix)

// Find the closing "]" for ARRAY[...]
arrayEnd := findArrayClose(expr, startIdx)
if arrayEnd == -1 {
return expr
Expand All @@ -1412,31 +1440,62 @@ func convertAnyArrayToIn(expr string) string {
// Extract array contents
arrayContents := expr[startIdx:arrayEnd]

// Find the closing ")" for ANY(...), which may be after an optional type cast like "::text[]"
// Pattern after "]": optional "::type[]" followed by ")"
// Find the closing ")" for ANY(...). Any cast applied to the whole array
// (e.g. "::text[]") sits between "]" and that ")" and is redundant once the
// predicate is rendered as IN (...), so it is dropped.
closeParenIdx := arrayEnd + 1
for closeParenIdx < len(expr) && expr[closeParenIdx] != ')' {
closeParenIdx++
}
if closeParenIdx >= len(expr) {
return expr // No closing paren found
}
if arrayWrapped {
// The ")" just found closes the wrapping paren, not ANY(...). Skip past
// the optional "::type[]" cast to the real closing ")".
closeParenIdx++
for closeParenIdx < len(expr) && expr[closeParenIdx] != ')' {
closeParenIdx++
}
if closeParenIdx >= len(expr) {
return expr
}
}
Comment thread
tianzhou marked this conversation as resolved.

// Everything after the closing ")" is the suffix
suffix := expr[closeParenIdx+1:]

// Split values and preserve them as-is, including all type casts
// Split values, stripping the redundant "::text" cast PostgreSQL adds when a
// varchar IN-list round-trips through the catalog.
values := strings.Split(arrayContents, ", ")
var cleanValues []string
for _, val := range values {
val = strings.TrimSpace(val)
cleanValues = append(cleanValues, val)
cleanValues = append(cleanValues, stripRedundantTextCast(strings.TrimSpace(val)))
}

// Return converted format: "prefix IN (values)suffix"
return fmt.Sprintf("%s IN (%s)%s", prefix, strings.Join(cleanValues, ", "), suffix)
}

var (
// parenTextCastRe matches the parenthesized element form a varchar IN-list
// takes after a catalog round-trip: "('x'::character varying)::text".
parenTextCastRe = regexp.MustCompile(`^\((.*::(?:character varying|varchar))\)::text$`)
// chainedTextCastRe matches the chained form: "'x'::character varying::text".
chainedTextCastRe = regexp.MustCompile(`::(character varying|varchar)::text\b`)
)

// stripRedundantTextCast removes the redundant "::text" cast PostgreSQL adds when
// a varchar IN-list value is round-tripped through the catalog, so the element
// form ("('x'::character varying)::text") and the chained form
// ("'x'::character varying::text") both collapse to "'x'::character varying".
func stripRedundantTextCast(val string) string {
if m := parenTextCastRe.FindStringSubmatch(val); m != nil {
return m[1]
}
return chainedTextCastRe.ReplaceAllString(val, "::$1")
}
Comment thread
tianzhou marked this conversation as resolved.

// findArrayClose finds the position of the closing "]" for an ARRAY literal,
// handling nested brackets and quoted strings properly.
// Returns the position of the "]" that closes the ARRAY.
Expand Down
77 changes: 77 additions & 0 deletions ir/normalize_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,3 +506,80 @@ func TestNormalizeCheckClause(t *testing.T) {
})
}
}

func TestConvertAnyArrayToIn(t *testing.T) {
tests := []struct {
name string
input string
expected string
}{
{
name: "element-level cast form (round-tripped through catalog)",
input: "(status)::text = ANY (ARRAY[('new'::character varying)::text, ('acknowledged'::character varying)::text])",
expected: "(status)::text IN ('new'::character varying, 'acknowledged'::character varying)",
},
{
name: "array-level cast form (from natural IN-list DDL)",
input: "(status)::text = ANY ((ARRAY['new'::character varying, 'acknowledged'::character varying])::text[])",
expected: "(status)::text IN ('new'::character varying, 'acknowledged'::character varying)",
},
{
name: "non-wrapped array with trailing cast",
input: "col::text = ANY (ARRAY['a'::character varying]::text[])",
expected: "col::text IN ('a'::character varying)",
},
{
name: "enum cast is preserved",
input: "status = ANY (ARRAY['active'::public.status_type])",
expected: "status IN ('active'::public.status_type)",
},
{
name: "integer array converted to IN, values unchanged",
input: "n = ANY (ARRAY[1, 2, 3])",
expected: "n IN (1, 2, 3)",
},
{
name: "no ANY marker returns unchanged",
input: "status = 'active'",
expected: "status = 'active'",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if result := convertAnyArrayToIn(tt.input); result != tt.expected {
t.Errorf("convertAnyArrayToIn(%q) = %q, want %q", tt.input, result, tt.expected)
}
})
}

// The two equivalent catalog forms must canonicalize to the same string,
// otherwise plan schedules a perpetual spurious index rebuild (issue #473).
arrayLevel := convertAnyArrayToIn("(status)::text = ANY ((ARRAY['new'::character varying])::text[])")
elementLevel := convertAnyArrayToIn("(status)::text = ANY (ARRAY[('new'::character varying)::text])")
if arrayLevel != elementLevel {
t.Errorf("equivalent forms did not converge:\n array-level: %q\n element-level: %q", arrayLevel, elementLevel)
}
}

func TestStripRedundantTextCast(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"('new'::character varying)::text", "'new'::character varying"},
{"('new'::varchar)::text", "'new'::varchar"},
{"'new'::character varying::text", "'new'::character varying"},
{"'new'::character varying", "'new'::character varying"}, // already clean
{"'active'::public.status_type", "'active'::public.status_type"}, // custom type untouched
{"42", "42"},
}

for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
if result := stripRedundantTextCast(tt.input); result != tt.expected {
t.Errorf("stripRedundantTextCast(%q) = %q, want %q", tt.input, result, tt.expected)
}
})
}
}
Empty file.
15 changes: 15 additions & 0 deletions testdata/diff/create_index/issue_473_partial_index_in_list/new.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
CREATE TABLE findings (
id uuid PRIMARY KEY,
org_id uuid NOT NULL,
status varchar(20) NOT NULL,
impact_score int
);

-- Same partial index, but written in the form that pgschema dump renders
-- (copied from pg_get_indexdef). When re-parsed, PostgreSQL stores it with an
-- element-level cast: "= ANY (ARRAY[('x'::varchar)::text, ...])". This is
-- semantically identical to the IN-list form in old.sql, so the diff must be
-- empty - no spurious concurrent rebuild (issue #473).
CREATE INDEX idx_finding_actionable
ON findings (org_id, status, impact_score DESC)
WHERE (status)::text = ANY ((ARRAY['new'::character varying, 'acknowledged'::character varying])::text[]);
12 changes: 12 additions & 0 deletions testdata/diff/create_index/issue_473_partial_index_in_list/old.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
CREATE TABLE findings (
id uuid PRIMARY KEY,
org_id uuid NOT NULL,
status varchar(20) NOT NULL,
impact_score int
);

-- Partial index written with a natural IN-list predicate. PostgreSQL stores
-- this in the catalog with an array-level cast: "= ANY ((ARRAY[...])::text[])".
CREATE INDEX idx_finding_actionable
ON findings (org_id, status, impact_score DESC)
WHERE status IN ('new', 'acknowledged');
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"version": "1.0.0",
"pgschema_version": "1.11.1",
"created_at": "1970-01-01T00:00:00Z",
"source_fingerprint": {
"hash": "ee04b3107ba14c3c5811bc73d23ffc1e8422ceea20a6e5a129c3c3eca7979ef0"
},
"groups": null
}
Empty file.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
No changes detected.