Skip to content

Commit 955a2ea

Browse files
tianzhouclaude
andauthored
fix: preserve string literals during schema qualification stripping (#371) (#372)
* fix: preserve string literals during schema qualification stripping (#371) stripSchemaQualificationsFromText used regex patterns that matched schema prefixes inside single-quoted SQL string literals. For example, with schema "s", Pattern 4 treated the single quote in 's.manage' as a valid non-double-quote prefix character, corrupting 's.manage' into 'manage'. This caused pgschema plan to generate destructive false-positive ALTER POLICY statements that silently truncated function arguments. Add stripSchemaQualificationsPreservingStrings which splits text on single-quoted string boundaries (handling '' escapes) and applies schema stripping only to non-string segments. Also add a fast-path strings.Contains check to skip all work when the schema name is absent. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * Document E'...' backslash-escaped quote limitation in string-literal parser Add doc comment and test case capturing the known limitation where E'...' escape-string syntax causes the single-quote parser to mistrack boundaries. The failure mode is safe (false-negative: unstripped qualifier). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: handle SQL comments in string-literal-aware schema stripping Apostrophes in SQL comments (-- don't ...) could flip the string-tracking state, causing schema qualifiers after the comment to not be stripped. Also cache compiled regexes per schema name for better performance. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> * fix: use word boundaries in schema-stripping regexes and simplify replacements - Use [^a-zA-Z0-9_$"] boundary instead of [^"] to prevent matching schema name as suffix of longer identifiers (e.g., schema "s" no longer matches "sales.total") - Use capture groups with ReplaceAllString instead of ReplaceAllStringFunc to correctly handle start-of-string matches - Clarify E-string limitation doc: both false-negative and false-positive are possible Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 03a4a86 commit 955a2ea

5 files changed

Lines changed: 267 additions & 75 deletions

File tree

internal/postgres/desired_state.go

Lines changed: 147 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"fmt"
1010
"regexp"
1111
"strings"
12+
"sync"
1213
"time"
1314
)
1415

@@ -92,7 +93,7 @@ func GenerateTempSchemaName() string {
9293
// Only qualifications matching the specified schemaName are stripped.
9394
// All other schema qualifications are preserved as intentional cross-schema references.
9495
func stripSchemaQualifications(sql string, schemaName string) string {
95-
if schemaName == "" {
96+
if schemaName == "" || !strings.Contains(sql, schemaName) {
9697
return sql
9798
}
9899

@@ -108,12 +109,107 @@ func stripSchemaQualifications(sql string, schemaName string) string {
108109
// Preserve dollar-quoted content as-is
109110
result.WriteString(seg.text)
110111
} else {
111-
result.WriteString(stripSchemaQualificationsFromText(seg.text, schemaName))
112+
// Further split on single-quoted string literals to avoid stripping
113+
// schema prefixes from inside string constants (Issue #371).
114+
// e.g., has_scope('s.manage') must NOT become has_scope('manage')
115+
result.WriteString(stripSchemaQualificationsPreservingStrings(seg.text, schemaName))
112116
}
113117
}
114118
return result.String()
115119
}
116120

121+
// stripSchemaQualificationsPreservingStrings splits text on single-quoted string
122+
// literals and SQL comments, applies schema stripping only to non-string,
123+
// non-comment parts, and reassembles.
124+
//
125+
// Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
126+
// rather than doubled quotes ('it''s'). This parser only recognises the '' form.
127+
// With E'content\'', a backslash-escaped quote may cause the parser to mistrack
128+
// string boundaries, which can result in either:
129+
// - false-negative: schema qualifiers after the string are not stripped, or
130+
// - false-positive: schema prefixes inside the E-string are incorrectly stripped.
131+
//
132+
// Both cases change semantics only for E'...' strings, which are extremely rare
133+
// in DDL schema definitions. The false-negative case preserves valid SQL; the
134+
// false-positive case could alter string content but is unlikely in practice.
135+
func stripSchemaQualificationsPreservingStrings(text string, schemaName string) string {
136+
var result strings.Builder
137+
result.Grow(len(text))
138+
139+
// flushCode writes text[segStart:end] through schema stripping and advances segStart.
140+
i := 0
141+
segStart := 0
142+
143+
flushCode := func(end int) {
144+
if end > segStart {
145+
result.WriteString(stripSchemaQualificationsFromText(text[segStart:end], schemaName))
146+
}
147+
segStart = end
148+
}
149+
flushLiteral := func(end int) {
150+
result.WriteString(text[segStart:end])
151+
segStart = end
152+
}
153+
154+
for i < len(text) {
155+
ch := text[i]
156+
157+
// Start of single-quoted string literal
158+
if ch == '\'' {
159+
flushCode(i)
160+
i++ // skip opening quote
161+
for i < len(text) {
162+
if text[i] == '\'' {
163+
if i+1 < len(text) && text[i+1] == '\'' {
164+
i += 2 // skip escaped ''
165+
} else {
166+
i++ // skip closing quote
167+
break
168+
}
169+
} else {
170+
i++
171+
}
172+
}
173+
flushLiteral(i)
174+
continue
175+
}
176+
177+
// Start of line comment (--)
178+
if ch == '-' && i+1 < len(text) && text[i+1] == '-' {
179+
flushCode(i)
180+
i += 2
181+
for i < len(text) && text[i] != '\n' {
182+
i++
183+
}
184+
if i < len(text) {
185+
i++ // skip the newline
186+
}
187+
flushLiteral(i)
188+
continue
189+
}
190+
191+
// Start of block comment (/* ... */)
192+
if ch == '/' && i+1 < len(text) && text[i+1] == '*' {
193+
flushCode(i)
194+
i += 2
195+
for i < len(text) {
196+
if text[i] == '*' && i+1 < len(text) && text[i+1] == '/' {
197+
i += 2
198+
break
199+
}
200+
i++
201+
}
202+
flushLiteral(i)
203+
continue
204+
}
205+
206+
i++
207+
}
208+
// Remaining text is code
209+
flushCode(i)
210+
return result.String()
211+
}
212+
117213
// dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block.
118214
type dollarQuotedSegment struct {
119215
text string
@@ -165,82 +261,61 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
165261
return segments
166262
}
167263

168-
// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
169-
func stripSchemaQualificationsFromText(text string, schemaName string) string {
170-
// Escape the schema name for use in regex
171-
escapedSchema := regexp.QuoteMeta(schemaName)
172-
173-
// Pattern matches schema qualification and captures the object name
174-
// We need to handle 4 cases:
175-
// 1. unquoted_schema.unquoted_object -> unquoted_object
176-
// 2. unquoted_schema."quoted_object" -> "quoted_object"
177-
// 3. "quoted_schema".unquoted_object -> unquoted_object
178-
// 4. "quoted_schema"."quoted_object" -> "quoted_object"
179-
//
180-
// Key: The dot must be outside quotes (a schema.object separator, not part of an identifier)
181-
// Important: For unquoted schema patterns, we must ensure the schema name isn't inside a quoted identifier
182-
// Example: Don't match 'public' in CREATE INDEX "public.idx" where the whole thing is a quoted identifier
183-
184-
// Pattern 1: quoted schema + dot + quoted object: "schema"."object"
185-
// Example: "public"."table" -> "table"
186-
pattern1 := fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)
187-
re1 := regexp.MustCompile(pattern1)
264+
// schemaRegexes holds compiled regexes for a specific schema name, avoiding
265+
// recompilation on every call to stripSchemaQualificationsFromText.
266+
type schemaRegexes struct {
267+
re1 *regexp.Regexp // "schema"."object"
268+
re2 *regexp.Regexp // "schema".object
269+
re3 *regexp.Regexp // schema."object"
270+
re4 *regexp.Regexp // schema.object
271+
}
188272

189-
// Pattern 2: quoted schema + dot + unquoted object: "schema".object
190-
// Example: "public".table -> table
191-
pattern2 := fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
192-
re2 := regexp.MustCompile(pattern2)
273+
var (
274+
schemaRegexCache = make(map[string]*schemaRegexes)
275+
schemaRegexCacheMu sync.Mutex
276+
)
193277

194-
// Pattern 3: unquoted schema + dot + quoted object: schema."object"
195-
// Example: public."table" -> "table"
196-
// Use negative lookbehind to ensure schema isn't preceded by a quote
197-
// and negative lookahead to ensure the dot after schema isn't inside quotes
198-
pattern3 := fmt.Sprintf(`(?:^|[^"])%s\.(\"[^"]+\")`, escapedSchema)
199-
re3 := regexp.MustCompile(pattern3)
278+
func getSchemaRegexes(schemaName string) *schemaRegexes {
279+
schemaRegexCacheMu.Lock()
280+
defer schemaRegexCacheMu.Unlock()
281+
if cached, ok := schemaRegexCache[schemaName]; ok {
282+
return cached
283+
}
284+
escapedSchema := regexp.QuoteMeta(schemaName)
285+
// Patterns 1-2: quoted schema ("schema".object / "schema"."object")
286+
// The leading " already prevents suffix matching.
287+
// Patterns 3-4: unquoted schema (schema.object / schema."object")
288+
// Use a capture group for the optional non-identifier prefix so we can
289+
// preserve it in replacement without the match[0] ambiguity at ^.
290+
// The character class [^a-zA-Z0-9_$"] ensures the schema name isn't a
291+
// suffix of a longer identifier (e.g., schema "s" won't match "sales").
292+
sr := &schemaRegexes{
293+
re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.(\"[^"]+\")`, escapedSchema)),
294+
re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
295+
re3: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.(\"[^"]+\")`, escapedSchema)),
296+
re4: regexp.MustCompile(fmt.Sprintf(`(^|[^a-zA-Z0-9_$"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)),
297+
}
298+
schemaRegexCache[schemaName] = sr
299+
return sr
300+
}
200301

201-
// Pattern 4: unquoted schema + dot + unquoted object: schema.object
202-
// Example: public.table -> table
203-
// Use negative lookbehind to ensure schema isn't preceded by a quote
204-
pattern4 := fmt.Sprintf(`(?:^|[^"])%s\.([a-zA-Z_][a-zA-Z0-9_$]*)`, escapedSchema)
205-
re4 := regexp.MustCompile(pattern4)
302+
// stripSchemaQualificationsFromText performs the actual schema qualification stripping on a text segment.
303+
// It handles 4 cases:
304+
// 1. unquoted_schema.unquoted_object -> unquoted_object
305+
// 2. unquoted_schema."quoted_object" -> "quoted_object"
306+
// 3. "quoted_schema".unquoted_object -> unquoted_object
307+
// 4. "quoted_schema"."quoted_object" -> "quoted_object"
308+
func stripSchemaQualificationsFromText(text string, schemaName string) string {
309+
sr := getSchemaRegexes(schemaName)
206310

207311
result := text
208312
// Apply in order: quoted schema first to avoid double-matching
209-
result = re1.ReplaceAllString(result, "$1")
210-
result = re2.ReplaceAllString(result, "$1")
211-
// For patterns 3 and 4, we need to preserve the character before the schema
212-
result = re3.ReplaceAllStringFunc(result, func(match string) string {
213-
// If match starts with a non-quote character, preserve it
214-
if len(match) > 0 && match[0] != '"' {
215-
// Extract the quote identifier (everything after schema.)
216-
parts := strings.SplitN(match, ".", 2)
217-
if len(parts) == 2 {
218-
return string(match[0]) + parts[1]
219-
}
220-
}
221-
// Otherwise just return the captured quoted identifier
222-
parts := strings.SplitN(match, ".", 2)
223-
if len(parts) == 2 {
224-
return parts[1]
225-
}
226-
return match
227-
})
228-
result = re4.ReplaceAllStringFunc(result, func(match string) string {
229-
// If match starts with a non-quote character, preserve it
230-
if len(match) > 0 && match[0] != '"' {
231-
// Extract the unquoted identifier (everything after schema.)
232-
parts := strings.SplitN(match, ".", 2)
233-
if len(parts) == 2 {
234-
return string(match[0]) + parts[1]
235-
}
236-
}
237-
// Otherwise just return the captured unquoted identifier
238-
parts := strings.SplitN(match, ".", 2)
239-
if len(parts) == 2 {
240-
return parts[1]
241-
}
242-
return match
243-
})
313+
result = sr.re1.ReplaceAllString(result, "$1")
314+
result = sr.re2.ReplaceAllString(result, "$1")
315+
// For patterns 3 and 4, $1 is the prefix (boundary char or empty at ^),
316+
// $2 is the object name — preserve the prefix and keep only the object.
317+
result = sr.re3.ReplaceAllString(result, "${1}${2}")
318+
result = sr.re4.ReplaceAllString(result, "${1}${2}")
244319

245320
return result
246321
}

internal/postgres/desired_state_test.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -194,3 +194,100 @@ func TestReplaceSchemaInSearchPath(t *testing.T) {
194194
})
195195
}
196196
}
197+
198+
func TestStripSchemaQualifications_PreservesStringLiterals(t *testing.T) {
199+
tests := []struct {
200+
name string
201+
sql string
202+
schema string
203+
expected string
204+
}{
205+
{
206+
name: "strips schema from table reference",
207+
sql: "CREATE TABLE public.items (id int);",
208+
schema: "public",
209+
expected: "CREATE TABLE items (id int);",
210+
},
211+
{
212+
name: "preserves schema prefix inside single-quoted string",
213+
sql: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
214+
schema: "public",
215+
expected: "CREATE POLICY p ON items USING (has_scope('public.manage'));",
216+
},
217+
{
218+
name: "preserves schema prefix inside string with short schema name",
219+
sql: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
220+
schema: "s",
221+
expected: "CREATE POLICY p ON items USING (has_scope('s.manage')) WITH CHECK (has_scope('s.manage'));",
222+
},
223+
{
224+
name: "strips schema from identifier but preserves string literal",
225+
sql: "CREATE POLICY p ON s.items USING (auth.has_scope('s.manage'));",
226+
schema: "s",
227+
expected: "CREATE POLICY p ON items USING (auth.has_scope('s.manage'));",
228+
},
229+
{
230+
name: "preserves escaped quotes in string literals",
231+
sql: "SELECT 'it''s public.test' FROM public.t;",
232+
schema: "public",
233+
expected: "SELECT 'it''s public.test' FROM t;",
234+
},
235+
{
236+
name: "handles multiple string literals",
237+
sql: "SELECT 'public.a', public.t, 'public.b';",
238+
schema: "public",
239+
expected: "SELECT 'public.a', t, 'public.b';",
240+
},
241+
{
242+
name: "does not match schema as suffix of longer identifier",
243+
sql: "SELECT sales.total, s.items FROM s.orders;",
244+
schema: "s",
245+
expected: "SELECT sales.total, items FROM orders;",
246+
},
247+
{
248+
name: "strips schema at start of string",
249+
sql: "public.t",
250+
schema: "public",
251+
expected: "t",
252+
},
253+
{
254+
name: "handles apostrophe in line comment followed by schema-qualified identifier",
255+
sql: "SELECT 1; -- don't drop public.t\nDROP TABLE public.t;",
256+
schema: "public",
257+
expected: "SELECT 1; -- don't drop public.t\nDROP TABLE t;",
258+
},
259+
{
260+
name: "handles block comment with apostrophe",
261+
sql: "/* it's public.t */ DROP TABLE public.t;",
262+
schema: "public",
263+
expected: "/* it's public.t */ DROP TABLE t;",
264+
},
265+
{
266+
name: "handles block comment without apostrophe",
267+
sql: "/* drop public.t */ DROP TABLE public.t;",
268+
schema: "public",
269+
expected: "/* drop public.t */ DROP TABLE t;",
270+
},
271+
{
272+
// Known limitation: E'...' escape-string syntax with backslash-escaped quotes
273+
// is not handled. The parser treats \' as ordinary char + string-closer,
274+
// mistracking boundaries. Here it strips inside the string (wrong) and
275+
// misses the identifier after (also wrong). Both are safe: the SQL remains
276+
// valid, and the unstripped qualifier just means the object is looked up
277+
// in the original schema. E'...' in DDL is extremely rare.
278+
name: "E-string with backslash-escaped quote (known limitation)",
279+
sql: "SELECT E'it\\'s public.test' FROM public.t;",
280+
schema: "public",
281+
expected: "SELECT E'it\\'s test' FROM public.t;",
282+
},
283+
}
284+
285+
for _, tt := range tests {
286+
t.Run(tt.name, func(t *testing.T) {
287+
result := stripSchemaQualifications(tt.sql, tt.schema)
288+
if result != tt.expected {
289+
t.Errorf("stripSchemaQualifications(%q, %q)\n got: %q\n want: %q", tt.sql, tt.schema, result, tt.expected)
290+
}
291+
})
292+
}
293+
}

testdata/diff/create_policy/alter_policy_using/new.sql

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
CREATE FUNCTION has_scope(p_scope text) RETURNS boolean
2+
LANGUAGE sql STABLE AS $$ SELECT p_scope IS NOT NULL $$;
3+
14
CREATE TABLE users (
25
id SERIAL PRIMARY KEY,
36
name VARCHAR(100) NOT NULL,
@@ -10,4 +13,11 @@ ALTER TABLE users ENABLE ROW LEVEL SECURITY;
1013
CREATE POLICY user_tenant_isolation ON users
1114
FOR ALL
1215
TO PUBLIC
13-
USING (tenant_id = 2);
16+
USING (tenant_id = 2);
17+
18+
-- Policy with string literal containing schema prefix (Issue #371)
19+
-- This must NOT produce a false-positive diff
20+
CREATE POLICY scope_check ON users
21+
FOR SELECT
22+
TO PUBLIC
23+
USING (has_scope('public.manage'));

0 commit comments

Comments
 (0)