99 "fmt"
1010 "regexp"
1111 "strings"
12+ "sync"
1213 "time"
1314)
1415
@@ -92,7 +93,7 @@ func GenerateTempSchemaName() string {
9293// Only qualifications matching the specified schemaName are stripped.
9394// All other schema qualifications are preserved as intentional cross-schema references.
9495func stripSchemaQualifications (sql string , schemaName string ) string {
95- if schemaName == "" {
96+ if schemaName == "" || ! strings . Contains ( sql , schemaName ) {
9697 return sql
9798 }
9899
@@ -108,12 +109,107 @@ func stripSchemaQualifications(sql string, schemaName string) string {
108109 // Preserve dollar-quoted content as-is
109110 result .WriteString (seg .text )
110111 } else {
111- result .WriteString (stripSchemaQualificationsFromText (seg .text , schemaName ))
112+ // Further split on single-quoted string literals to avoid stripping
113+ // schema prefixes from inside string constants (Issue #371).
114+ // e.g., has_scope('s.manage') must NOT become has_scope('manage')
115+ result .WriteString (stripSchemaQualificationsPreservingStrings (seg .text , schemaName ))
112116 }
113117 }
114118 return result .String ()
115119}
116120
121+ // stripSchemaQualificationsPreservingStrings splits text on single-quoted string
122+ // literals and SQL comments, applies schema stripping only to non-string,
123+ // non-comment parts, and reassembles.
124+ //
125+ // Limitation: E'...' escape-string syntax uses backslash-escaped quotes (E'it\'s')
126+ // rather than doubled quotes ('it''s'). This parser only recognises the '' form.
127+ // With E'content\'', a backslash-escaped quote may cause the parser to mistrack
128+ // string boundaries, which can result in either:
129+ // - false-negative: schema qualifiers after the string are not stripped, or
130+ // - false-positive: schema prefixes inside the E-string are incorrectly stripped.
131+ //
132+ // Both cases change semantics only for E'...' strings, which are extremely rare
133+ // in DDL schema definitions. The false-negative case preserves valid SQL; the
134+ // false-positive case could alter string content but is unlikely in practice.
135+ func stripSchemaQualificationsPreservingStrings (text string , schemaName string ) string {
136+ var result strings.Builder
137+ result .Grow (len (text ))
138+
139+ // flushCode writes text[segStart:end] through schema stripping and advances segStart.
140+ i := 0
141+ segStart := 0
142+
143+ flushCode := func (end int ) {
144+ if end > segStart {
145+ result .WriteString (stripSchemaQualificationsFromText (text [segStart :end ], schemaName ))
146+ }
147+ segStart = end
148+ }
149+ flushLiteral := func (end int ) {
150+ result .WriteString (text [segStart :end ])
151+ segStart = end
152+ }
153+
154+ for i < len (text ) {
155+ ch := text [i ]
156+
157+ // Start of single-quoted string literal
158+ if ch == '\'' {
159+ flushCode (i )
160+ i ++ // skip opening quote
161+ for i < len (text ) {
162+ if text [i ] == '\'' {
163+ if i + 1 < len (text ) && text [i + 1 ] == '\'' {
164+ i += 2 // skip escaped ''
165+ } else {
166+ i ++ // skip closing quote
167+ break
168+ }
169+ } else {
170+ i ++
171+ }
172+ }
173+ flushLiteral (i )
174+ continue
175+ }
176+
177+ // Start of line comment (--)
178+ if ch == '-' && i + 1 < len (text ) && text [i + 1 ] == '-' {
179+ flushCode (i )
180+ i += 2
181+ for i < len (text ) && text [i ] != '\n' {
182+ i ++
183+ }
184+ if i < len (text ) {
185+ i ++ // skip the newline
186+ }
187+ flushLiteral (i )
188+ continue
189+ }
190+
191+ // Start of block comment (/* ... */)
192+ if ch == '/' && i + 1 < len (text ) && text [i + 1 ] == '*' {
193+ flushCode (i )
194+ i += 2
195+ for i < len (text ) {
196+ if text [i ] == '*' && i + 1 < len (text ) && text [i + 1 ] == '/' {
197+ i += 2
198+ break
199+ }
200+ i ++
201+ }
202+ flushLiteral (i )
203+ continue
204+ }
205+
206+ i ++
207+ }
208+ // Remaining text is code
209+ flushCode (i )
210+ return result .String ()
211+ }
212+
117213// dollarQuotedSegment represents a segment of SQL text, either inside or outside a dollar-quoted block.
118214type dollarQuotedSegment struct {
119215 text string
@@ -165,82 +261,61 @@ func splitDollarQuotedSegments(sql string) []dollarQuotedSegment {
165261 return segments
166262}
167263
// schemaRegexes holds the compiled patterns for one schema name, so that
// stripSchemaQualificationsFromText does not recompile them on every call.
type schemaRegexes struct {
	re1 *regexp.Regexp // "schema"."object"
	re2 *regexp.Regexp // "schema".object
	re3 *regexp.Regexp // schema."object"
	re4 *regexp.Regexp // schema.object
}

var (
	// schemaRegexCache maps a schema name to its compiled patterns.
	schemaRegexCache = make(map[string]*schemaRegexes)
	// schemaRegexCacheMu guards schemaRegexCache.
	schemaRegexCacheMu sync.Mutex
)

// getSchemaRegexes returns the compiled patterns for schemaName, compiling and
// caching them on first use. The cache lookup and insert happen under
// schemaRegexCacheMu.
func getSchemaRegexes(schemaName string) *schemaRegexes {
	schemaRegexCacheMu.Lock()
	defer schemaRegexCacheMu.Unlock()

	if cached, ok := schemaRegexCache[schemaName]; ok {
		return cached
	}

	escapedSchema := regexp.QuoteMeta(schemaName)

	// PostgreSQL identifiers may contain non-ASCII letters in addition to
	// [A-Za-z0-9_$], so every rune >= U+0080 is treated as an identifier
	// character, both in object names and in the boundary check.
	const (
		identStart = `[a-zA-Z_\x{80}-\x{10FFFF}]`
		identRest  = `[a-zA-Z0-9_$\x{80}-\x{10FFFF}]`
		// boundary is captured so the replacement can put it back. It is either
		// start-of-text or a character that can neither continue an identifier
		// (schema "s" must not match the tail of "sales") nor be a double quote
		// (the schema must not sit inside a quoted identifier like "public.idx").
		boundary     = `(^|[^a-zA-Z0-9_$"\x{80}-\x{10FFFF}])`
		quotedObject = `("[^"]+")`
	)
	unquotedObject := "(" + identStart + identRest + "*)"

	sr := &schemaRegexes{
		// Quoted schema: the leading quote already rules out suffix matches.
		re1: regexp.MustCompile(fmt.Sprintf(`"%s"\.%s`, escapedSchema, quotedObject)),
		re2: regexp.MustCompile(fmt.Sprintf(`"%s"\.%s`, escapedSchema, unquotedObject)),
		// Unquoted schema: requires the captured boundary in front.
		re3: regexp.MustCompile(fmt.Sprintf(`%s%s\.%s`, boundary, escapedSchema, quotedObject)),
		re4: regexp.MustCompile(fmt.Sprintf(`%s%s\.%s`, boundary, escapedSchema, unquotedObject)),
	}
	schemaRegexCache[schemaName] = sr
	return sr
}

// stripSchemaQualificationsFromText removes the schemaName qualifier from
// qualified object references in text and returns the result. Qualifications
// using any other schema are left untouched.
//
// Four forms are handled, applied in this order:
//  1. "quoted_schema"."quoted_object"   -> "quoted_object"
//  2. "quoted_schema".unquoted_object   -> unquoted_object
//  3. unquoted_schema."quoted_object"   -> "quoted_object"
//  4. unquoted_schema.unquoted_object   -> unquoted_object
//
// Matching is case-sensitive. An unquoted schema is only recognised at the
// start of text or after a character that cannot be part of an identifier and
// is not a double quote.
func stripSchemaQualificationsFromText(text string, schemaName string) string {
	sr := getSchemaRegexes(schemaName)

	result := text
	// Apply in order: quoted schema first to avoid double-matching.
	result = sr.re1.ReplaceAllString(result, "$1")
	result = sr.re2.ReplaceAllString(result, "$1")
	// For patterns 3 and 4, $1 is the boundary (a single character, or empty
	// at start of text) and $2 is the object name: keep both, drop the schema.
	result = sr.re3.ReplaceAllString(result, "${1}${2}")
	result = sr.re4.ReplaceAllString(result, "${1}${2}")

	return result
}
0 commit comments