Skip to content

Commit 9da9a99

Browse files
authored
GH-5180 backport fix for handling of pattern in shacl (#5181)
2 parents d03bb96 + 2208216 commit 9da9a99

2 files changed

Lines changed: 100 additions & 5 deletions

File tree

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java

Lines changed: 46 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import java.util.Objects;
1818
import java.util.Set;
1919
import java.util.function.Function;
20+
import java.util.regex.Pattern;
2021

2122
import org.eclipse.rdf4j.model.IRI;
2223
import org.eclipse.rdf4j.model.Literal;
@@ -42,6 +43,47 @@ public PatternConstraintComponent(String pattern, String flags) {
4243
if (flags == null) {
4344
this.flags = "";
4445
}
46+
47+
verifyPattern(pattern, flags);
48+
}
49+
50+
private static void verifyPattern(String pattern, String flags) {
51+
if (flags != null && !flags.isEmpty()) {
52+
int flag = 0b0;
53+
54+
if (flags.contains("i")) {
55+
flag = flag | Pattern.CASE_INSENSITIVE;
56+
}
57+
58+
if (flags.contains("d")) {
59+
flag = flag | Pattern.UNIX_LINES;
60+
}
61+
62+
if (flags.contains("m")) {
63+
flag = flag | Pattern.MULTILINE;
64+
}
65+
66+
if (flags.contains("s")) {
67+
flag = flag | Pattern.DOTALL;
68+
}
69+
70+
if (flags.contains("u")) {
71+
flag = flag | Pattern.UNICODE_CASE;
72+
}
73+
74+
if (flags.contains("x")) {
75+
flag = flag | Pattern.COMMENTS;
76+
}
77+
78+
if (flags.contains("U")) {
79+
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
80+
}
81+
82+
Pattern.compile(pattern, flag);
83+
84+
} else {
85+
Pattern.compile(pattern, 0b0);
86+
}
4587
}
4688

4789
@Override
@@ -67,7 +109,10 @@ String getSparqlFilterExpression(Variable<Value> variable, boolean negated) {
67109
}
68110

69111
private static String escapeRegexForSparql(String pattern) {
70-
return pattern.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\n");
112+
pattern = pattern.replace("\\", "\\\\");
113+
pattern = pattern.replace("\"", "\\\"");
114+
pattern = pattern.replace("\n", "\\n");
115+
return pattern;
71116
}
72117

73118
@Override

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,63 +14,113 @@
1414
import java.util.Objects;
1515
import java.util.regex.Pattern;
1616

17+
import org.eclipse.rdf4j.model.IRI;
18+
import org.eclipse.rdf4j.model.Literal;
19+
import org.eclipse.rdf4j.model.Triple;
1720
import org.eclipse.rdf4j.model.Value;
21+
import org.eclipse.rdf4j.model.ValueFactory;
22+
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
23+
import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtility;
24+
import org.slf4j.Logger;
25+
import org.slf4j.LoggerFactory;
1826

1927
/**
2028
* @author Håvard Ottestad
2129
*/
2230
public class PatternFilter extends FilterPlanNode {
2331

32+
private static final Logger logger = LoggerFactory.getLogger(PatternFilter.class);
33+
2434
private final Pattern pattern;
2535

2636
public PatternFilter(PlanNode parent, String pattern, String flags) {
2737
super(parent);
2838
if (flags != null && !flags.isEmpty()) {
29-
3039
int flag = 0b0;
3140

3241
if (flags.contains("i")) {
3342
flag = flag | Pattern.CASE_INSENSITIVE;
43+
logger.trace("PatternFilter constructed with case insensitive flag");
3444
}
3545

3646
if (flags.contains("d")) {
3747
flag = flag | Pattern.UNIX_LINES;
48+
logger.trace("PatternFilter constructed with UNIX lines flag");
3849
}
3950

4051
if (flags.contains("m")) {
4152
flag = flag | Pattern.MULTILINE;
53+
logger.trace("PatternFilter constructed with multiline flag");
4254
}
4355

4456
if (flags.contains("s")) {
4557
flag = flag | Pattern.DOTALL;
58+
logger.trace("PatternFilter constructed with dotall flag");
4659
}
4760

4861
if (flags.contains("u")) {
4962
flag = flag | Pattern.UNICODE_CASE;
63+
logger.trace("PatternFilter constructed with unicode case flag");
5064
}
5165

5266
if (flags.contains("x")) {
5367
flag = flag | Pattern.COMMENTS;
68+
logger.trace("PatternFilter constructed with comments flag");
5469
}
5570

5671
if (flags.contains("U")) {
5772
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
73+
logger.trace("PatternFilter constructed with unicode character class flag");
5874
}
5975

6076
this.pattern = Pattern.compile(pattern, flag);
77+
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);
6178

6279
} else {
63-
this.pattern = Pattern.compile(pattern);
64-
80+
this.pattern = Pattern.compile(pattern, 0b0);
81+
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
6582
}
83+
}
6684

85+
private static Literal str(Value argValue, ValueFactory valueFactory) {
86+
if (argValue instanceof IRI || argValue instanceof Triple) {
87+
return valueFactory.createLiteral(argValue.toString());
88+
} else if (argValue instanceof Literal) {
89+
Literal literal = (Literal) argValue;
90+
91+
if (QueryEvaluationUtility.isSimpleLiteral(literal)) {
92+
return literal;
93+
} else {
94+
return valueFactory.createLiteral(literal.getLabel());
95+
}
96+
} else {
97+
return null;
98+
}
6799
}
68100

69101
@Override
70102
boolean checkTuple(ValidationTuple t) {
71103
Value literal = t.getValue();
104+
literal = str(literal, SimpleValueFactory.getInstance());
72105

73-
return pattern.matcher(literal.stringValue()).matches();
106+
if (literal == null) {
107+
return false;
108+
}
109+
110+
if (QueryEvaluationUtility.isStringLiteral(literal)) {
111+
boolean result = pattern.matcher(((Literal) literal).getLabel()).find();
112+
if (logger.isTraceEnabled()) {
113+
logger.trace("PatternFilter value: \"{}\" with pattern: \"{}\" and result: {}",
114+
((Literal) literal).getLabel().replace("\n", "\\n").replace("\"", "\\\""),
115+
pattern.toString().replace("\n", "\\n").replace("\"", "\\\""), result);
116+
}
117+
return result;
118+
}
119+
120+
if (logger.isTraceEnabled()) {
121+
logger.trace("PatternFilter did not match value because value is not a string literal: {}", literal);
122+
}
123+
return false;
74124
}
75125

76126
@Override

0 commit comments

Comments
 (0)