Skip to content

Commit 963fb7a

Browse files
authored
GH-5182 improve performance of SHACL sh:pattern (#5183)
2 parents 3e4f94f + 32d167b commit 963fb7a

2 files changed

Lines changed: 57 additions & 48 deletions

File tree

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/constraintcomponents/PatternConstraintComponent.java

Lines changed: 53 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,7 @@
1717
import java.util.Objects;
1818
import java.util.Set;
1919
import java.util.function.Function;
20+
import java.util.regex.Pattern;
2021

2122
import org.eclipse.rdf4j.model.IRI;
2223
import org.eclipse.rdf4j.model.Literal;
@@ -30,11 +31,16 @@
3031
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PatternFilter;
3132
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode;
3233
import org.eclipse.rdf4j.sail.shacl.wrapper.data.ConnectionsGroup;
34+
import org.slf4j.Logger;
35+
import org.slf4j.LoggerFactory;
3336

3437
public class PatternConstraintComponent extends AbstractSimpleConstraintComponent {
3538

39+
private static final Logger logger = LoggerFactory.getLogger(PatternConstraintComponent.class);
40+
3641
String pattern;
3742
String flags;
43+
private final Pattern compiledPattern;
3844

3945
public PatternConstraintComponent(String pattern, String flags) {
4046
super();
@@ -44,6 +50,52 @@ public PatternConstraintComponent(String pattern, String flags) {
4450
if (flags == null) {
4551
this.flags = "";
4652
}
53+
54+
if (flags != null && !flags.isEmpty()) {
55+
int flag = 0b0;
56+
57+
if (flags.contains("i")) {
58+
flag = flag | Pattern.CASE_INSENSITIVE;
59+
logger.trace("PatternFilter constructed with case insensitive flag");
60+
}
61+
62+
if (flags.contains("d")) {
63+
flag = flag | Pattern.UNIX_LINES;
64+
logger.trace("PatternFilter constructed with UNIX lines flag");
65+
}
66+
67+
if (flags.contains("m")) {
68+
flag = flag | Pattern.MULTILINE;
69+
logger.trace("PatternFilter constructed with multiline flag");
70+
}
71+
72+
if (flags.contains("s")) {
73+
flag = flag | Pattern.DOTALL;
74+
logger.trace("PatternFilter constructed with dotall flag");
75+
}
76+
77+
if (flags.contains("u")) {
78+
flag = flag | Pattern.UNICODE_CASE;
79+
logger.trace("PatternFilter constructed with unicode case flag");
80+
}
81+
82+
if (flags.contains("x")) {
83+
flag = flag | Pattern.COMMENTS;
84+
logger.trace("PatternFilter constructed with comments flag");
85+
}
86+
87+
if (flags.contains("U")) {
88+
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
89+
logger.trace("PatternFilter constructed with unicode character class flag");
90+
}
91+
92+
this.compiledPattern = Pattern.compile(pattern, flag);
93+
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);
94+
95+
} else {
96+
this.compiledPattern = Pattern.compile(pattern, 0b0);
97+
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
98+
}
4799
}
48100

49101
@Override
@@ -87,7 +139,7 @@ public ConstraintComponent deepClone() {
87139

88140
@Override
89141
Function<PlanNode, FilterPlanNode> getFilterAttacher(ConnectionsGroup connectionsGroup) {
90-
return (parent) -> new PatternFilter(parent, pattern, flags, connectionsGroup);
142+
return (parent) -> new PatternFilter(parent, compiledPattern, connectionsGroup);
91143
}
92144

93145
@Override

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/planNodes/PatternFilter.java

Lines changed: 4 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -34,53 +34,9 @@ public class PatternFilter extends FilterPlanNode {
3434

3535
private final Pattern pattern;
3636

37-
public PatternFilter(PlanNode parent, String pattern, String flags, ConnectionsGroup connectionsGroup) {
37+
public PatternFilter(PlanNode parent, Pattern pattern, ConnectionsGroup connectionsGroup) {
3838
super(parent, connectionsGroup);
39-
if (flags != null && !flags.isEmpty()) {
40-
int flag = 0b0;
41-
42-
if (flags.contains("i")) {
43-
flag = flag | Pattern.CASE_INSENSITIVE;
44-
logger.trace("PatternFilter constructed with case insensitive flag");
45-
}
46-
47-
if (flags.contains("d")) {
48-
flag = flag | Pattern.UNIX_LINES;
49-
logger.trace("PatternFilter constructed with UNIX lines flag");
50-
}
51-
52-
if (flags.contains("m")) {
53-
flag = flag | Pattern.MULTILINE;
54-
logger.trace("PatternFilter constructed with multiline flag");
55-
}
56-
57-
if (flags.contains("s")) {
58-
flag = flag | Pattern.DOTALL;
59-
logger.trace("PatternFilter constructed with dotall flag");
60-
}
61-
62-
if (flags.contains("u")) {
63-
flag = flag | Pattern.UNICODE_CASE;
64-
logger.trace("PatternFilter constructed with unicode case flag");
65-
}
66-
67-
if (flags.contains("x")) {
68-
flag = flag | Pattern.COMMENTS;
69-
logger.trace("PatternFilter constructed with comments flag");
70-
}
71-
72-
if (flags.contains("U")) {
73-
flag = flag | Pattern.UNICODE_CHARACTER_CLASS;
74-
logger.trace("PatternFilter constructed with unicode character class flag");
75-
}
76-
77-
this.pattern = Pattern.compile(pattern, flag);
78-
logger.trace("PatternFilter constructed with pattern: {} and flags: {}", pattern, flags);
79-
80-
} else {
81-
this.pattern = Pattern.compile(pattern, 0b0);
82-
logger.trace("PatternFilter constructed with pattern: {} and no flags", pattern);
83-
}
39+
this.pattern = pattern;
8440
}
8541

8642
private static Literal str(Value argValue, ValueFactory valueFactory) {
@@ -104,8 +60,9 @@ boolean checkTuple(Reference t) {
10460
Value literal = t.get().getValue();
10561
literal = str(literal, SimpleValueFactory.getInstance());
10662

107-
if (literal == null)
63+
if (literal == null) {
10864
return false;
65+
}
10966

11067
if (QueryEvaluationUtility.isStringLiteral(literal)) {
11168
boolean result = pattern.matcher(((Literal) literal).getLabel()).find();

0 commit comments

Comments
 (0)