Skip to content

Commit 77cdb01

Browse files
committed
GH-4750 dedupe shapes before validation
1 parent daff48f commit 77cdb01

63 files changed

Lines changed: 1019 additions & 162 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ShaclSail.java

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,7 @@
5151
import org.eclipse.rdf4j.sail.SailException;
5252
import org.eclipse.rdf4j.sail.memory.MemoryStore;
5353
import org.eclipse.rdf4j.sail.memory.MemoryStoreConnection;
54-
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShapes;
54+
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShape;
5555
import org.eclipse.rdf4j.sail.shacl.ast.Shape;
5656
import org.eclipse.rdf4j.sail.shacl.wrapper.shape.CombinedShapeSource;
5757
import org.eclipse.rdf4j.sail.shacl.wrapper.shape.Rdf4jShaclShapeGraphShapeSource;
@@ -180,7 +180,7 @@ public class ShaclSail extends ShaclSailBaseConfiguration {
180180
"ShaclSail_SerializableValidation");
181181

182182
// shapesCacheLockManager used to keep track of changes to the cache
183-
private StampedLockManager.Cache<List<ContextWithShapes>> cachedShapes;
183+
private StampedLockManager.Cache<List<ContextWithShape>> cachedShapes;
184184

185185
// true if the base sail supports IsolationLevels.SNAPSHOT
186186
private boolean supportsSnapshotIsolation;
@@ -195,13 +195,13 @@ public class ShaclSail extends ShaclSailBaseConfiguration {
195195
private final RevivableExecutorService executorService;
196196

197197
@InternalUseOnly
198-
StampedLockManager.Cache<List<ContextWithShapes>>.WritableState getCachedShapesForWriting()
198+
StampedLockManager.Cache<List<ContextWithShape>>.WritableState getCachedShapesForWriting()
199199
throws InterruptedException {
200200
return cachedShapes.getWriteState();
201201
}
202202

203203
@InternalUseOnly
204-
public StampedLockManager.Cache<List<ContextWithShapes>>.ReadableState getCachedShapes()
204+
public StampedLockManager.Cache<List<ContextWithShape>>.ReadableState getCachedShapes()
205205
throws InterruptedException {
206206
return cachedShapes.getReadState();
207207
}
@@ -394,7 +394,7 @@ public void init() throws SailException {
394394
}
395395

396396
@InternalUseOnly
397-
public List<ContextWithShapes> getShapes(RepositoryConnection shapesRepoConnection, SailConnection sailConnection,
397+
public List<ContextWithShape> getShapes(RepositoryConnection shapesRepoConnection, SailConnection sailConnection,
398398
IRI[] shapesGraphs) throws SailException {
399399

400400
try (ShapeSource shapeSource = new CombinedShapeSource(shapesRepoConnection, sailConnection)
@@ -406,7 +406,7 @@ public List<ContextWithShapes> getShapes(RepositoryConnection shapesRepoConnecti
406406
}
407407

408408
@InternalUseOnly
409-
public List<ContextWithShapes> getShapes(RepositoryConnection shapesRepoConnection, IRI[] shapesGraphs)
409+
public List<ContextWithShape> getShapes(RepositoryConnection shapesRepoConnection, IRI[] shapesGraphs)
410410
throws SailException {
411411

412412
try (ShapeSource shapeSource = new Rdf4jShaclShapeGraphShapeSource(shapesRepoConnection)
@@ -490,7 +490,7 @@ public NotifyingSailConnection getConnection() throws SailException {
490490
}
491491

492492
@InternalUseOnly
493-
public List<ContextWithShapes> getShapes(IRI[] shapesGraphs, boolean onlyRdf4jShaclShapeGraph) {
493+
public List<ContextWithShape> getShapes(IRI[] shapesGraphs, boolean onlyRdf4jShaclShapeGraph) {
494494

495495
try (SailRepositoryConnection shapesRepoConnection = shapesRepo.getConnection()) {
496496
shapesRepoConnection.begin(IsolationLevels.READ_COMMITTED);
@@ -517,7 +517,7 @@ public List<ContextWithShapes> getShapes(IRI[] shapesGraphs, boolean onlyRdf4jSh
517517
public void setShapesGraphs(Set<IRI> shapesGraphs) {
518518
if (initialized.get()) {
519519
try {
520-
try (StampedLockManager.Cache<List<ContextWithShapes>>.WritableState writeState = cachedShapes
520+
try (StampedLockManager.Cache<List<ContextWithShape>>.WritableState writeState = cachedShapes
521521
.getWriteState()) {
522522
super.setShapesGraphs(shapesGraphs);
523523
writeState.purge();

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ShaclSailConnection.java

Lines changed: 21 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@
4646
import org.eclipse.rdf4j.sail.helpers.NotifyingSailConnectionWrapper;
4747
import org.eclipse.rdf4j.sail.memory.MemoryStore;
4848
import org.eclipse.rdf4j.sail.shacl.ShaclSail.TransactionSettings.ValidationApproach;
49-
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShapes;
49+
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShape;
5050
import org.eclipse.rdf4j.sail.shacl.ast.Shape;
5151
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.PlanNode;
5252
import org.eclipse.rdf4j.sail.shacl.ast.planNodes.SingleCloseablePlanNode;
@@ -95,8 +95,8 @@ public class ShaclSailConnection extends NotifyingSailConnectionWrapper implemen
9595
private Lock exclusiveSerializableValidationLock;
9696
private Lock nonExclusiveSerializableValidationLock;
9797

98-
private StampedLockManager.Cache<List<ContextWithShapes>>.WritableState writableShapesCache;
99-
private StampedLockManager.Cache<List<ContextWithShapes>>.ReadableState readableShapesCache;
98+
private StampedLockManager.Cache<List<ContextWithShape>>.WritableState writableShapesCache;
99+
private StampedLockManager.Cache<List<ContextWithShape>>.ReadableState readableShapesCache;
100100

101101
private final SailRepositoryConnection shapesRepoConnection;
102102

@@ -466,7 +466,7 @@ private void cleanupReadWriteLock() {
466466

467467
}
468468

469-
private ValidationReport validate(List<ContextWithShapes> shapes, boolean validateEntireBaseSail)
469+
private ValidationReport validate(List<ContextWithShape> shapes, boolean validateEntireBaseSail)
470470
throws InterruptedException {
471471

472472
assert isValidationEnabled();
@@ -502,7 +502,7 @@ ConnectionsGroup getConnectionsGroup() {
502502
this::getRdfsSubClassOfReasoner, transactionSettings, sail.sparqlValidation);
503503
}
504504

505-
private ValidationReport performValidation(List<ContextWithShapes> shapes, boolean validateEntireBaseSail,
505+
private ValidationReport performValidation(List<ContextWithShape> shapes, boolean validateEntireBaseSail,
506506
ConnectionsGroup connectionsGroup) throws InterruptedException {
507507
long beforeValidation = 0;
508508

@@ -511,27 +511,22 @@ private ValidationReport performValidation(List<ContextWithShapes> shapes, boole
511511
}
512512

513513
try {
514-
int numberOfShapes = shapes.stream()
515-
.map(ContextWithShapes::getShapes)
516-
.map(List::size)
517-
.mapToInt(i -> i)
518-
.sum();
514+
int numberOfShapes = shapes.size();
519515

520516
Stream<Callable<ValidationResultIterator>> callableStream = shapes
521517
.stream()
522-
.flatMap(contextWithShapes -> contextWithShapes.getShapes()
523-
.stream()
524-
.map(shape -> new ShapeValidationContainer(
525-
shape,
526-
() -> shape.generatePlans(connectionsGroup,
518+
.map(contextWithShapes -> new ShapeValidationContainer(
519+
contextWithShapes.getShape(),
520+
() -> contextWithShapes.getShape()
521+
.generatePlans(connectionsGroup,
527522
new ValidationSettings(contextWithShapes.getDataGraph(),
528523
sail.isLogValidationPlans(), validateEntireBaseSail,
529524
sail.isPerformanceLogging())),
530-
sail.isGlobalLogValidationExecution(), sail.isLogValidationViolations(),
531-
sail.getEffectiveValidationResultsLimitPerConstraint(), sail.isPerformanceLogging(),
532-
logger
533-
))
534-
)
525+
sail.isGlobalLogValidationExecution(), sail.isLogValidationViolations(),
526+
sail.getEffectiveValidationResultsLimitPerConstraint(), sail.isPerformanceLogging(),
527+
logger
528+
))
529+
535530
.filter(ShapeValidationContainer::hasPlanNode)
536531
.map(validationContainer -> validationContainer::performValidation);
537532

@@ -835,8 +830,8 @@ public void prepare() throws SailException {
835830
return;
836831
}
837832

838-
List<ContextWithShapes> currentShapes = null;
839-
List<ContextWithShapes> shapesAfterRefresh = null;
833+
List<ContextWithShape> currentShapes = null;
834+
List<ContextWithShape> shapesAfterRefresh = null;
840835

841836
if (shapeRefreshNeeded || !connectionListenerActive || isBulkValidation()) {
842837
if (writableShapesCache == null) {
@@ -913,12 +908,12 @@ public void prepare() throws SailException {
913908

914909
}
915910

916-
private boolean isEmpty(List<ContextWithShapes> shapesList) {
911+
private boolean isEmpty(List<ContextWithShape> shapesList) {
917912
if (shapesList == null) {
918913
return true;
919914
}
920-
for (ContextWithShapes shapesWithContext : shapesList) {
921-
if (!shapesWithContext.getShapes().isEmpty()) {
915+
for (ContextWithShape shapesWithContext : shapesList) {
916+
if (shapesWithContext.hasShape()) {
922917
return false;
923918
}
924919
}
@@ -934,7 +929,7 @@ private boolean isBulkValidation() {
934929
return transactionSettings.getValidationApproach() == ValidationApproach.Bulk;
935930
}
936931

937-
private ValidationReport serializableValidation(List<ContextWithShapes> shapesAfterRefresh)
932+
private ValidationReport serializableValidation(List<ContextWithShape> shapesAfterRefresh)
938933
throws InterruptedException {
939934
try {
940935
try (ConnectionsGroup connectionsGroup = new ConnectionsGroup(

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ShaclValidator.java

Lines changed: 13 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -11,7 +11,6 @@
1111

1212
package org.eclipse.rdf4j.sail.shacl;
1313

14-
import java.util.Arrays;
1514
import java.util.List;
1615
import java.util.stream.Collectors;
1716
import java.util.stream.Stream;
@@ -22,7 +21,7 @@
2221
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
2322
import org.eclipse.rdf4j.sail.Sail;
2423
import org.eclipse.rdf4j.sail.SailConnection;
25-
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShapes;
24+
import org.eclipse.rdf4j.sail.shacl.ast.ContextWithShape;
2625
import org.eclipse.rdf4j.sail.shacl.ast.Shape;
2726
import org.eclipse.rdf4j.sail.shacl.results.ValidationReport;
2827
import org.eclipse.rdf4j.sail.shacl.results.lazy.LazyValidationReport;
@@ -47,7 +46,7 @@ public class ShaclValidator {
4746

4847
public static ValidationReport validate(Sail dataRepo, Sail shapesRepo) {
4948

50-
List<ContextWithShapes> shapes;
49+
List<ContextWithShape> shapes;
5150
try (SailConnection shapesConnection = shapesRepo.getConnection()) {
5251
shapesConnection.begin(IsolationLevels.NONE);
5352
try (ShapeSource shapeSource = new CombinedShapeSource(shapesConnection,
@@ -59,12 +58,13 @@ public static ValidationReport validate(Sail dataRepo, Sail shapesRepo) {
5958
allShapeContexts = Stream.concat(allShapeContexts,
6059
Stream.of(new ShapeSource.ShapesGraph(RDF4J.NIL)));
6160
}
62-
List<ContextWithShapes> parsed = allShapeContexts
61+
List<ContextWithShape> parsed = allShapeContexts
6362
.map(context -> Shape.Factory.parse(shapeSource.withContext(context.getShapesGraph()), context,
6463
new Shape.ParseSettings(true, true)))
64+
.flatMap(List::stream)
6565
.collect(Collectors.toList());
6666

67-
shapes = Shape.Factory.getShapes(parsed);
67+
shapes = Shape.Factory.getShapes(parsed).stream().distinct().collect(Collectors.toList());
6868

6969
}
7070
shapesConnection.commit();
@@ -90,20 +90,18 @@ null, null, new Stats(), () -> reasoner,
9090

9191
}
9292

93-
private static ValidationReport performValidation(List<ContextWithShapes> shapes,
93+
private static ValidationReport performValidation(List<ContextWithShape> shapes,
9494
ConnectionsGroup connectionsGroup) {
9595

9696
List<ValidationResultIterator> collect = shapes
9797
.stream()
98-
.flatMap(contextWithShapes -> contextWithShapes
99-
.getShapes()
100-
.stream()
101-
.map(shape -> new ShapeValidationContainer(
102-
shape,
103-
() -> shape.generatePlans(connectionsGroup,
104-
new ValidationSettings(contextWithShapes.getDataGraph(), false, true, false)),
105-
false, false, 1000, false, logger)
106-
)
98+
.map(contextWithShape -> new ShapeValidationContainer(
99+
contextWithShape.getShape(),
100+
() -> contextWithShape.getShape()
101+
.generatePlans(connectionsGroup,
102+
new ValidationSettings(contextWithShape.getDataGraph(), false, true, false)),
103+
false, false, 1000, false, logger
104+
)
107105
)
108106
.filter(ShapeValidationContainer::hasPlanNode)
109107
.map(ShapeValidationContainer::performValidation)

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ContextWithShapes.java renamed to core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/ContextWithShape.java

Lines changed: 22 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -10,34 +10,41 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.shacl.ast;
1212

13+
import java.util.ArrayList;
1314
import java.util.Arrays;
15+
import java.util.HashSet;
1416
import java.util.List;
15-
import java.util.Objects;
17+
import java.util.Set;
1618

1719
import org.eclipse.rdf4j.model.Model;
1820
import org.eclipse.rdf4j.model.Resource;
1921
import org.eclipse.rdf4j.model.Statement;
2022
import org.eclipse.rdf4j.model.impl.DynamicModel;
2123
import org.eclipse.rdf4j.model.impl.DynamicModelFactory;
2224

23-
public class ContextWithShapes {
25+
public class ContextWithShape {
2426

2527
private final Resource[] dataGraph;
2628
private final Resource[] shapeGraph;
27-
private final List<Shape> shapes;
29+
private final Shape shape;
2830

29-
public ContextWithShapes(Resource[] dataGraph, Resource[] shapeGraph, List<Shape> shapes) {
31+
public ContextWithShape(Resource[] dataGraph, Resource[] shapeGraph, Shape shape) {
3032
this.shapeGraph = shapeGraph;
3133
this.dataGraph = dataGraph;
32-
this.shapes = shapes;
34+
Arrays.sort(this.dataGraph);
35+
this.shape = shape;
3336
}
3437

3538
public Resource[] getShapeGraph() {
3639
return shapeGraph;
3740
}
3841

39-
public List<Shape> getShapes() {
40-
return shapes;
42+
public Shape getShape() {
43+
return shape;
44+
}
45+
46+
public boolean hasShape() {
47+
return shape != null;
4148
}
4249

4350
public Resource[] getDataGraph() {
@@ -52,28 +59,27 @@ public boolean equals(Object o) {
5259
if (o == null || getClass() != o.getClass()) {
5360
return false;
5461
}
55-
ContextWithShapes that = (ContextWithShapes) o;
56-
return Arrays.equals(dataGraph, that.dataGraph) && Arrays.equals(shapeGraph, that.shapeGraph)
57-
&& shapes.equals(that.shapes);
62+
ContextWithShape that = (ContextWithShape) o;
63+
return Arrays.equals(dataGraph, that.dataGraph)
64+
&& shape.equals(that.shape);
5865
}
5966

6067
@Override
6168
public int hashCode() {
62-
int result = Objects.hash(shapes);
69+
int result = shape.hashCode();
6370
result = 31 * result + Arrays.hashCode(dataGraph);
64-
result = 31 * result + Arrays.hashCode(shapeGraph);
6571
return result;
6672
}
6773

68-
public void toModel(Model model) {
74+
public void toModel(Model model, Set<Resource> cycleDetection) {
6975
DynamicModel emptyModel = new DynamicModelFactory().createEmptyModel();
70-
for (Shape shape : shapes) {
71-
shape.toModel(emptyModel);
72-
}
76+
shape.toModel(null, null, emptyModel, cycleDetection);
77+
7378
for (Statement statement : emptyModel) {
7479
for (Resource context : shapeGraph) {
7580
model.add(statement.getSubject(), statement.getPredicate(), statement.getObject(), context);
7681
}
7782
}
7883
}
84+
7985
}

core/sail/shacl/src/main/java/org/eclipse/rdf4j/sail/shacl/ast/NodeShape.java

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -102,10 +102,10 @@ public void toModel(Resource subject, IRI predicate, Model model, Set<Resource>
102102

103103
}
104104

105-
if (cycleDetection.contains(getId())) {
106-
return;
107-
}
108-
cycleDetection.add(getId());
105+
// if (cycleDetection.contains(getId())) {
106+
// return;
107+
// }
108+
// cycleDetection.add(getId());
109109

110110
constraintComponents.forEach(c -> c.toModel(getId(), null, model, cycleDetection));
111111

0 commit comments

Comments
 (0)