diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConjunctiveConstraintSplitterOptimizer.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConjunctiveConstraintSplitterOptimizer.java index b9a0f78765f..6f1ad30cb3d 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConjunctiveConstraintSplitterOptimizer.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConjunctiveConstraintSplitterOptimizer.java @@ -56,7 +56,7 @@ public void meet(Filter filter) { TupleExpr filterArg = filter.getArg(); for (int i = conjunctiveConstraints.size() - 1; i >= 1; i--) { - Filter newFilter = new Filter(filterArg, conjunctiveConstraints.get(i)); + Filter newFilter = new Filter(filterArg, conjunctiveConstraints.get(i).clone()); filterArg = newFilter; } @@ -77,11 +77,11 @@ public void meet(LeftJoin node) { for (ValueExpr constraint : conjunctiveConstraints) { if (isWithinBindingScope(constraint, arg)) { - arg = new Filter(arg, constraint); + arg = new Filter(arg, constraint.clone()); } else if (condition == null) { condition = constraint; } else { - condition = new And(condition, constraint); + condition = new And(condition, constraint.clone()); } } diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterOptimizer.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterOptimizer.java index f0bda9ac755..af8dfe19d10 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterOptimizer.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/FilterOptimizer.java @@ -11,6 +11,7 @@ package org.eclipse.rdf4j.query.algebra.evaluation.optimizer; +import java.util.Objects; import java.util.Set; import org.eclipse.rdf4j.query.BindingSet; @@ -31,6 +32,7 @@ import org.eclipse.rdf4j.query.algebra.StatementPattern; import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.Union; +import org.eclipse.rdf4j.query.algebra.VariableScopeChange; import org.eclipse.rdf4j.query.algebra.evaluation.QueryOptimizer; import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor; import org.eclipse.rdf4j.query.algebra.helpers.AbstractSimpleQueryModelVisitor; @@ -78,9 +80,49 @@ public class FilterOptimizer implements QueryOptimizer { @Override public void optimize(TupleExpr tupleExpr, Dataset dataset, BindingSet bindings) { - tupleExpr.visit(new FilterUnMerger()); - tupleExpr.visit(new FilterOrganizer()); - tupleExpr.visit(new FilterMerger()); + Objects.requireNonNull(tupleExpr, "tupleExpr must not be null"); + optimizeScope(tupleExpr); + } + + /** + * Bottom‑up optimisation: optimise each child subtree that starts a new variable scope, then run the three filter + * passes on the current subtree. + */ + private void optimizeScope(TupleExpr expr) { + // 1) recurse into nested scopes first + expr.visit(new AbstractQueryModelVisitor<>() { + final TupleExpr current = expr; + + @Override + protected void meetNode(QueryModelNode node) { + if (node != current && node instanceof TupleExpr && node instanceof VariableScopeChange + && ((VariableScopeChange) node).isVariableScopeChange()) { + + optimizeScope(((TupleExpr) node)); + // do NOT traverse further into that subtree with this visitor + } else { + super.meetNode(node); + } + } + + }); + + // 2) run the three filter passes for *this* scope + expr.visit(new FilterUnMerger()); + expr.visit(new FilterOrganizer()); + expr.visit(new FilterMerger()); + } + + /** + * Copies the {@code isVariableScopeChange} flag from one node to another if both implement + * {@link VariableScopeChange}. + */ + private static void transferScopeChange(QueryModelNode source, QueryModelNode target) { + if (source instanceof VariableScopeChange && target instanceof VariableScopeChange) { + VariableScopeChange src = (VariableScopeChange) source; + VariableScopeChange tgt = (VariableScopeChange) target; + tgt.setVariableScopeChange(src.isVariableScopeChange()); + } } private static class FilterUnMerger extends AbstractSimpleQueryModelVisitor { @@ -95,6 +137,7 @@ public void meet(Filter filter) { And and = (And) filter.getCondition(); filter.setCondition(and.getLeftArg().clone()); Filter newFilter = new Filter(filter.getArg().clone(), and.getRightArg().clone()); + transferScopeChange(filter, newFilter); // preserve scope flag filter.replaceChildNode(filter.getArg(), newFilter); } super.meet(filter); @@ -117,6 +160,7 @@ public void meet(Filter filter) { And merge = new And(childFilter.getCondition().clone(), filter.getCondition().clone()); Filter newFilter = new Filter(childFilter.getArg().clone(), merge); + transferScopeChange(filter, newFilter); // both have same scope flag parent.replaceChildNode(filter, newFilter); } } @@ -190,6 +234,7 @@ public void meet(LeftJoin leftJoin) { public void meet(Union union) { Filter clone = new Filter(); clone.setCondition(filter.getCondition().clone()); + transferScopeChange(filter, clone); relocate(filter, union.getLeftArg()); relocate(clone, union.getRightArg()); @@ -202,6 +247,7 @@ public void meet(Union union) { public void meet(Difference node) { Filter clone = new Filter(); clone.setCondition(filter.getCondition().clone()); + transferScopeChange(filter, clone); relocate(filter, node.getLeftArg()); relocate(clone, node.getRightArg()); @@ -214,6 +260,7 @@ public void meet(Difference node) { public void meet(Intersection node) { Filter clone = new Filter(); clone.setCondition(filter.getCondition().clone()); + transferScopeChange(filter, clone); relocate(filter, node.getLeftArg()); relocate(clone, node.getRightArg()); @@ -278,5 +325,4 @@ private void relocate(Filter filter, TupleExpr newFilterArg) { } } } - } diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/QueryPlanRetrievalTest.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/QueryPlanRetrievalTest.java index 187e7eb0e75..d420c9b5d2e 100644 --- a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/QueryPlanRetrievalTest.java +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/QueryPlanRetrievalTest.java @@ -18,6 +18,7 @@ import java.io.IOException; import java.io.StringReader; import java.nio.charset.StandardCharsets; +import java.util.Arrays; import org.apache.commons.io.IOUtils; import org.eclipse.rdf4j.common.transaction.IsolationLevels; @@ -121,6 +122,386 @@ private void addData(SailRepository sailRepository) { } } + @Test + public void testFilterDontMergeAcrossSubqueryOptimizedPlanRetrieval() throws Exception { + String sparql = "SELECT * WHERE {?s ?p ?o . {?o ?p2 ?o2. FILTER(?o > ?o2) FILTER(?o2 != ?o)} {?o ?p3 ?o3. FILTER(?o > ?o3) FILTER(?o != ?o3 || ?o = ?o3)} FILTER(?s > ?o)}"; + SailRepository sailRepository = new SailRepository(new MemoryStore()); + addData(sailRepository); + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + Query query = connection.prepareTupleQuery(sparql); + String actual = query.explain(Explanation.Level.Optimized).toString(); + assertThat(actual).isEqualToNormalizingNewlines( + "Join (JoinIterator)\n" + + "╠══ Filter [left]\n" + + "║ ├── Compare (>)\n" + + "║ │ Var (name=s)\n" + + "║ │ Var (name=o)\n" + + "║ └── StatementPattern (costEstimate=5.67, resultSizeEstimate=12)\n" + + "║ s: Var (name=s)\n" + + "║ p: Var (name=p)\n" + + "║ o: Var (name=o)\n" + + "╚══ Join (HashJoinIteration) [right]\n" + + " ├── Filter (new scope) [left]\n" + + " │ ╠══ And\n" + + " │ ║ ├── Compare (>)\n" + + " │ ║ │ Var (name=o)\n" + + " │ ║ │ Var (name=o2)\n" + + " │ ║ └── Compare (!=)\n" + + " │ ║ Var (name=o2)\n" + + " │ ║ Var (name=o)\n" + + " │ ╚══ StatementPattern (resultSizeEstimate=12)\n" + + " │ s: Var (name=o)\n" + + " │ p: Var (name=p2)\n" + + " │ o: Var (name=o2)\n" + + " └── Filter (new scope) [right]\n" + + " ╠══ And\n" + + " ║ ├── Compare (>)\n" + + " ║ │ Var (name=o)\n" + + " ║ │ Var (name=o3)\n" + + " ║ └── Or\n" + + " ║ ╠══ Compare (!=)\n" + + " ║ ║ Var (name=o)\n" + + " ║ ║ Var (name=o3)\n" + + " ║ ╚══ Compare (=)\n" + + " ║ Var (name=o)\n" + + " ║ Var (name=o3)\n" + + " ╚══ StatementPattern (resultSizeEstimate=12)\n" + + " s: Var (name=o)\n" + + " p: Var (name=p3)\n" + + " o: Var (name=o3)\n"); + } + sailRepository.shutDown(); + } + + @Test + public void testSpecificFilterScopeScenario() throws Exception { + String sparql = "PREFIX ex: \n" + + "\n" + + "SELECT ?s ?p ?o ?o2 ?g WHERE {\n" + + " { # scope‑0\n" + + " { # scope‑1 (UNION A)\n" + + " ?s ex:p ?o . # pattern A1\n" + + " ?s ex:q ?o2 .\n" + + " FILTER (?o > 1 && ?o2 < 5 && BOUND(?s) && !BOUND(?g))\n" + + " }\n" + + " UNION { # scope‑1 (UNION B)\n" + + " GRAPH ?g { ?s ex:r ?o } # GRAPH introduces scope‑2\n" + + " FILTER (?o != 42 && ?g != ex:Bad)\n" + + " }\n" + + " }\n" + + " OPTIONAL { # scope‑0 → scope‑3\n" + + " BIND (EXISTS { ?s ex:p2 ?x } AS ?hasX)\n" + + " FILTER (?hasX && STRLEN(STR(?x)) > 3)\n" + + " }\n" + + " FILTER (?o2 IN (1,2,3,4,5)) # top‑level filter\n" + + "}"; + SailRepository sailRepository = new SailRepository(new MemoryStore()); + addData(sailRepository); + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + Query query = connection.prepareTupleQuery(sparql); + String actual = query.explain(Explanation.Level.Optimized).toString(); + assertThat(actual).isEqualToNormalizingNewlines("Projection\n" + + "╠══ ProjectionElemList\n" + + "║ ProjectionElem \"s\"\n" + + "║ ProjectionElem \"p\"\n" + + "║ ProjectionElem \"o\"\n" + + "║ ProjectionElem \"o2\"\n" + + "║ ProjectionElem \"g\"\n" + + "╚══ LeftJoin (LeftJoinIterator)\n" + + " Compare (>)\n" + + " ╠══ FunctionCall (http://www.w3.org/2005/xpath-functions#string-length)\n" + + " ║ Str\n" + + " ║ Var (name=x)\n" + + " ╚══ ValueConstant (value=\"3\"^^)\n" + + " Union\n" + + " ╠══ Filter\n" + + " ║ ├── Not\n" + + " ║ │ Bound\n" + + " ║ │ Var (name=g)\n" + + " ║ └── Join (JoinIterator)\n" + + " ║ ╠══ Filter (new scope) [left]\n" + + " ║ ║ ├── And\n" + + " ║ ║ │ ╠══ Bound\n" + + " ║ ║ │ ║ Var (name=s)\n" + + " ║ ║ │ ╚══ Compare (>)\n" + + " ║ ║ │ Var (name=o)\n" + + " ║ ║ │ ValueConstant (value=\"1\"^^)\n" + + + " ║ ║ └── StatementPattern (costEstimate=2.50, resultSizeEstimate=0)\n" + + " ║ ║ s: Var (name=s)\n" + + " ║ ║ p: Var (name=_const_c03ab50c_uri, value=http://example.com/p, anonymous)\n" + + " ║ ║ o: Var (name=o)\n" + + " ║ ╚══ Filter [right]\n" + + " ║ ├── And\n" + + " ║ │ ╠══ ListMemberOperator\n" + + " ║ │ ║ Var (name=o2)\n" + + " ║ │ ║ ValueConstant (value=\"1\"^^)\n" + + + " ║ │ ║ ValueConstant (value=\"2\"^^)\n" + + + " ║ │ ║ ValueConstant (value=\"3\"^^)\n" + + + " ║ │ ║ ValueConstant (value=\"4\"^^)\n" + + + " ║ │ ║ ValueConstant (value=\"5\"^^)\n" + + + " ║ │ ╚══ Compare (<)\n" + + " ║ │ Var (name=o2)\n" + + " ║ │ ValueConstant (value=\"5\"^^)\n" + + + " ║ └── StatementPattern (costEstimate=2.24, resultSizeEstimate=0)\n" + + " ║ s: Var (name=s)\n" + + " ║ p: Var (name=_const_c03ab50d_uri, value=http://example.com/q, anonymous)\n" + + " ║ o: Var (name=o2)\n" + + " ╚══ Filter\n" + + " ├── And\n" + + " │ ╠══ And\n" + + " │ ║ ├── Compare (!=)\n" + + " │ ║ │ Var (name=g)\n" + + " │ ║ │ ValueConstant (value=http://example.com/Bad)\n" + + " │ ║ └── Compare (!=)\n" + + " │ ║ Var (name=o)\n" + + " │ ║ ValueConstant (value=\"42\"^^)\n" + + " │ ╚══ ListMemberOperator\n" + + " │ Var (name=o2)\n" + + " │ ValueConstant (value=\"1\"^^)\n" + + " │ ValueConstant (value=\"2\"^^)\n" + + " │ ValueConstant (value=\"3\"^^)\n" + + " │ ValueConstant (value=\"4\"^^)\n" + + " │ ValueConstant (value=\"5\"^^)\n" + + " └── StatementPattern FROM NAMED CONTEXT (resultSizeEstimate=0)\n" + + " s: Var (name=s)\n" + + " p: Var (name=_const_c03ab50e_uri, value=http://example.com/r, anonymous)\n" + + " o: Var (name=o)\n" + + " c: Var (name=g)\n" + + " Filter\n" + + " ╠══ Var (name=hasX)\n" + + " ╚══ Extension\n" + + " ├── SingletonSet\n" + + " └── ExtensionElem (hasX)\n" + + " Exists\n" + + " StatementPattern (resultSizeEstimate=0)\n" + + " s: Var (name=s)\n" + + " p: Var (name=_const_471beca6_uri, value=http://example.com/p2, anonymous)\n" + + " o: Var (name=x)\n"); + } + sailRepository.shutDown(); + } + + @Test + public void multipleScopesAndFilters() throws Exception { + String sparql = "PREFIX : \n" + + "\n" + + "SELECT ?s ?o ?score ?lvl WHERE {\n" + + "\n" + + " VALUES ?s { :A :B :C }\n" + + "\n" + + " ?s :prop ?o .\n" + + "\n" + + " FILTER (BOUND(?s)) # F‑0‑1\n" + + " FILTER (?o != :Forbidden) # F‑0‑2\n" + + " {FILTER (LCASE(STR(?o)) != \"bad\") # F‑0‑3\n" + + " FILTER (NOT EXISTS { ?s :deprecated true }) } # F‑0‑4\n" + + "\n" + + "\n" + + + " {\n" + + " ?s :score ?score .\n" + + "\n" + + " FILTER (?score > 10) # F‑1‑1\n" + + " {FILTER (?score < 100)} # F‑1‑2\n" + + " FILTER ((?score - 2) != 0) # F‑1‑3\n" + + "\n" + + "\n" + + + " OPTIONAL {\n" + + " ?s :reviewedBy ?reviewer .\n" + + " ?reviewer :level ?lvl .\n" + + "\n" + + " FILTER (?lvl >= 3) # F‑2‑1\n" + + " FILTER (?lvl <= 8) # F‑2‑2\n" + + " FILTER (REGEX(STR(?reviewer),\n" + + " \"^http://example\\\\.com/user\")) # F‑2‑3\n" + + " }\n" + + " }\n" + + "}"; + + SailRepository sailRepository = new SailRepository(new MemoryStore()); + addData(sailRepository); + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + Query query = connection.prepareTupleQuery(sparql); + String actual = query.explain(Explanation.Level.Optimized).toString(); + assertThat(actual).isEqualToNormalizingNewlines("Projection\n" + + "╠══ ProjectionElemList\n" + + "║ ProjectionElem \"s\"\n" + + "║ ProjectionElem \"o\"\n" + + "║ ProjectionElem \"score\"\n" + + "║ ProjectionElem \"lvl\"\n" + + "╚══ Join (JoinIterator)\n" + + " ├── Filter [left]\n" + + " │ ╠══ Bound\n" + + " │ ║ Var (name=s)\n" + + " │ ╚══ BindingSetAssignment ([[s=http://example.com/A], [s=http://example.com/B], [s=http://example.com/C]]) (costEstimate=0, resultSizeEstimate=1.00)\n" + + + " └── Join (JoinIterator) [right]\n" + + " ╠══ Filter (new scope) [left]\n" + + " ║ ├── And\n" + + " ║ │ ╠══ Compare (!=)\n" + + " ║ │ ║ ├── FunctionCall (http://www.w3.org/2005/xpath-functions#lower-case)\n" + + " ║ │ ║ │ Str\n" + + " ║ │ ║ │ Var (name=o)\n" + + " ║ │ ║ └── ValueConstant (value=\"bad\")\n" + + " ║ │ ╚══ Not\n" + + " ║ │ Exists\n" + + " ║ │ StatementPattern (resultSizeEstimate=0)\n" + + " ║ │ s: Var (name=s)\n" + + " ║ │ p: Var (name=_const_52097_uri, value=http://example.com/deprecated, anonymous)\n" + + + " ║ │ o: Var (name=_const_36758e_lit_eeeee601, value=\"true\"^^, anonymous)\n" + + + " ║ └── SingletonSet\n" + + " ╚══ Join (JoinIterator) [right]\n" + + " ├── Filter [left]\n" + + " │ ╠══ Compare (!=)\n" + + " │ ║ Var (name=o)\n" + + " │ ║ ValueConstant (value=http://example.com/Forbidden)\n" + + " │ ╚══ StatementPattern (costEstimate=2.24, resultSizeEstimate=0)\n" + + " │ s: Var (name=s)\n" + + " │ p: Var (name=_const_efd45947_uri, value=http://example.com/prop, anonymous)\n" + + " │ o: Var (name=o)\n" + + " └── LeftJoin [right]\n" + + " ╠══ Join (HashJoinIteration) [left]\n" + + " ║ ├── Filter (new scope) [left]\n" + + " ║ │ ╠══ And\n" + + " ║ │ ║ ├── Compare (>)\n" + + " ║ │ ║ │ Var (name=score)\n" + + " ║ │ ║ │ ValueConstant (value=\"10\"^^)\n" + + + " ║ │ ║ └── Compare (!=)\n" + + " ║ │ ║ ╠══ MathExpr (-)\n" + + " ║ │ ║ ║ Var (name=score)\n" + + " ║ │ ║ ║ ValueConstant (value=\"2\"^^)\n" + + + " ║ │ ║ ╚══ ValueConstant (value=\"0\"^^)\n" + + + " ║ │ ╚══ StatementPattern (costEstimate=2.24, resultSizeEstimate=0)\n" + + " ║ │ s: Var (name=s)\n" + + " ║ │ p: Var (name=_const_ada452e_uri, value=http://example.com/score, anonymous)\n" + + + " ║ │ o: Var (name=score)\n" + + " ║ └── Filter (new scope) (costEstimate=6.00, resultSizeEstimate=1.00) [right]\n" + + " ║ ╠══ Compare (<)\n" + + " ║ ║ Var (name=score)\n" + + " ║ ║ ValueConstant (value=\"100\"^^)\n" + + + " ║ ╚══ SingletonSet\n" + + " ╚══ Join (JoinIterator) [right]\n" + + " ├── Filter [left]\n" + + " │ ╠══ Regex\n" + + " │ ║ ├── Str\n" + + " │ ║ │ Var (name=reviewer)\n" + + " │ ║ └── ValueConstant (value=\"^http://example\\.com/user\")\n" + + " │ ╚══ StatementPattern (costEstimate=1.12, resultSizeEstimate=0)\n" + + " │ s: Var (name=s)\n" + + " │ p: Var (name=_const_f053af92_uri, value=http://example.com/reviewedBy, anonymous)\n" + + + " │ o: Var (name=reviewer)\n" + + " └── Filter [right]\n" + + " ╠══ And\n" + + " ║ ├── Compare (>=)\n" + + " ║ │ Var (name=lvl)\n" + + " ║ │ ValueConstant (value=\"3\"^^)\n" + + + " ║ └── Compare (<=)\n" + + " ║ Var (name=lvl)\n" + + " ║ ValueConstant (value=\"8\"^^)\n" + + + " ╚══ StatementPattern (costEstimate=2.24, resultSizeEstimate=0)\n" + + " s: Var (name=reviewer)\n" + + " p: Var (name=_const_a78a220_uri, value=http://example.com/level, anonymous)\n" + + + " o: Var (name=lvl)\n"); + } + sailRepository.shutDown(); + } + + @Test + public void multipleScopesAndFilters2() throws Exception { + String sparql = "PREFIX : \n" + + "\n" + + "SELECT ?s ?o WHERE {\n" + + " VALUES ?s { :S } # scope‑0 (outermost)\n" + + "\n" + + " { # scope‑1 (new group graph pattern)\n" + + " { " + + " FILTER (?o != ?s) # ▸ Filter‑B (scope‑1)\n" + + + " FILTER (NOT EXISTS {?o ?s ?c}) # ▸ Filter‑B (scope‑1)\n" + + " BIND(?s as ?o)\n" + +// " FILTER (?o = :O) # ▸ Filter‑A (scope‑2)\n" + + + " }\n" + + "\n" + + + " }\n" + + "}"; + SailRepository sailRepository = new SailRepository(new MemoryStore()); + addData(sailRepository); + + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + Query query = connection.prepareTupleQuery(sparql); + String actual = query.explain(Explanation.Level.Unoptimized).toString(); + assertThat(actual).isEqualToNormalizingNewlines("Projection\n" + + "╠══ ProjectionElemList\n" + + "║ ProjectionElem \"s\"\n" + + "║ ProjectionElem \"o\"\n" + + "╚══ Join\n" + + " ├── BindingSetAssignment ([[s=http://example/S]]) [left]\n" + + " └── Filter (new scope) [right]\n" + + " ╠══ Not\n" + + " ║ Exists\n" + + " ║ StatementPattern\n" + + " ║ s: Var (name=o)\n" + + " ║ p: Var (name=s)\n" + + " ║ o: Var (name=c)\n" + + " ╚══ Filter\n" + + " ├── Compare (!=)\n" + + " │ Var (name=o)\n" + + " │ Var (name=s)\n" + + " └── Extension\n" + + " ╠══ SingletonSet\n" + + " ╚══ ExtensionElem (o)\n" + + " Var (name=s)\n"); + } + + try (SailRepositoryConnection connection = sailRepository.getConnection()) { + Query query = connection.prepareTupleQuery(sparql); + String actual = query.explain(Explanation.Level.Optimized).toString(); + assertThat(actual).isEqualToNormalizingNewlines("Projection\n" + + "╠══ ProjectionElemList\n" + + "║ ProjectionElem \"s\"\n" + + "║ ProjectionElem \"o\"\n" + + "╚══ Join (JoinIterator)\n" + + " ├── Filter (new scope) [left]\n" + + " │ ╠══ And\n" + + " │ ║ ├── Compare (!=)\n" + + " │ ║ │ Var (name=o)\n" + + " │ ║ │ Var (name=s)\n" + + " │ ║ └── Not\n" + + " │ ║ Exists\n" + + " │ ║ StatementPattern (resultSizeEstimate=12)\n" + + " │ ║ s: Var (name=o)\n" + + " │ ║ p: Var (name=s)\n" + + " │ ║ o: Var (name=c)\n" + + " │ ╚══ Extension\n" + + " │ ├── SingletonSet\n" + + " │ └── ExtensionElem (o)\n" + + " │ Var (name=s)\n" + + " └── BindingSetAssignment ([[s=http://example/S]]) (costEstimate=6.00, resultSizeEstimate=1.00) [right]\n"); + } + sailRepository.shutDown(); + } + @Test public void testTupleQuery() { SailRepository sailRepository = new SailRepository(new MemoryStore()); @@ -608,7 +989,6 @@ public void bigDataset() throws IOException { try (SailRepositoryConnection connection = repository.getConnection()) { String s = connection.prepareTupleQuery(query1).explain(Explanation.Level.Timed).toString(); - System.out.println(s); } repository.shutDown(); diff --git a/docker/Dockerfile-jetty b/docker/Dockerfile-jetty index b2f3e93d915..b4fe328d666 100644 --- a/docker/Dockerfile-jetty +++ b/docker/Dockerfile-jetty @@ -11,12 +11,12 @@ WORKDIR /tmp RUN unzip -q /tmp/rdf4j.zip # Final workbench -FROM jetty:9-jre17-eclipse-temurin +FROM jetty:9-jre17-eclipse-temurin LABEL org.opencontainers.image.authors="Bart Hanssens (bart.hanssens@bosa.fgov.be)" USER root -ENV JAVA_OPTIONS="-Dorg.eclipse.rdf4j.appdata.basedir=/var/rdf4j -Dorg.eclipse.rdf4j.rio.jsonld_secure_mode=false" +ENV JAVA_OPTIONS="-Xmx2g -Dorg.eclipse.rdf4j.appdata.basedir=/var/rdf4j -Dorg.eclipse.rdf4j.rio.jsonld_secure_mode=false" ENV JETTY_MODULES="server,bytebufferpool,threadpool,security,servlet,webapp,ext,plus,deploy,annotations,http,jsp,jstl" COPY --from=temp /tmp/eclipse-rdf4j*/war/*.war /var/lib/jetty/webapps/ diff --git a/scripts/milestone-release.sh b/scripts/milestone-release.sh index 28830823be2..e7a9531eeff 100755 --- a/scripts/milestone-release.sh +++ b/scripts/milestone-release.sh @@ -218,7 +218,7 @@ git checkout main RELEASE_NOTES_BRANCH="${MVN_VERSION_RELEASE}-release-notes" git checkout -b "${RELEASE_NOTES_BRANCH}" -tar -cvzf "site/static/javadoc/${MVN_VERSION_RELEASE}.tgz" -C target/site/apidocs . +tar --no-xattrs --exclude ".*" -cvzf "site/static/javadoc/${MVN_VERSION_RELEASE}.tgz" -C target/site/apidocs . git add --all git commit -s -a -m "javadocs for ${MVN_VERSION_RELEASE}" git push --set-upstream origin "${RELEASE_NOTES_BRANCH}" diff --git a/scripts/release.sh b/scripts/release.sh index 2db8679eb21..1ccbac680d5 100755 --- a/scripts/release.sh +++ b/scripts/release.sh @@ -259,7 +259,7 @@ mvn package -Passembly -DskipTests -Djapicmp.skip git checkout main git checkout -b "${RELEASE_NOTES_BRANCH}" -tar -cvzf "site/static/javadoc/${MVN_VERSION_RELEASE}.tgz" -C target/site/apidocs . +tar --no-xattrs --exclude ".*" -cvzf "site/static/javadoc/${MVN_VERSION_RELEASE}.tgz" -C target/site/apidocs . cp -f "site/static/javadoc/${MVN_VERSION_RELEASE}.tgz" "site/static/javadoc/latest.tgz" git add --all git commit -s -a -m "javadocs for ${MVN_VERSION_RELEASE}" diff --git a/tools/server-spring/src/main/java/org/eclipse/rdf4j/http/server/repository/statements/ExportStatementsView.java b/tools/server-spring/src/main/java/org/eclipse/rdf4j/http/server/repository/statements/ExportStatementsView.java index ac009798bb8..9e14ba676ec 100644 --- a/tools/server-spring/src/main/java/org/eclipse/rdf4j/http/server/repository/statements/ExportStatementsView.java +++ b/tools/server-spring/src/main/java/org/eclipse/rdf4j/http/server/repository/statements/ExportStatementsView.java @@ -12,10 +12,11 @@ import static javax.servlet.http.HttpServletResponse.SC_OK; -import java.io.ByteArrayOutputStream; +import java.io.IOException; import java.io.OutputStream; import java.nio.charset.Charset; import java.util.Map; +import java.util.Objects; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; @@ -24,42 +25,62 @@ import org.eclipse.rdf4j.http.server.repository.RepositoryInterceptor; import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Resource; +import org.eclipse.rdf4j.model.Statement; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.repository.RepositoryConnection; import org.eclipse.rdf4j.repository.RepositoryException; import org.eclipse.rdf4j.rio.RDFFormat; +import org.eclipse.rdf4j.rio.RDFHandler; import org.eclipse.rdf4j.rio.RDFHandlerException; import org.eclipse.rdf4j.rio.RDFWriter; import org.eclipse.rdf4j.rio.RDFWriterFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.web.servlet.View; /** - * View used to export statements. Renders the statements as RDF using a serialization specified using a parameter or - * Accept header. + * Streams statements as RDF in the format requested by the client. * * @author Herko ter Horst */ public class ExportStatementsView implements View { - public static final String SUBJECT_KEY = "subject"; + private static final Logger logger = LoggerFactory.getLogger(ExportStatementsView.class); + public static final String SUBJECT_KEY = "subject"; public static final String PREDICATE_KEY = "predicate"; - public static final String OBJECT_KEY = "object"; - public static final String CONTEXTS_KEY = "contexts"; - public static final String USE_INFERENCING_KEY = "useInferencing"; - public static final String CONNECTION_KEY = "connection"; - public static final String TRANSACTION_ID_KEY = "transactionID"; - public static final String FACTORY_KEY = "factory"; - public static final String HEADERS_ONLY = "headersOnly"; private static final ExportStatementsView INSTANCE = new ExportStatementsView(); + public static int MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS; + + static { + int max = 1024; // default value + String maxStatements = System.getProperty( + "org.eclipse.rdf4j.http.server.repository.statements.ExportStatementsView.MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS"); + if (maxStatements != null) { + try { + int userMax = Integer.parseInt(maxStatements); + if (userMax >= -1) { + max = userMax; + } else { + logger.warn( + "Invalid value for MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS: {}, must be >= -1, using default value of {}.", + maxStatements, max); + } + } catch (NumberFormatException e) { + logger.warn("Invalid value for MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS: " + + maxStatements, e); + } + } + MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS = max; + } public static ExportStatementsView getInstance() { return INSTANCE; @@ -70,26 +91,87 @@ private ExportStatementsView() { @Override public String getContentType() { + // Spring ignores this for View implementations; we set it in render(). return null; } - @SuppressWarnings("rawtypes") @Override public void render(Map model, HttpServletRequest request, HttpServletResponse response) throws Exception { - Resource subj = (Resource) model.get(SUBJECT_KEY); + + response.setBufferSize(1024 * 1024); // 1MB + + Resource subj = (Resource) Objects.requireNonNull(model, "model should not be null").get(SUBJECT_KEY); IRI pred = (IRI) model.get(PREDICATE_KEY); Value obj = (Value) model.get(OBJECT_KEY); Resource[] contexts = (Resource[]) model.get(CONTEXTS_KEY); - boolean useInferencing = (Boolean) model.get(USE_INFERENCING_KEY); + boolean useInferencing = Boolean.TRUE.equals(model.get(USE_INFERENCING_KEY)); + boolean headersOnly = Boolean.TRUE.equals(model.get(HEADERS_ONLY)); + + RDFWriterFactory factory = (RDFWriterFactory) model.get(FACTORY_KEY); + RDFFormat rdfFormat = factory.getRDFFormat(); + + attemptToDetectExceptions(request, factory, headersOnly, subj, pred, obj, useInferencing, contexts); + + response.setStatus(SC_OK); + + String mimeType = rdfFormat.getDefaultMIMEType(); + if (rdfFormat.hasCharset()) { + Charset charset = rdfFormat.getCharset(); + mimeType += "; charset=" + charset.name(); + } + response.setContentType(mimeType); + + String filename = "statements"; + if (rdfFormat.getDefaultFileExtension() != null) { + filename += "." + rdfFormat.getDefaultFileExtension(); + } + response.setHeader("Content-Disposition", "attachment; filename=" + filename); + + if (headersOnly) { + response.setContentLength(0); + response.flushBuffer(); + return; + } + + try (OutputStream out = response.getOutputStream()) { + RDFWriter writer = factory.getWriter(out); + try (RepositoryConnection conn = RepositoryInterceptor.getRepositoryConnection(request)) { + conn.exportStatements(subj, pred, obj, useInferencing, writer, contexts); + out.flush(); + response.flushBuffer(); + } catch (RDFHandlerException e) { + var serverHTTPException = new ServerHTTPException("Serialization error: " + e.getMessage(), e); + if (!response.isCommitted()) { + response.reset(); + } + throw serverHTTPException; + } catch (RepositoryException e) { + var serverHTTPException = new ServerHTTPException("Repository error: " + e.getMessage(), e); + if (!response.isCommitted()) { + response.reset(); + } + throw serverHTTPException; + } catch (Throwable e) { + if (!response.isCommitted()) { + response.reset(); + } + throw e; + } - boolean headersOnly = (Boolean) model.get(HEADERS_ONLY); + } - RDFWriterFactory rdfWriterFactory = (RDFWriterFactory) model.get(FACTORY_KEY); + } - RDFFormat rdfFormat = rdfWriterFactory.getRDFFormat(); + private static void attemptToDetectExceptions(HttpServletRequest request, RDFWriterFactory rdfWriterFactory, + boolean headersOnly, Resource subj, IRI pred, Value obj, boolean useInferencing, Resource[] contexts) + throws IOException, ServerHTTPException { + if (MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS == 0) { + return; + } - try (ByteArrayOutputStream baos = new ByteArrayOutputStream()) { - RDFWriter rdfWriter = rdfWriterFactory.getWriter(baos); + try (OutputStream out = OutputStream.nullOutputStream()) { + RDFHandler rdfWriter = new LimitedSizeRDFHandler(rdfWriterFactory.getWriter(out), + MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS); if (!headersOnly) { try (RepositoryConnection conn = RepositoryInterceptor.getRepositoryConnection(request)) { conn.exportStatements(subj, pred, obj, useInferencing, rdfWriter, contexts); @@ -97,26 +179,68 @@ public void render(Map model, HttpServletRequest request, HttpServletResponse re throw new ServerHTTPException("Serialization error: " + e.getMessage(), e); } catch (RepositoryException e) { throw new ServerHTTPException("Repository error: " + e.getMessage(), e); + } catch (LimitedSizeReachedException ignored) { } } - try (OutputStream out = response.getOutputStream()) { - response.setStatus(SC_OK); + } + } - String mimeType = rdfFormat.getDefaultMIMEType(); - if (rdfFormat.hasCharset()) { - Charset charset = rdfFormat.getCharset(); - mimeType += "; charset=" + charset.name(); - } - response.setContentType(mimeType); + private static class LimitedSizeRDFHandler implements RDFHandler { - String filename = "statements"; - if (rdfFormat.getDefaultFileExtension() != null) { - filename += "." + rdfFormat.getDefaultFileExtension(); - } - response.setHeader("Content-Disposition", "attachment; filename=" + filename); - out.write(baos.toByteArray()); + private final RDFHandler delegate; + private final long maxSize; + private long currentSize = 0; + + public LimitedSizeRDFHandler(RDFHandler delegate, long maxSize) { + this.delegate = delegate; + this.maxSize = maxSize; + } + + @Override + public void startRDF() throws RDFHandlerException { + delegate.startRDF(); + } + + @Override + public void endRDF() throws RDFHandlerException { + delegate.endRDF(); + } + + @Override + public void handleNamespace(String prefix, String uri) throws RDFHandlerException { + delegate.handleNamespace(prefix, uri); + incrementCurrentSize(); + } + + @Override + public void handleStatement(Statement st) throws RDFHandlerException { + delegate.handleStatement(st); + incrementCurrentSize(); + } + + @Override + public void handleComment(String comment) throws RDFHandlerException { + delegate.handleComment(comment); + incrementCurrentSize(); + } + + private void incrementCurrentSize() { + currentSize++; + if (maxSize >= 0 && currentSize > maxSize) { + endRDF(); + logger.trace( + "Limited size reached, throwing LimitedSizeReachedException to signal that we are done testing the export of statements for exceptions."); + throw new LimitedSizeReachedException(); } } } + private static class LimitedSizeReachedException extends RuntimeException { + @Override + public Throwable fillInStackTrace() { + // Do not fill in the stack trace to avoid performance overhead + return this; + } + } + } diff --git a/tools/server/src/test/java/org/eclipse/rdf4j/http/server/ProtocolIT.java b/tools/server/src/test/java/org/eclipse/rdf4j/http/server/ProtocolIT.java index fd1ac4c346d..68ef879f8b7 100644 --- a/tools/server/src/test/java/org/eclipse/rdf4j/http/server/ProtocolIT.java +++ b/tools/server/src/test/java/org/eclipse/rdf4j/http/server/ProtocolIT.java @@ -29,6 +29,7 @@ import java.util.List; import java.util.Objects; import java.util.Random; +import java.util.Scanner; import java.util.Set; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; @@ -46,6 +47,8 @@ import org.apache.http.message.BasicNameValuePair; import org.eclipse.rdf4j.common.io.IOUtil; import org.eclipse.rdf4j.http.protocol.Protocol; +import org.eclipse.rdf4j.http.server.repository.statements.ExportStatementsView; +import org.eclipse.rdf4j.model.IRI; import org.eclipse.rdf4j.model.Namespace; import org.eclipse.rdf4j.model.ValueFactory; import org.eclipse.rdf4j.model.impl.SimpleNamespace; @@ -57,6 +60,7 @@ import org.eclipse.rdf4j.rio.RDFFormat; import org.eclipse.rdf4j.rio.Rio; import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -86,6 +90,14 @@ public static void stopServer() throws Exception { server.stop(); } + @AfterEach + public void clearRepository() throws Exception { + // Clear the repository after each test + delete(Protocol.getStatementsLocation(TestServer.REPOSITORY_URL)); + ExportStatementsView.MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS = 1024; + + } + /** * Tests the server's methods for updating all data in a repository. */ @@ -172,7 +184,7 @@ public void testQueryDirect_POST() throws Exception { System.out.println("Query Direct POST Status: " + response.getStatusLine()); int statusCode = response.getStatusLine().getStatusCode(); - assertEquals(true, statusCode >= 200 && statusCode < 400); + assertTrue(statusCode >= 200 && statusCode < 400); } /** @@ -192,7 +204,7 @@ public void testUpdateDirect_POST() throws Exception { System.out.println("Update Direct Post Status: " + response.getStatusLine()); int statusCode = response.getStatusLine().getStatusCode(); - assertEquals(true, statusCode >= 200 && statusCode < 400); + assertTrue(statusCode >= 200 && statusCode < 400); } /** @@ -215,7 +227,7 @@ public void testUpdateForm_POST() throws Exception { System.out.println("Update Form Post Status: " + response.getStatusLine()); int statusCode = response.getStatusLine().getStatusCode(); - assertEquals(true, statusCode >= 200 && statusCode < 400); + assertTrue(statusCode >= 200 && statusCode < 400); } /** @@ -370,6 +382,137 @@ public void testUpdateResponse_HEAD() throws Exception { } } + @Test + public void testUploadAndRetrieveStatements_GET() throws Exception { + + String statementsLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + + // PUT the Turtle file into the repository + final String baseLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + final String file = "/testcases/default-graph-2.ttl"; + + // 1. PUT the same file multiple times so that we would trigger an OOM error when retrieving it if it were not + // directly written to the http output stream + for (int i = 1; i <= 20000; i++) { + IRI context = vf.createIRI("http://example.org/graph" + i); + putFile(baseLocation, file, context); + } + + // GET all statements back from the same endpoint + URL url = new URL(statementsLocation); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setRequestProperty("Accept", RDFFormat.NQUADS.getDefaultMIMEType()); // ask for easy-to-parse format + conn.connect(); + + try { + assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode(), + "GET /statements should respond 200 OK"); + + // Parse the response stream and count triples + try (InputStream in = conn.getInputStream()) { + Scanner scanner = new Scanner(in); + int count = 0; + while (scanner.hasNext()) { + scanner.nextLine(); + count++; + } + assertEquals(1860000, count, "Expected 1860000 triples, but got " + count + " instead."); + } + } finally { + conn.disconnect(); + } + + } + + @Test + public void testUploadAndRetrieveStatementsNoValidation_GET() throws Exception { + + ExportStatementsView.MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS = 0; + + String statementsLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + + // PUT the Turtle file into the repository + final String baseLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + final String file = "/testcases/default-graph-2.ttl"; + + // 1. PUT the same file multiple times so that we would trigger an OOM error when retrieving it if it were not + // directly written to the http output stream + for (int i = 1; i <= 20000; i++) { + IRI context = vf.createIRI("http://example.org/graph" + i); + putFile(baseLocation, file, context); + } + + // GET all statements back from the same endpoint + URL url = new URL(statementsLocation); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setRequestProperty("Accept", RDFFormat.NQUADS.getDefaultMIMEType()); // ask for easy-to-parse format + conn.connect(); + + try { + assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode(), + "GET /statements should respond 200 OK"); + + // Parse the response stream and count triples + try (InputStream in = conn.getInputStream()) { + Scanner scanner = new Scanner(in); + int count = 0; + while (scanner.hasNext()) { + scanner.nextLine(); + count++; + } + assertEquals(1860000, count, "Expected 1860000 triples, but got " + count + " instead."); + } + } finally { + conn.disconnect(); + } + + } + + @Test + public void testUploadAndRetrieveStatementsWithFullValidation_GET() throws Exception { + + ExportStatementsView.MAX_NUMBER_OF_STATEMENTS_WHEN_TESTING_FOR_POSSIBLE_EXCEPTIONS = -1; + + String statementsLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + + String baseLocation = Protocol.getStatementsLocation(TestServer.REPOSITORY_URL); + String file = "/testcases/default-graph-2.ttl"; + + // PUT the Turtle file into the repository + for (int i = 1; i <= 2000; i++) { + IRI context = vf.createIRI("http://example.org/graph" + i); + putFile(baseLocation, file, context); + } + + // GET all statements back from the same endpoint + URL url = new URL(statementsLocation); + HttpURLConnection conn = (HttpURLConnection) url.openConnection(); + conn.setRequestMethod("GET"); + conn.setRequestProperty("Accept", RDFFormat.NQUADS.getDefaultMIMEType()); // ask for easy-to-parse format + conn.connect(); + + try { + assertEquals(HttpURLConnection.HTTP_OK, conn.getResponseCode(), + "GET /statements should respond 200 OK"); + + // Parse the response stream and count triples + try (InputStream in = conn.getInputStream()) { + Scanner scanner = new Scanner(in); + int count = 0; + while (scanner.hasNext()) { + scanner.nextLine(); + count++; + } + assertEquals(186000, count, "Expected 186000 triples, but got " + count + " instead."); + } + } finally { + conn.disconnect(); + } + + } + /** * Test for SES-1861 * @@ -560,9 +703,16 @@ private void deleteNamespace(String location) throws Exception { } } - private void putFile(String location, String file) throws Exception { - System.out.println("Put file to " + location); + /** + * PUT a file into a specific named graph (context) by adding the ?context= query parameter. + */ + private void putFile(String repositoryBaseLocation, String file, IRI context) throws Exception { + String location = repositoryBaseLocation + + "?" + Protocol.CONTEXT_PARAM_NAME + "=" + Protocol.encodeValue(context); + putFile(location, file); // delegate to the existing helper + } + private void putFile(String location, String file) throws Exception { URL url = new URL(location); HttpURLConnection conn = (HttpURLConnection) url.openConnection(); conn.setRequestMethod("PUT"); diff --git a/tools/server/src/test/resources/testcases/default-graph-2.ttl b/tools/server/src/test/resources/testcases/default-graph-2.ttl new file mode 100644 index 00000000000..9e7874ce409 --- /dev/null +++ b/tools/server/src/test/resources/testcases/default-graph-2.ttl @@ -0,0 +1,29 @@ +# Default graph +@prefix dc: . +@prefix xsd: . + + dc:publisher "Bob" . + dc:date "2004-12-06T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2005-01-10T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2004-12-06T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2005-01-10T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2004-12-06T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2005-01-10T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2004-12-06T00:00:00Z"^^xsd:dateTime . + + dc:publisher "Bob" . + dc:date "2005-01-10T00:00:00Z"^^xsd:dateTime . + + dc:publisher [],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[].