Skip to content

Commit 2a7075a

Browse files
committed
GH-5121: configurability of bind left joins
Bind left joins for OPTIONAL can be disabled using the "enableOptionalAsBindJoin" flag in the federation config. Integrate the switch between implementations in the unit test as a parameterized test.
1 parent 558a595 commit 2a7075a

8 files changed

Lines changed: 129 additions & 21 deletions

File tree

site/content/documentation/programming/federation.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,8 @@ FedX provides various means for configuration. Configuration settings can be def
305305
|leftJoinWorkerThreads | The number of left join worker threads for parallelization, default _10_ |
306306
|boundJoinBlockSize | Block size for bound joins, default _25_ |
307307
|enforceMaxQueryTime | Max query time in seconds, 0 to disable, default _30_ |
308-
|enableServiceAsBoundJoin | Flag for evaluating a SERVICE expression (contacting non-federation members) using vectored evaluation, default _true_. For today's endpoints it is more efficient to disable vectored evaluation of SERVICE |
308+
|enableServiceAsBoundJoin | Flag for evaluating a SERVICE expression (contacting non-federation members) using vectored evaluation, default _true_. |
309+
|enableOptionalAsBindJoin | Flag for evaluating an OPTIONAL expression using bind join, default _true_. |
309310
|includeInferredDefault | Whether inferred statements should be included, default _true_ |
310311
|consumingIterationMax | the max number of results to be consumed by `ConsumingIteration`, default _1000_ |
311312
|debugQueryPlan | Print the optimized query execution plan to stdout, default _false_ |

tools/federation/src/main/java/org/eclipse/rdf4j/federated/FedXConfig.java

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
import java.util.Optional;
1414

1515
import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
16-
import org.eclipse.rdf4j.collection.factory.impl.DefaultCollectionFactory;
1716
import org.eclipse.rdf4j.federated.cache.SourceSelectionCache;
1817
import org.eclipse.rdf4j.federated.cache.SourceSelectionCacheFactory;
1918
import org.eclipse.rdf4j.federated.cache.SourceSelectionMemoryCache;
@@ -48,6 +47,8 @@ public class FedXConfig {
4847

4948
private boolean enableServiceAsBoundJoin = true;
5049

50+
private boolean enableOptionalAsBindJoin = true;
51+
5152
private boolean enableMonitoring = false;
5253

5354
private boolean isLogQueryPlan = false;
@@ -68,7 +69,6 @@ public class FedXConfig {
6869

6970
private int consumingIterationMax = 1000;
7071

71-
private CollectionFactory cf = new DefaultCollectionFactory();
7272
/* factory like setters */
7373

7474
/**
@@ -244,6 +244,17 @@ public FedXConfig withEnableServiceAsBoundJoin(boolean flag) {
244244
return this;
245245
}
246246

247+
/**
248+
* Whether OPTIONAL clauses are evaluated using bind join (i.e. with the VALUES clause). Default <i>true</i>
249+
*
250+
* @param flag
251+
* @return the current config.
252+
*/
253+
public FedXConfig withEnableOptionalAsBindJoin(boolean flag) {
254+
this.enableOptionalAsBindJoin = flag;
255+
return this;
256+
}
257+
247258
/**
248259
* The cache specification for the {@link SourceSelectionMemoryCache}. If not set explicitly, the
249260
* {@link SourceSelectionMemoryCache#DEFAULT_CACHE_SPEC} is used.
@@ -326,16 +337,26 @@ public int getBoundJoinBlockSize() {
326337
* Returns a flag indicating whether vectored evaluation using the VALUES clause shall be applied for SERVICE
327338
* expressions.
328339
*
329-
* Default: false
340+
* Default: true
330341
*
331-
* Note: for todays endpoints it is more efficient to disable vectored evaluation of SERVICE.
332-
*
333-
* @return whether SERVICE expressions are evaluated using bound joins
342+
* @return whether SERVICE expressions are evaluated using bind joins
334343
*/
335344
public boolean getEnableServiceAsBoundJoin() {
336345
return enableServiceAsBoundJoin;
337346
}
338347

348+
/**
349+
* Returns a flag indicating whether bind join evaluation using the VALUES clause shall be applied for OPTIONAL
350+
* expressions.
351+
*
352+
* Default: true
353+
*
354+
* @return whether OPTIONAL expressions are evaluated using bind joins
355+
*/
356+
public boolean isEnableOptionalAsBindJoin() {
357+
return enableOptionalAsBindJoin;
358+
}
359+
339360
/**
340361
* Get the maximum query time in seconds used for query evaluation. Applied if {@link QueryManager} is used to
341362
* create queries.
@@ -485,9 +506,10 @@ public int getConsumingIterationMax() {
485506
*
486507
* @param cf
487508
* @return the current config
509+
* @deprecated unused
488510
*/
511+
@Deprecated(forRemoval = true)
489512
public FedXConfig withCollectionFactory(CollectionFactory cf) {
490-
this.cf = cf;
491513
return this;
492514
}
493515
}

tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/SparqlFederationEvalStrategy.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,10 +210,11 @@ protected CloseableIteration<BindingSet> executeLeftJoin(ControlledWorkerSchedul
210210
throws QueryEvaluationException {
211211

212212
var rightArg = leftJoin.getRightArg();
213+
var fedxConfig = queryInfo.getFederationContext().getConfig();
213214

214215
// determine if we can execute the expr as bind join
215216
boolean executeAsBindJoin = false;
216-
if (rightArg instanceof BoundJoinTupleExpr) {
217+
if (fedxConfig.isEnableOptionalAsBindJoin() && rightArg instanceof BoundJoinTupleExpr) {
217218
if (rightArg instanceof FedXService) {
218219
executeAsBindJoin = false;
219220
} else {

tools/federation/src/main/java/org/eclipse/rdf4j/federated/repository/FedXRepositoryConfig.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,11 @@ public class FedXRepositoryConfig extends AbstractRepositoryImplConfig {
135135
*/
136136
public static final IRI CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN = vf.createIRI(NAMESPACE, "enableServiceAsBoundJoin");
137137

138+
/**
139+
* IRI of the property populating {@link FedXConfig#isEnableOptionalAsBindJoin()}
140+
*/
141+
public static final IRI CONFIG_ENABLE_OPTIONAL_AS_BIND_JOIN = vf.createIRI(NAMESPACE, "enableOptionalAsBindJoin");
142+
138143
/**
139144
* IRI of the property populating {@link FedXConfig#isEnableMonitoring()}
140145
*/
@@ -331,6 +336,9 @@ private void parseFedXConfigInternal(Model m, Resource confNode) throws Reposito
331336
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN, null))
332337
.ifPresent(value -> config.withEnableServiceAsBoundJoin(value.booleanValue()));
333338

339+
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_OPTIONAL_AS_BIND_JOIN, null))
340+
.ifPresent(value -> config.withEnableOptionalAsBindJoin(value.booleanValue()));
341+
334342
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_MONITORING, null))
335343
.ifPresent(value -> config.withEnableMonitoring(value.booleanValue()));
336344

@@ -384,6 +392,9 @@ protected void exportFedXConfig(Model model, Resource implNode) {
384392
model.add(confNode, CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN,
385393
vf.createLiteral(config.getEnableServiceAsBoundJoin()));
386394

395+
model.add(confNode, CONFIG_ENABLE_OPTIONAL_AS_BIND_JOIN,
396+
vf.createLiteral(config.isEnableOptionalAsBindJoin()));
397+
387398
model.add(confNode, CONFIG_ENABLE_MONITORING, vf.createLiteral(config.isEnableMonitoring()));
388399

389400
model.add(confNode, CONFIG_LOG_QUERY_PLAN, vf.createLiteral(config.isLogQueryPlan()));

tools/federation/src/test/java/org/eclipse/rdf4j/federated/BindLeftJoinTests.java

Lines changed: 34 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,8 @@
2424
import org.eclipse.rdf4j.repository.Repository;
2525
import org.eclipse.rdf4j.repository.RepositoryConnection;
2626
import org.junit.jupiter.api.Assertions;
27-
import org.junit.jupiter.api.Test;
27+
import org.junit.jupiter.params.ParameterizedTest;
28+
import org.junit.jupiter.params.provider.ValueSource;
2829

2930
public class BindLeftJoinTests extends SPARQLBaseTest {
3031

@@ -36,8 +37,9 @@ protected void initFedXConfig() {
3637
});
3738
}
3839

39-
@Test
40-
public void test_leftBindJoin_basic() throws Exception {
40+
@ParameterizedTest
41+
@ValueSource(booleans = { true, false })
42+
public void test_leftBindJoin_basic(boolean bindLeftJoinOptimizationEnabled) throws Exception {
4143

4244
prepareTest(
4345
Arrays.asList("/tests/basic/data_emptyStore.ttl", "/tests/basic/data_emptyStore.ttl",
@@ -51,6 +53,7 @@ public void test_leftBindJoin_basic() throws Exception {
5153

5254
fedxRule.setConfig(config -> {
5355
config.withBoundJoinBlockSize(10);
56+
config.withEnableOptionalAsBindJoin(bindLeftJoinOptimizationEnabled);
5457
});
5558

5659
// add some persons
@@ -95,8 +98,6 @@ public void test_leftBindJoin_basic() throws Exception {
9598
try (TupleQueryResult tqr = tupleQuery.evaluate()) {
9699
var bindings = Iterations.asList(tqr);
97100

98-
MonitoringUtil.printMonitoringInformation(federationContext());
99-
100101
Assertions.assertEquals(30, bindings.size());
101102

102103
for (int i = 1; i <= 30; i++) {
@@ -122,14 +123,25 @@ public void test_leftBindJoin_basic() throws Exception {
122123

123124
}
124125

126+
if (bindLeftJoinOptimizationEnabled) {
127+
assertNumberOfRequests("endpoint1", 3);
128+
assertNumberOfRequests("endpoint2", 5);
129+
assertNumberOfRequests("endpoint3", 5);
130+
} else {
131+
assertNumberOfRequests("endpoint1", 3);
132+
assertNumberOfRequests("endpoint2", 32);
133+
assertNumberOfRequests("endpoint3", 32);
134+
}
135+
125136
} finally {
126137
fedxRepo.shutDown();
127138
}
128139

129140
}
130141

131-
@Test
132-
public void testBoundLeftJoin_stmt_nonExclusive_boundCheck()
142+
@ParameterizedTest
143+
@ValueSource(booleans = { true, false })
144+
public void testBoundLeftJoin_stmt_nonExclusive_boundCheck(boolean bindLeftJoinOptimizationEnabled)
133145
throws Exception {
134146

135147
prepareTest(
@@ -147,6 +159,7 @@ public void testBoundLeftJoin_stmt_nonExclusive_boundCheck()
147159

148160
fedxRule.setConfig(config -> {
149161
config.withBoundJoinBlockSize(10);
162+
config.withEnableOptionalAsBindJoin(bindLeftJoinOptimizationEnabled);
150163
});
151164

152165
// add some persons
@@ -180,6 +193,8 @@ public void testBoundLeftJoin_stmt_nonExclusive_boundCheck()
180193
conn.add(Values.iri("http://other.com/p30"), FOAF.GENDER, Values.literal("male"));
181194
}
182195

196+
fedxRule.enableDebug();
197+
183198
try {
184199
// run query which joins results from multiple repos
185200
// for a subset of persons there exist names
@@ -217,6 +232,18 @@ public void testBoundLeftJoin_stmt_nonExclusive_boundCheck()
217232

218233
}
219234

235+
if (bindLeftJoinOptimizationEnabled) {
236+
assertNumberOfRequests("endpoint1", 3);
237+
assertNumberOfRequests("endpoint2", 5);
238+
assertNumberOfRequests("endpoint3", 5);
239+
} else {
240+
assertNumberOfRequests("endpoint1", 3);
241+
// Note: with the current implementation we cannot
242+
// make exact assertions for endpoint 2 and 3
243+
// this is because, due to the check statement,
244+
// not all requests are required
245+
}
246+
220247
} finally {
221248
fedxRepo.shutDown();
222249
}

tools/federation/src/test/java/org/eclipse/rdf4j/federated/SPARQLServerBaseTest.java

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.List;
1919

2020
import org.eclipse.rdf4j.federated.endpoint.Endpoint;
21+
import org.eclipse.rdf4j.federated.monitoring.MonitoringService;
2122
import org.eclipse.rdf4j.federated.repository.RepositorySettings;
2223
import org.eclipse.rdf4j.federated.server.NativeStoreServer;
2324
import org.eclipse.rdf4j.federated.server.SPARQLEmbeddedServer;
@@ -28,6 +29,7 @@
2829
import org.eclipse.rdf4j.rio.RDFParseException;
2930
import org.eclipse.rdf4j.rio.Rio;
3031
import org.junit.jupiter.api.AfterAll;
32+
import org.junit.jupiter.api.Assertions;
3133
import org.junit.jupiter.api.Assumptions;
3234
import org.junit.jupiter.api.BeforeAll;
3335
import org.junit.jupiter.api.BeforeEach;
@@ -237,4 +239,39 @@ protected RepositorySettings repoSettings(int endpoint) {
237239
return server.getRepository(endpoint);
238240
}
239241

242+
/**
243+
* Helper method to check the number of requests sent to the respective endpoint
244+
*
245+
* @param memberName the memberName, typically "endpointN", where N >= 1
246+
* @param expectedRequests
247+
*/
248+
protected void assertNumberOfRequests(String memberName, int expectedRequests) {
249+
if (!isSPARQLServer()) {
250+
return; // ignore for non SPARQL server environment where requests are not counted
251+
}
252+
var fedxContext = federationContext();
253+
if (!fedxContext.getConfig().isEnableMonitoring()) {
254+
Assertions.fail("monitoring is not enabled in the current federation.");
255+
}
256+
MonitoringService monitoringService = (MonitoringService) fedxContext.getMonitoringService();
257+
258+
// obtain the monitoring information
259+
// Note: the member name is matched loosely against the endpoint id or name, with or without an "http://" prefix
260+
var monitoringInformation = monitoringService.getAllMonitoringInformation()
261+
.stream()
262+
.filter(m -> {
263+
var endpoint = m.getE();
264+
return endpoint.getId().equals(memberName)
265+
|| endpoint.getId().equals("http://" + memberName)
266+
|| endpoint.getName().equals(memberName)
267+
|| endpoint.getName().equals("http://" + memberName);
268+
})
269+
.findFirst()
270+
.orElse(null);
271+
272+
Assertions.assertEquals(expectedRequests,
273+
(monitoringInformation != null ? monitoringInformation.getNumberOfRequests() : 0));
274+
275+
}
276+
240277
}

tools/federation/src/test/java/org/eclipse/rdf4j/federated/repository/FedXRepositoryConfigTest.java

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,8 @@
1212

1313
import static org.assertj.core.api.Assertions.assertThat;
1414
import static org.eclipse.rdf4j.model.util.Models.subject;
15-
import static org.junit.Assert.assertThat;
1615

1716
import java.io.InputStream;
18-
import java.util.Optional;
1917

2018
import org.eclipse.rdf4j.federated.FedXConfig;
2119
import org.eclipse.rdf4j.federated.util.Vocabulary.FEDX;
@@ -161,7 +159,7 @@ public void testExport() throws Exception {
161159
.orElse(null);
162160
assertThat(configNode).isNotNull();
163161

164-
assertThat(export.filter(configNode, null, null)).hasSize(14);
162+
assertThat(export.filter(configNode, null, null)).hasSize(15);
165163

166164
assertThat(
167165
Models.objectLiteral(
@@ -189,6 +187,11 @@ public void testExport() throws Exception {
189187
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN,
190188
null)))
191189
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
190+
assertThat(
191+
Models.objectLiteral(
192+
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_OPTIONAL_AS_BIND_JOIN,
193+
null)))
194+
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isFalse());
192195
assertThat(
193196
Models.objectLiteral(
194197
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_MONITORING, null)))
@@ -242,9 +245,9 @@ public void testExportWithEmptyConfig() throws Exception {
242245
.orElse(null);
243246
assertThat(configNode).isNotNull();
244247

245-
// Note: 14 instead of 12 since CONFIG_SOURCE_SELECTION_CACHE_SPEC & CONFIG_PREFIX_DECLARATIONS are null
248+
// Note: 13 instead of 15 since CONFIG_SOURCE_SELECTION_CACHE_SPEC & CONFIG_PREFIX_DECLARATIONS are null
246249
// and thus should not be populated
247-
assertThat(export.filter(configNode, null, null)).hasSize(12);
250+
assertThat(export.filter(configNode, null, null)).hasSize(13);
248251

249252
assertThat(
250253
Models.objectLiteral(
@@ -272,6 +275,11 @@ public void testExportWithEmptyConfig() throws Exception {
272275
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN,
273276
null)))
274277
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
278+
assertThat(
279+
Models.objectLiteral(
280+
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_OPTIONAL_AS_BIND_JOIN,
281+
null)))
282+
.hasValueSatisfying(v -> assertThat(v.booleanValue()).isTrue());
275283
assertThat(
276284
Models.objectLiteral(
277285
export.getStatements(configNode, FedXRepositoryConfig.CONFIG_ENABLE_MONITORING, null)))

tools/federation/src/test/resources/tests/rdf4jserver/config-withFedXConfig.ttl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
fedx:boundJoinBlockSize 104 ;
1717
fedx:enforceMaxQueryTime 105 ;
1818
fedx:enableServiceAsBoundJoin false ;
19+
fedx:enableOptionalAsBindJoin false ;
1920
fedx:enableMonitoring true ;
2021
fedx:logQueryPlan true ;
2122
fedx:logQueries true ;

0 commit comments

Comments
 (0)