Skip to content

Commit 03aefb9

Browse files
authored
GH-4952 - Introduce FedXConfig overrides for FedXRepositoryConfig (#4953)
2 parents e283cde + 290d78c commit 03aefb9

4 files changed

Lines changed: 483 additions & 1 deletion

File tree

site/content/documentation/programming/federation.md

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,17 +299,50 @@ FedX provides various means for configuration. Configuration settings can be def
299299
|Property | Description |
300300
|---------|-------------|
301301
|prefixDeclarations | Path to prefix declarations file, see [PREFIX Declarations](#prefix-declarations) |
302-
|cacheLocation | Location where the memory cache gets persisted at shutdown, default _cache.db_ |
302+
|sourceSelectionCacheSpec | Cache specification for the `SourceSelectionMemoryCache`, default _maximumSize=1000,expireAfterWrite=6h_ |
303303
|joinWorkerThreads | The number of join worker threads for parallelization, default _20_ |
304304
|unionWorkerThreads | The number of union worker threads for parallelization, default _20_ |
305+
|leftJoinWorkerThreads | The number of left join worker threads for parallelization, default _10_ |
305306
|boundJoinBlockSize | Block size for bound joins, default _15_ |
306307
|enforceMaxQueryTime | Max query time in seconds, 0 to disable, default _30_ |
307308
|enableServiceAsBoundJoin | Flag for evaluating a SERVICE expression (contacting non-federation members) using vectored evaluation, default _true_. For today's endpoints it is more efficient to disable vectored evaluation of SERVICE |
309+
|includeInferredDefault | Whether inferred statements should be included by default, default _true_ |
310+
|consumingIterationMax | The maximum number of results to be consumed by `ConsumingIteration`, default _1000_ |
308311
|debugQueryPlan | Print the optimized query execution plan to stdout, default _false_ |
309312
|enableMonitoring | Flag to enable/disable monitoring features, default _false_ |
310313
|logQueryPlan | Flag to enable/disable query plan logging via Java class _QueryPlanLog_, default _false_ |
311314
|logQueries | Flag to enable/disable query logging via _QueryLog_, default _false_. The _QueryLog_ facility allows logging all queries to a file |
312315

316+
#### Overriding via configuration template
317+
318+
The aforementioned properties can also be set using a configuration template, via the `fedx:config` property, e.g.:
319+
320+
```turtle
321+
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#>.
322+
@prefix rep: <http://www.openrdf.org/config/repository#>.
323+
@prefix config: <tag:rdf4j.org,2023:config/>.
324+
@prefix fedx: <http://rdf4j.org/config/federation#>.
325+
326+
[] a rep:Repository ;
327+
rep:repositoryImpl [
328+
rep:repositoryType "fedx:FedXRepository" ;
329+
fedx:member [
330+
fedx:store "ResolvableRepository" ;
331+
fedx:repositoryName "endpoint1"
332+
],
333+
[
334+
fedx:store "ResolvableRepository" ;
335+
fedx:repositoryName "endpoint2"
336+
] ;
337+
fedx:config [
338+
fedx:sourceSelectionCacheSpec "maximumSize=0" ;
339+
fedx:enforceMaxQueryTime 30 ;
340+
]
341+
];
342+
rep:repositoryID "fedx" ;
343+
rdfs:label "FedX Federation" .
344+
```
345+
313346
### Query timeouts
314347

315348
FedX supports defining the maximum execution time for a query. This can be set at query level using `Query#setMaxExecutionTime` or globally using the FedX config setting _enforceMaxQueryTime_.

tools/federation/src/main/java/org/eclipse/rdf4j/federated/repository/FedXRepositoryConfig.java

Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
import org.eclipse.rdf4j.federated.FedXConfig;
1616
import org.eclipse.rdf4j.federated.util.Vocabulary.FEDX;
17+
import org.eclipse.rdf4j.model.BNode;
1718
import org.eclipse.rdf4j.model.IRI;
1819
import org.eclipse.rdf4j.model.Model;
1920
import org.eclipse.rdf4j.model.Resource;
@@ -23,6 +24,7 @@
2324
import org.eclipse.rdf4j.model.impl.TreeModel;
2425
import org.eclipse.rdf4j.model.util.ModelException;
2526
import org.eclipse.rdf4j.model.util.Models;
27+
import org.eclipse.rdf4j.model.util.Values;
2628
import org.eclipse.rdf4j.repository.config.AbstractRepositoryImplConfig;
2729
import org.eclipse.rdf4j.repository.config.RepositoryConfigException;
2830
import org.eclipse.rdf4j.repository.config.RepositoryImplConfig;
@@ -60,6 +62,12 @@
6062
* # optionally define data config
6163
* #fedx:fedxConfig "fedxConfig.prop" ;
6264
* fedx:dataConfig "dataConfig.ttl" ;
65+
*
66+
* # optionally define FedXConfig overrides
67+
* fedx:config [
68+
* fedx:sourceSelectionCacheSpec "maximumSize=0" ;
69+
* fedx:enforceMaxQueryTime 30 ;
70+
* ]
6371
* ];
6472
* rep:repositoryID "fedx" ;
6573
* rdfs:label "FedX Federation" .
@@ -87,11 +95,86 @@ public class FedXRepositoryConfig extends AbstractRepositoryImplConfig {
8795
*/
8896
public static final IRI DATA_CONFIG = vf.createIRI(NAMESPACE, "dataConfig");
8997

98+
/**
99+
* IRI of the property pointing to the {@link FedXConfig}
100+
*/
101+
public static final IRI FEDX_CONFIG = vf.createIRI(NAMESPACE, "config");
102+
90103
/**
91104
* IRI of the property pointing to a federation member node
92105
*/
93106
public static final IRI MEMBER = vf.createIRI(NAMESPACE, "member");
94107

108+
/**
109+
* IRI of the property populating {@link FedXConfig#getJoinWorkerThreads()}
110+
*/
111+
public static final IRI CONFIG_JOIN_WORKER_THREADS = vf.createIRI(NAMESPACE, "joinWorkerThreads");
112+
113+
/**
114+
* IRI of the property populating {@link FedXConfig#getUnionWorkerThreads()}
115+
*/
116+
public static final IRI CONFIG_UNION_WORKER_THREADS = vf.createIRI(NAMESPACE, "unionWorkerThreads");
117+
118+
/**
119+
* IRI of the property populating {@link FedXConfig#getLeftJoinWorkerThreads()}
120+
*/
121+
public static final IRI CONFIG_LEFT_JOIN_WORKER_THREADS = vf.createIRI(NAMESPACE, "leftJoinWorkerThreads");
122+
123+
/**
124+
* IRI of the property populating {@link FedXConfig#getBoundJoinBlockSize()}
125+
*/
126+
public static final IRI CONFIG_BOUND_JOIN_BLOCK_SIZE = vf.createIRI(NAMESPACE, "boundJoinBlockSize");
127+
128+
/**
129+
* IRI of the property populating {@link FedXConfig#getEnforceMaxQueryTime()}
130+
*/
131+
public static final IRI CONFIG_ENFORCE_MAX_QUERY_TIME = vf.createIRI(NAMESPACE, "enforceMaxQueryTime");
132+
133+
/**
134+
* IRI of the property populating {@link FedXConfig#getEnableServiceAsBoundJoin()}
135+
*/
136+
public static final IRI CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN = vf.createIRI(NAMESPACE, "enableServiceAsBoundJoin");
137+
138+
/**
139+
* IRI of the property populating {@link FedXConfig#isEnableMonitoring()}
140+
*/
141+
public static final IRI CONFIG_ENABLE_MONITORING = vf.createIRI(NAMESPACE, "enableMonitoring");
142+
143+
/**
144+
* IRI of the property populating {@link FedXConfig#isLogQueryPlan()}
145+
*/
146+
public static final IRI CONFIG_LOG_QUERY_PLAN = vf.createIRI(NAMESPACE, "logQueryPlan");
147+
148+
/**
149+
* IRI of the property populating {@link FedXConfig#isLogQueries()}
150+
*/
151+
public static final IRI CONFIG_LOG_QUERIES = vf.createIRI(NAMESPACE, "logQueries");
152+
153+
/**
154+
* IRI of the property populating {@link FedXConfig#isDebugQueryPlan()}
155+
*/
156+
public static final IRI CONFIG_DEBUG_QUERY_PLAN = vf.createIRI(NAMESPACE, "debugQueryPlan");
157+
158+
/**
159+
* IRI of the property populating {@link FedXConfig#getIncludeInferredDefault()}
160+
*/
161+
public static final IRI CONFIG_INCLUDE_INFERRED_DEFAULT = vf.createIRI(NAMESPACE, "includeInferredDefault");
162+
163+
/**
164+
* IRI of the property populating {@link FedXConfig#getSourceSelectionCacheSpec()}
165+
*/
166+
public static final IRI CONFIG_SOURCE_SELECTION_CACHE_SPEC = vf.createIRI(NAMESPACE, "sourceSelectionCacheSpec");
167+
168+
/**
169+
* IRI of the property populating {@link FedXConfig#getPrefixDeclarations()}
170+
*/
171+
public static final IRI CONFIG_PREFIX_DECLARATIONS = vf.createIRI(NAMESPACE, "prefixDeclarations");
172+
173+
/**
174+
* IRI of the property populating {@link FedXConfig#getConsumingIterationMax()}
175+
*/
176+
public static final IRI CONFIG_CONSUMING_ITERATION_MAX = vf.createIRI(NAMESPACE, "consumingIterationMax");
177+
95178
/**
96179
* the location of the data configuration
97180
*/
@@ -152,6 +235,8 @@ public Resource export(Model m) {
152235
m.add(implNode, DATA_CONFIG, vf.createLiteral(getDataConfig()));
153236
}
154237

238+
exportFedXConfig(m, implNode);
239+
155240
if (getMembers() != null) {
156241

157242
Model members = getMembers();
@@ -187,6 +272,8 @@ public void parse(Model m, Resource implNode) throws RepositoryConfigException {
187272
Models.objectLiteral(m.getStatements(implNode, DATA_CONFIG, null))
188273
.ifPresent(value -> setDataConfig(value.stringValue()));
189274

275+
parseFedXConfig(m, implNode);
276+
190277
Set<Value> memberNodes = m.filter(implNode, MEMBER, null).objects();
191278
if (!memberNodes.isEmpty()) {
192279
Model members = new TreeModel();
@@ -205,4 +292,120 @@ public void parse(Model m, Resource implNode) throws RepositoryConfigException {
205292
throw new RepositoryConfigException(e.getMessage(), e);
206293
}
207294
}
295+
296+
/**
297+
* Updates the container {@link FedXConfig} instance with properties from the supplied model. It is up to the caller
298+
* to retrieve configuration from {@link #FEDX_CONFIG} as well as to initialise the parsed configuration (via
299+
* {@link #setConfig(FedXConfig)}) since it can be null.
300+
*
301+
* @param m the model from which to read configuration properties
302+
* @param implNode the subject against which to expect the {@link #FEDX_CONFIG} property.
303+
*
304+
* @throws RepositoryConfigException if any of the overridden fields are deemed to be invalid
305+
*/
306+
protected void parseFedXConfig(Model m, Resource implNode) throws RepositoryConfigException {
307+
Models.objectResource(m.getStatements(implNode, FEDX_CONFIG, null))
308+
.ifPresent(res -> parseFedXConfigInternal(m, res));
309+
}
310+
311+
private void parseFedXConfigInternal(Model m, Resource confNode) throws RepositoryConfigException {
312+
if (getConfig() == null) {
313+
setConfig(new FedXConfig());
314+
}
315+
316+
Models.objectLiteral(m.getStatements(confNode, CONFIG_JOIN_WORKER_THREADS, null))
317+
.ifPresent(value -> config.withJoinWorkerThreads(value.intValue()));
318+
319+
Models.objectLiteral(m.getStatements(confNode, CONFIG_UNION_WORKER_THREADS, null))
320+
.ifPresent(value -> config.withUnionWorkerThreads(value.intValue()));
321+
322+
Models.objectLiteral(m.getStatements(confNode, CONFIG_LEFT_JOIN_WORKER_THREADS, null))
323+
.ifPresent(value -> config.withLeftJoinWorkerThreads(value.intValue()));
324+
325+
Models.objectLiteral(m.getStatements(confNode, CONFIG_BOUND_JOIN_BLOCK_SIZE, null))
326+
.ifPresent(value -> config.withBoundJoinBlockSize(value.intValue()));
327+
328+
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENFORCE_MAX_QUERY_TIME, null))
329+
.ifPresent(value -> config.withEnforceMaxQueryTime(value.intValue()));
330+
331+
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN, null))
332+
.ifPresent(value -> config.withEnableServiceAsBoundJoin(value.booleanValue()));
333+
334+
Models.objectLiteral(m.getStatements(confNode, CONFIG_ENABLE_MONITORING, null))
335+
.ifPresent(value -> config.withEnableMonitoring(value.booleanValue()));
336+
337+
Models.objectLiteral(m.getStatements(confNode, CONFIG_LOG_QUERY_PLAN, null))
338+
.ifPresent(value -> config.withLogQueryPlan(value.booleanValue()));
339+
340+
Models.objectLiteral(m.getStatements(confNode, CONFIG_LOG_QUERIES, null))
341+
.ifPresent(value -> config.withLogQueries(value.booleanValue()));
342+
343+
Models.objectLiteral(m.getStatements(confNode, CONFIG_DEBUG_QUERY_PLAN, null))
344+
.ifPresent(value -> config.withDebugQueryPlan(value.booleanValue()));
345+
346+
Models.objectLiteral(m.getStatements(confNode, CONFIG_INCLUDE_INFERRED_DEFAULT, null))
347+
.ifPresent(value -> config.withIncludeInferredDefault(value.booleanValue()));
348+
349+
Models.objectLiteral(m.getStatements(confNode, CONFIG_SOURCE_SELECTION_CACHE_SPEC, null))
350+
.ifPresent(value -> config.withSourceSelectionCacheSpec(value.stringValue()));
351+
352+
Models.objectLiteral(m.getStatements(confNode, CONFIG_PREFIX_DECLARATIONS, null))
353+
.ifPresent(value -> config.withPrefixDeclarations(value.stringValue()));
354+
355+
Models.objectLiteral(m.getStatements(confNode, CONFIG_CONSUMING_ITERATION_MAX, null))
356+
.ifPresent(value -> config.withConsumingIterationMax(value.intValue()));
357+
358+
}
359+
360+
/**
361+
* Export the provided {@link FedXConfig} to its RDF representation. Note that {@link #getConfig()} could be null if
362+
* configuration has not been set yet.
363+
*
364+
* @param model the model to which the configuration statements are added
365+
* @param implNode the node to which to write the config reference (i.e. {@link #FEDX_CONFIG}) to
366+
*/
367+
protected void exportFedXConfig(Model model, Resource implNode) {
368+
if (getConfig() == null) {
369+
return;
370+
}
371+
372+
BNode confNode = Values.bnode();
373+
374+
model.add(confNode, CONFIG_JOIN_WORKER_THREADS, vf.createLiteral(config.getJoinWorkerThreads()));
375+
376+
model.add(confNode, CONFIG_UNION_WORKER_THREADS, vf.createLiteral(config.getUnionWorkerThreads()));
377+
378+
model.add(confNode, CONFIG_LEFT_JOIN_WORKER_THREADS, vf.createLiteral(config.getLeftJoinWorkerThreads()));
379+
380+
model.add(confNode, CONFIG_BOUND_JOIN_BLOCK_SIZE, vf.createLiteral(config.getBoundJoinBlockSize()));
381+
382+
model.add(confNode, CONFIG_ENFORCE_MAX_QUERY_TIME, vf.createLiteral(config.getEnforceMaxQueryTime()));
383+
384+
model.add(confNode, CONFIG_ENABLE_SERVICE_AS_BOUND_JOIN,
385+
vf.createLiteral(config.getEnableServiceAsBoundJoin()));
386+
387+
model.add(confNode, CONFIG_ENABLE_MONITORING, vf.createLiteral(config.isEnableMonitoring()));
388+
389+
model.add(confNode, CONFIG_LOG_QUERY_PLAN, vf.createLiteral(config.isLogQueryPlan()));
390+
391+
model.add(confNode, CONFIG_LOG_QUERIES, vf.createLiteral(config.isLogQueries()));
392+
393+
model.add(confNode, CONFIG_DEBUG_QUERY_PLAN, vf.createLiteral(config.isDebugQueryPlan()));
394+
395+
model.add(confNode, CONFIG_INCLUDE_INFERRED_DEFAULT, vf.createLiteral(config.getIncludeInferredDefault()));
396+
397+
if (config.getSourceSelectionCacheSpec() != null) {
398+
model.add(confNode, CONFIG_SOURCE_SELECTION_CACHE_SPEC,
399+
vf.createLiteral(config.getSourceSelectionCacheSpec()));
400+
}
401+
402+
if (config.getPrefixDeclarations() != null) {
403+
model.add(confNode, CONFIG_PREFIX_DECLARATIONS,
404+
vf.createLiteral(config.getPrefixDeclarations()));
405+
}
406+
407+
model.add(confNode, CONFIG_CONSUMING_ITERATION_MAX, vf.createLiteral(config.getConsumingIterationMax()));
408+
409+
model.add(implNode, FEDX_CONFIG, confNode);
410+
}
208411
}

0 commit comments

Comments
 (0)