Skip to content

Commit 84c6860

Browse files
authored
merge main into develop (#5191)
2 parents 5ba73ef + 136be9f commit 84c6860

19 files changed

Lines changed: 1402 additions & 63 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ org.eclipse.dash.licenses-1.0.2.jar
5252
e2e/node_modules
5353
e2e/playwright-report
5454
e2e/test-results
55+
.aider*

core/repository/sparql/src/main/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnection.java

Lines changed: 57 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,9 @@
1717
import java.io.InputStream;
1818
import java.io.Reader;
1919
import java.net.URL;
20+
import java.util.Arrays;
2021
import java.util.Objects;
22+
import java.util.stream.Collectors;
2123

2224
import org.apache.http.client.HttpClient;
2325
import org.eclipse.rdf4j.common.iteration.CloseableIteration;
@@ -39,6 +41,8 @@
3941
import org.eclipse.rdf4j.model.impl.DynamicModelFactory;
4042
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
4143
import org.eclipse.rdf4j.model.util.Literals;
44+
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
45+
import org.eclipse.rdf4j.model.vocabulary.SESAME;
4246
import org.eclipse.rdf4j.query.BindingSet;
4347
import org.eclipse.rdf4j.query.BooleanQuery;
4448
import org.eclipse.rdf4j.query.GraphQuery;
@@ -79,6 +83,8 @@
7983
*/
8084
public class SPARQLConnection extends AbstractRepositoryConnection implements HttpClientDependent {
8185

86+
private static final String COUNT_EVERYTHING = "SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o }";
87+
8288
private static final String EVERYTHING = "CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }";
8389

8490
private static final String EVERYTHING_WITH_GRAPH = "SELECT * WHERE { ?s ?p ?o . OPTIONAL { GRAPH ?ctx { ?s ?p ?o } } }";
@@ -281,16 +287,61 @@ public boolean isEmpty() throws RepositoryException {
281287

282288
@Override
283289
public long size(Resource... contexts) throws RepositoryException {
284-
try (RepositoryResult<Statement> stmts = getStatements(null, null, null, true, contexts)) {
285-
long i = 0;
286-
while (stmts.hasNext()) {
287-
stmts.next();
288-
i++;
290+
String query = sizeAsTupleQuery(contexts);
291+
TupleQuery tq = prepareTupleQuery(SPARQL, query);
292+
try (TupleQueryResult res = tq.evaluate()) {
293+
if (res.hasNext()) {
294+
295+
Value value = res.next().getBinding("count").getValue();
296+
if (value instanceof Literal) {
297+
return ((Literal) value).longValue();
298+
} else {
299+
return 0;
300+
}
301+
}
302+
} catch (QueryEvaluationException e) {
303+
throw new RepositoryException(e);
304+
}
305+
return 0;
306+
}
307+
308+
String sizeAsTupleQuery(Resource... contexts) {
309+
310+
// in case the context is null we want the
311+
// default graph of the remote store i.e. ask without graph/from.
312+
if (contexts != null && isQuadMode() && contexts.length > 0) {
313+
// this is an optimization for the case that we can use a GRAPH instead of a FROM.
314+
if (contexts.length == 1 && isExposableGraphIri(contexts[0])) {
315+
return "SELECT (COUNT(*) AS ?count) WHERE { GRAPH <" + contexts[0].stringValue()
316+
+ "> { ?s ?p ?o}}";
317+
} else {
318+
// If we had a default graph setting that is sesame/rdf4j specific
319+
// we must drop it before sending it over the wire. Otherwise
320+
// gather up the given contexts and send them as FROM clauses
321+
// to make the matching dataset.
322+
String graphs = Arrays.stream(contexts)
323+
.filter(SPARQLConnection::isExposableGraphIri)
324+
.map(Resource::stringValue)
325+
.map(s -> "FROM <" + s + ">")
326+
.collect(Collectors.joining(" "));
327+
return "SELECT (COUNT(*) AS ?count) " + graphs + "WHERE { ?s ?p ?o}";
289328
}
290-
return i;
329+
} else {
330+
return COUNT_EVERYTHING;
291331
}
292332
}
293333

334+
/**
335+
* For the SPARQL protocol a context must be an IRI. However, we can't send out the RDF4J-internal default graph IRIs
336+
*
337+
* @param resource to test if it can be the IRI for a named graph
338+
* @return true if the input can be a foreign named graph.
339+
*/
340+
private static boolean isExposableGraphIri(Resource resource) {
341+
// We use the instanceof test to avoid any issue with a null pointer.
342+
return resource instanceof IRI && !RDF4J.NIL.equals(resource) && !SESAME.NIL.equals(resource);
343+
}
344+
294345
@Override
295346
public RepositoryResult<Statement> getStatements(Resource subj, IRI pred, Value obj, boolean includeInferred,
296347
Resource... contexts) throws RepositoryException {

core/repository/sparql/src/test/java/org/eclipse/rdf4j/repository/sparql/SPARQLConnectionTest.java

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,41 @@
1212

1313
import static org.assertj.core.api.Assertions.assertThat;
1414
import static org.eclipse.rdf4j.model.util.Values.iri;
15+
import static org.junit.jupiter.api.Assertions.assertEquals;
16+
import static org.junit.jupiter.api.Assertions.assertFalse;
17+
import static org.junit.jupiter.api.Assertions.assertNotNull;
1518
import static org.mockito.ArgumentMatchers.any;
1619
import static org.mockito.ArgumentMatchers.anyBoolean;
1720
import static org.mockito.ArgumentMatchers.anyInt;
21+
import static org.mockito.Mockito.atLeastOnce;
1822
import static org.mockito.Mockito.mock;
1923
import static org.mockito.Mockito.never;
2024
import static org.mockito.Mockito.times;
2125
import static org.mockito.Mockito.verify;
26+
import static org.mockito.Mockito.when;
27+
28+
import java.lang.ref.WeakReference;
2229

2330
import org.eclipse.rdf4j.http.client.SPARQLProtocolSession;
2431
import org.eclipse.rdf4j.model.IRI;
2532
import org.eclipse.rdf4j.model.ValueFactory;
2633
import org.eclipse.rdf4j.model.impl.SimpleValueFactory;
2734
import org.eclipse.rdf4j.model.vocabulary.FOAF;
2835
import org.eclipse.rdf4j.model.vocabulary.RDF;
36+
import org.eclipse.rdf4j.model.vocabulary.RDF4J;
2937
import org.eclipse.rdf4j.model.vocabulary.RDFS;
38+
import org.eclipse.rdf4j.query.impl.MapBindingSet;
39+
import org.eclipse.rdf4j.query.impl.SimpleBinding;
40+
import org.eclipse.rdf4j.query.impl.TupleQueryResultBuilder;
41+
import org.eclipse.rdf4j.query.parser.ParsedQuery;
42+
import org.eclipse.rdf4j.query.parser.sparql.SPARQLParser;
43+
import org.eclipse.rdf4j.query.parser.sparql.SPARQLParserFactory;
3044
import org.eclipse.rdf4j.rio.ParserConfig;
3145
import org.junit.jupiter.api.BeforeEach;
3246
import org.junit.jupiter.api.Test;
3347
import org.mockito.ArgumentCaptor;
48+
import org.mockito.Mock;
49+
import org.mockito.invocation.InvocationOnMock;
3450

3551
public class SPARQLConnectionTest {
3652

@@ -100,6 +116,36 @@ public void testAddSingleContextHandling() throws Exception {
100116
assertThat(sparqlUpdate).containsPattern(expectedAddPattern).containsPattern(expectedRemovePattern);
101117
}
102118

119+
@Test
120+
public void testSizeQuery() throws Exception {
121+
122+
String sizeAsTupleQuery = subject.sizeAsTupleQuery();
123+
ParsedQuery query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
124+
assertNotNull(query);
125+
126+
sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"));
127+
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
128+
assertNotNull(query);
129+
130+
sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createIRI("urn:g2"));
131+
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
132+
assertNotNull(query);
133+
134+
sizeAsTupleQuery = subject.sizeAsTupleQuery(vf.createIRI("urn:g1"), vf.createBNode());
135+
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
136+
assertNotNull(query);
137+
138+
sizeAsTupleQuery = subject.sizeAsTupleQuery(RDF4J.NIL);
139+
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
140+
assertNotNull(query);
141+
assertFalse(sizeAsTupleQuery.contains("nil"));
142+
143+
sizeAsTupleQuery = subject.sizeAsTupleQuery(null);
144+
query = new SPARQLParserFactory().getParser().parseQuery(sizeAsTupleQuery, "http://example.org/");
145+
146+
assertNotNull(query);
147+
}
148+
103149
@Test
104150
public void testAddMultipleContextHandling() throws Exception {
105151
ArgumentCaptor<String> sparqlUpdateCaptor = ArgumentCaptor.forClass(String.class);

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStatementIterator.java

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,8 @@
1010
*******************************************************************************/
1111
package org.eclipse.rdf4j.sail.nativerdf;
1212

13+
import static org.eclipse.rdf4j.sail.nativerdf.NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES;
14+
1315
import java.io.IOException;
1416

1517
import org.eclipse.rdf4j.common.io.ByteArrayUtil;
@@ -20,13 +22,20 @@
2022
import org.eclipse.rdf4j.model.Value;
2123
import org.eclipse.rdf4j.sail.SailException;
2224
import org.eclipse.rdf4j.sail.nativerdf.btree.RecordIterator;
25+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRI;
26+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptIRIOrBNode;
27+
import org.eclipse.rdf4j.sail.nativerdf.model.CorruptUnknownValue;
28+
import org.slf4j.Logger;
29+
import org.slf4j.LoggerFactory;
2330

2431
/**
2532
* A statement iterator that wraps a RecordIterator containing statement records and translates these records to
2633
* {@link Statement} objects.
2734
*/
2835
class NativeStatementIterator extends LookAheadIteration<Statement> {
2936

37+
private static final Logger logger = LoggerFactory.getLogger(NativeStatementIterator.class);
38+
3039
/*-----------*
3140
* Variables *
3241
*-----------*/
@@ -54,25 +63,42 @@ public NativeStatementIterator(RecordIterator btreeIter, ValueStore valueStore)
5463
@Override
5564
public Statement getNextElement() throws SailException {
5665
try {
57-
byte[] nextValue = btreeIter.next();
66+
byte[] nextValue;
67+
try {
68+
nextValue = btreeIter.next();
69+
} catch (AssertionError | Exception e) {
70+
logger.error("Error while reading next value from btree iterator for {}", btreeIter.toString(), e);
71+
throw e;
72+
}
5873

5974
if (nextValue == null) {
6075
return null;
6176
}
6277

6378
int subjID = ByteArrayUtil.getInt(nextValue, TripleStore.SUBJ_IDX);
64-
Resource subj = (Resource) valueStore.getValue(subjID);
79+
Resource subj = valueStore.getResource(subjID);
6580

6681
int predID = ByteArrayUtil.getInt(nextValue, TripleStore.PRED_IDX);
67-
IRI pred = (IRI) valueStore.getValue(predID);
82+
IRI pred = valueStore.getIRI(predID);
6883

6984
int objID = ByteArrayUtil.getInt(nextValue, TripleStore.OBJ_IDX);
7085
Value obj = valueStore.getValue(objID);
7186

7287
Resource context = null;
7388
int contextID = ByteArrayUtil.getInt(nextValue, TripleStore.CONTEXT_IDX);
7489
if (contextID != 0) {
75-
context = (Resource) valueStore.getValue(contextID);
90+
context = valueStore.getResource(contextID);
91+
}
92+
if (SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
93+
if (subj == null) {
94+
subj = new CorruptIRIOrBNode(valueStore.getRevision(), subjID, null);
95+
}
96+
if (pred == null) {
97+
pred = new CorruptIRI(valueStore.getRevision(), predID, null, null);
98+
}
99+
if (obj == null) {
100+
obj = new CorruptUnknownValue(valueStore.getRevision(), objID, null);
101+
}
76102
}
77103

78104
return valueStore.createStatement(subj, pred, obj, context);

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/NativeStore.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
import org.apache.commons.io.FileUtils;
2525
import org.eclipse.rdf4j.collection.factory.api.CollectionFactory;
2626
import org.eclipse.rdf4j.collection.factory.mapdb.MapDb3CollectionFactory;
27+
import org.eclipse.rdf4j.common.annotation.InternalUseOnly;
2728
import org.eclipse.rdf4j.common.concurrent.locks.Lock;
2829
import org.eclipse.rdf4j.common.concurrent.locks.LockManager;
2930
import org.eclipse.rdf4j.common.io.MavenUtil;
@@ -62,6 +63,17 @@ public class NativeStore extends AbstractNotifyingSail implements FederatedServi
6263

6364
private static final String VERSION = MavenUtil.loadVersion("org.eclipse.rdf4j", "rdf4j-sail-nativerdf", "devel");
6465

66+
/**
67+
* Do not throw an exception when corrupt data is detected. Instead, try to return as much data as possible.
68+
*
69+
* Variable can be set through the system property
70+
* org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes.
71+
*/
72+
@InternalUseOnly
73+
public static boolean SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES = "true"
74+
.equalsIgnoreCase(
75+
System.getProperty("org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes"));;
76+
6577
private static final Cleaner REMOVE_STORES_USED_FOR_MEMORY_OVERFLOW = Cleaner.create();
6678

6779
/**

core/sail/nativerdf/src/main/java/org/eclipse/rdf4j/sail/nativerdf/TripleStore.java

Lines changed: 75 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -287,9 +287,71 @@ private Set<String> parseIndexSpecList(String indexSpecStr) throws SailException
287287
}
288288

289289
private void initIndexes(Set<String> indexSpecs) throws IOException {
290+
291+
HashSet<String> invalidIndexes = new HashSet<>();
292+
290293
for (String fieldSeq : indexSpecs) {
291294
logger.trace("Initializing index '{}'...", fieldSeq);
292-
indexes.add(new TripleIndex(fieldSeq));
295+
try {
296+
indexes.add(new TripleIndex(fieldSeq, false));
297+
} catch (Exception e) {
298+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
299+
invalidIndexes.add(fieldSeq);
300+
logger.warn("Ignoring index because it failed to initialize index '{}'", fieldSeq, e);
301+
} else {
302+
logger.error(
303+
"Failed to initialize index '{}', consider setting org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true.",
304+
fieldSeq, e);
305+
throw e;
306+
}
307+
308+
}
309+
310+
}
311+
312+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
313+
indexSpecs.removeAll(invalidIndexes);
314+
}
315+
316+
List<TripleIndex> emptyIndexes = new ArrayList<>();
317+
List<TripleIndex> nonEmptyIndexes = new ArrayList<>();
318+
319+
checkIfIndexesAreEmptyOrNot(nonEmptyIndexes, emptyIndexes);
320+
321+
if (!emptyIndexes.isEmpty() && !nonEmptyIndexes.isEmpty()) {
322+
if (NativeStore.SOFT_FAIL_ON_CORRUPT_DATA_AND_REPAIR_INDEXES) {
323+
indexes.removeAll(emptyIndexes);
324+
} else {
325+
for (TripleIndex index : emptyIndexes) {
326+
throw new IOException("Index '" + new String(index.getFieldSeq())
327+
+ "' is unexpectedly empty while other indexes are not. Consider setting the system property org.eclipse.rdf4j.sail.nativerdf.softFailOnCorruptDataAndRepairIndexes to true. Index file: "
328+
+ index.getBTree().getFile().getAbsolutePath());
329+
}
330+
}
331+
}
332+
333+
}
334+
335+
private void checkIfIndexesAreEmptyOrNot(List<TripleIndex> nonEmptyIndexes, List<TripleIndex> emptyIndexes)
336+
throws IOException {
337+
for (TripleIndex index : indexes) {
338+
try (RecordIterator recordIterator = index.getBTree().iterateAll()) {
339+
try {
340+
byte[] next = recordIterator.next();
341+
if (next != null) {
342+
next = recordIterator.next();
343+
if (next != null) {
344+
nonEmptyIndexes.add(index);
345+
} else {
346+
emptyIndexes.add(index);
347+
}
348+
} else {
349+
emptyIndexes.add(index);
350+
}
351+
} catch (Throwable ignored) {
352+
emptyIndexes.add(index);
353+
}
354+
}
293355
}
294356
}
295357

@@ -355,7 +417,7 @@ private void reindex(Set<String> currentIndexSpecs, Set<String> newIndexSpecs) t
355417
for (String fieldSeq : addedIndexSpecs) {
356418
logger.debug("Initializing new index '{}'...", fieldSeq);
357419

358-
TripleIndex addedIndex = new TripleIndex(fieldSeq);
420+
TripleIndex addedIndex = new TripleIndex(fieldSeq, true);
359421
BTree addedBTree = null;
360422
RecordIterator sourceIter = null;
361423
try {
@@ -1122,7 +1184,17 @@ private class TripleIndex {
11221184

11231185
private final BTree btree;
11241186

1125-
public TripleIndex(String fieldSeq) throws IOException {
1187+
public TripleIndex(String fieldSeq, boolean deleteExistingIndexFile) throws IOException {
1188+
if (deleteExistingIndexFile) {
1189+
File indexFile = new File(dir, getFilenamePrefix(fieldSeq) + ".dat");
1190+
if (indexFile.exists()) {
1191+
indexFile.delete();
1192+
}
1193+
File alloxFile = new File(dir, getFilenamePrefix(fieldSeq) + ".alloc");
1194+
if (alloxFile.exists()) {
1195+
alloxFile.delete();
1196+
}
1197+
}
11261198
tripleComparator = new TripleComparator(fieldSeq);
11271199
btree = new BTree(dir, getFilenamePrefix(fieldSeq), 2048, RECORD_LENGTH, tripleComparator, forceSync);
11281200
}

0 commit comments

Comments
 (0)