Skip to content

Commit 0545804

Browse files
committed
GH-4950 LMDB: support inlined values in value store
1 parent 109f2aa commit 0545804

3 files changed

Lines changed: 66 additions & 30 deletions

File tree

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/LmdbSailStore.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -977,7 +977,8 @@ private long removeStatements(long subj, long pred, long obj, boolean explicit,
977977
tripleStore.removeTriplesByContext(subj, pred, obj, contextId, explicit, quad -> {
978978
removeCount[0]++;
979979
for (long id : quad) {
980-
if (id != 0L) {
980+
if (id != 0L && !ValueIds.isInlined(id)) {
981+
// only add references, exclude inlined values
981982
unusedIds.add(id);
982983
}
983984
}

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ValueStore.java

Lines changed: 59 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@
8282
import org.eclipse.rdf4j.sail.SailException;
8383
import org.eclipse.rdf4j.sail.lmdb.LmdbUtil.Transaction;
8484
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
85+
import org.eclipse.rdf4j.sail.lmdb.inlined.Values;
8586
import org.eclipse.rdf4j.sail.lmdb.model.LmdbBNode;
8687
import org.eclipse.rdf4j.sail.lmdb.model.LmdbIRI;
8788
import org.eclipse.rdf4j.sail.lmdb.model.LmdbLiteral;
@@ -638,6 +639,10 @@ public LmdbValue getLazyValue(long id) throws IOException {
638639
resultValue = new LmdbBNode(lazyRevision, id);
639640
break;
640641
default:
642+
if (ValueIds.isInlined(id)) {
643+
resultValue = new LmdbLiteral(lazyRevision, id);
644+
break;
645+
}
641646
throw new IOException("Unsupported value with id type: " + idType);
642647
}
643648
// Store value in cache
@@ -664,6 +669,12 @@ public LmdbValue getValue(long id) throws IOException {
664669
LmdbValue resultValue = cachedValue(id);
665670

666671
if (resultValue == null) {
672+
// unpack inlined values if possible
673+
if (ValueIds.isInlined(id)) {
674+
Literal unpacked = Values.unpackLiteral(id, this);
675+
return new LmdbLiteral(revision, unpacked.getLabel(), unpacked.getDatatype(), id);
676+
}
677+
667678
// Value not in cache, fetch it from file
668679
byte[] data = getData(id);
669680

@@ -688,6 +699,13 @@ public LmdbValue getValue(long id) throws IOException {
688699
* @return <code>true</code> if value could be successfully resolved, else <code>false</code>
689700
*/
690701
public boolean resolveValue(long id, LmdbValue value) {
702+
// unpack inlined values if possible
703+
if (ValueIds.isInlined(id)) {
704+
Literal unpacked = Values.unpackLiteral(id, this);
705+
((LmdbLiteral) value).setLabel(unpacked.getLabel());
706+
((LmdbLiteral) value).setDatatype(unpacked.getDatatype());
707+
return true;
708+
}
691709
// Try to get from cache
692710
LmdbValue cached = cachedValue(id);
693711
if (cached != null && this.getRevision().getRevisionId() == cached.getValueStoreRevision().getRevisionId()) {
@@ -1038,13 +1056,10 @@ public long getId(Value value) throws IOException {
10381056
public long getId(Value value, boolean create) throws IOException {
10391057
// Try to get the internal ID from the value itself
10401058
boolean isOwnValue = isOwnValue(value);
1041-
10421059
if (isOwnValue) {
10431060
LmdbValue lmdbValue = (LmdbValue) value;
1044-
10451061
if (revisionIsCurrent(lmdbValue)) {
10461062
long id = lmdbValue.getInternalID();
1047-
10481063
if (id != LmdbValue.UNKNOWN_ID) {
10491064
return id;
10501065
}
@@ -1061,43 +1076,61 @@ public long getId(Value value, boolean create) throws IOException {
10611076

10621077
if (cachedID != null) {
10631078
long id = cachedID;
1064-
10651079
if (isOwnValue) {
10661080
// Store id in value for fast access in any consecutive calls
10671081
((LmdbValue) value).setInternalID(id, revision);
10681082
}
1069-
10701083
return id;
10711084
}
10721085

1073-
// ID not cached, search in file
1074-
byte[] data = value2data(value, create);
1075-
if (data == null && value instanceof Literal) {
1076-
data = literal2legacy((Literal) value);
1086+
long id = LmdbValue.UNKNOWN_ID;
1087+
if (value instanceof Literal) {
1088+
// inline value into id if possible
1089+
try {
1090+
long packedId = Values.packLiteral((Literal) value);
1091+
if (packedId != 0L) {
1092+
Literal unpacked = Values.unpackLiteral(packedId, this);
1093+
if (unpacked.equals(value)) {
1094+
id = packedId;
1095+
}
1096+
}
1097+
} catch (IllegalArgumentException e) {
1098+
// ignore, invalid literal
1099+
}
10771100
}
10781101

1079-
if (data != null) {
1080-
long id = findId(data, create);
1081-
if (id != LmdbValue.UNKNOWN_ID) {
1082-
if (isOwnValue) {
1083-
// Store id in value for fast access in any consecutive calls
1084-
((LmdbValue) value).setInternalID(id, revision);
1085-
// Store id in cache
1086-
valueIDCache.put((LmdbValue) value, id);
1087-
} else {
1088-
// Store id in cache
1089-
LmdbValue nv = getLmdbValue(value);
1090-
nv.setInternalID(id, revision);
1102+
if (id == LmdbValue.UNKNOWN_ID) {
1103+
// not inlined or ID not cached, search in index
1104+
byte[] data = value2data(value, create);
1105+
if (data == null && value instanceof Literal) {
1106+
data = literal2legacy((Literal) value);
1107+
}
10911108

1092-
if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) {
1093-
commonVocabulary.put(value, id);
1094-
}
1109+
if (data != null) {
1110+
id = findId(data, create);
1111+
}
1112+
}
10951113

1096-
valueIDCache.put(nv, id);
1114+
if (id != LmdbValue.UNKNOWN_ID) {
1115+
if (isOwnValue) {
1116+
// Store id in value for fast access in any consecutive calls
1117+
((LmdbValue) value).setInternalID(id, revision);
1118+
// Store id in cache
1119+
valueIDCache.put((LmdbValue) value, id);
1120+
} else {
1121+
// Store id in cache
1122+
LmdbValue nv = getLmdbValue(value);
1123+
nv.setInternalID(id, revision);
1124+
1125+
if (nv.isIRI() && isCommonVocabulary(((IRI) nv))) {
1126+
commonVocabulary.put(value, id);
10971127
}
1128+
valueIDCache.put(nv, id);
1129+
}
1130+
// only store hash for non-inlined values
1131+
if (! ValueIds.isInlined(id)) {
10981132
storeHashIfAbsent(id, value);
10991133
}
1100-
11011134
return id;
11021135
}
11031136
} finally {

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreTest.java

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -165,11 +165,12 @@ public void testGcValuesAfterRestart() throws Exception {
165165

166166
@Test
167167
public void testGcDatatypes() throws Exception {
168-
IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.DOUBLE, XSD.DECIMAL, XSD.FLOAT };
168+
IRI[] types = new IRI[] { XSD.STRING, XSD.INTEGER, XSD.LONG, XSD.DECIMAL };
169169
LmdbValue values[] = new LmdbValue[types.length];
170170
valueStore.startTransaction(true);
171171
for (int i = 0; i < values.length; i++) {
172-
values[i] = valueStore.createLiteral("123", types[i]);
172+
// use a value that is large enough not to be inlined
173+
values[i] = valueStore.createLiteral(Long.toString(Long.MAX_VALUE - 1), types[i]);
173174
valueStore.storeValue(values[i]);
174175
}
175176
valueStore.commit();
@@ -211,7 +212,8 @@ public void testGcDatatypes() throws Exception {
211212
public void testGcURIs() throws Exception {
212213
for (boolean storeAndGcUri : List.of(false, true)) {
213214
valueStore.startTransaction(true);
214-
LmdbLiteral literal = valueStore.createLiteral("123", XSD.STRING);
215+
// use a value that is large enough not to be inlined
216+
LmdbLiteral literal = valueStore.createLiteral("123".repeat(5), XSD.STRING);
215217
valueStore.storeValue(literal);
216218
if (storeAndGcUri) {
217219
valueStore.storeValue(XSD.STRING);

0 commit comments

Comments
 (0)