GH-4950 LMDB: Encode doubles with a maximum number of 63 bits to avoid negative ids.

kenwenzel · kenwenzel · commit 7f640bce1ae2 · 2026-04-23T14:05:03.000+02:00
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/Varint.java
@@ -133,9 +133,7 @@ public static void writeUnsigned(final ByteBuffer bb, final long value) {
 		}
 
 		if (value < 0) {
-			int bytes = descriptor(value) + 1;
-			bb.put((byte) (250 + (bytes - 3)));
-			writeSignificantBits(bb, value, bytes);
+			throw new IllegalArgumentException("Negative value can not be encoded as varint: " + value);
 		} else if (value <= 240) {
 			bb.put((byte) value);
 		} else if (value <= 2287) {
@@ -221,13 +219,12 @@ private static void writeSignificantBits(ByteBuffer bb, long value, int bytes) {
 	/**
 	 * Calculates required length in bytes to encode the given long value using variable-length encoding.
 	 *
-	 * @param value the value value
+	 * @param value the value
 	 * @return length in bytes
 	 */
 	public static int calcLengthUnsigned(long value) {
 		if (value < 0) {
-			int bytes = descriptor(value) + 1;
-			return 1 + bytes;
+			throw new IllegalArgumentException("Negative value can not be encoded as varint: " + value);
 		} else if (value <= 240) {
 			return 1;
 		} else if (value <= 2287) {
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Decimals.java
@@ -25,6 +25,12 @@ public class Decimals {
 	static final BigInteger MIN_DECIMAL_VALUE = BigInteger.valueOf(-(1L << (DECIMAL_VALUE_BITS - 1)));
 	static final int MAX_DECIMAL_SCALE = 2 ^ (DECIMAL_SCALE_BITS - 1) - 1;
 	static final int MIN_DECIMAL_SCALE = -2 ^ (DECIMAL_SCALE_BITS - 1);
+	static final int DOUBLE_EXPONENT_BITS = 9; // width of the compact exponent field
+	private static final int DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL = 0; // reserved code for 11-bit exponent 0
+	private static final int DOUBLE_EXPONENT_INF_OR_NAN = (1 << DOUBLE_EXPONENT_BITS) - 1; // 511, all 9 bits set
+	private static final int DOUBLE_EXPONENT_BIAS = (1 << (DOUBLE_EXPONENT_BITS - 1)) - 1; // 255
+	private static final int DOUBLE_EXPONENT_MIN_NORMAL = -DOUBLE_EXPONENT_BIAS + 1; // -254, encodes to 1
+	private static final int DOUBLE_EXPONENT_MAX_NORMAL = DOUBLE_EXPONENT_BIAS; // 255, encodes to 510
 
 	/**
 	 * Encodes a {@link BigDecimal} in 56 bits [48 bits value, 8 bits scale].
@@ -46,66 +52,60 @@ static long packDecimal(BigDecimal value) {
 	}
 
 	/**
-	 * Extracts the exponent of a double, unbiased, and encodes it into 10 bits if possible. Handles special cases: NaN
-	 * and Infinity.
+	 * Encodes a double exponent into 9 bits (bias 255) if possible; 0 = zero/subnormal, 511 = NaN/Infinity.
 	 *
 	 * @param exponent11 The original 11-bit exponent.
-	 * @return Encoded 10-bit exponent as int (0-1023), or -1 if not encodable.
+	 * @return Encoded 9-bit exponent as int (0-511, normal values use 1-510), or -1 if not encodable.
 	 */
-	public static int encodeExponent10Bits(int exponent11) {
-		// isNaN or Inf - we do not distinguish between them in the compact representation, but reserve a special
-		// pattern for both
+	public static int encodeExponent9Bits(int exponent11) {
 		if (exponent11 == 0x7FF) {
-			// Reserve special pattern, e.g., 0x3FF (all 10 bits set) for NaN/Inf
-			return 0x3FF;
+			return DOUBLE_EXPONENT_INF_OR_NAN; // all-ones 11-bit exponent maps to the all-ones 9-bit code
 		}
 
 		if (exponent11 == 0) {
-			// Subnormal number or zero, exponent = -1022
-			return 0; // Use 0 for subnormal/zero
+			return DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL; // exponent 0 keeps the reserved code 0
 		}
 
-		// Normal number, unbiased exponent in [-1022, 1023]
 		int unbiasedExp = exponent11 - 1023;
-		int encoded = unbiasedExp + 511; // Shift range to [0, 1023]
-		if (encoded < 1 || encoded > 1022) {
-			// Out of range for 10 bits (excluding reserved 0 and 0x3FF)
+		if (unbiasedExp < DOUBLE_EXPONENT_MIN_NORMAL || unbiasedExp > DOUBLE_EXPONENT_MAX_NORMAL) {
 			return -1;
 		}
-		return encoded;
+		return unbiasedExp + DOUBLE_EXPONENT_BIAS; // re-biased result lies in 1..510
 	}
 
 	/**
-	 * Decodes a 10-bit encoded exponent back to unbiased exponent.
+	 * Decodes a 9-bit exponent produced by {@link #encodeExponent9Bits(int)} back to the original 11-bit exponent.
 	 *
-	 * @param encoded 10-bit encoded exponent
+	 * @param encoded 9-bit encoded exponent (0-511); values outside that range are not validated
 	 * @return 11-bit biased exponent or special values for reserved patterns
 	 */
-	public static int decodeExponent10Bits(int encoded) {
-		if (encoded == 0) {
-			// Subnormal/zero
-			return 0; // -1022;
+	public static int decodeExponent9Bits(int encoded) {
+		if (encoded == DOUBLE_EXPONENT_ZERO_OR_SUBNORMAL) {
+			return 0; // zero/subnormal: the 11-bit exponent field is 0 as well
 		}
-		if (encoded == 0x3FF) {
-			// Reserved for NaN/Inf
+		if (encoded == DOUBLE_EXPONENT_INF_OR_NAN) {
 			return 0x7FF;
 		}
-		// Normal
-		int unbiased = encoded - 511;
+		int unbiased = encoded - DOUBLE_EXPONENT_BIAS; // undo the 9-bit bias (255)
 		return unbiased + 1023;
 	}
 
+	/** @deprecated Legacy 10-bit layout only; the current encoding uses {@link #decodeExponent9Bits(int)}. */
+	@Deprecated
+	public static int decodeExponent10Bits(int encoded) {
+		// Legacy layout: 0 = zero/subnormal, 0x3FF = NaN/Inf, otherwise bias 511. Delegating to the 9-bit decoder
+		// (bias 255, reserved code 511) would return a wrong exponent for every input except 0.
+		return encoded == 0 ? 0 : encoded == 0x3FF ? 0x7FF : (encoded - 511) + 1023;
+	}
+
 	static long packDouble(double value) {
 		long valueBits = Double.doubleToRawLongBits(value);
-		// 11-bit exponent
 		int exponent11 = (int) ((valueBits >>> 52) & 0x7FF);
-		// encode to 10 bits
-		int exponent10 = encodeExponent10Bits(exponent11);
-		if (exponent10 >= 0) {
-			// encoding of exponent was possible
-			int sign = value < 0 ? 1 : 0;
+		int exponent9 = encodeExponent9Bits(exponent11);
+		if (exponent9 >= 0) {
+			int sign = (int) (valueBits >>> 63); // raw sign bit: "value < 0" is false for -0.0 and loses its sign
 			long mantissa = valueBits & 0x000fffffffffffffL;
-			return ((long) exponent10) << 54 | mantissa << 2 | sign << 1 | 1;
+			return ((long) exponent9) << 54 | mantissa << 2 | sign << 1 | 1; // bits: 62-54 exp, 53-2 mantissa, 1 sign
 		}
 		return 0L;
 	}
@@ -127,12 +127,10 @@ static Literal unpackDouble(long value, ValueFactory valueFactory) {
 		}
 		int sign = (int) ((value >> 1) & 1);
 		long mantissa = (value >> 2) & 0x000fffffffffffffL;
-		int exponent10 = (int) (value >>> 54);
+		int exponent9 = (int) ((value >>> 54) & DOUBLE_EXPONENT_INF_OR_NAN);
 
-		// Decode back to original exponent
-		int exponent11 = decodeExponent10Bits(exponent10);
+		int exponent11 = decodeExponent9Bits(exponent9);
 
-		// Reconstruct raw bits
 		long valueBits = ((long) sign << 63) |
 				((long) (exponent11 & 0x7FF) << 52) |
 				mantissa;
diff --git a/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java b/core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/inlined/Strings.java
@@ -28,7 +28,7 @@ static long packString(Literal literal) {
 		byte[] bytes = label.getBytes(StandardCharsets.UTF_8);
 		int maxLength = Values.MAX_LENGTH - 1;
 		if (bytes.length > maxLength) {
-			// multi-byte string is longer than maximum encodable length
+			// multibyte string is longer than maximum encodable length
 			return 0L;
 		}
 
diff --git a/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java b/core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/inlined/DecimalsTest.java
@@ -11,6 +11,7 @@
 package org.eclipse.rdf4j.sail.lmdb.inlined;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
 import static org.junit.jupiter.api.Assertions.assertNotEquals;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
@@ -70,6 +71,7 @@ void testPackDouble() {
 
 		for (double value : values) {
 			long packedValue = Decimals.packDouble(value);
+			assertFalse(packedValue < 0, "Packed value should be non-negative for value: " + value);
 			assertNotEquals(0L, packedValue, "Packing failed for value: " + value);
 			Literal literal = Decimals.unpackDouble(packedValue, SimpleValueFactory.getInstance());
 			if (Double.isNaN(value)) {
@@ -98,6 +100,7 @@ void testPackFloat() {
 
 		for (float value : values) {
 			long packedValue = Decimals.packFloat(value);
+			assertFalse(packedValue < 0, "Packed value should be non-negative for value: " + value);
 			assertNotEquals(0L, packedValue, "Packing failed for value: " + value);
 			Literal literal = Decimals.unpackFloat(packedValue, SimpleValueFactory.getInstance());
 			if (Float.isNaN(value)) {

Original file line number	Diff line number	Diff line change
`@@ -28,7 +28,7 @@ static long packString(Literal literal) {`
`28`	`28`	`byte[] bytes = label.getBytes(StandardCharsets.UTF_8);`
`29`	`29`	`int maxLength = Values.MAX_LENGTH - 1;`
`30`	`30`	`if (bytes.length > maxLength) {`
`31`		`- // multi-byte string is longer than maximum encodable length`
	`31`	`+ // multibyte string is longer than maximum encodable length`
`32`	`32`	`return 0L;`
`33`	`33`	`}`
`34`	`34`