Skip to content

Commit fffdc84

Browse files
authored
GH-5779 reduce cleanup cost ConcurrentCache in LMDB (#5780)
2 parents 4aa8422 + e63f53c commit fffdc84

3 files changed

Lines changed: 148 additions & 33 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ e2e/test-results
5656
.serena/
5757
.vscode
5858
/.codex/environments/environment.toml
59+
/.m2_repo_linux_j25

core/sail/lmdb/src/main/java/org/eclipse/rdf4j/sail/lmdb/ConcurrentCache.java

Lines changed: 130 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -8,45 +8,135 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.sail.lmdb;
1213

13-
import java.util.Iterator;
14+
import java.util.Arrays;
15+
import java.util.Objects;
1416
import java.util.concurrent.ConcurrentHashMap;
1517

1618
/**
17-
* Limited-size concurrent cache. The actual cleanup to keep the size limited is done once per
18-
* <code>CLEANUP_INTERVAL</code> invocations of the protected method <code>cleanUp</code>. <code>cleanUp</code> method
19-
* is called every time by <code>put</code> The maximum size is maintained approximately. Cleanup is not done if size is
20-
* less than <code>capacity + CLEANUP_INTERVAL / 2</code>.
19+
* Fixed-size concurrent cache with approximate FIFO eviction per hash set. The cache never grows beyond its slot
20+
* budget; capacity is rounded up to a power-of-two number of slots and entries are evicted in O(1).
2121
*/
2222
public class ConcurrentCache<K, V> {
2323

24-
private static final int CLEANUP_INTERVAL = 1024;
25-
24+
private static final int WAYS = 4;
2625
private static final float LOAD_FACTOR = 0.75f;
2726

28-
private final int capacity;
29-
30-
private volatile int cleanupTick = 0;
27+
private final Entry<K, V>[] entries;
28+
private final int[] nextVictim;
29+
private final int setMask;
30+
private final int setShift;
3131

3232
protected final ConcurrentHashMap<K, V> cache;
3333

34+
private volatile long generation = 1;
35+
36+
@SuppressWarnings("unchecked")
3437
public ConcurrentCache(int capacity) {
35-
this.capacity = capacity;
36-
this.cache = new ConcurrentHashMap<>((int) (capacity / LOAD_FACTOR), LOAD_FACTOR);
38+
cache = new ConcurrentHashMap<>((int) (Math.max(1, capacity) / LOAD_FACTOR), LOAD_FACTOR);
39+
if (capacity <= 0) {
40+
entries = (Entry<K, V>[]) new Entry[0];
41+
nextVictim = new int[0];
42+
setMask = 0;
43+
setShift = 0;
44+
return;
45+
}
46+
47+
int slotCount = nextPowerOfTwo(Math.max(WAYS, capacity));
48+
int setCount = slotCount / WAYS;
49+
entries = (Entry<K, V>[]) new Entry[slotCount];
50+
nextVictim = new int[setCount];
51+
setMask = setCount - 1;
52+
setShift = Integer.numberOfTrailingZeros(setCount);
3753
}
3854

3955
public V get(Object key) {
56+
Objects.requireNonNull(key);
57+
if (entries.length == 0) {
58+
return cache.get(key);
59+
}
60+
61+
int hash = spread(key.hashCode());
62+
int base = setBase(hash);
63+
int preferredOffset = preferredOffset(hash);
64+
long currentGeneration = generation;
65+
66+
for (int i = 0; i < WAYS; i++) {
67+
Entry<K, V> entry = entries[base + ((preferredOffset + i) & (WAYS - 1))];
68+
if (entry != null && entry.hash == hash && entry.generation == currentGeneration
69+
&& sameKey(key, entry.key)) {
70+
return entry.value;
71+
}
72+
}
4073
return cache.get(key);
4174
}
4275

4376
public V put(K key, V value) {
77+
Objects.requireNonNull(key);
78+
Objects.requireNonNull(value);
4479
cleanUp();
45-
return cache.put(key, value);
80+
81+
V previous = cache.put(key, value);
82+
if (entries.length == 0) {
83+
return previous;
84+
}
85+
86+
int hash = spread(key.hashCode());
87+
int setIndex = hash & setMask;
88+
int base = setIndex * WAYS;
89+
int preferredOffset = preferredOffset(hash);
90+
long currentGeneration = generation;
91+
int emptySlot = -1;
92+
93+
for (int i = 0; i < WAYS; i++) {
94+
int slot = base + ((preferredOffset + i) & (WAYS - 1));
95+
Entry<K, V> entry = entries[slot];
96+
if (entry == null) {
97+
if (emptySlot < 0) {
98+
emptySlot = slot;
99+
}
100+
continue;
101+
}
102+
103+
if (entry.hash == hash && entry.generation == currentGeneration && sameKey(key, entry.key)) {
104+
if (entry.value != value) {
105+
entries[slot] = new Entry<>(key, value, hash, currentGeneration);
106+
}
107+
return previous;
108+
}
109+
}
110+
111+
if (emptySlot >= 0) {
112+
entries[emptySlot] = new Entry<>(key, value, hash, currentGeneration);
113+
return previous;
114+
}
115+
116+
int firstVictim = nextVictim[setIndex] & (WAYS - 1);
117+
for (int i = 0; i < WAYS; i++) {
118+
int victimOffset = (firstVictim + i) & (WAYS - 1);
119+
int slot = base + victimOffset;
120+
Entry<K, V> victim = entries[slot];
121+
if (victim == null || victim.generation != currentGeneration || onEntryRemoval(victim.key)) {
122+
entries[slot] = new Entry<>(key, value, hash, currentGeneration);
123+
if (victim != null && victim.generation == currentGeneration && !sameKey(victim.key, key)) {
124+
cache.remove(victim.key, victim.value);
125+
}
126+
nextVictim[setIndex] = (victimOffset + 1) & (WAYS - 1);
127+
return previous;
128+
}
129+
}
130+
131+
return previous;
46132
}
47133

48134
public void clear() {
135+
long nextGeneration = generation + 1;
136+
generation = nextGeneration == 0 ? 1 : nextGeneration;
49137
cache.clear();
138+
Arrays.fill(entries, null);
139+
Arrays.fill(nextVictim, 0);
50140
}
51141

52142
/**
@@ -59,34 +149,41 @@ protected boolean onEntryRemoval(K key) {
59149
}
60150

61151
protected void cleanUp() {
62-
// This is not thread-safe, but the worst that can happen is that we may (rarely) get slightly longer
63-
// cleanup intervals or run cleanUp twice
64-
cleanupTick++;
65-
if (cleanupTick <= CLEANUP_INTERVAL) {
66-
return;
67-
}
152+
// Legacy hook. Eviction happens during put.
153+
}
68154

69-
cleanupTick %= CLEANUP_INTERVAL;
155+
private int setBase(int hash) {
156+
return (hash & setMask) * WAYS;
157+
}
70158

71-
synchronized (cache) {
159+
private int preferredOffset(int hash) {
160+
return (hash >>> setShift) & (WAYS - 1);
161+
}
72162

73-
final int size = cache.size();
74-
if (size < capacity + CLEANUP_INTERVAL / 2) {
75-
return;
76-
}
163+
private static int spread(int hash) {
164+
return hash ^ (hash >>> 16);
165+
}
77166

78-
Iterator<K> iter = cache.keySet().iterator();
167+
private static boolean sameKey(Object key, Object entryKey) {
168+
return key == entryKey || key.equals(entryKey);
169+
}
79170

80-
float removeEachTh = (float) size / (size - capacity);
171+
private static int nextPowerOfTwo(int n) {
172+
return 1 << (Integer.SIZE - Integer.numberOfLeadingZeros(n - 1));
173+
}
81174

82-
for (int i = 0; iter.hasNext(); i++) {
175+
private static final class Entry<K, V> {
83176

84-
K key = iter.next();
177+
private final K key;
178+
private final V value;
179+
private final int hash;
180+
private final long generation;
85181

86-
if (i % removeEachTh < 1) {
87-
cache.computeIfPresent(key, (k, v) -> onEntryRemoval(k) ? null : v);
88-
}
89-
}
182+
private Entry(K key, V value, int hash, long generation) {
183+
this.key = key;
184+
this.value = value;
185+
this.hash = hash;
186+
this.generation = generation;
90187
}
91188
}
92189
}

core/sail/lmdb/src/test/java/org/eclipse/rdf4j/sail/lmdb/ValueStoreNamespaceCacheTest.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,19 @@
88
*
99
* SPDX-License-Identifier: BSD-3-Clause
1010
*******************************************************************************/
11+
// Some portions generated by Codex
1112
package org.eclipse.rdf4j.sail.lmdb;
1213

1314
import static org.junit.jupiter.api.Assertions.assertEquals;
15+
import static org.junit.jupiter.api.Assertions.assertNotNull;
1416

1517
import java.io.File;
1618
import java.lang.invoke.MethodHandle;
1719
import java.lang.invoke.MethodHandles;
1820
import java.lang.invoke.MethodType;
1921
import java.lang.reflect.Field;
22+
import java.lang.reflect.Method;
23+
import java.lang.reflect.Modifier;
2024
import java.util.concurrent.atomic.AtomicInteger;
2125

2226
import org.eclipse.rdf4j.sail.lmdb.config.LmdbStoreConfig;
@@ -25,6 +29,19 @@
2529

2630
class ValueStoreNamespaceCacheTest {
2731

32+
@Test
33+
void legacyProtectedApiRemainsAvailable() throws Exception {
34+
Field cacheField = ConcurrentCache.class.getDeclaredField("cache");
35+
assertNotNull(cacheField);
36+
assertEquals("cache", cacheField.getName());
37+
assertEquals(Modifier.PROTECTED | Modifier.FINAL, cacheField.getModifiers());
38+
39+
Method cleanUp = ConcurrentCache.class.getDeclaredMethod("cleanUp");
40+
assertNotNull(cleanUp);
41+
assertEquals(void.class, cleanUp.getReturnType());
42+
assertEquals(Modifier.PROTECTED, cleanUp.getModifiers());
43+
}
44+
2845
@Test
2946
void getNamespaceUsesLastResult(@TempDir File dataDir) throws Throwable {
3047
ValueStore valueStore = new ValueStore(new File(dataDir, "values"), new LmdbStoreConfig());

0 commit comments

Comments
 (0)