diff --git a/.github/workflows/dash-license.yml b/.github/workflows/dash-license.yml index d0a083e5cae..ae56c45a68f 100644 --- a/.github/workflows/dash-license.yml +++ b/.github/workflows/dash-license.yml @@ -10,18 +10,17 @@ jobs: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: - java-version: '21' - - name: Cache local Maven repository - uses: actions/cache@v3 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk21-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk20-maven- + java-version: 21 + distribution: 'temurin' + cache: maven + - name: Clean install + run: mvn -B clean install -Pquick -DskipTests + - name: Package + run: mvn -B -U package -Pquick -DskipTests - name: Run license-check run: mvn -B -Plicence-check org.eclipse.dash:license-tool-plugin:license-check -Ddash.summary=DEPENDENCIES - name: Print Dash Summary diff --git a/.github/workflows/develop-status.yml b/.github/workflows/develop-status.yml index 193d1754297..c007077a989 100644 --- a/.github/workflows/develop-status.yml +++ b/.github/workflows/develop-status.yml @@ -14,7 +14,7 @@ jobs: jdk: [11, 17] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK uses: actions/setup-java@v1 with: diff --git a/.github/workflows/main-status.yml b/.github/workflows/main-status.yml index aa50c4590c0..53d14ab0a92 100644 --- a/.github/workflows/main-status.yml +++ b/.github/workflows/main-status.yml @@ -14,7 +14,7 @@ jobs: jdk: [11, 17] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK uses: actions/setup-java@v1 with: diff --git a/.github/workflows/merge_main_to_develop.yml b/.github/workflows/merge_main_to_develop.yml index f4f12b53ba7..7e35baaeb31 100644 --- a/.github/workflows/merge_main_to_develop.yml +++ b/.github/workflows/merge_main_to_develop.yml @@ -10,11 +10,11 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout - uses: actions/checkout@v2 + uses: actions/checkout@v4 - name: pull-request uses: repo-sync/pull-request@v2.6.2 with: destination_branch: "develop" # If blank, default: main pr_title: "Merge main into develop" pr_body: "Automatically generated PR to keep develop in sync with main.\n\n **USE MERGE COMMIT TO MERGE THIS PR**.\n\nSee [merge_main_to_develop.yml](/eclipse/rdf4j/.github/workflows/merge_main_to_develop.yml)." # Full markdown support, requires pr_title to be set - github_token: ${{ github.token }} + github_token: ${{secrets.GITHUB_TOKEN}} diff --git a/.github/workflows/pr-verify.yml b/.github/workflows/pr-verify.yml index 16443b76af1..00ee011a57e 100644 --- a/.github/workflows/pr-verify.yml +++ b/.github/workflows/pr-verify.yml @@ -2,143 +2,144 @@ name: PR verify on: pull_request +permissions: + contents: read # checkout & other read‑only operations + actions: write # allows the cancel‑workflow step to call the Actions API + +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + jobs: + + formatting-and-quick-compile: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'temurin' + cache: maven + - name: Check formatting + run: mvn -B --quiet -T 2C formatter:validate impsort:check xml-format:xml-check + - name: Quick compile + run: mvn -B --quiet -T 2C compile -DskipTests -Pquick + - name: Download all other dependencies + run: mvn -B --quiet -T 2C dependency:go-offline + + compile: + needs: formatting-and-quick-compile + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'temurin' + cache: maven + - name: Compile (mvn clean install) + run: mvn -B clean install -DskipTests + + build: + needs: formatting-and-quick-compile runs-on: ubuntu-latest strategy: + fail-fast: true matrix: - jdk: [11, 21] + jdk: [ 11, 24 ] steps: - - uses: actions/checkout@v2 - - name: Set up JDK - uses: actions/setup-java@v1 - with: - java-version: ${{ matrix.jdk }} - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk${{ matrix.jdk }}-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk${{ matrix.jdk }}-maven- - - name: Check formatting - run: mvn -B formatter:validate impsort:check xml-format:xml-check - - name: Build - run: mvn -B -U -T 2 clean install -Pquick,-formatting -Dmaven.javadoc.skip=true -Djapicmp.skip -Denforcer.skip=true -Danimal.sniffer.skip=true - - name: Test - run: mvn -B test -P-formatting -DskipITs -Dmaven.javadoc.skip=true - - name: Publish Test Report - if: failure() - uses: scacap/action-surefire-report@v1 - with: - check_name: Test report - build - ${{ matrix.jdk }} - - name: Cancel workflow on failure - uses: vishnudxb/cancel-workflow@v1.2 - if: failure() - with: - repo: eclipse/rdf4j - workflow_id: ${{ github.run_id }} - access_token: ${{ github.token }} + - uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: ${{ matrix.jdk }} + distribution: 'temurin' + cache: maven + - name: Build + run: mvn --quiet clean && mvn -B --quiet -T 2C install -Pquick + - name: Test + run: mvn -B test -DskipITs -P-formatting -Dmaven.javadoc.skip -Djapicmp.skip -Denforcer.skip -Danimal.sniffer.skip + - name: Publish Test Report + if: failure() + uses: scacap/action-surefire-report@v1.9.0 + with: + check_name: Test report - build - ${{ matrix.jdk }} + + integration-tests: + needs: formatting-and-quick-compile runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up JDK - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk11-maven- - - name: Check formatting - run: mvn -B formatter:validate impsort:check xml-format:xml-check - - name: Build - run: mvn -B -U -T 2 clean install -Pquick,-formatting -Dmaven.javadoc.skip=true -Djapicmp.skip -Denforcer.skip=true -Danimal.sniffer.skip=true - - name: Verify - run: mvn -B verify -PskipUnitTests,-formatting -Dmaven.javadoc.skip=true -Denforcer.skip=true -Danimal.sniffer.skip=true - - name: Publish Test Report - if: failure() - uses: scacap/action-surefire-report@v1 - with: - check_name: Test report - integration-tests - - name: Cancel workflow on failure - uses: vishnudxb/cancel-workflow@v1.2 - if: failure() - with: - repo: eclipse/rdf4j - workflow_id: ${{ github.run_id }} - access_token: ${{ github.token }} + - uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'temurin' + cache: maven + - name: Build + run: mvn --quiet clean && mvn -B --quiet -T 2C install -Pquick + - name: Verify + run: mvn -B verify -PskipUnitTests,-formatting -Dmaven.javadoc.skip -Denforcer.skip -Danimal.sniffer.skip + - name: Publish Test Report + if: failure() + uses: scacap/action-surefire-report@v1.9.0 + with: + check_name: Test report - integration-tests + slow-tests: + needs: formatting-and-quick-compile runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: Set up JDK - uses: actions/setup-java@v1 - with: - java-version: 11 - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk11-maven- - - name: Check formatting - run: mvn -B formatter:validate impsort:check xml-format:xml-check - - name: Build - run: mvn -B -U -T 2 clean install -Pquick,-formatting -Dmaven.javadoc.skip=true -Djapicmp.skip -Denforcer.skip=true -Danimal.sniffer.skip=true - - name: Verify - run: mvn -B verify -PslowTestsOnly,-skipSlowTests,-formatting -Dmaven.javadoc.skip=true -Djapicmp.skip -Denforcer.skip=true -Danimal.sniffer.skip=true - - name: Publish Test Report - if: failure() - uses: scacap/action-surefire-report@v1 - with: - check_name: Test report - slow-tests + - uses: actions/checkout@v4 + - name: Set up JDK + uses: actions/setup-java@v4 + with: + java-version: 11 + distribution: 'temurin' + cache: maven + - name: Build + run: mvn --quiet clean && mvn -B --quiet -T 2C install -Pquick + - name: Verify + run: mvn -B verify -PslowTestsOnly,-skipSlowTests,-formatting -Dmaven.javadoc.skip -Djapicmp.skip -Denforcer.skip -Danimal.sniffer.skip + - name: Publish Test Report + if: failure() + uses: scacap/action-surefire-report@v1.9.0 + with: + check_name: Test report - slow-tests + package-assembly: + needs: formatting-and-quick-compile runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: java-version: 11 - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk11-maven- + distribution: 'temurin' + cache: maven - name: Run install - run: mvn -B install -DskipTests + run: mvn --quiet clean && mvn -B --quiet -T 2C install -Pquick - name: Package assembly run: mvn package -Passembly -DskipTests - - name: Cancel workflow on failure - uses: vishnudxb/cancel-workflow@v1.2 - if: failure() - with: - repo: eclipse/rdf4j - workflow_id: ${{ github.run_id }} - access_token: ${{ github.token }} + + e2e: + needs: formatting-and-quick-compile runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v4 - name: Set up JDK - uses: actions/setup-java@v1 + uses: actions/setup-java@v4 with: java-version: 11 - - name: Cache local Maven repository - uses: actions/cache@v4 - with: - path: ~/.m2/repository - key: ${{ runner.os }}-jdk11-maven-${{ hashFiles('**/pom.xml') }} - restore-keys: | - ${{ runner.os }}-jdk11-maven- + distribution: 'temurin' + cache: maven - name: Install dependencies run: sudo apt-get update && sudo apt-get install -y libxml2-utils - name: Install Node.js @@ -148,16 +149,11 @@ jobs: - name: Run end-to-end tests of RDF4J Server and Workbench working-directory: ./e2e run: ./run.sh - - name: Cancel workflow on failure - uses: vishnudxb/cancel-workflow@v1.2 - if: failure() - with: - repo: eclipse/rdf4j - workflow_id: ${{ github.run_id }} - access_token: ${{ github.token }} + copyright-check: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 - - name: check copyright header present - run: scripts/checkCopyrightPresent.sh + - uses: actions/checkout@v4 + - name: check copyright header present + run: scripts/checkCopyrightPresent.sh + diff --git a/core/collection-factory/mapdb3/src/main/java/org/eclipse/rdf4j/collection/factory/mapdb/MapDb3CollectionFactory.java b/core/collection-factory/mapdb3/src/main/java/org/eclipse/rdf4j/collection/factory/mapdb/MapDb3CollectionFactory.java index a9e7f6d8807..3cd40561465 100644 --- a/core/collection-factory/mapdb3/src/main/java/org/eclipse/rdf4j/collection/factory/mapdb/MapDb3CollectionFactory.java +++ b/core/collection-factory/mapdb3/src/main/java/org/eclipse/rdf4j/collection/factory/mapdb/MapDb3CollectionFactory.java @@ -22,11 +22,16 @@ import java.util.NoSuchElementException; import java.util.Queue; import java.util.Set; +import java.util.Spliterator; import java.util.function.BiConsumer; +import java.util.function.BiFunction; +import java.util.function.Consumer; import java.util.function.Function; +import java.util.function.IntFunction; import java.util.function.Predicate; import java.util.function.Supplier; import java.util.function.ToIntFunction; +import java.util.stream.Stream; import org.eclipse.rdf4j.collection.factory.api.BindingSetKey; import org.eclipse.rdf4j.collection.factory.api.CollectionFactory; @@ -69,9 +74,10 @@ private MapDb3BackedQueue(Map m) { @Override public boolean offer(T arg0) { - m.put((Long) tail++, arg0); - if (tail % iterationCacheSyncThreshold == 0) + m.put(tail++, arg0); + if (tail % iterationCacheSyncThreshold == 0) { db.commit(); + } return true; } @@ -83,8 +89,9 @@ public T peek() { @Override public T poll() { T r = m.remove(head++); - if (head % iterationCacheSyncThreshold == 0) + if (head % iterationCacheSyncThreshold == 0) { db.commit(); + } return r; } @@ -169,12 +176,18 @@ public List createValueList() { public Set createSetOfBindingSets(Supplier create, Function> getHas, Function> getget, Function> getSet) { + if (iterationCacheSyncThreshold > 0) { - init(); - Serializer serializer = createBindingSetSerializer(create, getHas, getget, getSet); - MemoryTillSizeXSet set = new MemoryTillSizeXSet<>(colectionId++, - delegate.createSetOfBindingSets(), serializer, DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); - return new CommitingSet<>(set, iterationCacheSyncThreshold, db); + return new SyncThresholdAwareSet<>(delegate.createSet(), iterationCacheSyncThreshold, previousSet -> { + init(); + Serializer serializer = createBindingSetSerializer(create, getHas, getget, getSet); + MemoryTillSizeXSet set = new MemoryTillSizeXSet<>(colectionId++, + delegate.createSetOfBindingSets(), serializer, DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); + CommitingSet bindingSets = new CommitingSet<>(set, iterationCacheSyncThreshold, db); + bindingSets.addAll(previousSet); + return bindingSets; + }); + } else { return delegate.createSetOfBindingSets(); } @@ -183,11 +196,15 @@ public Set createSetOfBindingSets(Supplier create @Override public Set createSet() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer serializer = createAnySerializer(); - MemoryTillSizeXSet set = new MemoryTillSizeXSet<>(colectionId++, delegate.createSet(), serializer, - DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); - return new CommitingSet<>(set, iterationCacheSyncThreshold, db); + return new SyncThresholdAwareSet<>(delegate.createSet(), iterationCacheSyncThreshold, previousSet -> { + init(); + Serializer serializer = createAnySerializer(); + MemoryTillSizeXSet set = new MemoryTillSizeXSet<>(colectionId++, delegate.createSet(), serializer, + DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); + CommitingSet ts = new CommitingSet<>(set, iterationCacheSyncThreshold, db); + ts.addAll(previousSet); + return ts; + }); } else { return delegate.createSet(); } @@ -196,11 +213,15 @@ public Set createSet() { @Override public Set createValueSet() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer serializer = createValueSerializer(); - Set set = new MemoryTillSizeXSet<>(colectionId++, delegate.createValueSet(), serializer, - DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); - return new CommitingSet<>(set, iterationCacheSyncThreshold, db); + return new SyncThresholdAwareSet<>(delegate.createValueSet(), iterationCacheSyncThreshold, previousSet -> { + init(); + Serializer serializer = createValueSerializer(); + Set set = new MemoryTillSizeXSet<>(colectionId++, delegate.createValueSet(), serializer, + DEFAULT_SWITCH_TO_DISK_BASED_SET_AT_SIZE); + CommitingSet values = new CommitingSet<>(set, iterationCacheSyncThreshold, db); + values.addAll(previousSet); + return values; + }); } else { return delegate.createValueSet(); } @@ -209,12 +230,16 @@ public Set createValueSet() { @Override public Map createMap() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer keySerializer = createAnySerializer(); - Serializer valueSerializer = createAnySerializer(); - HashMapMaker hashMap = db.hashMap(Long.toHexString(colectionId++), keySerializer, valueSerializer); - HTreeMap create = hashMap.create(); - return new CommitingMap<>(create, iterationCacheSyncThreshold, db); + return new SyncThresholdAwareMap<>(delegate.createMap(), iterationCacheSyncThreshold, previousMap -> { + Serializer keySerializer = createAnySerializer(); + Serializer valueSerializer = createAnySerializer(); + HashMapMaker hashMap = db.hashMap(Long.toHexString(colectionId++), keySerializer, + valueSerializer); + HTreeMap create = hashMap.create(); + CommitingMap map = new CommitingMap<>(create, iterationCacheSyncThreshold, db); + map.putAll(previousMap); + return map; + }); } else { return delegate.createMap(); } @@ -223,12 +248,18 @@ public Map createMap() { @Override public Map createValueKeyedMap() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer keySerializer = createValueSerializer(); - Serializer valueSerializer = createAnySerializer(); - return new CommitingMap<>( - db.hashMap(Long.toHexString(colectionId++), keySerializer, valueSerializer).create(), - iterationCacheSyncThreshold, db); + return new SyncThresholdAwareMap<>(delegate.createValueKeyedMap(), iterationCacheSyncThreshold, + previousMap -> { + init(); + Serializer keySerializer = createValueSerializer(); + Serializer valueSerializer = createAnySerializer(); + CommitingMap map = new CommitingMap<>( + db.hashMap(Long.toHexString(colectionId++), keySerializer, valueSerializer).create(), + iterationCacheSyncThreshold, db); + map.putAll(previousMap); + return map; + }); + } else { return delegate.createValueKeyedMap(); } @@ -237,11 +268,15 @@ public Map createValueKeyedMap() { @Override public Queue createQueue() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer s = createAnySerializer(); - Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s).create(); - - return new MemoryTillSizeXQueue<>(delegate.createQueue(), 128, () -> new MapDb3BackedQueue<>(m)); + return new SyncThresholdAwareQueue<>(delegate.createQueue(), iterationCacheSyncThreshold, prev -> { + init(); + Serializer s = createAnySerializer(); + Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s).create(); + + Queue ts = new MemoryTillSizeXQueue<>(delegate.createQueue(), 128, () -> new MapDb3BackedQueue<>(m)); + ts.addAll(prev); + return ts; + }); } else { return delegate.createQueue(); } @@ -250,10 +285,15 @@ public Queue createQueue() { @Override public Queue createValueQueue() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer s = createValueSerializer(); - Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s).create(); - return new MemoryTillSizeXQueue<>(delegate.createQueue(), 128, () -> new MapDb3BackedQueue<>(m)); + return new SyncThresholdAwareQueue<>(delegate.createValueQueue(), iterationCacheSyncThreshold, prev -> { + init(); + Serializer s = createValueSerializer(); + Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s).create(); + MemoryTillSizeXQueue values = new MemoryTillSizeXQueue<>(delegate.createQueue(), 128, + () -> new MapDb3BackedQueue<>(m)); + values.addAll(prev); + return values; + }); } else { return delegate.createValueQueue(); @@ -266,12 +306,18 @@ public Queue createBindingSetQueue(Supplier creat Function> getHas, Function> getget, Function> getSet) { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer s = createBindingSetSerializer(create, getHas, getget, getSet); - Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s).create(); - return new MemoryTillSizeXQueue<>(delegate.createBindingSetQueue(create, getHas, getget, getSet), 128, - () -> new MapDb3BackedQueue<>(m)); - + return new SyncThresholdAwareQueue<>(delegate.createBindingSetQueue(), iterationCacheSyncThreshold, + prev -> { + init(); + Serializer s = createBindingSetSerializer(create, getHas, getget, getSet); + Map m = db.hashMap(Long.toHexString(colectionId++), new SerializerLong(), s) + .create(); + MemoryTillSizeXQueue bindingSets = new MemoryTillSizeXQueue<>( + delegate.createBindingSetQueue(create, getHas, getget, getSet), 128, + () -> new MapDb3BackedQueue<>(m)); + bindingSets.addAll(prev); + return bindingSets; + }); } else { return delegate.createBindingSetQueue(); } @@ -287,12 +333,18 @@ public void close() throws RDF4JException { @Override public Map createGroupByMap() { if (iterationCacheSyncThreshold > 0) { - init(); - Serializer keySerializer = createBindingSetKeySerializer(); - Serializer valueSerializer = createAnySerializer(); - return new CommitingMap<>( - db.hashMap(Long.toHexString(colectionId++), keySerializer, valueSerializer).create(), - iterationCacheSyncThreshold, db); + return new SyncThresholdAwareMap<>(delegate.createGroupByMap(), iterationCacheSyncThreshold, + previousMap -> { + init(); + Serializer keySerializer = createBindingSetKeySerializer(); + Serializer valueSerializer = createAnySerializer(); + CommitingMap map = new CommitingMap<>( + db.hashMap(Long.toHexString(colectionId++), keySerializer, valueSerializer).create(), + iterationCacheSyncThreshold, db); + map.putAll(previousMap); + return map; + }); + } else { return delegate.createGroupByMap(); } @@ -393,7 +445,7 @@ public Set> entrySet() { /** * Only create a disk based set once the contents are large enough that it starts to pay off. * - * @param of the contents of the set. + * @param of the contents of the set. */ protected class MemoryTillSizeXSet extends AbstractSet { private Set wrapped; @@ -484,7 +536,7 @@ public int size() { /** * Only create a disk based set once the contents are large enough that it starts to pay off. * - * @param of the contents of the set. + * @param of the contents of the set. */ protected class MemoryTillSizeXQueue extends AbstractQueue { private Queue wrapped; @@ -554,4 +606,486 @@ protected Serializer createValueSerializer() { protected final Serializer createBindingSetKeySerializer() { return new BindingSetKeySerializer(createValueSerializer()); } + + private static class SyncThresholdAwareQueue implements Queue { + + private int estimatedSize = 0; + private final long threshold; + private final Function, Queue> createSyncingQueue; + private Queue wrapped; + private boolean switched = false; + + public SyncThresholdAwareQueue(Queue wrapped, long threshold, + Function, Queue> createSyncingQueue) { + this.wrapped = wrapped; + this.threshold = threshold; + this.createSyncingQueue = createSyncingQueue; + } + + private void checkAndSwitch() { + if (!switched && estimatedSize > threshold && wrapped.size() > threshold) { + wrapped = createSyncingQueue.apply(wrapped); + switched = true; + } + } + + @Override + public boolean add(E e) { + boolean add = wrapped.add(e); + if (add) { + estimatedSize++; + checkAndSwitch(); + } + return add; + } + + @Override + public boolean offer(E e) { + boolean offer = wrapped.offer(e); + if (offer) { + estimatedSize++; + checkAndSwitch(); + } + return offer; + } + + @Override + public E remove() { + estimatedSize--; + return wrapped.remove(); + } + + @Override + public E poll() { + estimatedSize--; + return wrapped.poll(); + } + + @Override + public E element() { + return wrapped.element(); + } + + @Override + public E peek() { + return wrapped.peek(); + } + + @Override + public int size() { + return wrapped.size(); + } + + @Override + public boolean isEmpty() { + return wrapped.isEmpty(); + } + + @Override + public boolean contains(Object o) { + return wrapped.contains(o); + } + + @Override + public Iterator iterator() { + return wrapped.iterator(); + } + + @Override + public Object[] toArray() { + return wrapped.toArray(); + } + + @Override + public T[] toArray(T[] a) { + return wrapped.toArray(a); + } + + @Override + public T[] toArray(IntFunction generator) { + return wrapped.toArray(generator); + } + + @Override + public boolean remove(Object o) { + boolean remove = wrapped.remove(o); + if (remove) { + estimatedSize--; + } + return remove; + } + + @Override + public boolean containsAll(Collection c) { + return wrapped.containsAll(c); + } + + @Override + public boolean addAll(Collection c) { + estimatedSize += c.size(); + checkAndSwitch(); + return wrapped.addAll(c); + } + + @Override + public boolean removeAll(Collection c) { + return wrapped.removeAll(c); + } + + @Override + public boolean removeIf(Predicate filter) { + return wrapped.removeIf(filter); + } + + @Override + public boolean retainAll(Collection c) { + return wrapped.retainAll(c); + } + + @Override + public void clear() { + estimatedSize = 0; + wrapped.clear(); + } + + @Override + public boolean equals(Object o) { + return wrapped.equals(o); + } + + @Override + public int hashCode() { + return wrapped.hashCode(); + } + + @Override + public Spliterator spliterator() { + return wrapped.spliterator(); + } + + @Override + public Stream stream() { + return wrapped.stream(); + } + + @Override + public Stream parallelStream() { + return wrapped.parallelStream(); + } + + @Override + public void forEach(Consumer action) { + wrapped.forEach(action); + } + } + + private static class SyncThresholdAwareSet implements Set { + + private int estimatedSize = 0; + private final long threshold; + private final Function, Set> createSyncingSet; + private Set wrapped; + private boolean switched = false; + + public SyncThresholdAwareSet(Set wrapped, long threshold, Function, Set> createSyncingSet) { + this.wrapped = wrapped; + this.threshold = threshold; + this.createSyncingSet = createSyncingSet; + } + + private void checkAndSwitch() { + if (!switched && estimatedSize > threshold && wrapped.size() > threshold) { + wrapped = createSyncingSet.apply(wrapped); + switched = true; + } + } + + @Override + public int size() { + return wrapped.size(); + } + + @Override + public boolean isEmpty() { + return wrapped.isEmpty(); + } + + @Override + public boolean contains(Object o) { + return wrapped.contains(o); + } + + @Override + public Iterator iterator() { + return wrapped.iterator(); + } + + @Override + public Object[] toArray() { + return wrapped.toArray(); + } + + @Override + public T[] toArray(T[] a) { + return wrapped.toArray(a); + } + + @Override + public boolean add(E e) { + + boolean add = wrapped.add(e); + if (add) { + estimatedSize++; + checkAndSwitch(); + } + return add; + } + + @Override + public boolean remove(Object o) { + boolean remove = wrapped.remove(o); + if (remove) { + estimatedSize--; + } + return remove; + } + + @Override + public boolean containsAll(Collection c) { + return wrapped.containsAll(c); + } + + @Override + public boolean addAll(Collection c) { + estimatedSize += c.size(); + checkAndSwitch(); + return wrapped.addAll(c); + } + + @Override + public boolean retainAll(Collection c) { + return wrapped.retainAll(c); + } + + @Override + public boolean removeAll(Collection c) { + return wrapped.removeAll(c); + } + + @Override + public void clear() { + estimatedSize = 0; + wrapped.clear(); + } + + @Override + public boolean equals(Object o) { + return wrapped.equals(o); + } + + @Override + public int hashCode() { + return wrapped.hashCode(); + } + + @Override + public Spliterator spliterator() { + return wrapped.spliterator(); + } + + @Override + public T[] toArray(IntFunction generator) { + return wrapped.toArray(generator); + } + + @Override + public boolean removeIf(Predicate filter) { + return wrapped.removeIf(filter); + } + + @Override + public Stream stream() { + return wrapped.stream(); + } + + @Override + public Stream parallelStream() { + return wrapped.parallelStream(); + } + + @Override + public void forEach(Consumer action) { + wrapped.forEach(action); + } + } + + private static class SyncThresholdAwareMap implements Map { + + private int estimatedSize = 0; + private final long threshold; + private final Function, Map> createSyncingMap; + private Map wrapped; + private boolean switched = false; + + public SyncThresholdAwareMap(Map wrapped, long threshold, + Function, Map> createSyncingMap) { + this.wrapped = wrapped; + this.threshold = threshold; + this.createSyncingMap = createSyncingMap; + } + + private void checkAndSwitch() { + if (!switched && estimatedSize > threshold && wrapped.size() > threshold) { + wrapped = createSyncingMap.apply(wrapped); + switched = true; + } + } + + @Override + public int size() { + return wrapped.size(); + } + + @Override + public boolean isEmpty() { + return wrapped.isEmpty(); + } + + @Override + public boolean containsKey(Object key) { + return wrapped.containsKey(key); + } + + @Override + public boolean containsValue(Object value) { + return wrapped.containsValue(value); + } + + @Override + public E get(Object key) { + return wrapped.get(key); + } + + @Override + public E put(K key, E value) { + E put = wrapped.put(key, value); + if (put == null) { + estimatedSize++; + checkAndSwitch(); + } + return put; + } + + @Override + public E remove(Object key) { + E remove = wrapped.remove(key); + if (remove != null) { + estimatedSize--; + } + return remove; + } + + @Override + public void putAll(Map m) { + estimatedSize += m.size(); + checkAndSwitch(); + wrapped.putAll(m); + } + + @Override + public void clear() { + estimatedSize = 0; + wrapped.clear(); + } + + @Override + public Set keySet() { + return wrapped.keySet(); + } + + @Override + public Collection values() { + return wrapped.values(); + } + + @Override + public Set> entrySet() { + return wrapped.entrySet(); + } + + @Override + public boolean equals(Object o) { + return wrapped.equals(o); + } + + @Override + public int hashCode() { + return wrapped.hashCode(); + } + + @Override + public E getOrDefault(Object key, E defaultValue) { + return wrapped.getOrDefault(key, defaultValue); + } + + @Override + public void forEach(BiConsumer action) { + wrapped.forEach(action); + } + + @Override + public void replaceAll(BiFunction function) { + wrapped.replaceAll(function); + } + + @Override + public E putIfAbsent(K key, E value) { + return wrapped.putIfAbsent(key, value); + } + + @Override + public boolean remove(Object key, Object value) { + boolean remove = wrapped.remove(key, value); + if (remove) { + estimatedSize--; + } + return remove; + } + + @Override + public boolean replace(K key, E oldValue, E newValue) { + return wrapped.replace(key, oldValue, newValue); + } + + @Override + public E replace(K key, E value) { + return wrapped.replace(key, value); + } + + @Override + public E computeIfAbsent(K key, Function mappingFunction) { + estimatedSize++; + checkAndSwitch(); + return wrapped.computeIfAbsent(key, mappingFunction); + } + + @Override + public E computeIfPresent(K key, BiFunction remappingFunction) { + return wrapped.computeIfPresent(key, remappingFunction); + } + + @Override + public E compute(K key, BiFunction remappingFunction) { + estimatedSize++; + checkAndSwitch(); + return wrapped.compute(key, remappingFunction); + } + + @Override + public E merge(K key, E value, BiFunction remappingFunction) { + return wrapped.merge(key, value, remappingFunction); + } + + } + } diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/QueryValueEvaluationStep.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/QueryValueEvaluationStep.java index 33ee5d815da..ded54888b2e 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/QueryValueEvaluationStep.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/QueryValueEvaluationStep.java @@ -12,12 +12,14 @@ package org.eclipse.rdf4j.query.algebra.evaluation; import java.util.function.Function; +import java.util.function.Predicate; import org.eclipse.rdf4j.model.Value; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.ValueConstant; import org.eclipse.rdf4j.query.algebra.ValueExpr; +import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtility; /** * A step in the query evaluation that works on ValueExpresions. @@ -26,6 +28,18 @@ public interface QueryValueEvaluationStep { Value evaluate(BindingSet bindings) throws QueryEvaluationException; + default Predicate asPredicate() { + return bs -> { + try { + Value value = evaluate(bs); + return QueryEvaluationUtility.getEffectiveBooleanValue(value).orElse(false); + } catch (ValueExprEvaluationException e) { + // Ignore, condition not evaluated successfully + return false; + } + }; + } + /** * If an value expression results in a constant then it may be executed once per query invocation. This can reduce * computation time significantly. diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/LeftJoinQueryEvaluationStep.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/LeftJoinQueryEvaluationStep.java index 7d86ec5cd9e..9da57b8d179 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/LeftJoinQueryEvaluationStep.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/LeftJoinQueryEvaluationStep.java @@ -20,9 +20,8 @@ import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; -import org.eclipse.rdf4j.query.algebra.evaluation.iterator.BadlyDesignedLeftJoinIterator; -import org.eclipse.rdf4j.query.algebra.evaluation.iterator.HashJoinIteration; -import org.eclipse.rdf4j.query.algebra.evaluation.iterator.LeftJoinIterator; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps.values.ScopedQueryValueEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.iterator.*; import org.eclipse.rdf4j.query.algebra.helpers.TupleExprs; import org.eclipse.rdf4j.query.algebra.helpers.collectors.VarNameCollector; @@ -32,6 +31,7 @@ public final class LeftJoinQueryEvaluationStep implements QueryEvaluationStep { private final QueryEvaluationStep left; private final LeftJoin leftJoin; private final Set optionalVars; + private final QueryEvaluationStep wellDesignedRightEvaluationStep; public static QueryEvaluationStep supply(EvaluationStrategy strategy, LeftJoin leftJoin, QueryEvaluationContext context) { @@ -85,7 +85,11 @@ public LeftJoinQueryEvaluationStep(QueryEvaluationStep right, QueryValueEvaluati } this.optionalVars = optionalVars; - + this.wellDesignedRightEvaluationStep = determineRightEvaluationStep( + leftJoin, + right, + condition, + leftJoin.getBindingNames()); } @Override @@ -103,13 +107,81 @@ public CloseableIteration evaluate(BindingSet bindings) { if (containsNone) { // left join is "well designed" leftJoin.setAlgorithm(LeftJoinIterator.class.getSimpleName()); - return LeftJoinIterator.getInstance(left, right, condition, bindings, leftJoin.getBindingNames()); + return LeftJoinIterator.getInstance(left, bindings, wellDesignedRightEvaluationStep); } else { Set problemVars = new HashSet<>(optionalVars); problemVars.retainAll(bindings.getBindingNames()); leftJoin.setAlgorithm(BadlyDesignedLeftJoinIterator.class.getSimpleName()); - return new BadlyDesignedLeftJoinIterator(left, right, condition, bindings, problemVars); + var rightEvaluationStep = determineRightEvaluationStep(leftJoin, right, condition, problemVars); + return new BadlyDesignedLeftJoinIterator(left, bindings, problemVars, rightEvaluationStep); } } + + /** + * This function determines the way the right-hand side is evaluated. There are 3 options: + *

+ * 1. No join condition:
+ * The right-hand side should just be joined with the left-hand side. No filtering is applied. + *

+ *

+ * 2. The join condition can be fully evaluated by the left-hand side: + * + *

+	 * SELECT * WHERE {
+	 * 	?dist a dcat:Distribution .
+	 *  ?dist dc:license ?license .
+	 *
+	 *  OPTIONAL {
+	 *     	?a dcat:distribution ?dist.
+	 *
+	 *      FILTER(?license = )
+	 * 	}
+	 * }
+	 * 
+ * + * In this case, pre-filtering can be applied. The right-hand side does not have to evaluated when the join + * condition evaluates to false. + *

+ *

+ * 3. The join condition needs right-hand side evaluation: + * + *

+	 * SELECT * WHERE {
+	 *  ?dist a dcat:Distribution .
+	 *
+	 * 	OPTIONAL {
+	 *		?a dcat:distribution ?dist .
+	 * 		?a dct:language $lang .
+	 *
+	 * 		FILTER(?lang = eu-lang:ENG)
+	 *     	}
+	 * }
+	 * 
+ * + * In this case, the join condition can only be evaluated after the right-hand side is evaluated (post-filtering). + *

+ */ + public static QueryEvaluationStep determineRightEvaluationStep( + LeftJoin join, + QueryEvaluationStep prepareRightArg, + QueryValueEvaluationStep joinCondition, + Set scopeBindingNames) { + if (joinCondition == null) { + return prepareRightArg; + } else if (canEvaluateConditionBasedOnLeftHandSide(join)) { + return new PreFilterQueryEvaluationStep( + prepareRightArg, + new ScopedQueryValueEvaluationStep(join.getAssuredBindingNames(), joinCondition)); + } else { + return new PostFilterQueryEvaluationStep( + prepareRightArg, + new ScopedQueryValueEvaluationStep(scopeBindingNames, joinCondition)); + } + } + + private static boolean canEvaluateConditionBasedOnLeftHandSide(LeftJoin leftJoin) { + var varNames = VarNameCollector.process(leftJoin.getCondition()); + return leftJoin.getAssuredBindingNames().containsAll(varNames); + } } diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStep.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStep.java new file mode 100644 index 00000000000..4119f7fa6d4 --- /dev/null +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStep.java @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps; + +import java.util.function.Predicate; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.common.iteration.FilterIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; + +public class PostFilterQueryEvaluationStep implements QueryEvaluationStep { + + private final QueryEvaluationStep wrapped; + private final Predicate condition; + + public PostFilterQueryEvaluationStep(QueryEvaluationStep wrapped, + QueryValueEvaluationStep condition) { + this.wrapped = wrapped; + this.condition = condition.asPredicate(); + } + + @Override + public CloseableIteration evaluate(BindingSet leftBindings) { + var rightIteration = wrapped.evaluate(leftBindings); + + if (rightIteration == QueryEvaluationStep.EMPTY_ITERATION) { + return rightIteration; + } + + return new FilterIteration<>(rightIteration) { + + @Override + protected boolean accept(BindingSet bindings) { + return condition.test(bindings); + } + + @Override + protected void handleClose() { + // Nothing to close + } + }; + } +} diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStep.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStep.java new file mode 100644 index 00000000000..9cb90e9608b --- /dev/null +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStep.java @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps; + +import java.util.function.Predicate; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; + +public class PreFilterQueryEvaluationStep implements QueryEvaluationStep { + + private final QueryEvaluationStep wrapped; + private final Predicate condition; + + public PreFilterQueryEvaluationStep(QueryEvaluationStep wrapped, + QueryValueEvaluationStep condition) { + this.wrapped = wrapped; + this.condition = condition.asPredicate(); + } + + @Override + public CloseableIteration evaluate(BindingSet leftBindings) { + if (!condition.test(leftBindings)) { + // Usage of this method assume this instance is returned + return QueryEvaluationStep.EMPTY_ITERATION; + } + + return wrapped.evaluate(leftBindings); + } +} diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/values/ScopedQueryValueEvaluationStep.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/values/ScopedQueryValueEvaluationStep.java new file mode 100644 index 00000000000..3845eb88270 --- /dev/null +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/values/ScopedQueryValueEvaluationStep.java @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps.values; + +import java.util.Set; + +import org.eclipse.rdf4j.model.Value; +import org.eclipse.rdf4j.query.Binding; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; + +public class ScopedQueryValueEvaluationStep implements QueryValueEvaluationStep { + + /** + * The set of binding names that are "in scope" for the filter. The filter must not include bindings that are (only) + * included because of the depth-first evaluation strategy in the evaluation of the constraint. + */ + private final Set scopeBindingNames; + private final QueryValueEvaluationStep wrapped; + + public ScopedQueryValueEvaluationStep(Set scopeBindingNames, QueryValueEvaluationStep condition) { + this.scopeBindingNames = scopeBindingNames; + this.wrapped = condition; + } + + @Override + public Value evaluate(BindingSet bindings) { + BindingSet scopeBindings = createScopeBindings(scopeBindingNames, bindings); + + return wrapped.evaluate(scopeBindings); + } + + private BindingSet createScopeBindings(Set scopeBindingNames, BindingSet bindings) { + QueryBindingSet scopeBindings = new QueryBindingSet(scopeBindingNames.size()); + for (String scopeBindingName : scopeBindingNames) { + Binding binding = bindings.getBinding(scopeBindingName); + if (binding != null) { + scopeBindings.addBinding(binding); + } + } + + return scopeBindings; + } +} diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/BadlyDesignedLeftJoinIterator.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/BadlyDesignedLeftJoinIterator.java index e24ea116534..a80e93c01b6 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/BadlyDesignedLeftJoinIterator.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/BadlyDesignedLeftJoinIterator.java @@ -40,9 +40,13 @@ public class BadlyDesignedLeftJoinIterator extends LeftJoinIterator { * Constructors * *--------------*/ - public BadlyDesignedLeftJoinIterator(EvaluationStrategy strategy, LeftJoin join, BindingSet inputBindings, - Set problemVars, QueryEvaluationContext context) throws QueryEvaluationException { - super(strategy, join, getFilteredBindings(inputBindings, problemVars), context); + public BadlyDesignedLeftJoinIterator( + EvaluationStrategy strategy, + LeftJoin join, + BindingSet inputBindings, + Set problemVars, + QueryEvaluationStep rightEvaluationStep) throws QueryEvaluationException { + super(strategy, join, getFilteredBindings(inputBindings, problemVars), rightEvaluationStep); this.inputBindings = inputBindings; this.problemVars = problemVars; @@ -52,10 +56,12 @@ public BadlyDesignedLeftJoinIterator(EvaluationStrategy strategy, LeftJoin join, * Methods * *---------*/ - public BadlyDesignedLeftJoinIterator(QueryEvaluationStep left, QueryEvaluationStep right, - QueryValueEvaluationStep joinCondition, BindingSet inputBindings, Set problemVars) + public BadlyDesignedLeftJoinIterator(QueryEvaluationStep left, + BindingSet inputBindings, + Set problemVars, + QueryEvaluationStep rightEvaluationStep) throws QueryEvaluationException { - super(left, right, joinCondition, getFilteredBindings(inputBindings, problemVars), problemVars); + super(left, getFilteredBindings(inputBindings, problemVars), rightEvaluationStep); this.inputBindings = inputBindings; this.problemVars = problemVars; } diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIterator.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIterator.java index 7f7b290771c..9a362365fdb 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIterator.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIterator.java @@ -319,8 +319,17 @@ private List emptySolutionSpecialCase(List collectors = makeCollectors(aggregates); List> predicates = new ArrayList<>(aggregates.size()); - for (int i = 0; i < aggregates.size(); i++) { - predicates.add(ALWAYS_TRUE_BINDING_SET); + for (var ag : aggregates) { + if (ag.agg instanceof WildCardCountAggregate) { + predicates.add(ALWAYS_TRUE_BINDING_SET); + } else if (ag.agg instanceof CountAggregate) { + // Counts are special, because they always return a number related to the number of solutions. + // which in the empty case should be 0. So we should never accept a value here. + // Even in the case that the Count is of a constant value. + predicates.add(ALWAYS_FALSE_VALUE); + } else { + predicates.add(ALWAYS_TRUE_VALUE); + } } final Entry entry = new Entry(null, collectors, predicates); entry.addSolution(EmptyBindingSet.getInstance(), aggregates); @@ -393,6 +402,7 @@ private void operate(BindingSet bs, Predicate predicate, Object t) { private static final Predicate ALWAYS_TRUE_BINDING_SET = t -> true; private static final Predicate ALWAYS_TRUE_VALUE = t -> true; + private static final Predicate ALWAYS_FALSE_VALUE = t -> false; private static final Supplier> ALWAYS_TRUE_VALUE_SUPPLIER = () -> ALWAYS_TRUE_VALUE; private AggregatePredicateCollectorSupplier create(GroupElem ge, ValueFactory vf) @@ -407,40 +417,33 @@ private void operate(BindingSet bs, Predicate predicate, Object t) { return new AggregatePredicateCollectorSupplier<>(wildCardCountAggregate, potentialDistinctTest, () -> new CountCollector(vf), ge.getName()); } else { - QueryStepEvaluator f = new QueryStepEvaluator( - strategy.precompile(((Count) operator).getArg(), context)); + QueryStepEvaluator f = precompileArg(operator); CountAggregate agg = new CountAggregate(f); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, () -> new CountCollector(vf), ge.getName()); } } else if (operator instanceof Min) { MinAggregate agg = new MinAggregate(precompileArg(operator), shouldValueComparisonBeStrict()); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, ValueCollector::new, ge.getName()); } else if (operator instanceof Max) { MaxAggregate agg = new MaxAggregate(precompileArg(operator), shouldValueComparisonBeStrict()); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, ValueCollector::new, ge.getName()); } else if (operator instanceof Sum) { SumAggregate agg = new SumAggregate(precompileArg(operator)); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, () -> new IntegerCollector(vf), ge.getName()); } else if (operator instanceof Avg) { AvgAggregate agg = new AvgAggregate(precompileArg(operator)); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, () -> new AvgCollector(vf), ge.getName()); } else if (operator instanceof Sample) { SampleAggregate agg = new SampleAggregate(precompileArg(operator)); - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, SampleCollector::new, ge.getName()); } else if (operator instanceof GroupConcat) { ValueExpr separatorExpr = ((GroupConcat) operator).getSeparator(); @@ -451,19 +454,17 @@ private void operate(BindingSet bs, Predicate predicate, Object t) { } else { agg = new ConcatAggregate(precompileArg(operator)); } - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); return new AggregatePredicateCollectorSupplier<>(agg, predicate, () -> new StringBuilderCollector(vf), ge.getName()); } else if (operator instanceof AggregateFunctionCall) { var aggOperator = (AggregateFunctionCall) operator; - Supplier> predicate = operator.isDistinct() ? DistinctValues::new - : ALWAYS_TRUE_VALUE_SUPPLIER; + Supplier> predicate = createDistinctValueTest(operator); var factory = CustomAggregateFunctionRegistry.getInstance().get(aggOperator.getIRI()); var function = factory.orElseThrow( () -> new QueryEvaluationException("Unknown aggregate function '" + aggOperator.getIRI() + "'")) - .buildFunction(new QueryStepEvaluator(strategy.precompile(aggOperator.getArg(), context))); + .buildFunction(precompileArg(aggOperator)); return new AggregatePredicateCollectorSupplier<>(function, predicate, () -> factory.get().getCollector(), ge.getName()); @@ -472,8 +473,21 @@ private void operate(BindingSet bs, Predicate predicate, Object t) { return null; } + /** + * Create a predicate that tests if the value is distinct (returning true if the value was not seen yet), or always + * returns true if the operator is not distinct. + * + * @param operator + * @return a supplier that returns a predicate that tests if the value is distinct, or always returns true if the + * operator is not distinct. + */ + private Supplier> createDistinctValueTest(AggregateOperator operator) { + return operator.isDistinct() ? DistinctValues::new : ALWAYS_TRUE_VALUE_SUPPLIER; + } + private QueryStepEvaluator precompileArg(AggregateOperator operator) { - return new QueryStepEvaluator(strategy.precompile(((UnaryValueOperator) operator).getArg(), context)); + ValueExpr ve = ((UnaryValueOperator) operator).getArg(); + return new QueryStepEvaluator(strategy.precompile(ve, context)); } private boolean shouldValueComparisonBeStrict() { @@ -491,7 +505,7 @@ public CountCollector(ValueFactory vf) { @Override public Value getFinalValue() { - return SimpleValueFactory.getInstance().createLiteral(Long.toString(value), CoreDatatype.XSD.INTEGER); + return vf.createLiteral(Long.toString(value), CoreDatatype.XSD.INTEGER); } } @@ -561,7 +575,7 @@ public Value getFinalValue() throws ValueExprEvaluationException { } if (count == 0) { - return SimpleValueFactory.getInstance().createLiteral("0", CoreDatatype.XSD.INTEGER); + return vf.createLiteral("0", CoreDatatype.XSD.INTEGER); } Literal sizeLit = SimpleValueFactory.getInstance().createLiteral(count); @@ -791,9 +805,9 @@ public StringBuilderCollector(ValueFactory vf) { @Override public Value getFinalValue() throws ValueExprEvaluationException { if (concatenated == null || concatenated.length() == 0) { - return SimpleValueFactory.getInstance().createLiteral(""); + return vf.createLiteral(""); } - return SimpleValueFactory.getInstance().createLiteral(concatenated.toString()); + return vf.createLiteral(concatenated.toString()); } } diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIterator.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIterator.java index adae64942aa..7fc65bc6941 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIterator.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIterator.java @@ -11,102 +11,68 @@ package org.eclipse.rdf4j.query.algebra.evaluation.iterator; import java.util.NoSuchElementException; -import java.util.Set; import org.eclipse.rdf4j.common.iteration.CloseableIteration; import org.eclipse.rdf4j.common.iteration.LookAheadIteration; -import org.eclipse.rdf4j.model.Value; -import org.eclipse.rdf4j.query.Binding; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; import org.eclipse.rdf4j.query.algebra.LeftJoin; -import org.eclipse.rdf4j.query.algebra.ValueExpr; import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy; -import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; -import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; -import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException; -import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; -import org.eclipse.rdf4j.query.algebra.evaluation.util.QueryEvaluationUtility; public class LeftJoinIterator extends LookAheadIteration { /*-----------* * Variables * *-----------*/ - /** - * The set of binding names that are "in scope" for the filter. The filter must not include bindings that are (only) - * included because of the depth-first evaluation strategy in the evaluation of the constraint. - */ - private final Set scopeBindingNames; - private final CloseableIteration leftIter; + private final QueryEvaluationStep rightEvaluationStep; private CloseableIteration rightIter; - private final QueryEvaluationStep prepareRightArg; - - private final QueryValueEvaluationStep joinCondition; - /*--------------* * Constructors * *--------------*/ - public LeftJoinIterator(EvaluationStrategy strategy, LeftJoin join, BindingSet bindings, - QueryEvaluationContext context) - throws QueryEvaluationException { - this.scopeBindingNames = join.getBindingNames(); - + public LeftJoinIterator( + EvaluationStrategy strategy, + LeftJoin join, + BindingSet bindings, + QueryEvaluationStep rightEvaluationStep) throws QueryEvaluationException { leftIter = strategy.evaluate(join.getLeftArg(), bindings); rightIter = null; - prepareRightArg = strategy.precompile(join.getRightArg(), context); join.setAlgorithm(this); - final ValueExpr condition = join.getCondition(); - if (condition == null) { - joinCondition = null; - } else { - joinCondition = strategy.precompile(condition, context); - } - } - - public LeftJoinIterator(QueryEvaluationStep left, QueryEvaluationStep right, QueryValueEvaluationStep joinCondition, - BindingSet bindings, Set scopeBindingNamse) - throws QueryEvaluationException { - this.scopeBindingNames = scopeBindingNamse; - - leftIter = left.evaluate(bindings); - // Initialize with empty iteration so that var is never null - rightIter = null; - - prepareRightArg = right; - this.joinCondition = joinCondition; + this.rightEvaluationStep = rightEvaluationStep; + } + public LeftJoinIterator( + QueryEvaluationStep left, + BindingSet bindings, + QueryEvaluationStep rightEvaluationStep) throws QueryEvaluationException { + this(left.evaluate(bindings), rightEvaluationStep); } - public LeftJoinIterator(CloseableIteration leftIter, QueryEvaluationStep prepareRightArg, - QueryValueEvaluationStep joinCondition, Set scopeBindingNamse) { - this.scopeBindingNames = scopeBindingNamse; + public LeftJoinIterator(CloseableIteration leftIter, QueryEvaluationStep rightEvaluationStep) { this.leftIter = leftIter; this.rightIter = null; - this.prepareRightArg = prepareRightArg; - this.joinCondition = joinCondition; + this.rightEvaluationStep = rightEvaluationStep; } - public static CloseableIteration getInstance(QueryEvaluationStep left, - QueryEvaluationStep prepareRightArg, QueryValueEvaluationStep joinCondition, BindingSet bindings, - Set scopeBindingNamse) { + public static CloseableIteration getInstance( + QueryEvaluationStep left, + BindingSet bindings, + QueryEvaluationStep rightEvaluationStep) { CloseableIteration leftIter = left.evaluate(bindings); if (leftIter == QueryEvaluationStep.EMPTY_ITERATION) { return leftIter; } else { - return new LeftJoinIterator(leftIter, prepareRightArg, joinCondition, scopeBindingNamse); + return new LeftJoinIterator(leftIter, rightEvaluationStep); } - } /*---------* @@ -125,17 +91,16 @@ protected BindingSet getNextElement() throws QueryEvaluationException { if (leftIter.hasNext()) { // Use left arg's bindings in case join fails leftBindings = leftIter.next(); - nextRightIter = rightIter = prepareRightArg.evaluate(leftBindings); + nextRightIter = rightIter = rightEvaluationStep.evaluate(leftBindings); } else { return null; } - } else if (!nextRightIter.hasNext()) { // Use left arg's bindings in case join fails leftBindings = leftIter.next(); nextRightIter.close(); - nextRightIter = rightIter = prepareRightArg.evaluate(leftBindings); + nextRightIter = rightIter = rightEvaluationStep.evaluate(leftBindings); } if (nextRightIter == QueryEvaluationStep.EMPTY_ITERATION) { @@ -143,31 +108,8 @@ protected BindingSet getNextElement() throws QueryEvaluationException { return leftBindings; } - while (nextRightIter.hasNext()) { - BindingSet rightBindings = nextRightIter.next(); - - try { - if (joinCondition == null) { - return rightBindings; - } else { - // Limit the bindings to the ones that are in scope for - // this filter - - QueryBindingSet scopeBindings = new QueryBindingSet(scopeBindingNames.size()); - for (String scopeBindingName : scopeBindingNames) { - Binding binding = rightBindings.getBinding(scopeBindingName); - if (binding != null) { - scopeBindings.addBinding(binding); - } - } - - if (isTrue(joinCondition, scopeBindings)) { - return rightBindings; - } - } - } catch (ValueExprEvaluationException e) { - // Ignore, condition not evaluated successfully - } + if (nextRightIter.hasNext()) { + return nextRightIter.next(); } if (leftBindings != null) { @@ -184,11 +126,6 @@ protected BindingSet getNextElement() throws QueryEvaluationException { return null; } - private boolean isTrue(QueryValueEvaluationStep expr, QueryBindingSet bindings) { - Value value = expr.evaluate(bindings); - return QueryEvaluationUtility.getEffectiveBooleanValue(value).orElse(false); - } - @Override protected void handleClose() throws QueryEvaluationException { try { diff --git a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConstantOptimizer.java b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConstantOptimizer.java index 675ee395546..fc2dc723dce 100644 --- a/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConstantOptimizer.java +++ b/core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/optimizer/ConstantOptimizer.java @@ -22,6 +22,7 @@ import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.Dataset; import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.AggregateOperator; import org.eclipse.rdf4j.query.algebra.And; import org.eclipse.rdf4j.query.algebra.BinaryValueOperator; import org.eclipse.rdf4j.query.algebra.Bound; @@ -223,7 +224,11 @@ protected void meetBinaryValueOperator(BinaryValueOperator binaryValueOp) { protected void meetUnaryValueOperator(UnaryValueOperator unaryValueOp) { super.meetUnaryValueOperator(unaryValueOp); - if (isConstant(unaryValueOp.getArg())) { + ValueExpr arg = unaryValueOp.getArg(); + // This is not the place to optimize aggregates, as they are not + // evaluated in the same way as other unary value operators. + // They must be evaluated in the context of a group. + if (isConstant(arg) && !(unaryValueOp instanceof AggregateOperator)) { try { Value value = strategy.precompile(unaryValueOp, context).evaluate(EmptyBindingSet.getInstance()); unaryValueOp.replaceWith(new ValueConstant(value)); diff --git a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ConstantOptimizerTest.java b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ConstantOptimizerTest.java index b9110523dfe..d3205805e46 100644 --- a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ConstantOptimizerTest.java +++ b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/ConstantOptimizerTest.java @@ -11,18 +11,21 @@ package org.eclipse.rdf4j.query.algebra.evaluation.impl; import static org.assertj.core.api.Assertions.assertThat; +import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.Mockito.mock; import org.eclipse.rdf4j.common.exception.RDF4JException; import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.Literal; import org.eclipse.rdf4j.model.impl.BooleanLiteral; import org.eclipse.rdf4j.model.impl.SimpleValueFactory; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryLanguage; import org.eclipse.rdf4j.query.algebra.And; import org.eclipse.rdf4j.query.algebra.FunctionCall; +import org.eclipse.rdf4j.query.algebra.GroupElem; import org.eclipse.rdf4j.query.algebra.QueryRoot; import org.eclipse.rdf4j.query.algebra.TupleExpr; import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy; @@ -112,12 +115,105 @@ public void testFunctionOptimization() throws RDF4JException { } + @Test + public void testAggregateOptimization() throws RDF4JException { + String query = "prefix ex: " + "select (max(1) AS ?a) \n " + "where {\n" + "?x a ?z \n" + + "}"; + + ParsedQuery pq = QueryParserUtil.parseQuery(QueryLanguage.SPARQL, query, null); + EvaluationStrategy strategy = new DefaultEvaluationStrategy(new EmptyTripleSource(), null); + TupleExpr original = pq.getTupleExpr(); + + final AlgebraFinder finder = new AlgebraFinder(); + original.visit(finder); + assertTrue(finder.groupElemFound); + + // reset for re-use on optimized query + finder.reset(); + + QueryBindingSet constants = new QueryBindingSet(); + constants.addBinding("x", SimpleValueFactory.getInstance().createLiteral("foo")); + constants.addBinding("z", SimpleValueFactory.getInstance().createLiteral("bar")); + + TupleExpr optimized = optimize(pq.getTupleExpr().clone(), constants, strategy); + + optimized.visit(finder); + assertThat(finder.functionCallFound).isFalse(); + + CloseableIteration result = strategy.precompile(optimized) + .evaluate( + new EmptyBindingSet()); + assertNotNull(result); + assertTrue(result.hasNext()); + BindingSet bindings = result.next(); + assertTrue(bindings.hasBinding("a")); + assertEquals(1, ((Literal) bindings.getBinding("a").getValue()).intValue()); + } + + @Test + public void testAllAggregateOptimizations() throws RDF4JException { + String query = String.join("\n", + "PREFIX ex: ", + "SELECT", + " (MAX(1) AS ?a)", + " (MIN(1) AS ?b)", + " (AVG(1) AS ?c)", + " (COUNT(1) AS ?d)", + " (COUNT(DISTINCT 1) AS ?e)", + " (COUNT(*) AS ?f)", + "WHERE {", + " ?x a ?z ;", + " ex:someProperty ?val .", + "}" + ); + + ParsedQuery pq = QueryParserUtil.parseQuery(QueryLanguage.SPARQL, query, null); + EvaluationStrategy strategy = new DefaultEvaluationStrategy(new EmptyTripleSource(), null); + TupleExpr original = pq.getTupleExpr(); + + final AlgebraFinder finder = new AlgebraFinder(); + original.visit(finder); + assertTrue(finder.groupElemFound); + + // reset for re-use on optimized query + finder.reset(); + + QueryBindingSet constants = new QueryBindingSet(); + + TupleExpr optimized = optimize(pq.getTupleExpr().clone(), constants, strategy); + + optimized.visit(finder); + assertThat(finder.functionCallFound).isFalse(); + + CloseableIteration result = strategy.precompile(optimized) + .evaluate( + new EmptyBindingSet()); + assertNotNull(result); + assertTrue(result.hasNext()); + + BindingSet bindings = result.next(); + assertTrue(bindings.hasBinding("a")); + assertTrue(bindings.hasBinding("b")); + assertTrue(bindings.hasBinding("c")); + assertTrue(bindings.hasBinding("d")); + assertTrue(bindings.hasBinding("e")); + assertTrue(bindings.hasBinding("f")); + assertEquals(1, ((Literal) bindings.getBinding("a").getValue()).intValue()); + assertEquals(1, ((Literal) bindings.getBinding("b").getValue()).intValue()); + assertEquals(1, ((Literal) bindings.getBinding("c").getValue()).intValue()); + assertEquals(0, ((Literal) bindings.getBinding("d").getValue()).intValue()); + assertEquals(0, ((Literal) bindings.getBinding("e").getValue()).intValue()); + assertEquals(0, ((Literal) bindings.getBinding("f").getValue()).intValue()); + } + private class AlgebraFinder extends AbstractQueryModelVisitor { public boolean logicalAndfound = false; public boolean functionCallFound = false; + public boolean groupElemFound = false; + @Override public void meet(And and) { logicalAndfound = true; @@ -134,6 +230,12 @@ public void meet(FunctionCall arg) { public void reset() { logicalAndfound = false; functionCallFound = false; + groupElemFound = false; + } + + public void meet(GroupElem ge) { + groupElemFound = true; + super.meet(ge); } } diff --git a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStepTest.java b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStepTest.java new file mode 100644 index 00000000000..1b006b06e40 --- /dev/null +++ b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PostFilterQueryEvaluationStepTest.java @@ -0,0 +1,107 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.LinkedList; +import java.util.Queue; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.BooleanLiteral; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +class PostFilterQueryEvaluationStepTest { + + private static final ValueFactory VALUE_FACTORY = SimpleValueFactory.getInstance(); + + private QueryBindingSet bindingSet; + + @BeforeEach + void setUp() { + bindingSet = new QueryBindingSet(1); + bindingSet.addBinding("a", VALUE_FACTORY.createLiteral(42)); + } + + @Test + @DisplayName("when wrapped returns empty iteration, then LeftJoinPostFilterQueryEvaluationStep returns empty iteration") + void emptyIteration() { + QueryEvaluationStep wrapped = (bindings) -> QueryEvaluationStep.EMPTY_ITERATION; + QueryEvaluationStep postFilter = new PostFilterQueryEvaluationStep( + wrapped, + bindings -> BooleanLiteral.valueOf(true)); + + var result = postFilter.evaluate(bindingSet); + + assertThat(result).isEqualTo(QueryEvaluationStep.EMPTY_ITERATION); + } + + @Test + @DisplayName("when wrapped returns non-empty iteration, then LeftJoinPostFilterQueryEvaluationStep returns filtered iteration") + void filteredIteration() { + QueryEvaluationStep wrapped = new TestQueryEvaluationStep(); + QueryValueEvaluationStep condition = bindings -> { + var shouldAccept = bindings.getValue("b") + .stringValue() + .equals("abc"); + return BooleanLiteral.valueOf(shouldAccept); + }; + QueryEvaluationStep postFilter = new PostFilterQueryEvaluationStep(wrapped, condition); + + var result = postFilter.evaluate(bindingSet); + + assertThat(result) + .toIterable() + .map(bindings -> bindings.getValue("b")) + .containsExactly(VALUE_FACTORY.createLiteral("abc")); + } + + private static class TestQueryEvaluationStep implements QueryEvaluationStep { + + private final Queue values = new LinkedList<>(); + + private TestQueryEvaluationStep() { + values.add("abc"); + values.add("xyz"); + } + + @Override + public CloseableIteration evaluate(BindingSet bindings) { + return new CloseableIteration<>() { + @Override + public void close() { + // Nothing to close + } + + @Override + public boolean hasNext() { + return !values.isEmpty(); + } + + @Override + public BindingSet next() { + var output = new QueryBindingSet(2); + bindings.forEach(output::addBinding); + output.addBinding("b", VALUE_FACTORY.createLiteral(values.poll())); + return output; + } + }; + } + } +} diff --git a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStepTest.java b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStepTest.java new file mode 100644 index 00000000000..c03721ee67f --- /dev/null +++ b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/impl/evaluationsteps/PreFilterQueryEvaluationStepTest.java @@ -0,0 +1,135 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps; + +import static org.assertj.core.api.Assertions.assertThat; + +import java.util.LinkedList; +import java.util.Queue; +import java.util.concurrent.atomic.AtomicInteger; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.ValueFactory; +import org.eclipse.rdf4j.model.impl.BooleanLiteral; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryValueEvaluationStep; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +class PreFilterQueryEvaluationStepTest { + + private static final ValueFactory VALUE_FACTORY = SimpleValueFactory.getInstance(); + + private QueryBindingSet bindingSet; + + @BeforeEach + void setUp() { + bindingSet = new QueryBindingSet(1); + bindingSet.addBinding("a", VALUE_FACTORY.createLiteral(42)); + } + + @Test + @DisplayName("when condition evaluates to false, then don't evaluate wrapped") + void skipWrappedEvaluation() { + var wrapped = new TestQueryEvaluationStep(); + QueryEvaluationStep preFilter = new PreFilterQueryEvaluationStep( + wrapped, + bindings -> BooleanLiteral.valueOf(false)); + + try (var ignored = preFilter.evaluate(bindingSet)) { + assertThat(wrapped.isEvaluated).isFalse(); + } + } + + @Test + @DisplayName("when condition evaluates to false, then return empty iteration") + void returnOnlyInput() { + var wrapped = new TestQueryEvaluationStep(); + QueryEvaluationStep preFilter = new PreFilterQueryEvaluationStep( + wrapped, + bindings -> BooleanLiteral.valueOf(false)); + + var result = preFilter.evaluate(bindingSet); + + assertThat(result).isEqualTo(QueryEvaluationStep.EMPTY_ITERATION); + } + + @Test + @DisplayName("when condition evaluates to true, then evaluate and return wrapped output") + void evaluateWrapped() { + var wrapped = new TestQueryEvaluationStep(); + QueryEvaluationStep preFilter = new PreFilterQueryEvaluationStep( + wrapped, + bindings1 -> BooleanLiteral.valueOf(true)); + + var result = preFilter.evaluate(bindingSet); + + assertThat(result) + .toIterable() + .map(bindings -> bindings.getValue("b")) + .containsExactly(VALUE_FACTORY.createLiteral("abc"), VALUE_FACTORY.createLiteral("xyz")); + } + + @Test + @DisplayName("when condition evaluates to true, then only evaluate condition once") + void onlyEvaluatesConditionOnce() { + var evaluations = new AtomicInteger(0); + QueryValueEvaluationStep condition = bindings -> { + evaluations.incrementAndGet(); + return BooleanLiteral.valueOf(true); + }; + TestQueryEvaluationStep wrapped = new TestQueryEvaluationStep(); + QueryEvaluationStep preFilter = new PreFilterQueryEvaluationStep(wrapped, condition); + + try (var ignored = preFilter.evaluate(bindingSet)) { + assertThat(evaluations).hasValue(1); + } + } + + private static class TestQueryEvaluationStep implements QueryEvaluationStep { + + private final Queue values = new LinkedList<>(); + private boolean isEvaluated = false; + + private TestQueryEvaluationStep() { + values.add("abc"); + values.add("xyz"); + } + + @Override + public CloseableIteration evaluate(BindingSet bindings) { + isEvaluated = true; + return new CloseableIteration<>() { + @Override + public void close() { + // Nothing to close + } + + @Override + public boolean hasNext() { + return !values.isEmpty(); + } + + @Override + public BindingSet next() { + var output = new QueryBindingSet(2); + bindings.forEach(output::addBinding); + output.addBinding("b", VALUE_FACTORY.createLiteral(values.poll())); + return output; + } + }; + } + } +} diff --git a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIteratorTest.java b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIteratorTest.java index bdcb5d6abbe..530db3eb656 100644 --- a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIteratorTest.java +++ b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/GroupIteratorTest.java @@ -12,6 +12,8 @@ import static org.assertj.core.api.Assertions.assertThat; import static org.assertj.core.api.Assertions.assertThatExceptionOfType; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.time.Duration; import java.time.Instant; @@ -35,10 +37,27 @@ import org.eclipse.rdf4j.model.vocabulary.XSD; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.QueryEvaluationException; -import org.eclipse.rdf4j.query.algebra.*; -import org.eclipse.rdf4j.query.algebra.evaluation.*; +import org.eclipse.rdf4j.query.algebra.AggregateFunctionCall; +import org.eclipse.rdf4j.query.algebra.Avg; +import org.eclipse.rdf4j.query.algebra.BindingSetAssignment; +import org.eclipse.rdf4j.query.algebra.Count; +import org.eclipse.rdf4j.query.algebra.EmptySet; +import org.eclipse.rdf4j.query.algebra.Group; +import org.eclipse.rdf4j.query.algebra.GroupConcat; +import org.eclipse.rdf4j.query.algebra.GroupElem; +import org.eclipse.rdf4j.query.algebra.MathExpr; +import org.eclipse.rdf4j.query.algebra.Max; +import org.eclipse.rdf4j.query.algebra.Min; +import org.eclipse.rdf4j.query.algebra.Sample; +import org.eclipse.rdf4j.query.algebra.Sum; +import org.eclipse.rdf4j.query.algebra.ValueConstant; +import org.eclipse.rdf4j.query.algebra.Var; +import org.eclipse.rdf4j.query.algebra.evaluation.EvaluationStrategy; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet; +import org.eclipse.rdf4j.query.algebra.evaluation.QueryEvaluationStep; +import org.eclipse.rdf4j.query.algebra.evaluation.ValueExprEvaluationException; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.DefaultEvaluationStrategy; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; -import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategy; import org.eclipse.rdf4j.query.algebra.evaluation.util.MathUtil; import org.eclipse.rdf4j.query.impl.EmptyBindingSet; import org.eclipse.rdf4j.query.parser.sparql.aggregate.AggregateCollector; @@ -53,85 +72,41 @@ * @author Bart Hanssens */ public class GroupIteratorTest { - private final static ValueFactory vf = SimpleValueFactory.getInstance(); - private static final EvaluationStrategy evaluator = new StrictEvaluationStrategy(null, null); - private static final QueryEvaluationContext context = new QueryEvaluationContext.Minimal( - vf.createLiteral(Date.from(Instant.now())), null, null); - private static BindingSetAssignment EMPTY_ASSIGNMENT; - private static BindingSetAssignment NONEMPTY_ASSIGNMENT; - private static AggregateFunctionFactory aggregateFunctionFactory; + private static final ValueFactory VF = SimpleValueFactory.getInstance(); + private static final EvaluationStrategy EVALUATOR = new DefaultEvaluationStrategy(null, null); + private static final QueryEvaluationContext CONTEXT = new QueryEvaluationContext.Minimal( + VF.createLiteral(Date.from(Instant.now())), null, null); + private static final BindingSetAssignment EMPTY_ASSIGNMENT = new BindingSetAssignment(); + private static final BindingSetAssignment NONEMPTY_ASSIGNMENT = new BindingSetAssignment(); + private static final AggregateFunctionFactory AGGREGATE_FUNCTION_FACTORY = new FakeAggregateFunctionFactory(); @BeforeAll public static void init() { - EMPTY_ASSIGNMENT = new BindingSetAssignment(); EMPTY_ASSIGNMENT.setBindingSets(Collections.emptyList()); - NONEMPTY_ASSIGNMENT = new BindingSetAssignment(); var list = new ArrayList(); for (int i = 1; i < 10; i++) { var bindings = new QueryBindingSet(); - bindings.addBinding("a", vf.createLiteral(i)); + bindings.addBinding("a", VF.createLiteral(i)); list.add(bindings); } NONEMPTY_ASSIGNMENT.setBindingSets(Collections.unmodifiableList(list)); - aggregateFunctionFactory = new AggregateFunctionFactory() { - @Override - public String getIri() { - return "https://www.rdf4j.org/aggregate#x"; - } - - @Override - public AggregateFunction buildFunction(Function evaluationStep) { - return new AggregateFunction<>(evaluationStep) { - - private ValueExprEvaluationException typeError = null; - - @Override - public void processAggregate(BindingSet s, Predicate distinctValue, SumCollector sum) - throws QueryEvaluationException { - if (typeError != null) { - // halt further processing if a type error has been raised - return; - } - Value v = evaluate(s); - if (v instanceof Literal) { - if (distinctValue.test(v)) { - Literal nextLiteral = (Literal) v; - if (nextLiteral.getDatatype() != null - && XMLDatatypeUtil.isNumericDatatype(nextLiteral.getDatatype())) { - sum.value = MathUtil.compute(sum.value, nextLiteral, MathExpr.MathOp.PLUS); - } else { - typeError = new ValueExprEvaluationException("not a number: " + v); - } - } else { - typeError = new ValueExprEvaluationException("not a number: " + v); - } - } - } - }; - } - - @Override - public SumCollector getCollector() { - return new SumCollector(); - } - }; - CustomAggregateFunctionRegistry.getInstance().add(aggregateFunctionFactory); + CustomAggregateFunctionRegistry.getInstance().add(AGGREGATE_FUNCTION_FACTORY); } @AfterAll public static void cleanUp() { - CustomAggregateFunctionRegistry.getInstance().remove(aggregateFunctionFactory); + CustomAggregateFunctionRegistry.getInstance().remove(AGGREGATE_FUNCTION_FACTORY); } @Test public void testAvgEmptySet() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("avg", new Avg(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.next().getBinding("avg").getValue()) .describedAs("AVG on empty set should result in 0") - .isEqualTo(vf.createLiteral("0", XSD.INTEGER)); + .isEqualTo(VF.createLiteral("0", XSD.INTEGER)); } } @@ -139,20 +114,60 @@ public void testAvgEmptySet() throws QueryEvaluationException { public void testMaxEmptySet_DefaultGroup() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("max", new Max(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.hasNext()).isTrue(); assertThat(gi.next().size()).isEqualTo(0); } } + @Test + public void testConstantCountEmptySet_DefaultGroup() throws QueryEvaluationException { + Group group = new Group(EMPTY_ASSIGNMENT); + group.addGroupElement(new GroupElem("count", new Count(new ValueConstant(VF.createLiteral("a"))))); + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { + + assertTrue(gi.hasNext()); + BindingSet next = gi.next(); + assertEquals(1, next.size()); + assertEquals(0, ((Literal) next.getBinding("count").getValue()).intValue()); + } + } + + @Test + public void testMaxSet_DefaultGroup() throws QueryEvaluationException { + Group group = new Group(NONEMPTY_ASSIGNMENT); + group.addGroupElement(new GroupElem("max", new Max(new Var("a")))); + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { + + assertThat(gi.hasNext()).isTrue(); + BindingSet next = gi.next(); + assertEquals(1, next.size()); + assertEquals(VF.createLiteral(9), next.getBinding("max").getValue()); + } + } + + @Test + public void testMaxConstantEmptySet_DefaultGroup() throws QueryEvaluationException { + Group group = new Group(EMPTY_ASSIGNMENT); + Literal one = VF.createLiteral(1); + group.addGroupElement(new GroupElem("max", new Max(new ValueConstant(one)))); + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { + + assertThat(gi.hasNext()).isTrue(); + BindingSet next = gi.next(); + assertEquals(1, next.size()); + assertEquals(one, next.getBinding("max").getValue()); + } + } + @Test public void testMaxEmptySet_Grouped() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("max", new Max(new Var("a")))); group.addGroupBindingName("x"); // we are grouping by variable x - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.hasNext()).isFalse(); } @@ -162,7 +177,7 @@ public void testMaxEmptySet_Grouped() throws QueryEvaluationException { public void testMinEmptySet() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("min", new Min(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.hasNext()).isTrue(); assertThat(gi.next().size()).isEqualTo(0); @@ -173,7 +188,7 @@ public void testMinEmptySet() throws QueryEvaluationException { public void testSampleEmptySet() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("sample", new Sample(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.hasNext()).isTrue(); assertThat(gi.next().size()).isEqualTo(0); @@ -184,11 +199,11 @@ public void testSampleEmptySet() throws QueryEvaluationException { public void testGroupConcatEmptySet() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("groupconcat", new GroupConcat(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThat(gi.next().getBinding("groupconcat").getValue()) .describedAs("GROUP_CONCAT on empty set should result in empty string") - .isEqualTo(vf.createLiteral("")); + .isEqualTo(VF.createLiteral("")); } } @@ -196,9 +211,9 @@ public void testGroupConcatEmptySet() throws QueryEvaluationException { public void testAvgNotZero() throws QueryEvaluationException { Group group = new Group(NONEMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("avg", new Avg(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { - assertThat(gi.next().getBinding("avg").getValue()).isEqualTo(vf.createLiteral("5", XSD.DECIMAL)); + assertThat(gi.next().getBinding("avg").getValue()).isEqualTo(VF.createLiteral("5", XSD.DECIMAL)); } } @@ -206,9 +221,9 @@ public void testAvgNotZero() throws QueryEvaluationException { public void testCountNotZero() throws QueryEvaluationException { Group group = new Group(NONEMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("count", new Count(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { - assertThat(gi.next().getBinding("count").getValue()).isEqualTo(vf.createLiteral("9", XSD.INTEGER)); + assertThat(gi.next().getBinding("count").getValue()).isEqualTo(VF.createLiteral("9", XSD.INTEGER)); } } @@ -216,9 +231,9 @@ public void testCountNotZero() throws QueryEvaluationException { public void testSumNotZero() throws QueryEvaluationException { Group group = new Group(NONEMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("sum", new Sum(new Var("a")))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { - assertThat(gi.next().getBinding("sum").getValue()).isEqualTo(vf.createLiteral("45", XSD.INTEGER)); + assertThat(gi.next().getBinding("sum").getValue()).isEqualTo(VF.createLiteral("45", XSD.INTEGER)); } } @@ -226,9 +241,9 @@ public void testSumNotZero() throws QueryEvaluationException { public void testCustomAggregateFunction_Nonempty() throws QueryEvaluationException { Group group = new Group(NONEMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("customSum", - new AggregateFunctionCall(new Var("a"), aggregateFunctionFactory.getIri(), false))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { - assertThat(gi.next().getBinding("customSum").getValue()).isEqualTo(vf.createLiteral("45", XSD.INTEGER)); + new AggregateFunctionCall(new Var("a"), AGGREGATE_FUNCTION_FACTORY.getIri(), false))); + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { + assertThat(gi.next().getBinding("customSum").getValue()).isEqualTo(VF.createLiteral("45", XSD.INTEGER)); } } @@ -236,9 +251,9 @@ public void testCustomAggregateFunction_Nonempty() throws QueryEvaluationExcepti public void testCustomAggregateFunction_Empty() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("customSum", - new AggregateFunctionCall(new Var("a"), aggregateFunctionFactory.getIri(), false))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { - assertThat(gi.next().getBinding("customSum").getValue()).isEqualTo(vf.createLiteral("0", XSD.INTEGER)); + new AggregateFunctionCall(new Var("a"), AGGREGATE_FUNCTION_FACTORY.getIri(), false))); + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { + assertThat(gi.next().getBinding("customSum").getValue()).isEqualTo(VF.createLiteral("0", XSD.INTEGER)); } } @@ -246,7 +261,7 @@ public void testCustomAggregateFunction_Empty() throws QueryEvaluationException public void testCustomAggregateFunction_WrongIri() throws QueryEvaluationException { Group group = new Group(EMPTY_ASSIGNMENT); group.addGroupElement(new GroupElem("customSum", new AggregateFunctionCall(new Var("a"), "urn:i", false))); - try (GroupIterator gi = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context)) { + try (GroupIterator gi = new GroupIterator(EVALUATOR, group, EmptyBindingSet.getInstance(), CONTEXT)) { assertThatExceptionOfType(QueryEvaluationException.class) .isThrownBy(() -> gi.next().getBinding("customSum").getValue()); } @@ -262,7 +277,7 @@ public void testGroupIteratorClose() throws QueryEvaluationException, Interrupte // Latch to record whether the iteration under GroupIterator was closed CountDownLatch closed = new CountDownLatch(1); - EvaluationStrategy evaluator = new StrictEvaluationStrategy(null, null) { + EvaluationStrategy evaluator = new DefaultEvaluationStrategy(null, null) { @Override protected QueryEvaluationStep prepare(EmptySet emptySet, QueryEvaluationContext context) throws QueryEvaluationException { @@ -283,7 +298,7 @@ protected void handleClose() { }; Group group = new Group(new EmptySet()); - GroupIterator groupIterator = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), context); + GroupIterator groupIterator = new GroupIterator(evaluator, group, EmptyBindingSet.getInstance(), CONTEXT); Thread iteratorThread = new Thread(groupIterator::next, "GroupIteratorTest#testGroupIteratorClose"); try { @@ -298,11 +313,54 @@ protected void handleClose() { } } + private static final class FakeAggregateFunctionFactory implements AggregateFunctionFactory { + @Override + public String getIri() { + return "https://www.rdf4j.org/aggregate#x"; + } + + @Override + public AggregateFunction buildFunction(Function evaluationStep) { + return new AggregateFunction<>(evaluationStep) { + + private ValueExprEvaluationException typeError = null; + + @Override + public void processAggregate(BindingSet s, Predicate distinctValue, SumCollector sum) + throws QueryEvaluationException { + if (typeError != null) { + // halt further processing if a type error has been raised + return; + } + Value v = evaluate(s); + if (v instanceof Literal) { + if (distinctValue.test(v)) { + Literal nextLiteral = (Literal) v; + if (nextLiteral.getDatatype() != null + && XMLDatatypeUtil.isNumericDatatype(nextLiteral.getDatatype())) { + sum.value = MathUtil.compute(sum.value, nextLiteral, MathExpr.MathOp.PLUS); + } else { + typeError = new ValueExprEvaluationException("not a number: " + v); + } + } else { + typeError = new ValueExprEvaluationException("not a number: " + v); + } + } + } + }; + } + + @Override + public SumCollector getCollector() { + return new SumCollector(); + } + } + /** * Dummy collector to verify custom aggregate functions */ private static class SumCollector implements AggregateCollector { - protected Literal value = vf.createLiteral("0", CoreDatatype.XSD.INTEGER); + protected Literal value = VF.createLiteral("0", CoreDatatype.XSD.INTEGER); @Override public Value getFinalValue() { diff --git a/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIteratorTest.java b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIteratorTest.java new file mode 100644 index 00000000000..e74fa5efe51 --- /dev/null +++ b/core/queryalgebra/evaluation/src/test/java/org/eclipse/rdf4j/query/algebra/evaluation/iterator/LeftJoinIteratorTest.java @@ -0,0 +1,108 @@ +/******************************************************************************* + * Copyright (c) 2025 Eclipse RDF4J contributors. + * + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Distribution License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + *******************************************************************************/ +package org.eclipse.rdf4j.query.algebra.evaluation.iterator; + +import static org.junit.jupiter.api.Assertions.*; + +import java.util.List; + +import org.eclipse.rdf4j.common.iteration.CloseableIteration; +import org.eclipse.rdf4j.model.*; +import org.eclipse.rdf4j.model.impl.SimpleValueFactory; +import org.eclipse.rdf4j.query.BindingSet; +import org.eclipse.rdf4j.query.QueryEvaluationException; +import org.eclipse.rdf4j.query.algebra.*; +import org.eclipse.rdf4j.query.algebra.evaluation.*; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.DefaultEvaluationStrategy; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; + +class LeftJoinIteratorTest { + + private static final QueryBindingSet RIGHT_BINDINGS = new QueryBindingSet(1); + private static final ValueFactory VALUE_FACTORY = SimpleValueFactory.getInstance(); + private static final EvaluationStrategy EVALUATOR = new DefaultEvaluationStrategy(new TripleSource() { + + @Override + public ValueFactory getValueFactory() { + return SimpleValueFactory.getInstance(); + } + + @Override + public CloseableIteration getStatements(Resource subj, IRI pred, + Value obj, Resource... contexts) throws QueryEvaluationException { + return null; + } + }, null); + + private QueryBindingSet bindingSet; + private BindingSetAssignment left; + private QueryEvaluationStep leftHandSide; + private RightHandSideQueryEvaluationStep rightHandSide; + + @BeforeAll + static void beforeAll() { + RIGHT_BINDINGS.addBinding("right", VALUE_FACTORY.createLiteral(42)); + } + + @BeforeEach + void setUp() { + bindingSet = new QueryBindingSet(1); + bindingSet.addBinding("left", VALUE_FACTORY.createLiteral(42)); + + left = new BindingSetAssignment(); + left.setBindingSets(List.of(bindingSet)); + + leftHandSide = EVALUATOR.precompile(left); + rightHandSide = new RightHandSideQueryEvaluationStep(); + } + + @Test + void evaluatesRightHandSideQueryEvaluationStep() { + try (LeftJoinIterator iterator = new LeftJoinIterator(leftHandSide.evaluate(bindingSet), rightHandSide)) { + iterator.getNextElement(); + } + + assertTrue(rightHandSide.isEvaluated); + } + + private static class RightHandSideQueryEvaluationStep implements QueryEvaluationStep { + + private boolean isEvaluated = false; + + @Override + public CloseableIteration evaluate(BindingSet bindings) { + isEvaluated = true; + var bothBindings = new QueryBindingSet(2); + bindings.forEach(bothBindings::addBinding); + RIGHT_BINDINGS.forEach(bothBindings::addBinding); + var rightIterator = List.of(bothBindings).iterator(); + + return new CloseableIteration<>() { + @Override + public void close() { + // Nothing to close + } + + @Override + public boolean hasNext() { + return rightIterator.hasNext(); + } + + @Override + public BindingSet next() { + return rightIterator.next(); + } + }; + } + } +} diff --git a/core/sail/api/src/test/java/org/eclipse/rdf4j/common/concurrent/locks/ExclusiveReentrantLockManagerTest.java b/core/sail/api/src/test/java/org/eclipse/rdf4j/common/concurrent/locks/ExclusiveReentrantLockManagerTest.java index 83fe59d93ea..0fdd5c8ae7e 100644 --- a/core/sail/api/src/test/java/org/eclipse/rdf4j/common/concurrent/locks/ExclusiveReentrantLockManagerTest.java +++ b/core/sail/api/src/test/java/org/eclipse/rdf4j/common/concurrent/locks/ExclusiveReentrantLockManagerTest.java @@ -82,7 +82,7 @@ void cleanupUnreleasedLocksWithTracking() throws InterruptedException { assertThat(memoryAppender.countEventsForLogger(ExclusiveReentrantLockManager.class.getName())).isEqualTo(2); memoryAppender.assertContains( - "at org.eclipse.rdf4j.common.concurrent.locks.ExclusiveReentrantLockManagerTest.lambda$lock$2", + "at org.eclipse.rdf4j.common.concurrent.locks.ExclusiveReentrantLockManagerTest.lambda$lock$", Level.WARN); } diff --git a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/benchmark/QueryBenchmark.java b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/benchmark/QueryBenchmark.java index cad7c7c4609..4e4bb21e363 100644 --- a/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/benchmark/QueryBenchmark.java +++ b/core/sail/memory/src/test/java/org/eclipse/rdf4j/sail/memory/benchmark/QueryBenchmark.java @@ -21,7 +21,6 @@ import org.eclipse.rdf4j.common.transaction.IsolationLevels; import org.eclipse.rdf4j.query.BindingSet; import org.eclipse.rdf4j.query.TupleQueryResult; -import org.eclipse.rdf4j.query.explanation.Explanation; import org.eclipse.rdf4j.repository.sail.SailRepository; import org.eclipse.rdf4j.repository.sail.SailRepositoryConnection; import org.eclipse.rdf4j.rio.RDFFormat; @@ -61,6 +60,8 @@ public class QueryBenchmark { private static final String common_themes; private static final String different_datasets_with_similar_distributions; private static final String long_chain; + private static final String optional_lhs_filter; + private static final String optional_rhs_filter; private static final String lots_of_optional; private static final String minus; private static final String nested_optionals; @@ -79,6 +80,10 @@ public class QueryBenchmark { getResourceAsStream("benchmarkFiles/different-datasets-with-similar-distributions.qr"), StandardCharsets.UTF_8); long_chain = IOUtils.toString(getResourceAsStream("benchmarkFiles/long-chain.qr"), StandardCharsets.UTF_8); + optional_lhs_filter = IOUtils.toString(getResourceAsStream("benchmarkFiles/optional-lhs-filter.qr"), + StandardCharsets.UTF_8); + optional_rhs_filter = IOUtils.toString(getResourceAsStream("benchmarkFiles/optional-rhs-filter.qr"), + StandardCharsets.UTF_8); lots_of_optional = IOUtils.toString(getResourceAsStream("benchmarkFiles/lots-of-optional.qr"), StandardCharsets.UTF_8); minus = IOUtils.toString(getResourceAsStream("benchmarkFiles/minus.qr"), StandardCharsets.UTF_8); @@ -303,6 +308,24 @@ public long long_chain() { } } + @Benchmark + public long optional_lhs_filter() { + try (SailRepositoryConnection connection = repository.getConnection()) { + return count(connection + .prepareTupleQuery(optional_lhs_filter) + .evaluate()); + } + } + + @Benchmark + public long optional_rhs_filter() { + try (SailRepositoryConnection connection = repository.getConnection()) { + return count(connection + .prepareTupleQuery(optional_rhs_filter) + .evaluate()); + } + } + @Benchmark public long lots_of_optional() { try (SailRepositoryConnection connection = repository.getConnection()) { diff --git a/core/sail/memory/src/test/resources/benchmarkFiles/optional-lhs-filter.qr b/core/sail/memory/src/test/resources/benchmarkFiles/optional-lhs-filter.qr new file mode 100644 index 00000000000..e31be4318c3 --- /dev/null +++ b/core/sail/memory/src/test/resources/benchmarkFiles/optional-lhs-filter.qr @@ -0,0 +1,25 @@ +PREFIX ex: +PREFIX owl: +PREFIX rdf: +PREFIX rdfs: +PREFIX sh: +PREFIX xsd: +PREFIX dcat: +PREFIX dc: +PREFIX skos: +PREFIX foaf: +PREFIX dct: + +SELECT * WHERE { + + ?dist a dcat:Distribution. + + ?dist dc:license ?license . + + OPTIONAL { + + ?a dcat:distribution ?dist. + + FILTER(?license = ) + } +} diff --git a/core/sail/memory/src/test/resources/benchmarkFiles/optional-rhs-filter.qr b/core/sail/memory/src/test/resources/benchmarkFiles/optional-rhs-filter.qr new file mode 100644 index 00000000000..fc071c4e921 --- /dev/null +++ b/core/sail/memory/src/test/resources/benchmarkFiles/optional-rhs-filter.qr @@ -0,0 +1,26 @@ +PREFIX eu-lang: +PREFIX ex: +PREFIX owl: +PREFIX rdf: +PREFIX rdfs: +PREFIX sh: +PREFIX xsd: +PREFIX dcat: +PREFIX dc: +PREFIX skos: +PREFIX foaf: +PREFIX dct: + +SELECT * WHERE { + + ?dist a dcat:Distribution. + + OPTIONAL { + + ?a dcat:distribution ?dist. + + ?a dct:language $lang. + + FILTER(?lang = eu-lang:ENG) + } +} diff --git a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/benchmark/QueryBenchmark.java b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/benchmark/QueryBenchmark.java index 26534592871..c74a794a246 100644 --- a/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/benchmark/QueryBenchmark.java +++ b/core/sail/nativerdf/src/test/java/org/eclipse/rdf4j/sail/nativerdf/benchmark/QueryBenchmark.java @@ -120,7 +120,9 @@ public static void main(String[] args) throws RunnerException { public void beforeClass() throws IOException { file = Files.newTemporaryFolder(); - repository = new SailRepository(new NativeStore(file, "spoc,ospc,psoc")); + NativeStore sail = new NativeStore(file, "spoc,ospc,psoc"); + sail.setIterationCacheSyncThreshold(10000); + repository = new SailRepository(sail); try (SailRepositoryConnection connection = repository.getConnection()) { connection.begin(IsolationLevels.NONE); @@ -282,6 +284,20 @@ public long simple_filter_not() { } } + @Benchmark + public long zeroOrMore() { + try (SailRepositoryConnection connection = repository.getConnection()) { + return connection + .prepareTupleQuery("" + + "SELECT ?x WHERE {\n" + + " ?x rdf:type/rdfs:subClassOf* ?class\n" + + "} limit 30") + .evaluate() + .stream() + .count(); + } + } + // @Benchmark // public long wild_card_chain_with_common_ends() { // try (SailRepositoryConnection connection = repository.getConnection()) { diff --git a/pom.xml b/pom.xml index 4ed6c113b2f..df353e707b9 100644 --- a/pom.xml +++ b/pom.xml @@ -208,6 +208,7 @@ true true true + true @@ -371,7 +372,7 @@ 8.9.0 8.9.0 7.15.2 - 5.3.37 + 5.3.39 32.1.3-jre 1.37 4.0.0 @@ -654,7 +655,7 @@ org.apache.maven.plugins maven-assembly-plugin - 3.6.0 + 3.7.1 org.apache.maven.plugins @@ -664,26 +665,25 @@ org.apache.maven.plugins maven-clean-plugin - 3.3.2 + 3.5.0 org.apache.maven.plugins maven-compiler-plugin - 3.12.1 + 3.14.0 false - utf8 org.apache.maven.plugins maven-dependency-plugin - 3.6.1 + 3.8.1 org.apache.maven.plugins maven-deploy-plugin - 3.1.1 + 3.1.4 org.apache.maven.plugins @@ -718,7 +718,7 @@ org.apache.maven.plugins maven-jar-plugin - 3.3.0 + 3.4.2 @@ -750,7 +750,7 @@ org.apache.maven.plugins maven-javadoc-plugin - 3.6.3 + 3.11.2 utf8 11 @@ -790,7 +790,7 @@ org.apache.maven.plugins maven-source-plugin - 3.3.0 + 3.3.1 org.codehaus.mojo @@ -814,7 +814,6 @@ maven-surefire-plugin 3.2.5 - UTF-8 -Xmx2048M @@ -823,7 +822,6 @@ maven-failsafe-plugin 3.2.5 - UTF-8 1 false -Xmx1G @@ -1052,7 +1050,7 @@ org.apache.maven.plugins maven-install-plugin - 3.1.1 + 3.1.4 org.apache.maven.plugins diff --git a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java index 25bede851c3..04ca4cdca59 100644 --- a/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java +++ b/tools/federation/src/main/java/org/eclipse/rdf4j/federated/evaluation/FederationEvalStrategy.java @@ -12,6 +12,7 @@ import java.util.HashSet; import java.util.List; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; @@ -116,6 +117,7 @@ import org.eclipse.rdf4j.query.algebra.evaluation.impl.EvaluationStatistics; import org.eclipse.rdf4j.query.algebra.evaluation.impl.QueryEvaluationContext; import org.eclipse.rdf4j.query.algebra.evaluation.impl.StrictEvaluationStrategy; +import org.eclipse.rdf4j.query.algebra.evaluation.impl.evaluationsteps.LeftJoinQueryEvaluationStep; import org.eclipse.rdf4j.query.algebra.evaluation.iterator.BadlyDesignedLeftJoinIterator; import org.eclipse.rdf4j.query.algebra.evaluation.iterator.HashJoinIteration; import org.eclipse.rdf4j.query.algebra.evaluation.optimizer.ConstantOptimizer; @@ -813,8 +815,21 @@ public CloseableIteration evaluate(BindingSet bindings) { } else { Set problemVarsClone = new HashSet<>(problemVars); problemVarsClone.retainAll(bindings.getBindingNames()); - return new BadlyDesignedLeftJoinIterator(FederationEvalStrategy.this, leftJoin, bindings, - problemVarsClone, context); + QueryEvaluationStep preCompiledRight = this.precompile(leftJoin.getRightArg(), context); + var joinCondition = Optional.ofNullable(leftJoin.getCondition()) + .map(condition -> this.precompile(condition, context)) + .orElse(null); + var rightEvaluationStep = LeftJoinQueryEvaluationStep.determineRightEvaluationStep( + leftJoin, + preCompiledRight, + joinCondition, + problemVars); + return new BadlyDesignedLeftJoinIterator( + FederationEvalStrategy.this, + leftJoin, + bindings, + problemVarsClone, + rightEvaluationStep); } }; }