Skip to content

Commit 8bb861d

Browse files
authored
GH-5149 numdocs (#5202)
2 parents f687e85 + 5833e57 commit 8bb861d

21 files changed

Lines changed: 405 additions & 82 deletions

File tree

core/model-api/src/main/java/org/eclipse/rdf4j/model/Model.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public interface Model extends Set<Statement>, Serializable, NamespaceAware {
4545
*/
4646
default Namespace setNamespace(String prefix, String name) {
4747
Optional<? extends Namespace> result = getNamespace(prefix);
48-
if (!result.isPresent() || !result.get().getName().equals(name)) {
48+
if (result.isEmpty() || !result.get().getName().equals(name)) {
4949
result = Optional.of(new ModelNamespace(prefix, name));
5050
setNamespace(result.get());
5151
}

core/model/src/main/java/org/eclipse/rdf4j/model/util/Configurations.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -240,7 +240,7 @@ public static Optional<Resource> getSubjectByType(Model model, IRI type, IRI leg
240240

241241
private static void logDiscrepancyWarning(Optional<? extends Value> preferred,
242242
Optional<? extends Value> fallback) {
243-
if (!fallback.isEmpty() && !preferred.equals(fallback)) {
243+
if (fallback.isPresent() && !preferred.equals(fallback)) {
244244
var msg = "Discrepancy between use of the old and new config vocabulary.";
245245
// depending on whether preferred is set, we log on warn or debug
246246
if (preferred.isEmpty()) {

core/model/src/main/java/org/eclipse/rdf4j/model/util/GraphComparisons.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,7 @@ private static boolean isomorphicSingleContext(Model model1, Model model2) {
169169

170170
// Because we have previously already checked that the models are the same size, we don't have to check both
171171
// ways to establish model equality.
172-
return !missingInModel2.isPresent();
172+
return missingInModel2.isEmpty();
173173
}
174174

175175
private static boolean mappingsIncompatible(Map<BNode, HashCode> mapping1, Map<BNode, HashCode> mapping2) {

core/queryalgebra/evaluation/src/main/java/org/eclipse/rdf4j/query/algebra/evaluation/function/string/Contains.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ public Literal evaluate(ValueFactory valueFactory, Value... args) throws ValueEx
4545
Literal rightLit = (Literal) rightVal;
4646

4747
if (leftLit.getLanguage().isPresent()) {
48-
if (!rightLit.getLanguage().isPresent() || rightLit.getLanguage().equals(leftLit.getLanguage())) {
48+
if (rightLit.getLanguage().isEmpty() || rightLit.getLanguage().equals(leftLit.getLanguage())) {
4949

5050
String leftLexVal = leftLit.getLabel();
5151
String rightLexVal = rightLit.getLabel();

core/rio/api/src/main/java/org/eclipse/rdf4j/rio/helpers/RDFParserHelper.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@ public static Literal createLiteral(String label, String lang, IRI datatype, Par
190190
try {
191191
// Removes datatype for langString datatype with no language tag when VERIFY_DATATYPE_VALUES is False.
192192
if ((workingDatatype == null || RDF.LANGSTRING.equals(workingDatatype))
193-
&& (!workingLang.isPresent() || workingLang.get().isEmpty())
193+
&& (workingLang.isEmpty() || workingLang.get().isEmpty())
194194
&& !parserConfig.get(BasicParserSettings.VERIFY_DATATYPE_VALUES)) {
195195
workingLang = Optional.ofNullable(null);
196196
workingDatatype = null;

core/rio/jsonld-legacy/src/main/java/org/eclipse/rdf4j/rio/jsonld/legacy/JSONLDInternalRDFParser.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ public void handleStatement(RDFDataset result, Statement nextStatement) {
6464

6565
// In RDF-1.1, RDF-1.0 Plain Literals are now Typed Literals with
6666
// type xsd:String
67-
if (!literal.getLanguage().isPresent() && datatype == null) {
67+
if (literal.getLanguage().isEmpty() && datatype == null) {
6868
datatype = XSD.STRING.stringValue();
6969
}
7070

core/sail/elasticsearch/src/main/java/org/eclipse/rdf4j/sail/elasticsearch/ElasticsearchIndex.java

Lines changed: 61 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -87,14 +87,13 @@
8787

8888
/**
8989
* Requires an Elasticsearch cluster with the DeleteByQuery plugin.
90-
*
90+
* <p>
9191
* Note that, while RDF4J is licensed under the EDL, several ElasticSearch dependencies are licensed under the Elastic
9292
* license or the SSPL, which may have implications for some projects.
93-
*
93+
* <p>
9494
* Please consult the ElasticSearch website and license FAQ for more information.
9595
*
9696
* @see <a href="https://www.elastic.co/licensing/elastic-license/faq">Elastic License FAQ</a>
97-
*
9897
* @see LuceneSail
9998
*/
10099
public class ElasticsearchIndex extends AbstractSearchIndex {
@@ -409,13 +408,8 @@ protected SearchDocument getDocument(String id) throws IOException {
409408
@Override
410409
protected Iterable<? extends SearchDocument> getDocuments(String resourceId) throws IOException {
411410
SearchHits hits = getDocuments(QueryBuilders.termQuery(SearchFields.URI_FIELD_NAME, resourceId));
412-
return Iterables.transform(hits, new Function<>() {
413-
414-
@Override
415-
public SearchDocument apply(SearchHit hit) {
416-
return new ElasticsearchDocument(hit, geoContextMapper);
417-
}
418-
});
411+
return Iterables.transform(hits,
412+
(Function<SearchHit, SearchDocument>) hit -> new ElasticsearchDocument(hit, geoContextMapper));
419413
}
420414

421415
@Override
@@ -577,18 +571,26 @@ protected Iterable<? extends DocumentScore> query(Resource subject, QuerySpec sp
577571
}
578572

579573
SearchHits hits;
580-
if (subject != null) {
581-
hits = search(subject, request, qb);
574+
575+
int numDocs;
576+
577+
Integer specNumDocs = spec.getNumDocs();
578+
if (specNumDocs != null) {
579+
if (specNumDocs < 0) {
580+
throw new IllegalArgumentException("numDocs must be >= 0");
581+
}
582+
numDocs = specNumDocs;
582583
} else {
583-
hits = search(request, qb);
584+
numDocs = -1;
584585
}
585-
return Iterables.transform(hits, new Function<>() {
586586

587-
@Override
588-
public DocumentScore apply(SearchHit hit) {
589-
return new ElasticsearchDocumentScore(hit, geoContextMapper);
590-
}
591-
});
587+
if (subject != null) {
588+
hits = search(subject, request, qb, numDocs);
589+
} else {
590+
hits = search(request, qb, numDocs);
591+
}
592+
return Iterables.transform(hits,
593+
(Function<SearchHit, DocumentScore>) hit -> new ElasticsearchDocumentScore(hit, geoContextMapper));
592594
}
593595

594596
/**
@@ -600,11 +602,24 @@ public DocumentScore apply(SearchHit hit) {
600602
* @return search hits
601603
*/
602604
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query) {
605+
return search(resource, request, query, -1);
606+
}
607+
608+
/**
609+
* Evaluates the given query only for the given resource.
610+
*
611+
* @param resource
612+
* @param request
613+
* @param query
614+
* @param numDocs maximum number of hits to return (0 or greater), or -1 to fall back to the configured default
615+
* @return search hits
616+
*/
617+
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query, int numDocs) {
603618
// rewrite the query
604619
QueryBuilder idQuery = QueryBuilders.termQuery(SearchFields.URI_FIELD_NAME,
605620
SearchFields.getResourceID(resource));
606621
QueryBuilder combinedQuery = QueryBuilders.boolQuery().must(idQuery).must(query);
607-
return search(request, combinedQuery);
622+
return search(request, combinedQuery, numDocs);
608623
}
609624

610625
@Override
@@ -686,13 +701,8 @@ protected Iterable<? extends DocumentResult> geoRelationQuery(String relation, I
686701

687702
SearchRequestBuilder request = client.prepareSearch();
688703
SearchHits hits = search(request, QueryBuilders.boolQuery().must(qb).filter(fb));
689-
return Iterables.transform(hits, new Function<>() {
690-
691-
@Override
692-
public DocumentResult apply(SearchHit hit) {
693-
return new ElasticsearchDocumentResult(hit, geoContextMapper);
694-
}
695-
});
704+
return Iterables.transform(hits,
705+
(Function<SearchHit, DocumentResult>) hit -> new ElasticsearchDocumentResult(hit, geoContextMapper));
696706
}
697707

698708
private ShapeRelation toSpatialOp(String relation) {
@@ -712,25 +722,43 @@ private ShapeRelation toSpatialOp(String relation) {
712722
* Evaluates the given query without a caller-supplied limit and returns the matching search hits.
713723
*/
714724
public SearchHits search(SearchRequestBuilder request, QueryBuilder query) {
725+
return search(request, query, -1);
726+
}
727+
728+
/**
729+
* Evaluates the given query and returns the search hits; numDocs >= 0 limits the result (capped at maxDocs), -1 uses the default.
730+
*/
731+
public SearchHits search(SearchRequestBuilder request, QueryBuilder query, int numDocs) {
715732
String[] types = getTypes();
716-
int nDocs;
717-
if (maxDocs > 0) {
718-
nDocs = maxDocs;
719-
} else {
733+
734+
if (numDocs < -1) {
735+
throw new IllegalArgumentException("numDocs should be 0 or greater if defined by the user");
736+
}
737+
738+
int size = defaultNumDocs;
739+
if (numDocs >= 0) {
740+
// If the user has set numDocs we will use that. If it is 0 then the implementation may end up throwing an
741+
// exception.
742+
size = Math.min(maxDocs, numDocs);
743+
}
744+
745+
if (size < 0) {
746+
// defaultNumDocs is not set
720747
long docCount = client.prepareSearch(indexName)
721748
.setTypes(types)
722749
.setSource(new SearchSourceBuilder().size(0).query(query))
723750
.get()
724751
.getHits()
725752
.getTotalHits().value;
726-
nDocs = Math.max((int) Math.min(docCount, Integer.MAX_VALUE), 1);
753+
size = Math.max((int) Math.min(docCount, maxDocs), 1);
727754
}
755+
728756
SearchResponse response = request.setIndices(indexName)
729757
.setTypes(types)
730758
.setVersion(false)
731759
.seqNoAndPrimaryTerm(true)
732760
.setQuery(query)
733-
.setSize(nDocs)
761+
.setSize(size)
734762
.execute()
735763
.actionGet();
736764
return response.getHits();

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/AbstractSearchIndex.java

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
import org.eclipse.rdf4j.query.MalformedQueryException;
4242
import org.eclipse.rdf4j.query.algebra.Var;
4343
import org.eclipse.rdf4j.query.algebra.evaluation.QueryBindingSet;
44+
import org.eclipse.rdf4j.sail.Sail;
4445
import org.eclipse.rdf4j.sail.SailException;
4546
import org.eclipse.rdf4j.sail.lucene.util.MapOfListMaps;
4647
import org.locationtech.spatial4j.context.SpatialContext;
@@ -65,7 +66,8 @@ public abstract class AbstractSearchIndex implements SearchIndex {
6566
REJECTED_DATATYPES.add("http://www.w3.org/2001/XMLSchema#float");
6667
}
6768

68-
protected int maxDocs;
69+
protected int defaultNumDocs = -1;
70+
protected int maxDocs = Integer.MAX_VALUE;
6971

7072
protected Set<String> wktFields = Collections.singleton(SearchFields.getPropertyField(GEO.AS_WKT));
7173

@@ -75,8 +77,29 @@ public abstract class AbstractSearchIndex implements SearchIndex {
7577

7678
@Override
7779
public void initialize(Properties parameters) throws Exception {
78-
String maxDocParam = parameters.getProperty(LuceneSail.MAX_DOCUMENTS_KEY);
79-
maxDocs = (maxDocParam != null) ? Integer.parseInt(maxDocParam) : -1;
80+
String maxDocumentsParam = parameters.getProperty(LuceneSail.MAX_DOCUMENTS_KEY);
81+
String defaultNumDocsParam = parameters.getProperty(LuceneSail.DEFAULT_NUM_DOCS_KEY);
82+
83+
if ((maxDocumentsParam != null)) {
84+
maxDocs = Integer.parseInt(maxDocumentsParam);
85+
86+
// maxDocs is configured, so we have a known upper limit: an explicit defaultNumDocs is capped at maxDocs,
87+
// and a missing defaultNumDocs falls back to maxDocs
88+
defaultNumDocs = (defaultNumDocsParam != null) ? Math.min(maxDocs, Integer.parseInt(defaultNumDocsParam))
89+
: maxDocs;
90+
} else {
91+
// we can never return more than Integer.MAX_VALUE documents
92+
maxDocs = Integer.MAX_VALUE;
93+
94+
// legacy behaviour is to return the number of documents that the query would return if there was no limit,
95+
// so if the defaultNumDocs is not set, we set it to -1 to signal that there is no limit
96+
defaultNumDocs = (defaultNumDocsParam != null) ? Integer.parseInt(defaultNumDocsParam) : -1;
97+
}
98+
99+
if (defaultNumDocs > maxDocs) {
100+
throw new IllegalArgumentException(LuceneSail.DEFAULT_NUM_DOCS_KEY + " must be less than or equal to "
101+
+ LuceneSail.MAX_DOCUMENTS_KEY + " (" + defaultNumDocs + " > " + maxDocs + ")");
102+
}
80103

81104
String wktFieldParam = parameters.getProperty(LuceneSail.WKT_FIELDS);
82105
if (wktFieldParam != null) {
@@ -146,7 +169,7 @@ public boolean accept(Literal literal) {
146169

147170
// we reject literals that aren't in the list of the indexed lang
148171
if (indexedLangs != null
149-
&& (!literal.getLanguage().isPresent()
172+
&& (literal.getLanguage().isEmpty()
150173
|| !indexedLangs.contains(literal.getLanguage().get().toLowerCase()
151174
))) {
152175
return false;
@@ -353,11 +376,8 @@ public final synchronized void addRemoveStatements(Collection<Statement> added,
353376
// remove value from both property field and the
354377
// corresponding text field
355378
String field = SearchFields.getPropertyField(r.getPredicate());
356-
Set<String> removedValues = removedOfResource.get(field);
357-
if (removedValues == null) {
358-
removedValues = new HashSet<>();
359-
removedOfResource.put(field, removedValues);
360-
}
379+
Set<String> removedValues = removedOfResource.computeIfAbsent(field,
380+
k -> new HashSet<>());
361381
removedValues.add(val);
362382
}
363383
}
@@ -545,7 +565,8 @@ private Iterable<? extends DocumentScore> evaluateQuery(QuerySpec query) {
545565
hits = query(query.getSubject(), query);
546566
}
547567
} catch (Exception e) {
548-
logger.error("There was a problem evaluating query '" + query.getCatQuery() + "'!", e);
568+
logger.error("There was a problem evaluating query '{}'!", query.getCatQuery(), e);
569+
assert false : "There was a problem evaluating query '" + query.getCatQuery() + "'!";
549570
}
550571

551572
return hits;
@@ -717,6 +738,8 @@ private Iterable<? extends DocumentDistance> evaluateQuery(DistanceQuerySpec que
717738
} catch (Exception e) {
718739
logger.error("There was a problem evaluating distance query 'within " + distance + getUnitSymbol(units)
719740
+ " of " + from.getLabel() + "'!", e);
741+
assert false : "There was a problem evaluating distance query 'within " + distance + getUnitSymbol(units)
742+
+ " of " + from.getLabel() + "'!";
720743
}
721744

722745
return hits;
@@ -825,6 +848,8 @@ private Iterable<? extends DocumentResult> evaluateQuery(GeoRelationQuerySpec qu
825848
} catch (Exception e) {
826849
logger.error("There was a problem evaluating spatial relation query '" + query.getRelation() + " "
827850
+ qgeom.getLabel() + "'!", e);
851+
assert false : "There was a problem evaluating spatial relation query '" + query.getRelation() + " "
852+
+ qgeom.getLabel() + "'!";
828853
}
829854

830855
return hits;

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/LuceneSail.java

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -290,10 +290,17 @@ public class LuceneSail extends NotifyingSailWrapper {
290290
public static final String LUCENE_RAMDIR_KEY = "useramdir";
291291

292292
/**
293-
* Set the key "maxDocuments=&lt;n&gt;" as sail parameter to limit the maximum number of documents to return from a
294-
* search query. The default is to return all documents. NB: this may involve extra cost for some SearchIndex
293+
* Set the key "defaultNumDocs=&lt;n&gt;" as sail parameter to set how many documents a search query returns when it
294+
* does not request a limit itself. Defaults to "maxDocuments" if that is set, else to all documents. NB: returning
295295
* all documents may involve extra cost for some SearchIndex implementations as they may have to determine this number.
296296
*/
297+
public static final String DEFAULT_NUM_DOCS_KEY = "defaultNumDocs";
298+
299+
/**
300+
* Set the key "maxDocuments=&lt;n&gt;" as sail parameter to set an upper bound on the number of documents that a
301+
* single search query may return, even when the query requests a larger number. If the key is not set, the upper
302+
* bound is {@link Integer#MAX_VALUE}.
303+
*/
297304
public static final String MAX_DOCUMENTS_KEY = "maxDocuments";
298305

299306
/**

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/LuceneSailSchema.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ public class LuceneSailSchema {
5252

5353
public static final IRI CONTEXT;
5454

55+
public static final IRI NUM_DOCS;
56+
5557
static {
5658
ValueFactory factory = SimpleValueFactory.getInstance(); // compatible with beta4:
5759
// creating a new factory
@@ -73,5 +75,6 @@ public class LuceneSailSchema {
7375
WITHIN_DISTANCE = factory.createIRI(NAMESPACE + "withinDistance");
7476
DISTANCE = factory.createIRI(NAMESPACE + "distance");
7577
CONTEXT = factory.createIRI(NAMESPACE + "context");
78+
NUM_DOCS = factory.createIRI(NAMESPACE + "numDocs");
7679
}
7780
}

0 commit comments

Comments
 (0)