Skip to content

Commit 41d4869

Browse files
committed
GH-5149 Add search:numDocs property and maxQueryDocuments param in LuceneSail query
1 parent 3e4f94f commit 41d4869

10 files changed

Lines changed: 281 additions & 19 deletions

File tree

core/sail/elasticsearch/src/main/java/org/eclipse/rdf4j/sail/elasticsearch/ElasticsearchIndex.java

Lines changed: 39 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -577,10 +577,19 @@ protected Iterable<? extends DocumentScore> query(Resource subject, QuerySpec sp
577577
}
578578

579579
SearchHits hits;
580+
Integer numDocs = spec.getNumDocs();
580581
if (subject != null) {
581-
hits = search(subject, request, qb);
582+
if (numDocs != null) {
583+
hits = search(subject, request, qb, numDocs);
584+
} else {
585+
hits = search(subject, request, qb);
586+
}
582587
} else {
583-
hits = search(request, qb);
588+
if (numDocs != null) {
589+
hits = search(request, qb, numDocs);
590+
} else {
591+
hits = search(request, qb);
592+
}
584593
}
585594
return Iterables.transform(hits, new Function<>() {
586595

@@ -600,11 +609,24 @@ public DocumentScore apply(SearchHit hit) {
600609
* @return search hits
601610
*/
602611
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query) {
612+
return search(resource, request, query, -1);
613+
}
614+
615+
/**
616+
* Evaluates the given query only for the given resource.
617+
*
618+
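* @param resource the resource whose ID every returned hit must match
619+
* @param request  the search request builder passed on to the underlying search
620+
* @param query    the query to evaluate; it is combined with the resource ID restriction
621+
* @param numDocs  requested number of hits; a non-positive value means no per-query limit was requested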
622+
* @return search hits
623+
*/
624+
public SearchHits search(Resource resource, SearchRequestBuilder request, QueryBuilder query, int numDocs) {
603625
// rewrite the query
604626
QueryBuilder idQuery = QueryBuilders.termQuery(SearchFields.URI_FIELD_NAME,
605627
SearchFields.getResourceID(resource));
606628
QueryBuilder combinedQuery = QueryBuilders.boolQuery().must(idQuery).must(query);
607-
return search(request, combinedQuery);
629+
return search(request, combinedQuery, numDocs);
608630
}
609631

610632
@Override
@@ -712,9 +734,22 @@ private ShapeRelation toSpatialOp(String relation) {
712734
* Evaluates the given query and returns the results as a SearchHits instance.
713735
*/
714736
public SearchHits search(SearchRequestBuilder request, QueryBuilder query) {
737+
return search(request, query, -1);
738+
}
739+
740+
/**
741+
* Evaluates the given query and returns the results as SearchHits; a positive numDocs takes precedence over maxDocs.
742+
*/
743+
public SearchHits search(SearchRequestBuilder request, QueryBuilder query, int numDocs) {
715744
String[] types = getTypes();
716745
int nDocs;
717-
if (maxDocs > 0) {
746+
if (numDocs > 0) {
747+
if (maxQueryDocs > 0 && maxQueryDocs < numDocs) {
748+
nDocs = maxQueryDocs;
749+
} else {
750+
nDocs = numDocs;
751+
}
752+
} else if (maxDocs > 0) {
718753
nDocs = maxDocs;
719754
} else {
720755
long docCount = client.prepareSearch(indexName)

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/AbstractSearchIndex.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ public abstract class AbstractSearchIndex implements SearchIndex {
6666
}
6767

6868
protected int maxDocs;
69+
protected int maxQueryDocs;
6970

7071
protected Set<String> wktFields = Collections.singleton(SearchFields.getPropertyField(GEO.AS_WKT));
7172

@@ -77,6 +78,8 @@ public abstract class AbstractSearchIndex implements SearchIndex {
7778
public void initialize(Properties parameters) throws Exception {
7879
String maxDocParam = parameters.getProperty(LuceneSail.MAX_DOCUMENTS_KEY);
7980
maxDocs = (maxDocParam != null) ? Integer.parseInt(maxDocParam) : -1;
81+
String maxQueryDocParam = parameters.getProperty(LuceneSail.MAX_QUERY_DOCUMENTS_KEY);
82+
maxQueryDocs = (maxQueryDocParam != null) ? Integer.parseInt(maxQueryDocParam) : maxDocs;
8083

8184
String wktFieldParam = parameters.getProperty(LuceneSail.WKT_FIELDS);
8285
if (wktFieldParam != null) {

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/LuceneSail.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,6 +296,13 @@ public class LuceneSail extends NotifyingSailWrapper {
296296
*/
297297
public static final String MAX_DOCUMENTS_KEY = "maxDocuments";
298298

299+
/**
300+
* Set the key "maxQueryDocuments=&lt;n&gt;" as sail parameter to cap the number of documents that a single search
301+
* query may request. The default is the value of the {@link #MAX_DOCUMENTS_KEY}
302+
* parameter.
303+
*/
304+
public static final String MAX_QUERY_DOCUMENTS_KEY = "maxQueryDocuments";
305+
299306
/**
300307
* Set this key to configure which fields contain WKT and should be spatially indexed. The value should be a
301308
* space-separated list of URIs. Default is http://www.opengis.net/ont/geosparql#asWKT.

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/LuceneSailSchema.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@ public class LuceneSailSchema {
5252

5353
public static final IRI CONTEXT;
5454

55+
public static final IRI NUM_DOCS;
56+
5557
static {
5658
ValueFactory factory = SimpleValueFactory.getInstance(); // compatible with beta4:
5759
// creating a new factory
@@ -73,5 +75,6 @@ public class LuceneSailSchema {
7375
WITHIN_DISTANCE = factory.createIRI(NAMESPACE + "withinDistance");
7476
DISTANCE = factory.createIRI(NAMESPACE + "distance");
7577
CONTEXT = factory.createIRI(NAMESPACE + "context");
78+
NUM_DOCS = factory.createIRI(NAMESPACE + "numDocs");
7679
}
7780
}

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/QuerySpec.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,9 @@
1616
import java.util.stream.Collectors;
1717

1818
import org.eclipse.rdf4j.model.IRI;
19+
import org.eclipse.rdf4j.model.Literal;
1920
import org.eclipse.rdf4j.model.Resource;
21+
import org.eclipse.rdf4j.model.Value;
2022
import org.eclipse.rdf4j.query.algebra.QueryModelNode;
2123
import org.eclipse.rdf4j.query.algebra.SingletonSet;
2224
import org.eclipse.rdf4j.query.algebra.StatementPattern;
@@ -67,21 +69,43 @@ private static void append(Var var, StringBuilder buffer) {
6769

6870
private final StatementPattern idPattern;
6971

72+
private final StatementPattern numDocsPattern;
73+
7074
private final Resource subject;
7175

7276
private final String matchesVarName;
7377

7478
private final String scoreVarName;
7579

80+
private final Integer numDocs;
81+
7682
public QuerySpec(StatementPattern matchesPattern, Collection<QueryParam> queryPatterns,
7783
StatementPattern scorePattern, StatementPattern typePattern,
7884
StatementPattern idPattern, Resource subject) {
85+
this(matchesPattern, queryPatterns, scorePattern, typePattern, idPattern, null, subject);
86+
}
87+
88+
public QuerySpec(StatementPattern matchesPattern, Collection<QueryParam> queryPatterns,
89+
StatementPattern scorePattern, StatementPattern typePattern,
90+
StatementPattern idPattern, StatementPattern numDocsPattern, Resource subject) {
7991
this.matchesPattern = matchesPattern;
8092
this.queryPatterns = queryPatterns;
8193
this.scorePattern = scorePattern;
8294
this.typePattern = typePattern;
8395
this.idPattern = idPattern;
96+
this.numDocsPattern = numDocsPattern;
8497
this.subject = subject;
98+
if (numDocsPattern != null) {
99+
Value val = numDocsPattern.getObjectVar().getValue();
100+
if (val != null && val.isLiteral()) {
101+
this.numDocs = ((Literal) val).intValue();
102+
} else {
103+
throw new IllegalArgumentException("numDocs should be constant literal value");
104+
}
105+
} else {
106+
this.numDocs = null;
107+
}
108+
85109
if (matchesPattern != null) {
86110
this.matchesVarName = matchesPattern.getSubjectVar().getName();
87111
} else {
@@ -101,9 +125,11 @@ public QuerySpec(String matchesVarName, String propertyVarName, String scoreVarN
101125
this.matchesPattern = null;
102126
this.scorePattern = null;
103127
this.typePattern = null;
128+
this.numDocsPattern = null;
104129
this.queryPatterns = Set.of();
105130
this.idPattern = null;
106131
this.subject = subject;
132+
this.numDocs = null;
107133
}
108134

109135
@Override
@@ -121,6 +147,7 @@ public QueryModelNode removeQueryPatterns() {
121147
replace(getScorePattern(), replacement);
122148
replace(getTypePattern(), replacement);
123149
replace(getIdPattern(), replacement);
150+
replace(getNumDocsPattern(), replacement);
124151

125152
final QueryModelNode placeholder = new SingletonSet();
126153

@@ -154,6 +181,10 @@ public StatementPattern getScorePattern() {
154181
return scorePattern;
155182
}
156183

184+
public StatementPattern getNumDocsPattern() {
185+
return numDocsPattern;
186+
}
187+
157188
/**
158189
* The variable name associated with the query score
159190
*
@@ -163,6 +194,10 @@ public String getScoreVariableName() {
163194
return scoreVarName;
164195
}
165196

197+
public Integer getNumDocs() {
198+
return numDocs;
199+
}
200+
166201
public StatementPattern getTypePattern() {
167202
return typePattern;
168203
}

core/sail/lucene-api/src/main/java/org/eclipse/rdf4j/sail/lucene/QuerySpecBuilder.java

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.INDEXID;
1616
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.LUCENE_QUERY;
1717
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES;
18+
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.NUM_DOCS;
1819
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.PROPERTY;
1920
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY;
2021
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE;
@@ -152,7 +153,7 @@ public void process(TupleExpr tupleExpr, BindingSet bindings, Collection<SearchQ
152153
}
153154

154155
// find the relevant outgoing patterns
155-
StatementPattern typePattern, propertyPattern, scorePattern, snippetPattern;
156+
StatementPattern typePattern, propertyPattern, scorePattern, snippetPattern, numDocsPattern;
156157
List<StatementPattern> queryPatterns;
157158

158159
try {
@@ -161,6 +162,7 @@ public void process(TupleExpr tupleExpr, BindingSet bindings, Collection<SearchQ
161162
propertyPattern = getPattern(matchesVar, filter.propertyPatterns);
162163
scorePattern = getPattern(matchesVar, filter.scorePatterns);
163164
snippetPattern = getPattern(matchesVar, filter.snippetPatterns);
165+
numDocsPattern = getPattern(matchesVar, filter.numDocsPatterns);
164166
} catch (IllegalArgumentException e) {
165167
failOrWarn(e);
166168
continue;
@@ -302,7 +304,8 @@ else if (propertyValue != null) {
302304
queryString, propertyURI, null));
303305
}
304306

305-
QuerySpec querySpec = new QuerySpec(matchesPattern, queries, scorePattern, typePattern, idPattern, subject);
307+
QuerySpec querySpec = new QuerySpec(matchesPattern, queries, scorePattern, typePattern, idPattern,
308+
numDocsPattern, subject);
306309

307310
if (querySpec.isEvaluable()) {
308311
// constant optimizer
@@ -341,6 +344,10 @@ else if (propertyValue != null) {
341344
funcCall.addArg(new ValueConstant(LuceneSailSchema.SNIPPET));
342345
funcCall.addResultVar(snippetVar);
343346
}
347+
if (numDocsPattern != null) {
348+
funcCall.addArg(new ValueConstant(LuceneSailSchema.NUM_DOCS));
349+
funcCall.addArg(numDocsPattern.getObjectVar());
350+
}
344351

345352
Join join = new Join();
346353
matchesPattern.replaceWith(join);
@@ -465,6 +472,8 @@ private static class PatternFilter extends AbstractQueryModelVisitor<RuntimeExce
465472

466473
public ArrayList<StatementPattern> boostPatterns = new ArrayList<>();
467474

475+
public ArrayList<StatementPattern> numDocsPatterns = new ArrayList<>();
476+
468477
/**
469478
* Method implementing the visitor pattern that gathers all statements using a predicate from the LuceneSail's
470479
* namespace.
@@ -487,6 +496,8 @@ public void meet(StatementPattern node) {
487496
idPatterns.add(node);
488497
} else if (BOOST.equals(predicate)) {
489498
boostPatterns.add(node);
499+
} else if (NUM_DOCS.equals(predicate)) {
500+
numDocsPatterns.add(node);
490501
} else if (TYPE.equals(predicate)) {
491502
Value object = node.getObjectVar().getValue();
492503
if (LUCENE_QUERY.equals(object)) {

core/sail/lucene-api/src/test/java/org/eclipse/rdf4j/sail/lucene/QuerySpecBuilderTest.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.BOOST;
1515
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.LUCENE_QUERY;
1616
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES;
17+
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.NUM_DOCS;
1718
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY;
1819
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE;
1920
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SNIPPET;
@@ -55,6 +56,7 @@ public void testQueryInterpretation() {
5556
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
5657
"<" + QUERY + "> \"my Lucene query\"; " +
5758
"<" + SCORE + "> ?Score; " +
59+
"<" + NUM_DOCS + "> 76; " +
5860
"<" + SNIPPET + "> ?Snippet ]. } ";
5961
ParsedQuery query = parser.parseQuery(buffer, null);
6062
TupleExpr tupleExpr = query.getTupleExpr();
@@ -69,6 +71,8 @@ public void testQueryInterpretation() {
6971
assertEquals("Score", querySpec.getScorePattern().getObjectVar().getName());
7072
assertEquals("Snippet", param.getSnippetPattern().getObjectVar().getName());
7173
assertEquals(LUCENE_QUERY, querySpec.getTypePattern().getObjectVar().getValue());
74+
assertEquals(76, querySpec.getNumDocs());
75+
assertEquals(76, ((Literal) querySpec.getNumDocsPattern().getObjectVar().getValue()).intValue());
7276
assertEquals("my Lucene query", param.getQuery());
7377
assertNull(querySpec.getSubject());
7478
}
@@ -80,11 +84,13 @@ public void testMultipleQueriesInterpretation() {
8084
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
8185
"<" + QUERY + "> \"my Lucene query\"; " +
8286
"<" + SCORE + "> ?score1; " +
87+
"<" + NUM_DOCS + "> 86; " +
8388
"<" + SNIPPET + "> ?snippet1 ]. " +
8489
" ?sub2 <" + MATCHES + "> [ " +
8590
"<" + TYPE + "> <" + LUCENE_QUERY + ">; " +
8691
"<" + QUERY + "> \"second lucene query\"; " +
8792
"<" + SCORE + "> ?score2; " +
93+
"<" + NUM_DOCS + "> 13; " +
8894
"<" + SNIPPET + "> ?snippet2 ]. " +
8995
// and connect them both via any X in between, just as salt to make the
9096
// parser do something
@@ -103,6 +109,7 @@ public void testMultipleQueriesInterpretation() {
103109
// Matched the first
104110
assertEquals("sub1", querySpec.getMatchesPattern().getSubjectVar().getName());
105111
assertEquals(1, querySpec.getQueryPatterns().size());
112+
assertEquals(86, querySpec.getNumDocs());
106113
QuerySpec.QueryParam param = querySpec.getQueryPatterns().iterator().next();
107114
assertEquals("my Lucene query",
108115
((Literal) param.getQueryPattern().getObjectVar().getValue()).getLabel());
@@ -116,6 +123,7 @@ public void testMultipleQueriesInterpretation() {
116123
// and the second
117124
assertEquals("sub2", querySpec.getMatchesPattern().getSubjectVar().getName());
118125
assertEquals(1, querySpec.getQueryPatterns().size());
126+
assertEquals(13, querySpec.getNumDocs());
119127
QuerySpec.QueryParam param = querySpec.getQueryPatterns().iterator().next();
120128
assertEquals("second lucene query",
121129
((Literal) param.getQueryPattern().getObjectVar().getValue()).getLabel());

0 commit comments

Comments
 (0)