Skip to content

Commit ec773bc

Browse files
committed
improved query explanation etc.
1 parent d77a7f5 commit ec773bc

8 files changed

Lines changed: 926 additions & 35 deletions

File tree

testsuites/benchmark-common/src/main/java/org/eclipse/rdf4j/benchmark/common/plan/QueryPlanCapture.java

Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,12 @@
1313

1414
import java.io.IOException;
1515
import java.io.InputStream;
16+
import java.math.BigDecimal;
1617
import java.nio.charset.StandardCharsets;
1718
import java.nio.file.Files;
1819
import java.nio.file.Path;
20+
import java.security.MessageDigest;
21+
import java.security.NoSuchAlgorithmException;
1922
import java.time.Instant;
2023
import java.time.ZoneOffset;
2124
import java.time.format.DateTimeFormatter;
@@ -29,6 +32,8 @@
2932
import java.util.concurrent.TimeUnit;
3033
import java.util.function.Function;
3134
import java.util.function.Supplier;
35+
import java.util.regex.Matcher;
36+
import java.util.regex.Pattern;
3237
import java.util.stream.Stream;
3338

3439
import org.eclipse.rdf4j.common.annotation.Experimental;
@@ -38,6 +43,7 @@
3843

3944
import com.fasterxml.jackson.annotation.JsonInclude;
4045
import com.fasterxml.jackson.databind.DeserializationFeature;
46+
import com.fasterxml.jackson.databind.JsonNode;
4147
import com.fasterxml.jackson.databind.ObjectMapper;
4248
import com.fasterxml.jackson.databind.SerializationFeature;
4349

@@ -58,6 +64,12 @@ public final class QueryPlanCapture {
5864
private static final DateTimeFormatter FILE_TIMESTAMP_FORMATTER = DateTimeFormatter
5965
.ofPattern("yyyyMMdd-HHmmssSSS")
6066
.withZone(ZoneOffset.UTC);
67+
private static final ObjectMapper JSON_MAPPER = new ObjectMapper()
68+
.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false);
69+
private static final Pattern ANONYMOUS_VARIABLE_NAME_PATTERN = Pattern
70+
.compile("(_anon_[A-Za-z]+_)[A-Za-z0-9]+");
71+
private static final Pattern ANONYMOUS_VARIABLE_TOKEN_PATTERN = Pattern
72+
.compile("_anon_[A-Za-z]+_[A-Za-z0-9]+");
6173

6274
private final ObjectMapper snapshotMapper = new ObjectMapper()
6375
.configure(SerializationFeature.INDENT_OUTPUT, true)
@@ -191,6 +203,7 @@ private QueryPlanExplanation captureLevel(Explanation.Level level, Explanation e
191203
captured.setLevel(level.name());
192204
captured.setExplanationText(explanation.toString());
193205
captured.setExplanationJson(explanation.toJson());
206+
captured.setDebugMetrics(extractDebugMetrics(captured.getExplanationJson()));
194207

195208
Object tupleExprObject = explanation.tupleExpr();
196209
if (tupleExprObject instanceof TupleExpr) {
@@ -205,6 +218,237 @@ private QueryPlanExplanation captureLevel(Explanation.Level level, Explanation e
205218
return captured;
206219
}
207220

221+
private static Map<String, String> extractDebugMetrics(String explanationJson) {
222+
LinkedHashMap<String, String> metrics = new LinkedHashMap<>();
223+
if (explanationJson == null || explanationJson.isBlank()) {
224+
return metrics;
225+
}
226+
227+
JsonNode root;
228+
try {
229+
root = JSON_MAPPER.readTree(explanationJson);
230+
} catch (Exception e) {
231+
metrics.put("metricsError", e.getClass().getSimpleName());
232+
return metrics;
233+
}
234+
235+
DebugMetricAccumulator accumulator = new DebugMetricAccumulator();
236+
appendDebugSignatures(root, 1, accumulator);
237+
238+
metrics.put("planNodeCount", Integer.toString(accumulator.planNodeCount));
239+
metrics.put("maxDepth", Integer.toString(accumulator.maxDepth));
240+
metrics.put("joinNodeCount", Integer.toString(accumulator.joinNodeCount));
241+
metrics.put("filterNodeCount", Integer.toString(accumulator.filterNodeCount));
242+
metrics.put("statementPatternCount", Integer.toString(accumulator.statementPatternCount));
243+
metrics.put("anonymousTypeTokenCount", Integer.toString(accumulator.anonymousTypeTokenCount));
244+
metrics.put("joinAlgorithmCounts", formatJoinAlgorithmCounts(accumulator.joinAlgorithmCounts));
245+
metrics.put("structureSignatureRawSha256", sha256Hex(accumulator.structureRawSignature.toString()));
246+
metrics.put("structureSignatureNormalizedSha256",
247+
sha256Hex(accumulator.structureNormalizedSignature.toString()));
248+
metrics.put("joinAlgorithmSignatureSha256", sha256Hex(accumulator.joinSignature.toString()));
249+
metrics.put("actualResultSizesSignatureSha256", sha256Hex(accumulator.actualSignature.toString()));
250+
metrics.put("estimatesSignatureSha256", sha256Hex(accumulator.estimatesSignature.toString()));
251+
252+
if (accumulator.costEstimateCount > 0) {
253+
metrics.put("costEstimateSum", toPlainString(accumulator.costEstimateSum));
254+
metrics.put("costEstimateMax", toPlainString(accumulator.costEstimateMax));
255+
}
256+
if (accumulator.resultSizeEstimateCount > 0) {
257+
metrics.put("resultSizeEstimateSum", toPlainString(accumulator.resultSizeEstimateSum));
258+
metrics.put("resultSizeEstimateMax", toPlainString(accumulator.resultSizeEstimateMax));
259+
}
260+
if (accumulator.resultSizeActualCount > 0) {
261+
metrics.put("resultSizeActualSum", toPlainString(accumulator.resultSizeActualSum));
262+
metrics.put("resultSizeActualMax", toPlainString(accumulator.resultSizeActualMax));
263+
}
264+
265+
return metrics;
266+
}
267+
268+
private static void appendDebugSignatures(JsonNode node, int depth, DebugMetricAccumulator accumulator) {
269+
if (node == null || node.isNull()) {
270+
appendAllNullTokens(accumulator);
271+
return;
272+
}
273+
274+
String rawType = readText(node, "type");
275+
String normalizedType = canonicalizeType(rawType);
276+
277+
accumulator.planNodeCount++;
278+
accumulator.maxDepth = Math.max(accumulator.maxDepth, depth);
279+
accumulator.anonymousTypeTokenCount += countAnonymousTokens(rawType);
280+
if (normalizedType.contains("Join")) {
281+
accumulator.joinNodeCount++;
282+
}
283+
if (normalizedType.startsWith("Filter")) {
284+
accumulator.filterNodeCount++;
285+
}
286+
if (normalizedType.startsWith("StatementPattern")) {
287+
accumulator.statementPatternCount++;
288+
}
289+
290+
accumulator.structureRawSignature.append('(').append(rawType);
291+
accumulator.structureNormalizedSignature.append('(').append(normalizedType);
292+
293+
String algorithm = readText(node, "algorithm");
294+
accumulator.joinSignature.append('(').append(normalizedType);
295+
if (normalizedType.contains("Join")) {
296+
accumulator.joinSignature.append("|algorithm=").append(algorithm);
297+
accumulator.joinAlgorithmCounts.merge(algorithm, 1, Integer::sum);
298+
}
299+
300+
String actual = readNumberToken(node, "resultSizeActual");
301+
accumulator.actualSignature.append('(')
302+
.append(normalizedType)
303+
.append("|resultSizeActual=")
304+
.append(actual);
305+
updateAggregate(actual, AggregateKind.ACTUAL_RESULT_SIZE, accumulator);
306+
307+
String cost = readNumberToken(node, "costEstimate");
308+
String estimate = readNumberToken(node, "resultSizeEstimate");
309+
accumulator.estimatesSignature.append('(')
310+
.append(normalizedType)
311+
.append("|costEstimate=")
312+
.append(cost)
313+
.append("|resultSizeEstimate=")
314+
.append(estimate);
315+
updateAggregate(cost, AggregateKind.COST_ESTIMATE, accumulator);
316+
updateAggregate(estimate, AggregateKind.RESULT_SIZE_ESTIMATE, accumulator);
317+
318+
JsonNode plans = node.get("plans");
319+
if (plans != null && plans.isArray()) {
320+
for (JsonNode child : plans) {
321+
appendDebugSignatures(child, depth + 1, accumulator);
322+
}
323+
}
324+
325+
accumulator.structureRawSignature.append(')');
326+
accumulator.structureNormalizedSignature.append(')');
327+
accumulator.joinSignature.append(')');
328+
accumulator.actualSignature.append(')');
329+
accumulator.estimatesSignature.append(')');
330+
}
331+
332+
private static void appendAllNullTokens(DebugMetricAccumulator accumulator) {
333+
accumulator.structureRawSignature.append("null");
334+
accumulator.structureNormalizedSignature.append("null");
335+
accumulator.joinSignature.append("null");
336+
accumulator.actualSignature.append("null");
337+
accumulator.estimatesSignature.append("null");
338+
}
339+
340+
private static void updateAggregate(String token, AggregateKind kind, DebugMetricAccumulator accumulator) {
341+
if (token == null || token.isBlank() || "<null>".equals(token)) {
342+
return;
343+
}
344+
BigDecimal value;
345+
try {
346+
value = new BigDecimal(token);
347+
} catch (NumberFormatException ignored) {
348+
return;
349+
}
350+
351+
switch (kind) {
352+
case COST_ESTIMATE:
353+
accumulator.costEstimateCount++;
354+
accumulator.costEstimateSum = accumulator.costEstimateSum.add(value);
355+
accumulator.costEstimateMax = accumulator.costEstimateMax == null
356+
? value
357+
: accumulator.costEstimateMax.max(value);
358+
break;
359+
case RESULT_SIZE_ESTIMATE:
360+
accumulator.resultSizeEstimateCount++;
361+
accumulator.resultSizeEstimateSum = accumulator.resultSizeEstimateSum.add(value);
362+
accumulator.resultSizeEstimateMax = accumulator.resultSizeEstimateMax == null
363+
? value
364+
: accumulator.resultSizeEstimateMax.max(value);
365+
break;
366+
case ACTUAL_RESULT_SIZE:
367+
accumulator.resultSizeActualCount++;
368+
accumulator.resultSizeActualSum = accumulator.resultSizeActualSum.add(value);
369+
accumulator.resultSizeActualMax = accumulator.resultSizeActualMax == null
370+
? value
371+
: accumulator.resultSizeActualMax.max(value);
372+
break;
373+
default:
374+
throw new IllegalStateException("Unhandled aggregate kind: " + kind);
375+
}
376+
}
377+
378+
private static String formatJoinAlgorithmCounts(Map<String, Integer> joinAlgorithmCounts) {
379+
if (joinAlgorithmCounts.isEmpty()) {
380+
return "<none>";
381+
}
382+
StringBuilder value = new StringBuilder();
383+
boolean first = true;
384+
for (Map.Entry<String, Integer> entry : joinAlgorithmCounts.entrySet()) {
385+
if (!first) {
386+
value.append(',');
387+
}
388+
value.append(entry.getKey()).append('=').append(entry.getValue());
389+
first = false;
390+
}
391+
return value.toString();
392+
}
393+
394+
private static int countAnonymousTokens(String value) {
395+
if (value == null || value.isBlank()) {
396+
return 0;
397+
}
398+
int count = 0;
399+
Matcher matcher = ANONYMOUS_VARIABLE_TOKEN_PATTERN.matcher(value);
400+
while (matcher.find()) {
401+
count++;
402+
}
403+
return count;
404+
}
405+
406+
private static String canonicalizeType(String type) {
407+
if (type == null || type.isBlank()) {
408+
return "<null>";
409+
}
410+
return ANONYMOUS_VARIABLE_NAME_PATTERN.matcher(type).replaceAll("$1<normalized>");
411+
}
412+
413+
private static String readText(JsonNode node, String field) {
414+
JsonNode value = node.get(field);
415+
if (value == null || value.isNull()) {
416+
return "<null>";
417+
}
418+
return value.asText();
419+
}
420+
421+
private static String readNumberToken(JsonNode node, String field) {
422+
JsonNode value = node.get(field);
423+
if (value == null || value.isNull()) {
424+
return "<null>";
425+
}
426+
String asText = value.asText();
427+
try {
428+
return new BigDecimal(asText).stripTrailingZeros().toPlainString();
429+
} catch (NumberFormatException ignored) {
430+
return asText;
431+
}
432+
}
433+
434+
private static String toPlainString(BigDecimal value) {
435+
return value.stripTrailingZeros().toPlainString();
436+
}
437+
438+
private static String sha256Hex(String input) {
439+
try {
440+
MessageDigest digest = MessageDigest.getInstance("SHA-256");
441+
byte[] bytes = digest.digest(input.getBytes(StandardCharsets.UTF_8));
442+
StringBuilder hex = new StringBuilder(bytes.length * 2);
443+
for (byte value : bytes) {
444+
hex.append(String.format("%02x", value));
445+
}
446+
return hex.toString();
447+
} catch (NoSuchAlgorithmException e) {
448+
throw new IllegalStateException("SHA-256 unavailable", e);
449+
}
450+
}
451+
208452
private void renderWithIr(TupleExpr tupleExpr, Function<TupleExpr, String> tupleExprRenderer,
209453
QueryPlanExplanation target) {
210454
if (tupleExprRenderer == null) {
@@ -293,4 +537,34 @@ private static String runGitCommand(String... args) {
293537
}
294538
}
295539
}
540+
541+
private enum AggregateKind {
542+
COST_ESTIMATE,
543+
RESULT_SIZE_ESTIMATE,
544+
ACTUAL_RESULT_SIZE
545+
}
546+
547+
private static final class DebugMetricAccumulator {
548+
private int planNodeCount;
549+
private int maxDepth;
550+
private int joinNodeCount;
551+
private int filterNodeCount;
552+
private int statementPatternCount;
553+
private int anonymousTypeTokenCount;
554+
private final StringBuilder structureRawSignature = new StringBuilder();
555+
private final StringBuilder structureNormalizedSignature = new StringBuilder();
556+
private final StringBuilder joinSignature = new StringBuilder();
557+
private final StringBuilder actualSignature = new StringBuilder();
558+
private final StringBuilder estimatesSignature = new StringBuilder();
559+
private final LinkedHashMap<String, Integer> joinAlgorithmCounts = new LinkedHashMap<>();
560+
private BigDecimal costEstimateSum = BigDecimal.ZERO;
561+
private BigDecimal costEstimateMax;
562+
private int costEstimateCount;
563+
private BigDecimal resultSizeEstimateSum = BigDecimal.ZERO;
564+
private BigDecimal resultSizeEstimateMax;
565+
private int resultSizeEstimateCount;
566+
private BigDecimal resultSizeActualSum = BigDecimal.ZERO;
567+
private BigDecimal resultSizeActualMax;
568+
private int resultSizeActualCount;
569+
}
296570
}

testsuites/benchmark-common/src/main/java/org/eclipse/rdf4j/benchmark/common/plan/QueryPlanExplanation.java

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@
1111
// Some portions generated by Codex
1212
package org.eclipse.rdf4j.benchmark.common.plan;
1313

14+
import java.util.LinkedHashMap;
15+
import java.util.Map;
16+
1417
import org.eclipse.rdf4j.common.annotation.Experimental;
1518

1619
/**
@@ -26,6 +29,7 @@ public class QueryPlanExplanation {
2629
private String tupleExprJson;
2730
private String irRenderedQuery;
2831
private String irRenderingError;
32+
private Map<String, String> debugMetrics = new LinkedHashMap<>();
2933

3034
public String getLevel() {
3135
return level;
@@ -82,4 +86,12 @@ public String getIrRenderingError() {
8286
public void setIrRenderingError(String irRenderingError) {
8387
this.irRenderingError = irRenderingError;
8488
}
89+
90+
public Map<String, String> getDebugMetrics() {
91+
return debugMetrics;
92+
}
93+
94+
public void setDebugMetrics(Map<String, String> debugMetrics) {
95+
this.debugMetrics = debugMetrics;
96+
}
8597
}

testsuites/benchmark-common/src/test/java/org/eclipse/rdf4j/benchmark/common/plan/QueryPlanCaptureTest.java

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,24 @@ void capturesAllExplanationLevelsAndIrRenderedQueries() throws IOException {
116116
assertEquals(outputFile.getFileName(), byFingerprint.get().getFileName());
117117
}
118118

119+
@Test
120+
void capturesPlanMetricsFieldsForPerformanceDebugging() throws IOException {
121+
QueryPlanCapture capture = new QueryPlanCapture();
122+
String query = "SELECT ?s WHERE { ?s ?p ?o . ?s ?p2 ?o2 . FILTER(?o != ?o2) }";
123+
QueryPlanCaptureContext context = QueryPlanCaptureContext.builder()
124+
.outputDirectory(tempDir)
125+
.queryId("metrics-select")
126+
.queryString(query)
127+
.benchmark("QueryPlanCaptureTest")
128+
.build();
129+
130+
Path outputFile = capture.captureAndWrite(context, () -> stubTupleQueryFor(query));
131+
String snapshotJson = Files.readString(outputFile);
132+
assertTrue(snapshotJson.contains("\"planNodeCount\""), snapshotJson);
133+
assertTrue(snapshotJson.contains("\"maxDepth\""), snapshotJson);
134+
assertTrue(snapshotJson.contains("\"joinAlgorithmCounts\""), snapshotJson);
135+
}
136+
119137
@Test
120138
void capturesGitBranchMetadataWhenConfigured() {
121139
String propertyKey = "rdf4j.query.plan.capture.gitBranch";

0 commit comments

Comments
 (0)