Skip to content

Commit 45b6e86

Browse files
committed
Introduce MetricsStore.
1 parent 1348c3c commit 45b6e86

20 files changed

Lines changed: 2102 additions & 100 deletions

deployment-examples/metrics/grafana/dashboards/nativelink-stores.json

Lines changed: 1522 additions & 0 deletions
Large diffs are not rendered by default.

deployment-examples/metrics/grafana/dashboards/nativelink-worker.json

Lines changed: 87 additions & 51 deletions
Large diffs are not rendered by default.

deployment-examples/metrics/grafana/provisioning/dashboards/dashboards.yaml

Lines changed: 0 additions & 17 deletions
This file was deleted.

deployment-examples/metrics/grafana/provisioning/datasources/datasources.yaml

Lines changed: 0 additions & 18 deletions
This file was deleted.

deployment-examples/metrics/prometheus-recording-rules.yml

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,3 +279,92 @@ groups:
279279
sum(rate(nativelink_execution_completed_count_total[30d]))
280280
))
281281
) / (1 - 0.99)
282+
283+
- name: nativelink_stores
284+
interval: 30s
285+
rules:
286+
# Store cache hit rate by store type and name
287+
- record: nativelink:store_cache_hit_rate
288+
expr: |
289+
sum by (store_type, store_name) (
290+
rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result="hit"}[5m])
291+
) /
292+
sum by (store_type, store_name) (
293+
rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result=~"hit|miss"}[5m])
294+
)
295+
296+
# Store read latency percentiles
297+
- record: nativelink:store_read_latency_p50
298+
expr: |
299+
histogram_quantile(0.5,
300+
sum by (le, store_type, store_name) (
301+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
302+
)
303+
)
304+
305+
- record: nativelink:store_read_latency_p90
306+
expr: |
307+
histogram_quantile(0.9,
308+
sum by (le, store_type, store_name) (
309+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
310+
)
311+
)
312+
313+
- record: nativelink:store_read_latency_p99
314+
expr: |
315+
histogram_quantile(0.99,
316+
sum by (le, store_type, store_name) (
317+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="read"}[5m])
318+
)
319+
)
320+
321+
# Store write latency percentiles
322+
- record: nativelink:store_write_latency_p50
323+
expr: |
324+
histogram_quantile(0.5,
325+
sum by (le, store_type, store_name) (
326+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
327+
)
328+
)
329+
330+
- record: nativelink:store_write_latency_p90
331+
expr: |
332+
histogram_quantile(0.9,
333+
sum by (le, store_type, store_name) (
334+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
335+
)
336+
)
337+
338+
- record: nativelink:store_write_latency_p99
339+
expr: |
340+
histogram_quantile(0.99,
341+
sum by (le, store_type, store_name) (
342+
rate(nativelink_store_operation_duration_bucket{cache_operation_name="write"}[5m])
343+
)
344+
)
345+
346+
# Store operation rates
347+
- record: nativelink:store_read_rate
348+
expr: |
349+
sum by (store_type, store_name) (
350+
rate(nativelink_store_operations{cache_operation_name="read"}[5m])
351+
)
352+
353+
- record: nativelink:store_write_rate
354+
expr: |
355+
sum by (store_type, store_name) (
356+
rate(nativelink_store_operations{cache_operation_name="write"}[5m])
357+
)
358+
359+
# Store error rate (errors per second, by store type, store name, and operation)
360+
- record: nativelink:store_error_rate
361+
expr: |
362+
sum by (store_type, store_name, cache_operation_name) (
363+
rate(nativelink_store_operations{cache_operation_result="error"}[5m])
364+
)
365+
366+
# Overall store hit rate (aggregated across all stores)
367+
- record: nativelink:store_overall_hit_rate
368+
expr: |
369+
sum(rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result="hit"}[5m])) /
370+
sum(rate(nativelink_store_operations{cache_operation_name="read", cache_operation_result=~"hit|miss"}[5m]))

nativelink-service/tests/ac_server_test.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5656
store_manager.add_store(
5757
"main_cas",
5858
store_factory(
59+
"main_cas",
5960
&StoreSpec::Memory(MemorySpec::default()),
6061
&store_manager,
6162
None,
@@ -65,6 +66,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
6566
store_manager.add_store(
6667
"main_ac",
6768
store_factory(
69+
"main_ac",
6870
&StoreSpec::Memory(MemorySpec::default()),
6971
&store_manager,
7072
None,

nativelink-service/tests/bep_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5555
store_manager.add_store(
5656
BEP_STORE_NAME,
5757
store_factory(
58+
BEP_STORE_NAME,
5859
&StoreSpec::Memory(MemorySpec::default()),
5960
&store_manager,
6061
None,

nativelink-service/tests/bytestream_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
6060
store_manager.add_store(
6161
"main_cas",
6262
store_factory(
63+
"main_cas",
6364
&StoreSpec::Memory(MemorySpec::default()),
6465
&store_manager,
6566
None,

nativelink-service/tests/cas_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
5050
store_manager.add_store(
5151
"main_cas",
5252
store_factory(
53+
"main_cas",
5354
&StoreSpec::Memory(MemorySpec::default()),
5455
&store_manager,
5556
None,

nativelink-service/tests/execution_server_test.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ async fn make_store_manager() -> Result<Arc<StoreManager>, Error> {
3535
store_manager.add_store(
3636
"main_cas",
3737
store_factory(
38+
"main_cas",
3839
&StoreSpec::Memory(MemorySpec::default()),
3940
&store_manager,
4041
None,

0 commit comments

Comments
 (0)