Skip to content

Commit 36b25ca

Browse files
authored
[build] File-level test target indexing for precise affected test detection (#17033)
* [build] update target indexing to work by file not package * need to cache the dependency query for sources or else very slow * need to filter out aspect rules and node modules * filter before querying sources
1 parent 3334f0f commit 36b25ca

3 files changed

Lines changed: 100 additions & 57 deletions

File tree

.github/workflows/ci-build-index.yml

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -13,9 +13,9 @@ jobs:
1313
with:
1414
name: Build Test Index
1515
os: ubuntu
16-
run: ./go bazel:build_test_index bazel-test-target-index
17-
artifact-name: bazel-test-target-index
18-
artifact-path: bazel-test-target-index
16+
run: ./go bazel:build_test_index bazel-test-file-index
17+
artifact-name: bazel-test-file-index
18+
artifact-path: bazel-test-file-index
1919

2020
cache:
2121
name: Cache Index
@@ -26,9 +26,9 @@ jobs:
2626
- name: Download index
2727
uses: actions/download-artifact@v4
2828
with:
29-
name: bazel-test-target-index
29+
name: bazel-test-file-index
3030
- name: Cache index
3131
uses: actions/cache/save@v4
3232
with:
33-
path: bazel-test-target-index
34-
key: bazel-test-target-index-${{ github.run_id }}
33+
path: bazel-test-file-index
34+
key: bazel-test-file-index-${{ github.run_id }}

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ jobs:
3131
uses: ./.github/workflows/bazel.yml
3232
with:
3333
name: Check Targets
34-
cache-name: bazel-test-target-index
34+
cache-name: bazel-test-file-index
3535
run: |
3636
if [ "${{ github.event_name }}" == "schedule" ] || \
3737
[ "${{ github.event_name }}" == "workflow_call" ] || \
@@ -42,9 +42,9 @@ jobs:
4242
BASE_SHA="${{ github.event.pull_request.base.sha || github.event.before }}"
4343
HEAD_SHA="${{ github.event.pull_request.head.sha || 'HEAD' }}"
4444
if [ -n "$BASE_SHA" ]; then
45-
./go bazel:affected_targets "${BASE_SHA}..${HEAD_SHA}" bazel-test-target-index
45+
./go bazel:affected_targets "${BASE_SHA}..${HEAD_SHA}" bazel-test-file-index
4646
else
47-
./go bazel:affected_targets bazel-test-target-index
47+
./go bazel:affected_targets bazel-test-file-index
4848
fi
4949
fi
5050
artifact-name: check-targets

rake_tasks/bazel.rake

Lines changed: 91 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
require 'json'
44
require 'set'
55

6+
# Dirs that affect all bindings - changes here trigger "run all tests"
7+
HIGH_IMPACT_DIRS = %w[common rust/src javascript/atoms javascript/webdriver/atoms].freeze
8+
HIGH_IMPACT_PATTERN = %r{\A(?:#{HIGH_IMPACT_DIRS.map { |d| Regexp.escape(d) }.join('|')})(?:/|$)}
9+
610
# ./go bazel:affected_targets --> HEAD^..HEAD with default index
711
# ./go bazel:affected_targets abc123..def456 --> explicit range
812
# ./go bazel:affected_targets abc123..def456 my-index --> explicit range with custom index
@@ -12,7 +16,7 @@ task :affected_targets do |_task, args|
1216
values = args.to_a
1317
index_file = values.find { |value| File.exist?(value) }
1418
range = (values - [index_file]).first || 'HEAD'
15-
index_file ||= 'build/bazel-test-target-index'
19+
index_file ||= 'build/bazel-test-file-index'
1620

1721
base_rev, head_rev = if range.include?('..')
1822
range.split('..', 2)
@@ -25,11 +29,13 @@ task :affected_targets do |_task, args|
2529
changed_files = `git diff --name-only #{base_rev} #{head_rev}`.split("\n").map(&:strip).reject(&:empty?)
2630
puts "Changed files: #{changed_files.size}"
2731

28-
targets = if File.exist?(index_file)
32+
targets = if changed_files.any? { |f| f.match?(HIGH_IMPACT_PATTERN) }
33+
BINDING_TARGETS.values
34+
elsif File.exist?(index_file)
2935
affected_targets_with_index(changed_files, index_file)
3036
else
3137
puts 'No index found, using directory-based fallback'
32-
affected_targets_fallback(changed_files)
38+
affected_targets_by_directory(changed_files)
3339
end
3440

3541
if targets.empty?
@@ -42,13 +48,14 @@ task :affected_targets do |_task, args|
4248
end
4349
end
4450

45-
# ./go bazel:build_test_index --> 'build/bazel-test-target-index'
51+
# ./go bazel:build_test_index --> 'build/bazel-test-file-index'
4652
# ./go bazel:build_test_index my-index --> 'my-index'
4753
desc 'Build test target index for faster affected target lookup'
4854
task :build_test_index, [:index_file] do |_task, args|
49-
output = args[:index_file] || 'build/bazel-test-target-index'
55+
output = args[:index_file] || 'build/bazel-test-file-index'
5056

51-
index = {}
57+
# Flat index: file path → [test targets]
58+
index = Hash.new { |h, k| h[k] = [] }
5259
tests = []
5360

5461
exclude_tags = %w[manual spotbugs ie]
@@ -60,38 +67,63 @@ task :build_test_index, [:index_file] do |_task, args|
6067
Bazel.execute('query', ['--output=label'], "kind(#{kind}, #{all_bindings}) #{tag_exclusions}") do |out|
6168
tests = out.lines.map(&:strip).select { |l| l.start_with?('//') }
6269
end
63-
puts "Found #{tests.size} tests"
70+
puts "Found #{tests.size} test targets"
6471

72+
puts 'Building file → tests mapping...'
73+
srcs_cache = {}
6574
tests.each_with_index do |test, i|
6675
puts "Processing #{i + 1}/#{tests.size}: #{test}" if (i % 100).zero?
6776

68-
deps = []
69-
Bazel.execute('query', ['--output=label'], "deps(#{test})") do |out|
70-
deps = out.lines.map(&:strip).select { |l| l.start_with?('//', '@selenium//') }
77+
query_test_deps(test).each do |dep|
78+
srcs_cache[dep] ||= query_dep_srcs(dep)
79+
add_test_to_index(index, test, srcs_cache[dep])
7180
end
81+
end
82+
puts "Cached #{srcs_cache.size} dep → srcs lookups"
7283

73-
deps.each do |dep|
74-
pkg = bazel_label_to_package(dep)
75-
next if pkg.nil? || pkg.empty?
76-
77-
index[pkg] ||= []
78-
index[pkg] << test unless index[pkg].include?(test)
79-
end
84+
sorted_index = index.keys.sort.each_with_object({}) do |filepath, h|
85+
h[filepath] = index[filepath].uniq.sort
8086
end
8187

82-
sorted_index = index.keys.sort.each_with_object({}) { |k, h| h[k] = index[k].sort }
8388
FileUtils.mkdir_p(File.dirname(output))
8489
File.write(output, JSON.pretty_generate(sorted_index))
85-
puts "Wrote #{sorted_index.size} packages to #{output}"
90+
puts "Wrote index with #{sorted_index.size} files to #{output}"
8691
end
8792

88-
def bazel_label_to_package(label)
89-
# Skip external deps (but allow @selenium// which is internal)
90-
return nil if label.start_with?('@') && !label.start_with?('@selenium//')
93+
def query_test_deps(test)
94+
deps = []
95+
Bazel.execute('query', ['--output=label'], "deps(#{test}) intersect //... except attr(testonly, 1, //...)") do |out|
96+
deps = out.lines.map(&:strip).select { |l| l.start_with?('//') }
97+
end
98+
deps.reject do |d|
99+
# Skip high-impact dirs and root package targets (generated files, LICENSE, etc)
100+
HIGH_IMPACT_DIRS.any? { |dir| d.start_with?("//#{dir}") } || d.start_with?('//:')
101+
end
102+
rescue StandardError => e
103+
puts " Warning: Failed to query deps for #{test}: #{e.message}"
104+
[]
105+
end
106+
107+
def add_test_to_index(index, test, srcs)
108+
srcs.each do |src|
109+
# Convert //pkg:file to pkg/file
110+
filepath = src.sub(%r{^//}, '').tr(':', '/')
111+
# Skip dotnet tests for java sources (dotnet depends on java server but has no remote tests)
112+
next if filepath.start_with?('java/') && test.start_with?('//dotnet/')
113+
114+
index[filepath] << test
115+
end
116+
end
91117

92-
# Normalize @selenium//foo to foo, //foo to foo
93-
label = label.sub(%r{^@selenium//}, '').sub(%r{^//}, '')
94-
label.split(':').first
118+
def query_dep_srcs(dep)
119+
srcs = []
120+
Bazel.execute('query', ['--output=label'], "labels(srcs, #{dep})") do |out|
121+
srcs = out.lines.map(&:strip).select { |l| l.start_with?('//') && !l.start_with?('//:') }
122+
end
123+
srcs
124+
rescue StandardError => e
125+
puts " Warning: Failed to query srcs for #{dep}: #{e.message}"
126+
[]
95127
end
96128

97129
def find_bazel_package(filepath)
@@ -107,49 +139,60 @@ end
107139

108140
def affected_targets_with_index(changed_files, index_file)
109141
puts "Using index: #{index_file}"
142+
110143
begin
111144
index = JSON.parse(File.read(index_file))
112145
rescue JSON::ParserError => e
113146
puts "Invalid JSON in index file: #{e.message}"
114-
return affected_targets_fallback(changed_files)
147+
puts 'Using directory-based fallback'
148+
return affected_targets_by_directory(changed_files)
115149
end
116150

117-
test_files, lib_files = changed_files.partition { |f| f.match?(/[_-]test\.rb$|_test\.py$|Test\.java$|Tests?\.cs$|\.test\.[jt]s$|_spec\.rb$/) }
151+
test_files, lib_files = changed_files.partition { |f| f.match?(%r{[_-]test\.rb$|_tests?\.py$|Test\.java$|\.test\.[jt]s$|_spec\.rb$|^dotnet/test/}) }
118152

119153
affected = Set.new
154+
# Just test the tests
120155
affected.merge(targets_from_tests(test_files))
121156

122157
lib_files.each do |filepath|
123-
pkg = find_bazel_package(filepath)
124-
affected.merge(targets_from_lookup(pkg, index, filepath))
158+
tests = index[filepath]
159+
if tests
160+
puts " #{filepath}#{tests.size} tests"
161+
affected.merge(tests)
162+
else
163+
puts " #{filepath} not in index, querying for affected tests"
164+
affected.merge(query_unindexed_file(filepath))
165+
end
125166
end
126167

127168
affected.to_a
128169
end
129170

130-
def targets_from_lookup(pkg, index, filepath)
131-
# ignore files not associated with bazel package
132-
return [] if pkg.nil?
171+
def query_unindexed_file(filepath)
172+
pkg = find_bazel_package(filepath)
173+
return [] unless pkg
133174

134-
# Root package is empty string, not '.'
175+
rel = pkg == '.' ? filepath : filepath.sub(%r{^#{Regexp.escape(pkg)}/}, '')
135176
pkg = '' if pkg == '.'
136177

137-
# generate targets if package not in the index
138-
test_targets = index[pkg] || query_package_dep(pkg)
139-
140-
# dotnet tests depend on java server, but there are no remote tests, so safe to ignore
141-
filepath.start_with?('java/') ? test_targets.reject { |t| t.start_with?('//dotnet/') } : test_targets
142-
end
178+
# Find targets that contain this file in their srcs
179+
containing = []
180+
Bazel.execute('query', ['--output=label'], "attr(srcs, '#{rel}', //#{pkg}:*)") do |out|
181+
containing = out.lines.map(&:strip).select { |l| l.start_with?('//') }
182+
end
183+
return [] if containing.empty?
143184

144-
def query_package_dep(pkg)
145-
# Root package is empty string, not '.'
146-
pkg = '' if pkg == '.'
147-
puts "Package not in index, querying deps: //#{pkg}"
185+
# Find tests that depend on those targets
148186
targets = []
149-
Bazel.execute('query', ['--output=label'], "kind('.*_test', deps(//#{pkg}:all))") do |out|
187+
Bazel.execute('query', ['--output=label'], "kind(_test, rdeps(//..., #{containing.join(' + ')}))") do |out|
150188
targets = out.lines.map(&:strip).select { |l| l.start_with?('//') }
151189
end
152-
targets
190+
191+
# dotnet tests depend on java server, but there are no remote tests, so safe to ignore
192+
filepath.start_with?('java/') ? targets.reject { |t| t.start_with?('//dotnet/') } : targets
193+
rescue StandardError => e
194+
puts " Warning: Failed to query unindexed file #{filepath}: #{e.message}"
195+
[]
153196
end
154197

155198
def targets_from_tests(test_files)
@@ -158,7 +201,7 @@ def targets_from_tests(test_files)
158201

159202
query = test_files.filter_map { |f|
160203
pkg = find_bazel_package(f)
161-
next if pkg.nil?
204+
next unless pkg
162205

163206
# Bazel srcs often use paths relative to the package, not basenames.
164207
rel = f.sub(%r{^#{Regexp.escape(pkg)}/}, '')
@@ -168,13 +211,13 @@ def targets_from_tests(test_files)
168211
return [] if query.empty?
169212

170213
targets = []
171-
Bazel.execute('query', ['--output=label'], "kind('.*_test', #{query})") do |out|
214+
Bazel.execute('query', ['--output=label'], "kind(_test, #{query})") do |out|
172215
targets = out.lines.map(&:strip).select { |l| l.start_with?('//') }
173216
end
174217
targets
175218
end
176219

177-
def affected_targets_fallback(changed_files)
220+
def affected_targets_by_directory(changed_files)
178221
targets = Set.new
179222
top_level_dirs = changed_files.map { |f| f.split('/').first }.uniq
180223

0 commit comments

Comments
 (0)