Skip to content

Commit 3257958

Browse files
authored
Implement use-def maps (#1144)
Part of #1141. Provides a map of definitions for every use (a reference to a variable) encountered in the code. Unlocks accurate file-local features like rename, goto definition, and find references. See the module-level documentation in `use_def_map.rs` to get oriented, in particular the discussions of the complications created by loops and `<<-`.
2 parents 11c658f + e45e416 commit 3257958

11 files changed

Lines changed: 1661 additions & 67 deletions

File tree

Cargo.lock

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ serde_json = { version = "1.0.94", features = ["preserve_order"] }
9494
serde_repr = "0.1.17"
9595
serde_with = "3.0.0"
9696
sha2 = "0.10.6"
97+
smallvec = "1.13.2"
9798
stdext = { path = "crates/stdext" }
9899
streaming-iterator = "0.1.9"
99100
strum = "0.26.2"

crates/oak_index/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,11 @@ workspace = true
1616
[dependencies]
1717
aether_syntax.workspace = true
1818
biome_rowan.workspace = true
19+
itertools.workspace = true
20+
log.workspace = true
1921
rustc-hash.workspace = true
22+
smallvec.workspace = true
23+
stdext.workspace = true
2024

2125
[dev-dependencies]
2226
aether_parser.workspace = true

crates/oak_index/src/builder.rs

Lines changed: 176 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ use crate::semantic_index::SymbolFlags;
3030
use crate::semantic_index::SymbolTableBuilder;
3131
use crate::semantic_index::Use;
3232
use crate::semantic_index::UseId;
33+
use crate::use_def_map::UseDefMapBuilder;
3334

3435
/// Build a [`SemanticIndex`] from a parsed R file.
3536
pub fn build(root: &RRoot) -> SemanticIndex {
@@ -47,6 +48,7 @@ struct SemanticIndexBuilder {
4748
symbol_tables: IndexVec<ScopeId, SymbolTableBuilder>,
4849
definitions: IndexVec<ScopeId, IndexVec<DefinitionId, Definition>>,
4950
uses: IndexVec<ScopeId, IndexVec<UseId, Use>>,
51+
use_def_maps: IndexVec<ScopeId, UseDefMapBuilder>,
5052
current_scope: ScopeId,
5153
}
5254

@@ -56,6 +58,7 @@ impl SemanticIndexBuilder {
5658
let mut symbol_tables = IndexVec::new();
5759
let mut definitions = IndexVec::new();
5860
let mut uses = IndexVec::new();
61+
let mut use_def_maps = IndexVec::new();
5962

6063
// The descendants range starts empty (`n+1..n+1`). `pop_scope` later
6164
// fills in `descendants.end` with the current arena length. Everything
@@ -67,15 +70,20 @@ impl SemanticIndexBuilder {
6770
descendants: ScopeId::from(1)..ScopeId::from(1),
6871
});
6972

73+
// All `ScopeId`-indexed vecs must be pushed in lockstep so they stay
74+
// the same length. The `push_scope()` method is in charge of
75+
// guaranteeing that invariant after construction.
7076
symbol_tables.push(SymbolTableBuilder::new());
7177
definitions.push(IndexVec::new());
7278
uses.push(IndexVec::new());
79+
use_def_maps.push(UseDefMapBuilder::new());
7380

7481
Self {
7582
scopes,
7683
symbol_tables,
7784
definitions,
7885
uses,
86+
use_def_maps,
7987
current_scope: file,
8088
}
8189
}
@@ -99,6 +107,7 @@ impl SemanticIndexBuilder {
99107
self.symbol_tables.push(SymbolTableBuilder::new());
100108
self.definitions.push(IndexVec::new());
101109
self.uses.push(IndexVec::new());
110+
self.use_def_maps.push(UseDefMapBuilder::new());
102111

103112
id
104113
}
@@ -120,17 +129,79 @@ impl SemanticIndexBuilder {
120129
kind: DefinitionKind,
121130
range: TextRange,
122131
) {
123-
let symbol = self.symbol_tables[self.current_scope].intern(name, flags);
132+
let symbol_id = self.symbol_tables[self.current_scope].intern(name, flags);
133+
let def_id = self.definitions[self.current_scope].push(Definition {
134+
symbol: symbol_id,
135+
kind,
136+
range,
137+
});
138+
self.use_def_maps[self.current_scope].ensure_symbol(symbol_id);
139+
self.use_def_maps[self.current_scope].record_definition(symbol_id, def_id);
140+
}
141+
142+
// Super-assignment is lexically in the current scope but binds in an
143+
// ancestor. We record the definition in the current scope and append
144+
// it to the target scope's use-def map (without shadowing prior
145+
// definitions).
146+
//
147+
// R's `<<-` walks up the environment chain from the parent, targeting
148+
// the first scope where the symbol is already bound. If no binding is
149+
// found, it assigns in the global (file) scope.
150+
fn add_super_definition(&mut self, name: &str, kind: DefinitionKind, range: TextRange) {
151+
let symbol_id =
152+
self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_SUPER_BOUND);
124153
self.definitions[self.current_scope].push(Definition {
125-
symbol,
154+
symbol: symbol_id,
155+
kind: kind.clone(),
156+
range,
157+
});
158+
159+
let target_scope = self.resolve_super_target(name);
160+
161+
let target_symbol = self.symbol_tables[target_scope].intern(name, SymbolFlags::IS_BOUND);
162+
let target_def_id = self.definitions[target_scope].push(Definition {
163+
symbol: target_symbol,
126164
kind,
127165
range,
128166
});
167+
self.use_def_maps[target_scope].ensure_symbol(target_symbol);
168+
self.use_def_maps[target_scope].record_deferred_definition(target_symbol, target_def_id);
169+
}
170+
171+
// Walk up from the parent scope looking for a scope where `name` already
172+
// has `IS_BOUND`. Returns that scope, or the file scope if no binding is
173+
// found (mirroring R's assignment to the global environment).
174+
fn resolve_super_target(&self, name: &str) -> ScopeId {
175+
let file_scope = ScopeId::from(0);
176+
let Some(mut scope) = self.scopes[self.current_scope].parent else {
177+
return file_scope;
178+
};
179+
180+
loop {
181+
if let Some(id) = self.symbol_tables[scope].id(name) {
182+
if self.symbol_tables[scope]
183+
.symbol(id)
184+
.flags()
185+
.contains(SymbolFlags::IS_BOUND)
186+
{
187+
return scope;
188+
}
189+
}
190+
let Some(parent) = self.scopes[scope].parent else {
191+
return file_scope;
192+
};
193+
scope = parent;
194+
}
129195
}
130196

131197
fn add_use(&mut self, name: &str, range: TextRange) {
132-
let symbol = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED);
133-
self.uses[self.current_scope].push(Use { symbol, range });
198+
let symbol_id = self.symbol_tables[self.current_scope].intern(name, SymbolFlags::IS_USED);
199+
let use_id = self.uses[self.current_scope].push(Use {
200+
symbol: symbol_id,
201+
range,
202+
});
203+
self.use_def_maps[self.current_scope].ensure_symbol(symbol_id);
204+
self.use_def_maps[self.current_scope].record_use(symbol_id, use_id);
134205
}
135206

136207
// --- Recursive descent ---
@@ -224,6 +295,10 @@ impl SemanticIndexBuilder {
224295
},
225296

226297
AnyRExpression::RForStatement(stmt) => {
298+
// The for variable is always bound (R sets it to NULL for
299+
// empty sequences), so its binding is recorded before the
300+
// snapshot. Assignments inside the body are conditional
301+
// (body may not execute for empty sequences).
227302
if let Ok(variable) = stmt.variable() {
228303
self.add_definition(
229304
&identifier_text(&variable),
@@ -235,17 +310,78 @@ impl SemanticIndexBuilder {
235310
if let Ok(sequence) = stmt.sequence() {
236311
self.collect_expression(&sequence);
237312
}
313+
314+
let pre_loop = self.use_def_maps[self.current_scope].snapshot();
315+
316+
if let Ok(body) = stmt.body() {
317+
let first_use = self.uses[self.current_scope].next_id();
318+
self.collect_expression(&body);
319+
self.use_def_maps[self.current_scope].finish_loop_defs(&pre_loop, first_use);
320+
}
321+
322+
self.use_def_maps[self.current_scope].merge(pre_loop);
323+
},
324+
325+
AnyRExpression::RIfStatement(stmt) => {
326+
// Condition is always evaluated
327+
if let Ok(condition) = stmt.condition() {
328+
self.collect_expression(&condition);
329+
}
330+
331+
let pre_if = self.use_def_maps[self.current_scope].snapshot();
332+
333+
// If-body (consequence)
334+
if let Ok(consequence) = stmt.consequence() {
335+
self.collect_expression(&consequence);
336+
}
337+
338+
let post_if = self.use_def_maps[self.current_scope].snapshot();
339+
self.use_def_maps[self.current_scope].restore(pre_if);
340+
341+
// Else-body (alternative), if present. If absent, the
342+
// "else path" is just the pre-if state we restored to.
343+
if let Some(else_clause) = stmt.else_clause() {
344+
if let Ok(alternative) = else_clause.alternative() {
345+
self.collect_expression(&alternative);
346+
}
347+
}
348+
349+
// After: definitions from both branches are live
350+
self.use_def_maps[self.current_scope].merge(post_if);
351+
},
352+
353+
AnyRExpression::RWhileStatement(stmt) => {
354+
if let Ok(condition) = stmt.condition() {
355+
self.collect_expression(&condition);
356+
}
357+
358+
let pre_loop = self.use_def_maps[self.current_scope].snapshot();
359+
360+
if let Ok(body) = stmt.body() {
361+
let first_use = self.uses[self.current_scope].next_id();
362+
self.collect_expression(&body);
363+
self.use_def_maps[self.current_scope].finish_loop_defs(&pre_loop, first_use);
364+
}
365+
366+
// Body may not execute
367+
self.use_def_maps[self.current_scope].merge(pre_loop);
368+
},
369+
370+
AnyRExpression::RRepeatStatement(stmt) => {
371+
// Body always executes at least once, so the pre-loop state is not merged back in afterwards; the snapshot below is only passed to `finish_loop_defs`
238372
if let Ok(body) = stmt.body() {
373+
let pre_loop = self.use_def_maps[self.current_scope].snapshot();
374+
let first_use = self.uses[self.current_scope].next_id();
239375
self.collect_expression(&body);
376+
self.use_def_maps[self.current_scope].finish_loop_defs(&pre_loop, first_use);
240377
}
241378
},
242379

243380
AnyRExpression::RBogusExpression(_) => {},
244381

245382
// Generic fallback: walk over descendant nodes and collect their
246383
// `AnyRExpression` children, letting `collect_expression`
247-
// handle their contents. This covers `RIfStatement`,
248-
// `RWhileStatement`, `RRepeatStatement`, `RUnaryExpression`,
384+
// handle their contents. This covers `RUnaryExpression`,
249385
// `RParenthesizedExpression`, `RReturnExpression`, literals, and
250386
// any future expression types without needing explicit arms.
251387
//
@@ -354,34 +490,16 @@ impl SemanticIndexBuilder {
354490
let target = if right { op.right() } else { op.left() };
355491
let Ok(target) = target else { return };
356492

357-
let (name, range) = match &target {
358-
AnyRExpression::RIdentifier(ident) => {
359-
let name = identifier_text(ident);
360-
let range = ident.syntax().text_trimmed_range();
361-
(name, range)
362-
},
363-
364-
// `"x" <- 1` is equivalent to `x <- 1` in R
365-
AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => {
366-
let Some(name) = string_value_text(s) else {
367-
return;
368-
};
369-
let range = s.syntax().text_trimmed_range();
370-
(name, range)
371-
},
372-
493+
let Some((name, range)) = assignment_target_name(&target) else {
373494
// Complex target (`x$foo <- rhs`, `x[1] <- rhs`, etc.) does
374495
// not represent a binding. We recurse for uses.
375-
other => {
376-
self.collect_expression(other);
377-
return;
378-
},
496+
self.collect_expression(&target);
497+
return;
379498
};
380499

381500
if super_assign {
382-
self.add_definition(
501+
self.add_super_definition(
383502
&name,
384-
SymbolFlags::IS_SUPER_BOUND,
385503
DefinitionKind::SuperAssignment(op.syntax().clone()),
386504
range,
387505
);
@@ -406,8 +524,17 @@ impl SemanticIndexBuilder {
406524

407525
fn finish(mut self) -> SemanticIndex {
408526
self.scopes[ScopeId::from(0)].descendants.end = self.scopes.next_id();
527+
409528
let symbol_tables = self.symbol_tables.into_iter().map(|b| b.build()).collect();
410-
SemanticIndex::new(self.scopes, symbol_tables, self.definitions, self.uses)
529+
let use_def_maps = self.use_def_maps.into_iter().map(|b| b.finish()).collect();
530+
531+
SemanticIndex::new(
532+
self.scopes,
533+
symbol_tables,
534+
self.definitions,
535+
self.uses,
536+
use_def_maps,
537+
)
411538
}
412539
}
413540

@@ -460,6 +587,26 @@ fn string_value_text(s: &aether_syntax::RStringValue) -> Option<String> {
460587
Some(text[1..text.len() - 1].to_string())
461588
}
462589

590+
/// Extract the binding name and range from an assignment target expression.
591+
/// Returns `None` for complex targets (`x$foo`, `x[1]`, etc.) that don't
592+
/// represent simple name bindings.
593+
fn assignment_target_name(target: &AnyRExpression) -> Option<(String, TextRange)> {
594+
match target {
595+
AnyRExpression::RIdentifier(ident) => {
596+
let name = identifier_text(ident);
597+
let range = ident.syntax().text_trimmed_range();
598+
Some((name, range))
599+
},
600+
// `"x" <- 1` is equivalent to `x <- 1` in R
601+
AnyRExpression::AnyRValue(AnyRValue::RStringValue(s)) => {
602+
let name = string_value_text(s)?;
603+
let range = s.syntax().text_trimmed_range();
604+
Some((name, range))
605+
},
606+
_ => None,
607+
}
608+
}
609+
463610
fn is_super_assignment(bin: &RBinaryExpression) -> bool {
464611
let Ok(op) = bin.operator() else {
465612
return false;

crates/oak_index/src/index_vec.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ pub trait Idx: Copy + fmt::Debug + Eq {
99

1010
/// A `Vec<V>` indexed by a strongly-typed newtype `I` instead of `usize`,
1111
/// so that indices from different vectors can't be mixed up.
12+
#[derive(Clone)]
1213
pub struct IndexVec<I: Idx, V> {
1314
raw: Vec<V>,
1415
_phantom: PhantomData<I>,

crates/oak_index/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
pub mod builder;
22
pub(crate) mod index_vec;
33
pub mod semantic_index;
4+
pub mod use_def_map;

0 commit comments

Comments
 (0)