Skip to content

Commit 699d185

Browse files
author
Junghee Lim
committed
Add initial_function_containing_return and refactor rules
1 parent b9b362d commit 699d185

7 files changed

Lines changed: 164 additions & 42 deletions

File tree

src/datalog/code_inference.dl

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,26 @@ likely_fallthrough(From,To):-
179179
conditional_jump(From)
180180
).
181181

182+
/**
183+
A block `Block` falls through to `NextBlock`, either directly (when `NextBlock` is not padding) or across one intervening NOP/padding block.
184+
`From` is the source instruction of the edge leading to `NextBlock`.
185+
*/
186+
.decl fallthrough_over_padding(Block:address, From:address, NextBlock:address)
187+
188+
fallthrough_over_padding(Block, From, NextBlock):-
189+
block_last_instruction(Block, BlockLast),
190+
may_fallthrough(BlockLast, NextBlock),
191+
!candidate_block_is_padding(NextBlock),
192+
From = BlockLast.
193+
194+
fallthrough_over_padding(Block, From, NextBlock):-
195+
block_last_instruction(Block, Inst),
196+
may_fallthrough(Inst, PadBlock),
197+
candidate_block_is_padding(PadBlock),
198+
block_last_instruction(PadBlock, PadLast),
199+
may_fallthrough(PadLast, NextBlock),
200+
From = PadLast.
201+
182202
//////////////////////////////////////////////////////////////
183203
// This is a small refinement for discarding immediates as targets
184204
// in some obvious cases. This is specially useful for PIE code where

src/datalog/code_inference_postprocess.dl

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -226,3 +226,22 @@ padding_prefix_end(EA,Block):-
226226
!arch.is_nop(EA),
227227
next(PrevEA,EA),
228228
padding_prefix(PrevEA,Block).
229+
230+
/**
231+
A refined block `Block` contains at least one `return` instruction.
232+
*/
233+
.decl refined_block_contains_return(Block:address)
234+
235+
refined_block_contains_return(Block):-
236+
refined_block(Block),
237+
code_in_refined_block(EA,Block),
238+
arch.return(EA).
239+
240+
/**
241+
A function `Func` contains at least one `return` instruction.
242+
*/
243+
.decl refined_block_contains_return_in_func(Func:address)
244+
245+
refined_block_contains_return_in_func(Func) :-
246+
function_inference.in_function(Block, Func),
247+
refined_block_contains_return(Block).

src/datalog/noreturn.dl

Lines changed: 71 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -128,32 +128,48 @@ no_return_block(Block,"no-fallthrough"):-
128128
no_return_call_propagated(BlockEnd,_)
129129
),
130130
block_boundaries(Block,Beg,EndBlock),
131-
EndBlock <= End,
132-
!plt_block(Block,_).
133-
131+
EndBlock <= End.
134132

135133
/**
136-
A function `Func` is called at `Call`, and the call falls through
137-
interprocedurally.
138-
There may be NOP paddings after the function call.
134+
A function `Func` is called at instruction `Call` and the call may fall through
135+
to the next instruction interprocedurally.
136+
NOP or padding instructions may follow the call.
139137
*/
140-
.decl call_may_fallthrough_inter(Call:address,Func:address,From:address)
141-
.output call_may_fallthrough_inter
138+
.decl call_may_fallthrough_inter(Call:address,Func:address)
142139

143-
call_may_fallthrough_inter(Call,Func,From):-
140+
call_may_fallthrough_inter(Call,Func):-
144141
direct_call(Call,Func),
145-
may_fallthrough(Call,Fallthrough),
146-
(
147-
!candidate_block_is_padding(Fallthrough),
148-
Next = Fallthrough,
149-
From = Call
150-
;
151-
candidate_block_is_padding(Fallthrough),
152-
block_last_instruction(Fallthrough,From),
153-
may_fallthrough(From,Next)
154-
),
142+
block_last_instruction(CallBlock,Call),
143+
fallthrough_over_padding(CallBlock,From,Next),
155144
inter_procedural_edge(From,Next).
156145

146+
/**
147+
Represents the next initial function following `Func`
148+
(from `function_entry_initial`, before use-def and value analysis).
149+
`NextFunc` is the immediate successor of `Func`.
150+
*/
151+
.decl next_function_entry_initial(Func:address,NextFunc:address)
152+
153+
next_function_entry_initial(Func, NextFunc) :-
154+
function_inference.function_entry_initial(Func),
155+
NextFunc = min F : {
156+
function_inference.function_entry_initial(F),
157+
F > Func
158+
}.
159+
160+
/**
161+
An initial function (`function_entry_initial`, prior to use-def
162+
and value analysis) contains one or more `return` instructions.
163+
This is a heuristic and may generate false positives.
164+
*/
165+
.decl initial_function_containing_return(Func:address,ReturnEA:address)
166+
167+
initial_function_containing_return(Func,EA):-
168+
next_function_entry_initial(Func, NextFunc),
169+
EA >= Func,
170+
EA < NextFunc,
171+
arch.return(EA).
172+
157173
/**
158174
Calls to known no return functions or their PLT blocks.
159175
*/
@@ -173,6 +189,17 @@ no_return_block(Block,"no_return_call_refined"):-
173189
no_return_call_refined(BlockEnd),
174190
block_last_instruction(Block,BlockEnd).
175191

192+
/**
193+
A call `Call` targets a function `CallTarget` that has another call-site
194+
which falls through interprocedurally.
195+
*/
196+
.decl call_target_has_other_fallthrough_inter(Call:address,CallTarget:address)
197+
198+
call_target_has_other_fallthrough_inter(Call,CallTarget):-
199+
call_may_fallthrough_inter(OtherCall,CallTarget),
200+
direct_call(Call,CallTarget),
201+
OtherCall != Call.
202+
176203
/**
177204
Calls to noreturn blocks.
178205
*/
@@ -192,7 +219,7 @@ no_return_call_propagated(EA,"direct_call to no_return_block"):-
192219
// it's interprocedural.
193220
no_return_call_propagated(Call,"interprocedural fallthrough"):-
194221
direct_call(Call,CallTarget),
195-
call_may_fallthrough_inter(Call,CallTarget,_),
222+
call_may_fallthrough_inter(Call,CallTarget),
196223
!no_return_call_refined(Call),
197224
!pc_load_call(Call,CallTarget).
198225

@@ -204,22 +231,31 @@ no_return_call_propagated(Call,"interprocedural fallthrough"):-
204231
// To avoid incorrectly removing legitimate fallthrough CFG edges, this rule
205232
// conservatively checks whether `Next` is a `possible_target`, and if not,
206233
// we do not classify the function call as noreturn function call.
207-
no_return_call_propagated(Call,"call possiblly_no_return_func and fallthrough-next is a possible target"):-
208-
direct_call(Call,CallTarget),
209-
!call_may_fallthrough_inter(Call,CallTarget,_),
234+
no_return_call_propagated(
235+
Call,
236+
"call possiblly_no_return_func and fallthrough-next is a possible target"
237+
):-
238+
call_target_has_other_fallthrough_inter(Call,CallTarget),
239+
!call_may_fallthrough_inter(Call,CallTarget),
210240
!pc_load_call(Call,CallTarget),
211241
!no_return_call_refined(Call),
212-
0 != count : {
213-
call_may_fallthrough_inter(OtherCall,CallTarget,_),
214-
OtherCall != Call
215-
},
216-
may_fallthrough(Call,Fallthrough),
242+
block_last_instruction(CallBlock,Call),
243+
fallthrough_over_padding(CallBlock,_,Next),
217244
(
218-
!candidate_block_is_padding(Fallthrough),
219-
Next = Fallthrough
245+
// No return instruction appears in the target function (internal).
246+
!initial_function_containing_return(CallTarget,_), UNUSED(Next),
247+
!plt_block(CallTarget,_)
220248
;
221-
candidate_block_is_padding(Fallthrough),
222-
block_last_instruction(Fallthrough,From),
223-
may_fallthrough(From,Next)
224-
),
225-
possible_target(Next).
249+
// If the target is external, there is not enough information to
250+
// determine whether it is conditionally non-returning.
251+
// As a heuristic, classify it as a no-return call only when
252+
// Next is a possible target.
253+
plt_block(CallTarget,_),
254+
possible_target(Next)
255+
;
256+
// The call target function may contain a return, which may
257+
// indicate that it is a conditionally non-returning function.
258+
// Further check whether Next is a possible target.
259+
initial_function_containing_return(CallTarget,_),
260+
possible_target(Next)
261+
).

src/datalog/self_diagnose.dl

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,3 +225,13 @@ no_return_func_with_return(Func):-
225225
function_inference.function_entry(Func),
226226
function_inference.in_function(Ret,Func),
227227
cfg_edge_to_top(Ret,_,"return").
228+
229+
/**
230+
Functions flagged by `initial_function_containing_return` for which no refined block in the function contains a return
231+
*/
232+
.decl false_positive_initial_function_with_return(Func:address,ReturnEA:address)
233+
.output false_positive_initial_function_with_return
234+
235+
false_positive_initial_function_with_return(Func,ReturnEA) :-
236+
initial_function_containing_return(Func,ReturnEA),
237+
!refined_block_contains_return_in_func(Func).

src/datalog/symbolization.dl

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -583,12 +583,13 @@ String at EA is followed by another string at Next.
583583

584584
subsequent_string_candidate(EA,Next):-
585585
string_candidate(EA,End,_),
586-
string_candidate(Next,_,_),
587-
End <= Next,
586+
Next = min Begin : {
587+
string_candidate(Begin,_,_),
588+
Begin >= End
589+
},
588590
(
589591
End = Next
590592
;
591-
0 = count : {string_candidate(Begin,_,_), Begin >= End, Begin < Next},
592593
Next - End < 8,
593594
padding_block_limit(Next)
594595
).

tests/cfg_test.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,11 +88,11 @@ def test_noreturn_conditional(self):
8888
ir_library = disassemble(binary).ir()
8989
m = ir_library.modules[0]
9090

91-
# check that the call blocks has fallthrough edges.
91+
# Check that the call blocks have outgoing fallthrough edges.
9292
for call_symbol_str in ["call_1", "call_2"]:
9393
call_symbol = next(m.symbols_named(call_symbol_str))
94-
assert isinstance(call_symbol.referent, gtirb.CodeBlock)
9594
call_block = call_symbol.referent
95+
assert isinstance(call_block, gtirb.CodeBlock)
9696

9797
outedges = [
9898
edge
@@ -101,6 +101,18 @@ def test_noreturn_conditional(self):
101101
]
102102
self.assertEqual(1, len(outedges))
103103

104+
# Check that the labeled fallthrough symbol right after the
105+
# no-return function call has no incoming fallthrough edge.
106+
fallthrough_sym = next(m.symbols_named("fall_through_func"))
107+
fallthrough_sym_block = fallthrough_sym.referent
108+
assert isinstance(fallthrough_sym_block, gtirb.CodeBlock)
109+
inedges = [
110+
edge
111+
for edge in fallthrough_sym_block.incoming_edges
112+
if edge.label.type == EdgeType.Fallthrough
113+
]
114+
self.assertEqual(0, len(inedges))
115+
104116
@unittest.skipUnless(
105117
platform.system() == "Linux", "This test is linux only."
106118
)

tests/misc_test.py

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -620,15 +620,22 @@ def test_generate_resources(self):
620620

621621
class SymbolSelectionTests(unittest.TestCase):
622622
def check_first_sym_expr(
623-
self, m: gtirb.Module, block_name: str, target_name: str
623+
self,
624+
m: gtirb.Module,
625+
block_name: str,
626+
target_name: str,
627+
code: bool = True,
624628
) -> None:
625629
"""
626630
Check that the first Symexpr in a block identified
627631
with symbol 'block_name' points to a symbol with
628632
name 'target_name'
629633
"""
630634
sym = next(m.symbols_named(block_name))
631-
self.assertIsInstance(sym.referent, gtirb.CodeBlock)
635+
if code:
636+
self.assertIsInstance(sym.referent, gtirb.CodeBlock)
637+
else:
638+
self.assertIsInstance(sym.referent, gtirb.DataBlock)
632639
block = sym.referent
633640
sexpr = sorted(
634641
[
@@ -724,6 +731,23 @@ def test_boundary_sym_expr(self):
724731
m = ir_library.modules[0]
725732
self.check_first_sym_expr(m, "load_end", "nums_end")
726733

734+
@unittest.skipUnless(
735+
platform.system() == "Linux", "This test is linux only."
736+
)
737+
def test_noreturn_use_def(self):
738+
"""
739+
Test that an instruction referencing the middle of a symbol address
740+
is correctly recognized as dead code and the symbol is correctly
741+
symbolized.
742+
"""
743+
744+
binary = Path("ex")
745+
with cd(ex_asm_dir / "ex_noreturn_use_def"):
746+
self.assertTrue(compile("gcc", "g++", "-O0", []))
747+
ir_library = disassemble(binary).ir()
748+
m = ir_library.modules[0]
749+
self.check_first_sym_expr(m, ".s_hello_ptr", ".s_hello", False)
750+
727751

728752
class ElfSymbolAuxdataTests(unittest.TestCase):
729753
@unittest.skipUnless(

0 commit comments

Comments
 (0)