Skip to content

Commit 9760531

Browse files
committed
Merge branch 'false_positive_symexpr' into 'main'
Add more heuristics to avoid false-positive symbolic operands Closes #633 See merge request rewriting/ddisasm!1254
2 parents 99a152e + 6e1deca commit 9760531

7 files changed

Lines changed: 245 additions & 16 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
* Enhance MIPS32 support:
44
- Fix several issues that could result in missing symbolic expressions
55
- Improve resolution of TLS-related symbolic expression
6+
* Fix bug that could lead to functional errors due to false-positive symbolic operands or data.
67

78
# 1.9.2
89

examples/asm_examples/ex_symbolic_operand_heuristics/Makefile

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
all: ex_original.s
3-
gcc ex_original.s -no-pie -o ex
3+
gcc ex_original.s -no-pie -Wl,-T,link.ld -o ex
44
@./ex > out.txt
55
clean:
66
rm -f ex out.txt

examples/asm_examples/ex_symbolic_operand_heuristics/ex_original.s

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,17 @@ rip_lea_misleading_call_rdx:
2525
# These should NOT be symbolized
2626
# We use "message" in the original to ensure it looks like an address
2727

28+
mov_immediate:
29+
# 0x712300 + 0x10 + 974064 = 0x800000
30+
# NOTE: 0x10 accounts for two extra 8-byte variables inserted at the
31+
# beginning of the .data section by the linker.
32+
mov rdi, OFFSET message+974064
33+
call aux_fun_print_imm
34+
imul rdi, 3
35+
print_peer:
36+
lea rdi, qword ptr [rip+str_peer]
37+
call aux_fun
38+
imul rdi, 3
2839
lea_multiplied:
2940
lea rax, qword ptr [rax+message]
3041
imul rax, 3
@@ -47,6 +58,62 @@ aux_fun_rdx:
4758
mov rdx, 0
4859
ret
4960

61+
aux_fun_print_imm:
62+
mov rsi, rdi
63+
lea rdi, qword ptr [rip+fmt_str]
64+
xor eax, eax
65+
call printf@plt
66+
ret
67+
68+
# This section is pinned at 0x712300 (see link.ld).
5069
.data
5170
message:
5271
.asciz "Hello"
72+
73+
fmt_str:
74+
.asciz "Hello: 0x%x\n"
75+
76+
.align 8
77+
# 0x712328
78+
.quad .L_726560
79+
numeric_data:
80+
# This is numeric and should NOT be symbolized as .L_800000.
81+
.byte 0x00 # 0x800000
82+
.byte 0x00
83+
.byte 0x80
84+
.byte 0x00
85+
.byte 0x00
86+
.byte 0x00
87+
.byte 0x00
88+
.byte 0x00
89+
.byte 0x00
90+
.byte 0x00
91+
.byte 0x00
92+
.byte 0x00
93+
.byte 0x00
94+
.byte 0x00
95+
.byte 0x00
96+
str_peer:
97+
# This is a string and should NOT be symbolized as .L_726565 or .L_726560+5.
98+
.byte 0x70 # .string "peer"
99+
.quad .L_726560+5
100+
.byte 0x00
101+
.byte 0x00
102+
.byte 0x00
103+
.byte 0x00
104+
.byte 0x00
105+
.byte 0x00
106+
.byte 0x00
107+
.byte 0x00
108+
# 0x712350
109+
.zero 82448
110+
# 0x726560
111+
.L_726560:
112+
.byte 0x77
113+
.zero 0x100
114+
115+
.align 8
116+
.quad str_peer
117+
118+
dummy:
119+
.fill 0x100000, 1, 0
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
SECTIONS
2+
{
3+
/* place message at fixed address */
4+
. = 0x712300;
5+
.data : {
6+
*(.data*)
7+
}
8+
}
9+
INSERT AFTER .rodata;

src/datalog/pointer_reattribution.dl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ point to an 'at-end' symbol.
8989

9090
////////////////////////////////////////////////////////////////////////////////////
9191

92+
moved_label_class(EA,0,"overlapping label"),
9293
moved_data_label(EA,Size,Dest,NewDest):-
9394
symbolic_data(EA,Size,Dest),
9495
arch.pointer_size(Pt_size),
@@ -97,6 +98,7 @@ moved_data_label(EA,Size,Dest,NewDest):-
9798

9899
//if something points to the middle of a known symbol we express it as symbol+constant
99100
//as long as it is not code
101+
moved_label_class(EA,0,"middle of symbol"),
100102
moved_data_label(EA,SizePointer,Dest,Address):-
101103
symbolic_data(EA,SizePointer,Dest),
102104
!code(Dest),
@@ -106,6 +108,7 @@ moved_data_label(EA,SizePointer,Dest,Address):-
106108
Dest < Address+Size.
107109

108110
// create a symbol+constant for overlapping instructions
111+
moved_label_class(EA,0,"overlapping instruction"),
109112
moved_data_label(EA,SizePointer,Dest,Address):-
110113
symbolic_data(EA,SizePointer,Dest),
111114
overlapping_instruction(Dest,Address).

src/datalog/self_diagnose.dl

Lines changed: 87 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,30 @@
1-
.decl false_negative(EA:address)
2-
.decl false_positive(EA:address)
3-
.output false_negative
4-
.output false_positive
5-
.decl relocation_in_operand(EA:address,Index:operand_index,Rel:address,InsnOffset:unsigned)
1+
//===- self_diagnose.dl ------------------------------------*- datalog -*-===//
2+
//
3+
// Copyright (C) 2019-2026 GrammaTech, Inc.
4+
//
5+
// This code is licensed under the GNU Affero General Public License
6+
// as published by the Free Software Foundation, either version 3 of
7+
// the License, or (at your option) any later version. See the
8+
// LICENSE.txt file in the project root for license terms or visit
9+
// https://www.gnu.org/licenses/agpl.txt.
10+
//
11+
// This program is distributed in the hope that it will be useful,
12+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
// GNU Affero General Public License for more details.
15+
//
16+
// This project is sponsored by the Office of Naval Research, One Liberty
17+
// Center, 875 N. Randolph Street, Arlington, VA 22203 under contract #
18+
// N68335-17-C-0700. The content of the information does not necessarily
19+
// reflect the position or policy of the Government and no official
20+
// endorsement should be inferred.
21+
//
22+
//===---------------------------------------------------------------------===//
23+
24+
// Predicates for self diagnosis
25+
626

27+
.decl relocation_in_operand(EA:address,Index:operand_index,Rel:address,InsnOffset:unsigned)
728

829
relocation_in_operand(EA,Index,Location,Offset):-
930
code(EA),
@@ -84,7 +105,20 @@ bad_symbol_constant(EA,Cnt):-
84105
Orig_minus_cnt2 != End.
85106

86107

87-
false_negative(Location):-
108+
/**
109+
Potentially false-positive / false-negative symbolic operand or data
110+
111+
EA: The address of instruction or data object containing FP/FN symbolic expression
112+
Location: The exact address of FP/FN symbolic expression within instruction or data
113+
Type: code or data
114+
*/
115+
.decl false_negative_symexpr(EA:address, Location:address, Type:symbol)
116+
.output false_negative_symexpr
117+
118+
.decl false_positive_symexpr(EA:address, Location:address, Type:symbol)
119+
.output false_positive_symexpr
120+
121+
false_negative_symexpr(EA,Location,"code"):-
88122
relocation_in_operand(EA,Index,Rel,_),
89123
!trivial_relocation(Rel),
90124
!symbolic_operand(EA,Index,_,_),
@@ -95,7 +129,7 @@ false_negative(Location):-
95129
),
96130
Location = EA+Offset.
97131

98-
false_positive(Location):-
132+
false_positive_symexpr(EA,Location,"code"):-
99133
code(EA),
100134
(
101135
symbolic_operand(EA,Index,_,_);
@@ -111,13 +145,7 @@ false_positive(Location):-
111145
),
112146
Location=EA+Offset.
113147

114-
.decl zero_relocation(EA:address)
115-
116-
zero_relocation(EA):-
117-
relocation(EA,_,_,_,_,_,_),
118-
data_word(EA,8,0).
119-
120-
false_negative(EA):-
148+
false_negative_symexpr(EA,EA,"data"):-
121149
data_byte(EA,_),
122150
relocation(EA,_,_,_,_,_,_),
123151
!zero_relocation(EA),
@@ -128,7 +156,7 @@ false_negative(EA):-
128156
EA >= BegSect,
129157
EA < EndSect.
130158

131-
false_positive(EA):-
159+
false_positive_symexpr(EA,EA,"data"):-
132160
(
133161
symbolic_data(EA,_,_);
134162
// it looks like jump tables where each entry 4 or less bytes do not have
@@ -141,3 +169,47 @@ false_positive(EA):-
141169
loaded_section(BegSect,EndSect,Sect),
142170
EA >= BegSect,
143171
EA < EndSect.
172+
173+
174+
.decl zero_relocation(EA:address)
175+
176+
zero_relocation(EA):-
177+
relocation(EA,_,_,_,_,_,_),
178+
data_word(EA,8,0).
179+
180+
181+
/**
182+
Output is the absolute value of Input.
183+
*/
184+
.decl abs_val(Input:number,Output:number) inline
185+
186+
abs_val(X,X):- X >= 0.
187+
abs_val(X,Y):- X < 0, Y = 0 - X.
188+
189+
190+
/**
191+
Moved labels with large offset (> 64), which are likely false positives.
192+
193+
moved_label_with_large_offset: symbolic operand in instruction
194+
moved_data_label_with_large_offset: symbolic expression in data
195+
*/
196+
.decl moved_label_with_large_offset(EA:address,Offset:number,Reason:symbol)
197+
.output moved_label_with_large_offset
198+
199+
.decl moved_data_label_with_large_offset(EA:address,Offset:number,Reason:symbol)
200+
.output moved_data_label_with_large_offset
201+
202+
moved_label_with_large_offset(EA,Offset,Reason):-
203+
moved_label(EA,Index,Dest,NewDest),
204+
!pc_relative_operand(EA,Index,Dest),
205+
Offset = as(Dest - NewDest,number),
206+
abs_val(Offset,AbsOffset),
207+
AbsOffset > 64,
208+
moved_label_class(EA,Index,Reason).
209+
210+
moved_data_label_with_large_offset(EA,Offset,Reason):-
211+
moved_data_label(EA,_,Dest,NewDest),
212+
Offset = as(Dest - NewDest,number),
213+
abs_val(Offset,AbsOffset),
214+
AbsOffset > 64,
215+
moved_label_class(EA,_,Reason).

src/datalog/symbolization.dl

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -303,6 +303,23 @@ symbolic_operand_point(EA,Imm_index,-1,"immediate is bitmask"):-
303303
instruction_get_operation(EA,Operation),
304304
arch.logic_operation(Operation).
305305

306+
// Heuristic: treat values whose low 20 bits are all zero (Imm band 0xFFFFF = 0) as bitmasks.
307+
//
308+
// This threshold is empirical, not architectural. It was chosen to avoid
309+
// misclassifying large immediates observed in bgpd (e.g., 0x800000)
310+
// as addresses, while remaining more conservative than a 16-bit cutoff.
311+
// Using 24 trailing zeros would miss this case; using 16 would increase
312+
// false positives. If new binaries show misclassification in either direction,
313+
// this threshold should be revisited.
314+
//
315+
symbolic_operand_point(EA,Imm_index,-1,"immediate may be bitmask"):-
316+
symbolic_operand_candidate(EA,Imm_index,_,_),
317+
instruction_get_op(EA,Imm_index,Op),
318+
op_immediate(Op,Imm,_),
319+
!defined_symbol(as(Imm,address),_,_,_,_,_,_,_,_),
320+
!loaded_section(as(Imm,address),_,_),
321+
0 = (Imm band 0xFFFFF).
322+
306323
symbolic_operand_point(EA,Imm_index,-2,"point to exception section"):-
307324
symbolic_operand_candidate(EA,Imm_index,Dest,_),
308325
exception_section(Name),
@@ -451,6 +468,21 @@ address_in_data_is_printable(EA):-
451468
address_in_data(EA,_),
452469
EAString <= EA, EA <= End - Pt_size.
453470

471+
/**
472+
The address appearing at 'EA' overlaps with a potential
473+
`ascii_string` and is therefore more likely to be spurious.
474+
*/
475+
.decl address_in_data_overlaps_string(EA:address, EAString:address)
476+
477+
address_in_data_overlaps_string(EA, EAString):-
478+
ascii_string(EAString,End),
479+
address_in_data(EA,_),
480+
(
481+
EAString <= EA, EA < End;
482+
arch.pointer_size(Pt_size),
483+
EAString < EA+Pt_size, EA+Pt_size <= End
484+
).
485+
454486
// address_in_data considers anything that points to the code region
455487
// this refinement restricts that to the beginning of the final blocks
456488
.decl address_in_data_refined(EA:address,Val:address)
@@ -544,6 +576,23 @@ string_candidate(EA,End,"ascii"):-
544576
EA >= DataBeg,
545577
End <= DataEnd.
546578

579+
/**
580+
String at EA is followed by another string at Next.
581+
*/
582+
.decl subsequent_string_candidate(EA:address,Next:address)
583+
584+
subsequent_string_candidate(EA,Next):-
585+
string_candidate(EA,End,_),
586+
string_candidate(Next,_,_),
587+
End <= Next,
588+
(
589+
End = Next
590+
;
591+
0 = count : {string_candidate(Begin,_,_), Begin >= End, Begin < Next},
592+
Next - End < 8,
593+
padding_block_limit(Next)
594+
).
595+
547596
/**
548597
String candidate refinement projects candidate string references onto a compound
549598
domain of "ascii" or "string" typed data objects, where "string" typed objects
@@ -798,6 +847,29 @@ data_object_point(EA,Pt_size,"symbol",2,"aligned"):-
798847
data_object_candidate(EA,Pt_size,"symbol"),
799848
EA % Pt_size = 0.
800849

850+
data_object_point(EA,Pt_size,"symbol",-2,"potentially bitmask"):-
851+
data_object_candidate(EA,Pt_size,"symbol"),
852+
EA % Pt_size = 0,
853+
address_in_data_refined(EA,Dest),
854+
!defined_symbol(Dest,_,_,_,_,_,_,_,_),
855+
0 = (Dest band 0xFFFFF).
856+
857+
data_object_point(EA,Pt_size,"symbol",-2,"printerable address: likely spurious"):-
858+
data_object_candidate(EA,Pt_size,"symbol"),
859+
EA % Pt_size = 0,
860+
address_in_data_is_printable(EA),
861+
address_in_data_refined(EA,Dest),
862+
!defined_symbol(Dest,_,_,_,_,_,_,_,_).
863+
864+
data_object_point(EA,Pt_size,"symbol",-1,"overlaps string: likely spurious"):-
865+
data_object_candidate(EA,Pt_size,"symbol"),
866+
EA % Pt_size = 0,
867+
address_in_data_overlaps_string(EA,EAString),
868+
ascii_string(EAString,End),
869+
End - EAString > 4,
870+
address_in_data_refined(EA,Dest),
871+
!defined_symbol(Dest,_,_,_,_,_,_,_,_).
872+
801873
data_object_point(EA,Pt_size,"symbol",4,"point-to-boundary-sym"):-
802874
data_object_candidate(EA,Pt_size,"symbol"),
803875
address_in_data_refined(EA,Dest),
@@ -852,6 +924,11 @@ data_object_point(EA,Size,"string",1,"string that has reference"):-
852924
code(Code),
853925
data_limit(EA).
854926

927+
data_object_point(EA,Size,"string",1,"string has string neighbors"):-
928+
data_object_candidate(EA,Size,"string"),
929+
subsequent_string_candidate(EA,_),
930+
subsequent_string_candidate(_,EA).
931+
855932
// data access negative heuristic
856933
data_object_point(EA,Size,"other",4,"data access"):-
857934
data_object_candidate(EA,Size,"other"),

0 commit comments

Comments
 (0)