11from binaryninja import BinaryView , log_info , log_error , Symbol , SymbolType
2- from reait .api import RE_authentication , RE_search , RE_nearest_symbols_batch , RE_analyze_functions , RE_name_score
2+ from reait .api import RE_authentication , RE_search , RE_nearest_symbols_batch , RE_analyze_functions , RE_name_score , RE_functions_data_types , RE_functions_data_types_poll
33from concurrent .futures import ThreadPoolExecutor , as_completed
44from typing import List , Dict , Tuple
55import math
6+ from revengai .utils .datatypes import apply_data_types as apply_data_types_util
7+ import time
68from revengai .utils import rename_function as rename_function_util
9+ from libbs .api import DecompilerInterface
10+ from libbs .decompilers .binja .interface import BinjaInterface
11+ from libbs .artifacts import _art_from_dict
12+ from libbs .artifacts import (
13+ Function ,
14+ FunctionArgument ,
15+ GlobalVariable ,
16+ Enum ,
17+ Struct ,
18+ Typedef ,
19+ )
720
821class AutoUnstrip :
922 def __init__ (self , config ):
@@ -13,12 +26,63 @@ def __init__(self, config):
1326 self .path = None
1427 self .max_workers = 4
1528
16- def _process_batch (self , function_ids : List [int ], id_to_addr : Dict [int , int ], bv : BinaryView ) -> Tuple [int , List [str ]]:
def resolve_data_types(self, to_datatypes: List[Dict], id_to_addr: Dict[int, int], deci: DecompilerInterface, chunk_index: int, poll_interval: float = 3, max_wait: float = None) -> None:
    """Request data types for matched functions and apply them locally.

    Asks the service to generate data types for the nearest-neighbour
    function of every entry in ``to_datatypes``, polls until no item is
    reported as ``"pending"``, then applies each completed signature to
    the local function(s) that were matched to that neighbour.

    Args:
        to_datatypes: Match records; each is read for the keys
            ``'nearest_neighbor_id'`` and ``'origin_function_id'``.
        id_to_addr: Maps an origin function id to its address.
        deci: Decompiler interface handed through to
            ``apply_data_types_util``.
        chunk_index: Index of the batch, used only in log messages.
        poll_interval: Seconds to sleep between two polls.
        max_wait: Upper bound, in seconds, on the total time spent
            polling. ``None`` (the default) waits until nothing is
            pending any more.

    Returns:
        None. Every failure is logged with ``log_error`` instead of being
        raised, so one bad batch does not take down the caller.
    """
    try:
        # Several origin functions may share one nearest neighbour, so keep
        # every record per neighbour id instead of only the first match.
        targets_by_neighbor: Dict[int, List[Dict]] = {}
        for result in to_datatypes:
            targets_by_neighbor.setdefault(result['nearest_neighbor_id'], []).append(result)

        if not targets_by_neighbor:
            # Nothing to resolve; avoid a pointless request/poll cycle.
            return

        function_ids = list(targets_by_neighbor)
        log_info(f"RevEng.AI | Resolving data types for {len(function_ids)} functions")
        RE_functions_data_types(function_ids=function_ids)

        deadline = None if max_wait is None else time.monotonic() + max_wait
        items = []
        while True:
            response = RE_functions_data_types_poll(
                function_ids=function_ids,
            ).json()
            # `or` also covers an explicit null in the payload.
            data = response.get("data") or {}
            items = data.get("items") or []
            pending_count = sum(1 for item in items if item.get("status") == "pending")
            log_info(f"RevEng.AI | [Chunk {chunk_index}] {pending_count} items still pending...")
            if not pending_count:
                break
            if deadline is not None and time.monotonic() >= deadline:
                # Fall through and apply whatever has completed so far.
                log_error(f"RevEng.AI | [Chunk {chunk_index}] Stopped waiting with {pending_count} items still pending")
                break
            time.sleep(poll_interval)

        for item in items:
            function_id = item.get('function_id')
            log_info(f"RevEng.AI | Item: {function_id}")
            if item.get('status') != "completed":
                continue

            targets = targets_by_neighbor.get(function_id)
            if not targets:
                continue

            type_info = item.get("data_types") or {}
            func_types = type_info.get("func_types")
            func_deps = type_info.get("func_deps") or []
            log_info(f"RevEng.AI | Func types: {func_types}")
            if func_types is None:
                continue

            for result in targets:
                # Isolate each application so one failure does not abort
                # the remaining functions of the batch.
                try:
                    # Build a fresh artifact per target so nothing is shared
                    # between two applications.
                    fnc: Function = _art_from_dict(func_types)
                    if fnc.name is None:
                        log_info(f"Function {function_id} has no name, skipping signature application.")
                        # The name comes from the item, not the target, so
                        # the other targets would fail the same way.
                        break

                    addr = id_to_addr.get(result['origin_function_id'])
                    # Compare against None: address 0 is falsy but valid.
                    if addr is None:
                        continue

                    log_info(f"RevEng.AI | Applying signature for {fnc.name} at 0x{addr:x}")
                    signature_data = {"deps": func_deps, "function": fnc}
                    apply_data_types_util(addr, signature_data, deci)
                    log_info(f"RevEng.AI | Successfully applied signature for {fnc.name} at 0x{addr:x}")
                except Exception as e:
                    log_error(f"RevEng.AI | Error applying data types for function {function_id}: {str(e)}")

    except Exception as e:
        log_error(f"RevEng.AI | Error resolving data types: {str(e)}")
79+
80+ def _process_batch (self , function_ids : List [int ], id_to_addr : Dict [int , int ], bv : BinaryView , debug_symbols : bool , data_types : bool , deci : DecompilerInterface = None , chunk_index : int = 0 ) -> Tuple [int , List [str ]]:
1781 try :
1882 functions_by_distance = RE_nearest_symbols_batch (
1983 function_ids = function_ids ,
2084 distance = self .auto_unstrip_distance ,
21- debug_enabled = True ,
85+ debug_enabled = debug_symbols ,
2286 nns = 1
2387 ).json ()["function_matches" ]
2488
@@ -30,6 +94,7 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv
3094 #log_info(f"RevEng.AI | Functions by score: {functions_by_score}")
3195 renamed_count = 0
3296 errors = []
97+ to_datatypes = []
3398 for result in functions_by_distance :
3499 try :
35100 func_id = result ['origin_function_id' ]
@@ -47,15 +112,18 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv
47112
48113 for function in functions_by_score :
49114 if function ['function_id' ] == func_id :
50- if function ['box_plot' ]["average" ] < 0.9 :
115+ if function ['box_plot' ]["average" ] < 90 :
51116 log_info (f"RevEng.AI | Function { function ['function_id' ]} has a score of { function ['box_plot' ]['average' ]:.2f} for name { new_name_mangled } , skipping" )
52117 break
53118 else :
54119 log_info (f"RevEng.AI | Function { function ['function_id' ]} has a score of { function ['box_plot' ]['average' ]:.2f} for name { new_name_mangled } , renaming" )
120+ to_datatypes .append (result )
55121 if rename_function_util (bv , func_addr , new_name_mangled ):
56122 renamed_count += 1
57123 break
58-
124+
125+ if to_datatypes :
126+ self .resolve_data_types (to_datatypes , id_to_addr , deci , chunk_index )
59127
60128 except Exception as e :
61129 log_error (f"RevEng.AI | Error processing function { result ['origin_function_id' ]} : { str (e )} " )
@@ -66,10 +134,13 @@ def _process_batch(self, function_ids: List[int], id_to_addr: Dict[int, int], bv
66134 except Exception as e :
67135 return 0 , [str (e )]
68136
69- def auto_unstrip (self , bv : BinaryView ):
137+ def auto_unstrip (self , bv : BinaryView , options : Dict ):
70138 try :
71139 log_info ("RevEng.AI | Auto Unstripping binary" )
72140
141+ debug_symbols = options .get ("debug_symbols" , True )
142+ data_types = options .get ("data_types" , False )
143+
73144 self .base_addr = bv .image_base
74145 self .path = bv .file .filename
75146 binary_id = self .config .get_binary_id (bv )
@@ -97,9 +168,13 @@ def auto_unstrip(self, bv: BinaryView):
97168
98169 total_renamed = 0
99170 all_errors = []
171+ deci = None
172+ if data_types :
173+ deci = BinjaInterface (bv )
174+
100175 with ThreadPoolExecutor (max_workers = self .max_workers ) as executor :
101176 future_to_chunk = {
102- executor .submit (self ._process_batch , chunk , id_to_addr , bv ): i
177+ executor .submit (self ._process_batch , chunk , id_to_addr , bv , debug_symbols , data_types , deci , i ): i
103178 for i , chunk in enumerate (chunks )
104179 }
105180
0 commit comments