1+ from binaryninja import BinaryView , log_info , log_error , Function
2+ from reait .api import RE_authentication , RE_search , RE_nearest_symbols_batch , RE_analyze_functions
3+ from typing import List , Dict , Tuple , Optional
4+ import json
5+
class MatchFunctions:
    """Match functions of a Binary Ninja view against the RevEng.AI service.

    The instance caches the image base, file path, binary ID and the list of
    analysed functions from the most recent ``match_functions`` call.
    """

    # Upper bound on the number of search hits turned into collection entries.
    MAX_COLLECTION_RESULTS = 20
    # Minimum similarity (percent) for a match to be flagged as successful.
    SUCCESS_SIMILARITY_THRESHOLD = 90.0
    # Matched names with these prefixes are never flagged as successful.
    PLACEHOLDER_NAME_PREFIXES = ("sub_", "FUN_")

    def __init__(self, config):
        """Store the plugin configuration and reset all cached state.

        Args:
            config: Object providing ``get_binary_id(bv)``; it is only used by
                ``match_functions``.
        """
        self.config = config
        self.base_addr = None
        self.path = None
        self.binary_id = None
        self.analyzed_functions = []

    def search_collections(self, search_term: str = "") -> List[Dict]:
        """Search RevEng.AI and present the hits as collection-like entries.

        There is no dedicated collections call here; the result of
        ``RE_search`` is reshaped so each hit looks like a collection.

        Args:
            search_term: Value forwarded to ``RE_search`` as ``fpath``; a
                falsy value is sent as the empty string.

        Returns:
            At most ``MAX_COLLECTION_RESULTS`` dictionaries, or an empty list
            when the response has no ``query_results`` key or any error
            occurs (errors are logged, never raised).
        """
        try:
            log_info(f"RevEng.AI | Searching collections with term: '{search_term}'")

            # Since RE_collections might not be available, RE_search is used
            # and collections are simulated from its results.
            search_results = RE_search(fpath=search_term or "").json()

            if "query_results" not in search_results:
                log_error("RevEng.AI | No search results found")
                return []

            query_results = search_results["query_results"]

            # The position in the result list doubles as the collection id.
            collections = [
                {
                    "id": str(i),
                    "name": result.get("binary_name", f"Binary_{i}"),
                    "type": "Binary",
                    "date": result.get("upload_date", "Unknown"),
                    "model_name": result.get("model", "Unknown"),
                    "owner": "RevEng.AI",
                    "binary_data": result,  # Keep the original hit around
                }
                for i, result in enumerate(query_results[:self.MAX_COLLECTION_RESULTS])
            ]

            log_info(f"RevEng.AI | Found {len(collections)} collections")
            return collections

        except Exception as e:
            # Best effort: callers get an empty list instead of an exception.
            log_error(f"RevEng.AI | Error searching collections: {str(e)}")
            return []

    def get_collection_functions(self, collection_id: str) -> List[Dict]:
        """Return a simulated list of functions for a collection.

        No API call is made; ten placeholder entries are generated. A real
        implementation would query the service for the collection's functions.

        Args:
            collection_id: Identifier embedded in each entry's ``binary_name``.

        Returns:
            Ten placeholder function dictionaries, or an empty list if an
            error occurs (errors are logged, never raised).
        """
        try:
            log_info(f"RevEng.AI | Getting functions from collection {collection_id}")

            functions = [
                {
                    "function_id": f"func_{i}",
                    "function_name": f"function_{i}",
                    "signature": f"void function_{i}()",
                    "binary_name": f"collection_{collection_id}",
                }
                for i in range(10)  # Simulate 10 functions
            ]

            log_info(f"RevEng.AI | Found {len(functions)} functions in collection")
            return functions

        except Exception as e:
            log_error(f"RevEng.AI | Error getting collection functions: {str(e)}")
            return []

    def match_functions(self, bv: "BinaryView", distance_threshold: float = 0.1, max_matches: int = 10) -> List[Dict]:
        """Match the functions of ``bv`` against the RevEng.AI database.

        Args:
            bv: Binary Ninja view of the binary to match.
            distance_threshold: Forwarded to ``RE_nearest_symbols_batch`` as
                ``distance``.
            max_matches: Forwarded to ``RE_nearest_symbols_batch`` as ``nns``.

        Returns:
            One enriched dictionary per returned match whose origin function
            is known locally. Empty when there are no analysed functions or
            the response has no ``function_matches`` key.

        Raises:
            Exception: If the binary is not found by ``RE_search``; any other
                error raised while matching is logged and re-raised.
        """
        try:
            log_info("RevEng.AI | Starting function matching")

            self.base_addr = bv.image_base
            self.path = bv.file.filename
            self.binary_id = self.config.get_binary_id(bv)

            log_info(f"RevEng.AI | Binary path: {self.path}")
            log_info(f"RevEng.AI | Binary ID: {self.binary_id}")

            # Search for the binary; a missing key is treated like "no hits"
            # so the caller gets the explicit message below, not a KeyError.
            search_results = RE_search(fpath=self.path).json().get("query_results") or []
            log_info(f"RevEng.AI | Search results: {len(search_results)} found")

            if not search_results:
                raise Exception("Binary not found in RevEng.AI database. Please upload the binary first.")

            # Get analyzed functions
            analysis = RE_analyze_functions(self.path, self.binary_id).json()
            self.analyzed_functions = analysis.get("functions") or []
            function_ids = [func["function_id"] for func in self.analyzed_functions]

            log_info(f"RevEng.AI | Found {len(function_ids)} functions to match")

            if not function_ids:
                # Nothing to look up; skip the batch request entirely.
                return []

            # Map each function id to its address in this view.
            # NOTE(review): assumes function_vaddr is relative to the image
            # base - confirm against the RevEng.AI response format.
            id_to_addr = {
                func["function_id"]: func["function_vaddr"] + self.base_addr
                for func in self.analyzed_functions
            }

            # Batch match functions
            matches_result = RE_nearest_symbols_batch(
                function_ids=function_ids,
                distance=distance_threshold,
                debug_enabled=True,
                nns=max_matches,
            ).json()

            if "function_matches" not in matches_result:
                log_error("RevEng.AI | No function matches found in response")
                return []

            enriched_matches = []
            for match in matches_result["function_matches"]:
                func_id = match.get("origin_function_id")
                func_addr = id_to_addr.get(func_id)

                # Compare against None so a function at address 0 is kept.
                if func_addr is None:
                    continue

                bn_function = bv.get_function_at(func_addr)
                original_name = bn_function.name if bn_function else f"sub_{func_addr:X}"
                enriched_matches.append(
                    self._build_match_record(match, func_id, func_addr, original_name)
                )

            log_info(f"RevEng.AI | Successfully matched {len(enriched_matches)} functions")
            return enriched_matches

        except Exception as e:
            log_error(f"RevEng.AI | Error matching functions: {str(e)}")
            raise  # Bare raise keeps the original traceback intact.

    @staticmethod
    def _build_match_record(match: Dict, func_id, func_addr: int, original_name: str) -> Dict:
        """Turn one raw match entry into the enriched result dictionary."""
        # A missing or null distance is treated as the worst case (1.0).
        distance = match.get("distance")
        if distance is None:
            distance = 1.0
        similarity_percentage = (1.0 - distance) * 100
        # Confidence currently mirrors similarity; no separate score is used.
        confidence_percentage = similarity_percentage

        # A missing, null or empty name is reported as "N/A".
        matched_name = match.get("nearest_neighbor_function_name") or "N/A"
        is_successful = (
            matched_name != "N/A"
            and not matched_name.startswith(MatchFunctions.PLACEHOLDER_NAME_PREFIXES)
            and similarity_percentage >= MatchFunctions.SUCCESS_SIMILARITY_THRESHOLD
        )

        return {
            "function_id": func_id,
            "function_address": func_addr,
            "original_name": original_name,
            "matched_name": matched_name,
            "matched_name_mangled": match.get("nearest_neighbor_function_name_mangled", "N/A"),
            "signature": match.get("signature", "N/A"),
            "matched_binary": match.get("nearest_neighbor_binary_name", "N/A"),
            "distance": distance,
            "similarity": f"{similarity_percentage:.2f}%",
            "confidence": f"{confidence_percentage:.2f}%",
            "successful": "Yes" if is_successful else "No",
        }

    def get_function_details(self, bv: "BinaryView", function_address: int) -> Optional[Dict]:
        """Collect summary statistics for the function at an address.

        Args:
            bv: Binary Ninja view to query.
            function_address: Address passed to ``bv.get_function_at``.

        Returns:
            A dictionary with the function's name, hex address, size, and
            counts of basic blocks, instructions, call sites, callers and
            callees; ``None`` when no function is found or an error occurs
            (errors are logged, never raised).
        """
        try:
            function = bv.get_function_at(function_address)
            if not function:
                return None

            return {
                "name": function.name,
                "address": hex(function_address),
                # total_bytes is used for the byte size.
                # NOTE(review): len(function) is believed to raise, since
                # Function appears to define no __len__ - confirm.
                "size": function.total_bytes,
                "basic_blocks": len(function.basic_blocks),
                # len(basic_block) is a byte length in Binary Ninja;
                # instruction_count is the per-block instruction total.
                "instructions": sum(bb.instruction_count for bb in function.basic_blocks),
                "call_sites": len(function.call_sites),
                "callers": len(function.callers),
                "callees": len(function.callees),
            }

        except Exception as e:
            log_error(f"RevEng.AI | Error getting function details: {str(e)}")
            return None
0 commit comments