@@ -13,6 +13,7 @@
 
 # Define the input file
 INPUT_FILE = "../Database-Files/Edit-Database/Compromised-Discord-Accounts.json"
+CACHE_FILE = "domain_cache.json"  # File to store cached domain results
 IPINFO_URL = "https://ipinfo.io/"
 
 # Domains that should be automatically set to US
@@ -29,6 +30,19 @@
 }
 
 
+def load_cache():
+    try:
+        with open(CACHE_FILE, "r", encoding="utf-8") as file:
+            return json.load(file)
+    except (FileNotFoundError, json.JSONDecodeError):
+        return {}
+
+
+def save_cache(cache):
+    with open(CACHE_FILE, "w", encoding="utf-8") as file:
+        json.dump(cache, file, indent=4)
+
+
 def log(message):
     timestamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
     print(f"{timestamp} {message}")
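
The cache introduced above is a flat JSON object mapping each domain to the country code it last resolved to, so a populated domain_cache.json would look something like the sketch below (the domains and codes are illustrative, not taken from the dataset):

    {
        "example-phish.com": "RU",
        "another-redirector.net": "US",
        "unreachable.example": "N/A"
    }

Because load_cache() catches both FileNotFoundError and json.JSONDecodeError, a missing or corrupted cache file degrades gracefully to an empty dict and the script simply re-queries every domain on that run.
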
@@ -45,11 +59,16 @@ def resolve_ip(domain):
         return None
 
 
-def get_geolocation(domain):
+def get_geolocation(domain, cache):
     if domain in AUTO_US_DOMAINS:
         log(f"Skipping API call for {domain}, setting to US")
         return "US"
 
+    # Check cache first
+    if domain in cache:
+        log(f"Using cached result for {domain}: {cache[domain]}")
+        return cache[domain]
+
     # Resolve the domain to an IP address
     ip_address = resolve_ip(domain)
     if not ip_address:
@@ -63,6 +82,9 @@ def get_geolocation(domain):
         data = response.json()
         country = data.get("country", "N/A")
         log(f"Received response for {domain} (IP: {ip_address}): {country}")
+
+        # Update cache with new result
+        cache[domain] = country
         return country
     except requests.RequestException as e:
         log(f"Error querying {domain} (IP: {ip_address}): {e}")
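
With the cache threaded through as a parameter, each distinct domain now costs at most one DNS resolution and one ipinfo.io request per run. A minimal sketch of the resulting call pattern, assuming the functions above are imported from this script (the domain is illustrative):

    cache = load_cache()
    get_geolocation("example-phish.com", cache)  # resolves the domain, queries ipinfo.io, caches the result
    get_geolocation("example-phish.com", cache)  # answered from the cache, no network traffic
    save_cache(cache)

Note that only the success path shown above writes to the cache; the error branches are left uncached, so transient failures are retried on a later run rather than pinned to "N/A".
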
@@ -96,7 +118,6 @@ def update_auto_us_domains(accounts):
     for account_id, details in accounts.items():
         final_url_domain = details.get("FINAL_URL_DOMAIN", "")
         if final_url_domain in AUTO_US_DOMAINS:
-            # Only update if we're actually processing this domain
             accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = "US"
             accounts[account_id]["LAST_CHECK"] = current_time
             updated_count += 1
@@ -111,6 +132,10 @@ def update_auto_us_domains(accounts):
 
 
 def update_compromised_accounts(start_from=0):
+    # Load cache at start
+    domain_cache = load_cache()
+    log(f"Loaded {len(domain_cache)} cached domain results")
+
     with open(INPUT_FILE, "r", encoding="utf-8") as file:
         accounts = json.load(file)
 
@@ -119,6 +144,7 @@ def update_compromised_accounts(start_from=0):
     unknown_count = 0
     skipped_count = 0
     request_counter = 0
+    cached_count = 0
 
     log(f"Found {total_cases} cases in {INPUT_FILE}")
 
@@ -133,40 +159,56 @@ def update_compromised_accounts(start_from=0):
         final_url_domain = details.get("FINAL_URL_DOMAIN", "")
 
         if final_url_domain and final_url_domain not in AUTO_US_DOMAINS:
-            current_time = datetime.utcnow().isoformat()
-            country = get_geolocation(final_url_domain)
-            accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = country
-            accounts[account_id]["LAST_CHECK"] = current_time
-
-            if country == "US":
-                skipped_count += 1
-            elif country == "N/A":
-                unknown_count += 1
+            # Check if we have a cached result for this domain
+            if final_url_domain in domain_cache:
+                cached_result = domain_cache[final_url_domain]
+                current_time = datetime.utcnow().isoformat()
+                accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = cached_result
+                accounts[account_id]["LAST_CHECK"] = current_time
+                cached_count += 1
+                log(
+                    f"Used cached result for {account_id}: {final_url_domain} -> {cached_result} | Last Check: {current_time}"
+                )
             else:
-                updated_count += 1
-
-            log(
-                f"Updated {account_id}: {final_url_domain} -> {country} | Last Check: {current_time}"
-            )
+                # No cached result, process normally
+                current_time = datetime.utcnow().isoformat()
+                country = get_geolocation(final_url_domain, domain_cache)
+                accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = country
+                accounts[account_id]["LAST_CHECK"] = current_time
+
+                if country == "US":
+                    skipped_count += 1
+                elif country == "N/A":
+                    unknown_count += 1
+                else:
+                    updated_count += 1
+
+                log(
+                    f"Updated {account_id}: {final_url_domain} -> {country} | Last Check: {current_time}"
+                )
+
+                # Only increment the request counter and apply rate limiting if we actually queried the API
+                if country != "N/A":
+                    request_counter += 1
+                    if request_counter >= RATE_LIMIT:
+                        log(
+                            f"Reached API rate limit ({RATE_LIMIT} per minute), sleeping for 60 seconds..."
+                        )
+                        time.sleep(60)
+                        request_counter = 0
+                    else:
+                        time.sleep(60 / RATE_LIMIT)  # Distribute requests evenly
 
         # Save progress after each update
         with open(INPUT_FILE, "w", encoding="utf-8") as file:
             json.dump(accounts, file, indent=4)
 
-        # Only increment the request counter and apply rate limiting if we actually queried the API
-        if country != "N/A":
-            request_counter += 1
-            if request_counter >= RATE_LIMIT:
-                log(
-                    f"Reached API rate limit ({RATE_LIMIT} per minute), sleeping for 60 seconds..."
-                )
-                time.sleep(60)
-                request_counter = 0
-            else:
-                time.sleep(60 / RATE_LIMIT)  # Distribute requests evenly
+    # Save cache at the end
+    save_cache(domain_cache)
+    log(f"Saved {len(domain_cache)} domain results to cache")
 
     log(
-        f"Update complete: {updated_count} updated, {unknown_count} set to N/A, {skipped_count} auto-set to US"
+        f"Update complete: {updated_count} updated, {unknown_count} set to N/A, {skipped_count} auto-set to US, {cached_count} from cache"
     )
 
 
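
A note on the pacing math in the rewritten loop: request_counter advances only for domains that actually reached the API, and the else branch spaces those calls 60 / RATE_LIMIT seconds apart (1.2 s if RATE_LIMIT were 50, say; the constant is defined outside this diff), while cache hits bypass both the counter and the sleep. A hedged sketch of why a second run is cheap, assuming the script is invoked directly (the __main__ guard below is illustrative; the real entry point is not part of this diff):

    if __name__ == "__main__":
        # First run: every unseen domain costs one DNS resolution plus one
        # ipinfo.io request, and domain_cache.json is written at the end.
        update_compromised_accounts()
        # Any later run reloads domain_cache.json, so previously seen domains
        # are filled in with zero API calls and zero rate-limit sleeps.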