Skip to content
This repository was archived by the owner on Apr 26, 2026. It is now read-only.

Commit f185871

Browse files
committed
Add domain geolocation caching to reduce API calls
Introduced a cache mechanism for domain geolocation results to minimize redundant API requests and improve performance. Added functions to load and save the cache, and updated the main processing logic to use cached results when available, persisting the cache after each run. Logging and counters were also updated to reflect cache usage.
1 parent 5230deb commit f185871

1 file changed

Lines changed: 70 additions & 28 deletions

File tree

Tool-13-Geolocation-Tagger.py

Lines changed: 70 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313

1414
# Define the input file
1515
INPUT_FILE = "../Database-Files/Edit-Database/Compromised-Discord-Accounts.json"
16+
CACHE_FILE = "domain_cache.json" # File to store cached domain results
1617
IPINFO_URL = "https://ipinfo.io/"
1718

1819
# Domains that should be automatically set to US
@@ -29,6 +30,19 @@
2930
}
3031

3132

33+
def load_cache():
34+
try:
35+
with open(CACHE_FILE, "r", encoding="utf-8") as file:
36+
return json.load(file)
37+
except (FileNotFoundError, json.JSONDecodeError):
38+
return {}
39+
40+
41+
def save_cache(cache):
42+
with open(CACHE_FILE, "w", encoding="utf-8") as file:
43+
json.dump(cache, file, indent=4)
44+
45+
3246
def log(message):
3347
timestamp = datetime.now().strftime("[%Y-%m-%d %H:%M:%S]")
3448
print(f"{timestamp} {message}")
@@ -45,11 +59,16 @@ def resolve_ip(domain):
4559
return None
4660

4761

48-
def get_geolocation(domain):
62+
def get_geolocation(domain, cache):
4963
if domain in AUTO_US_DOMAINS:
5064
log(f"Skipping API call for {domain}, setting to US")
5165
return "US"
5266

67+
# Check cache first
68+
if domain in cache:
69+
log(f"Using cached result for {domain}: {cache[domain]}")
70+
return cache[domain]
71+
5372
# Resolve the domain to an IP address
5473
ip_address = resolve_ip(domain)
5574
if not ip_address:
@@ -63,6 +82,9 @@ def get_geolocation(domain):
6382
data = response.json()
6483
country = data.get("country", "N/A")
6584
log(f"Received response for {domain} (IP: {ip_address}): {country}")
85+
86+
# Update cache with new result
87+
cache[domain] = country
6688
return country
6789
except requests.RequestException as e:
6890
log(f"Error querying {domain} (IP: {ip_address}): {e}")
@@ -96,7 +118,6 @@ def update_auto_us_domains(accounts):
96118
for account_id, details in accounts.items():
97119
final_url_domain = details.get("FINAL_URL_DOMAIN", "")
98120
if final_url_domain in AUTO_US_DOMAINS:
99-
# Only update if we're actually processing this domain
100121
accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = "US"
101122
accounts[account_id]["LAST_CHECK"] = current_time
102123
updated_count += 1
@@ -111,6 +132,10 @@ def update_auto_us_domains(accounts):
111132

112133

113134
def update_compromised_accounts(start_from=0):
135+
# Load cache at start
136+
domain_cache = load_cache()
137+
log(f"Loaded {len(domain_cache)} cached domain results")
138+
114139
with open(INPUT_FILE, "r", encoding="utf-8") as file:
115140
accounts = json.load(file)
116141

@@ -119,6 +144,7 @@ def update_compromised_accounts(start_from=0):
119144
unknown_count = 0
120145
skipped_count = 0
121146
request_counter = 0
147+
cached_count = 0
122148

123149
log(f"Found {total_cases} cases in {INPUT_FILE}")
124150

@@ -133,40 +159,56 @@ def update_compromised_accounts(start_from=0):
133159
final_url_domain = details.get("FINAL_URL_DOMAIN", "")
134160

135161
if final_url_domain and final_url_domain not in AUTO_US_DOMAINS:
136-
current_time = datetime.utcnow().isoformat()
137-
country = get_geolocation(final_url_domain)
138-
accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = country
139-
accounts[account_id]["LAST_CHECK"] = current_time
140-
141-
if country == "US":
142-
skipped_count += 1
143-
elif country == "N/A":
144-
unknown_count += 1
162+
# Check if we have a cached result for this domain
163+
if final_url_domain in domain_cache:
164+
cached_result = domain_cache[final_url_domain]
165+
current_time = datetime.utcnow().isoformat()
166+
accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = cached_result
167+
accounts[account_id]["LAST_CHECK"] = current_time
168+
cached_count += 1
169+
log(
170+
f"Used cached result for {account_id}: {final_url_domain} -> {cached_result} | Last Check: {current_time}"
171+
)
145172
else:
146-
updated_count += 1
147-
148-
log(
149-
f"Updated {account_id}: {final_url_domain} -> {country} | Last Check: {current_time}"
150-
)
173+
# No cached result, process normally
174+
current_time = datetime.utcnow().isoformat()
175+
country = get_geolocation(final_url_domain, domain_cache)
176+
accounts[account_id]["SUSPECTED_REGION_OF_ORIGIN"] = country
177+
accounts[account_id]["LAST_CHECK"] = current_time
178+
179+
if country == "US":
180+
skipped_count += 1
181+
elif country == "N/A":
182+
unknown_count += 1
183+
else:
184+
updated_count += 1
185+
186+
log(
187+
f"Updated {account_id}: {final_url_domain} -> {country} | Last Check: {current_time}"
188+
)
189+
190+
# Only increment the request counter and apply rate limiting if we actually queried the API
191+
if country != "N/A" and final_url_domain not in domain_cache:
192+
request_counter += 1
193+
if request_counter >= RATE_LIMIT:
194+
log(
195+
f"Reached API rate limit ({RATE_LIMIT} per minute), sleeping for 60 seconds..."
196+
)
197+
time.sleep(60)
198+
request_counter = 0
199+
else:
200+
time.sleep(60 / RATE_LIMIT) # Distribute requests evenly
151201

152202
# Save progress after each update
153203
with open(INPUT_FILE, "w", encoding="utf-8") as file:
154204
json.dump(accounts, file, indent=4)
155205

156-
# Only increment the request counter and apply rate limiting if we actually queried the API
157-
if country != "N/A":
158-
request_counter += 1
159-
if request_counter >= RATE_LIMIT:
160-
log(
161-
f"Reached API rate limit ({RATE_LIMIT} per minute), sleeping for 60 seconds..."
162-
)
163-
time.sleep(60)
164-
request_counter = 0
165-
else:
166-
time.sleep(60 / RATE_LIMIT) # Distribute requests evenly
206+
# Save cache at the end
207+
save_cache(domain_cache)
208+
log(f"Saved {len(domain_cache)} domain results to cache")
167209

168210
log(
169-
f"Update complete: {updated_count} updated, {unknown_count} set to N/A, {skipped_count} auto-set to US"
211+
f"Update complete: {updated_count} updated, {unknown_count} set to N/A, {skipped_count} auto-set to US, {cached_count} from cache"
170212
)
171213

172214

0 commit comments

Comments
 (0)