Skip to content

Commit 0586a29

Browse files
committed
Add logging for silent failures, narrow constraint rescues, and improve test coverage
- Add warn/verbose logging to eliminate silent data loss cascade: country_table, state_table, county_table, and zipcode_table now log when rows are skipped due to missing lookups - Add warn_once helper to DbTable for deduplicated warnings - Narrow rescue SQLite3::ConstraintException to only swallow UNIQUE violations; re-raise NOT NULL, FOREIGN KEY, and CHECK violations - Extract find_state_where helper to keep get_state_id under method length limit - Add early return for nil/empty country in get_state_id - Expand synthesize_state comment to document Kiba row-hash mutation - Add missing tests: zipcode state_id nil guard, state country_id nil guard, and cross-country integration tests through full Kiba pipeline - Silence logger and initialize Options in spec_helper for all tests
1 parent 08e89cc commit 0586a29

11 files changed

Lines changed: 148 additions & 45 deletions

File tree

lib/free_zipcode_data/country_table.rb

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,10 @@ def build
2525

2626
def write(row)
2727
country_hash = country_lookup_table[row[:country]]
28-
return update_progress unless country_hash
28+
unless country_hash
29+
warn_once("Skipping unknown country '#{row[:country]}': not in country_lookup_table")
30+
return update_progress
31+
end
2932

3033
sql = <<-SQL
3134
INSERT INTO countries (alpha2, alpha3, iso, name)
@@ -37,8 +40,10 @@ def write(row)
3740

3841
begin
3942
database.execute(sql)
40-
rescue SQLite3::ConstraintException
41-
# Swallow duplicates
43+
rescue SQLite3::ConstraintException => e
44+
unless e.message.include?('UNIQUE')
45+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
46+
end
4247
rescue StandardError => e
4348
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
4449
end

lib/free_zipcode_data/county_table.rb

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,13 @@ def write(row)
2727
return nil unless row[:county]
2828

2929
state_id = get_state_id(row[:country], row[:short_state], row[:state])
30-
return nil unless state_id
30+
unless state_id
31+
logger.verbose(
32+
"Skipping county '#{row[:county]}': no state found for " \
33+
"abbr='#{row[:short_state]}', country='#{row[:country]}'"
34+
)
35+
return nil
36+
end
3137

3238
sql = <<-SQL
3339
INSERT INTO counties (state_id, abbr, name)
@@ -39,8 +45,10 @@ def write(row)
3945

4046
begin
4147
database.execute(sql)
42-
rescue SQLite3::ConstraintException
43-
# swallow duplicates
48+
rescue SQLite3::ConstraintException => e
49+
unless e.message.include?('UNIQUE')
50+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
51+
end
4452
rescue StandardError => e
4553
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
4654
end

lib/free_zipcode_data/db_table.rb

Lines changed: 39 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,18 @@ def update_progress
2424

2525
private
2626

27+
def logger
28+
Logger.instance
29+
end
30+
31+
def warn_once(message)
32+
@warned_messages ||= {}
33+
return if @warned_messages[message]
34+
35+
logger.warn(message)
36+
@warned_messages[message] = true
37+
end
38+
2739
def country_lookup_table
2840
@country_lookup_table ||=
2941
begin
@@ -44,44 +56,43 @@ def get_country_id(country)
4456
select_first(sql)
4557
end
4658

59+
# Look up a state ID scoped to a country, trying progressively less specific
60+
# criteria: (1) abbr + name + country, (2) abbr + country, (3) name + country.
61+
# Returns nil if no match is found.
4762
def get_state_id(country, state_abbr, state_name)
4863
escaped_country = escape_single_quotes(country)
64+
return nil if escaped_country.empty?
65+
4966
escaped_abbr = escape_single_quotes(state_abbr)
5067
escaped_name = escape_single_quotes(state_name)
51-
52-
# Try exact match: abbr + name + country
53-
sql = <<-SQL
54-
SELECT s.id FROM states s
55-
INNER JOIN countries c ON s.country_id = c.id
56-
WHERE s.abbr = '#{escaped_abbr}'
57-
AND s.name = '#{escaped_name}'
58-
AND c.alpha2 = '#{escaped_country}'
59-
SQL
60-
res = select_first(sql)
68+
country_cond = "c.alpha2 = '#{escaped_country}'"
69+
# Most specific lookup: abbr + name + country
70+
res = find_state_where("s.abbr = '#{escaped_abbr}'", "s.name = '#{escaped_name}'", country_cond)
71+
return res if res
6172

6273
# Fallback: abbr + country only
63-
if res.nil?
64-
sql = <<-SQL
65-
SELECT s.id FROM states s
66-
INNER JOIN countries c ON s.country_id = c.id
67-
WHERE s.abbr = '#{escaped_abbr}'
68-
AND c.alpha2 = '#{escaped_country}'
69-
SQL
70-
res = select_first(sql)
74+
res = find_state_where("s.abbr = '#{escaped_abbr}'", country_cond)
75+
if res
76+
logger.verbose("State fallback: abbr '#{state_abbr}' + country '#{country}' (name mismatch)")
77+
return res
7178
end
72-
7379
# Fallback: name + country only
74-
if res.nil?
75-
sql = <<-SQL
76-
SELECT s.id FROM states s
77-
INNER JOIN countries c ON s.country_id = c.id
78-
WHERE s.name = '#{escaped_name}'
79-
AND c.alpha2 = '#{escaped_country}'
80-
SQL
81-
res = select_first(sql)
80+
res = find_state_where("s.name = '#{escaped_name}'", country_cond)
81+
if res
82+
logger.verbose("State fallback: name '#{state_name}' + country '#{country}' (abbr mismatch)")
83+
return res
8284
end
85+
logger.warn("State lookup failed: abbr='#{state_abbr}', name='#{state_name}', country='#{country}'")
86+
nil
87+
end
8388

84-
res
89+
def find_state_where(*conditions)
90+
sql = <<-SQL
91+
SELECT s.id FROM states s
92+
INNER JOIN countries c ON s.country_id = c.id
93+
WHERE #{conditions.join(' AND ')}
94+
SQL
95+
select_first(sql)
8596
end
8697

8798
def get_county_id(county)

lib/free_zipcode_data/state_table.rb

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,10 @@ def write(row)
3333

3434
row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
3535
country_id = get_country_id(row[:country])
36-
return nil unless country_id
36+
unless country_id
37+
warn_once("Country '#{row[:country]}' not found in countries table, skipping state")
38+
return nil
39+
end
3740

3841
sql = <<-SQL
3942
INSERT INTO states (abbr, name, country_id)
@@ -44,8 +47,10 @@ def write(row)
4447
SQL
4548
begin
4649
database.execute(sql)
47-
rescue SQLite3::ConstraintException
48-
# Swallow duplicates
50+
rescue SQLite3::ConstraintException => e
51+
unless e.message.include?('UNIQUE')
52+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
53+
end
4954
rescue StandardError => e
5055
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
5156
end
@@ -55,11 +60,19 @@ def write(row)
5560

5661
private
5762

58-
# Synthesize state from country for stateless countries (downstream tables need this)
63+
# Synthesize state from country for stateless countries.
64+
# Mutates the row hash so downstream Kiba destinations (CountyTable, ZipcodeTable)
65+
# see the synthesized short_state and state values.
5966
def synthesize_state(row)
6067
if row[:short_state].nil? || row[:short_state] == ''
6168
country_entry = country_lookup_table[row[:country]]
62-
return false unless country_entry
69+
unless country_entry
70+
warn_once(
71+
"Cannot synthesize state for country '#{row[:country]}': " \
72+
'not in country_lookup_table'
73+
)
74+
return false
75+
end
6376

6477
row[:short_state] = row[:country]
6578
row[:state] = country_entry[:name]

lib/free_zipcode_data/zipcode_table.rb

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,13 @@ def write(row)
3030
return nil unless row[:postal_code]
3131

3232
state_id = get_state_id(row[:country], row[:short_state], row[:state])
33-
return nil unless state_id
33+
unless state_id
34+
logger.verbose(
35+
"Skipping zipcode '#{row[:postal_code]}': no state found for " \
36+
"abbr='#{row[:short_state]}', country='#{row[:country]}'"
37+
)
38+
return nil
39+
end
3440

3541
city_name = escape_single_quotes(row[:city])
3642

@@ -47,8 +53,10 @@ def write(row)
4753

4854
begin
4955
database.execute(sql)
50-
rescue SQLite3::ConstraintException => _e
51-
# there are some duplicates - swallow them
56+
rescue SQLite3::ConstraintException => e
57+
unless e.message.include?('UNIQUE')
58+
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
59+
end
5260
rescue StandardError => e
5361
raise "Please file an issue at #{ISSUE_URL}: [#{e}] -> SQL: [#{sql}]"
5462
end

spec/etl/csv_source_spec.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@
2828
rows = []
2929
source.each { |row| rows << row }
3030

31-
expect(rows.length).to eq(5)
31+
expect(rows.length).to eq(6)
3232
expect(rows.first).to be_a(Hash)
3333
expect(rows.first.keys).to include(:country, :postal_code, :city)
3434
end

spec/etl/free_zipcode_data_job_spec.rb

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
require 'etl/free_zipcode_data_job'
55

66
RSpec.describe ETL::FreeZipcodeDataJob do
7-
let(:db) { create_test_database(line_count: 5) }
7+
let(:db) { create_test_database(line_count: 6) }
88
let(:fixture_csv) { File.join(FreeZipcodeData.root, 'spec', 'fixtures', 'test_data.csv') }
99
let(:logger) { FreeZipcodeData::Logger.instance }
1010
let(:string_io) { StringIO.new }
@@ -92,5 +92,44 @@
9292
expect(lat).to be_within(0.01).of(40.7484)
9393
expect(lon).to be_within(0.01).of(-73.9967)
9494
end
95+
96+
it 'scopes duplicate state abbreviations by country' do
97+
us_ny = db.execute(<<-SQL)
98+
SELECT s.id, s.name, c.alpha2
99+
FROM states s
100+
JOIN countries c ON s.country_id = c.id
101+
WHERE s.abbr = 'NY' AND c.alpha2 = 'US'
102+
SQL
103+
ca_ny = db.execute(<<-SQL)
104+
SELECT s.id, s.name, c.alpha2
105+
FROM states s
106+
JOIN countries c ON s.country_id = c.id
107+
WHERE s.abbr = 'NY' AND c.alpha2 = 'CA'
108+
SQL
109+
expect(us_ny.length).to eq(1)
110+
expect(ca_ny.length).to eq(1)
111+
expect(us_ny[0][0]).not_to eq(ca_ny[0][0])
112+
expect(us_ny[0][1]).to eq('New York')
113+
expect(ca_ny[0][1]).to eq('Northern York')
114+
end
115+
116+
it 'links cross-country zipcodes to the correct state' do
117+
us_zip = db.execute(<<-SQL)
118+
SELECT z.code, s.name, c.alpha2
119+
FROM zipcodes z
120+
JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
121+
JOIN countries c ON s.country_id = c.id
122+
WHERE z.code = '10001'
123+
SQL
124+
ca_zip = db.execute(<<-SQL)
125+
SELECT z.code, s.name, c.alpha2
126+
FROM zipcodes z
127+
JOIN states s ON CAST(z.state_id AS INTEGER) = s.id
128+
JOIN countries c ON s.country_id = c.id
129+
WHERE z.code = 'K0A'
130+
SQL
131+
expect(us_zip[0]).to eq(['10001', 'New York', 'US'])
132+
expect(ca_zip[0]).to eq(['K0A', 'Northern York', 'CA'])
133+
end
95134
end
96135
end

spec/fixtures/test_data.csv

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ US,10001,New York,New York,NY,New York,061,Manhattan,MN,40.7484,-73.9967,4
33
US,90210,Beverly Hills,California,CA,Los Angeles,037,,LA,34.0901,-118.4065,4
44
US,60601,Chicago,Illinois,IL,Cook,031,,CK,41.8819,-87.6278,4
55
CA,H2X,Montreal,Quebec,QC,,,Montreal,,45.5088,-73.5878,4
6+
CA,K0A,Almonte,Northern York,NY,Lanark,LNK,,,45.2260,-76.1840,4
67
GB,SW1A,London,England,ENG,Westminster,,City of Westminster,,51.5014,-0.1419,1

spec/free_zipcode_data/state_table_spec.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,13 @@
7575
expect(rows[0][0]).to eq(0)
7676
end
7777

78+
it 'returns nil when country is not in the countries table' do
79+
result = table.write({ country: 'DE', short_state: 'BY', state: 'Bavaria' })
80+
expect(result).to be_nil
81+
rows = db.execute('SELECT COUNT(*) FROM states')
82+
expect(rows[0][0]).to eq(0)
83+
end
84+
7885
it 'silently ignores duplicate state entries' do
7986
table.write({ country: 'US', short_state: 'NY', state: 'New York' })
8087
expect { table.write({ country: 'US', short_state: 'NY', state: 'New York' }) }.not_to raise_error

spec/free_zipcode_data/zipcode_table_spec.rb

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,13 @@
7272
expect(rows[0][0]).to eq(0)
7373
end
7474

75+
it 'returns nil and skips when state cannot be found' do
76+
result = table.write(row.merge(short_state: 'ZZ', state: 'Nonexistent'))
77+
expect(result).to be_nil
78+
rows = db.execute('SELECT COUNT(*) FROM zipcodes')
79+
expect(rows[0][0]).to eq(0)
80+
end
81+
7582
it 'silently ignores duplicate zipcode entries' do
7683
table.write(row)
7784
expect { table.write(row) }.not_to raise_error

0 commit comments

Comments
 (0)