Skip to content

Commit 880b051

Browse files
committed
Fix state_abbr not globally unique across countries
Scope state lookups by country in get_state_id using a 3-step fallback (exact match, abbr+country, name+country). Add stateless country fallback in StateTable for countries without state subdivisions. Update state_name index to include country_id for multi-country uniqueness. Fixes #18
1 parent 438a867 commit 880b051

9 files changed

Lines changed: 124 additions & 25 deletions

File tree

lib/free_zipcode_data/county_table.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def build
2626
def write(row)
2727
return nil unless row[:county]
2828

29-
state_id = get_state_id(row[:short_state], row[:state])
29+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
3030
return nil unless state_id
3131

3232
sql = <<-SQL

lib/free_zipcode_data/db_table.rb

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,44 @@ def get_country_id(country)
4444
select_first(sql)
4545
end
4646

47-
def get_state_id(state_abbr, state_name)
48-
sql = "SELECT id FROM states
49-
WHERE abbr = '#{state_abbr}' OR name = '#{escape_single_quotes(state_name)}'"
50-
select_first(sql)
47+
def get_state_id(country, state_abbr, state_name)
48+
escaped_country = escape_single_quotes(country)
49+
escaped_abbr = escape_single_quotes(state_abbr)
50+
escaped_name = escape_single_quotes(state_name)
51+
52+
# Try exact match: abbr + name + country
53+
sql = <<-SQL
54+
SELECT s.id FROM states s
55+
INNER JOIN countries c ON s.country_id = c.id
56+
WHERE s.abbr = '#{escaped_abbr}'
57+
AND s.name = '#{escaped_name}'
58+
AND c.alpha2 = '#{escaped_country}'
59+
SQL
60+
res = select_first(sql)
61+
62+
# Fallback: abbr + country only
63+
if res.nil?
64+
sql = <<-SQL
65+
SELECT s.id FROM states s
66+
INNER JOIN countries c ON s.country_id = c.id
67+
WHERE s.abbr = '#{escaped_abbr}'
68+
AND c.alpha2 = '#{escaped_country}'
69+
SQL
70+
res = select_first(sql)
71+
end
72+
73+
# Fallback: name + country only
74+
if res.nil?
75+
sql = <<-SQL
76+
SELECT s.id FROM states s
77+
INNER JOIN countries c ON s.country_id = c.id
78+
WHERE s.name = '#{escaped_name}'
79+
AND c.alpha2 = '#{escaped_country}'
80+
SQL
81+
res = select_first(sql)
82+
end
83+
84+
res
5185
end
5286

5387
def get_county_id(county)

lib/free_zipcode_data/state_table.rb

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,13 +23,13 @@ def build
2323

2424
ndx = <<-SQL
2525
CREATE UNIQUE INDEX "main"."state_name"
26-
ON #{tablename} (name COLLATE NOCASE ASC);
26+
ON #{tablename} (name, country_id COLLATE NOCASE ASC);
2727
SQL
2828
database.execute_batch(ndx)
2929
end
3030

3131
def write(row)
32-
return nil unless row[:short_state]
32+
return nil unless synthesize_state(row)
3333

3434
row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
3535
country_id = get_country_id(row[:country])
@@ -50,5 +50,19 @@ def write(row)
5050

5151
update_progress
5252
end
53+
54+
private
55+
56+
# Synthesize state from country for stateless countries (downstream tables need this)
57+
def synthesize_state(row)
58+
if row[:short_state].nil? || row[:short_state] == ''
59+
country_entry = country_lookup_table[row[:country]]
60+
return false unless country_entry
61+
62+
row[:short_state] = row[:country]
63+
row[:state] = country_entry[:name]
64+
end
65+
row[:short_state]
66+
end
5367
end
5468
end

lib/free_zipcode_data/zipcode_table.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def build
2929
def write(row)
3030
return nil unless row[:postal_code]
3131

32-
state_id = get_state_id(row[:short_state], row[:state])
32+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
3333
city_name = escape_single_quotes(row[:city])
3434

3535
sql = <<-SQL

spec/free_zipcode_data/county_table_spec.rb

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,45 +32,51 @@
3232

3333
describe '#write' do
3434
it 'inserts a county row' do
35-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
35+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
36+
state: 'Illinois' })
3637
rows = db.execute('SELECT name, abbr FROM counties')
3738
expect(rows.length).to eq(1)
3839
expect(rows[0]).to eq(%w[Cook 031])
3940
end
4041

4142
it 'links the county to its state' do
42-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
43+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
44+
state: 'Illinois' })
4345
state_id = db.execute("SELECT id FROM states WHERE abbr = 'IL'")[0][0]
4446
county_state_id = db.execute('SELECT state_id FROM counties')[0][0]
4547
expect(county_state_id).to eq(state_id)
4648
end
4749

4850
it 'returns nil and skips when county is nil' do
49-
result = table.write({ county: nil, short_county: nil, short_state: 'IL', state: 'Illinois' })
51+
result = table.write({ country: 'US', county: nil, short_county: nil, short_state: 'IL',
52+
state: 'Illinois' })
5053
expect(result).to be_nil
5154
rows = db.execute('SELECT COUNT(*) FROM counties')
5255
expect(rows[0][0]).to eq(0)
5356
end
5457

5558
it 'returns nil when state cannot be found' do
56-
result = table.write({ county: 'Unknown', short_county: '999', short_state: 'ZZ',
59+
result = table.write({ country: 'US', county: 'Unknown', short_county: '999', short_state: 'ZZ',
5760
state: 'Nonexistent' })
5861
expect(result).to be_nil
5962
rows = db.execute('SELECT COUNT(*) FROM counties')
6063
expect(rows[0][0]).to eq(0)
6164
end
6265

6366
it 'silently ignores duplicate county entries' do
64-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
67+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
68+
state: 'Illinois' })
6569
expect do
66-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
70+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
71+
state: 'Illinois' })
6772
end.not_to raise_error
6873
rows = db.execute('SELECT COUNT(*) FROM counties')
6974
expect(rows[0][0]).to eq(1)
7075
end
7176

7277
it 'handles county names with single quotes' do
73-
table.write({ county: "Prince George's", short_county: '033', short_state: 'NY', state: 'New York' })
78+
table.write({ country: 'US', county: "Prince George's", short_county: '033', short_state: 'NY',
79+
state: 'New York' })
7480
rows = db.execute('SELECT name FROM counties')
7581
expect(rows[0][0]).to eq("Prince George's")
7682
end

spec/free_zipcode_data/db_table_spec.rb

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,41 @@ def build; end
8989
end
9090

9191
describe 'private #get_state_id' do
92-
it 'finds a state by abbreviation' do
93-
id = table.send(:get_state_id, 'NY', 'New York')
92+
it 'finds a state by exact match (abbr + name + country)' do
93+
id = table.send(:get_state_id, 'US', 'NY', 'New York')
9494
expect(id).to be_a(Integer)
9595
end
9696

97-
it 'finds a state by name' do
98-
id = table.send(:get_state_id, 'XX', 'New York')
97+
it 'falls back to abbr + country when name does not match' do
98+
id = table.send(:get_state_id, 'US', 'NY', 'Wrong Name')
99+
expect(id).to be_a(Integer)
100+
end
101+
102+
it 'falls back to name + country when abbr does not match' do
103+
id = table.send(:get_state_id, 'US', 'XX', 'New York')
99104
expect(id).to be_a(Integer)
100105
end
101106

102107
it 'returns nil for an unknown state' do
103-
id = table.send(:get_state_id, 'ZZ', 'Nonexistent')
108+
id = table.send(:get_state_id, 'US', 'ZZ', 'Nonexistent')
109+
expect(id).to be_nil
110+
end
111+
112+
it 'scopes lookup by country' do
113+
# Seed a Canadian state with the same abbr as a US state
114+
ca_state_table = FreeZipcodeData::StateTable.new(database: db, tablename: 'states')
115+
ca_state_table.write({ country: 'CA', short_state: 'NY', state: 'Northern Yukon' })
116+
117+
us_id = table.send(:get_state_id, 'US', 'NY', 'New York')
118+
ca_id = table.send(:get_state_id, 'CA', 'NY', 'Northern Yukon')
119+
120+
expect(us_id).to be_a(Integer)
121+
expect(ca_id).to be_a(Integer)
122+
expect(us_id).not_to eq(ca_id)
123+
end
124+
125+
it 'returns nil when all three fallbacks fail' do
126+
id = table.send(:get_state_id, 'GB', 'ZZ', 'Nonexistent')
104127
expect(id).to be_nil
105128
end
106129
end

spec/free_zipcode_data/state_table_spec.rb

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,22 @@
5050
expect(state_country_id).to eq(country_id)
5151
end
5252

53-
it 'returns nil and skips when short_state is nil' do
54-
result = table.write({ country: 'US', short_state: nil, state: 'Unknown' })
53+
it 'creates a state from country lookup when short_state is nil' do
54+
table.write({ country: 'US', short_state: nil, state: 'Unknown' })
55+
rows = db.execute("SELECT abbr, name FROM states WHERE abbr = 'US'")
56+
expect(rows.length).to eq(1)
57+
expect(rows[0]).to eq(['US', 'United States of America'])
58+
end
59+
60+
it 'creates a state from country lookup when short_state is empty' do
61+
table.write({ country: 'US', short_state: '', state: 'Unknown' })
62+
rows = db.execute("SELECT abbr, name FROM states WHERE abbr = 'US'")
63+
expect(rows.length).to eq(1)
64+
expect(rows[0]).to eq(['US', 'United States of America'])
65+
end
66+
67+
it 'returns nil when short_state is nil and country is unknown' do
68+
result = table.write({ country: 'ZZ', short_state: nil, state: 'Unknown' })
5569
expect(result).to be_nil
5670
rows = db.execute('SELECT COUNT(*) FROM states')
5771
expect(rows[0][0]).to eq(0)
@@ -76,5 +90,12 @@
7690
rows = db.execute("SELECT name FROM states WHERE abbr = 'TX'")
7791
expect(rows[0][0]).to eq("Cote d'Ivoire")
7892
end
93+
94+
it 'allows states with the same name in different countries' do
95+
table.write({ country: 'US', short_state: 'BC', state: 'British Columbia' })
96+
table.write({ country: 'CA', short_state: 'BC', state: 'British Columbia' })
97+
rows = db.execute("SELECT COUNT(*) FROM states WHERE name = 'British Columbia'")
98+
expect(rows[0][0]).to eq(2)
99+
end
79100
end
80101
end

spec/free_zipcode_data/zipcode_table_spec.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
describe '#write' do
3434
let(:row) do
3535
{
36+
country: 'US',
3637
postal_code: '60601',
3738
short_state: 'IL',
3839
state: 'Illinois',

spec/support/database_helpers.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ def seed_counties(db, tablename: 'counties')
4040
table = FreeZipcodeData::CountyTable.new(database: db, tablename: tablename)
4141
table.build
4242
[
43-
{ county: 'New York', short_county: '061', short_state: 'NY', state: 'New York' },
44-
{ county: 'Los Angeles', short_county: '037', short_state: 'CA', state: 'California' },
45-
{ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }
43+
{ country: 'US', county: 'New York', short_county: '061', short_state: 'NY', state: 'New York' },
44+
{ country: 'US', county: 'Los Angeles', short_county: '037', short_state: 'CA', state: 'California' },
45+
{ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }
4646
].each { |row| table.write(row) }
4747
end
4848
end

0 commit comments

Comments
 (0)