Skip to content

Commit 08e89cc

Browse files
authored
Merge pull request #29 from midwire/fix/state-abbr-uniqueness
Fix state_abbr not globally unique across countries
2 parents 438a867 + ad286fd commit 08e89cc

9 files changed

Lines changed: 143 additions & 28 deletions

File tree

lib/free_zipcode_data/county_table.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def build
2626
def write(row)
2727
return nil unless row[:county]
2828

29-
state_id = get_state_id(row[:short_state], row[:state])
29+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
3030
return nil unless state_id
3131

3232
sql = <<-SQL

lib/free_zipcode_data/db_table.rb

Lines changed: 38 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,44 @@ def get_country_id(country)
4444
select_first(sql)
4545
end
4646

47-
def get_state_id(state_abbr, state_name)
48-
sql = "SELECT id FROM states
49-
WHERE abbr = '#{state_abbr}' OR name = '#{escape_single_quotes(state_name)}'"
50-
select_first(sql)
47+
def get_state_id(country, state_abbr, state_name)
48+
escaped_country = escape_single_quotes(country)
49+
escaped_abbr = escape_single_quotes(state_abbr)
50+
escaped_name = escape_single_quotes(state_name)
51+
52+
# Try exact match: abbr + name + country
53+
sql = <<-SQL
54+
SELECT s.id FROM states s
55+
INNER JOIN countries c ON s.country_id = c.id
56+
WHERE s.abbr = '#{escaped_abbr}'
57+
AND s.name = '#{escaped_name}'
58+
AND c.alpha2 = '#{escaped_country}'
59+
SQL
60+
res = select_first(sql)
61+
62+
# Fallback: abbr + country only
63+
if res.nil?
64+
sql = <<-SQL
65+
SELECT s.id FROM states s
66+
INNER JOIN countries c ON s.country_id = c.id
67+
WHERE s.abbr = '#{escaped_abbr}'
68+
AND c.alpha2 = '#{escaped_country}'
69+
SQL
70+
res = select_first(sql)
71+
end
72+
73+
# Fallback: name + country only
74+
if res.nil?
75+
sql = <<-SQL
76+
SELECT s.id FROM states s
77+
INNER JOIN countries c ON s.country_id = c.id
78+
WHERE s.name = '#{escaped_name}'
79+
AND c.alpha2 = '#{escaped_country}'
80+
SQL
81+
res = select_first(sql)
82+
end
83+
84+
res
5185
end
5286

5387
def get_county_id(county)

lib/free_zipcode_data/state_table.rb

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,22 +17,24 @@ def build
1717

1818
ndx = <<-SQL
1919
CREATE UNIQUE INDEX "main"."unique_state"
20-
ON #{tablename} (abbr, country_id COLLATE NOCASE ASC);
20+
ON #{tablename} (abbr COLLATE NOCASE ASC, country_id);
2121
SQL
2222
database.execute_batch(ndx)
2323

2424
ndx = <<-SQL
2525
CREATE UNIQUE INDEX "main"."state_name"
26-
ON #{tablename} (name COLLATE NOCASE ASC);
26+
ON #{tablename} (name COLLATE NOCASE ASC, country_id);
2727
SQL
2828
database.execute_batch(ndx)
2929
end
3030

3131
def write(row)
32-
return nil unless row[:short_state]
32+
return nil unless synthesize_state(row)
3333

3434
row[:state] = 'Marshall Islands' if row[:short_state] == 'MH' && row[:state].nil?
3535
country_id = get_country_id(row[:country])
36+
return nil unless country_id
37+
3638
sql = <<-SQL
3739
INSERT INTO states (abbr, name, country_id)
3840
VALUES ('#{row[:short_state]}',
@@ -50,5 +52,19 @@ def write(row)
5052

5153
update_progress
5254
end
55+
56+
private
57+
58+
# Synthesize state from country for stateless countries (downstream tables need this)
59+
def synthesize_state(row)
60+
if row[:short_state].nil? || row[:short_state] == ''
61+
country_entry = country_lookup_table[row[:country]]
62+
return false unless country_entry
63+
64+
row[:short_state] = row[:country]
65+
row[:state] = country_entry[:name]
66+
end
67+
row[:short_state]
68+
end
5369
end
5470
end

lib/free_zipcode_data/zipcode_table.rb

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,9 @@ def build
2929
def write(row)
3030
return nil unless row[:postal_code]
3131

32-
state_id = get_state_id(row[:short_state], row[:state])
32+
state_id = get_state_id(row[:country], row[:short_state], row[:state])
33+
return nil unless state_id
34+
3335
city_name = escape_single_quotes(row[:city])
3436

3537
sql = <<-SQL

spec/free_zipcode_data/county_table_spec.rb

Lines changed: 13 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -32,45 +32,51 @@
3232

3333
describe '#write' do
3434
it 'inserts a county row' do
35-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
35+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
36+
state: 'Illinois' })
3637
rows = db.execute('SELECT name, abbr FROM counties')
3738
expect(rows.length).to eq(1)
3839
expect(rows[0]).to eq(%w[Cook 031])
3940
end
4041

4142
it 'links the county to its state' do
42-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
43+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
44+
state: 'Illinois' })
4345
state_id = db.execute("SELECT id FROM states WHERE abbr = 'IL'")[0][0]
4446
county_state_id = db.execute('SELECT state_id FROM counties')[0][0]
4547
expect(county_state_id).to eq(state_id)
4648
end
4749

4850
it 'returns nil and skips when county is nil' do
49-
result = table.write({ county: nil, short_county: nil, short_state: 'IL', state: 'Illinois' })
51+
result = table.write({ country: 'US', county: nil, short_county: nil, short_state: 'IL',
52+
state: 'Illinois' })
5053
expect(result).to be_nil
5154
rows = db.execute('SELECT COUNT(*) FROM counties')
5255
expect(rows[0][0]).to eq(0)
5356
end
5457

5558
it 'returns nil when state cannot be found' do
56-
result = table.write({ county: 'Unknown', short_county: '999', short_state: 'ZZ',
59+
result = table.write({ country: 'US', county: 'Unknown', short_county: '999', short_state: 'ZZ',
5760
state: 'Nonexistent' })
5861
expect(result).to be_nil
5962
rows = db.execute('SELECT COUNT(*) FROM counties')
6063
expect(rows[0][0]).to eq(0)
6164
end
6265

6366
it 'silently ignores duplicate county entries' do
64-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
67+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
68+
state: 'Illinois' })
6569
expect do
66-
table.write({ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' })
70+
table.write({ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL',
71+
state: 'Illinois' })
6772
end.not_to raise_error
6873
rows = db.execute('SELECT COUNT(*) FROM counties')
6974
expect(rows[0][0]).to eq(1)
7075
end
7176

7277
it 'handles county names with single quotes' do
73-
table.write({ county: "Prince George's", short_county: '033', short_state: 'NY', state: 'New York' })
78+
table.write({ country: 'US', county: "Prince George's", short_county: '033', short_state: 'NY',
79+
state: 'New York' })
7480
rows = db.execute('SELECT name FROM counties')
7581
expect(rows[0][0]).to eq("Prince George's")
7682
end

spec/free_zipcode_data/db_table_spec.rb

Lines changed: 38 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -89,18 +89,49 @@ def build; end
8989
end
9090

9191
describe 'private #get_state_id' do
92-
it 'finds a state by abbreviation' do
93-
id = table.send(:get_state_id, 'NY', 'New York')
94-
expect(id).to be_a(Integer)
92+
it 'finds a state by exact match (abbr + name + country)' do
93+
expected_id = db.execute("SELECT id FROM states WHERE abbr = 'NY'")[0][0]
94+
id = table.send(:get_state_id, 'US', 'NY', 'New York')
95+
expect(id).to eq(expected_id)
9596
end
9697

97-
it 'finds a state by name' do
98-
id = table.send(:get_state_id, 'XX', 'New York')
99-
expect(id).to be_a(Integer)
98+
it 'falls back to abbr + country when name does not match' do
99+
expected_id = db.execute("SELECT id FROM states WHERE abbr = 'NY'")[0][0]
100+
id = table.send(:get_state_id, 'US', 'NY', 'Wrong Name')
101+
expect(id).to eq(expected_id)
102+
end
103+
104+
it 'falls back to name + country when abbr does not match' do
105+
expected_id = db.execute("SELECT id FROM states WHERE name = 'New York'")[0][0]
106+
id = table.send(:get_state_id, 'US', 'XX', 'New York')
107+
expect(id).to eq(expected_id)
100108
end
101109

102110
it 'returns nil for an unknown state' do
103-
id = table.send(:get_state_id, 'ZZ', 'Nonexistent')
111+
id = table.send(:get_state_id, 'US', 'ZZ', 'Nonexistent')
112+
expect(id).to be_nil
113+
end
114+
115+
it 'returns nil when country is nil' do
116+
id = table.send(:get_state_id, nil, 'NY', 'New York')
117+
expect(id).to be_nil
118+
end
119+
120+
it 'scopes lookup by country' do
121+
# Seed a Canadian state with the same abbr as a US state
122+
ca_state_table = FreeZipcodeData::StateTable.new(database: db, tablename: 'states')
123+
ca_state_table.write({ country: 'CA', short_state: 'NY', state: 'Northern Yukon' })
124+
125+
us_id = table.send(:get_state_id, 'US', 'NY', 'New York')
126+
ca_id = table.send(:get_state_id, 'CA', 'NY', 'Northern Yukon')
127+
128+
expect(us_id).to be_a(Integer)
129+
expect(ca_id).to be_a(Integer)
130+
expect(us_id).not_to eq(ca_id)
131+
end
132+
133+
it 'returns nil when all three fallbacks fail' do
134+
id = table.send(:get_state_id, 'GB', 'ZZ', 'Nonexistent')
104135
expect(id).to be_nil
105136
end
106137
end

spec/free_zipcode_data/state_table_spec.rb

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,26 @@
5050
expect(state_country_id).to eq(country_id)
5151
end
5252

53-
it 'returns nil and skips when short_state is nil' do
54-
result = table.write({ country: 'US', short_state: nil, state: 'Unknown' })
53+
it 'creates a state from country lookup when short_state is nil' do
54+
row = { country: 'US', short_state: nil, state: 'Unknown' }
55+
table.write(row)
56+
rows = db.execute("SELECT abbr, name FROM states WHERE abbr = 'US'")
57+
expect(rows.length).to eq(1)
58+
expect(rows[0]).to eq(['US', 'United States of America'])
59+
# Verify row mutation for downstream Kiba destinations
60+
expect(row[:short_state]).to eq('US')
61+
expect(row[:state]).to eq('United States of America')
62+
end
63+
64+
it 'creates a state from country lookup when short_state is empty' do
65+
table.write({ country: 'US', short_state: '', state: 'Unknown' })
66+
rows = db.execute("SELECT abbr, name FROM states WHERE abbr = 'US'")
67+
expect(rows.length).to eq(1)
68+
expect(rows[0]).to eq(['US', 'United States of America'])
69+
end
70+
71+
it 'returns nil when short_state is nil and country is unknown' do
72+
result = table.write({ country: 'ZZ', short_state: nil, state: 'Unknown' })
5573
expect(result).to be_nil
5674
rows = db.execute('SELECT COUNT(*) FROM states')
5775
expect(rows[0][0]).to eq(0)
@@ -76,5 +94,12 @@
7694
rows = db.execute("SELECT name FROM states WHERE abbr = 'TX'")
7795
expect(rows[0][0]).to eq("Cote d'Ivoire")
7896
end
97+
98+
it 'allows states with the same name in different countries' do
99+
table.write({ country: 'US', short_state: 'BC', state: 'British Columbia' })
100+
table.write({ country: 'CA', short_state: 'BC', state: 'British Columbia' })
101+
rows = db.execute("SELECT COUNT(*) FROM states WHERE name = 'British Columbia'")
102+
expect(rows[0][0]).to eq(2)
103+
end
79104
end
80105
end

spec/free_zipcode_data/zipcode_table_spec.rb

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
describe '#write' do
3434
let(:row) do
3535
{
36+
country: 'US',
3637
postal_code: '60601',
3738
short_state: 'IL',
3839
state: 'Illinois',

spec/support/database_helpers.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ def seed_counties(db, tablename: 'counties')
4040
table = FreeZipcodeData::CountyTable.new(database: db, tablename: tablename)
4141
table.build
4242
[
43-
{ county: 'New York', short_county: '061', short_state: 'NY', state: 'New York' },
44-
{ county: 'Los Angeles', short_county: '037', short_state: 'CA', state: 'California' },
45-
{ county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }
43+
{ country: 'US', county: 'New York', short_county: '061', short_state: 'NY', state: 'New York' },
44+
{ country: 'US', county: 'Los Angeles', short_county: '037', short_state: 'CA', state: 'California' },
45+
{ country: 'US', county: 'Cook', short_county: '031', short_state: 'IL', state: 'Illinois' }
4646
].each { |row| table.write(row) }
4747
end
4848
end

0 commit comments

Comments
 (0)