added better regions database; added appropriate indexing for regions table

This commit is contained in:
Scott Comer 2014-05-22 15:17:00 -05:00
parent 5527a6032c
commit bf06df36a3
7 changed files with 39 additions and 94 deletions

View File

@ -1 +1,10 @@
this is just for getting this maxmind data over there so i can use it.
source for iso3166-1 data:
http://dev.maxmind.com/static/csv/codes/iso3166.csv
source for iso3166-2 data (compatible):
http://geolite.maxmind.com/download/geoip/misc/region_codes.csv

View File

@ -1,13 +0,0 @@
AB,Alberta
BC,British Columbia
MB,Manitoba
NB,New Brunswick
NL,Newfoundland and Labrador
NS,Nova Scotia
NT,Northwest Territories
NU,Nunavut
ON,Ontario
PE,Prince Edward Island
QC,Quebec
SK,Saskatchewan
YT,Yukon
1 AB Alberta
2 BC British Columbia
3 MB Manitoba
4 NB New Brunswick
5 NL Newfoundland and Labrador
6 NS Nova Scotia
7 NT Northwest Territories
8 NU Nunavut
9 ON Ontario
10 PE Prince Edward Island
11 QC Quebec
12 SK Saskatchewan
13 YT Yukon

View File

@ -1,57 +0,0 @@
AA,Armed Forces America
AE,Armed Forces
AP,Armed Forces Pacific
AK,Alaska
AL,Alabama
AR,Arkansas
AZ,Arizona
CA,California
CO,Colorado
CT,Connecticut
DC,District of Columbia
DE,Delaware
FL,Florida
GA,Georgia
GU,Guam
HI,Hawaii
IA,Iowa
ID,Idaho
IL,Illinois
IN,Indiana
KS,Kansas
KY,Kentucky
LA,Louisiana
MA,Massachusetts
MD,Maryland
ME,Maine
MI,Michigan
MN,Minnesota
MO,Missouri
MS,Mississippi
MT,Montana
NC,North Carolina
ND,North Dakota
NE,Nebraska
NH,New Hampshire
NJ,New Jersey
NM,New Mexico
NV,Nevada
NY,New York
OH,Ohio
OK,Oklahoma
OR,Oregon
PA,Pennsylvania
PR,Puerto Rico
RI,Rhode Island
SC,South Carolina
SD,South Dakota
TN,Tennessee
TX,Texas
UT,Utah
VA,Virginia
VI,Virgin Islands
VT,Vermont
WA,Washington
WI,Wisconsin
WV,West Virginia
WY,Wyoming
1 AA Armed Forces America
2 AE Armed Forces
3 AP Armed Forces Pacific
4 AK Alaska
5 AL Alabama
6 AR Arkansas
7 AZ Arizona
8 CA California
9 CO Colorado
10 CT Connecticut
11 DC District of Columbia
12 DE Delaware
13 FL Florida
14 GA Georgia
15 GU Guam
16 HI Hawaii
17 IA Iowa
18 ID Idaho
19 IL Illinois
20 IN Indiana
21 KS Kansas
22 KY Kentucky
23 LA Louisiana
24 MA Massachusetts
25 MD Maryland
26 ME Maine
27 MI Michigan
28 MN Minnesota
29 MO Missouri
30 MS Mississippi
31 MT Montana
32 NC North Carolina
33 ND North Dakota
34 NE Nebraska
35 NH New Hampshire
36 NJ New Jersey
37 NM New Mexico
38 NV Nevada
39 NY New York
40 OH Ohio
41 OK Oklahoma
42 OR Oregon
43 PA Pennsylvania
44 PR Puerto Rico
45 RI Rhode Island
46 SC South Carolina
47 SD South Dakota
48 TN Tennessee
49 TX Texas
50 UT Utah
51 VA Virginia
52 VI Virgin Islands
53 VT Vermont
54 WA Washington
55 WI Wisconsin
56 WV West Virginia
57 WY Wyoming

View File

@ -158,3 +158,4 @@ remove_lat_lng_user_fields.sql
update_get_work_for_larger_radius.sql
periodic_emails.sql
remember_extra_scoring_data.sql
indexing_for_regions.sql

View File

@ -0,0 +1,2 @@
-- Indexing for the regions table.
-- Non-unique index on regions.countrycode.
-- NOTE(review): the composite unique index below also leads with countrycode, so this
-- single-column index may be redundant on engines that can use a leading-column prefix
-- of a multicolumn index -- confirm against the target database before dropping it.
create index regions_countrycode_ndx on regions (countrycode);
-- Unique index: allows at most one row per (countrycode, region) pair.
create unique index regions_countrycode_region_ndx on regions (countrycode, region);

View File

@ -7,47 +7,50 @@ module JamRuby
self.where(countrycode: country).order('regionname asc').all
end
def self.import_from_xx_region(countrycode, file)
def self.import_from_region_codes(file)
# File xx_region.csv
# File region_codes.csv
# Format:
# region,regionname
# countrycode,region,regionname
# what this does is not replace the contents of the table, but rather update the specified rows with the names.
# any rows not specified are left alone. the parameter countrycode denotes the country of the region (when uppercased)
raise "countrycode (#{MaxMindIsp.quote_value(countrycode)}) is missing or invalid (it must be two characters)" unless countrycode and countrycode.length == 2
countrycode = countrycode.upcase
# what this does is replace the contents of the table with the new data.
self.transaction do
self.connection.execute "update #{self.table_name} set regionname = region where countrycode = #{MaxMindIsp.quote_value(countrycode)}"
self.connection.execute "delete from #{self.table_name}"
File.open(file, 'r:ISO-8859-1') do |io|
saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : 0
saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : -1
count = 0
ncols = 2
errors = 0
ncols = 3
csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false})
csv.each do |row|
raise "file does not have expected number of columns (#{ncols}): #{row.length}" unless row.length == ncols
region = row[0]
regionname = row[1]
countrycode = row[0]
region = row[1]
regionname = row[2]
stmt = "UPDATE #{self.table_name} SET regionname = #{MaxMindIsp.quote_value(regionname)} WHERE countrycode = #{MaxMindIsp.quote_value(countrycode)} AND region = #{MaxMindIsp.quote_value(region)}"
self.connection.execute stmt
count += 1
if countrycode.length == 2 and region.length == 2 and regionname.length >= 2 and regionname.length <= 64
if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level < Logger::INFO
ActiveRecord::Base.logger.debug "... logging updates to #{self.table_name} suspended ..."
ActiveRecord::Base.logger.level = Logger::INFO
stmt = "INSERT INTO #{self.table_name} (countrycode, region, regionname) VALUES (#{self.connection.quote(countrycode)}, #{self.connection.quote(region)}, #{self.connection.quote(regionname)})"
self.connection.execute stmt
count += 1
if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level < Logger::INFO
ActiveRecord::Base.logger.debug "... logging updates to #{self.table_name} suspended ..."
ActiveRecord::Base.logger.level = Logger::INFO
end
else
ActiveRecord::Base.logger.warn("bogus region_codes record '#{countrycode}', '#{region}', '#{regionname}'") if ActiveRecord::Base.logger
errors += 1
end
end
if ActiveRecord::Base.logger
ActiveRecord::Base.logger.level = saved_level
ActiveRecord::Base.logger.debug "updated #{count} records in #{self.table_name}"
ActiveRecord::Base.logger.debug "inserted #{count} records into #{self.table_name}, #{errors} errors"
end
end # file
end # transaction

View File

@ -29,9 +29,9 @@ namespace :db do
Country.import_from_iso3166 ENV['file']
end
desc "Import a region database (regioncode, regionname); run like this: rake db:import_regions countrycode=XX file=/path/to/xx_region.csv"
desc "Import a region database (countrycode, regioncode, regionname); run like this: rake db:import_regions file=/path/to/region_codes.csv"
task import_regions: :environment do
Region.import_from_xx_region(ENV['countrycode'], ENV['file'])
Region.import_from_region_codes(ENV['file'])
end
desc "Help"
@ -42,7 +42,7 @@ namespace :db do
puts "bundle exec rake db:import_geoip_locations file=/path/to/GeoIPCity-134-Location.csv # geo-134"
puts "bundle exec rake db:import_jam_isp file=/path/to/GeoIPISP.csv # geo-124"
puts "bundle exec rake db:import_countries file=/path/to/iso3166.csv # db/geodata"
puts "bundle exec rake db:import_regions countrycode=XX file=/path/to/xx_region.csv # db/geodata, both of them"
puts "bundle exec rake db:import_regions file=/path/to/region_codes.csv # db/geodata"
end
desc "Create a fake set of maxmind data"