diff --git a/db/geodata/README.txt b/db/geodata/README.txt index ff42edb3b..4988404c0 100644 --- a/db/geodata/README.txt +++ b/db/geodata/README.txt @@ -1 +1,10 @@ this is just for getting this maxmind data over there so i can use it. + +source for iso3166-1 data: + +http://dev.maxmind.com/static/csv/codes/iso3166.csv + +source for iso3166-2 data (compatible): + +http://geolite.maxmind.com/download/geoip/misc/region_codes.csv + diff --git a/db/geodata/ca_region.csv b/db/geodata/ca_region.csv deleted file mode 100644 index f24bd3902..000000000 --- a/db/geodata/ca_region.csv +++ /dev/null @@ -1,13 +0,0 @@ -AB,Alberta -BC,British Columbia -MB,Manitoba -NB,New Brunswick -NL,Newfoundland and Labrador -NS,Nova Scotia -NT,Northwest Territories -NU,Nunavut -ON,Ontario -PE,Prince Edward Island -QC,Quebec -SK,Saskatchewan -YT,Yukon diff --git a/db/geodata/us_region.csv b/db/geodata/us_region.csv deleted file mode 100644 index b8a27ee2e..000000000 --- a/db/geodata/us_region.csv +++ /dev/null @@ -1,57 +0,0 @@ -AA,Armed Forces America -AE,Armed Forces -AP,Armed Forces Pacific -AK,Alaska -AL,Alabama -AR,Arkansas -AZ,Arizona -CA,California -CO,Colorado -CT,Connecticut -DC,District of Columbia -DE,Delaware -FL,Florida -GA,Georgia -GU,Guam -HI,Hawaii -IA,Iowa -ID,Idaho -IL,Illinois -IN,Indiana -KS,Kansas -KY,Kentucky -LA,Louisiana -MA,Massachusetts -MD,Maryland -ME,Maine -MI,Michigan -MN,Minnesota -MO,Missouri -MS,Mississippi -MT,Montana -NC,North Carolina -ND,North Dakota -NE,Nebraska -NH,New Hampshire -NJ,New Jersey -NM,New Mexico -NV,Nevada -NY,New York -OH,Ohio -OK,Oklahoma -OR,Oregon -PA,Pennsylvania -PR,Puerto Rico -RI,Rhode Island -SC,South Carolina -SD,South Dakota -TN,Tennessee -TX,Texas -UT,Utah -VA,Virginia -VI,Virgin Islands -VT,Vermont -WA,Washington -WI,Wisconsin -WV,West Virginia -WY,Wyoming diff --git a/db/manifest b/db/manifest index b1162bd2c..6bc1571b3 100755 --- a/db/manifest +++ b/db/manifest @@ -158,3 +158,4 @@ remove_lat_lng_user_fields.sql update_get_work_for_larger_radius.sql periodic_emails.sql remember_extra_scoring_data.sql +indexing_for_regions.sql diff --git a/db/up/indexing_for_regions.sql b/db/up/indexing_for_regions.sql new file mode 100644 index 000000000..e59ebaca2 --- /dev/null +++ b/db/up/indexing_for_regions.sql @@ -0,0 +1,2 @@ +create index regions_countrycode_ndx on regions (countrycode); +create unique index regions_countrycode_region_ndx on regions (countrycode, region); diff --git a/ruby/lib/jam_ruby/models/region.rb b/ruby/lib/jam_ruby/models/region.rb index decc05b9a..8a44a4f6d 100644 --- a/ruby/lib/jam_ruby/models/region.rb +++ b/ruby/lib/jam_ruby/models/region.rb @@ -7,47 +7,50 @@ module JamRuby self.where(countrycode: country).order('regionname asc').all end - def self.import_from_xx_region(countrycode, file) + def self.import_from_region_codes(file) - # File xx_region.csv + # File region_codes.csv # Format: - # region,regionname + # countrycode,region,regionname - # what this does is not replace the contents of the table, but rather update the specifies rows with the names. - # any rows not specified are left alone. the parameter countrycode denote the country of the region (when uppercased) - - raise "countrycode (#{MaxMindIsp.quote_value(countrycode)}) is missing or invalid (it must be two characters)" unless countrycode and countrycode.length == 2 - countrycode = countrycode.upcase + # what this does is replace the contents of the table with the new data. self.transaction do - self.connection.execute "update #{self.table_name} set regionname = region where countrycode = #{MaxMindIsp.quote_value(countrycode)}" + self.connection.execute "delete from #{self.table_name}" File.open(file, 'r:ISO-8859-1') do |io| - saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : 0 + saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : -1 count = 0 - - ncols = 2 + errors = 0 + ncols = 3 csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false}) csv.each do |row| raise "file does not have expected number of columns (#{ncols}): #{row.length}" unless row.length == ncols - region = row[0] - regionname = row[1] + countrycode = row[0] + region = row[1] + regionname = row[2] - stmt = "UPDATE #{self.table_name} SET regionname = #{MaxMindIsp.quote_value(regionname)} WHERE countrycode = #{MaxMindIsp.quote_value(countrycode)} AND region = #{MaxMindIsp.quote_value(region)}" - self.connection.execute stmt - count += 1 + if countrycode.length == 2 and region.length == 2 and regionname.length >= 2 and regionname.length <= 64 - if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level < Logger::INFO - ActiveRecord::Base.logger.debug "... logging updates to #{self.table_name} suspended ..." - ActiveRecord::Base.logger.level = Logger::INFO + stmt = "INSERT INTO #{self.table_name} (countrycode, region, regionname) VALUES (#{self.connection.quote(countrycode)}, #{self.connection.quote(region)}, #{self.connection.quote(regionname)})" + self.connection.execute stmt + count += 1 + + if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level < Logger::INFO + ActiveRecord::Base.logger.debug "... logging updates to #{self.table_name} suspended ..." + ActiveRecord::Base.logger.level = Logger::INFO + end + else + ActiveRecord::Base.logger.warn("bogus region_codes record '#{countrycode}', '#{region}', '#{regionname}'") if ActiveRecord::Base.logger + errors += 1 end end if ActiveRecord::Base.logger ActiveRecord::Base.logger.level = saved_level - ActiveRecord::Base.logger.debug "updated #{count} records in #{self.table_name}" + ActiveRecord::Base.logger.debug "inserted #{count} records into #{self.table_name}, #{errors} errors" end end # file end # transaction diff --git a/web/lib/tasks/import_max_mind.rake b/web/lib/tasks/import_max_mind.rake index 526c73514..c23e11b41 100644 --- a/web/lib/tasks/import_max_mind.rake +++ b/web/lib/tasks/import_max_mind.rake @@ -29,9 +29,9 @@ namespace :db do Country.import_from_iso3166 ENV['file'] end - desc "Import a region database (regioncode, regionname); run like this: rake db:import_regions countrycode=XX file=/path/to/xx_region.csv" + desc "Import a region database (countrycode, regioncode, regionname); run like this: rake db:import_regions file=/path/to/region_codes.csv" task import_regions: :environment do - Region.import_from_xx_region(ENV['countrycode'], ENV['file']) + Region.import_from_region_codes(ENV['file']) end desc "Help" @@ -42,7 +42,7 @@ namespace :db do puts "bundle exec rake db:import_geoip_locations file=/path/to/GeoIPCity-134-Location.csv # geo-134" puts "bundle exec rake db:import_jam_isp file=/path/to/GeoIPISP.csv # geo-124" puts "bundle exec rake db:import_countries file=/path/to/iso3166.csv # db/geodata" - puts "bundle exec rake db:import_regions countrycode=XX file=/path/to/xx_region.csv # db/geodata, both of them" + puts "bundle exec rake db:import_regions file=/path/to/region_codes.csv # db/geodata" end desc "Create a fake set of maxmind data"