require 'csv' module JamRuby class MaxMindIsp < ActiveRecord::Base self.table_name = 'max_mind_isp' @@log = Logging.logger[MaxMindIsp] def self.import_from_max_mind(options) file = options[:file] use_copy = options[:use_copy] # File Geo-142 # Format: # "beginIp","endIp","countryCode","ISP" # drop indexes on start, then add them back when done start = Time.now MaxMindIsp.delete_all if use_copy Database.copy(MaxMind.table_name, file) else File.open(file, 'r:ISO-8859-1') do |io| s = io.gets.strip # eat the copyright line. gah, why do they have that in their file?? unless s.eql? 'Copyright (c) 2011 MaxMind Inc. All Rights Reserved.' puts s puts 'Copyright (c) 2011 MaxMind Inc. All Rights Reserved.' raise 'file does not start with expected copyright (line 1): Copyright (c) 2011 MaxMind Inc. All Rights Reserved.' end s = io.gets.strip # eat the headers line unless s.eql? '"beginIp","endIp","countryCode","ISP"' puts s puts '"beginIp","endIp","countryCode","ISP"' raise 'file does not start with expected header (line 2): "beginIp","endIp","countryCode","ISP"' end saved_level = ActiveRecord::Base.logger ? ActiveRecord::Base.logger.level : 0 count = 0 stmt = "insert into #{MaxMindIsp.table_name} (ip_bottom, ip_top, country, isp) values" vals = '' sep = '' i = 0 n = 20 # going from 20 to 40 only changed things a little bit, and 512 was slower... and 1024 was even slower (weird) csv = ::CSV.new(io, {encoding: 'ISO-8859-1', headers: false}) csv.each do |row| raise "file does not have expected number of columns (4): #{row.length}" unless row.length == 4 ip_bottom = ip_address_to_int(strip_quotes(row[0])) ip_top = ip_address_to_int(strip_quotes(row[1])) country = row[2] isp = row[3] vals = vals+sep+"(#{ip_bottom}, #{ip_top}, '#{country}', #{quote_value(isp)})" sep = ',' i += 1 if count == 0 or i >= n MaxMindIsp.connection.execute stmt+vals count += i vals = '' sep = '' i = 0 if ActiveRecord::Base.logger and ActiveRecord::Base.logger.level > 1 ActiveRecord::Base.logger.debug "... logging inserts into #{MaxMindIsp.table_name} suspended ..." ActiveRecord::Base.logger.level = 1 end if ActiveRecord::Base.logger and count%10000 < n ActiveRecord::Base.logger.level = saved_level ActiveRecord::Base.logger.debug "... inserted #{count} into #{MaxMindIsp.table_name} ..." ActiveRecord::Base.logger.level = 1 end end end if i > 0 MaxMindIsp.connection.execute stmt+vals count += i end if ActiveRecord::Base.logger ActiveRecord::Base.logger.level = saved_level ActiveRecord::Base.logger.debug "loaded #{count} records into #{MaxMindIsp.table_name}" end end end elapsed = Time.now - start @@log.debug("#{MaxMindIsp.table_name} import took #{elapsed} seconds") end # Make an IP address fit in a signed int. Just divide it by 2, as the least significant part # just can't possibly matter. We can verify this if needed. My guess is the entire bottom octet is # actually irrelevant def self.ip_address_to_int(ip) ip.split('.').inject(0) { |total, value| (total << 8) + value.to_i } end private def self.strip_quotes str return nil if str.nil? if str.start_with? '"' str = str[1..-1] end if str.end_with? '"' str = str.chop end return str end end end