# Ruby source file (110 lines, 3.6 KiB)
require 'csv'
module JamRuby
class MaxMindIsp < ActiveRecord::Base
self.table_name = 'max_mind_isp'
# Replaces the contents of this model's table with the records of a MaxMind
# ISP CSV export (MaxMind file Geo-142).
#
# Expected file layout, read as ISO-8859-1:
#   line 1   Copyright (c) 2011 MaxMind Inc. All Rights Reserved.
#   line 2   "beginIp","endIp","countryCode","ISP"
#   line 3+  one four-column CSV record per IP range
#
# The existing rows are deleted and the new ones inserted inside a single
# MaxMindIsp.transaction block. Rows are written with hand-built multi-row
# INSERT statements sent through the connection, not through model instances.
#
# While the import runs with a debug-level logger, the level is raised so the
# individual INSERT statements are not logged; a progress line is still
# emitted roughly every 10,000 records. The original level is always put
# back, including when the import fails.
#
# @param file [String] path of the CSV file to load (handed to File.open)
# @raise [RuntimeError] if line 1 or line 2 is not the expected text (this
#   includes an empty or truncated file), or if a record does not have
#   exactly four columns
def self.import_from_max_mind(file)
  expected_leading_lines = [
    [1, 'copyright', 'Copyright (c) 2011 MaxMind Inc. All Rights Reserved.'],
    [2, 'header', '"beginIp","endIp","countryCode","ISP"']
  ]
  batch_size = 20 # going from 20 to 40 only changed things a little bit
  info_level = 1  # INFO in Ruby's standard Logger severities; hides debug-level output

  MaxMindIsp.transaction do
    MaxMindIsp.delete_all

    File.open(file, 'r:ISO-8859-1') do |io|
      # MaxMind puts a copyright notice in front of the CSV header. Consume
      # and verify both lines before handing the stream to the CSV parser.
      expected_leading_lines.each do |line_number, what, expected|
        actual = io.gets.to_s.strip # to_s: a missing line fails the comparison instead of crashing
        next if actual == expected

        puts actual
        puts expected
        raise "file does not start with expected #{what} (line #{line_number}): #{expected}"
      end

      logger = ActiveRecord::Base.logger
      saved_level = logger ? logger.level : 0
      # Only touch the logger level when debug output would otherwise be
      # produced; a logger already at INFO or above is left alone.
      debug_logging = !logger.nil? && saved_level < info_level

      stmt = "insert into #{MaxMindIsp.table_name} (ip_bottom, ip_top, country, isp) values"
      pending = [] # "(…)" value tuples waiting to be sent in one INSERT
      count = 0    # records written so far

      begin
        # Options are passed as keywords: a positional Hash is rejected by Ruby 3.
        ::CSV.new(io, encoding: 'ISO-8859-1', headers: false).each do |row|
          raise "file does not have expected number of columns (4): #{row.length}" unless row.length == 4

          ip_bottom = ip_address_to_int(strip_quotes(row[0]))
          ip_top = ip_address_to_int(strip_quotes(row[1]))
          country = row[2]
          isp = row[3]

          # country goes through the same quoting as isp; to_s keeps a
          # missing country as '' exactly as plain interpolation did.
          pending << "(#{ip_bottom}, #{ip_top}, #{quote_value(country.to_s)}, #{quote_value(isp)})"

          # The very first record is sent on its own (presumably so the one
          # statement logged before logging is suspended stays short - TODO
          # confirm); after that, records go out batch_size at a time.
          next unless count == 0 || pending.length >= batch_size

          MaxMindIsp.connection.execute(stmt + pending.join(','))
          count += pending.length
          pending.clear

          next unless debug_logging

          if logger.level < info_level
            logger.debug "... logging inserts into #{MaxMindIsp.table_name} suspended ..."
            logger.level = info_level
          end

          # Briefly restore the original level so the progress line is visible.
          if count % 10_000 < batch_size
            logger.level = saved_level
            logger.debug "... inserted #{count} into #{MaxMindIsp.table_name} ..."
            logger.level = info_level
          end
        end

        # Send whatever is left over from the last partial batch.
        unless pending.empty?
          MaxMindIsp.connection.execute(stmt + pending.join(','))
          count += pending.length
          pending.clear
        end
      ensure
        # Runs on success and on failure, so a bad file cannot leave the
        # application logging at the wrong level.
        logger.level = saved_level if logger
      end

      logger.debug "loaded #{count} records into #{MaxMindIsp.table_name}" if logger
    end
  end
end
# Converts a dotted IPv4 address string into a single integer.
#
# The dot-separated parts are folded together most-significant first, eight
# bits per part, so '1.2.3.4' becomes 0x01020304 (16909060).
#
# The value is NOT halved or otherwise squeezed into a signed 32-bit range:
# addresses from 128.0.0.0 upward produce results above 2**31 - 1.
# NOTE(review): confirm the columns that store these values can hold numbers
# up to 4294967295.
#
# Parsing is lenient: each part goes through String#to_i, so a non-numeric
# part counts as 0, and neither the number of parts nor their range is checked.
#
# @param ip [String] address in dotted notation, e.g. '192.168.0.1'
# @return [Integer] the address as an integer
# @raise [ArgumentError] if ip is nil
def self.ip_address_to_int(ip)
  raise ArgumentError, 'ip address must not be nil' if ip.nil?

  ip.split('.').reduce(0) { |total, octet| (total << 8) + octet.to_i }
end
private
# Removes one leading and/or one trailing double-quote character from +str+.
#
# The two ends are handled independently, so an unmatched quote on either
# side is removed as well. Text between the ends is left untouched.
#
# NOTE: although this is written below the bare `private` keyword, `private`
# has no effect on methods defined with `def self.`, so this method is still
# callable from outside as MaxMindIsp.strip_quotes.
#
# @param str [String, nil] the text to unquote
# @return [String, nil] the text without its surrounding quotes, or nil when
#   str is nil
def self.strip_quotes(str)
  return nil if str.nil?

  str = str[1..-1] if str.start_with?('"')
  str = str.chop if str.end_with?('"')
  str
end
end
end