jam-cloud/ruby/lib/jam_ruby/models/max_mind_release.rb

218 lines
8.2 KiB
Ruby

module JamRuby
  # A single MaxMind GeoIP data release stored on S3.
  #
  # A release row holds uploader-mounted URLs for the zipped MaxMind source
  # files (GeoIP city/ISP CSVs), the ISO-3166 country and region code tables,
  # and optionally a set of pre-built table dumps. #import downloads every
  # asset (verifying stored MD5s and reusing files already on disk), then
  # loads everything into the database inside one transaction.
  class MaxMindRelease < ActiveRecord::Base
    include S3ManagerMixin

    @@log = Logging.logger[MaxMindRelease]

    mount_uploader :geo_ip_124_url, MaxMindReleaseUploader
    mount_uploader :geo_ip_134_url, MaxMindReleaseUploader
    #mount_uploader :geo_ip_139_url, MaxMindReleaseUploader
    #mount_uploader :geo_ip_142_url, MaxMindReleaseUploader
    mount_uploader :iso3166_url, MaxMindReleaseUploader
    mount_uploader :region_codes_url, MaxMindReleaseUploader
    mount_uploader :table_dumps_url, MaxMindReleaseUploader

    # S3 key prefix used by the mounted uploaders for this release.
    def store_dir
      "maxmind/#{released_at}"
    end

    # Build the import spec for a single table: if a pre-built dump file was
    # downloaded, prefer it and tell the importer to load it via COPY;
    # otherwise fall back to the raw MaxMind source file.
    #
    # @param file [String, nil] path to the raw source CSV
    # @param dump [String, nil] path to the table dump, if present
    # @return [Hash] +{file:, use_copy:}+
    def file_or_dump(file, dump)
      if dump
        {file: dump, use_copy: true}
      else
        {file: file, use_copy: false}
      end
    end

    # Download all assets for this release and import them into the database,
    # logging total elapsed wall-clock time.
    #
    # @param force_from_source [Boolean] when true, ignore any uploaded table
    #   dumps and rebuild every table from the raw MaxMind files
    def import(force_from_source=false)
      @@log.debug("-----------------------------------")
      @@log.debug("--------- STARTING IMPORT ---------")
      @@log.debug("-----------------------------------")
      start = Time.now
      geo_ip_124_files, geo_ip_134_files, iso3166, region_codes, table_dump_files = download_assets(force_from_source)
      import_to_database(geo_ip_124_files, geo_ip_134_files, iso3166, region_codes, table_dump_files)
      @@log.debug("IMPORT TOOK: #{Time.now - start} SECONDS")
      @@log.debug("-----------------------------------")
      @@log.debug("--------- FINISHED IMPORT ---------")
      @@log.debug("-----------------------------------")
    end

    # Import every table inside a single transaction so a failed import
    # leaves the previous release's data untouched. Each +*_files+ argument
    # is a hash of basename => local path as returned by #download_and_unzip.
    def import_to_database(geo_ip_124_files, geo_ip_134_files, iso3166, region_codes, table_dump_files = {})
      MaxMindRelease.transaction do
        #MaxMindIsp.import_from_max_mind(file_or_dump(geo_ip_142_files['GeoIPISP-142.csv'], table_dump_files['max_mind_isp.txt']))
        #MaxMindGeo.import_from_max_mind(file_or_dump(geo_ip_139_files['GeoIPCity.csv'], table_dump_files['max_mind_geo.txt']))
        GeoIpBlocks.import_from_max_mind(file_or_dump(geo_ip_134_files['GeoIPCity-134-Blocks.csv'], table_dump_files['geoipblocks.txt']))
        GeoIpLocations.import_from_max_mind(file_or_dump(geo_ip_134_files['GeoIPCity-134-Location.csv'], table_dump_files['geoiplocations.txt']))
        JamIsp.import_from_max_mind(file_or_dump(geo_ip_124_files['GeoIPISP.csv'], table_dump_files['geoipisp.txt']))
        Country.import_from_iso3166(file_or_dump(iso3166, table_dump_files['countries.txt']))
        Region.import_from_region_codes(file_or_dump(region_codes, table_dump_files['regions.txt']))
        # updating all scores to an old date to jump-start scoring
        @@log.debug("setting all scores 'score_dt' to one day older than initial time")
        Score.connection.execute("UPDATE scores SET score_dt = score_dt - interval '1 day'")
        # update all user, band, and connection info that is dependent on maxmind
        User.after_maxmind_import
        Connection.after_maxmind_import
        Band.after_maxmind_import
        # migrate any scores that need migrating, before we move all the new maxmind content over the old content
        ScoreHistory.migrate_scores
        @@log.debug("rename temporary tables over existing tables")
        # replace existing tables with new tables
        GeoIpBlocks.after_maxmind_import
        GeoIpLocations.after_maxmind_import
        JamIsp.after_maxmind_import
        Country.after_maxmind_import
        Region.after_maxmind_import
        self.imported = true
        self.imported_at = Time.now
        self.save!
      end
    end

    # Download (and unzip where applicable) every asset into the dated
    # working directory. Table dumps are only fetched when present and not
    # explicitly bypassed via +force_from_source+.
    #
    # @return [Array] geo_ip_124_files, geo_ip_134_files, iso3166 path,
    #   region_codes path, table_dump_files (possibly empty hash)
    def download_assets(force_from_source)
      working_dir = dated_working_dir
      #@@log.debug("downloading and unzipping geoip-142")
      #geo_ip_142_files = download_and_unzip(working_dir, :geo_ip_142_url, self[:geo_ip_142_md5])
      #@@log.debug("downloading and unzipping geoip-139")
      #geo_ip_139_files = download_and_unzip(working_dir, :geo_ip_139_url, self[:geo_ip_139_md5])
      @@log.debug("downloading and unzipping geoip-134")
      geo_ip_134_files = download_and_unzip(working_dir, :geo_ip_134_url, self[:geo_ip_134_md5])
      @@log.debug("downloading and unzipping geoip-124")
      geo_ip_124_files = download_and_unzip(working_dir, :geo_ip_124_url, self[:geo_ip_124_md5])
      @@log.debug("downloading region_codes")
      region_codes = download(working_dir, :region_codes_url, self[:region_codes_md5])
      @@log.debug("downloading iso3166")
      iso3166 = download(working_dir, :iso3166_url, self[:iso3166_md5])
      table_dump_files = {}
      if self[:table_dumps_url] && !force_from_source
        @@log.debug("downloading table dumps")
        table_dump_files = download_and_unzip(working_dir, :table_dumps_url, self[:table_dumps_md5])
      end
      return geo_ip_124_files, geo_ip_134_files, iso3166, region_codes, table_dump_files
    end

    # Download the asset for +field+, then unzip it.
    #
    # @return [Hash] entry basename => extracted local path
    def download_and_unzip(working_dir, field, md5)
      downloaded_filename = download(working_dir, field, md5)
      unzip(working_dir, downloaded_filename)
    end

    # Download the asset for +field+ into +working_dir+, streaming the HTTP
    # response body to disk. A file already on disk with a matching MD5 is
    # reused; a stale file (wrong MD5) is deleted and re-fetched.
    #
    # @param md5 [String] expected hex MD5 of the asset
    # @return [String] local path of the downloaded file
    # @raise [RuntimeError] when the server responds with a non-2xx status
    def download(working_dir, field, md5)
      filename = File.basename(self[field])
      downloaded_filename = File.join(working_dir, filename)
      @@log.debug("working on field=#{field}, filename #{downloaded_filename}")
      # File.exist? — the exists? alias was removed in Ruby 3.2
      if File.exist?(downloaded_filename)
        if matching_md5(downloaded_filename, md5)
          @@log.debug("#{downloaded_filename} file has matching md5")
          return downloaded_filename
        else
          @@log.debug("#{downloaded_filename} exists but has wrong md5. deleting.")
          File.delete(downloaded_filename)
        end
      end
      url = sign_url(field)
      uri = URI(url)
      # File.open rather than Kernel#open: the bare form would treat a
      # leading '|' in the name as a shell command.
      File.open downloaded_filename, 'wb' do |io|
        Net::HTTP.start(uri.host, uri.port, use_ssl: url.start_with?('https')) do |http|
          request = Net::HTTP::Get.new uri
          http.request request do |response|
            response_code = response.code.to_i
            unless response_code >= 200 && response_code <= 299
              raise "bad status code: #{response_code}. body: #{response.body}"
            end
            # stream the body in chunks so large zips never sit in memory
            response.read_body do |chunk|
              io.write chunk
            end
          end
        end
      end
      @@log.debug("downloaded #{downloaded_filename}")
      downloaded_filename
    end

    # True when the file's MD5 matches the expected hex digest.
    # Digest::MD5.file streams the file in chunks and closes it — the
    # previous File.open(...).each form leaked the file handle.
    def matching_md5(downloaded_filename, md5)
      Digest::MD5.file(downloaded_filename).hexdigest == md5
    end

    # Extract every entry of the zip at +downloaded_filename+ into a
    # directory named after the archive (sans extension) under +working_dir+,
    # overwriting any existing files.
    #
    # @return [Hash] entry basename => extracted local path
    def unzip(working_dir, downloaded_filename)
      result = {}
      # overwrites existing files
      Zip.on_exists_proc = true
      # get the file without extension, to make the output folder name
      extension = File.extname(downloaded_filename)
      name = File.basename(downloaded_filename, extension)
      output_dir = File.join(working_dir, name)
      Dir.mkdir(output_dir) unless Dir.exist?(output_dir)
      Zip::File.open(downloaded_filename) do |zip_file|
        # Handle entries one by one
        zip_file.each do |entry|
          # Extract to file/directory/symlink, creating nested dirs as needed
          entry_output_dir = File.join(Dir.pwd, output_dir, File.dirname(entry.name))
          FileUtils.mkdir_p(entry_output_dir)
          output_filename = File.join(output_dir, entry.name)
          File.delete(output_filename) if File.exist?(output_filename)
          entry.extract(output_filename)
          result[File.basename(entry.name)] = output_filename
        end
      end
      result
    end

    # The per-release working directory: the configured maxmind working dir
    # with the release date appended (created on first use).
    #
    # @raise [RuntimeError] when the configured base directory is missing
    def dated_working_dir
      # you need a valid working directory from config
      working_dir = APP_CONFIG.max_mind_working_dir
      unless Dir.exist?(working_dir)
        raise "maxmind release working_dir does not exist=#{working_dir}"
      end
      # append date, and download everything to there
      working_dir = File.join(working_dir, released_at.to_s)
      unless Dir.exist?(working_dir)
        Dir.mkdir(working_dir)
      end
      working_dir
    end

    # Signed URL for downloading +field+. NOTE: the optional parameter
    # deliberately precedes the required one (legal Ruby) so existing callers
    # of both sign_url(field) and sign_url(seconds, field) keep working.
    def sign_url(expiration_time = 120, field)
      resolve_url(field, 'application/zip', expiration_time)
    end

    # Plain http(s) URLs pass through untouched; anything else is treated as
    # an S3 key and signed with the given expiry and response content type.
    def resolve_url(url_field, mime_type, expiration_time)
      self[url_field].start_with?('http') ? self[url_field] : s3_manager.sign_url(self[url_field], {:expires => expiration_time, :response_content_type => mime_type, :secure => false})
    end
  end
end