jam-cloud/ruby/lib/jam_ruby/import/tency_stem_mapping.rb

360 lines
11 KiB
Ruby

module JamRuby
# this is probably a one-off class used to map Tency-named stems into JamKazam-named stems
class TencyStemMapping
@@log = Logging.logger[TencyStemMapping]
# Returns the S3Manager used by this importer, building it on first use and
# reusing the same instance afterwards. It is constructed with
# 'jamkazam-tency' (presumably the bucket name -- confirm against S3Manager)
# and the AWS credentials from APP_CONFIG.
def s3_manager
  return @s3_manager if @s3_manager

  @s3_manager = S3Manager.new('jamkazam-tency', APP_CONFIG.aws_access_key_id, APP_CONFIG.aws_secret_access_key)
end
# Sets up the two source folders and the (initially empty) lookup tables.
#
# originals_folder - directory scanned by tency_originals / tency_original_check
# mapping_folder   - directory scanned by tency_maps
#
# Both arguments are optional and default to the paths this importer was
# written against, so TencyStemMapping.new behaves exactly as before.
def initialize(originals_folder = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music - Original Folder for Normalization Map",
               mapping_folder = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music")
  @originals_folder = originals_folder
  @mapping_folder = mapping_folder
  # cache_id => song hash, filled by tency_originals (or hydrate)
  @original_songs = {}
  # cache_id => song hash, filled by tency_maps (or hydrate)
  @mapping_songs = {}
  # downcased original basename => {matches: [...]}, filled by track_mapping
  @mappings = {}
end
# Full rebuild: scans the originals folder, then the mapping folder, and
# finally writes both tables to disk via dump. Returns whatever dump returns.
def create_map
  [:tency_originals, :tency_maps].each { |scan| send(scan) }
  dump
end
# Partial rebuild: rescans only the mapping folder and then writes
# mapping_songs.yml via dump_map. Returns whatever dump_map returns.
def create_mapping_map
  [:tency_maps, :dump_map].map { |step| send(step) }.last
end
# Reloads both song tables from the YAML files in the current working
# directory (the files written by dump / dump_map), replacing whatever is
# currently held in memory.
def hydrate
  originals_yaml = 'original_songs.yml'
  mappings_yaml = 'mapping_songs.yml'

  @original_songs = YAML.load_file(originals_yaml)
  @mapping_songs = YAML.load_file(mappings_yaml)
end
# Extracts the instrument and part from a hand-renamed stem filename such as
# "Heart And Soul Stem - Vocal - Lead.wav".
#
# filename - path or bare name of the stem file; only its basename is examined
#
# Returns [instrument, part]. Either element is nil when it is absent, and
# both are nil when the text after 'Stem' does not start with a dash.
# Raises RuntimeError when the basename does not contain 'Stem'.
def parse_sanitized_filename(filename)
  basename = File.basename(filename)
  stem = basename.index('Stem')
  raise "no stem for #{filename}" unless stem

  # drop the real extension instead of assuming it is exactly four characters
  name = File.basename(basename, File.extname(basename))
  label = name[(stem + 'Stem'.length)..-1].to_s.strip

  # no or misplaced dash: nothing can be parsed from this name
  return [nil, nil] unless label.start_with?('-')

  # now we should have something like "Vocal - Lead" (instrument - part).
  # Non-bang strip is deliberate: strip! returns nil when there is nothing to
  # trim, which used to blow up on names like "Stem -Vocal-Lead.wav".
  instrument, part = label[1..-1].strip.split('-')
  instrument = instrument.strip if instrument
  part = part.strip if part
  [instrument, part]
end
# For all the tracks that I have labeled manually as
# Instrument = Upright Bass and Part = Upright Bass,
# can you please change both the Instrument and Part to Double Bass instead?
#
# Sanity check over the hand-mapped stems: reloads the tables from disk,
# parses every mapped filename, and logs how many yielded no instrument or no
# part, together with the filenames that had no part.
def check_mappings
  hydrate

  missing_instrument = 0
  missing_part = 0
  part_names = []

  @mapping_songs.each_value do |data|
    mapped_filename = data[:filename]
    @@log.debug("parsing #{mapped_filename}")
    instrument, part = parse_sanitized_filename(mapped_filename)
    @@log.debug("parsed #{instrument} (#{part})")

    missing_instrument += 1 unless instrument
    next if part

    missing_part += 1
    part_names << mapped_filename
  end

  @@log.info("SUMMARY")
  @@log.info("-------")
  @@log.info("missing instruments:#{missing_instrument} missing parts: #{missing_part}")
  @@log.info("files with no parts: #{part_names}")
  # files with no parts, as recorded by the original author:
  #   "Huey Lewis And The News - Heart And Soul - 31957/Heart And Soul Stem - Synth 2.wav"
  #   "ZZ Top - Tush - 20852/Tush Stem - Clicktrack.wav"
  #   "Crosby Stills And Nash - Teach Your Children - 15440/Teach Your Children Stem - Bass Guitar.wav"
  #   "Brad Paisley - She's Everything - 19886/She's Everything Stem - Clicktrack.wav"
  #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Lap Steel.wav"
  #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Acoustic Guitar.wav"
end
# Records one instrument/part observation for an original stem filename.
#
# basename   - filename of the original stem; matched case-insensitively
# instr_part - the observation to record (correlate passes the hash returned
#              by JamTrackImporter.determine_instrument)
#
# Returns the array of all observations recorded so far for that filename.
def track_mapping(basename, instr_part)
  # downcase into a new string: the caller's argument must not be modified,
  # and may be frozen
  key = basename.downcase
  info = (@mappings[key] ||= { matches: [] })
  info[:matches] << instr_part
end
# Cross-references the hand-mapped stems with the original Tency stems and
# reports how each original filename ended up being labelled.
#
# Steps:
#   1. hydrate both tables from their YAML files
#   2. for every mapped stem whose cache id also exists among the originals,
#      parse the instrument/part out of the mapped name and record it against
#      the original's (downcased) basename via track_mapping
#   3. print a summary to stdout
#   4. rank the observations per original basename and write four CSV files
#      into the current directory: mapping.csv,
#      determinate-single-matches.csv, determinate-multi-matches.csv and
#      ambiguous-matches.csv
def correlate
  hydrate

  mapped = 0
  unmapped_details = []
  no_instrument = []
  common_unknown_instruments = {}

  @mapping_songs.each do |song_key, mapped_song|
    # go through each hand-mapped track and find its matching original, if any
    mapped_filename = mapped_song[:filename]
    original_song = @original_songs[song_key]

    unless original_song
      unmapped_details << { filename: mapped_filename }
      next
    end

    # same cache id on both sides, so a mapping can be made
    original_basename = File.basename(original_song[:filename]).downcase
    mapped += 1
    instrument, part = parse_sanitized_filename(mapped_filename)
    instr_part = JamTrackImporter.determine_instrument(instrument, part)

    if instr_part[:instrument]
      track_mapping(original_basename, instr_part)
    else
      mapped_basename = File.basename(mapped_filename)
      @@log.error("unable to determine instrument for #{mapped_basename}")
      no_instrument << { filename: mapped_basename, instrument: instrument, part: part }
      common_unknown_instruments["#{instrument}-(#{part})"] = 1
    end
  end

  puts("SUMMARY")
  puts("-------")
  puts("MAPPED:#{mapped} UNMAPPED:#{unmapped_details.length}")
  unmapped_details.each { |entry| puts "UNMAPPED FILE: #{File.basename(entry[:filename])}" }
  puts("UNKNOWN INSTRUMENT: #{no_instrument.length}")
  no_instrument.each { |entry| puts("UNKNOWN INSTRUMENT: #{entry[:filename]}") }
  common_unknown_instruments.each_key { |label| puts(label) }

  # rank the distinct observations of every original basename, most frequent first
  @mappings.each do |basename, mapping|
    occurrences = mapping[:matches].each_with_object(Hash.new(0)) { |match, seen| seen[match] += 1 }
    ranked = occurrences.sort_by { |_match, count| -count }
    detail_text = ranked.each_with_object("") do |(detail, count), text|
      text << "#{detail[:instrument]}(#{detail[:part]})/#{count}, "
    end
    puts "map detail: #{basename}: #{detail_text}"
    mapping[:ordered] = ranked
    mapping[:detail] = detail_text
  end

  CSV.open("mapping.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      ranked = mapping[:ordered]
      winner, winner_count = ranked[0]
      # a lone candidate is trusted outright; otherwise the winner has to lead
      # the runner-up by more than 4 occurrences
      trust_worthy = ranked.length == 1 || winner_count - 4 > ranked[1][1]
      csv << [basename, winner[:instrument], winner[:part], winner_count, trust_worthy]
    end
  end

  # exactly one candidate, seen exactly once
  CSV.open("determinate-single-matches.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      ranked = mapping[:ordered]
      next unless ranked.length == 1 && ranked[0][1] == 1

      detail, count = ranked[0]
      csv << [basename, detail[:instrument], detail[:part], count]
    end
  end

  # exactly one candidate, seen more than once
  CSV.open("determinate-multi-matches.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      ranked = mapping[:ordered]
      next unless ranked.length == 1 && ranked[0][1] > 1

      detail, count = ranked[0]
      csv << [basename, detail[:instrument], detail[:part], count]
    end
  end

  # more than one candidate: emit the full ranking text for manual review
  CSV.open("ambiguous-matches.csv", "wb") do |csv|
    @mappings.each do |basename, mapping|
      csv << [basename, mapping[:detail]] if mapping[:ordered].length > 1
    end
  end
end
# Writes both song tables as YAML into the current working directory:
# @original_songs to original_songs.yml and @mapping_songs to
# mapping_songs.yml. Existing files are overwritten.
def dump
  File.write('original_songs.yml', YAML.dump(@original_songs))
  File.write('mapping_songs.yml', YAML.dump(@mapping_songs))
end
# Writes only @mapping_songs as YAML to mapping_songs.yml in the current
# working directory, overwriting any existing file.
def dump_map
  File.write('mapping_songs.yml', YAML.dump(@mapping_songs))
end
# Returns the MD5 checksum of the file at filepath as a hex string.
def md5(filepath)
  digest = Digest::MD5.new
  digest.file(filepath)
  digest.hexdigest
end
# Diagnostic helper: for every song directory under the originals folder,
# logs the song path followed by the names of its immediate subdirectories.
def tency_original_check
  song_dirs = Pathname.new(@originals_folder).children.select(&:directory?)
  song_dirs.each do |song_dir|
    subdirs = song_dir.children.select(&:directory?)
    @@log.debug "SONG #{song_dir}"
    subdirs.each { |subdir| @@log.debug subdir.basename.to_s }
    @@log.debug ""
  end
end
# Scans every song directory under the originals folder and records each
# track file in @original_songs, keyed by cache_id(song id, file md5).
# Track files are taken from the song directory itself and from its
# immediate subdirectories (one level only).
def tency_originals
  song_dirs = Pathname.new(@originals_folder).children.select(&:directory?)
  song_dirs.each do |song_dir|
    id = parse_id(song_dir.basename.to_s)

    entries = song_dir.children
    tracks = entries.select(&:file?)
    # also look into any 1st level folders we might find
    tracks += entries.select(&:directory?).flat_map { |subdir| subdir.children.select(&:file?) }

    tracks.each do |track|
      path = track.to_s
      @@log.debug("processing original track #{path}")
      digest = md5(path)
      @original_songs[cache_id(id, digest)] = { md5: digest, filename: path, id: id }
    end
  end
end
# Scans every song directory under the mapping folder and records each stem
# file in @mapping_songs, keyed by cache_id(song id, file md5).
#
# Only files whose own name contains "Stem" are recorded. The check is made
# on the basename rather than the full path, so a song folder that happens
# to contain "Stem" no longer pulls in every file inside it (such files
# would later be rejected by parse_sanitized_filename with "no stem for ...").
def tency_maps
  song_dirs = Pathname.new(@mapping_folder).children.select(&:directory?)
  song_dirs.each do |song_dir|
    id = parse_id_mapped(song_dir.basename.to_s)
    @@log.debug "processing song #{song_dir}"

    song_dir.children.select(&:file?).each do |track|
      next unless track.basename.to_s.include?("Stem")

      path = track.to_s
      @@log.debug("processing mapped track #{path}")
      digest = md5(path)
      @mapping_songs[cache_id(id, digest)] = { md5: digest, filename: path }
    end
  end
end
# Builds the lookup key shared by the original and mapped song tables:
# the song id and the file's md5 joined by a dash.
def cache_id(id, md5)
  format('%s-%s', id, md5)
end
# Extracts the numeric song id from an original folder name, e.g.
#   amy-winehouse_you-know-i-m-no-good-feat-ghostface-killah_11767 => 11767
# The id is whatever follows the last underscore; one trailing '/' is ignored.
#
# Raises RuntimeError when there is no underscore or the id parses to 0.
def parse_id(filename)
  separator = filename.rindex('_')
  raise "no _ in filename: #{filename}" unless separator

  tail = filename[(separator + 1)..-1].chomp('/')
  number = tail.to_i
  raise "no valid ID in filename: #{filename}" if number == 0

  number
end
# Extracts the numeric song id from a hand-mapped folder name, e.g.
#   Flyleaf - I'm So Sick - 15771 => 15771
# The id is whatever follows the last dash; one trailing '/' and surrounding
# whitespace are ignored.
#
# Raises RuntimeError when there is no dash or the id parses to 0.
def parse_id_mapped(filename)
  separator = filename.rindex('-')
  raise "no - in filename: #{filename}" unless separator

  tail = filename[(separator + 1)..-1].chomp('/').strip
  number = tail.to_i
  raise "no valid ID in filename: #{filename}" if number == 0

  number
end
# S3 variant of the originals scan; it appears to be unfinished. For each
# directory listed under 'mapper' it logs the folder and the id parsed from
# its name, then lists that folder's subdirectories without using the result.
def tency_originals2
  song_folders = s3_manager.list_directories('mapper')
  song_folders.each do |folder|
    @@log.debug("searching through tency directory. song folder:'#{folder}'")
    song_id = parse_id(folder)
    @@log.debug("ID #{song_id}")
    # the listing is requested but nothing consumes it yet
    s3_manager.list_directories(folder)
  end
end
end
end