module JamRuby
  # This is probably a one-off class used to map Tency-named stems into
  # JamKazam-named stems.
  #
  # It scans a folder of original tracks and a folder of hand-renamed
  # ("mapped") tracks, keys every file by "<song id>-<md5 of file>", and then
  # correlates the two sets so that each original file name can be associated
  # with the instrument/part that was assigned to it by hand.
  class TencyStemMapping
    @@log = Logging.logger[TencyStemMapping]

    # Default locations of the two folders that get scanned.
    DEFAULT_ORIGINALS_FOLDER = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music - Original Folder for Normalization Map"
    DEFAULT_MAPPING_FOLDER = "/Volumes/sethcall/Dropbox/seth@jamkazam.com/JamTracks - Tency Music"

    # Lazily-built S3Manager for 'jamkazam-tency' (presumably the bucket name;
    # confirm against S3Manager), using the AWS credentials from APP_CONFIG.
    def s3_manager
      @s3_manager ||= S3Manager.new('jamkazam-tency', APP_CONFIG.aws_access_key_id, APP_CONFIG.aws_secret_access_key)
    end

    # @param originals_folder [String] folder holding one sub-folder per original song
    # @param mapping_folder [String] folder holding one sub-folder per hand-mapped song
    def initialize(originals_folder = DEFAULT_ORIGINALS_FOLDER, mapping_folder = DEFAULT_MAPPING_FOLDER)
      @originals_folder = originals_folder
      @mapping_folder = mapping_folder
      @original_songs = {}
      @mapping_songs = {}
      @mappings = {}
    end

    # Scans both folders and writes both YAML caches.
    def create_map
      tency_originals
      tency_maps
      dump
    end

    # Scans only the mapping folder and writes only its YAML cache.
    def create_mapping_map
      tency_maps
      dump_map
    end

    # Reloads both song caches from the YAML files in the current working directory.
    def hydrate
      @original_songs = YAML.load_file('original_songs.yml')
      @mapping_songs = YAML.load_file('mapping_songs.yml')
    end

    # Extracts the instrument and part from a hand-mapped file name such as
    # "Heart And Soul Stem - Vocal - Lead.wav".
    #
    # @param filename [String] path or file name of a mapped track
    # @return [Array(String, String)] [instrument, part]; either may be nil.
    #   Both are nil when the text after 'Stem' does not start with a dash.
    # @raise [RuntimeError] when the base name does not contain 'Stem'
    def parse_sanitized_filename(filename)
      basename = File.basename(filename)
      stem = basename.index('Stem')
      raise "no stem for #{filename}" unless stem

      # takes off 'Stem' (and everything before it) and the 4-character '.wav' suffix
      stripped = basename[(stem + 'Stem'.length)..-5].strip

      unless stripped.start_with?('-')
        # The original code built this message and discarded it; log it instead
        # of raising so that callers still receive [nil, nil].
        @@log.warn("no or misplaced dash for #{filename}")
        return [nil, nil]
      end

      # now we should have something like "Vocal - Lead" (instrument - part).
      # Non-bang strip: strip! returns nil when there is nothing to strip.
      instrument, part = stripped[1..-1].strip.split('-')
      instrument = instrument.strip if instrument
      part = part.strip if part

      [instrument, part]
    end

    # For all the tracks that I have labeled manually as
    # Instrument = Upright Bass and Part = Upright Bass,
    # can you please change both the Instrument and Part to Double Bass instead?
    #
    # Reloads the caches, parses every mapped file name and logs how many of
    # them are missing an instrument or a part.
    def check_mappings
      missing_instrument = 0
      missing_part = 0
      part_names = []

      hydrate

      @mapping_songs.each do |_cache_id, data|
        mapped_filename = data[:filename]
        @@log.debug("parsing #{mapped_filename}")
        instrument, part = parse_sanitized_filename(mapped_filename)
        @@log.debug("parsed #{instrument} (#{part})")

        missing_instrument += 1 unless instrument
        unless part
          missing_part += 1
          part_names << mapped_filename
        end
      end

      @@log.info("SUMMARY")
      @@log.info("-------")
      @@log.info("missing instruments:#{missing_instrument} missing parts: #{missing_part}")
      @@log.info("files with no parts: #{part_names}")

      # files with no parts (from an earlier run):
      #   "Huey Lewis And The News - Heart And Soul - 31957/Heart And Soul Stem - Synth 2.wav",
      #   "ZZ Top - Tush - 20852/Tush Stem - Clicktrack.wav",
      #   "Crosby Stills And Nash - Teach Your Children - 15440/Teach Your Children Stem - Bass Guitar.wav",
      #   "Brad Paisley - She's Everything - 19886/She's Everything Stem - Clicktrack.wav",
      #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Lap Steel.wav",
      #   "Toby Keith - Beer For My Horses - 7221/Beer For My Horses Stem - Acoustic Guitar.wav"
    end

    # Records instr_part as one more match for the (case-insensitive) original
    # base name. The caller's string is not modified.
    #
    # @param basename [String] base name of the original file
    # @param instr_part [Hash] value returned by JamTrackImporter.determine_instrument
    def track_mapping(basename, instr_part)
      info = (@mappings[basename.downcase] ||= { matches: [] })
      info[:matches] << instr_part
    end

    # Correlates every hand-mapped track with its original (same cache id),
    # prints a summary to stdout, and writes mapping.csv,
    # determinate-single-matches.csv, determinate-multi-matches.csv and
    # ambiguous-matches.csv to the current working directory.
    def correlate
      mapped = 0
      unmapped = 0
      unmapped_details = []
      no_instrument = []
      common_unknown_instruments = {}

      hydrate

      # go through each hand-mapped track, and find its matching original, if any.
      @mapping_songs.each do |cache_id, data|
        mapped_filename = data[:filename]
        found_original = @original_songs[cache_id]

        unless found_original
          unmapped += 1
          unmapped_details << { filename: mapped_filename }
          next
        end

        # mapping made
        original_basename = File.basename(found_original[:filename]).downcase
        mapped += 1

        instrument, part = parse_sanitized_filename(mapped_filename)
        instr_part = JamTrackImporter.determine_instrument(instrument, part)

        if instr_part[:instrument]
          # track the mapping of this one
          track_mapping(original_basename, instr_part)
        else
          @@log.error("unable to determine instrument for #{File.basename(mapped_filename)}")
          no_instrument << { filename: File.basename(mapped_filename), instrument: instrument, part: part }
          common_unknown_instruments["#{instrument}-(#{part})"] = 1
        end
      end

      print_correlation_summary(mapped, unmapped, unmapped_details, no_instrument, common_unknown_instruments)
      rank_mappings
      write_mapping_reports
    end

    # Writes both song caches as YAML into the current working directory.
    def dump
      File.open('original_songs.yml', 'w') { |f| f.write(YAML.dump(@original_songs)) }
      dump_map
    end

    # Writes only the mapped-song cache as YAML into the current working directory.
    def dump_map
      File.open('mapping_songs.yml', 'w') { |f| f.write(YAML.dump(@mapping_songs)) }
    end

    # @param filepath [String] path of the file to hash
    # @return [String] hex MD5 digest of the file's contents
    def md5(filepath)
      Digest::MD5.file(filepath).hexdigest
    end

    # Debug aid: logs every song folder of the originals folder along with the
    # names of its immediate sub-folders.
    def tency_original_check
      song_directories(@originals_folder).each do |song|
        @@log.debug "SONG #{song}"
        song.children.select(&:directory?).each do |dir|
          @@log.debug "#{dir.basename.to_s}"
        end
        @@log.debug ""
      end
    end

    # Fills @original_songs with one entry per file found directly inside each
    # original song folder or inside one of its first-level sub-folders.
    def tency_originals
      song_directories(@originals_folder).each do |song_dir|
        id = parse_id(song_dir.basename.to_s)
        entries = song_dir.children
        files = entries.select(&:file?)

        # also look into any 1st level folders we might find
        entries.select(&:directory?).each do |dir|
          files += dir.children.select(&:file?)
        end

        files.each do |file|
          path = file.to_s
          @@log.debug("processing original track #{path}")
          digest = md5(path)
          @original_songs[cache_id(id, digest)] = { md5: digest, filename: path, id: id }
        end
      end
    end

    # Fills @mapping_songs with one entry per file whose path contains "Stem",
    # looking only at files directly inside each mapped song folder.
    def tency_maps
      song_directories(@mapping_folder).each do |song_dir|
        id = parse_id_mapped(song_dir.basename.to_s)
        @@log.debug "processing song #{song_dir.to_s}"

        song_dir.children.select(&:file?).each do |track|
          path = track.to_s
          next unless path.include?("Stem")

          @@log.debug("processing mapped track #{path}")
          digest = md5(path)
          @mapping_songs[cache_id(id, digest)] = { md5: digest, filename: path }
        end
      end
    end

    # @return [String] key shared by both caches: "<id>-<md5>"
    def cache_id(id, md5)
      "#{id}-#{md5}"
    end

    # Parses the numeric id that follows the last '_' of an original folder name,
    # e.g. amy-winehouse_you-know-i-m-no-good-feat-ghostface-killah_11767
    #
    # @return [Integer] the non-zero id
    # @raise [RuntimeError] when there is no '_' or no non-zero id after it
    def parse_id(filename)
      trailing_id(filename, '_')
    end

    # Parses the numeric id that follows the last '-' of a mapped folder name,
    # e.g. Flyleaf - I'm So Sick - 15771
    #
    # @return [Integer] the non-zero id
    # @raise [RuntimeError] when there is no '-' or no non-zero id after it
    def parse_id_mapped(filename)
      trailing_id(filename, '-')
    end

    # Walks the song folders under 'mapper' on S3 and logs the id parsed from each.
    def tency_originals2
      s3_manager.list_directories('mapper').each do |song_folder|
        @@log.debug("searching through tency directory. song folder:'#{song_folder}'")
        id = parse_id(song_folder)
        @@log.debug("ID #{id}")
        # NOTE(review): the listing below was assigned to an unused local in the
        # original; the call is kept, but this method looks unfinished.
        s3_manager.list_directories(song_folder)
      end
    end

    private

    # Immediate sub-directories of folder, as Pathname objects.
    def song_directories(folder)
      Pathname.new(folder).children.select(&:directory?)
    end

    # Shared implementation of parse_id / parse_id_mapped: the integer that
    # follows the last occurrence of separator, ignoring one trailing '/' and
    # surrounding whitespace.
    def trailing_id(filename, separator)
      index = filename.rindex(separator)
      raise "no #{separator} in filename: #{filename}" unless index

      id = filename[(index + 1)..-1].chomp('/').strip.to_i
      # to_i yields 0 for non-numeric text, so 0 means "no usable id"
      raise "no valid ID in filename: #{filename}" if id == 0

      id
    end

    # Prints the counters gathered by correlate to stdout.
    def print_correlation_summary(mapped, unmapped, unmapped_details, no_instrument, common_unknown_instruments)
      puts("SUMMARY")
      puts("-------")
      puts("MAPPED:#{mapped} UNMAPPED:#{unmapped}")
      unmapped_details.each do |unmapped_detail|
        puts "UNMAPPED FILE: #{File.basename(unmapped_detail[:filename])}"
      end
      puts("UNKNOWN INSTRUMENT: #{no_instrument.length}")
      no_instrument.each do |item|
        puts("UNKNOWN INSTRUMENT: #{item[:filename]}")
      end
      common_unknown_instruments.each_key do |key|
        puts("#{key}")
      end
    end

    # For every original base name, counts identical matches, orders them by
    # descending count and stores the result under :ordered (array of
    # [match, count] pairs) and :detail (printable summary string).
    def rank_mappings
      @mappings.each do |basename, mapping|
        counts = mapping[:matches].each_with_object(Hash.new(0)) { |match, seen| seen[match] += 1 }
        ordered_matches = counts.sort_by { |_match, count| -count }
        detail = ordered_matches.map do |match, count|
          "#{match[:instrument]}(#{match[:part]})/#{count}, "
        end.join

        puts "map detail: #{basename}: #{detail}"

        mapping[:ordered] = ordered_matches
        mapping[:detail] = detail
      end
    end

    # Writes the four CSV reports derived from the ranked mappings.
    def write_mapping_reports
      CSV.open("mapping.csv", "wb") do |csv|
        @mappings.each do |basename, mapping|
          ordered = mapping[:ordered]
          best_match, best_count = ordered[0]
          # Trust a lone candidate, or a winner that beats the runner-up by
          # MORE than 4 counts (the original comment said "at least 4", but the
          # comparison has always been strict; behaviour is kept as-is).
          trust_worthy = ordered.length == 1 || best_count - 4 > ordered[1][1]

          csv << [basename, best_match[:instrument], best_match[:part], best_count, trust_worthy]
        end
      end

      write_determinate_matches("determinate-single-matches.csv") { |count| count == 1 }
      write_determinate_matches("determinate-multi-matches.csv") { |count| count > 1 }

      CSV.open("ambiguous-matches.csv", "wb") do |csv|
        @mappings.each do |basename, mapping|
          csv << [basename, mapping[:detail]] if mapping[:ordered].length > 1
        end
      end
    end

    # Writes one row per base name that has exactly one distinct match and
    # whose match count satisfies the given block.
    def write_determinate_matches(path)
      CSV.open(path, "wb") do |csv|
        @mappings.each do |basename, mapping|
          ordered = mapping[:ordered]
          next unless ordered.length == 1

          match, count = ordered[0]
          next unless yield(count)

          csv << [basename, match[:instrument], match[:part], count]
        end
      end
    end
  end
end