Update manifest maker
This commit is contained in:
parent
e3cff0a825
commit
1279b16ec0
|
|
@ -466,5 +466,158 @@ namespace :jam_tracks do
|
|||
end
|
||||
end
|
||||
|
||||
# Builds a manifest of every JamTrack whose status is 'Production' and
# publishes it to the configured S3 bucket.
#
# Uploaded artifacts (both under jam_track_manifests/):
#   * manifest.csv      - one row per JamTrack track
#   * all-tracks.tar.gz - the audio/ tree holding one "<name>.ogg.meta" JSON
#                         file per track (only metadata is written locally)
#
# Environment:
#   MAX_JAMTRACKS - optional positive integer; stop after that many JamTracks.
#                   Unset, zero or negative means "no limit".
#
# Raises if the tar archive cannot be created, so a missing or partial
# archive is never uploaded.
task gen_jamtrack_manifest: :environment do
  max = ENV['MAX_JAMTRACKS'].to_i
  # nil means "unlimited"; every later use of max must be nil-safe.
  max = nil unless max > 0
  puts "MAX JAMTRACKS #{max}"

  jam_tracks = JamTrack.includes([:jam_track_tracks,
                                  {genres_jam_tracks: :genres},
                                  {jam_track_tracks: :instrument},
                                  :genres]).where(status: 'Production').order('original_artist, name')
  private_bucket = Rails.application.config.aws_bucket
  s3_manager = S3Manager.new(private_bucket, Rails.application.config.aws_access_key_id, Rails.application.config.aws_secret_access_key)

  # Dir.mktmpdir creates the directory itself. It is intentionally left on
  # disk (and its path printed) so the generated files can be inspected.
  tmp_dir = Dir.mktmpdir
  puts "tmp_dir=#{tmp_dir}"

  csv_file = File.join(tmp_dir, "manifest.csv")
  top_folder = File.join(tmp_dir, "audio")
  FileUtils.mkdir_p(top_folder)

  s3_url = lambda { |key| "s3://#{private_bucket}/#{key}" }

  CSV.open(csv_file, "wb") do |csv|
    csv << ['JamTrackId', 'TrackId', 'Artist', 'Song', 'Instrument', 'Part', 'Type', 'Genre', 'LocalOgg', 'LocalMeta', 's3_path_url_44', 's3_path_url_48', 'AudioExists']

    processed = 0
    jam_tracks.each do |jam_track|
      song = jam_track.name
      genre = jam_track.genres.first
      genre_description = genre ? genre.description : ''

      # Bucket songs by their first a-z / 0-9 character so the folder name is
      # always filesystem safe; fall back to '_' when the name has none.
      first_character = song.to_s.downcase[/[a-z0-9]/] || '_'
      folder = File.join(top_folder, first_character)

      jam_track.jam_track_tracks.each do |jam_track_track|
        instrument = jam_track_track.instrument_id
        part = jam_track_track.part || ''
        track_type = jam_track_track.track_type
        url_44 = s3_url.call(jam_track_track.url_44)
        url_48 = s3_url.call(jam_track_track.url_48)

        # The meta file keeps the original part text; the CSV row and the
        # file name below use the sanitized form.
        meta = {
          jam_track_id: jam_track.id,
          track_id: jam_track_track.id,
          artist: jam_track.original_artist,
          song: song,
          instrument: instrument,
          part: part,
          type: track_type,
          genre: genre_description,
          s3_path_url_44: url_44,
          s3_path_url_48: url_48
        }

        # folder structure is:
        #   audio/a/<jamtrack id>_<track id>_<instrument>_<part>_<type>.ogg
        #   audio/a/<jamtrack id>_<track id>_<instrument>_<part>_<type>.ogg.meta
        FileUtils.mkdir_p(folder)
        safe_part = part.gsub(/[^0-9A-Za-z]/, '_')
        base_name = File.join(folder, "#{jam_track.id}_#{jam_track_track.id}_#{instrument}_#{safe_part}_#{track_type}".downcase)
        ogg_file = "#{base_name}.ogg"
        File.write("#{ogg_file}.meta", JSON.pretty_generate(meta))

        exists = s3_manager.exists?(jam_track_track.url_48)

        # Paths in the manifest are relative to tmp_dir (leading '/' kept).
        local_ogg = ogg_file.sub(tmp_dir, '')

        csv << [
          jam_track.id,
          jam_track_track.id,
          jam_track.original_artist,
          song,
          instrument,
          safe_part,
          track_type,
          genre_description,
          local_ogg,
          "#{local_ogg}.meta",
          url_44,
          url_48,
          exists
        ]
      end

      processed += 1
      if max && processed >= max
        puts "Max of jamtracks reached"
        break
      end
    end
  end

  # dump the 1st 10 lines to stdout for quick verification
  # (first(10) simply stops early when the file is shorter than that)
  File.foreach(csv_file).first(10).each { |line| puts line }

  s3_manager.upload('jam_track_manifests/manifest.csv', csv_file, content_type: 'text/csv')

  output_tar_file = File.join(Dir.mktmpdir, "archive.tar.gz")

  # -c = create, -z = compress with gzip, -f = to a file.
  # -C switches to the parent directory first so the archive holds relative
  # paths ("audio/..."). Passing the arguments as a list bypasses the shell,
  # so no escaping is required.
  tar_ok = system('tar', '-czf', output_tar_file, '-C', File.dirname(top_folder), File.basename(top_folder))
  raise "Failed to create tar.gz file." unless tar_ok

  puts "Successfully created tar.gz file."

  s3_manager.upload('jam_track_manifests/all-tracks.tar.gz', output_tar_file, content_type: 'application/gzip')

  puts "tar.gz output=#{output_tar_file}"
  puts "tmp_dir=#{tmp_dir}"
end
|
||||
end
|
||||
|
|
|
|||
Loading…
Reference in New Issue