Update manifest maker

This commit is contained in:
Seth Call 2025-10-26 14:33:00 -05:00
parent e3cff0a825
commit 1279b16ec0
2 changed files with 153 additions and 0 deletions

View File

View File

@@ -466,5 +466,158 @@ namespace :jam_tracks do
end end
end end
# Builds a manifest of every JamTrack track whose jam track has status
# 'Production' and publishes the results to S3.
#
# Artifacts (created under fresh temp directories, which are left in place and
# printed at the end so they can be inspected):
#   * manifest.csv                    - one row per track; uploaded to
#                                       jam_track_manifests/manifest.csv
#   * audio/<char>/<name>.ogg.meta    - pretty-printed JSON metadata per track
#   * archive.tar.gz                  - tarball of the audio/ tree; uploaded to
#                                       jam_track_manifests/all-tracks.tar.gz
#
# Environment:
#   MAX_JAMTRACKS - optional positive integer; stop after that many jam tracks.
#                   Unset, 0, negative or non-numeric means "no limit".
#
# Raises RuntimeError if the tar archive cannot be created.
task gen_jamtrack_manifest: :environment do |_task, _args|
  # String#to_i yields 0 for unset/non-numeric input; anything non-positive means no limit.
  max = ENV['MAX_JAMTRACKS'].to_i
  max = nil unless max > 0
  puts "MAX JAMTRACKS #{max}"

  jam_tracks = JamTrack.includes([:jam_track_tracks,
                                  {genres_jam_tracks: :genres},
                                  {jam_track_tracks: :instrument},
                                  :genres]).where(status: 'Production').order('original_artist, name')

  private_bucket = Rails.application.config.aws_bucket
  s3_manager = S3Manager.new(private_bucket, Rails.application.config.aws_access_key_id, Rails.application.config.aws_secret_access_key)

  # Dir.mktmpdir already creates the directory, so no extra mkdir is needed for it.
  tmp_dir = Dir.mktmpdir
  puts "tmp_dir=#{tmp_dir}"
  csv_file = File.join(tmp_dir, "manifest.csv")
  top_folder = File.join(tmp_dir, "audio")
  FileUtils.mkdir_p(top_folder)

  processed = 0
  CSV.open(csv_file, "wb") do |csv|
    csv << ['JamTrackId', 'TrackId', 'Artist', 'Song', 'Instrument', 'Part', 'Type', 'Genre', 'LocalOgg', 'LocalMeta', 's3_path_url_44', 's3_path_url_48', 'AudioExists']

    jam_tracks.each do |jam_track|
      # Genre is a property of the jam track, so look it up once rather than per track.
      genre = jam_track.genres.first
      genre_description = genre ? genre.description : ''

      # Bucket by the first a-z / 0-9 character of the song name. Leading
      # punctuation or whitespace is skipped so it can never become a folder
      # name; names with no such character (or a nil name) go under '_'.
      bucket_char = jam_track.name.to_s.downcase[/[a-z0-9]/] || '_'
      folder = File.join(top_folder, bucket_char)
      FileUtils.mkdir_p(folder) # idempotent; no existence check required

      jam_track.jam_track_tracks.each do |jam_track_track|
        instrument = jam_track_track.instrument_id
        part = jam_track_track.part || ''
        s3_url_44 = "s3://#{private_bucket}/" + jam_track_track.url_44
        s3_url_48 = "s3://#{private_bucket}/" + jam_track_track.url_48

        # The meta file carries the part exactly as stored on the track.
        meta = {
          jam_track_id: jam_track.id,
          track_id: jam_track_track.id,
          artist: jam_track.original_artist,
          song: jam_track.name,
          instrument: instrument,
          part: part,
          type: jam_track_track.track_type,
          genre: genre_description,
          s3_path_url_44: s3_url_44,
          s3_path_url_48: s3_url_48
        }

        # folder structure is:
        #   audio/<char>/<jamtrackid>_<trackid>_<instrument>_<part>_<type>.ogg
        #   audio/<char>/<jamtrackid>_<trackid>_<instrument>_<part>_<type>.ogg.meta
        # 'part' is sanitized so it is filesystem safe; the CSV row uses the sanitized value too.
        safe_part = part.gsub(/[^0-9A-Za-z]/, '_')
        file_stem = "#{jam_track.id}_#{jam_track_track.id}_#{instrument}_#{safe_part}_#{jam_track_track.track_type}".downcase
        ogg_file = File.join(folder, "#{file_stem}.ogg")
        # Path as recorded in the manifest: relative to tmp_dir, with a leading '/'.
        local_ogg = ogg_file.sub(tmp_dir, '')

        # Write the metadata straight to its final location next to the (future) ogg.
        File.open("#{ogg_file}.meta", "w") do |f|
          f.write(JSON.pretty_generate(meta))
        end

        csv << [
          jam_track.id,
          jam_track_track.id,
          jam_track.original_artist,
          jam_track.name,
          instrument,
          safe_part,
          jam_track_track.track_type,
          genre_description,
          local_ogg,
          "#{local_ogg}.meta",
          s3_url_44,
          s3_url_48,
          s3_manager.exists?(jam_track_track.url_48)
        ]
      end

      # Only enforce the cap when one was requested (max is nil for "no limit").
      processed += 1
      if max && processed >= max
        puts "Max of jamtracks reached"
        break
      end
    end
  end

  # dump the 1st 10 lines to stdout for quick verification
  # (File.foreach + first stops at EOF, so short manifests do not raise)
  File.foreach(csv_file).first(10).each { |line| puts line }

  s3_manager.upload('jam_track_manifests/manifest.csv', csv_file, content_type: 'text/csv')

  output_tar_file = File.join(Dir.mktmpdir, "archive.tar.gz")

  # -c = create, -z = compress with gzip, -f = to a file.
  # -C switches into the parent directory first so the archive holds relative
  # paths ("audio/...") instead of absolute ones.
  # The argv form of system bypasses the shell, so no escaping is required.
  tar_ok = system('tar', '-czf', output_tar_file, '-C', File.dirname(top_folder), File.basename(top_folder))
  # Never upload a missing or partial archive.
  raise "Failed to create tar.gz file." unless tar_ok
  puts "Successfully created tar.gz file."

  s3_manager.upload('jam_track_manifests/all-tracks.tar.gz', output_tar_file, content_type: 'application/gzip')
  puts "tar.gz output=#{output_tar_file}"
  puts "tmp_dir=#{tmp_dir}"
end
end end