# Convert +text+ into a lowercase, ASCII-only, dash-separated URL slug.
#
# Pipeline:
#   1. NFKD-normalize so accented letters decompose into base letter +
#      combining mark, then drop every character that is not ASCII.
#   2. Downcase.
#   3. Whitespace runs and ':' runs become '-'; '&' / '+' runs become "and".
#   4. A fixed set of punctuation (quotes , ! ? ( ) = # * .) is removed.
#   5. Runs of '-' collapse to one, and leading/trailing '-' are trimmed.
#
# text - the String to slugify.
#
# Returns a new String; the argument itself is not mutated.
def sluggarize(text)
  # :replace is only honoured when :undef / :invalid are set to :replace.
  # Without them String#encode raises Encoding::UndefinedConversionError on
  # the first non-ASCII character (e.g. the combining accent NFKD leaves
  # behind for "é"), so any accented artist or track name would blow up.
  slug = Unicode.normalize_KD(text)
                .encode("ASCII", invalid: :replace, undef: :replace, replace: "")
                .downcase

  slug = slug.gsub(/\s+/, "-")                          # whitespace -> '-'
  slug = slug.gsub(/[:]+/, "-")                         # ':' -> '-'
  slug = slug.gsub(/[&\+]+/, "and")                     # '&' and '+' -> 'and'
  slug = slug.gsub(/['\",!?\(\)\=\#\+\*\.]/, "")        # strip punctuation
  slug = slug.gsub(/[\-]+/, "-")                        # collapse '-' runs

  # Trim dashes at the very start/end of the string. \A and \z anchor to the
  # whole string rather than to individual lines. After the collapse above no
  # "--" can remain, so no separate double-dash check is needed.
  slug.gsub(/\A-+|-+\z/, "")
end
# Adds slug and S3-origin bookkeeping columns to public.jam_tracks.
#
# NOTE(review): the class name does not match the migration file name
# (jam_track_import_tency); confirm the migration runner does not require
# them to agree.
class AddNumResultsSeen < ActiveRecord::Migration
  # Adds original_artist_slug and name_slug (each with a btree index),
  # origin_s3_path (UNIQUE), origin_s3_bucket and s3_audio_dir.
  def self.up
    execute("ALTER TABLE public.jam_tracks ADD COLUMN original_artist_slug VARCHAR;")
    execute("CREATE INDEX jam_tracks_original_artist_slug_index ON public.jam_tracks USING btree (original_artist_slug);")
    execute("ALTER TABLE public.jam_tracks ADD COLUMN name_slug VARCHAR;")
    execute("CREATE INDEX jam_tracks_name_slug_index ON public.jam_tracks USING btree (name_slug);")
    execute("ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_path VARCHAR UNIQUE;")
    execute("ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_bucket VARCHAR;")
    execute("ALTER TABLE public.jam_tracks ADD COLUMN s3_audio_dir VARCHAR;")

    # The same statements as plain SQL, for running by hand:
    #ALTER TABLE public.jam_tracks ADD COLUMN original_artist_slug VARCHAR;
    #CREATE INDEX jam_tracks_original_artist_slug_index ON public.jam_tracks USING btree (original_artist_slug);
    #ALTER TABLE public.jam_tracks ADD COLUMN name_slug VARCHAR;
    #CREATE INDEX jam_tracks_name_slug_index ON public.jam_tracks USING btree (name_slug);
    #ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_path VARCHAR UNIQUE;
    #ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_bucket VARCHAR;
    #ALTER TABLE public.jam_tracks ADD COLUMN s3_audio_dir VARCHAR;

    # just for prod/staging only
    #GRANT UPDATE on jam_tracks to lambda;
    #GRANT INSERT on jam_tracks to lambda;
    #GRANT UPDATE on jam_track_tracks to lambda;
    #GRANT INSERT on jam_track_tracks to lambda;
    #GRANT UPDATE on jam_track_files to lambda;
    #GRANT INSERT on jam_track_files to lambda;
    #GRANT UPDATE on genres_jam_tracks to lambda;
    #GRANT INSERT on genres_jam_tracks to lambda;
  end

  # Reverses self.up.
  #
  # Each index is dropped BEFORE its column: PostgreSQL removes indexes on a
  # column automatically when the column is dropped, so a DROP INDEX issued
  # afterwards fails with "index does not exist" and aborts the rollback.
  # IF EXISTS keeps the rollback re-runnable after a partial failure.
  #
  # NOTE(review): the visible diff hunk ends right after the s3_audio_dir
  # drop; confirm the original self.down has no further statements.
  def self.down
    execute("DROP INDEX IF EXISTS jam_tracks_original_artist_slug_index;")
    execute("ALTER TABLE public.jam_tracks DROP COLUMN original_artist_slug;")
    execute("DROP INDEX IF EXISTS jam_tracks_name_slug_index;")
    execute("ALTER TABLE public.jam_tracks DROP COLUMN name_slug;")
    execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_path;")
    execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_bucket;")
    execute("ALTER TABLE public.jam_tracks DROP COLUMN s3_audio_dir;")
  end
end
"""Backfill ``original_artist_slug`` and ``name_slug`` on ``jam_tracks``.

Usage::

    python3 sluggarize_jamtracks.py [--env dev|staging|prod] [--dry-run]

Connection settings come from ``JAM_DB_<ENV>_*`` environment variables.
"""
import argparse
import os
import re
import sys
import unicodedata
from urllib.parse import quote

_VALID_ENVS = ("DEV", "STAGING", "PROD")


def get_db_config(env):
    """Build psycopg2 connection kwargs for *env* from environment variables.

    Args:
        env: Environment name, case-insensitive: ``dev``, ``staging`` or
            ``prod``.

    Returns:
        A dict with keys ``dbname``, ``user``, ``password``, ``host`` and
        ``port``, each read from ``JAM_DB_<ENV>_<KEY>`` with a local
        development default.

    Raises:
        ValueError: If *env* is not one of the supported environments.
    """
    env = env.upper()
    if env not in _VALID_ENVS:
        raise ValueError("Invalid environment. Choose from dev, staging, or prod.")

    return {
        "dbname": os.getenv(f"JAM_DB_{env}_DBNAME", "jam"),
        "user": os.getenv(f"JAM_DB_{env}_USER", "jam"),
        "password": os.getenv(f"JAM_DB_{env}_PASSWORD", ""),
        "host": os.getenv(f"JAM_DB_{env}_HOST", "127.0.0.1"),
        "port": os.getenv(f"JAM_DB_{env}_PORT", "5432"),
    }


def slugify(text):
    """Convert a string into a lowercase, ASCII-only, dash-separated slug.

    Accented letters are decomposed (NFKD) and reduced to their ASCII base;
    characters with no ASCII form are dropped.
    """
    text = text.lower()
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("utf-8")
    text = re.sub(r"[\s]+", "-", text)  # Replace whitespace with '-'
    text = re.sub(r"[\:]+", "-", text)  # Replace ':' with '-'
    text = re.sub(r"[&\+]+", "and", text)  # Replace '&' and '+' with 'and'
    text = re.sub(r"['\",!?\(\)\=\#\+\*\.]", "", text)  # Remove punctuation
    text = re.sub(r"[\-]+", "-", text)  # Collapse consecutive dashes
    # strip() is a no-op when there is nothing to trim, so no guard is needed.
    return text.strip("-")


def needs_percent_encoding(slug):
    """Return True if *slug* would change when percent-encoded for a URL.

    ``safe=""`` matters: ``quote`` treats ``/`` as safe by default, which
    would let a slug such as ``ac/dc`` pass unnoticed.
    """
    return quote(slug, safe="") != slug


def update_slugs(env, dry_run=False):
    """Fetch every jam_tracks row, generate slugs, and write them back.

    Args:
        env: Environment name accepted by :func:`get_db_config`.
        dry_run: When true, compute slugs and print warnings but issue no
            UPDATE and no commit.

    Returns:
        True on success, False if any error occurred (the error is printed).
    """
    # Bound up front so the ``finally`` clause is safe even when the failure
    # happens before the connection or cursor exists.
    conn = None
    cursor = None
    try:
        db_config_for_env = get_db_config(env)

        # Imported here rather than at module level so the pure helpers
        # (slugify, get_db_config) can be imported without the DB driver.
        import psycopg2

        conn = psycopg2.connect(**db_config_for_env)
        cursor = conn.cursor()

        cursor.execute("SELECT id, original_artist, name FROM jam_tracks;")
        rows = cursor.fetchall()

        updated = 0
        for track_id, original_artist, name in rows:
            artist_slug = slugify(original_artist) if original_artist else ""
            name_slug = slugify(name) if name else ""

            if needs_percent_encoding(artist_slug):
                print(f"Warning: Slug mismatch for original_artist '{original_artist}' -> '{artist_slug}'")
            if needs_percent_encoding(name_slug):
                print(f"Warning: Slug mismatch for name '{name}' -> '{name_slug}'")

            if dry_run:
                continue

            cursor.execute(
                "UPDATE jam_tracks SET original_artist_slug = %s, name_slug = %s WHERE id = %s;",
                (artist_slug, name_slug, track_id),
            )
            # rowcount only reflects the statement just executed, so the
            # total has to be accumulated per UPDATE.
            updated += cursor.rowcount

        if not dry_run:
            conn.commit()
            print(f"Updated {updated} rows.")
        return True

    except Exception as e:  # top-level boundary of a CLI script
        # Closing the connection without commit discards any partial work.
        print("Error:", e)
        return False

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()


def main():
    """Parse command-line arguments and run the backfill; return an exit code."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true", help="Run in dry-run mode without updating the database")
    parser.add_argument("--env", choices=["dev", "staging", "prod"], default="dev", help="Specify the environment (dev, staging, prod)")

    args = parser.parse_args()

    return 0 if update_slugs(args.env, dry_run=args.dry_run) else 1


if __name__ == "__main__":
    sys.exit(main())