Merged in mc/sluggarize-fixes (pull request #52)
Update sluggarization and sluggarize existing jamtracks * wip * sluggarize script * comment out copy/pastable sql * add readme
This commit is contained in:
parent
f26733fa46
commit
810ae15769
|
|
@ -587,12 +587,39 @@ module JamRuby
|
||||||
end
|
end
|
||||||
|
|
||||||
# http://stackoverflow.com/questions/4308377/ruby-post-title-to-slug
|
# http://stackoverflow.com/questions/4308377/ruby-post-title-to-slug
|
||||||
def sluggarize(field)
|
#def sluggarize(field)
|
||||||
field.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
|
# field.downcase.strip.gsub(' ', '-').gsub(/[^\w-]/, '')
|
||||||
|
#end
|
||||||
|
|
||||||
|
|
||||||
|
# Builds a lowercase, ASCII-only, dash-separated slug from +text+.
#
# The replacement rules match the `slugify` function of the Python
# backfill script in this change, so slugs generated here and slugs
# written by that script agree.
#
# NOTE(review): characters outside the explicit lists below (for example
# '/', ';' or '@') are left in the slug untouched -- confirm that is intended
# before using the slug as a URL path segment.
#
# @param text [String] the title or artist name to convert
# @return [String] the slug; never starts or ends with '-' and never
#   contains consecutive dashes
def sluggarize(text)
  # NFKD splits accented letters into a base letter plus combining marks.
  # The :replace string is only used when :invalid / :undef are set to
  # :replace; without them encode raises Encoding::UndefinedConversionError
  # on the first non-ASCII character instead of dropping it.
  slug = Unicode.normalize_KD(text)
                .encode("ASCII", invalid: :replace, undef: :replace, replace: "")
                .downcase

  slug = slug.gsub(/\s+/, "-")             # whitespace runs become '-'
  slug = slug.gsub(/:+/, "-")              # colons become '-'
  slug = slug.gsub(/[&+]+/, "and")         # '&' and '+' become 'and'
  slug = slug.gsub(/['",!?()=\#*.]/, "")   # drop quotes and listed punctuation
  slug = slug.gsub(/-+/, "-")              # collapse consecutive dashes

  # Trim dashes from both ends. Dashes were collapsed above, so the result
  # cannot contain '--' and no further check is needed.
  slug.gsub(/\A-+|-+\z/, "")
end
|
||||||
|
|
||||||
def generate_slug
|
def generate_slug
|
||||||
self.slug = sluggarize(original_artist) + '-' + sluggarize(name)
|
self.slug = sluggarize(original_artist) + '-' + sluggarize(name)
|
||||||
|
self.original_artist_slug = sluggarize(original_artist);
|
||||||
|
self.name_slug = sluggarize(name)
|
||||||
|
|
||||||
|
|
||||||
if licensor && licensor.slug.present?
|
if licensor && licensor.slug.present?
|
||||||
#raise "no slug on licensor #{licensor.id}" if licensor.slug.nil?
|
#raise "no slug on licensor #{licensor.id}" if licensor.slug.nil?
|
||||||
|
|
|
||||||
|
|
@ -1,32 +1,38 @@
|
||||||
class AddNumResultsSeen < ActiveRecord::Migration
|
class AddNumResultsSeen < ActiveRecord::Migration
|
||||||
def self.up
|
# Adds the slug and S3-origin columns to jam_tracks and creates btree
# indexes on the two slug columns.
def self.up
  [
    "ALTER TABLE public.jam_tracks ADD COLUMN original_artist_slug VARCHAR;",
    "CREATE INDEX jam_tracks_original_artist_slug_index ON public.jam_tracks USING btree (original_artist_slug);",
    "ALTER TABLE public.jam_tracks ADD COLUMN name_slug VARCHAR;",
    "CREATE INDEX jam_tracks_name_slug_index ON public.jam_tracks USING btree (name_slug);",
    "ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_path VARCHAR UNIQUE;",
    "ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_bucket VARCHAR;",
    "ALTER TABLE public.jam_tracks ADD COLUMN s3_audio_dir VARCHAR;"
  ].each do |statement|
    execute(statement)
  end

  # The same statements as raw SQL, kept commented out for copy/paste:
  #ALTER TABLE public.jam_tracks ADD COLUMN original_artist_slug VARCHAR;
  #CREATE INDEX jam_tracks_original_artist_slug_index ON public.jam_tracks USING btree (original_artist_slug);
  #ALTER TABLE public.jam_tracks ADD COLUMN name_slug VARCHAR;
  #CREATE INDEX jam_tracks_name_slug_index ON public.jam_tracks USING btree (name_slug);
  #ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_path VARCHAR UNIQUE;
  #ALTER TABLE public.jam_tracks ADD COLUMN origin_s3_bucket VARCHAR;
  #ALTER TABLE public.jam_tracks ADD COLUMN s3_audio_dir VARCHAR;

  # just for prod/staging only
  #GRANT UPDATE on jam_tracks to lambda;
  #GRANT INSERT on jam_tracks to lambda;
  #GRANT UPDATE on jam_track_tracks to lambda;
  #GRANT INSERT on jam_track_tracks to lambda;
  #GRANT UPDATE on jam_track_files to lambda;
  #GRANT INSERT on jam_track_files to lambda;
  #GRANT UPDATE on genres_jam_tracks to lambda;
  #GRANT INSERT on genres_jam_tracks to lambda;
end
|
||||||
|
|
||||||
def self.down
|
def self.down
|
||||||
execute("ALTER TABLE public.jam_tracks DROP COLUMN original_artist_slug;")
|
execute("ALTER TABLE public.jam_tracks DROP COLUMN original_artist_slug;")
|
||||||
|
execute("DROP INDEX jam_tracks_original_artist_slug_index;")
|
||||||
execute("ALTER TABLE public.jam_tracks DROP COLUMN name_slug;")
|
execute("ALTER TABLE public.jam_tracks DROP COLUMN name_slug;")
|
||||||
|
execute("DROP INDEX jam_tracks_name_slug_index;")
|
||||||
execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_path;")
|
execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_path;")
|
||||||
execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_bucket;")
|
execute("ALTER TABLE public.jam_tracks DROP COLUMN origin_s3_bucket;")
|
||||||
execute("ALTER TABLE public.jam_tracks DROP COLUMN s3_audio_dir;")
|
execute("ALTER TABLE public.jam_tracks DROP COLUMN s3_audio_dir;")
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,13 @@
|
||||||
|
Run:
|
||||||
|
|
||||||
|
# create a virtual env
|
||||||
|
python3 -m venv venv
|
||||||
|
|
||||||
|
# activate the virtual env
|
||||||
|
source venv/bin/activate
|
||||||
|
|
||||||
|
# install dependencies into it
|
||||||
|
pip install -r requirements.txt
|
||||||
|
|
||||||
|
# sluggarize your local DB (idempotent; safe to run repeatedly)
|
||||||
|
python3 sluggarize_jamtracks.py
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
psycopg2
|
||||||
|
|
@ -0,0 +1,94 @@
|
||||||
|
import psycopg2
|
||||||
|
import re
|
||||||
|
import argparse
|
||||||
|
import os
|
||||||
|
from urllib.parse import quote
|
||||||
|
|
||||||
|
# Database connection settings from environment variables
|
||||||
|
# Function to construct DB_CONFIG dynamically based on the environment
|
||||||
|
def get_db_config(env):
    """Return the connection settings for one environment.

    Each setting is read from the environment variable
    ``JAM_DB_<ENV>_<SETTING>`` (for example ``JAM_DB_STAGING_HOST``) and falls
    back to a local default when the variable is unset.

    Args:
        env: Environment name; "dev", "staging" or "prod" in any letter case.

    Returns:
        dict with the keys "dbname", "user", "password", "host" and "port";
        every value is a string.

    Raises:
        ValueError: if ``env`` is not one of the three known environments.
    """
    prefix = env.upper()
    if prefix not in ("DEV", "STAGING", "PROD"):
        raise ValueError("Invalid environment. Choose from dev, staging, or prod.")

    # (setting name, default used when the environment variable is unset)
    fallbacks = (
        ("dbname", "jam"),
        ("user", "jam"),
        ("password", ""),
        ("host", "127.0.0.1"),
        ("port", "5432"),
    )
    return {
        setting: os.getenv(f"JAM_DB_{prefix}_{setting.upper()}", fallback)
        for setting, fallback in fallbacks
    }
|
||||||
|
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
def slugify(text):
    """Convert a string into a lowercase, ASCII-only, dash-separated slug.

    The rules mirror the Ruby ``sluggarize`` helper in this change, so slugs
    written by this script match slugs generated by the application.

    Args:
        text: The title or artist name to convert (must be a ``str``).

    Returns:
        The slug. It never starts or ends with '-' and never contains
        consecutive dashes. Characters outside the explicit lists below
        (for example '/') are kept as-is.
    """
    text = text.lower()
    # NFKD splits accented letters into base letter + combining marks; the
    # ASCII round-trip then drops every non-ASCII code point.
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("ascii")
    text = re.sub(r"\s+", "-", text)            # Replace whitespace runs with '-'
    text = re.sub(r":+", "-", text)             # Replace ':' with '-'
    text = re.sub(r"[&+]+", "and", text)        # Replace '&' and '+' with 'and'
    text = re.sub(r"['\",!?()=#*.]", "", text)  # Remove quotes and listed punctuation
    text = re.sub(r"-+", "-", text)             # Collapse consecutive dashes into one

    # strip() is a no-op when there is nothing to trim, so no guard is needed.
    return text.strip("-")
|
||||||
|
|
||||||
|
def update_slugs(env, dry_run=False):
    """Recompute original_artist_slug and name_slug for every jam_tracks row.

    Reads id, original_artist and name for all rows, slugifies both text
    fields (an empty/NULL field yields an empty slug) and, unless ``dry_run``
    is set, writes the slugs back and commits once at the end.

    A warning is printed for every slug that is not already URL-safe, i.e.
    that percent-encoding would change.

    Args:
        env: Environment name passed to ``get_db_config``.
        dry_run: When true, only compute slugs and print warnings; no UPDATE
            is issued and nothing is committed.

    Returns:
        None. Any error is printed as "Error: ..." rather than raised.
    """
    # Bind both names up front so the ``finally`` clause is safe even when
    # the config lookup or the connection attempt itself fails.
    conn = None
    cursor = None
    try:
        conn = psycopg2.connect(**get_db_config(env))
        cursor = conn.cursor()

        # Fetch all records
        cursor.execute("SELECT id, original_artist, name FROM jam_tracks;")
        rows = cursor.fetchall()

        updated = 0
        for track_id, original_artist, name in rows:
            artist_slug = slugify(original_artist) if original_artist else ""
            name_slug = slugify(name) if name else ""

            # Validate percent encoding. safe="" makes '/' count as unsafe
            # too; quote() leaves it alone by default, which would hide
            # slugs that contain a path separator.
            if quote(artist_slug, safe="") != artist_slug:
                print(f"Warning: Slug mismatch for original_artist '{original_artist}' -> '{artist_slug}'")
            if quote(name_slug, safe="") != name_slug:
                print(f"Warning: Slug mismatch for name '{name}' -> '{name_slug}'")

            if not dry_run:
                # Update database with new slugs
                cursor.execute(
                    "UPDATE jam_tracks SET original_artist_slug = %s, name_slug = %s WHERE id = %s;",
                    (artist_slug, name_slug, track_id)
                )
                # rowcount only covers the statement just executed, so the
                # total has to be accumulated per UPDATE.
                updated += cursor.rowcount

        if not dry_run:
            conn.commit()
            print(f"Updated {updated} rows.")

    except Exception as e:
        print("Error:", e)

    finally:
        if cursor:
            cursor.close()
        if conn:
            conn.close()
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Command-line entry point: choose the target environment and optionally
    # do a dry run, then hand both settings to update_slugs.
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Run in dry-run mode without updating the database",
    )
    cli.add_argument(
        "--env",
        choices=["dev", "staging", "prod"],
        default="dev",
        help="Specify the environment (dev, staging, prod)",
    )

    options = cli.parse_args()
    update_slugs(options.env, dry_run=options.dry_run)
|
||||||
|
|
||||||
Loading…
Reference in New Issue