"""Generate URL-friendly slugs for every row of the ``jam_tracks`` table.

Provenance: reconstructed from the git patch "sluggarize script"
(commit 7191358b8d058ec549aed4e1aec8b29696dccad9, Seth Call,
Sat, 8 Feb 2025), which adds:

* ``web/script/py/requirements.txt`` containing the single line ``psycopg2``
* ``web/script/py/sluggarize_jamtracks.py`` (this script)

Usage::

    python sluggarize_jamtracks.py [--env dev|staging|prod] [--dry-run]
"""

import argparse
import os
import re
import unicodedata
from urllib.parse import quote

# Patterns used by slugify(), compiled once instead of on every call.
_WHITESPACE_RE = re.compile(r"\s+")
_COLON_RE = re.compile(r":+")
_AMP_PLUS_RE = re.compile(r"[&+]+")
# '+' is deliberately absent here: every '+' has already been turned into
# "and" by _AMP_PLUS_RE before this pattern is applied.
_PUNCT_RE = re.compile(r"['\",!?()=#*.]")
_HYPHEN_RUN_RE = re.compile(r"-+")

_VALID_ENVS = ("DEV", "STAGING", "PROD")


def get_db_config(env):
    """Build the psycopg2 connection settings for one environment.

    Each setting is read from the environment variable
    ``JAM_DB_<ENV>_<SETTING>`` (for example ``JAM_DB_PROD_HOST``) and falls
    back to a local default when that variable is not set.

    Args:
        env: Environment name, case-insensitive: "dev", "staging" or "prod".

    Returns:
        A dict with the keys "dbname", "user", "password", "host" and "port".

    Raises:
        ValueError: If ``env`` is not one of the supported environments.
    """
    env = env.upper()
    if env not in _VALID_ENVS:
        raise ValueError("Invalid environment. Choose from dev, staging, or prod.")

    return {
        "dbname": os.getenv(f"JAM_DB_{env}_DBNAME", "jam"),
        "user": os.getenv(f"JAM_DB_{env}_USER", "jam"),
        "password": os.getenv(f"JAM_DB_{env}_PASSWORD", ""),
        "host": os.getenv(f"JAM_DB_{env}_HOST", "127.0.0.1"),
        "port": os.getenv(f"JAM_DB_{env}_PORT", "5432"),
    }


def slugify(text):
    """Convert a string into a URL-friendly slug.

    Steps, in order: lowercase; decompose accented characters and drop
    everything that is not ASCII; turn whitespace runs and colon runs into
    '-'; turn runs of '&' / '+' into "and"; delete the punctuation
    ``' " , ! ? ( ) = # * .``; collapse repeated '-'; trim '-' from both ends.

    Characters outside those classes (for example '/') are left untouched.

    Args:
        text: The string to convert.

    Returns:
        The slug, which may be empty.
    """
    text = text.lower()
    # NFKD splits e.g. "é" into "e" + combining accent; the ASCII encode with
    # errors="ignore" then drops the accent and any other non-ASCII character.
    text = unicodedata.normalize("NFKD", text).encode("ascii", "ignore").decode("utf-8")
    text = _WHITESPACE_RE.sub("-", text)   # whitespace run -> '-'
    text = _COLON_RE.sub("-", text)        # colon run -> '-'
    text = _AMP_PLUS_RE.sub("and", text)   # '&' / '+' run -> "and"
    text = _PUNCT_RE.sub("", text)         # drop listed punctuation
    text = _HYPHEN_RUN_RE.sub("-", text)   # collapse consecutive hyphens
    return text.strip("-")


def _needs_percent_encoding(slug):
    """Return True if ``slug`` contains a character that is not URL-safe."""
    # safe="" so that '/' is flagged too: quote() treats '/' as safe by
    # default, which would let a slug such as "ac/dc" pass unnoticed.
    return quote(slug, safe="") != slug


def update_slugs(env, dry_run=False):
    """Fetch all jam tracks, generate their slugs, and store them.

    For every row of ``jam_tracks`` the ``original_artist`` and ``name``
    columns are slugified (NULL/empty values become an empty slug) and, unless
    ``dry_run`` is set, written to ``original_artist_slug`` and ``name_slug``.
    A warning is printed for each slug that would still need percent-encoding.

    All updates run in one transaction that is committed at the end. Any
    error is printed as ``Error: ...`` and not re-raised; the connection is
    then closed without committing.

    Args:
        env: Environment name accepted by ``get_db_config``.
        dry_run: When true, only compute slugs and print warnings; nothing is
            written to the database.
    """
    # Initialised before the try block so the finally clause can always test
    # them, even when get_db_config() or connect() is what failed.
    conn = None
    cursor = None
    try:
        db_config_for_env = get_db_config(env)

        # Imported lazily so slugify()/get_db_config() stay importable (and
        # testable) on machines without the PostgreSQL driver installed.
        import psycopg2

        conn = psycopg2.connect(**db_config_for_env)
        cursor = conn.cursor()

        cursor.execute("SELECT id, original_artist, name FROM jam_tracks;")
        rows = cursor.fetchall()

        updated = 0
        for track_id, original_artist, name in rows:
            artist_slug = slugify(original_artist) if original_artist else ""
            name_slug = slugify(name) if name else ""

            if _needs_percent_encoding(artist_slug):
                print(f"Warning: Slug mismatch for original_artist '{original_artist}' -> '{artist_slug}'")
            if _needs_percent_encoding(name_slug):
                print(f"Warning: Slug mismatch for name '{name}' -> '{name_slug}'")

            if not dry_run:
                cursor.execute(
                    "UPDATE jam_tracks SET original_artist_slug = %s, name_slug = %s WHERE id = %s;",
                    (artist_slug, name_slug, track_id),
                )
                # rowcount only describes the most recent statement, so the
                # total has to be accumulated per UPDATE.
                updated += cursor.rowcount

        if not dry_run:
            conn.commit()
            print(f"Updated {updated} rows.")

    except Exception as e:
        # Closing the connection below without a commit discards any
        # pending updates.
        print("Error:", e)

    finally:
        if cursor is not None:
            cursor.close()
        if conn is not None:
            conn.close()


def main():
    """Parse the command-line arguments and run the slug update."""
    parser = argparse.ArgumentParser()
    parser.add_argument("--dry-run", action="store_true", help="Run in dry-run mode without updating the database")
    parser.add_argument("--env", choices=["dev", "staging", "prod"], default="dev", help="Specify the environment (dev, staging, prod)")

    args = parser.parse_args()

    update_slugs(args.env, dry_run=args.dry_run)


if __name__ == "__main__":
    main()