#!/bin/sh
set -euf

# TODO: maybe make it so we're in an "updating" or "not" state?
# would have to keep tabs with an existing description file and go from there
# for now we don't gotta worry about it too much

BN="${0##*/}"
NL='
'
OSTYPE=linux-gnu
. "$HOME/.local/python-venv/bin/activate"

# HELPER FUNCTIONS #

errecho() {
	>&2 echo "$*"
}

fail() {
	errecho "error: $BN: $*"
	exit 1
}

# transform the escaped html entities that bandcamp has in data-client-items
# into unescaped versions, so we can put it through jq
# (&amp; goes last so freshly unescaped text doesn't get unescaped twice)
unescapehtml() {
	sed 's/&quot;/"/g ; s/&#39;/'\''/g ; s/&lt;/</g ; s/&gt;/>/g ; s/&amp;/\&/g'
}

# for release escaping (&amp; goes first, for the same reason as above)
escapehtml() {
	sed 's/&/\&amp;/g ; s/</\&lt;/g ; s/>/\&gt;/g ; s/"/\&quot;/g ; s/'\''/\&#39;/g'
}

# inserts a string into a section at the given index
# $1: section
# $2: index (0-indexed), or "end" for just before the section's trailing blank line
# $3: string
insert_into_section() {
	section="$1"
	index="$2"
	case "$index" in
		end) index_str='/^$/' ;;
		*) index_str="+$index" ;;
	esac
	>/dev/null ed DESCRIPTION.html <<-EOF
		/<h2>$section
		$index_str i
		$3
		.
		wq
	EOF
}

# gets all from a section
get_section() {
	awk -v h="$1" -v RS="" -v FS='\n' '$1 ~ h' DESCRIPTION.html
}

# checks if release link is contained or not
# returns num:line if it is (num is relative to the section, not the file)
is_contained() {
	get_section "Contained Releases" | grep -nF "href=\"$1\""
}

is_uncontained() {
	get_section "Uncontained Releases" | grep -nF "href=\"$1\""
}

# move a link from uncontained to contained if it exists in uncontained
# uses $unline (file-relative num:line, set by the caller) and $last_contained
# $1 = url (currently unused)
move_from_uncontained() {
	if conline=$(is_contained "$last_contained"); then
		conline="$(grep -nF "${conline#*:}" DESCRIPTION.html)"
	else
		conline="/<h2>Contained Releases/"
	fi
	>/dev/null ed DESCRIPTION.html <<-EOF
		${unline%%:*}m${conline%%:*}
		wq
	EOF
}
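
# Sketch of the DESCRIPTION.html layout the helpers above and the ed edits
# below rely on: blank-line-separated sections (awk RS=""), a header line per
# section, one href="..." anchor per link. The exact tags shown here are just
# illustrative and the artist URLs are made up; the only hard requirement is
# that they stay consistent with what the printf/ed calls in this script write.
#
#   <a href="https://someartist.bandcamp.com">Bandcamp</a><br>
#   <a href="https://example.com">Website</a><br>
#
#   <h2>Uncontained Releases</h2>
#   <a href="https://someartist.bandcamp.com/album/foo">someartist - foo</a><br>
#
#   <h2>Contained Releases</h2>
#   <a href="https://someartist.bandcamp.com/album/bar">someartist - bar</a><br>
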
clean() {
	trap 'exit' INT HUP QUIT TERM EXIT
	[ -f "${DISCOG_PAGE:-}" ] && rm -f "$DISCOG_PAGE"
	[ -f "${RELEASE_PAGE:-}" ] && rm -f "$RELEASE_PAGE"
	[ -f "${JSON_HEAD:-}" ] && rm -f "$JSON_HEAD"
	[ -f "${JSON_TRALBUM:-}" ] && rm -f "$JSON_TRALBUM"
	exit
}
trap 'clean' INT HUP QUIT TERM EXIT

# OPTIONS #
no_download=""
only_download=""
skip_contained=""
force_desc=""

while getopts :nds OPT; do
	case $OPT in
		n) no_download=1 ;;    # don't download files, just make the description.html
		d) only_download=1 ;;  # only download files, don't make description.html
		s) skip_contained=1 ;; # skip known albums, will insert uncontained -> contained ones after "last contained album"
		*) fail "unknown option: -$OPTARG" ;;
	esac
done
shift "$((OPTIND - 1))"
# END OPTIONS #
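
# Illustrative invocation (the script name and artist here are placeholders):
#   ./thisscript -s someartist.bandcamp.com https://other.bandcamp.com/music
# positional args get filtered down to bandcamp domains in SETUP below;
# anything not containing "bandcamp.com" is silently dropped.
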
# SETUP #
# creates a NEWLINE-delimited list of all bandcamp domains in question
BANDCAMP_DOMAINS=$(awk -v RS='/| ' '$1 ~ "bandcamp.com"' <<-EOF
	$*
EOF
)

DISCOG_PAGE=$(mktemp)
RELEASE_PAGE=$(mktemp)
JSON_HEAD=$(mktemp)
JSON_TRALBUM=$(mktemp)
## END SETUP

# MAIN LOOP #
for BANDCAMP_DOMAIN in $BANDCAMP_DOMAINS; do
	cd ~/data/discographies # normalized path ig

	is_existing_discog=""
	if [ -d "$BANDCAMP_DOMAIN" ]; then
		is_existing_discog=1
	fi

	mkdir -p "$BANDCAMP_DOMAIN"; cd "$BANDCAMP_DOMAIN"
	BASEURL="https://$BANDCAMP_DOMAIN"

	# SOCIALS #
	if [ -z "$only_download" ] && [ -z "$is_existing_discog" ] || ! [ -f DESCRIPTION.html ]; then
		>&2 cat <<-EOF
			input artist socials in KEY=VALUE format. blank line to continue. bandcamp is already filled in
			common shorthands: web=website, sc=soundcloud, yt=youtube, tw=twitch, tr=twitter, mx=mixcloud, ig=instagram, sp=spotify, lt=Linktree, ch=Cohost
		EOF

		# creates the DESCRIPTION thingy
		printf '<a href="%s">Bandcamp</a><br>\n' "$BASEURL" > DESCRIPTION.html
		while IFS="=" read -r key val; do
			case "$key" in
				web) key=Website ;;
				sc) key=SoundCloud ;;
				yt) key=YouTube ;;
				tw) key=Twitch ;;
				tr) key=Twitter ;;
				mx) key=Mixcloud ;;
				ig) key=Instagram ;;
				sp) key=Spotify ;;
				lt) key=Linktree ;;
				ch) key=Cohost ;;
				"") break ;;
				*) : ;;
			esac
			printf '<a href="%s">%s</a><br>\n' "$val" "$key" >> DESCRIPTION.html
		done
		errecho 'Moving on!'
		printf '\n<h2>%s</h2>\n\n<h2>%s</h2>\n' \
			"Uncontained Releases" "Contained Releases" >> DESCRIPTION.html
	fi
	# END SOCIALS #
	# HTML/JSON PARSING #
	curl -L -s -o "$DISCOG_PAGE" "$BASEURL/music"

	if [ -z "$only_download" ]; then
		look_closer=""
		uncontained_releases=""
		contained_releases=""
		contained_releases_raw=""
		last_contained=""
		last_uncontained=""

		# albums and tracks
		while read -r url; do
			case "$url" in
				/*) url="$BASEURL$url" ;;
				*) url=${url%%\?*} ;;
			esac

			# if it's already contained, skip it but remember it as the last contained release
			if [ -n "$skip_contained" ] && is_contained "$url" >/dev/null; then
				last_contained="$url"
				errecho "ALREADY CONTAINED: $url"
				continue
			fi

			# get album data
			curl -L -s -o "$RELEASE_PAGE" "$url"
			pup 'head > script[type="application/ld+json"]' 'text{}' < "$RELEASE_PAGE" > "$JSON_HEAD" || continue
			pup -p '[data-tralbum] attr{data-tralbum}' < "$RELEASE_PAGE" > "$JSON_TRALBUM" || continue

			artist="$(jq -r '.byArtist.name' < "$JSON_HEAD")"
			name="$(jq -r '.name' < "$JSON_HEAD")"
			numtracks="$(jq -r '.inAlbum.numTracks' < "$JSON_HEAD")"
			[ "$numtracks" = "null" ] && numtracks="$(jq -r .numTracks < "$JSON_HEAD")"
			price="$(jq '
				if (.inAlbum) then
					.inAlbum.albumRelease[0].offers.price
				else
					.albumRelease[0].offers.price
				end' < "$JSON_HEAD"
			)"

			# check if it even has audio first
			if [ "$(jq .hasAudio < "$JSON_TRALBUM")" != "true" ]; then
				errecho "NO AUDIO, LOOK CLOSER: $url"
				continue
			fi

			# now check price :D
			if [ "$price" = "0.0" ]; then
				if unline=$(is_uncontained "$url"); then
					unline="$(grep -nF "${unline#*:}" DESCRIPTION.html)"
					move_from_uncontained "$url"
				else
					if indexline=$(is_contained "$last_contained"); then
						indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
					else
						indexline="/<h2>Contained Releases/"
					fi
					>/dev/null ed DESCRIPTION.html <<-EOF
						${indexline%%:*} a
						<a href="$url">$artist - $name</a><br>
						.
						wq
					EOF
				fi
				errecho "CONTAINED RELEASE: $artist - $name"
				last_contained="$url"
			else
				# just to catch if we are rebuilding something and don't skip
				is_contained "$url" >/dev/null && continue
				errecho "UNCONTAINED RELEASE: $artist - $name"
				is_uncontained "$url" >/dev/null && continue

				if indexline=$(is_uncontained "$last_uncontained"); then
					indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
				else
					indexline="/<h2>Uncontained Releases/"
				fi
				>/dev/null ed DESCRIPTION.html <<-EOF
					${indexline%%:*} a
					<a href="$url">$artist - $name</a><br>
					.
					wq
				EOF
				last_uncontained="$url"
			fi
		done <<-EOF
			$(
				pup '#music-grid > li > a attr{href}' < "$DISCOG_PAGE"
				pup '#music-grid attr{data-client-items}' < "$DISCOG_PAGE" | unescapehtml | jq -r '.[].page_url'
			)
		EOF
	fi
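
	# Download stage (below): for an already-existing discography the fresh
	# grabs go to ./files/new_stuff so they don't mix with what's already
	# sorted, while _rules.conf and the artist image always land in ./files.
	# Globbing is disabled by set -f up top, so it's re-enabled only for the
	# *.flac -> TRACKS/ shuffle. The "_0" suffix on the artist image URL is
	# assumed to be bandcamp's original-size variant.
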
	# download shit
	if [ -z "$no_download" ]; then
		outdir="./files"
		if [ -n "$is_existing_discog" ]; then
			outdir="./files/new_stuff"
		fi
		mkdir -p "$outdir"
		printf '%s' 'CAT.ALL' > ./files/_rules.conf

		# artist img
		img="$(pup 'img.band-photo' 'attr{src}' < "$DISCOG_PAGE")"
		if [ -n "$img" ]; then
			ext="${img##*.}"
			curl -s -o ./files/artist."$ext" "${img%_*}_0.$ext"
		fi

		errecho "Done with retrieving metadata! Now to download..."
		bcdl-free --no-unzip -z 12345 -f FLAC -d "$outdir" -e auto -l "$BASEURL/music"

		set +f
		if [ "$(printf '%s' "$outdir"/*.flac)" != "$outdir/*.flac" ]; then
			mkdir "$outdir"/TRACKS
			mv "$outdir"/*.flac "$outdir"/TRACKS
		fi
		set -f
	fi
done
# END MAIN LOOP #