diff --git a/discogarchive b/discogarchive
index 1da6c58..d49e34a 100755
--- a/discogarchive
+++ b/discogarchive
@@ -20,136 +20,255 @@ fail() {
exit 1
}
+# Decode the HTML entities in bandcamp's data-client-items attribute (stdin -> stdout)
+# so the JSON can go through jq. NOTE(review): entity names look stripped from the patterns in this view; confirm.
unescapehtml() {
- sed '
- s/"/"/g
- s/'/'\''/g
- s/<//g
- s/&/\&/g
- '
+ sed 's/"/"/g ; s/'/'\''/g ; s/<//g ; s/&/\&/g'
+}
+# HTML-escape stdin to stdout, for release text. NOTE(review): not called in this hunk; the entity replacements look stripped in this view, confirm.
+escapehtml() {
+ sed 's/&/\&/g ; s/\</g ; s/>/\>/g ; s/"/\"/g ; s/'\''/\'/g'
+}
+
+# insert one line into DESCRIPTION.html, positioned relative to a section match, using ed
+# $1: section name, expanded into the ed search address
+# $2: "end" (insert before the next empty line) or N (step N lines down from the match, insert before that line)
+# $3: string to insert
+insert_into_section() {
+ section="$1" index="$2"
+ case "$index" in
+ end) index_str='/^$' ;;
+ *) index_str="+$index" ;;
+ esac
+ >/dev/null ed DESCRIPTION.html <<-EOF
+ /
$section
+ $index_str
+ i
+ $3
+ .
+ wq
+ EOF
+}
+
+# print each blank-line-separated block of DESCRIPTION.html whose first line matches the regex $1
+get_section() {
+ awk -v h="$1" -v RS="" -v FS='\n' '$1 ~ h' DESCRIPTION.html # RS="" reads paragraph-wise; FS='\n' makes $1 the first line of the block
+}
+
+# succeeds when the "Contained Releases" section has a line containing href="$1"
+# prints each match as N:line, N counted within the section text (not the whole file)
+is_contained() {
+ get_section "Contained Releases" | grep -nF "href=\"$1\""
+}
+
+is_uncontained() { # same check and output format as is_contained, against the "Uncontained Releases" section
+ get_section "Uncontained Releases" | grep -nF "href=\"$1\""
+}
+
+# move the DESCRIPTION.html line numbered in $unline to just after the $last_contained entry
+# reads the globals $unline (N:text, set by the caller) and $last_contained
+# $1 = url (passed by the caller but never read in this body)
+move_from_uncontained() {
+ if conline=$(is_contained "$last_contained"); then
+ conline="$(grep -nF "${conline#*:}" DESCRIPTION.html)" # re-locate the matched text to get its line number in the whole file
+ else
+ conline="/
Contained Releases"
+ fi
+ >/dev/null ed DESCRIPTION.html <<-EOF
+ ${unline%%:*}m${conline%%:*}
+ wq
+ EOF
}
clean() {
trap 'exit' INT HUP QUIT TERM EXIT
- [ -f "$HTML_FILE" ] && rm -f "$HTML_FILE"
- [ -f "$json" ] && rm -f "$json"
+ [ -f "${DISCOG_PAGE:-}" ] && rm -f "$DISCOG_PAGE" # ${VAR:-} expands to empty while the variable is still unset, so the -f test simply fails
+ [ -f "${RELEASE_PAGE:-}" ] && rm -f "$RELEASE_PAGE"
+ [ -f "${JSON_HEAD:-}" ] && rm -f "$JSON_HEAD"
+ [ -f "${JSON_TRALBUM:-}" ] && rm -f "$JSON_TRALBUM"
exit
}
trap 'clean' INT HUP QUIT TERM EXIT
# OPTIONS #
-no_download=""
-while getopts :n OPT; do
+no_download="" only_download="" skip_contained="" force_desc="" # NOTE(review): force_desc has no getopts flag and is not read in this hunk
+while getopts :nds OPT; do
case $OPT in
n) no_download=1 ;; # don't download files, just make the description.html
+ d) only_download=1 ;; # only download files; skips the release scan (the socials prompt still runs when DESCRIPTION.html is missing)
+ s) skip_contained=1 ;; # skip releases already listed as contained; uncontained -> contained moves go after the last contained one seen
*) fail "unknown option: -$OPTARG" ;;
esac
done
shift "$((OPTIND - 1))"
+# END OPTIONS #
-case $1 in
- *.bandcamp.com*) : ;;
- *) fail 'Please use the *.bandcamp.com link instead of any custom domains!' ;;
-esac
-IFS="/" read -r _ _ BANDCAMP_DOMAIN _ <<-EOF
- $1
+# SETUP #
+# build a newline-delimited list of every argument piece (split on "/" or space) that matches bandcamp.com
+BANDCAMP_DOMAINS=$(awk -v RS='/| ' '$1 ~ "bandcamp.com"' <<-EOF
+ $*
EOF
-mkdir -p "$BANDCAMP_DOMAIN"; cd "$BANDCAMP_DOMAIN"
-logdir="${XDG_DATA_HOME:-$HOME/.local/share}/discogarchive"
-mkdir -p "$logdir"
+)
+DISCOG_PAGE=$(mktemp) # HTML of the artist's /music page
+RELEASE_PAGE=$(mktemp) # HTML of the release currently being inspected
+JSON_HEAD=$(mktemp) # ld+json script text taken from the release page head
+JSON_TRALBUM=$(mktemp) # value of the release page's data-tralbum attribute
+## END SETUP
-HTML_FILE="/tmp/$BANDCAMP_DOMAIN.html"
-BASEURL="https://$BANDCAMP_DOMAIN"
+# MAIN LOOP: one pass per entry of BANDCAMP_DOMAINS #
+for BANDCAMP_DOMAIN in $BANDCAMP_DOMAINS; do # unquoted expansion is word-split into one domain per iteration
+ cd ~/data/discographies # fixed base directory for every discography; a failing cd is not checked
+ is_existing_discog=""
+ if [ -d "$BANDCAMP_DOMAIN" ]; then
+ is_existing_discog=1 # the artist directory already existed before this run
+ fi
+ mkdir -p "$BANDCAMP_DOMAIN"; cd "$BANDCAMP_DOMAIN"
-# SOCIALS #
-errecho 'input artist socials in KEY=VALUE format. blank line to continue. bandcamp is already filled
-common shorthands:
-web=website, sc=soundcloud, yt=youtube, tw=twitch, tr=twitter, mx=mixcloud,
-ig=instagram, sp=spotify, lt=Linktree, ch=Cohost'
-socials="Bandcamp
$NL"
-while IFS="=" read -r key val; do
- case "$key" in
- web) key=Website ;;
- sc) key=SoundCloud ;;
- yt) key=YouTube ;;
- tw) key=Twitch ;;
- tr) key=Twitter ;;
- mx) key=Mixcloud ;;
- ig) key=Instagram ;;
- sp) key=Spotify ;;
- lt) key=Linktree ;;
- ch) key=Cohost ;;
- "") break ;;
- *) : ;;
- esac
- socials="$socials$key
$NL"
-done
-socials="$(printf '%s' "$socials" | sort)$NL"
-errecho 'Moving on!'
+ BASEURL="https://$BANDCAMP_DOMAIN"
-# HTML/JSON PARSING #
-curl -L -s -o "$HTML_FILE" "$1"
+ # SOCIALS: prompt for the artist links and (re)create DESCRIPTION.html #
+ if [ -z "$only_download" ] && [ -z "$is_existing_discog" ] || ! [ -f DESCRIPTION.html ]; then # parses as (A && B) || C: also runs whenever DESCRIPTION.html is missing
+ >&2 cat <<-EOF
+ input artist socials in KEY=VALUE format. blank line to continue. bandcamp is already filled
+ common shorthands:
+ web=website, sc=soundcloud, yt=youtube, tw=twitch, tr=twitter, mx=mixcloud,
+ ig=instagram, sp=spotify, lt=Linktree, ch=Cohost
+ EOF
+ # start DESCRIPTION.html with the Bandcamp entry (">" truncates any existing file)
+ printf 'Bandcamp
\n' "$BASEURL" > DESCRIPTION.html
+ while IFS="=" read -r key val; do # one KEY=VALUE pair per stdin line, split at the first "="
+ case "$key" in
+ web) key=Website ;;
+ sc) key=SoundCloud ;;
+ yt) key=YouTube ;;
+ tw) key=Twitch ;;
+ tr) key=Twitter ;;
+ mx) key=Mixcloud ;;
+ ig) key=Instagram ;;
+ sp) key=Spotify ;;
+ lt) key=Linktree ;;
+ ch) key=Cohost ;;
+ "") break ;; # an empty key (blank line) ends the prompt
+ *) : ;; # any other key is kept exactly as typed
+ esac
+ printf '%s
\n' "$val" "$key" >> DESCRIPTION.html
+ done
+ errecho 'Moving on!'
+ printf '\n
%s
\n\n
%s
' \
+ "Uncontained Releases" "Contained Releases" >> DESCRIPTION.html
+ fi
+ # END SOCIALS #
-tmplog="" look_closer="" uncontained_releases="" contained_releases="" contained_releases_raw=""
-json="$(mktemp -u)"
-# albums and tracks
-while read -r url; do
- url="$BASEURL$url"
-# if rg -q -F "$url" "$logdir/log" 2>/dev/null; then
-# errecho "ALREADY CONTAINED $type: $url"
-# tmplog="$tmplog$url CONTAINED$NL"
-# continue
-# fi
- curl -L -s -o - "$url" | pup 'script[type="application/ld+json"]' 'text{}' > "$json" || continue
- artist="$(jq -r '.byArtist.name' < "$json")"
- name="$(jq -r '.name' < "$json")"
- if [ "$(jq '.numTracks' < "$json")" = "0.0" ]; then
- look_closer="$artist - $name
$NL$look_closer"
- errecho "LOOK CLOSER: $url"
- elif [ "$(jq 'if (.inAlbum) then .inAlbum.albumRelease[0].offers.price else .albumRelease[0].offers.price end' < "$json")" = "0.0" ]; then
- contained_releases="$artist - $name
${NL}${contained_releases}"
- contained_releases_raw="$url$NL$contained_releases_raw"
- errecho "CONTAINED RELEASE: $artist - $name"
- if ! rg -q -F "$url" "$logdir/log" 2>/dev/null; then
- echo "$url" >> "$logdir/log"
+ # HTML/JSON PARSING: fetch the artist's /music page, then classify every release linked from it #
+ curl -L -s -o "$DISCOG_PAGE" "$BASEURL/music"
+
+ if [ -z "$only_download" ]; then
+ look_closer="" uncontained_releases="" contained_releases="" contained_releases_raw=""
+ last_contained="" last_uncontained=""
+ # albums and tracks: one url per line, produced by the heredoc at the end of the loop
+ while read -r url; do
+ case "$url" in
+ /*) url="$BASEURL$url" ;; # site-relative path: prefix the artist's base url
+ *) url=${url%%\?*} ;; # anything else: drop everything from the first "?"
+ esac
+
+ # with -s: skip releases already listed as contained, but remember the url as the insertion anchor ($last_contained)
+ if [ -n "$skip_contained" ] && is_contained "$url" >/dev/null; then
+ last_contained="$url"
+ errecho "ALREADY CONTAINED: $url"
+ continue
+ fi
+
+ # get album data: the release page, its ld+json head script and its data-tralbum attribute
+ curl -L -s -o "$RELEASE_PAGE" "$url"
+ pup 'head > script[type="application/ld+json"]' 'text{}' < "$RELEASE_PAGE" > "$JSON_HEAD" || continue
+ pup -p '[data-tralbum] attr{data-tralbum}' 'text{}' < "$RELEASE_PAGE" > "$JSON_TRALBUM" || continue
+
+ artist="$(jq -r '.byArtist.name' < "$JSON_HEAD")"
+ name="$(jq -r '.name' < "$JSON_HEAD")"
+ numtracks="$(jq -r '.inAlbum.numTracks' < "$JSON_HEAD")"
+ [ "$numtracks" = "null" ] && numtracks="$(jq -r .numTracks < "$JSON_HEAD")" # NOTE(review): numtracks is not read again in this hunk
+ price="$(jq '
+ if (.inAlbum) then .inAlbum.albumRelease[0].offers.price
+ else .albumRelease[0].offers.price end' < "$JSON_HEAD"
+ )"
+
+ # warn when the tralbum JSON does not report hasAudio as true (processing still continues below)
+ if [ "$(jq .hasAudio < "$JSON_TRALBUM")" != "true" ]; then
+ errecho "NO AUDIO, LOOK CLOSER: $url"
+ fi
+
+ # a price of exactly 0.0 counts as "contained"; every other value as "uncontained"
+ if [ "$price" = "0.0" ]; then
+ if unline=$(is_uncontained "$url"); then
+ unline="$(grep -nF "${unline#*:}" DESCRIPTION.html)" # convert the section-relative match into a whole-file N:line
+ move_from_uncontained "$url"
+ else
+ if indexline=$(is_contained "$last_contained"); then
+ indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
+ else
+ indexline="/
Contained Releases"
+ fi
+ >/dev/null ed DESCRIPTION.html <<-EOF
+ ${indexline%%:*}
+ a
+ $artist - $name
+ .
+ wq
+ EOF
+ fi
+ errecho "CONTAINED RELEASE: $artist - $name"
+ last_contained="$url"
+ else
+ # when rebuilding without -s: leave releases that are already listed as contained alone
+ is_contained "$url" >/dev/null && continue
+ errecho "UNCONTAINED RELEASE: $artist - $name"
+ is_uncontained "$url" >/dev/null && continue
+ if indexline=$(is_uncontained "$last_uncontained"); then # the anchor was added to the uncontained section, so look it up there
+ indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
+ else
+ indexline="/
Uncontained Releases"
+ fi
+ >/dev/null ed DESCRIPTION.html <<-EOF
+ ${indexline%%:*}
+ a
+ $artist - $name
+ .
+ wq
+ EOF
+ last_uncontained="$url"
+ fi
+ done <<-EOF
+ $(
+ pup '#music-grid > li > a attr{href}' < "$DISCOG_PAGE"
+ pup '#music-grid attr{data-client-items}' < "$DISCOG_PAGE" | unescapehtml | jq -r '.[].page_url'
+ )
+ EOF
+ fi
+
+ # download the releases unless -n was given
+ if [ -z "$no_download" ]; then
+ outdir="./files"
+ if [ -n "$is_existing_discog" ]; then
+ outdir="./files/new_stuff" # a discography that already existed gets this run's downloads in a separate subdirectory
fi
- else
- uncontained_releases="$artist - $name
${NL}${uncontained_releases}"
- errecho "UNCONTAINED RELEASE: $artist - $name"
+ mkdir -p "$outdir"
+ printf '%s' 'CAT.ALL' > ./files/_rules.conf
+ # artist img: fetch the band photo, replacing the trailing _<suffix> before the extension with _0
+ img="$(pup 'img.band-photo' 'attr{src}' < "$DISCOG_PAGE")"
+ if [ -n "$img" ]; then
+ ext="${img##*.}"
+ curl -s -o ./files/artist."$ext" "${img%_*}_0.$ext"
+ fi
+ errecho "Done with retrieving metadata! Now to download..."
+ bcdl-free --no-unzip -z 12345 -f FLAC -d "$outdir" -e auto -l "$BASEURL/music"
+
+ set +f
+ if [ "$(printf '%s' "$outdir"/*.flac)" != "$outdir/*.flac" ]; then # the pattern stays literal when no .flac file matched
+ mkdir -p "$outdir"/TRACKS
+ mv "$outdir"/*.flac "$outdir"/TRACKS
+ fi
+ set -f
fi
-done <<-EOF
- $(
- pup '#music-grid > li > a attr{href}' < "$HTML_FILE"
- pup '#music-grid attr{data-client-items}' < "$HTML_FILE" | unescapehtml | jq -r '.[].page_url'
- )
-EOF
-
-# formatting the description
-if [ -n "${uncontained_releases}" ]; then
- uncontained_releases="$NL
Uncontained Releases
$NL$uncontained_releases"
-fi
-contained_releases="$NL
Contained Releases
$NL$contained_releases"
-
-printf '%s%s%s%s' \
- "${look_closer:+$look_closer$NL}" "$socials" "$uncontained_releases" "$contained_releases" > DESCRIPTION.html
-
-# download shit
-if [ ! "$no_download" ]; then
- mkdir -p "files"
- printf '%s' 'CAT.ALL' > ./files/_rules.conf
- # artist img
- img="$(pup 'img.band-photo' 'attr{src}' < "$HTML_FILE")"
- if [ -n "$img" ]; then
- ext="${img##*.}"
- curl -s -o ./files/artist."$ext" "${img%_*}_0.$ext"
- fi
- errecho "Done with retrieving metadata! Now to download..."
- bcdl-free --no-unzip -z 12345 -f FLAC -d "./files" -e auto -l "$BASEURL"
-
- set +f
- if [ ./files/*.flac != "./files/*.flac" ]; then
- mkdir ./files/TRACKS
- mv ./files/*.flac ./files/TRACKS
- fi
-fi
+done
+# END MAIN LOOP #