discogarchive: yeah I redid the whole thing basically
This commit is contained in:
parent
a71970f1c6
commit
9180519e45
337
discogarchive
337
discogarchive
|
@ -20,136 +20,255 @@ fail() {
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# transform the escaped HTML entities that bandcamp has in that one thing
|
||||||
|
# into unescaped versions, so we can put it through jq
|
||||||
unescapehtml() {
|
unescapehtml() {
|
||||||
sed '
|
sed 's/"/"/g ; s/'/'\''/g ; s/</</g ; s/>/>/g ; s/&/\&/g'
|
||||||
s/"/"/g
|
}
|
||||||
s/'/'\''/g
|
# for release escaping
|
||||||
s/</</g
|
escapehtml() {
|
||||||
s/>/>/g
|
sed 's/&/\&/g ; s/</\</g ; s/>/\>/g ; s/"/\"/g ; s/'\''/\'/g'
|
||||||
s/&/\&/g
|
}
|
||||||
'
|
|
||||||
|
# inserts into a section at index after a url
|
||||||
|
# $1: section
|
||||||
|
# $2: index (0-indexed)
|
||||||
|
# $3: string
|
||||||
|
insert_into_section() {
|
||||||
|
index="$2"
|
||||||
|
case "$index" in
|
||||||
|
end) index_str='/^$' ;;
|
||||||
|
*) index_str="+$index" ;;
|
||||||
|
esac
|
||||||
|
>/dev/null ed DESCRIPTION.html <<-EOF
|
||||||
|
/<br><div><b>$section
|
||||||
|
$index_str
|
||||||
|
i
|
||||||
|
$3
|
||||||
|
.
|
||||||
|
wq
|
||||||
|
EOF
|
||||||
|
}
|
||||||
|
|
||||||
|
# prints the whole blank-line-separated block of DESCRIPTION.html whose first line matches $1
|
||||||
|
get_section() {
|
||||||
|
awk -v h="$1" -v RS="" -v FS='\n' '$1 ~ h' DESCRIPTION.html
|
||||||
|
}
|
||||||
|
|
||||||
|
# checks if release link is contained or not
|
||||||
|
# returns num:line if it is
|
||||||
|
is_contained() {
|
||||||
|
get_section "Contained Releases" | grep -nF "href=\"$1\""
|
||||||
|
}
|
||||||
|
|
||||||
|
is_uncontained() {
|
||||||
|
get_section "Uncontained Releases" | grep -nF "href=\"$1\""
|
||||||
|
}
|
||||||
|
|
||||||
|
# move a link from uncontained to contained if it exists in uncontained
|
||||||
|
# uses $last_contained
|
||||||
|
# $1 = url
|
||||||
|
move_from_uncontained() {
|
||||||
|
if conline=$(is_contained "$last_contained"); then
|
||||||
|
conline="$(grep -nF "${conline#*:}" DESCRIPTION.html)"
|
||||||
|
else
|
||||||
|
conline="/<br><div><b>Contained Releases"
|
||||||
|
fi
|
||||||
|
>/dev/null ed DESCRIPTION.html <<-EOF
|
||||||
|
${unline%%:*}m${conline%%:*}
|
||||||
|
wq
|
||||||
|
EOF
|
||||||
}
|
}
|
||||||
|
|
||||||
clean() {
|
clean() {
|
||||||
trap 'exit' INT HUP QUIT TERM EXIT
|
trap 'exit' INT HUP QUIT TERM EXIT
|
||||||
[ -f "$HTML_FILE" ] && rm -f "$HTML_FILE"
|
[ -f "${DISCOG_PAGE:-}" ] && rm -f "$DISCOG_PAGE"
|
||||||
[ -f "$json" ] && rm -f "$json"
|
[ -f "${RELEASE_PAGE:-}" ] && rm -f "$RELEASE_PAGE"
|
||||||
|
[ -f "${JSON_HEAD:-}" ] && rm -f "$JSON_HEAD"
|
||||||
|
[ -f "${JSON_TRALBUM:-}" ] && rm -f "$JSON_TRALBUM"
|
||||||
exit
|
exit
|
||||||
}
|
}
|
||||||
trap 'clean' INT HUP QUIT TERM EXIT
|
trap 'clean' INT HUP QUIT TERM EXIT
|
||||||
|
|
||||||
# OPTIONS #
|
# OPTIONS #
|
||||||
no_download=""
|
no_download="" only_download="" skip_contained="" force_desc=""
|
||||||
while getopts :n OPT; do
|
while getopts :nds OPT; do
|
||||||
case $OPT in
|
case $OPT in
|
||||||
n) no_download=1 ;; # don't download files, just make the description.html
|
n) no_download=1 ;; # don't download files, just make the description.html
|
||||||
|
d) only_download=1 ;; # only download files, don't make description.html
|
||||||
|
s) skip_contained=1 ;; # skip known albums, will insert uncontained -> contained ones after "last contained album"
|
||||||
*) fail "unknown option: -$OPTARG" ;;
|
*) fail "unknown option: -$OPTARG" ;;
|
||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
shift "$((OPTIND - 1))"
|
shift "$((OPTIND - 1))"
|
||||||
|
# END OPTIONS #
|
||||||
|
|
||||||
case $1 in
|
# SETUP #
|
||||||
*.bandcamp.com*) : ;;
|
# creates a NEWLINE-delimited list of all bandcamp domains in question
|
||||||
*) fail 'Please use the *.bandcamp.com link instead of any custom domains!' ;;
|
BANDCAMP_DOMAINS=$(awk -v RS='/| ' '$1 ~ "bandcamp.com"' <<-EOF
|
||||||
esac
|
$*
|
||||||
IFS="/" read -r _ _ BANDCAMP_DOMAIN _ <<-EOF
|
|
||||||
$1
|
|
||||||
EOF
|
EOF
|
||||||
mkdir -p "$BANDCAMP_DOMAIN"; cd "$BANDCAMP_DOMAIN"
|
)
|
||||||
logdir="${XDG_DATA_HOME:-$HOME/.local/share}/discogarchive"
|
DISCOG_PAGE=$(mktemp)
|
||||||
mkdir -p "$logdir"
|
RELEASE_PAGE=$(mktemp)
|
||||||
|
JSON_HEAD=$(mktemp)
|
||||||
|
JSON_TRALBUM=$(mktemp)
|
||||||
|
# END SETUP #
|
||||||
|
|
||||||
HTML_FILE="/tmp/$BANDCAMP_DOMAIN.html"
|
# MAIN LOOP #
|
||||||
BASEURL="https://$BANDCAMP_DOMAIN"
|
for BANDCAMP_DOMAIN in $BANDCAMP_DOMAINS; do
|
||||||
|
cd ~/data/discographies # normalized path ig
|
||||||
|
is_existing_discog=""
|
||||||
|
if [ -d "$BANDCAMP_DOMAIN" ]; then
|
||||||
|
is_existing_discog=1
|
||||||
|
fi
|
||||||
|
mkdir -p "$BANDCAMP_DOMAIN"; cd "$BANDCAMP_DOMAIN"
|
||||||
|
|
||||||
# SOCIALS #
|
BASEURL="https://$BANDCAMP_DOMAIN"
|
||||||
errecho 'input artist socials in KEY=VALUE format. blank line to continue. bandcamp is already filled
|
|
||||||
common shorthands:
|
|
||||||
web=website, sc=soundcloud, yt=youtube, tw=twitch, tr=twitter, mx=mixcloud,
|
|
||||||
ig=instagram, sp=spotify, lt=Linktree, ch=Cohost'
|
|
||||||
socials="<a href=\"$BASEURL\" rel=\"nofollow\">Bandcamp</a><br>$NL"
|
|
||||||
while IFS="=" read -r key val; do
|
|
||||||
case "$key" in
|
|
||||||
web) key=Website ;;
|
|
||||||
sc) key=SoundCloud ;;
|
|
||||||
yt) key=YouTube ;;
|
|
||||||
tw) key=Twitch ;;
|
|
||||||
tr) key=Twitter ;;
|
|
||||||
mx) key=Mixcloud ;;
|
|
||||||
ig) key=Instagram ;;
|
|
||||||
sp) key=Spotify ;;
|
|
||||||
lt) key=Linktree ;;
|
|
||||||
ch) key=Cohost ;;
|
|
||||||
"") break ;;
|
|
||||||
*) : ;;
|
|
||||||
esac
|
|
||||||
socials="$socials<a href=\"$val\" rel=\"nofollow\">$key</a><br>$NL"
|
|
||||||
done
|
|
||||||
socials="$(printf '%s' "$socials" | sort)$NL"
|
|
||||||
errecho 'Moving on!'
|
|
||||||
|
|
||||||
# HTML/JSON PARSING #
|
# SOCIALS #
|
||||||
curl -L -s -o "$HTML_FILE" "$1"
|
if [ -z "$only_download" ] && [ -z "$is_existing_discog" ] || ! [ -f DESCRIPTION.html ]; then
|
||||||
|
>&2 cat <<-EOF
|
||||||
|
input artist socials in KEY=VALUE format. blank line to continue. bandcamp is already filled
|
||||||
|
common shorthands:
|
||||||
|
web=website, sc=soundcloud, yt=youtube, tw=twitch, tr=twitter, mx=mixcloud,
|
||||||
|
ig=instagram, sp=spotify, lt=Linktree, ch=Cohost
|
||||||
|
EOF
|
||||||
|
# creates DESCRIPTION.html, starting with the Bandcamp link
|
||||||
|
printf '<a href="%s" rel="nofollow">Bandcamp</a><br>\n' "$BASEURL" > DESCRIPTION.html
|
||||||
|
while IFS="=" read -r key val; do
|
||||||
|
case "$key" in
|
||||||
|
web) key=Website ;;
|
||||||
|
sc) key=SoundCloud ;;
|
||||||
|
yt) key=YouTube ;;
|
||||||
|
tw) key=Twitch ;;
|
||||||
|
tr) key=Twitter ;;
|
||||||
|
mx) key=Mixcloud ;;
|
||||||
|
ig) key=Instagram ;;
|
||||||
|
sp) key=Spotify ;;
|
||||||
|
lt) key=Linktree ;;
|
||||||
|
ch) key=Cohost ;;
|
||||||
|
"") break ;;
|
||||||
|
*) : ;;
|
||||||
|
esac
|
||||||
|
printf '<a href="%s" rel="nofollow">%s</a><br>\n' "$val" "$key" >> DESCRIPTION.html
|
||||||
|
done
|
||||||
|
errecho 'Moving on!'
|
||||||
|
printf '\n<br><div><b>%s</b></div>\n\n<br><div><b>%s</b></div>' \
|
||||||
|
"Uncontained Releases" "Contained Releases" >> DESCRIPTION.html
|
||||||
|
fi
|
||||||
|
# END SOCIALS #
|
||||||
|
|
||||||
tmplog="" look_closer="" uncontained_releases="" contained_releases="" contained_releases_raw=""
|
# HTML/JSON PARSING #
|
||||||
json="$(mktemp -u)"
|
curl -L -s -o "$DISCOG_PAGE" "$BASEURL/music"
|
||||||
# albums and tracks
|
|
||||||
while read -r url; do
|
if [ -z "$only_download" ]; then
|
||||||
url="$BASEURL$url"
|
look_closer="" uncontained_releases="" contained_releases="" contained_releases_raw=""
|
||||||
# if rg -q -F "$url" "$logdir/log" 2>/dev/null; then
|
last_contained="" last_uncontained=""
|
||||||
# errecho "ALREADY CONTAINED $type: $url"
|
# albums and tracks
|
||||||
# tmplog="$tmplog$url CONTAINED$NL"
|
while read -r url; do
|
||||||
# continue
|
case "$url" in
|
||||||
# fi
|
/*) url="$BASEURL$url" ;;
|
||||||
curl -L -s -o - "$url" | pup 'script[type="application/ld+json"]' 'text{}' > "$json" || continue
|
*) url=${url%%\?*} ;;
|
||||||
artist="$(jq -r '.byArtist.name' < "$json")"
|
esac
|
||||||
name="$(jq -r '.name' < "$json")"
|
|
||||||
if [ "$(jq '.numTracks' < "$json")" = "0.0" ]; then
|
# if it is already contained, skip it but remember it as the last contained release
|
||||||
look_closer="<a href=\"$url\" rel=\"nofollow\">$artist - $name</a><br>$NL$look_closer"
|
if [ -n "$skip_contained" ] && is_contained "$url" >/dev/null; then
|
||||||
errecho "LOOK CLOSER: $url"
|
last_contained="$url"
|
||||||
elif [ "$(jq 'if (.inAlbum) then .inAlbum.albumRelease[0].offers.price else .albumRelease[0].offers.price end' < "$json")" = "0.0" ]; then
|
errecho "ALREADY CONTAINED: $url"
|
||||||
contained_releases="<a href=\"$url\" rel=\"nofollow\">$artist - $name</a><br>${NL}${contained_releases}"
|
continue
|
||||||
contained_releases_raw="$url$NL$contained_releases_raw"
|
fi
|
||||||
errecho "CONTAINED RELEASE: $artist - $name"
|
|
||||||
if ! rg -q -F "$url" "$logdir/log" 2>/dev/null; then
|
# get album data
|
||||||
echo "$url" >> "$logdir/log"
|
curl -L -s -o "$RELEASE_PAGE" "$url"
|
||||||
|
pup 'head > script[type="application/ld+json"]' 'text{}' < "$RELEASE_PAGE" > "$JSON_HEAD" || continue
|
||||||
|
pup -p '[data-tralbum] attr{data-tralbum}' 'text{}' < "$RELEASE_PAGE" > "$JSON_TRALBUM" || continue
|
||||||
|
|
||||||
|
artist="$(jq -r '.byArtist.name' < "$JSON_HEAD")"
|
||||||
|
name="$(jq -r '.name' < "$JSON_HEAD")"
|
||||||
|
numtracks="$(jq -r '.inAlbum.numTracks' < "$JSON_HEAD")"
|
||||||
|
[ "$numtracks" = "null" ] && numtracks="$(jq -r .numTracks < "$JSON_HEAD")"
|
||||||
|
price="$(jq '
|
||||||
|
if (.inAlbum) then .inAlbum.albumRelease[0].offers.price
|
||||||
|
else .albumRelease[0].offers.price end' < "$JSON_HEAD"
|
||||||
|
)"
|
||||||
|
|
||||||
|
# check if it even has audio first
|
||||||
|
if [ "$(jq .hasAudio < "$JSON_TRALBUM")" != "true" ]; then
|
||||||
|
errecho "NO AUDIO, LOOK CLOSER: $url"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# now check price :D
|
||||||
|
if [ "$price" = "0.0" ]; then
|
||||||
|
if unline=$(is_uncontained "$url"); then
|
||||||
|
unline="$(grep -nF "${unline#*:}" DESCRIPTION.html)"
|
||||||
|
move_from_uncontained "$url"
|
||||||
|
else
|
||||||
|
if indexline=$(is_contained "$last_contained"); then
|
||||||
|
indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
|
||||||
|
else
|
||||||
|
indexline="/<br><div><b>Contained Releases"
|
||||||
|
fi
|
||||||
|
>/dev/null ed DESCRIPTION.html <<-EOF
|
||||||
|
${indexline%%:*}
|
||||||
|
a
|
||||||
|
<a href="$url" rel="nofollow">$artist - $name</a><br>
|
||||||
|
.
|
||||||
|
wq
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
errecho "CONTAINED RELEASE: $artist - $name"
|
||||||
|
last_contained="$url"
|
||||||
|
else
|
||||||
|
# just to catch if we are rebuilding something and don't skip
|
||||||
|
is_contained "$url" >/dev/null && continue
|
||||||
|
errecho "UNCONTAINED RELEASE: $artist - $name"
|
||||||
|
is_uncontained "$url" >/dev/null && continue
|
||||||
|
if indexline=$(is_contained "$last_uncontained"); then
|
||||||
|
indexline="$(grep -nF "${indexline#*:}" DESCRIPTION.html)"
|
||||||
|
else
|
||||||
|
indexline="/<br><div><b>Uncontained Releases"
|
||||||
|
fi
|
||||||
|
>/dev/null ed DESCRIPTION.html <<-EOF
|
||||||
|
${indexline%%:*}
|
||||||
|
a
|
||||||
|
<a href="$url" rel="nofollow">$artist - $name</a><br>
|
||||||
|
.
|
||||||
|
wq
|
||||||
|
EOF
|
||||||
|
last_uncontained="$url"
|
||||||
|
fi
|
||||||
|
done <<-EOF
|
||||||
|
$(
|
||||||
|
pup '#music-grid > li > a attr{href}' < "$DISCOG_PAGE"
|
||||||
|
pup '#music-grid attr{data-client-items}' < "$DISCOG_PAGE" | unescapehtml | jq -r '.[].page_url'
|
||||||
|
)
|
||||||
|
EOF
|
||||||
|
fi
|
||||||
|
|
||||||
|
# download shit
|
||||||
|
if [ -z "$no_download" ]; then
|
||||||
|
outdir="./files"
|
||||||
|
if [ -n "$is_existing_discog" ]; then
|
||||||
|
outdir="./files/new_stuff"
|
||||||
fi
|
fi
|
||||||
else
|
mkdir -p "$outdir"
|
||||||
uncontained_releases="<a href=\"$url\" rel=\"nofollow\">$artist - $name</a><br>${NL}${uncontained_releases}"
|
printf '%s' 'CAT.ALL' > ./files/_rules.conf
|
||||||
errecho "UNCONTAINED RELEASE: $artist - $name"
|
# artist img
|
||||||
|
img="$(pup 'img.band-photo' 'attr{src}' < "$DISCOG_PAGE")"
|
||||||
|
if [ -n "$img" ]; then
|
||||||
|
ext="${img##*.}"
|
||||||
|
curl -s -o ./files/artist."$ext" "${img%_*}_0.$ext"
|
||||||
|
fi
|
||||||
|
errecho "Done with retrieving metadata! Now to download..."
|
||||||
|
bcdl-free --no-unzip -z 12345 -f FLAC -d "$outdir" -e auto -l "$BASEURL/music"
|
||||||
|
|
||||||
|
set +f
|
||||||
|
if [ "$(printf '%s' "$outdir"/*.flac)" != "$outdir/*.flac" ]; then
|
||||||
|
mkdir "$outdir"/TRACKS
|
||||||
|
mv "$outdir"/*.flac "$outdir"/TRACKS
|
||||||
|
fi
|
||||||
|
set -f
|
||||||
fi
|
fi
|
||||||
done <<-EOF
|
done
|
||||||
$(
|
# END MAIN LOOP #
|
||||||
pup '#music-grid > li > a attr{href}' < "$HTML_FILE"
|
|
||||||
pup '#music-grid attr{data-client-items}' < "$HTML_FILE" | unescapehtml | jq -r '.[].page_url'
|
|
||||||
)
|
|
||||||
EOF
|
|
||||||
|
|
||||||
# formatting the description
|
|
||||||
if [ -n "${uncontained_releases}" ]; then
|
|
||||||
uncontained_releases="$NL<br><div><b>Uncontained Releases</b></div>$NL$uncontained_releases"
|
|
||||||
fi
|
|
||||||
contained_releases="$NL<br><div><b>Contained Releases</b></div>$NL$contained_releases"
|
|
||||||
|
|
||||||
printf '%s%s%s%s' \
|
|
||||||
"${look_closer:+$look_closer$NL}" "$socials" "$uncontained_releases" "$contained_releases" > DESCRIPTION.html
|
|
||||||
|
|
||||||
# download shit
|
|
||||||
if [ ! "$no_download" ]; then
|
|
||||||
mkdir -p "files"
|
|
||||||
printf '%s' 'CAT.ALL' > ./files/_rules.conf
|
|
||||||
# artist img
|
|
||||||
img="$(pup 'img.band-photo' 'attr{src}' < "$HTML_FILE")"
|
|
||||||
if [ -n "$img" ]; then
|
|
||||||
ext="${img##*.}"
|
|
||||||
curl -s -o ./files/artist."$ext" "${img%_*}_0.$ext"
|
|
||||||
fi
|
|
||||||
errecho "Done with retrieving metadata! Now to download..."
|
|
||||||
bcdl-free --no-unzip -z 12345 -f FLAC -d "./files" -e auto -l "$BASEURL"
|
|
||||||
|
|
||||||
set +f
|
|
||||||
if [ ./files/*.flac != "./files/*.flac" ]; then
|
|
||||||
mkdir ./files/TRACKS
|
|
||||||
mv ./files/*.flac ./files/TRACKS
|
|
||||||
fi
|
|
||||||
fi
|
|
||||||
|
|
Loading…
Reference in New Issue