discogarchive: fix for bandcamp stinky
This commit is contained in:
parent
e91ff9788c
commit
c0b92df7bb
|
@ -20,6 +20,16 @@ fail() {
|
|||
exit 1
|
||||
}
|
||||
|
||||
# unescapehtml: filter that reads HTML-escaped text on stdin and writes it to
# stdout with the basic character references (quote, apostrophe, <, >, &)
# decoded.
#
# NOTE(review): the entity names on the pattern side were lost when this diff
# was rendered (the rules appeared as no-ops such as s/"/"/g). They are
# reconstructed here from the replacement side of each rule; confirm against
# the repository. Both the named and the decimal spellings of the quote and
# apostrophe references are accepted, since the one used upstream is not
# visible.
#
# The ampersand rule must stay last: decoding it first would turn a
# double-escaped sequence such as "&amp;lt;" into "<" instead of "&lt;".
unescapehtml() {
	sed '
		s/&quot;/"/g
		s/&#34;/"/g
		s/&#39;/'\''/g
		s/&apos;/'\''/g
		s/&lt;/</g
		s/&gt;/>/g
		s/&amp;/\&/g
	'
}
|
||||
|
||||
clean() {
|
||||
trap 'exit' INT HUP QUIT TERM EXIT
|
||||
[ -f "$HTML_FILE" ] && rm -f "$HTML_FILE"
|
||||
|
@ -81,10 +91,11 @@ errecho 'Moving on!'
|
|||
# HTML/JSON PARSING #
|
||||
curl -L -s -o "$HTML_FILE" "$1"
|
||||
|
||||
tmplog="" look_closer="" uncontained_releases="" contained_releases=""
|
||||
tmplog="" look_closer="" uncontained_releases="" contained_releases="" contained_releases_raw=""
|
||||
json="$(mktemp -u)"
|
||||
# albums and tracks
|
||||
while read -r url; do
|
||||
url="$BASEURL$url"
|
||||
# if rg -q -F "$url" "$logdir/log" 2>/dev/null; then
|
||||
# errecho "ALREADY CONTAINED $type: $url"
|
||||
# tmplog="$tmplog$url CONTAINED$NL"
|
||||
|
@ -98,6 +109,7 @@ while read -r url; do
|
|||
errecho "LOOK CLOSER: $url"
|
||||
elif [ "$(jq 'if (.inAlbum) then .inAlbum.albumRelease[0].offers.price else .albumRelease[0].offers.price end' < "$json")" = "0.0" ]; then
|
||||
contained_releases="<a href=\"$url\" rel=\"nofollow\">$artist - $name</a><br>${NL}${contained_releases}"
|
||||
contained_releases_raw="$url$NL$contained_releases_raw"
|
||||
errecho "CONTAINED RELEASE: $artist - $name"
|
||||
if ! rg -q -F "$url" "$logdir/log" 2>/dev/null; then
|
||||
echo "$url" >> "$logdir/log"
|
||||
|
@ -107,7 +119,10 @@ while read -r url; do
|
|||
errecho "UNCONTAINED RELEASE: $artist - $name"
|
||||
fi
|
||||
done <<-EOF
|
||||
$(rg -e '"(/(album|track)/.+)"' -or "$BASEURL"'$1' "$HTML_FILE")
|
||||
$(
|
||||
pup '#music-grid > li > a attr{href}' < "$HTML_FILE"
|
||||
pup '#music-grid attr{data-client-items}' < "$HTML_FILE" | unescapehtml | jq -r '.[].page_url'
|
||||
)
|
||||
EOF
|
||||
|
||||
# formatting the description
|
||||
|
|
Loading…
Reference in New Issue