More fixes.

This commit is contained in:
Fake-Name 2017-11-23 20:58:24 -08:00
parent f9fad86d54
commit bd2abec2fc
3 changed files with 46 additions and 15 deletions

View File

@ -127,10 +127,17 @@ class KonaChanFetcher(scraper.fetchBase.AbstractFetcher):
def extractImageUrl(self, job, sidebar):
    """Return the URL of the full-size image linked from a post's sidebar.

    The ``original-file-unchanged`` link is preferred; when the page does
    not carry one, the ``original-file-changed`` link is used instead.

    Args:
        job: Job being processed. Only ``job.postid`` is read, to build the
            source URL reported when no image link is found.
        sidebar: Parsed sidebar element exposing a BeautifulSoup-style
            ``find(name, class_=...)`` method.

    Returns:
        The ``href`` of the chosen link, or ``None`` when neither link is
        present. The missing-link case is logged at error level instead of
        failing on an unbound local variable.
    """
    # Ordered by preference: apparently some posts have no unchanged
    # variant at all, so the "changed" link is the fallback.
    for css_class in ('original-file-unchanged', 'original-file-changed'):
        link = sidebar.find("a", class_=css_class)
        if link:
            self.log.info("ImageURL: (%s -> %s)", link['href'], link)
            return link['href']

    self.log.error("Missing image url!")
    self.log.error("Source url id = %s", 'https://konachan.com/post/show/{}'.format(job.postid))
    return None
def extractMeta(self, job, soup):
@ -178,7 +185,7 @@ class KonaChanFetcher(scraper.fetchBase.AbstractFetcher):
self.log.warning("Marking %s as %s (%s)", job.id, job.state, job.err_str)
db.session.commit()
return
if 'This post was deleted for the following reasons' in text:
if 'This post was deleted.' in text:
job.state = 'removed'
job.err_str = 'post deleted'
self.log.warning("Marking %s as %s (%s)", job.id, job.state, job.err_str)
@ -229,6 +236,20 @@ class KonaChanFetcher(scraper.fetchBase.AbstractFetcher):
def test():
    """Run metadata extraction for one known post against a stub job.

    Manual smoke test: the page is retrieved through ``fetcher.wg.getSoup``
    and handed to ``extractMeta`` together with a throwaway job object.
    """
    # Local import keeps this debugging helper self-contained.
    from types import SimpleNamespace

    post_id = 5103
    fetcher = KonaChanFetcher()
    soup = fetcher.wg.getSoup("https://konachan.com/post/show/{}".format(post_id))

    # Stand-in for a real job: a plain attribute container with the fields
    # prepared for extractMeta (replaces the `lambda: None` attribute bag).
    job = SimpleNamespace(postid=post_id, tags=[], character=[], artist=[], file=[])
    fetcher.extractMeta(job, soup)
def run(indice):
print("Runner {}!".format(indice))
fetcher = KonaChanFetcher()
@ -246,8 +267,9 @@ def run(indice):
# Script entry point: initialise logging, then start the fetcher / smoke test.
if __name__ == '__main__':
# NOTE(review): logging is set up through two different import paths below
# (`logSetup` and `util.logSetup`). Presumably only one of them is meant to
# remain -- confirm which module path is valid and drop the other.
import logSetup
logSetup.initLogging()
import util.logSetup
util.logSetup.initLogging()
# NOTE(review): both run(1) and test() are invoked here, while a commented-out
# run(1) follows; verify which entry point is actually intended.
run(1)
test()
# run(1)

View File

@ -48,8 +48,9 @@ class E621Fetcher(scraper.fetchBase.AbstractFetcher):
artists = []
for artistli in artistlis:
artist = artistli.find_all('a')[-1].get_text()
artists.append(artist)
if artistli.find_all('a'):
artist = artistli.find_all('a')[-1].get_text()
artists.append(artist)
characters = []
for characterli in characterlis:
@ -170,7 +171,7 @@ class E621Fetcher(scraper.fetchBase.AbstractFetcher):
self.log.warning("Marking %s as %s (%s)", job.id, job.state, job.err_str)
db.session.commit()
return
if 'This post was deleted for the following reasons' in text:
if 'This post was deleted by' in text:
job.state = 'removed'
job.err_str = 'post deleted'
self.log.warning("Marking %s as %s (%s)", job.id, job.state, job.err_str)
@ -242,17 +243,24 @@ def run(indice):
def test():
    """Run metadata extraction for two known posts against stub jobs.

    Manual smoke test: each page is retrieved through ``fetcher.wg.getSoup``
    and handed to ``extractMeta`` together with a fresh throwaway job object.
    """
    # Local import keeps this debugging helper self-contained.
    from types import SimpleNamespace

    fetcher = E621Fetcher()
    post_urls = (
        "https://e621.net/post/show/1190129",
        "https://e621.net/post/show/28024",
    )
    for url in post_urls:
        soup = fetcher.wg.getSoup(url)
        # A real attribute container is passed for every post: previously the
        # first call received None as the job, while the second call set up
        # these list fields -- presumably extractMeta fills them in.
        job = SimpleNamespace(tags=[], character=[], artist=[], file=[])
        fetcher.extractMeta(job, soup)
# Script entry point: initialise logging, then start the fetcher / smoke test.
if __name__ == '__main__':
# NOTE(review): logging is set up through two different import paths below
# (`logSetup` and `util.logSetup`). Presumably only one of them is meant to
# remain -- confirm which module path is valid and drop the other.
import logSetup
logSetup.initLogging()
import util.logSetup
util.logSetup.initLogging()
# NOTE(review): run(1) and test() each appear once live and once commented
# out; verify which entry point is actually intended.
# test()
run(1)
test()
# run(1)

View File

@ -2,6 +2,7 @@ import logging
import threading
import multiprocessing
import scraper.database as db
import scraper.runstate