Move things about, update webrequest lib.

This commit is contained in:
Fake-Name 2017-11-20 21:02:57 -08:00
parent dfeaf81a50
commit b36af77670
26 changed files with 2886 additions and 1132 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@
/*.lwp
/logs
/*.pyc
*.pyc

View File

@ -16,7 +16,7 @@ import runstate
import concurrent.futures
# THREADS = 6
THREADS = 30
THREADS = 15
UPSERT_STEP = 10000
@ -57,7 +57,7 @@ def go():
do_upsert('KonaChan', 245000)
print("Resetting DL states.")
resetDlstate()
# resetDlstate()
print("Creating run contexts")
executor = concurrent.futures.ThreadPoolExecutor(max_workers=THREADS)
@ -66,7 +66,7 @@ def go():
try:
for plugin in plugins:
for x in range(THREADS // len(plugins)):
for x in range(50):
executor.submit(plugin.run, x)

View File

@ -0,0 +1,252 @@
#!/usr/bin/python3
import time
import logging
import random
import traceback
import urllib.parse
import threading
import multiprocessing
import gc
import bs4
import ChromeController
# from cachetools import LRUCache
# class ChromeLRUCache(LRUCache):
# def __init__(self, *args, **kwargs):
# super().__init__(*args, **kwargs)
# self.log = logging.getLogger("Main.ChromeInterfaceCache")
# def close_chrome(self, pop_key, to_del):
# try:
# self.log.info("LRU Cache is closing chromium interface for %s", pop_key)
# to_del.close()
# except Exception:
# self.log.error("Exception in chromium teardown!")
# for line in traceback.format_exc().split("\n"):
# self.log.error(" %s", line)
# def popitem(self):
# pop_key, to_del = super().popitem()
# self.close_chrome(pop_key, to_del)
# def close_by_key(self, key):
# pop_key, to_del = self.pop(key)
# self.close_chrome(pop_key, to_del)
# def get_chromium_instance(self, cr_binary, cr_port):
# cpid = multiprocessing.current_process().name
# ctid = threading.current_thread().name
# csid = "{}-{}".format(cpid, ctid)
# if csid in self:
# self.log.info("Using existing chromium process.")
# # We probe the remote chrome to make sure it's not defunct
# try:
# self[csid].get_current_url()
# return self[csid]
# except ChromeController.ChromeControllerException:
# self.log.error("Chromium appears to be defunct. Creating new")
# self.close_by_key(csid)
# self.log.info("Creating Chromium process.")
# try:
# instance = ChromeController.ChromeRemoteDebugInterface(cr_binary, dbg_port = cr_port)
# except Exception as e:
# self.log.error("Failure creating chromium process!")
# for line in traceback.format_exc().split("\n"):
# self.log.error(" %s", line)
# # Sometimes the old process is around because
# # the GC hasn't seen it, and forcing a collection can fix that.
# # Yes, this is HORRIBLE.
# gc.collect()
# raise e
# self[csid] = instance
# return instance
# CHROME_CACHE = ChromeLRUCache(maxsize=2)
class WebGetCrMixin(object):
    """
    Mixin that bolts Chromium-backed page fetching (via ChromeController)
    onto a WebGet-style class.

    Each fetch runs inside a short-lived ChromeContext; the host class's
    headers and cookiejar (``self.browserHeaders`` / ``self.cj``) are pushed
    into the browser before navigation and cookies are pulled back out
    afterwards, so normal (non-browser) requests keep any auth acquired
    through Chromium.
    """

    # creds is a list of 3-tuples that gets inserted into the password manager.
    # it is structured [(top_level_url1, username1, password1), (top_level_url2, username2, password2)]
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Binary name handed to ChromeController when spawning a browser.
        self._cr_binary = "google-chrome"

    def _syncIntoChromium(self, cr):
        """Push this instance's headers and cookies into the browser ``cr``."""
        # Headers are a list of 2-tuples. We need a dict
        cr.update_headers(dict(self.browserHeaders))
        for ck in self.cj:
            cr.set_cookie(ck)

    def _syncOutOfChromium(self, cr):
        """Copy every cookie the browser ``cr`` holds back into our cookiejar."""
        for ck in cr.get_cookies():
            self.cj.set_cookie(ck)

    def _browseWithReferrer(self, cr, url, referrer):
        """Visit ``referrer`` first, pause a human-ish interval, then load ``url``."""
        cr.blocking_navigate(referrer)
        time.sleep(random.uniform(2, 6))
        cr.blocking_navigate(url)

    def getItemChromium(self, itemUrl):
        """
        Fetch ``itemUrl`` with Chromium.

        Returns a 3-tuple ``(content, fileN, mType)`` where ``fileN`` is the
        last path segment of the final (post-redirect) URL, unquoted and
        coerced to unicode, and ``mType`` is a coarse guess at the MIME type.
        """
        self.log.info("Fetching page for URL: '%s' with Chromium" % itemUrl)
        with ChromeController.ChromeContext(self._cr_binary) as cr:
            self._syncIntoChromium(cr)

            response = cr.blocking_navigate_and_get_source(itemUrl, timeout=10)

            raw_url = cr.get_current_url()
            fileN = raw_url and urllib.parse.urlparse(raw_url)[2].split("/")[-1]
            fileN = urllib.parse.unquote(fileN)
            fileN = bs4.UnicodeDammit(fileN).unicode_markup

            self._syncOutOfChromium(cr)

        # Probably a bad assumption
        mType = "application/x-binary" if response['binary'] else "text/html"

        # So, self._cr.page_source appears to be the *compressed* page source as-rendered. Because reasons.
        return response['content'], fileN, mType

    def getHeadTitleChromium(self, url, referrer=None):
        """
        Resolve ``url`` through Chromium (visiting ``referrer`` first) and
        return ``{'url': final_url, 'title': page_title}``.
        """
        self.log.info("Getting HEAD with Chromium")
        if not referrer:
            referrer = url

        with ChromeController.ChromeContext(self._cr_binary) as cr:
            self._syncIntoChromium(cr)
            self._browseWithReferrer(cr, url, referrer)
            title, cur_url = cr.get_page_url_title()
            self._syncOutOfChromium(cr)

        self.log.info("Resolved URL for %s -> %s", url, cur_url)

        return {
            'url'   : cur_url,
            'title' : title,
        }

    def getHeadChromium(self, url, referrer=None):
        """
        Resolve ``url`` through Chromium (visiting ``referrer`` first) and
        return only the final post-redirect URL.
        """
        self.log.info("Getting HEAD with Chromium")
        if not referrer:
            referrer = url

        with ChromeController.ChromeContext(self._cr_binary) as cr:
            self._syncIntoChromium(cr)
            self._browseWithReferrer(cr, url, referrer)
            dummy_title, cur_url = cr.get_page_url_title()
            self._syncOutOfChromium(cr)

        return cur_url

    def chromiumGetRenderedItem(self, url):
        """
        Fetch ``url`` and return the *rendered* (post-JS) page source as a
        3-tuple ``(content, fileN, mType)``; ``fileN`` is always empty and
        ``mType`` is always ``'text/html'``.
        """
        with ChromeController.ChromeContext(self._cr_binary) as cr:
            self._syncIntoChromium(cr)
            # get_rendered_page_source
            cr.blocking_navigate(url)
            content = cr.get_rendered_page_source()
            self._syncOutOfChromium(cr)

        return content, '', 'text/html'

    def __del__(self):
        # print("ChromiumMixin destructor")
        # Only chain to a parent destructor if one actually exists.
        sup = super()
        if hasattr(sup, '__del__'):
            sup.__del__()

    # def stepThroughCloudFlare_cr(self, url, titleContains='', titleNotContains=''):
    #     '''
    #     Use Selenium+Chromium to access a resource behind cloudflare protection.
    #     Params:
    #         ``url`` - The URL to access that is protected by cloudflare
    #         ``titleContains`` - A string that is in the title of the protected page, and NOT the
    #             cloudflare intermediate page. The presence of this string in the page title
    #             is used to determine whether the cloudflare protection has been successfully
    #             penetrated.
    #     The current WebGetRobust headers are installed into the selenium browser, which
    #     is then used to access the protected resource.
    #     Once the protected page has properly loaded, the cloudflare access cookie is
    #     then extracted from the selenium browser, and installed back into the WebGetRobust
    #     instance, so it can continue to use the cloudflare auth in normal requests.
    #     '''
    #     if (not titleContains) and (not titleNotContains):
    #         raise ValueError("You must pass either a string the title should contain, or a string the title shouldn't contain!")
    #     if titleContains and titleNotContains:
    #         raise ValueError("You can only pass a single conditional statement!")
    #     self.log.info("Attempting to access page through cloudflare browser verification.")
    #     dcap = dict(DesiredCapabilities.Chromium)
    #     wgSettings = dict(self.browserHeaders)
    #     # Install the headers from the WebGet class into Chromium
    #     dcap["Chromium.page.settings.userAgent"] = wgSettings.pop('User-Agent')
    #     for headerName in wgSettings:
    #         dcap['Chromium.page.customHeaders.{header}'.format(header=headerName)] = wgSettings[headerName]
    #     driver = selenium.webdriver.Chromium(desired_capabilities=dcap)
    #     driver.set_window_size(1024, 768)
    #     driver.get(url)
    #     if titleContains:
    #         condition = EC.title_contains(titleContains)
    #     elif titleNotContains:
    #         condition = title_not_contains(titleNotContains)
    #     else:
    #         raise ValueError("Wat?")
    #     try:
    #         WebDriverWait(driver, 20).until(condition)
    #         success = True
    #         self.log.info("Successfully accessed main page!")
    #     except TimeoutException:
    #         self.log.error("Could not pass through cloudflare blocking!")
    #         success = False
    #     # Add cookies to cookiejar
    #     for cookie in driver.get_cookies():
    #         self.addSeleniumCookie(cookie)
    #         #print cookie[u"value"]
    #     self.__syncCookiesFromFile()
    #     return success

View File

@ -0,0 +1,518 @@
import random
random.seed()
# Due to general internet people douchebaggyness, I've basically said to hell with it and decided to spoof a whole assortment of browsers
# It should keep people from blocking this scraper *too* easily
# This file generates a random browser user-agent, It should have an extremely large set of possible UA structures.
USER_AGENTS = [
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 10.0; WOW64; Trident/8.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; BIDUBrowser 2.x)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 1.0.3705; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)',
'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.2; WOW64; Trident/7.0; .NET4.0E; .NET4.0C; Media Center PC 6.0; .NET CLR 3.5.30729; .NET CLR 2.0.50727; .NET CLR 3.0.30729; InfoPath.3; ms-office; MSOffice 15)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1)',
'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0)',
'Mozilla/4.0 (compatible; MSIE 9.0; Windows NT 6.1; 125LA; .NET CLR 2.0.50727; .NET CLR 3.0.04506.648; .NET CLR 3.5.21022)',
'Mozilla/4.0 (compatible;)',
'Mozilla/5.0',
'Mozilla/5.0 (BB10; Kbd) AppleWebKit/537.35+ (KHTML, like Gecko) Version/10.3.2.2876 Mobile Safari/537.35+',
'Mozilla/5.0 (compatible) Feedfetcher-Google; (+http://www.google.com/feedfetcher.html)',
'Mozilla/5.0 (compatible; AhrefsBot/5.2; +http://ahrefs.com/robot/)',
'Mozilla/5.0 (compatible; archive.org_bot +http://www.archive.org/details/archive.org_bot)',
'Mozilla/5.0 (compatible; archive.org_bot; Wayback Machine Live Record; +http://archive.org/details/archive.org_bot)',
'Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)',
'Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)',
'Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)',
'Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)',
'Mozilla/5.0 (compatible; DotBot/1.1; http://www.opensiteexplorer.org/dotbot, help@moz.com)',
'Mozilla/5.0 (compatible; DuckDuckGo-Favicons-Bot/1.0; +http://duckduckgo.com)',
'Mozilla/5.0 (compatible; evc-batch/2.0.20170913102128)',
'Mozilla/5.0 (compatible; Exabot/3.0; +http://www.exabot.com/go/robot)',
'Mozilla/5.0 (compatible; FLinkhubbot/1.1; +hello@flinkhub.com )',
'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
'Mozilla/5.0 (compatible; linkdexbot/2.2; +http://www.linkdex.com/bots/)',
'Mozilla/5.0 (compatible; Linux x86_64; Mail.RU_Bot/2.0; +http://go.mail.ru/help/robots)',
'Mozilla/5.0 (compatible; MJ12bot/v1.4.7; http://mj12bot.com/)',
'Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; Trident/6.0)',
'Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.2; WOW64; Trident/6.0; MASBJS)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0; Trident/5.0; Trident/5.0)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; Trident/5.0)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)',
'Mozilla/5.0 (compatible; MSIE 9.0; Windows Phone OS 7.5; Trident/5.0; IEMobile/9.0; NOKIA; Lumia 900)',
'Mozilla/5.0 (compatible; Nmap Scripting Engine; https://nmap.org/book/nse.html)',
'Mozilla/5.0 (compatible; Pinterestbot/1.0; +http://www.pinterest.com/bot.html)',
'Mozilla/5.0 (compatible; SemrushBot-BA; +http://www.semrush.com/bot.html)',
'Mozilla/5.0 (compatible; SemrushBot/1.2~bl; +http://www.semrush.com/bot.html)',
'Mozilla/5.0 (compatible; SeznamBot/3.2; +http://napoveda.seznam.cz/en/seznambot-intro/)',
'Mozilla/5.0 (compatible; Yahoo! Slurp; http://help.yahoo.com/help/us/ysearch/slurp)',
'Mozilla/5.0 (compatible; YandexAccessibilityBot/3.0; +http://yandex.com/bots)',
'Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)',
'Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)',
'Mozilla/5.0 (iPad; CPU OS 10_0_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Mobile/14A403 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_1_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/61.0.3163.73 Mobile/14B100 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_1_1 like Mac OS X) AppleWebKit/602.2.14 (KHTML, like Gecko) Version/10.0 Mobile/14B100 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_2 like Mac OS X) AppleWebKit/602.3.12 (KHTML, like Gecko) Version/10.0 Mobile/14C92 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_2_1 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) CriOS/60.0.3112.89 Mobile/14D27 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_2_1 like Mac OS X) AppleWebKit/602.4.6 (KHTML, like Gecko) Version/10.0 Mobile/14D27 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E277 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_1 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) Version/10.0 Mobile/14E304 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) CriOS/60.0.3112.89 Mobile/14F89 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_2 like Mac OS X) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.0 Mobile/14F89 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/602.1.50 (KHTML, like Gecko) GSA/34.1.167176684 Mobile/14G60 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) CriOS/60.0.3112.89 Mobile/14G60 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.1.30 (KHTML, like Gecko) CriOS/61.0.3163.73 Mobile/14G60 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 10_3_3 like Mac OS X) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.0 Mobile/14G60 Safari/602.1',
'Mozilla/5.0 (iPad; CPU OS 5_1_1 like Mac OS X) AppleWebKit/534.46 (KHTML, like Gecko) Version/5.1 Mobile/9B206 Safari/7534.48.3',
'Mozilla/5.0 (iPad; CPU OS 7_1_1 like Mac OS X) AppleWebKit/537.51.2 (KHTML, like Gecko) Version/7.0 Mobile/11D201 Safari/9537.53',
'Mozilla/5.0 (iPad; CPU OS 8_1_2 like Mac OS X) AppleWebKit/600.1.4 (KHTML, like Gecko) Version/8.0 Mobile/12B440 Safari/600.1.4',
'Mozilla/5.0 (iPad; CPU OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1',
'Mozilla/5.0 (iPad; CPU OS 9_3_3 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G34 Safari/601.1',
'Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/60.0.3112.89 Mobile/13G36 Safari/601.1.46',
'Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1 (KHTML, like Gecko) CriOS/61.0.3163.73 Mobile/13G36 Safari/601.1.46',
'Mozilla/5.0 (iPad; CPU OS 9_3_5 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13G36 Safari/601.1',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:44.0) Gecko/20100101 Firefox/44.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.11; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:53.0) Gecko/20100101 Firefox/53.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:48.0) Gecko/20100101 Firefox/48.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:48.0) Gecko/20100101 Firefox/48.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/601.6.17 (KHTML, like Gecko) Version/9.1.1 Safari/601.6.17',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2752.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/601.7.8 (KHTML, like Gecko) Version/9.1.3 Safari/601.7.8',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12) AppleWebKit/602.1.50 (KHTML, like Gecko) Version/10.0 Safari/602.1.50',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.95 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/603.1.30 (KHTML, like Gecko)',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko)',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/603.3.8 (KHTML, like Gecko) Version/10.1.2 Safari/603.3.8',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_5_8) AppleWebKit/534.50.2 (KHTML, like Gecko) Version/5.0.6 Safari/533.22.3',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/6.1.3 Safari/537.75.14',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/601.7.8 (KHTML, like Gecko) Version/9.1.3 Safari/537.86.7',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2486.0 Safari/537.36 Edge/13.10586',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.105 Safari/537.36 Vivaldi/1.92.917.43',
'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063,gzip(gfe)',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.80',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.18 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.9 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3214.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:50.0) Gecko/20100101 Firefox/50.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0',
'Mozilla/5.0 (Windows NT 10.0; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.93 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/7.0.6.1042 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2700.0 Iron Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.0.12195 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.0.12195 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.11.2987.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.13.2987.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.2988.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.1.3029.81 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36 Vivaldi/1.91.867.42',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 YaBrowser/17.7.1.791 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.105 Safari/537.36 Vivaldi/1.92.917.43',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 YaBrowser/17.9.1.449 (beta) Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36 OPR/47.0.2631.55',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.80',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.80 (Edition Campaign 34)',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.71 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/61.4.120 Chrome/55.4.2883.120 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.102 Chrome/60.4.3112.102 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.104 Chrome/60.4.3112.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/5.0.4.3000 Chrome/47.0.2526.73 Safari/537.36',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:41.0) Gecko/20100101 Firefox/41.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0,gzip(gfe)',
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; MATBJS; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.12 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.80 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 YaBrowser/17.3.0.1785 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/50.2.163 Chrome/44.2.2403.163 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/56.3.154 Chrome/50.3.2661.154 Safari/537.36',
'Mozilla/5.0 (Windows NT 5.1; rv:11.0) Gecko Firefox/11.0 (via ggpht.com GoogleImageProxy)',
'Mozilla/5.0 (Windows NT 5.1; rv:25.0) Gecko/20100101 Firefox/25.0',
'Mozilla/5.0 (Windows NT 5.1; rv:30.0) Gecko/20100101 Firefox/30.0',
'Mozilla/5.0 (Windows NT 5.1; rv:43.0) Gecko/20100101 Firefox/43.0',
'Mozilla/5.0 (Windows NT 5.1; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 5.1; rv:52.0) Gecko/20100101 Firefox/52.0,gzip(gfe)',
'Mozilla/5.0 (Windows NT 5.1; rv:6.0.2) Gecko/20100101 Firefox/6.0.2',
'Mozilla/5.0 (Windows NT 5.2; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 6.0; rv:22.0) Gecko/20130405 Firefox/22.0',
'Mozilla/5.0 (Windows NT 6.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2228.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.124 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/7.0.6.1042 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.273 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.84 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.76 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.1.3029.81 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 YaBrowser/17.7.1.791 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.104 Chrome/60.4.3112.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.9.5.1000 Chrome/39.0.2146.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0',
'Mozilla/5.0 (Windows NT 6.1; rv:38.9) Gecko/20100101 Goanna/2.2 Firefox/38.9 PaleMoon/26.5.0',
'Mozilla/5.0 (Windows NT 6.1; rv:42.0) Gecko/20100101 Firefox/42.0',
'Mozilla/5.0 (Windows NT 6.1; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Windows NT 6.1; rv:53.0) Gecko/20100101 Firefox/53.0',
'Mozilla/5.0 (Windows NT 6.1; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.1; rv:55.0) Gecko/20100101 Firefox/55.0,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1; rv:56.0) Gecko/20100101 Firefox/56.0',
'Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2214.85 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2540.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/53.0.2785.143 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 OPR/44.0.2510.1449',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36 OPR/47.0.2631.55',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.80',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.18 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:3.2) Goanna/20170821 PaleMoon/27.4.2',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:42.0) Gecko/20100101 Firefox/42.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:45.9) Gecko/20100101 Goanna/3.2 Firefox/45.9 PaleMoon/27.4.2',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.1; Win64; x64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534+ (KHTML, like Gecko) BingPreview/1.0b',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.63 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.102 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.149 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.154 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.153 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36; 360Spider',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/7.0.6.1042 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.94 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.63 Safari/537.36 OPR/38.0.2220.29',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.75 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 UBrowser/6.1.2909.1213 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.0.12335 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.2991.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.81 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.109 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 YaBrowser/17.7.0.1683 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 YaBrowser/17.7.1.791 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 YaBrowser/17.7.1.804 Yowser/2.5 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36 OPR/46.0.2597.32',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.86 Safari/537.36 Sleipnir/4.5.8',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.90 Safari/537.36 Vivaldi/1.91.867.38',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.71 (Edition 360-1)',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.4.3112.104 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.102 Chrome/60.4.3112.102 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.104 Chrome/60.4.3112.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:25.0) Gecko/20100101 Firefox/25.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:29.0) Gecko/20120101 Firefox/29.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 Firefox/40.1',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:40.0) Gecko/20100101 IceDragon/40.1.1.18 Firefox/40.0.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:45.9) Gecko/20100101 Goanna/3.2 Firefox/45.9 PaleMoon/27.4.2',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:48.0) Gecko/20100101 Firefox/48.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:50.0) Gecko/20100101 Firefox/50.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:52.2.0) Gecko/52.2.0 Firefox/52.2.0; ADSSO',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:53.0) Gecko/20100101 Firefox/53.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.12.2987.98 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:39.0) Gecko/20100101 Firefox/39.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64; rv:56.0) Gecko/20100101 Firefox/56.0',
'Mozilla/5.0 (Windows NT 6.2; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.5.3029.81 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36 OPR/44.0.2510.1449',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 OPR/45.0.0.255225845',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36,gzip(gfe)',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; Touch; LCJB; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.3; Win64; x64; Trident/7.0; Touch; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.0.10802 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 UBrowser/7.0.6.1042 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.0.12137 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36 OPR/43.0.2442.1144',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.2988.0 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.90 Safari/537.36 Vivaldi/1.91.867.38',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36 OPR/47.0.2631.80',
'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/66.4.104 Chrome/60.4.3112.104 Safari/537.36',
'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:47.0) Gecko/20100101 Firefox/47.0',
'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (Windows NT 6.3; WOW64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; MAFSJS; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko/20100101 Firefox/12.0',
'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; Touch; LCJB; rv:11.0) like Gecko',
'Mozilla/5.0 (Windows Phone 8.1; ARM; Trident/7.0; Touch; rv:11.0; IEMobile/11.0; NOKIA; Lumia 635) like Gecko',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; de; rv:1.9.0.7; Google-SearchByImage) Gecko/2009021910 Firefox/3.0.7',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/532.0 (KHTML, like Gecko) Chrome/3.0.195.27 Safari/532.0',
'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.3) Gecko/20070309 Firefox/2.0.0.3',
'Mozilla/5.0 (X11; CrOS x86_64 9460.73.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.134 Safari/537.36',
'Mozilla/5.0 (X11; CrOS x86_64 9592.85.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.112 Safari/537.36',
'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.101 Safari/537.36',
'Mozilla/5.0 (X11; Fedora; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (X11; Fedora; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.132 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.75 Safari/537.36 Google Favicon',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.104 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.78 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.79 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/48.0.2564.82 Chrome/48.0.2564.82 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/60.0.3112.113 Chrome/60.0.3112.113 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/60.0.3112.78 Chrome/60.0.3112.78 Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) WordPress.com mShots Safari/537.36',
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/44.0 (Chrome)',
'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) Gecko/20150101 Firefox/47.0 (Chrome)',
'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
'Mozilla/5.0 (X11; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Mozilla/5.0 (X11; U; Linux i686; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Chrome/6.0.460.0 Safari/534.3',
'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.114 Safari/537.36 Puffin/5.2.0IT',
'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.114 Safari/537.36 Puffin/5.2.2IT',
'Mozilla/5.0 (X11; Ubuntu; Linux i686; rv:41.0) Gecko/20100101 Firefox/41.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:51.0) Gecko/20100101 Firefox/51.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0',
'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:55.0) Gecko/20100101 Firefox/55.0',
'Opera/9.80 (BlackBerry; Opera Mini/8.0.35667/67.445; U; en) Presto/2.12.423 Version/12.16',
'UCWEB/2.0 (Java; U; MIDP-2.0; Nokia203/20.37) U2/1.0.0 UCBrowser/8.7.0.218 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 4.4.2; id; S35G) U2/1.0.0 UCBrowser/10.7.8.806 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 5.0.1; en-US; GT-I9500) U2/1.0.0 UCBrowser/10.9.5.983 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 5.1.1; en-US; A37f) U2/1.0.0 UCBrowser/10.9.8.1006 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 5.1.1; en-US; SM-J200G) U2/1.0.0 UCBrowser/10.6.0.706 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 6.0.1; id; ASUS_Z00LDD) U2/1.0.0 UCBrowser/10.9.5.983 U2/1.0.0 Mobile',
'UCWEB/2.0 (MIDP-2.0; U; Adr 6.0; airg.com; S8_mini) U2/1.0.0 UCBrowser/9.6.0.514 U2/1.0.0 Mobile',
'UCWEB/2.0 (Windows; U; wds 8.10; en-IN; NOKIA; RM-978_1046) U2/1.0.0 UCBrowser/4.2.1.541 U2/1.0.0 Mobile',
]
# Accept-Language header values sampled from real browser traffic.
# NOTE(review): duplicate entries appear intentional, weighting random.choice — confirm.
ACCEPT_LANGUAGE =[
	"en-gb,en-us;q=0.7,de-ch;q=0.3",
	"en-GB,en-US;q=0.8,en;q=0.6",
	"en-GB,en-US;q=0.8,en;q=0.6",
	"en-US",
	"en-us, en;q=1.0,fr-ca, fr;q=0.5,pt-br, pt;q=0.5,es;q=0.5",
	"en-US,de-DE;q=0.5",
	"en-us,en;q=0.5",
	"en-US,en;q=0.8",
	"en-US,en;q=0.8,en-GB;q=0.6,fr-CA;q=0.4,fr;q=0.2",
	"en-US,en;q=0.8,es-419;q=0.6",
	"en-us,en;q=0.8,es;q=0.5,es-mx;q=0.3",
	"en-US,en;q=0.8,es;q=0.6",
	"en-US,en;q=0.8,pl;q=0.6",
	"en-US,en;q=0.8,pl;q=0.6",
	"en-US,en;q=0.9",
	"en-US,en;q=0.9,fr;q=0.8,de;q=0.7,id;q=0.6",
	"en-US,en;q=0.9,ja;q=0.8,fr;q=0.7,de;q=0.6,es;q=0.5,it;q=0.4,nl;q=0.3,sv;q=0.2,nb;q=0.1",
	]
# Each entry is the prefix of an Accept header; a postfix from
# ACCEPT_POSTFIX (below) is appended at request-generation time.
# These are module-level lists shared by every caller — copy before mutating.
ACCEPT = [
	["text/html","application/xhtml+xml","application/xml;q=0.9"],
	["application/xml","application/xhtml+xml","text/html;q=0.9"," text/plain;q=0.8","image/png"],
	["text/html","application/xhtml+xml","application/xml;q=0.9"],
	["image/jpeg","application/x-ms-application","image/gif","application/xaml+xml","image/pjpeg","application/x-ms-xbap","application/x-shockwave-flash","application/msword"],
	["text/html","application/xml;q=0.9","application/xhtml+xml","image/png","image/webp","image/jpeg","image/gif","image/x-xbitmap"]
	]
# Catch-all Accept fragments, one of which terminates the Accept header.
ACCEPT_POSTFIX = ["*/*;q=0.8", "*/*;q=0.5", "*/*;q=0.8", "*/*", "*/*;q=0.1"]
# Candidate Accept-Encoding combinations (shared lists — copy before shuffling).
ENCODINGS = [['gzip'], ['gzip', 'deflate'], ['gzip', 'deflate', 'sdch']]
def getUserAgent():
	'''
	Generate a randomized user agent by permuting a large set of possible values.
	The returned user agent should look like a valid, in-use browser, with a
	specified preferred language of english.
	Return value is a list of tuples, where each tuple is one of the user-agent headers.
	Currently can provide approximately 147 * 17 * 5 * 5 * 2 * 3 * 2 values, or ~749K possible
	unique user-agents.
	'''
	# IMPORTANT: random.choice() returns a *reference* to the module-level
	# list, so it must be copied before being shuffled/extended. Mutating it
	# in place would permanently reorder ENCODINGS/ACCEPT entries and grow
	# each ACCEPT sublist by one postfix on every call.
	coding = list(random.choice(ENCODINGS))
	random.shuffle(coding)
	coding = ",".join(coding)

	accept = list(random.choice(ACCEPT))
	random.shuffle(accept)
	accept.append(random.choice(ACCEPT_POSTFIX))
	# Separator style (with/without space) varies between real browsers too.
	accept = random.choice((", ", ",")).join(accept)

	user_agent = [
			('User-Agent'      , random.choice(USER_AGENTS)),
			('Accept-Language' , random.choice(ACCEPT_LANGUAGE)),
			('Accept'          , accept),
			('Accept-Encoding' , coding),
		]
	return user_agent
# This file based heavily on the UA List, Copyright (c) 2014, Harald Hope
# This list was released under the BSD 2 clause.
# Home page: techpatterns.com/forums/about304.html
# Special thanks to the following:
# User-Agent Switcher: www.chrispederick.com/work/user-agent-switcher
# Firefox history: www.zytrax.com/tech/web/firefox-history.html
# Mobile data: wikipedia.org/wiki/List_of_user_agents_for_mobile_phones
# Mobile data: www.zytrax.com/tech/web/mobile_ids.html
# Current User-Agents: http://myip.ms/browse/comp_browsers
# User-agent data: www.zytrax.com/tech/web/browser_ids.htm
# User-agent strings: www.useragentstring.com
# User-agent strings: www.webapps-online.com/online-tools/user-agent-strings/dv/
# License: BSD 2 Clause
# All rights reserved. Redistribution and use in source and binary forms,
# with or without modification, are permitted provided that the following
# conditions are met:
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice, this
# list of conditions and the following disclaimer in the documentation and/or other
# materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS'
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
# IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
# POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,15 @@
class WebGetException(Exception):
	"""Base class for all errors raised by the WebRequest package."""


class ContentTypeError(WebGetException):
	"""Fetched content had an unexpected or disallowed MIME type."""


class ArgumentError(WebGetException):
	"""A caller-supplied argument was invalid or inconsistent."""


class FetchFailureError(WebGetException):
	"""The remote fetch itself failed (network error, bad status, etc.)."""

147
util/WebRequest/Handlers.py Normal file
View File

@ -0,0 +1,147 @@
#!/usr/bin/python3
import sys
import codecs
import http.client
import email.parser
import urllib.request
import urllib.parse
import urllib.error
import os.path
import time
import http.cookiejar
import traceback
import logging
import zlib
import bs4
import re
import string
import gzip
import io
import socket
import json
import base64
import random
class HeadRequest(urllib.request.Request):
	"""Request that was intended to issue HEAD probes.

	Some remotes block HEAD outright, so this currently issues a plain
	GET instead (the original HEAD behavior is retained in the comment).
	"""

	def get_method(self):
		# Apparently HEAD is now being blocked. Because douche.
		return "GET"
		# return "HEAD"
class HTTPRedirectBlockerErrorHandler(urllib.request.HTTPErrorProcessor):  # pragma: no cover
	"""Error processor that refuses to follow 301/302 redirects.

	Redirect responses are handed back to the caller untouched so the
	caller can inspect them, instead of being transparently followed.
	Everything else outside the 2xx range is routed through the normal
	error machinery.
	"""

	def http_response(self, request, response):
		code, msg, hdrs = response.code, response.msg, response.info()

		# Stop redirection: return 301/302 responses as-is.
		if code == 302:
			print("Code!", 302)
			return response
		if code == 301:
			print("Code!", 301)
			return response

		print("[HTTPRedirectBlockerErrorHandler] http_response! code:", code)
		print(hdrs)
		print(msg)

		if 200 <= code < 300:
			return response
		return self.parent.error('http', request, response, code, msg, hdrs)

	https_response = http_response
# Custom redirect handler to work around
# issue https://bugs.python.org/issue17214
class HTTPRedirectHandler(urllib.request.HTTPRedirectHandler):
	"""Redirect handler that tolerates malformed Location headers.

	Works around https://bugs.python.org/issue17214 by re-quoting the
	redirect target (recovered as ISO-8859-1 bytes) before following it.
	Otherwise mirrors the stdlib implementation, including loop detection.
	"""
	# Implementation note: To avoid the server sending us into an
	# infinite loop, the request object needs to track what URLs we
	# have already seen. Do this by adding a handler-specific
	# attribute to the Request object.
	def http_error_302(self, req, fp, code, msg, headers):
		# Some servers (incorrectly) return multiple Location headers
		# (so probably same goes for URI). Use first header.
		if "location" in headers:
			newurl = headers["location"]
		elif "uri" in headers:
			newurl = headers["uri"]
		else:
			# No redirect target supplied; nothing to follow.
			return

		# fix a possible malformed URL
		urlparts = urllib.parse.urlparse(newurl)

		# For security reasons we don't allow redirection to anything other
		# than http, https or ftp.
		if urlparts.scheme not in ('http', 'https', 'ftp', ''):
			raise urllib.error.HTTPError(
				newurl, code,
				"%s - Redirection to url '%s' is not allowed" % (msg, newurl),
				headers, fp)

		# An empty path component is normalized to "/" before re-joining.
		if not urlparts.path:
			urlparts = list(urlparts)
			urlparts[2] = "/"
		newurl = urllib.parse.urlunparse(urlparts)

		# http.client.parse_headers() decodes as ISO-8859-1. Recover the
		# original bytes and percent-encode non-ASCII bytes, and any special
		# characters such as the space.
		newurl = urllib.parse.quote(
			newurl, encoding="iso-8859-1", safe=string.punctuation)

		newurl = urllib.parse.urljoin(req.full_url, newurl)

		# XXX Probably want to forget about the state of the current
		# request, although that might interact poorly with other
		# handlers that also use handler-specific request attributes
		new = self.redirect_request(req, fp, code, msg, headers, newurl)
		if new is None: # pragma: no cover
			return

		# loop detection
		# .redirect_dict has a key url if url was previously visited.
		if hasattr(req, 'redirect_dict'):
			visited = new.redirect_dict = req.redirect_dict
			if (visited.get(newurl, 0) >= self.max_repeats or
				len(visited) >= self.max_redirections):
				raise urllib.error.HTTPError(req.full_url, code,
					self.inf_msg + msg, headers, fp)
		else:
			visited = new.redirect_dict = req.redirect_dict = {}
		visited[newurl] = visited.get(newurl, 0) + 1

		# Don't close the fp until we are sure that we won't use it
		# with HTTPError.
		fp.read()
		fp.close()

		return self.parent.open(new, timeout=req.timeout)
class PreemptiveBasicAuthHandler(urllib.request.HTTPBasicAuthHandler):
	"""Preemptive HTTP basic auth.

	Rather than waiting for a 401/403 challenge and retrying, attach the
	Authorization header up-front whenever the URL is covered by the
	password manager. Register credentials with realm=None.
	"""

	def http_request(self, req):
		# Same lookup retry_http_basic_auth() performs, but applied to the
		# outgoing request instead of reacting to a failed response.
		user, pw = self.passwd.find_user_password(None, req.get_full_url())
		if pw:
			raw = "%s:%s" % (user, pw)
			auth = b'Basic ' + base64.standard_b64encode(raw.encode("ascii")).strip()
			req.add_unredirected_header(self.auth_header, auth)
		return req

	https_request = http_request

View File

@ -0,0 +1,89 @@
#!/usr/bin/python3
import sys
import codecs
import http.client
import email.parser
# cchardet is an optional accelerator for charset detection; when it is
# not installed the module falls back to a manual ascii/utf-8/latin-1 probe.
cchardet = False
try:
	import cchardet
except ImportError: # pragma: no cover
	pass
def isUTF8Strict(data): # pragma: no cover - Only used when cchardet is missing.
	'''
	Return True when *data* (a bytes-like object) decodes cleanly as UTF-8
	and the decoded text contains no surrogate code points
	(U+D800..U+DFFF); otherwise return False.
	'''
	try:
		text = data.decode('UTF-8')
	except UnicodeDecodeError:
		return False
	# Reject surrogates, which a permissive decoder could let through.
	return all(not (0xD800 <= ord(ch) <= 0xDFFF) for ch in text)
def decode_headers(header_list):
	'''
	Decode a list of raw header lines.

	Takes a list of bytestrings and returns a list of unicode strings,
	guessing the character set of each line independently: via cchardet
	when available (falling back to latin-1 on low confidence), otherwise
	via an ascii -> strict-utf-8 -> latin-1 probe.
	'''
	decoded = []
	for raw in header_list:
		if cchardet:
			guess = cchardet.detect(raw)
			if guess and guess['confidence'] > 0.8:
				decoded.append(raw.decode(guess['encoding']))
			else:
				# Low-confidence guess: latin-1 never fails to decode.
				decoded.append(raw.decode('iso-8859-1'))
		else: # pragma: no cover
			if all(byte & 0x80 == 0 for byte in raw):
				# All bytes are < 127 (e.g. ASCII)
				decoded.append(raw.decode("us-ascii"))
			elif isUTF8Strict(raw):
				decoded.append(raw.decode("utf-8"))
			else:
				decoded.append(raw.decode('iso-8859-1'))
	return decoded
def parse_headers(fp, _class=http.client.HTTPMessage):
	"""Parses only RFC2822 headers from a file pointer.

	Monkey-patched replacement for http.client.parse_headers that tries to
	more intelligently determine the header encoding (via decode_headers)
	instead of assuming ISO-8859-1. The email Parser wants strings rather
	than bytes, so the raw header bytes are read here and decoded before
	being handed to email.parser.

	Raises http.client.LineTooLong / http.client.HTTPException on
	oversized or excessively numerous headers, matching the stdlib.
	"""
	headers = []
	while True:
		line = fp.readline(http.client._MAXLINE + 1)
		if len(line) > http.client._MAXLINE:
			raise http.client.LineTooLong("header line")
		headers.append(line)
		if len(headers) > http.client._MAXHEADERS:
			# Bug fix: this previously raised a bare `HTTPException`,
			# which is a NameError — only http.client is imported here.
			raise http.client.HTTPException("got more than %d headers" % http.client._MAXHEADERS)
		# Blank line (or EOF) terminates the header block.
		if line in (b'\r\n', b'\n', b''):
			break

	decoded_headers = decode_headers(headers)

	hstring = ''.join(decoded_headers)

	return email.parser.Parser(_class=_class).parsestr(hstring)

# Install the charset-aware parser so every http.client response uses it.
http.client.parse_headers = parse_headers

View File

@ -0,0 +1,277 @@
#!/usr/bin/python3
import time
import random
import socket
import urllib.parse
import http.cookiejar
import bs4
import selenium.webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
class title_not_contains(object):
	""" An expectation for checking that the title *does not* contain a case-sensitive
	substring. title is the fragment of title expected
	returns True when the title matches, False otherwise
	"""
	def __init__(self, title):
		# Fragment that must be absent from the driver's title.
		self.title = title

	def __call__(self, driver):
		# Invoked repeatedly by WebDriverWait until truthy.
		return self.title not in driver.title
#pylint: disable-msg=E1101, C0325, R0201, W0702, W0703
def wait_for(condition_function):
	'''
	Poll *condition_function* (a zero-argument callable) every 100 ms until
	it returns a truthy value, for up to ~3 seconds. Returns True on
	success; raises Exception on timeout.
	'''
	deadline = time.time() + 3
	while time.time() < deadline:
		if condition_function():
			return True
		time.sleep(0.1)
	raise Exception(
		'Timeout waiting for {}'.format(condition_function.__name__)
	)
class load_delay_context_manager(object):
	"""Context manager that blocks on exit until the browser has navigated
	to a new page, detected by the root <html> element being replaced."""

	def __init__(self, browser):
		self.browser = browser

	def __enter__(self):
		# Snapshot the current page's root element; a navigation swaps it
		# for a new element with a different internal id.
		self.old_page = self.browser.find_element_by_tag_name('html')

	def page_has_loaded(self):
		current_root = self.browser.find_element_by_tag_name('html')
		return current_root.id != self.old_page.id

	def __exit__(self, *_):
		wait_for(self.page_has_loaded)
class WebGetPjsMixin(object):
	def __init__(self, *args, **kwargs):
		# Cooperative mixin init: pass everything through the MRO first.
		super().__init__(*args, **kwargs)
		self.pjs_driver = None  # PhantomJS webdriver; created lazily on first use
def _initPjsWebDriver(self):
if self.pjs_driver:
self.pjs_driver.quit()
dcap = dict(DesiredCapabilities.PHANTOMJS)
wgSettings = dict(self.browserHeaders)
# Install the headers from the WebGet class into phantomjs
dcap["phantomjs.page.settings.userAgent"] = wgSettings.pop('User-Agent')
for headerName in wgSettings:
if headerName != 'Accept-Encoding':
dcap['phantomjs.page.customHeaders.{header}'.format(header=headerName)] = wgSettings[headerName]
self.pjs_driver = selenium.webdriver.PhantomJS(desired_capabilities=dcap)
self.pjs_driver.set_window_size(1280, 1024)
def _syncIntoPjsWebDriver(self):
'''
So selenium is completely retarded, and you can't just set cookes, you have to
be navigated to the domain for which you want to set cookies.
This is extra double-plus idiotic, as it means you can't set cookies up
before navigating.
Sigh.
'''
pass
# for cookie in self.getCookies():
# print("Cookie: ", cookie)
# cookurl = [
# "http" if cookieDict['httponly'] else "https", # scheme 0 URL scheme specifier
# cookie.domain, # netloc 1 Network location part
# "/", # path 2 Hierarchical path
# "", # params 3 Parameters for last path element
# "", # query 4 Query component
# "", # fragment 5 Fragment identifier
# ]
# cdat = {
# 'name' : cookie.name,
# 'value' : cookie.value,
# 'domain' : cookie.domain,
# 'path' :
# 'expiry' :
# }
# print("CDat: ", cdat)
# self.pjs_driver.add_cookie(cdat)
def _syncOutOfPjsWebDriver(self):
for cookie in self.pjs_driver.get_cookies():
self.addSeleniumCookie(cookie)
def getItemPhantomJS(self, itemUrl):
self.log.info("Fetching page for URL: '%s' with PhantomJS" % itemUrl)
if not self.pjs_driver:
self._initPjsWebDriver()
self._syncIntoPjsWebDriver()
with load_delay_context_manager(self.pjs_driver):
self.pjs_driver.get(itemUrl)
time.sleep(3)
fileN = urllib.parse.unquote(urllib.parse.urlparse(self.pjs_driver.current_url)[2].split("/")[-1])
fileN = bs4.UnicodeDammit(fileN).unicode_markup
self._syncOutOfPjsWebDriver()
# Probably a bad assumption
mType = "text/html"
# So, self.pjs_driver.page_source appears to be the *compressed* page source as-rendered. Because reasons.
source = self.pjs_driver.execute_script("return document.getElementsByTagName('html')[0].innerHTML")
assert source != '<head></head><body></body>'
source = "<html>"+source+"</html>"
return source, fileN, mType
def getHeadTitlePhantomJS(self, url, referrer=None):
self.getHeadPhantomJS(url, referrer)
ret = {
'url' : self.pjs_driver.current_url,
'title' : self.pjs_driver.title,
}
return ret
def getHeadPhantomJS(self, url, referrer=None):
self.log.info("Getting HEAD with PhantomJS")
if not self.pjs_driver:
self._initPjsWebDriver()
self._syncIntoPjsWebDriver()
def try_get(loc_url):
tries = 3
for x in range(9999):
try:
self.pjs_driver.get(loc_url)
time.sleep(random.uniform(2, 6))
return
except socket.timeout as e:
if x > tries:
raise e
if referrer:
try_get(referrer)
try_get(url)
self._syncOutOfPjsWebDriver()
return self.pjs_driver.current_url
def addSeleniumCookie(self, cookieDict):
'''
Install a cookie exported from a selenium webdriver into
the active opener
'''
# print cookieDict
cookie = http.cookiejar.Cookie(
version = 0,
name = cookieDict['name'],
value = cookieDict['value'],
port = None,
port_specified = False,
domain = cookieDict['domain'],
domain_specified = True,
domain_initial_dot = False,
path = cookieDict['path'],
path_specified = False,
secure = cookieDict['secure'],
expires = cookieDict['expiry'] if 'expiry' in cookieDict else None,
discard = False,
comment = None,
comment_url = None,
rest = {"httponly":"%s" % cookieDict['httponly']},
rfc2109 = False
)
self.addCookie(cookie)
def __del__(self):
# print("PhantomJS __del__")
if self.pjs_driver != None:
self.pjs_driver.quit()
sup = super()
if hasattr(sup, '__del__'):
sup.__del__()
def stepThroughCloudFlare_pjs(self, url, titleContains='', titleNotContains=''):
'''
Use Selenium+PhantomJS to access a resource behind cloudflare protection.
Params:
``url`` - The URL to access that is protected by cloudflare
``titleContains`` - A string that is in the title of the protected page, and NOT the
cloudflare intermediate page. The presence of this string in the page title
is used to determine whether the cloudflare protection has been successfully
penetrated.
The current WebGetRobust headers are installed into the selenium browser, which
is then used to access the protected resource.
Once the protected page has properly loaded, the cloudflare access cookie is
then extracted from the selenium browser, and installed back into the WebGetRobust
instance, so it can continue to use the cloudflare auth in normal requests.
'''
if (not titleContains) and (not titleNotContains):
raise ValueError("You must pass either a string the title should contain, or a string the title shouldn't contain!")
if titleContains and titleNotContains:
raise ValueError("You can only pass a single conditional statement!")
self.log.info("Attempting to access page through cloudflare browser verification.")
if not self.pjs_driver:
self._initPjsWebDriver()
self._syncIntoPjsWebDriver()
self.pjs_driver.get(url)
if titleContains:
condition = EC.title_contains(titleContains)
elif titleNotContains:
condition = title_not_contains(titleNotContains)
else:
raise ValueError("Wat?")
try:
WebDriverWait(self.pjs_driver, 20).until(condition)
success = True
self.log.info("Successfully accessed main page!")
except TimeoutException:
self.log.error("Could not pass through cloudflare blocking!")
success = False
# Add cookies to cookiejar
self._syncOutOfPjsWebDriver()
self.__syncCookiesFromFile()
return success

View File

@ -0,0 +1,833 @@
#!/usr/bin/python3
import urllib.request
import urllib.parse
import urllib.error
import os.path
import time
import http.cookiejar
import traceback
import logging
import zlib
import codecs
import re
import sys
import gzip
import io
import socket
import json
from threading import Lock
import bs4
try:
import socks
from sockshandler import SocksiPyHandler
HAVE_SOCKS = True
except ImportError:
HAVE_SOCKS = False
from . import HeaderParseMonkeyPatch
from . import ChromiumMixin
from . import PhantomJSMixin
from . import Handlers
from . import iri2uri
from . import Constants
from . import Exceptions
#pylint: disable-msg=E1101, C0325, R0201, W0702, W0703
# Process-wide lock serializing all reads/writes of the shared on-disk
# cookie file, shared by every WebGetRobust instance by default.
COOKIEWRITELOCK = Lock()
# Not read anywhere in this chunk; presumably reserved for a global cookie
# file path override -- TODO confirm against the rest of the project.
GLOBAL_COOKIE_FILE = None
def as_soup(str):
	"""Parse a markup string into a BeautifulSoup tree using the lxml parser.

	NOTE(review): the parameter shadows the builtin ``str``; renaming it
	would change the keyword-argument interface, so it is left as-is.
	"""
	return bs4.BeautifulSoup(str, "lxml")
def determine_json_encoding(json_bytes):
	'''
	Given the fact that the first 2 characters in json are guaranteed to be ASCII, we can use
	these to determine the encoding.
	See: http://tools.ietf.org/html/rfc4627#section-3

	Copied here:
	   Since the first two characters of a JSON text will always be ASCII
	   characters [RFC0020], it is possible to determine whether an octet
	   stream is UTF-8, UTF-16 (BE or LE), or UTF-32 (BE or LE) by looking
	   at the pattern of nulls in the first four octets.

	           00 00 00 xx  UTF-32BE
	           00 xx 00 xx  UTF-16BE
	           xx 00 00 00  UTF-32LE
	           xx 00 xx 00  UTF-16LE
	           xx xx xx xx  UTF-8

	Returns the encoding name as a string.
	Raises Exceptions.ContentTypeError when the encoding cannot be
	inferred from the available bytes.
	'''
	assert(isinstance(json_bytes, bytes))

	# Four octets fully disambiguate, so accept inputs of exactly four
	# bytes too (the old `> 4` comparison needlessly rejected 4-byte
	# documents such as b'true').
	if len(json_bytes) >= 4:
		b1, b2, b3, b4 = json_bytes[0], json_bytes[1], json_bytes[2], json_bytes[3]
		if b1 == 0 and b2 == 0 and b3 == 0 and b4 != 0:
			return "UTF-32BE"
		elif b1 == 0 and b2 != 0 and b3 == 0 and b4 != 0:
			return "UTF-16BE"
		elif b1 != 0 and b2 == 0 and b3 == 0 and b4 == 0:
			return "UTF-32LE"
		elif b1 != 0 and b2 == 0 and b3 != 0 and b4 == 0:
			return "UTF-16LE"
		elif b1 != 0 and b2 != 0 and b3 != 0 and b4 != 0:
			return "UTF-8"
		else:
			raise Exceptions.ContentTypeError("Unknown encoding!")

	# Two octets can still settle some cases. Accept exactly two bytes
	# as well: b'{}' is a minimal, valid UTF-8 JSON document that the
	# old `> 2` comparison rejected.
	elif len(json_bytes) >= 2:
		b1, b2 = json_bytes[0], json_bytes[1]
		if b1 == 0 and b2 == 0:
			return "UTF-32BE"
		elif b1 == 0 and b2 != 0:
			return "UTF-16BE"
		elif b1 != 0 and b2 == 0:
			raise Exceptions.ContentTypeError("Json string too short to definitively infer encoding.")
		elif b1 != 0 and b2 != 0:
			return "UTF-8"
		else:
			raise Exceptions.ContentTypeError("Unknown encoding!")

	raise Exceptions.ContentTypeError("Input string too short to guess encoding!")
# A urllib2 wrapper that provides error handling and logging, as well as cookie management. It's a bit crude, but it works.
# Also supports transport compresion.
# OOOOLLLLLLDDDDD, has lots of creaky internals. Needs some cleanup desperately, but lots of crap depends on almost everything.
# Arrrgh.
class WebGetRobust(PhantomJSMixin.WebGetPjsMixin, ChromiumMixin.WebGetCrMixin):
	"""urllib-based HTTP fetcher with retries, transport decompression,
	charset sniffing, persistent on-disk cookies, optional SOCKS and
	HTTP-auth support, and browser-driven (PhantomJS/Chromium mixin)
	fallbacks for cloudflare-protected sites.

	NOTE(review): legacy code (see module comment above); documented
	as-is rather than restructured.
	"""

	COOKIEFILE = 'cookies.lwp' # the path and filename to save your cookies in
	cj = None
	cookielib = None
	opener = None

	# Number of attempts before getpage() gives up (when retryQuantity
	# is not passed explicitly).
	errorOutCount = 2
	# retryDelay = 0.1
	retryDelay = 0.01

	data = None

	# creds is a list of 3-tuples that gets inserted into the password manager.
	# it is structured [(top_level_url1, username1, password1), (top_level_url2, username2, password2)]
	def __init__(self, creds=None, logPath="Main.WebRequest", cookie_lock=None, cloudflare=False, use_socks=False, alt_cookiejar=None):
		"""Build the opener stack.

		creds         -- list of (top_level_url, username, password) tuples.
		logPath       -- logger name to emit under.
		cookie_lock   -- lock guarding cookie-file access; defaults to the
		                 module-global COOKIEWRITELOCK.
		cloudflare    -- enable automatic cloudflare step-through in getItem().
		use_socks     -- route requests through a local SOCKS5 proxy (port 9050).
		alt_cookiejar -- alternate cookiejar object; must expose init_agent().
		"""
		super().__init__()
		self.rules = {}
		self.rules['cloudflare'] = cloudflare
		if cookie_lock:
			self.cookie_lock = cookie_lock
		else:
			self.cookie_lock = COOKIEWRITELOCK

		self.use_socks = use_socks
		# Override the global default socket timeout, so hung connections will actually time out properly.
		# NOTE(review): this mutates process-wide state on every instantiation.
		socket.setdefaulttimeout(5)

		self.log = logging.getLogger(logPath)
		# print("Webget init! Logpath = ", logPath)
		if creds:
			print("Have creds for a domain")

		# Due to general internet people douchebaggyness, I've basically said to hell with it and decided to spoof a whole assortment of browsers
		# It should keep people from blocking this scraper *too* easily
		self.browserHeaders = Constants.getUserAgent()

		self.data = urllib.parse.urlencode(self.browserHeaders)

		if creds:
			print("Have credentials, installing password manager into urllib handler.")
			passManager = urllib.request.HTTPPasswordMgrWithDefaultRealm()
			for url, username, password in creds:
				passManager.add_password(None, url, username, password)
			self.credHandler = Handlers.PreemptiveBasicAuthHandler(passManager)
		else:
			self.credHandler = None

		self.alt_cookiejar = alt_cookiejar
		self.__loadCookies()

	def chunkReport(self, bytesSoFar, totalSize):
		"""Log download progress (progress-callback helper)."""
		if totalSize:
			percent = float(bytesSoFar) / totalSize
			percent = round(percent * 100, 2)
			self.log.info("Downloaded %d of %d bytes (%0.2f%%)" % (bytesSoFar, totalSize, percent))
		else:
			self.log.info("Downloaded %d bytes" % (bytesSoFar))

	def __chunkRead(self, response, chunkSize=2 ** 18, reportHook=None):
		"""Read ``response`` in chunks, invoking ``reportHook`` after each.

		NOTE(review): this looks like unported Python-2 code:
		``info().getheader(...)`` does not exist on py3 message objects
		(should be ``.get(...)``), and ``pgContent`` starts as ``str``
		while ``response.read()`` yields ``bytes`` -- the concatenation
		would raise TypeError. Also, ``reportHook`` is called with three
		arguments while ``chunkReport`` above accepts two. Needs a fix
		before the callBack path of getpage() is used.
		"""
		contentLengthHeader = response.info().getheader('Content-Length')
		if contentLengthHeader:
			totalSize = contentLengthHeader.strip()
			totalSize = int(totalSize)
		else:
			totalSize = None
		bytesSoFar = 0
		pgContent = ""
		while 1:
			chunk = response.read(chunkSize)
			pgContent += chunk
			bytesSoFar += len(chunk)

			if not chunk:
				break

			if reportHook:
				reportHook(bytesSoFar, chunkSize, totalSize)

		return pgContent

	def getSoup(self, *args, **kwargs):
		"""Fetch a page via getpage() and return it parsed as a
		BeautifulSoup tree. Raises if the content did not decode to text."""
		if 'returnMultiple' in kwargs and kwargs['returnMultiple']:
			raise Exceptions.ArgumentError("getSoup cannot be called with 'returnMultiple' being true")

		if 'soup' in kwargs and kwargs['soup']:
			raise Exceptions.ArgumentError("getSoup contradicts the 'soup' directive!")

		page = self.getpage(*args, **kwargs)
		if isinstance(page, bytes):
			raise Exceptions.ContentTypeError("Received content not decoded! Cannot parse!")

		soup = as_soup(page)
		return soup

	def getJson(self, *args, **kwargs):
		"""Fetch a page and parse it as JSON, retrying once with a fresh
		user-agent on a parse failure (some sites serve garbage to
		suspected bots)."""
		if 'returnMultiple' in kwargs and kwargs['returnMultiple']:
			raise Exceptions.ArgumentError("getSoup cannot be called with 'returnMultiple' being true")

		attempts = 0
		while 1:
			try:
				page = self.getpage(*args, **kwargs)
				if isinstance(page, bytes):
					page = page.decode(determine_json_encoding(page))
					# raise ValueError("Received content not decoded! Cannot parse!")

				page = page.strip()
				ret = json.loads(page)
				return ret
			except ValueError:
				if attempts < 1:
					attempts += 1
					self.log.error("JSON Parsing issue retreiving content from page!")
					for line in traceback.format_exc().split("\n"):
						self.log.error("%s", line.rstrip())
					self.log.error("Retrying!")

					# Scramble our current UA
					self.browserHeaders = Constants.getUserAgent()
					if self.alt_cookiejar:
						self.cj.init_agent(new_headers=self.browserHeaders)

					time.sleep(self.retryDelay)
				else:
					self.log.error("JSON Parsing issue, and retries exhausted!")
					# self.log.error("Page content:")
					# self.log.error(page)
					# with open("Error-ctnt-{}.json".format(time.time()), "w") as tmp_err_fp:
					# 	tmp_err_fp.write(page)
					raise

	def getFileAndName(self, *args, **kwargs):
		"""Fetch a resource; return (content, filename) where filename
		comes from the Content-Disposition header ('' when absent)."""
		if 'returnMultiple' in kwargs:
			raise Exceptions.ArgumentError("getFileAndName cannot be called with 'returnMultiple'")

		if 'soup' in kwargs and kwargs['soup']:
			raise Exceptions.ArgumentError("getFileAndName contradicts the 'soup' directive!")

		kwargs["returnMultiple"] = True

		pgctnt, pghandle = self.getpage(*args, **kwargs)

		info = pghandle.info()
		if not 'Content-Disposition' in info:
			hName = ''
		elif not 'filename=' in info['Content-Disposition']:
			hName = ''
		else:
			hName = info['Content-Disposition'].split('filename=')[1]

		return pgctnt, hName

	def getFileNameMime(self, *args, **kwargs):
		"""Fetch a resource; return (content, filename, mimetype).
		Filename extraction matches getFileAndName()."""
		if 'returnMultiple' in kwargs:
			raise Exceptions.ArgumentError("getFileAndName cannot be called with 'returnMultiple'")

		if 'soup' in kwargs and kwargs['soup']:
			raise Exceptions.ArgumentError("getFileAndName contradicts the 'soup' directive!")

		kwargs["returnMultiple"] = True

		pgctnt, pghandle = self.getpage(*args, **kwargs)

		info = pghandle.info()
		if not 'Content-Disposition' in info:
			hName = ''
		elif not 'filename=' in info['Content-Disposition']:
			hName = ''
		else:
			hName = info['Content-Disposition'].split('filename=')[1]

		mime = info.get_content_type()

		return pgctnt, hName, mime

	# HUGE GOD-FUNCTION (see comment block further down); retries, handles
	# cloudflare 503 step-through, and decodes/decompresses the response.
	# postData expects a dict; addlHeaders also expects a dict.
	def getpage(self, requestedUrl, **kwargs):
		"""Fetch ``requestedUrl`` and return the (decoded) content.

		Keyword arguments:
			addlHeaders   -- dict of extra request headers.
			returnMultiple-- when True, return (content, handle).
			callBack      -- progress callback for chunked reads.
			postData      -- dict; when present, a POST is made.
			retryQuantity -- override for the retry count.
			nativeError   -- re-raise the underlying urllib error on failure.
			binaryForm    -- multipart form object (make_result()/get_content_type()).
		"""
		self.log.info("Fetching content at URL: %s", requestedUrl)

		# strip trailing and leading spaces.
		requestedUrl = requestedUrl.strip()

		# If we have 'soup' as a param, just pop it, and call `getSoup()`.
		if 'soup' in kwargs and kwargs['soup']:
			self.log.warning("'soup' kwarg is depreciated. Please use the `getSoup()` call instead.")
			kwargs.pop('soup')
			return self.getSoup(requestedUrl, **kwargs)

		# Decode the kwargs values
		addlHeaders = kwargs.setdefault("addlHeaders", None)
		returnMultiple = kwargs.setdefault("returnMultiple", False)
		callBack = kwargs.setdefault("callBack", None)
		postData = kwargs.setdefault("postData", None)
		retryQuantity = kwargs.setdefault("retryQuantity", None)
		nativeError = kwargs.setdefault("nativeError", False)
		binaryForm = kwargs.setdefault("binaryForm", False)

		# Conditionally encode the referrer if needed, because otherwise
		# urllib will barf on unicode referrer values.
		if addlHeaders and 'Referer' in addlHeaders:
			addlHeaders['Referer'] = iri2uri.iri2uri(addlHeaders['Referer'])

		retryCount = 0
		while 1:

			pgctnt = None
			pghandle = None

			pgreq = self.__buildRequest(requestedUrl, postData, addlHeaders, binaryForm)

			errored = False
			lastErr = ""

			retryCount = retryCount + 1

			if (retryQuantity and retryCount > retryQuantity) or (not retryQuantity and retryCount > self.errorOutCount):
				self.log.error("Failed to retrieve Website : %s at %s All Attempts Exhausted", pgreq.get_full_url(), time.ctime(time.time()))
				pgctnt = None
				try:
					self.log.critical("Critical Failure to retrieve page! %s at %s, attempt %s", pgreq.get_full_url(), time.ctime(time.time()), retryCount)
					self.log.critical("Error: %s", lastErr)
					self.log.critical("Exiting")
				except:
					self.log.critical("And the URL could not be printed due to an encoding error")
				break

			#print "execution", retryCount
			try:
				# print("Getpage!", requestedUrl, kwargs)
				pghandle = self.opener.open(pgreq, timeout=30) # Get Webpage
				# print("Gotpage")

			except urllib.error.HTTPError as e: # Lotta logging
				self.log.warning("Error opening page: %s at %s On Attempt %s.", pgreq.get_full_url(), time.ctime(time.time()), retryCount)
				self.log.warning("Error Code: %s", e)

				#traceback.print_exc()
				lastErr = e
				try:

					self.log.warning("Original URL: %s", requestedUrl)
					errored = True
				except:
					self.log.warning("And the URL could not be printed due to an encoding error")

				if e.code == 404:
					#print "Unrecoverable - Page not found. Breaking"
					self.log.critical("Unrecoverable - Page not found. Breaking")
					break

				time.sleep(self.retryDelay)
				if e.code == 503:
					errcontent = e.read()
					if b'This process is automatic. Your browser will redirect to your requested content shortly.' in errcontent:
						self.log.warning("Cloudflare failure! Doing automatic step-through.")
						self.stepThroughCloudFlare(requestedUrl, titleNotContains="Just a moment...")

			except UnicodeEncodeError:
				self.log.critical("Unrecoverable Unicode issue retreiving page - %s", requestedUrl)
				for line in traceback.format_exc().split("\n"):
					self.log.critical("%s", line.rstrip())
				self.log.critical("Parameters:")
				self.log.critical("	requestedUrl: '%s'", requestedUrl)
				self.log.critical("	postData:     '%s'", postData)
				self.log.critical("	addlHeaders:  '%s'", addlHeaders)
				self.log.critical("	binaryForm:   '%s'", binaryForm)
				break

			except Exception:
				errored = True
				#traceback.print_exc()
				lastErr = sys.exc_info()
				self.log.warning("Retreival failed. Traceback:")
				self.log.warning(str(lastErr))
				self.log.warning(traceback.format_exc())

				self.log.warning("Error Retrieving Page! - Trying again - Waiting %s seconds", self.retryDelay)

				try:
					self.log.critical("Error on page - %s", requestedUrl)
				except:
					self.log.critical("And the URL could not be printed due to an encoding error")

				time.sleep(self.retryDelay)
				continue

			if pghandle != None:
				self.log.info("Request for URL: %s succeeded at %s On Attempt %s. Recieving...", pgreq.get_full_url(), time.ctime(time.time()), retryCount)
				pgctnt = self.__retreiveContent(pgreq, pghandle, callBack)

				# if __retreiveContent did not return false, it managed to fetch valid results, so break
				if pgctnt != False:
					break

		if errored and pghandle != None:
			print(("Later attempt succeeded %s" % pgreq.get_full_url()))
		elif (errored or not pgctnt) and pghandle is None:

			if lastErr and nativeError:
				raise lastErr
			raise Exceptions.FetchFailureError("Failed to retreive page '%s'!" % (requestedUrl, ))

		if returnMultiple:
			return pgctnt, pghandle
		else:
			return pgctnt

	def getItem(self, itemUrl):
		"""Fetch a resource; return (content, filename-from-url, mimetype).
		Attempts cloudflare step-through on failure when the 'cloudflare'
		rule was enabled at construction."""
		try:
			content, handle = self.getpage(itemUrl, returnMultiple=True)
		except:
			print("Failure?")
			if self.rules['cloudflare']:
				# NOTE(review): stepThroughCloudFlare() below has no return
				# statement, so this branch currently always raises -- confirm.
				if not self.stepThroughCloudFlare(itemUrl, titleNotContains='Just a moment...'):
					raise Exceptions.FetchFailureError("Could not step through cloudflare!")
				# Cloudflare cookie set, retrieve again
				content, handle = self.getpage(itemUrl, returnMultiple=True)
			else:
				raise

		if not content or not handle:
			raise urllib.error.URLError("Failed to retreive file from page '%s'!" % itemUrl)

		fileN = urllib.parse.unquote(urllib.parse.urlparse(handle.geturl())[2].split("/")[-1])
		fileN = bs4.UnicodeDammit(fileN).unicode_markup
		mType = handle.info()['Content-Type']

		# If there is an encoding in the content-type (or any other info), strip it out.
		# We don't care about the encoding, since WebFunctions will already have handled that,
		# and returned a decoded unicode object.
		if mType and ";" in mType:
			mType = mType.split(";")[0].strip()

		# *sigh*. So minus.com is fucking up their http headers, and apparently urlencoding the
		# mime type, because apparently they're shit at things.
		# Anyways, fix that.
		if '%2F' in mType:
			mType = mType.replace('%2F', '/')

		self.log.info("Retreived file of type '%s', name of '%s' with a size of %0.3f K", mType, fileN, len(content)/1000.0)
		return content, fileN, mType

	def getHead(self, url, addlHeaders=None):
		"""Issue an HTTP HEAD request and return the final (post-redirect)
		URL. Retries on timeouts and URL errors, up to 4 attempts.

		NOTE(review): in the URLError branch, ``e.code`` only exists on
		HTTPError subclasses; a plain URLError would raise AttributeError
		here -- confirm intended.
		"""
		for x in range(9999):
			try:
				self.log.info("Doing HTTP HEAD request for '%s'", url)
				pgreq = self.__buildRequest(url, None, addlHeaders, None, req_class=Handlers.HeadRequest)
				pghandle = self.opener.open(pgreq, timeout=30)
				returl = pghandle.geturl()
				if returl != url:
					self.log.info("HEAD request returned a different URL '%s'", returl)

				return returl
			except socket.timeout as e:
				self.log.info("Timeout, retrying....")
				if x >= 3:
					self.log.error("Failure fetching: %s", url)
					raise Exceptions.FetchFailureError("Timout when fetching %s. Error: %s" % (url, e))
			except urllib.error.URLError as e:
				# Continue even in the face of cloudflare crapping it's pants
				if e.code == 500 and e.geturl():
					return e.geturl()
				self.log.info("URLError, retrying....")
				if x >= 3:
					self.log.error("Failure fetching: %s", url)
					raise Exceptions.FetchFailureError("URLError when fetching %s. Error: %s" % (url, e))

	######################################################################################################################################################
	######################################################################################################################################################

	def __decodeHtml(self, pageContent, cType):
		"""Decode raw HTML bytes to str by sniffing a charset= declaration
		out of the page body, falling back to iso-8859-1.

		NOTE(review): the ``cType`` parameter is immediately clobbered
		below (reused as a scratch variable for the sniffed charset), so
		the passed-in content-type is never actually consulted -- confirm.
		"""

		# this *should* probably be done using a parser.
		# However, it seems to be grossly overkill to shove the whole page (which can be quite large) through a parser just to pull out a tag that
		# should be right near the page beginning anyways.
		# As such, it's a regular expression for the moment

		# Regex is of bytes type, since we can't convert a string to unicode until we know the encoding the
		# bytes string is using, and we need the regex to get that encoding
		coding = re.search(rb"charset=[\'\"]?([a-zA-Z0-9\-]*)[\'\"]?", pageContent, flags=re.IGNORECASE)

		cType = b""
		charset = None
		try:
			if coding:
				cType = coding.group(1)
				codecs.lookup(cType.decode("ascii"))
				charset = cType.decode("ascii")

		except LookupError:

			# I'm actually not sure what I was thinking when I wrote this if statement. I don't think it'll ever trigger.
			if (b";" in cType) and (b"=" in cType): 		# the server is reporting an encoding. Now we use it to decode the

				dummy_docType, charset = cType.split(b";")
				charset = charset.split(b"=")[-1]

		if not charset:
			self.log.warning("Could not find encoding information on page - Using default charset. Shit may break!")
			charset = "iso-8859-1"

		try:
			pageContent = str(pageContent, charset)

		except UnicodeDecodeError:
			self.log.error("Encoding Error! Stripping invalid chars.")
			pageContent = pageContent.decode('utf-8', errors='ignore')

		return pageContent

	def __buildRequest(self, pgreq, postData, addlHeaders, binaryForm, req_class = None):
		"""Assemble a urllib Request (or ``req_class``) from a URL plus
		optional POST data, extra headers, and a binary multipart form.

		NOTE(review): the first parameter is named ``pgreq`` but actually
		receives the URL string.
		"""
		if req_class is None:
			req_class = urllib.request.Request

		pgreq = iri2uri.iri2uri(pgreq)

		try:
			params = {}
			headers = {}
			if postData != None:
				self.log.info("Making a post-request! Params: '%s'", postData)
				params['data'] = urllib.parse.urlencode(postData).encode("utf-8")
			if addlHeaders != None:
				self.log.info("Have additional GET parameters!")
				for key, parameter in addlHeaders.items():
					self.log.info("	Item: '%s' -> '%s'", key, parameter)
				headers = addlHeaders
			if binaryForm:
				self.log.info("Binary form submission!")
				if 'data' in params:
					raise Exceptions.ArgumentError("You cannot make a binary form post and a plain post request at the same time!")

				params['data'] = binaryForm.make_result()
				headers['Content-type'] = binaryForm.get_content_type()
				headers['Content-length'] = len(params['data'])

			return req_class(pgreq, headers=headers, **params)

		except:
			self.log.critical("Invalid header or url")
			raise

	def __decompressContent(self, coding, pgctnt):
		"""Undo transport compression; returns (compression-type, bytes)."""
		#preLen = len(pgctnt)
		if coding == 'deflate':
			compType = "deflate"

			pgctnt = zlib.decompress(pgctnt, -zlib.MAX_WBITS)

		elif coding == 'gzip':
			compType = "gzip"

			buf = io.BytesIO(pgctnt)
			f = gzip.GzipFile(fileobj=buf)
			pgctnt = f.read()

		elif coding == "sdch":
			raise Exceptions.ContentTypeError("Wait, someone other then google actually supports SDCH compression?")

		else:
			compType = "none"

		return compType, pgctnt

	def __decodeTextContent(self, pgctnt, cType):
		"""Decode response bytes to str for text-like content types; binary
		content is passed through unchanged."""

		if cType:
			if (";" in cType) and ("=" in cType):
				# the server is reporting an encoding. Now we use it to decode the content
				# Some wierdos put two charsets in their headers:
				# `text/html;Charset=UTF-8;charset=UTF-8`
				# Split, and take the first two entries.
				docType, charset = cType.split(";")[:2]
				charset = charset.split("=")[-1]

				# Only decode content marked as text (yeah, google is serving zip files
				# with the content-disposition charset header specifying "UTF-8") or
				# specifically allowed other content types I know are really text.
				decode = ['application/atom+xml', 'application/xml', "application/json", 'text']
				if any([item in docType for item in decode]):
					try:
						pgctnt = str(pgctnt, charset)
					except UnicodeDecodeError:
						self.log.error("Encoding Error! Stripping invalid chars.")
						pgctnt = pgctnt.decode('utf-8', errors='ignore')

			else:
				# The server is not reporting an encoding in the headers.
				# Use content-aware mechanisms for determing the content encoding.

				if "text/html" in cType or \
					'text/javascript' in cType or    \
					'text/css' in cType or    \
					'application/xml' in cType or    \
					'application/atom+xml' in cType:				# If this is a html/text page, we want to decode it using the local encoding

					pgctnt = self.__decodeHtml(pgctnt, cType)

				elif "text/plain" in cType or "text/xml" in cType:
					pgctnt = bs4.UnicodeDammit(pgctnt).unicode_markup

				# Assume JSON is utf-8. Probably a bad idea?
				elif "application/json" in cType:
					pgctnt = pgctnt.decode('utf-8')

				elif "text" in cType:
					self.log.critical("Unknown content type!")
					self.log.critical(cType)

		else:
			self.log.critical("No content disposition header!")
			self.log.critical("Cannot guess content type!")

		return pgctnt

	def __retreiveContent(self, pgreq, pghandle, callBack):
		"""Read the response body (chunked when ``callBack`` is given),
		decompress it, and decode text content. Returns the content, or
		False on failure (so getpage() retries)."""
		try:
			# If we have a progress callback, call it for chunked read.
			# Otherwise, just read in the entire content.
			if callBack:
				pgctnt = self.__chunkRead(pghandle, 2 ** 17, reportHook=callBack)
			else:
				pgctnt = pghandle.read()

			if pgctnt is None:
				return False

			self.log.info("URL fully retrieved.")

			preDecompSize = len(pgctnt)/1000.0

			encoded = pghandle.headers.get('Content-Encoding')
			compType, pgctnt = self.__decompressContent(encoded, pgctnt)

			decompSize = len(pgctnt)/1000.0
			# self.log.info("Page content type = %s", type(pgctnt))
			cType = pghandle.headers.get("Content-Type")
			if compType == 'none':
				self.log.info("Compression type = %s. Content Size = %0.3fK. File type: %s.", compType, decompSize, cType)
			else:
				self.log.info("Compression type = %s. Content Size compressed = %0.3fK. Decompressed = %0.3fK. File type: %s.", compType, preDecompSize, decompSize, cType)

			pgctnt = self.__decodeTextContent(pgctnt, cType)

			return pgctnt

		except:
			print("pghandle = ", pghandle)

			self.log.error(sys.exc_info())
			traceback.print_exc()
			self.log.error("Error Retrieving Page! - Transfer failed. Waiting %s seconds before retrying", self.retryDelay)

			try:
				self.log.critical("Critical Failure to retrieve page! %s at %s", pgreq.get_full_url(), time.ctime(time.time()))
				self.log.critical("Exiting")
			except:
				self.log.critical("And the URL could not be printed due to an encoding error")
			print()
			self.log.error(pghandle)
			time.sleep(self.retryDelay)

			return False

	# HUGE GOD-FUNCTION.
	# OH GOD FIXME.

	# postData expects a dict
	# addlHeaders also expects a dict

	######################################################################################################################################################
	######################################################################################################################################################

	def __loadCookies(self):
		"""Initialize the cookiejar (on-disk LWP jar, or the supplied
		alternate jar) and build the urllib opener stack with cookie,
		redirect, and optional auth/SOCKS handlers installed."""
		if self.alt_cookiejar is not None:
			self.alt_cookiejar.init_agent(new_headers=self.browserHeaders)
			self.cj = self.alt_cookiejar
		else:
			self.cj = http.cookiejar.LWPCookieJar()		# This is a subclass of FileCookieJar
														# that has useful load and save methods

		if self.cj is not None:
			if os.path.isfile(self.COOKIEFILE):
				try:
					self.__updateCookiesFromFile()
					# self.log.info("Loading CookieJar")
				except:
					self.log.critical("Cookie file is corrupt/damaged?")
					try:
						os.remove(self.COOKIEFILE)
					except FileNotFoundError:
						pass

			if http.cookiejar is not None:
				# self.log.info("Installing CookieJar")
				self.log.debug(self.cj)
				cookieHandler = urllib.request.HTTPCookieProcessor(self.cj)
				args = (cookieHandler, Handlers.HTTPRedirectHandler)
				if self.credHandler:
					print("Have cred handler. Building opener using it")
					args += (self.credHandler, )
				if self.use_socks:
					print("Using Socks handler")
					if not HAVE_SOCKS:
						raise RuntimeError("SOCKS Use specified, and no socks installed!")
					args = (SocksiPyHandler(socks.SOCKS5, "127.0.0.1", 9050), ) + args

				self.opener = urllib.request.build_opener(*args)
				#self.opener.addheaders = [('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')]
				self.opener.addheaders = self.browserHeaders
				#urllib2.install_opener(self.opener)

		for cookie in self.cj:
			self.log.debug(cookie)
			#print cookie

	def __syncCookiesFromFile(self):
		# First, load any changed cookies so we don't overwrite them
		# However, we want to persist any cookies that we have that are more recent then the saved cookies, so we temporarily save
		# the cookies in memory to a temp-file, then load the cookiefile, and finally overwrite the loaded cookies with the ones from the
		# temp file
		# self.log.info("Synchronizing cookies with cookieFile.")
		if os.path.isfile(self.COOKIEFILE):
			self.cj.save("cookietemp.lwp")
			self.cj.load(self.COOKIEFILE)
			self.cj.load("cookietemp.lwp")

	def __updateCookiesFromFile(self):
		# Update cookies from cookiefile
		if os.path.exists(self.COOKIEFILE):
			# self.log.info("Synchronizing cookies with cookieFile.")
			self.cj.load(self.COOKIEFILE)

	def addCookie(self, inCookie):
		"""Insert a http.cookiejar.Cookie into the active cookiejar."""
		self.log.info("Updating cookie!")
		self.cj.set_cookie(inCookie)

	def saveCookies(self, halting=False):
		"""Persist the cookiejar to disk under the shared cookie lock,
		clamping absurdly-distant expiry times. Best-effort: failures are
		deliberately silenced (see comment below)."""

		locked = self.cookie_lock.acquire(timeout=5)
		if not locked:
			self.log.error("Failed to acquire cookie-lock!")
			return

		# print("Have %d cookies before saving cookiejar" % len(self.cj))
		try:
			# self.log.info("Trying to save cookies!")
			if self.cj is not None:							# If cookies were used
				self.__syncCookiesFromFile()
				# self.log.info("Have cookies to save")
				for cookie in self.cj:
					# print(cookie)
					# print(cookie.expires)
					if isinstance(cookie.expires, int) and cookie.expires > 30000000000:		# Clamp cookies that expire stupidly far in the future because people are assholes
						cookie.expires = 30000000000

				# self.log.info("Calling save function")
				self.cj.save(self.COOKIEFILE)					# save the cookies again
				# self.log.info("Cookies Saved")
			else:
				self.log.info("No cookies to save?")
		except Exception as e:
			pass
			# The destructor call order is too incoherent, and shit fails
			# during the teardown with null-references. The error printout is
			# not informative, so just silence it.
			# print("Possible error on exit (or just the destructor): '%s'." % e)
		finally:
			self.cookie_lock.release()

		# print("Have %d cookies after saving cookiejar" % len(self.cj))
		if not halting:
			self.__syncCookiesFromFile()
		# print "Have %d cookies after reloading cookiejar" % len(self.cj)

	def getCookies(self):
		"""Return the cookiejar, after re-synchronizing it with the
		on-disk cookie file (under the cookie lock)."""

		locked = self.cookie_lock.acquire(timeout=5)
		if not locked:
			raise RuntimeError("Could not acquire lock on cookiejar")

		try:
			# self.log.info("Trying to save cookies!")
			if self.cj is not None:							# If cookies were used
				self.__syncCookiesFromFile()
		finally:
			self.cookie_lock.release()

		return self.cj

	######################################################################################################################################################
	######################################################################################################################################################

	def __del__(self):
		# print "WGH Destructor called!"
		# print("WebRequest __del__")
		# Flush cookies to disk on teardown; chain mixin destructors.
		self.saveCookies(halting=True)

		sup = super()
		if hasattr(sup, '__del__'):
			sup.__del__()

	def stepThroughCloudFlare(self, *args, **kwargs):
		# Shim to the underlying web browser of choice
		# NOTE(review): the shimmed call's boolean result is discarded, so
		# this always returns None; getItem() tests the return value and
		# therefore always sees a falsy result -- looks like a missing
		# `return`, confirm before changing.
		self.stepThroughCloudFlare_pjs(*args, **kwargs)

View File

@ -0,0 +1,9 @@
from .WebRequestClass import as_soup
from .WebRequestClass import WebGetRobust
from .Exceptions import WebGetException
from .Exceptions import ContentTypeError
from .Exceptions import ArgumentError
from .Exceptions import FetchFailureError

View File

@ -0,0 +1,75 @@
import urllib.parse
# Convert an IRI to a URI following the rules in RFC 3987
#
# The characters we need to enocde and escape are defined in the spec:
#
# iprivate = %xE000-F8FF / %xF0000-FFFFD / %x100000-10FFFD
# ucschar = %xA0-D7FF / %xF900-FDCF / %xFDF0-FFEF
# / %x10000-1FFFD / %x20000-2FFFD / %x30000-3FFFD
# / %x40000-4FFFD / %x50000-5FFFD / %x60000-6FFFD
# / %x70000-7FFFD / %x80000-8FFFD / %x90000-9FFFD
# / %xA0000-AFFFD / %xB0000-BFFFD / %xC0000-CFFFD
# / %xD0000-DFFFD / %xE1000-EFFFD
# Codepoint ranges (inclusive) from RFC 3987's `ucschar` / `iprivate`
# productions, sorted ascending so a scan can stop early.
escape_range = [
	(0xA0, 0xD7FF),
	(0xE000, 0xF8FF),
	(0xF900, 0xFDCF),
	(0xFDF0, 0xFFEF),
	(0x10000, 0x1FFFD),
	(0x20000, 0x2FFFD),
	(0x30000, 0x3FFFD),
	(0x40000, 0x4FFFD),
	(0x50000, 0x5FFFD),
	(0x60000, 0x6FFFD),
	(0x70000, 0x7FFFD),
	(0x80000, 0x8FFFD),
	(0x90000, 0x9FFFD),
	(0xA0000, 0xAFFFD),
	(0xB0000, 0xBFFFD),
	(0xC0000, 0xCFFFD),
	(0xD0000, 0xDFFFD),
	(0xE1000, 0xEFFFD),
	(0xF0000, 0xFFFFD),
	(0x100000, 0x10FFFD),
]

def encode(c):
	"""Percent-encode the single character ``c`` (as its UTF-8 octets)
	when its codepoint falls in one of the RFC 3987 ucschar/iprivate
	ranges; otherwise return the character unchanged."""
	codepoint = ord(c)
	for low, high in escape_range:
		if codepoint < low:
			# Ranges are sorted ascending; nothing further can match.
			break
		if low <= codepoint <= high:
			return "".join(["%%%2X" % o for o in c.encode('utf-8')])
	return c
def iri2uri(uri):
"""Convert an IRI to a URI. Note that IRIs must be
passed in a unicode strings. That is, do not utf-8 encode
the IRI before passing it into the function."""
assert uri != None, 'iri2uri must be passed a non-none string!'
original = uri
if isinstance(uri ,str):
(scheme, authority, path, query, fragment) = urllib.parse.urlsplit(uri)
authority = authority.encode('idna').decode('utf-8')
# For each character in 'ucschar' or 'iprivate'
# 1. encode as utf-8
# 2. then %-encode each octet of that utf-8
path = urllib.parse.quote(path)
uri = urllib.parse.urlunsplit((scheme, authority, path, query, fragment))
uri = "".join([encode(c) for c in uri])
# urllib.parse.urlunsplit(urllib.parse.urlsplit({something})
# strips any trailing "?" chars. While this may be legal according to the
# spec, it breaks some services. Therefore, we patch
# the "?" back in if it has been removed.
if original.endswith("?") and not uri.endswith("?"):
uri = uri+"?"
return uri

View File

@ -0,0 +1 @@

View File

@ -0,0 +1,192 @@
import unittest
import socket
import json
import base64
import zlib
import gzip
import bs4
import ChromeController
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Thread
import util.WebRequest as WebRequest
class MockServerRequestHandler(BaseHTTPRequestHandler):
	"""
	Mock HTTP endpoint serving the canned fixture responses the chromium
	tests fetch. Each path produces exactly the status/headers/body the
	test assertions expect.
	"""

	def _respond(self, code, headers, body=None):
		# Emit a complete response: status line, headers, then optional body.
		self.send_response(code)
		for name, value in headers:
			self.send_header(name, value)
		self.end_headers()
		if body is not None:
			self.wfile.write(body)

	def do_GET(self):
		# Process an HTTP GET request and return a canned response.
		print("Path: ", self.path)

		if self.path == "/":
			self._respond(200, [('Content-type', "text/html")], b"<html><body>Root OK?</body></html>")
		elif self.path == "/with_title_1":
			self._respond(200, [('Content-type', "text/html")], b"<html><html><title>Page Title 1</title></html><body>Root OK?</body></html>")
		elif self.path == "/raw-txt":
			self._respond(200, [('Content-type', "text/plain")], b"Root OK?")
		elif self.path == "/binary_ctnt":
			self._respond(200, [('Content-type', "image/jpeg")], b"Binary!\x00\x01\x02\x03")
		elif self.path == "/redirect/bad-1":
			# 302 with no location header at all.
			self._respond(302, [])
		elif self.path == "/redirect/bad-2":
			# 302 redirecting to itself (an infinite loop).
			self._respond(302, [('location', "bad-2")])
		elif self.path == "/redirect/bad-3":
			# 302 to a non-HTTP scheme.
			self._respond(302, [('location', "gopher://www.google.com")])
		elif self.path == "/redirect/from-1":
			self._respond(302, [('location', "to-1")])
		elif self.path == "/redirect/to-1":
			self._respond(200, [], b"Redirect-To-1")
		elif self.path == "/redirect/from-2":
			# Redirect via the (unusual) 'uri' header rather than 'location'.
			self._respond(302, [('uri', "to-2")])
		elif self.path == "/redirect/to-2":
			self._respond(200, [], b"Redirect-To-2")
		elif self.path == "/redirect/from-3":
			# Redirect back to the server root via its numeric address.
			target = "http://{}:{}".format(self.server.server_address[0], self.server.server_address[1])
			self._respond(302, [('uri', target)])
def get_free_port():
	"""
	Ask the OS for an unused TCP port on localhost and return its number.

	The probe socket is closed before returning, so there is a small window
	in which another process could claim the port; acceptable for tests.
	"""
	with socket.socket(socket.AF_INET, type=socket.SOCK_STREAM) as probe:
		probe.bind(('localhost', 0))
		return probe.getsockname()[1]
class TestChromium(unittest.TestCase):
	"""
	Exercise the chromium-backed fetch/head paths of WebGetRobust against
	a local mock HTTP server.
	"""

	def _url(self, path=""):
		# Absolute URL for *path* on this test's mock server.
		return "http://localhost:{}{}".format(self.mock_server_port, path)

	def setUp(self):
		# Bring up the mock server on a free port, serving from a daemon
		# thread so it cannot outlive the test process.
		self.mock_server_port = get_free_port()
		self.mock_server = HTTPServer(('localhost', self.mock_server_port), MockServerRequestHandler)
		self.mock_server_thread = Thread(target=self.mock_server.serve_forever)
		self.mock_server_thread.daemon = True
		self.mock_server_thread.start()

		self.wg = WebRequest.WebGetRobust()

	def tearDown(self):
		self.mock_server.shutdown()

		# Hacky force-close of the chromium interface
		self.wg.close_chromium()
		del self.wg

	def test_fetch_1(self):
		# The plain (non-chromium) fetch path still works.
		self.assertEqual(
			self.wg.getpage(self._url()),
			'<html><body>Root OK?</body></html>')

	def test_fetch_chromium_1(self):
		page, fname, mtype = self.wg.getItemChromium(self._url())
		self.assertEqual(fname, '')
		self.assertEqual(mtype, 'text/html')
		self.assertEqual(page, '<html><body>Root OK?</body></html>')

	def test_fetch_chromium_2(self):
		page, fname, mtype = self.wg.getItemChromium(self._url("/raw-txt"))
		self.assertEqual(fname, 'raw-txt')
		self.assertEqual(mtype, 'text/html')  # I'm not properly retreiving the mimetype from chromium
		self.assertEqual(page, 'Root OK?')

	def test_fetch_chromium_3(self):
		page, fname, mtype = self.wg.getItemChromium(self._url("/binary_ctnt"))
		self.assertEqual(fname, 'binary_ctnt')
		self.assertEqual(mtype, 'application/x-binary')
		self.assertEqual(page, b"Binary!\x00\x01\x02\x03")

	def test_head_chromium_1(self):
		# A URL that does not redirect resolves to itself.
		target = self._url("/raw-txt")
		self.assertEqual(self.wg.getHeadChromium(target), target)

	def test_head_chromium_2(self):
		# A well-formed 302 is followed to its destination.
		self.assertEqual(
			self.wg.getHeadChromium(self._url("/redirect/from-1")),
			self._url("/redirect/to-1"))

	def test_head_chromium_3(self):
		# A 302 with no location header leaves the URL unchanged.
		self.assertEqual(
			self.wg.getHeadChromium(self._url("/redirect/bad-1")),
			self._url("/redirect/bad-1"))

	def test_head_chromium_4(self):
		# Chromium changes infinite redirects into timeouts.
		with self.assertRaises(ChromeController.ChromeNavigateTimedOut):
			self.wg.getHeadChromium(self._url("/redirect/bad-2"))

	def test_head_chromium_5(self):
		# Chromium changes infinite redirects into timeouts.
		with self.assertRaises(ChromeController.ChromeNavigateTimedOut):
			self.wg.getHeadChromium(self._url("/redirect/bad-3"))

	def test_head_title_chromium_1(self):
		pg_url = self._url("/with_title_1")
		self.assertEqual(
			self.wg.getHeadTitleChromium(pg_url),
			{'url': pg_url, 'title': 'Page Title 1'})

	def test_head_title_chromium_2(self):
		# If no title is specified, chromium returns the server URL
		pg_url = self._url("/")
		self.assertEqual(
			self.wg.getHeadTitleChromium(pg_url),
			{'url': pg_url, 'title': 'localhost:{}'.format(self.mock_server_port)})

	def test_head_title_chromium_3(self):
		# If no title is specified, chromium returns the server URL
		pg_url = self._url("/binary_ctnt")
		self.assertEqual(
			self.wg.getHeadTitleChromium(pg_url),
			{'url': pg_url, 'title': 'localhost:{}/binary_ctnt'.format(self.mock_server_port)})

View File

@ -0,0 +1,144 @@
import unittest
import socket
import json
import base64
import zlib
import gzip
import bs4
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Thread
import util.WebRequest as WebRequest
class MockServerRequestHandler(BaseHTTPRequestHandler):
	"""
	Mock HTTP endpoint serving the canned fixture responses the PhantomJS
	tests fetch.
	"""

	def _respond(self, code, headers, body=None):
		# Emit a complete response: status line, headers, then optional body.
		self.send_response(code)
		for name, value in headers:
			self.send_header(name, value)
		self.end_headers()
		if body is not None:
			self.wfile.write(body)

	def do_GET(self):
		# Process an HTTP GET request and return a canned response.
		print("Path: ", self.path)

		if self.path == "/":
			self._respond(200, [('Content-type', "text/html")], b"Root OK?")
		elif self.path == "/raw-txt":
			self._respond(200, [('Content-type', "text/plain")], b"Root OK?")
		elif self.path == "/redirect/bad-1":
			# 302 with no location header at all.
			self._respond(302, [])
		elif self.path == "/redirect/bad-2":
			# 302 redirecting to itself (an infinite loop).
			self._respond(302, [('location', "bad-2")])
		elif self.path == "/redirect/bad-3":
			# 302 to a non-HTTP scheme.
			self._respond(302, [('location', "gopher://www.google.com")])
		elif self.path == "/redirect/from-1":
			self._respond(302, [('location', "to-1")])
		elif self.path == "/redirect/to-1":
			self._respond(200, [], b"Redirect-To-1")
		elif self.path == "/redirect/from-2":
			# Redirect via the (unusual) 'uri' header rather than 'location'.
			self._respond(302, [('uri', "to-2")])
		elif self.path == "/redirect/to-2":
			self._respond(200, [], b"Redirect-To-2")
		elif self.path == "/redirect/from-3":
			# Redirect back to the server root via its numeric address.
			target = "http://{}:{}".format(self.server.server_address[0], self.server.server_address[1])
			self._respond(302, [('uri', target)])
def get_free_port():
	"""
	Bind an ephemeral port on localhost, release it, and return its number.

	There is an inherent race (another process may grab the port after the
	probe socket closes); this is acceptable for test fixtures.
	"""
	probe = socket.socket(socket.AF_INET, type=socket.SOCK_STREAM)
	try:
		probe.bind(('localhost', 0))
		_, port = probe.getsockname()
	finally:
		probe.close()
	return port
class TestPhantomJS(unittest.TestCase):
	"""
	Exercise the PhantomJS-backed fetch/head paths of WebGetRobust against
	a local mock HTTP server.

	NOTE(review): several checks are deliberately split into separate test
	methods because the shared PhantomJS instance remembers state between
	navigations (see the comment above test_head_pjs_2).
	"""

	def setUp(self):
		# Configure mock server.
		self.mock_server_port = get_free_port()
		self.mock_server = HTTPServer(('localhost', self.mock_server_port), MockServerRequestHandler)

		# Start running mock server in a separate thread.
		# Daemon threads automatically shut down when the main process exits.
		self.mock_server_thread = Thread(target=self.mock_server.serve_forever)
		self.mock_server_thread.setDaemon(True)
		self.mock_server_thread.start()
		self.wg = WebRequest.WebGetRobust()

	def tearDown(self):
		# Stop the mock server; its daemon thread exits with serve_forever().
		self.mock_server.shutdown()

	def test_fetch_1(self):
		# Plain (non-PJS) fetch still works while the PJS plumbing is active.
		page = self.wg.getpage("http://localhost:{}".format(self.mock_server_port))
		self.assertEqual(page, 'Root OK?')

	def test_fetch_pjs(self):
		page_1, fname_1, mtype_1 = self.wg.getItemPhantomJS("http://localhost:{}".format(self.mock_server_port))

		# PhantomJS/selenium rewrites content into what they think it should
		# look like. Note that the content isn't specified to be HTML ANYWHERE.
		self.assertEqual(page_1, '<html><head></head><body>Root OK?</body></html>')

		# PJS ALWAYS wraps content in html markup unless you specify the content
		# is "text/html". If you do that, it then proceeds to only add /some/ of
		# the html tag scaffolding (a <pre> wrapper for plain text).
		page_2, fname_2, mtype_2 = self.wg.getItemPhantomJS("http://localhost:{}/raw-txt".format(self.mock_server_port))

		# Again, PJS has reshaped the body on its own initiative.
		self.assertEqual(
			page_2,
			'<html><head></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">Root OK?</pre></body></html>'
		)

	def test_head_pjs_1(self):
		# A non-redirecting URL resolves to itself.
		url_1 = "http://localhost:{}/raw-txt".format(self.mock_server_port)
		purl_1 = self.wg.getHeadPhantomJS(url_1)
		self.assertEqual(purl_1, url_1)

		# A well-formed 302 is followed to its target.
		url_2 = "http://localhost:{}/redirect/to-1".format(self.mock_server_port)
		purl_2 = self.wg.getHeadPhantomJS("http://localhost:{}/redirect/from-1".format(self.mock_server_port))
		self.assertEqual(purl_2, url_2)

	# We expect to get the same value as passed, since pjs will not resolve out
	# the bad redirects.
	# Note we have to restart phantomjs for these tests, because otherwise it remembers state (this is why they're separate tests).
	def test_head_pjs_2(self):
		url_3 = "http://localhost:{}/redirect/bad-1".format(self.mock_server_port)
		purl_3 = self.wg.getHeadPhantomJS("http://localhost:{}/redirect/bad-1".format(self.mock_server_port))
		self.assertEqual(purl_3, url_3)

	def test_head_pjs_3(self):
		# Somehow, this turns into 'about:blank'. NFI how
		url_4 = "about:blank"
		purl_4 = self.wg.getHeadPhantomJS("http://localhost:{}/redirect/bad-2".format(self.mock_server_port))
		self.assertEqual(purl_4, url_4)

	def test_head_pjs_4(self):
		# Somehow, this turns into 'about:blank'. NFI how
		url_5 = "about:blank"
		purl_5 = self.wg.getHeadPhantomJS("http://localhost:{}/redirect/bad-3".format(self.mock_server_port))
		self.assertEqual(purl_5, url_5)

View File

@ -0,0 +1,330 @@
import unittest
import socket
import json
import base64
import zlib
import gzip
import bs4
from http.server import BaseHTTPRequestHandler, HTTPServer
from threading import Thread
import util.WebRequest as WebRequest
class TestPlainCreation(unittest.TestCase):
	"""Smoke-test that WebGetRobust constructs in each of its modes."""

	def test_plain_instantiation_1(self):
		# Default construction.
		self.assertIsNotNone(WebRequest.WebGetRobust())

	def test_plain_instantiation_2(self):
		# CloudFlare-circumvention mode.
		self.assertIsNotNone(WebRequest.WebGetRobust(cloudflare=True))

	def test_plain_instantiation_3(self):
		# SOCKS-proxy mode.
		self.assertIsNotNone(WebRequest.WebGetRobust(use_socks=True))
class MockServerRequestHandler(BaseHTTPRequestHandler):
	"""
	Mock HTTP endpoint serving the fixture responses TestSimpleFetch
	requests: content-type/encoding variants, redirects, filename headers
	and an HTTP basic-auth check.
	"""

	def _respond(self, code, headers, body=None):
		# Emit a complete response: status line, headers, then optional body.
		self.send_response(code)
		for name, value in headers:
			self.send_header(name, value)
		self.end_headers()
		if body is not None:
			self.wfile.write(body)

	def do_GET(self):
		# Process an HTTP GET request and return a canned response.
		print("Path: ", self.path)

		if self.path == "/":
			self._respond(200, [], b"Root OK?")
		elif self.path == "/html-decode":
			self._respond(200, [('Content-type', "text/html")], b"Root OK?")
		elif self.path == "/html/real":
			self._respond(200, [('Content-type', "text/html")], b"<html><body>Root OK?</body></html>")
		elif self.path == "/compressed/deflate":
			# Raw-deflate payload (negative wbits -> no zlib header).
			cobj = zlib.compressobj(wbits=-zlib.MAX_WBITS)
			payload = cobj.compress(b"Root OK?") + cobj.flush()
			self._respond(200, [('Content-Encoding', 'deflate'), ('Content-type', "text/html")], payload)
		elif self.path == "/compressed/gzip":
			self._respond(200, [('Content-Encoding', 'gzip'), ('Content-type', "text/html")], gzip.compress(b"Root OK?"))
		elif self.path == "/json/invalid":
			self._respond(200, [('Content-type', "text/html")], b"LOLWAT")
		elif self.path == "/json/valid":
			self._respond(200, [('Content-type', "text/html")], b'{"oh" : "hai"}')
		elif self.path == "/json/no-coding":
			self._respond(200, [], b'{"oh" : "hai"}')
		elif self.path == "/filename/path-only.txt":
			self._respond(200, [], b"LOLWAT?")
		elif self.path == "/filename/content-disposition":
			self._respond(200, [('Content-Disposition', "filename=lolercoaster.txt")], b"LOLWAT?")
		elif self.path == "/filename_mime/path-only.txt":
			self._respond(200, [], b"LOLWAT?")
		elif self.path == "/filename_mime/content-disposition":
			self._respond(200, [('Content-Disposition', "filename=lolercoaster.txt")], b"LOLWAT?")
		elif self.path == "/filename_mime/content-disposition-html-suffix":
			self._respond(200, [('Content-Disposition', "filename=lolercoaster.html")], b"LOLWAT?")
		elif self.path == "/filename_mime/explicit-html-mime":
			self._respond(200, [('Content-Disposition', "filename=lolercoaster.html"), ('Content-type', "text/html")], b"LOLWAT?")
		elif self.path == "/redirect/bad-1":
			# 302 with no location header at all.
			self._respond(302, [])
		elif self.path == "/redirect/bad-2":
			# 302 redirecting to itself (an infinite loop).
			self._respond(302, [('location', "bad-2")])
		elif self.path == "/redirect/bad-3":
			# 302 to a non-HTTP scheme.
			self._respond(302, [('location', "gopher://www.google.com")])
		elif self.path == "/redirect/from-1":
			self._respond(302, [('location', "to-1")])
		elif self.path == "/redirect/to-1":
			self._respond(200, [], b"Redirect-To-1")
		elif self.path == "/redirect/from-2":
			# Redirect via the (unusual) 'uri' header rather than 'location'.
			self._respond(302, [('uri', "to-2")])
		elif self.path == "/redirect/to-2":
			self._respond(200, [], b"Redirect-To-2")
		elif self.path == "/redirect/from-3":
			# Redirect back to the server root via its numeric address.
			target = "http://{}:{}".format(self.server.server_address[0], self.server.server_address[1])
			self._respond(302, [('uri', target)])
		elif self.path == "/password/expect":
			# Validate the HTTP basic-auth credentials against lol:wat.
			self.send_response(200)
			self.end_headers()
			passval = self.headers['Authorization'].split(" ")[-1]
			if base64.b64decode(passval) == b'lol:wat':
				self.wfile.write(b"Password Ok?")
			else:
				self.wfile.write(b"Password Bad!")
def get_free_port():
	"""
	Return a TCP port number that was free on localhost a moment ago.

	The kernel assigns an ephemeral port when binding to port 0; the probe
	socket is closed immediately, so the port is available for the caller.
	"""
	with socket.socket(socket.AF_INET, type=socket.SOCK_STREAM) as probe:
		probe.bind(('localhost', 0))
		free_port = probe.getsockname()[1]
	return free_port
class TestSimpleFetch(unittest.TestCase):
	"""
	End-to-end tests of WebGetRobust's plain (urllib-based) fetch paths,
	run against a local mock HTTP server (MockServerRequestHandler).
	"""

	def setUp(self):
		# Configure mock server.
		self.mock_server_port = get_free_port()
		self.mock_server = HTTPServer(('localhost', self.mock_server_port), MockServerRequestHandler)

		# Start running mock server in a separate thread.
		# Daemon threads automatically shut down when the main process exits.
		self.mock_server_thread = Thread(target=self.mock_server.serve_forever)
		self.mock_server_thread.setDaemon(True)
		self.mock_server_thread.start()
		self.wg = WebRequest.WebGetRobust()

	def tearDown(self):
		# Stop the mock server; its daemon thread exits with serve_forever().
		self.mock_server.shutdown()

	def test_fetch_1(self):
		# No Content-Type header -> content is returned as raw bytes.
		page = self.wg.getpage("http://localhost:{}".format(self.mock_server_port))
		self.assertEqual(page, b'Root OK?')

	def test_fetch_decode_1(self):
		# text/html content should be decoded automatically.
		page = self.wg.getpage("http://localhost:{}/html-decode".format(self.mock_server_port))
		self.assertEqual(page, 'Root OK?')

	def test_fetch_soup(self):
		# text/html content should be decoded automatically.
		page = self.wg.getSoup("http://localhost:{}/html/real".format(self.mock_server_port))
		self.assertEqual(page, bs4.BeautifulSoup('<html><body>Root OK?</body></html>', 'lxml'))

		# lxml wraps the bare text body in <p> while building the tree.
		page = self.wg.getSoup("http://localhost:{}/html-decode".format(self.mock_server_port))
		self.assertEqual(page, bs4.BeautifulSoup('<html><body><p>Root OK?</p></body></html>', 'lxml'))

		# getSoup fails to fetch content that's not of content-type text/html
		with self.assertRaises(WebRequest.ContentTypeError):
			page = self.wg.getSoup("http://localhost:{}/".format(self.mock_server_port))

	def test_fetch_decode_json(self):
		# text/html content should be decoded automatically.
		page = self.wg.getJson("http://localhost:{}/json/valid".format(self.mock_server_port))
		self.assertEqual(page, {'oh': 'hai'})

		# JSON is parsed even when the server sends no content-type header.
		page = self.wg.getJson("http://localhost:{}/json/no-coding".format(self.mock_server_port))
		self.assertEqual(page, {'oh': 'hai'})

		# Non-JSON content surfaces the json module's decode error.
		with self.assertRaises(json.decoder.JSONDecodeError):
			page = self.wg.getJson("http://localhost:{}/json/invalid".format(self.mock_server_port))

	def test_fetch_compressed(self):
		# Gzip and raw-deflate response bodies are transparently decompressed.
		page = self.wg.getpage("http://localhost:{}/compressed/gzip".format(self.mock_server_port))
		self.assertEqual(page, 'Root OK?')
		page = self.wg.getpage("http://localhost:{}/compressed/deflate".format(self.mock_server_port))
		self.assertEqual(page, 'Root OK?')

	def test_file_and_name(self):
		# The filename comes only from Content-Disposition, not the URL path.
		page, fn = self.wg.getFileAndName("http://localhost:{}/filename/path-only.txt".format(self.mock_server_port))
		self.assertEqual(page, b'LOLWAT?')
		self.assertEqual(fn, '')
		page, fn = self.wg.getFileAndName("http://localhost:{}/filename/content-disposition".format(self.mock_server_port))
		self.assertEqual(page, b'LOLWAT?')
		self.assertEqual(fn, 'lolercoaster.txt')

	def test_file_name_mime(self):
		# No Content-Disposition header -> empty filename.
		page, fn, mimet = self.wg.getFileNameMime(
			"http://localhost:{}/filename_mime/path-only.txt".format(self.mock_server_port))
		self.assertEqual(page, b'LOLWAT?')
		self.assertEqual(fn, '')
		self.assertEqual(mimet, 'text/plain')
		page, fn, mimet = self.wg.getFileNameMime(
			"http://localhost:{}/filename_mime/content-disposition".format(self.mock_server_port))
		self.assertEqual(page, b'LOLWAT?')
		self.assertEqual(fn, 'lolercoaster.txt')
		self.assertEqual(mimet, 'text/plain')
		# A .html filename alone does not change the reported mime type...
		page, fn, mimet = self.wg.getFileNameMime(
			"http://localhost:{}/filename_mime/content-disposition-html-suffix".format(self.mock_server_port))
		self.assertEqual(page, b'LOLWAT?')
		self.assertEqual(fn, 'lolercoaster.html')
		self.assertEqual(mimet, 'text/plain')
		# ...but an explicit text/html content-type does, and the body is
		# decoded to str rather than returned as bytes.
		page, fn, mimet = self.wg.getFileNameMime(
			"http://localhost:{}/filename_mime/explicit-html-mime".format(self.mock_server_port))
		self.assertEqual(page, 'LOLWAT?')
		self.assertEqual(fn, 'lolercoaster.html')
		self.assertEqual(mimet, 'text/html')

	def test_get_head(self):
		# Without redirects, getHead returns the URL it was given.
		inurl_1 = "http://localhost:{}".format(self.mock_server_port)
		nurl_1 = self.wg.getHead(inurl_1)
		self.assertEqual(inurl_1, nurl_1)

		inurl_2 = "http://localhost:{}/filename_mime/content-disposition".format(self.mock_server_port)
		nurl_2 = self.wg.getHead(inurl_2)
		self.assertEqual(inurl_2, nurl_2)

	def test_redirect_handling(self):
		# Both the standard 'location' and the unusual 'uri' redirect
		# headers are followed to the target content.
		inurl_1 = "http://localhost:{}/redirect/from-1".format(self.mock_server_port)
		ctnt_1 = self.wg.getpage(inurl_1)
		self.assertEqual(ctnt_1, b"Redirect-To-1")

		inurl_2 = "http://localhost:{}/redirect/from-2".format(self.mock_server_port)
		ctnt_2 = self.wg.getpage(inurl_2)
		self.assertEqual(ctnt_2, b"Redirect-To-2")

		inurl_3 = "http://localhost:{}/redirect/from-1".format(self.mock_server_port)
		outurl_3 = "http://localhost:{}/redirect/to-1".format(self.mock_server_port)
		nurl_3 = self.wg.getHead(inurl_3)
		self.assertEqual(outurl_3, nurl_3)

		inurl_4 = "http://localhost:{}/redirect/from-2".format(self.mock_server_port)
		outurl_4 = "http://localhost:{}/redirect/to-2".format(self.mock_server_port)
		nurl_4 = self.wg.getHead(inurl_4)
		self.assertEqual(outurl_4, nurl_4)

		# This is a redirect without the actual redirect
		with self.assertRaises(WebRequest.FetchFailureError):
			inurl_5 = "http://localhost:{}/redirect/bad-1".format(self.mock_server_port)
			nurl_5 = self.wg.getHead(inurl_5)

		# This is a infinitely recursive redirect.
		with self.assertRaises(WebRequest.FetchFailureError):
			inurl_6 = "http://localhost:{}/redirect/bad-2".format(self.mock_server_port)
			nurl_6 = self.wg.getHead(inurl_6)

		# This redirect targets an unsupported scheme (gopher://).
		with self.assertRaises(WebRequest.FetchFailureError):
			inurl_6 = "http://localhost:{}/redirect/bad-3".format(self.mock_server_port)
			nurl_6 = self.wg.getHead(inurl_6)

		inurl_7 = "http://localhost:{}/redirect/from-3".format(self.mock_server_port)
		# Assumes localhost resolves to 127.0.0.1. Is this ever not true? TCPv6?
		outurl_7 = "http://127.0.0.1:{}/".format(self.mock_server_port)
		nurl_7 = self.wg.getHead(inurl_7)
		self.assertEqual(outurl_7, nurl_7)

	def test_http_auth(self):
		# Correct credentials are offered and accepted by the mock server...
		wg_1 = WebRequest.WebGetRobust(creds=[("localhost:{}".format(self.mock_server_port), "lol", "wat")])
		page = wg_1.getpage("http://localhost:{}/password/expect".format(self.mock_server_port))
		self.assertEqual(page, b'Password Ok?')

		# ...and wrong credentials are rejected.
		wg_2 = WebRequest.WebGetRobust(creds=[("localhost:{}".format(self.mock_server_port), "lol", "nope")])
		page = wg_2.getpage("http://localhost:{}/password/expect".format(self.mock_server_port))
		self.assertEqual(page, b'Password Bad!')

0
util/__init__.py Normal file
View File

File diff suppressed because it is too large Load Diff