# -*- coding: utf-8 -*-
import random
import sys

import scrapy
from scrapy_splash import SplashRequest

from android.agents import agents
# Python 2 only: force the process-wide default string encoding to UTF-8.
# ``reload`` is not a builtin on Python 3 and ``sys.setdefaultencoding`` does
# not exist there, so the hack is skipped on Python 3 instead of raising at
# import time.
if sys.version_info[0] == 2:
    # Reloading ``sys`` brings back ``setdefaultencoding``, which the
    # interpreter removes from the module during startup.
    reload(sys)
    sys.setdefaultencoding('utf-8')
class AndroidSpider(scrapy.Spider):
    """Spider that fetches the Wandoujia top-apps page through Splash.

    One ``SplashRequest`` is sent to the Splash ``execute`` endpoint together
    with a Lua script.  The script opens the page, repeatedly clicks the
    element whose id is ``j-refresh-btn`` and finally returns the page HTML.
    """

    name = "android"
    start_url = "http://www.wandoujia.com/top/app"

    def start_requests(self):
        """Yield the single Splash-rendered request that seeds the crawl.

        The response is delivered to ``self.parse_result``.  If the request
        object cannot be built, the error is logged and nothing is yielded.
        """
        # Lua source run by Splash.  The numeric ``for`` is inclusive, so the
        # button is clicked 51 times (var = 0..50).  The script text is kept
        # flush-left and unchanged so Splash receives exactly the same source.
        # NOTE(review): ``j-refresh-btn`` is presumably the "load more"
        # button; if it is missing the JS lookup fails -- confirm the page.
        script = """
function main(splash)
assert(splash:go(splash.args.url))
for var=0,50,1 do
local get_dimensions = splash:jsfunc([[
function () {
var rect = document.getElementById('j-refresh-btn').getClientRects()[0];
return {"x": rect.left, "y": rect.top}
}
]])
splash:set_viewport_full()
splash:wait(0.1)
local dimensions = get_dimensions()
splash:mouse_click(dimensions.x, dimensions.y)
-- Wait split second to allow event to propagate.
splash:wait(0.5)
end
return splash:html()
end
"""
        # Pick one User-Agent at random from the project's agent list.
        agent = random.choice(agents)
        print("------cookie---------")
        headers = {
            "User-Agent": agent,
            "Referer": "xxxxxxx",
        }
        # Arguments forwarded to Splash.  The original author left the
        # optional "images", "cookies" and "proxy" arguments disabled.
        # NOTE(review): the Lua script only reads ``splash.args.url``; confirm
        # whether "wait" and "render_all" have any effect on ``execute``.
        # NOTE(review): a 1800 s timeout presumably needs a Splash instance
        # started with a matching ``--max-timeout`` -- confirm.
        splash_args = {
            'wait': 3,
            "http_method": "GET",
            "timeout": 1800,
            "render_all": 1,
            "headers": headers,
            'lua_source': script,
        }
        try:
            request = SplashRequest(
                self.start_url,
                self.parse_result,
                endpoint='execute',
                args=splash_args,
                dont_filter=True
            )
        except Exception:
            # Best-effort like the original handler: report the failure and
            # end the generator instead of crashing the crawl start-up.
            self.logger.exception(
                "Failed to build the Splash request for %s", self.start_url
            )
            return
        yield request
def parse_result(self):
splash爬虫
最新推荐文章于 2024-05-01 13:23:03 发布