scrapy爬虫利用selenium实现用户登录和cookie传递
1. 背景
2. 环境
- python 3.6.1
- 操作系统:Windows 7
- IDE:PyCharm
- 已安装 Chrome 浏览器
- 已配置好 chromedriver(并设置好相应的环境变量)
- selenium 3.7.0
- scrapy 1.4.0
3. selenium和scrapy中cookie形态对比
3.1. scrapy中的cookie
- 当用户登录成功之后,向百度云俱乐部主页发起请求,然后在回调方法 parse() 中,通过 response.request.headers 取出该请求所携带的 cookie 信息(代码之后紧跟的是实际打印出的输出):
def parseLoginResPage(self, response):
    """Log the response handed to this callback, then request the site home page.

    Args:
        response: The response delivered to this callback; its ``status``,
            ``url`` and ``text`` attributes are printed for inspection.

    Yields:
        scrapy.Request: A request for the home page, sent with
        ``self.headerData`` as headers and routed to ``self.parse``.
    """
    print(f"parseLoginResPage: statusCode = {response.status}, url = {response.url}")
    print(f"text = {response.text}")
    # dont_filter=True tells the scheduler not to drop this request as a duplicate.
    yield scrapy.Request(
        url="http://www.51baiduyun.com/",
        headers=self.headerData,
        callback=self.parse,
        dont_filter=True,
    )
def parse(self, response):
    """Print the Cookie header values carried by the request behind *response*.

    Args:
        response: Response object; must expose ``url``, ``meta`` and
            ``request.headers.getlist``.

    Returns:
        None. The information is only written to stdout.
    """
    print(f"parse: url = {response.url}, meta = {response.meta}")
    # Inspect the *request* headers: these are the cookies that were sent
    # with the request, not anything set by the response.
    sent_cookies = response.request.headers.getlist('Cookie')
    print(f'parse: After login CookieReq = {sent_cookies}')
parse: After login CookieReq = [b'L3em_2132_saltkey=gSZXPVeG; L3em_2132_lastvisit=1523251829; L3em_2132_sid=Ir7Mht; L3em_2132_lastact=1523255437%09member.php%09logging; L3em_2132_seccode=85931.874f29c987d4fb59f3; L3em_2132_ulastactivity=d666YmbdpNj9Iz%2FNUEi%2BjDvc4WOWgYaPjSlfz9WctVMX7egl2vDA; L3em_2132_auth=8d0cjiUMrZ3s55Jt%2B4ypshxBHoUuNN4Z5e3ExPbzViN5lFcOjNWxZ8sz8vaBOTzYEIK7AHENUH%2F%2Fcw6VVnzLC%2BFvOa12; L3em_2132_lastcheckfeed=1315026%7C1523255437; L3em_2132_checkfollow=1; L3em_2132_lip=183.12.51.62%2C1523255149; L3em_2132_security_cookiereport=c346vrKUIfmjcBwk7YP92rl%2FHJROM1lF0Y2knuvE1PvPfZOvnxad']
3.2. selenium中的cookie
- 使用 selenium 登录成功之后,调用浏览器对象的 get_cookies() 获取其中的 cookie 值,代码及打印出的输出如下:
# Fetch the cookies currently held by the browser object attached to the spider.
# As the output printed in this document shows, the result is a list with one
# dict per cookie (keys such as 'name', 'value', 'domain', 'path').
seleniumCookies = spider.browser.get_cookies()
# Dump the raw structure for inspection.
print(f"seleniumCookies = {seleniumCookies}")
seleniumCookies = [{
'domain': 'www.51baiduyun.com', 'expiry': 1538989361, 'httpOnly': False, 'name': 'CNZZDATA1253365484', 'path': '/', 'secure': False, 'value': '964419069-1523259525-%7C1523259525'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1525856539.733429, 'httpOnly': True, 'name': 'L3em_2132_saltkey', 'path': '/', 'secure': False, 'value': 'uL0UL77j'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1523307758.631004, 'httpOnly': False, 'name': 'L3em_2132_security_cookiereport', 'path': '/', 'secure': False, 'value': '6bd1%2FSD%2F0OzhXwpZ5fhpBFDHH1WGRAslxA8eGAjOvYKJjvJkwLkc'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1525856539.733484, 'httpOnly': False, 'name': 'L3em_2132_lastvisit', 'path': '/', 'secure': False, 'value': '1523261207'}, {
'domain': 'www.51baiduyun.com', 'httpOnly': False, 'name': 'L3em_2132_seccode', 'path': '/', 'secure': False, 'value': '120125.68ba4641e97556392b'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1523350961.711943, 'httpOnly': False, 'name': 'L3em_2132_sid', 'path': '/', 'secure': False, 'value': 'mBP4sb'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1523264840.028978, 'httpOnly': False, 'name': 'L3em_2132_sendmail', 'path': '/', 'secure': False, 'value': '1'}, {
'domain': '.51baiduyun.com', 'expiry': 1538989340, 'httpOnly': False, 'name': 'UM_distinctid', 'path': '/', 'secure': False, 'value': '162a9a44e0823b-098677e48fe2be-454c092b-1fa400-162a9a44e094bf'}, {
'domain': '.www.51baiduyun.com', 'expiry': 1554800561, 'httpOnly': False, 'name': 'Hm_lvt_79316e5471828e6e10f2df47721ce150', 'path': '/', 'secure': False, 'value': '1523264541'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1538989361, 'httpOnly': False, 'name': 'CNZZDATA1253863031', 'path': '/', 'secure': False, 'value': '1393313043-1523261609-%7C1523261609'}, {
'domain': '.www.51baiduyun.com', 'expiry': 1554800561, 'httpOnly': False, 'name': 'Hm_lvt_eaefab1768d285abfc718a706c1164f3', 'path': '/', 'secure': False, 'value': '1523264541'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1554800558.630797, 'httpOnly': False, 'name': 'L3em_2132_ulastactivity', 'path': '/', 'secure': False, 'value': 'e52eGQjsi80DLGLXvdzm1z0xQ7lmIKuBlBUK8mQlJmAMXr7Ep8D8'}, {
'domain': 'www.51baiduyun.com', 'httpOnly': True, 'name': 'L3em_2132_auth', 'path': '/', 'secure': False, 'value': 'be395ZoslCjexHStJKSaOCgvl9krhLvGLWmNm4hRKMH1qZ65gGUlWA5q9KV7veHBRF6hrQxqUiINkF844oiL5hukCNMg'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1554800558.630948, 'httpOnly': False, 'name': 'L3em_2132_lastcheckfeed', 'path': '/', 'secure': False, 'value': '2533730%7C1523264825'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1523264588.630963, 'httpOnly': False, 'name': 'L3em_2132_checkfollow', 'path': '/', 'secure': False, 'value': '1'}, {
'domain': 'www.51baiduyun.com', 'httpOnly': False, 'name': 'L3em_2132_lip', 'path': '/', 'secure': False, 'value': '183.12.51.62%2C1523264610'}, {
'domain': 'www.51baiduyun.com', 'expiry': 1523264591.846338, 'httpOnly': False, 'name': 'L3em_2132_checkpm', 'path'