具体实践代码如下:
# -*- coding: utf-8 -*-
import scrapy
import requests
from scrapy.linkextractors import LinkExtractor
from scrapy.spiders import CrawlSpider,Rule,Request
from scrapy.http import FormRequest
import requests
class DemoSpider(scrapy.Spider):
    """Log in to Douban with a form POST, then re-fetch the movie home page.

    Request chain:
      1. ``start_requests`` fetches the movie home page inside a cookie jar.
      2. ``post_login`` posts the credentials to the login endpoint using
         the same cookie jar.
      3. ``after_login`` re-requests the movie home page with that jar.
      4. ``parse_item`` dumps the page to disk and prints a few diagnostics
         so the result can be compared by hand.
    """

    name = 'demo'
    #allowed_domains = ['www.douban.com']
    #start_urls = ['https://accounts.douban.com/passport/login_popup?login_source=anony']

    # Endpoints used by the request chain.
    home_url = 'https://movie.douban.com'
    login_url = 'https://accounts.douban.com/j/mobile/login/basic'

    # NOTE(review): these look like real credentials committed to source.
    # They are kept as defaults for backward compatibility, but can be
    # overridden without touching the code, e.g.
    #   scrapy crawl demo -a login_name=... -a login_password=...
    # Prefer moving them out of the repository entirely.
    login_name = '13949094212'
    login_password = 'fan13938409755'

    def start_requests(self):
        """Yield the initial request, opening cookie jar ``1`` for the session."""
        yield scrapy.Request(
            url=self.home_url,
            meta={'cookiejar': 1},
            callback=self.post_login,
        )

    def post_login(self, response):
        """Return the login POST, reusing the cookie jar of ``response``."""
        return FormRequest(
            url=self.login_url,
            method='POST',
            formdata={
                'ck': '',
                'name': self.login_name,
                'password': self.login_password,
                'remember': 'true',
            },
            # The 'cookiejar' meta key is not sticky; it has to be passed
            # along explicitly on every follow-up request.
            meta={'cookiejar': response.meta['cookiejar']},
            dont_filter=True,
            callback=self.after_login,
        )

    def after_login(self, response):
        """Re-request the movie home page with the session's cookie jar."""
        yield scrapy.Request(
            url=self.home_url,
            # Propagate the jar from the login response instead of
            # hard-coding its id, consistent with post_login.
            meta={'cookiejar': response.meta['cookiejar']},
            callback=self.parse_item,
            # The same URL was already fetched in start_requests, so the
            # duplicate filter must be bypassed.
            dont_filter=True,
        )

    def parse_item(self, response):
        """Save the page body to a file and print diagnostics.

        Writes ``response.text`` to the file '看看是啥' (UTF-8), then prints
        the text matched by the top-nav account selector, the HTTP status
        and the final URL.
        """
        # The with-statement closes the file; no explicit close() is needed.
        with open('看看是啥', "w", encoding='utf-8') as f:
            f.write(response.text)

        # Presumably the logged-in account label in the top navigation bar;
        # None when the selector matches nothing -- confirm against the page.
        account = response.css(
            '.top-nav-info .nav-user-account .bn-more span::text'
        ).extract_first()

        print("输出会有的")
        print(account)
        print(response.status)
        print(response.url)
这里采取的思路是先将登录后重定向到的页面下载下来,然后对照一下,好确认是否登录成功。