# 通过 Scrapy 框架模拟登录豆瓣并进入个人信息页面
# -*- coding: utf-8 -*-
import scrapy
from scrapy.http import Request,FormRequest
import urllib.request
class DbSpider(scrapy.Spider):
# Scrapy spider name (the identifier Scrapy uses to refer to this spider).
name = 'db'
# Domains this spider is allowed to crawl.
allowed_domains = ['douban.com']
# Browser-like User-Agent header dict.
# NOTE(review): "hearder" looks like a misspelling of "header"; the name is
# left unchanged because code outside the visible portion may reference it.
hearder={"User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"}
# start_urls is disabled; the initial request is built in start_requests().
#start_urls = ('http://douban.com/',)
#开始的请求信息
def start_requests(self):
    """Build the initial request for the Douban login page.

    The request carries ``meta={"cookiejar": 1}`` and its response is
    handed to ``self.parse``.

    Returns:
        A one-element list containing the login-page ``Request``.
    """
    print("进入开始爬取")
    login_request = Request(
        "https://www.douban.com/login",
        callback=self.parse,
        meta={"cookiejar": 1},
    )
    return [login_request]
def parse(self, response):
print("进入parse方法")
#验证码的判断
captcha=response.xpath('//*[@id="captcha_image"]/@src').extract()
print(captcha)
print("验证码长度",len(captcha))
if len(captcha)>0: