我给你改成了这样,你试试。
# spider.py
import scrapy
import urllib
from scrapy import Request
class DbSpider(scrapy.Spider):
name = 'db'
allowed_domains = ['douban.com']
#start_urls = ['http://douban.com/']
def start_requests(self):
    """Build the initial request that opens the Douban login page.

    Returns:
        A one-element list holding a ``Request`` for the login URL, with
        ``self.parse`` as its callback and ``{'cookiejar': 1}`` as its meta.
    """
    login_page = 'https://accounts.douban.com/login?alias=&redir=https%3A%2F%2Fwww.douban.com%2F&source=index_nav&error=1001'
    # The 'cookiejar' meta key is passed along with the request; presumably it
    # selects the cookie session used for the login flow -- confirm against
    # Scrapy's cookies middleware documentation.
    login_request = Request(login_page, callback=self.parse, meta={'cookiejar': 1})
    return [login_request]
def parse(self, response):
capt = response.xpath('//div/img[@id="captcha_image"]/@src').extract()#判断是否出现验证码
url = 'https://accounts.douban.com/login'
print(capt)
if len(capt)>0:
print('有验证码')
local_path = 'capt.jpeg'
urllib.request.urlretrieve(capt[0], f