用scrapy模拟登陆教务网碰到验证码为网站的相对路径,然后无法下载到本地。
教务网html的验证码部分为
我的爬虫部分代码为
# -*- coding: utf-8 -*-
import scrapy
import urllib
from scrapy.http import Request, FormRequest
class scoreQuerySpider(scrapy.Spider):
name = "scoreQuery"
allowed_domains = ["http://kdjw.hnust.cn/"]
header = {"user-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_3) AppleWebKit/536.5 (KHTML, like Gecko) Chrome/19.0.1084.54 Safari/536.5"}
def start_requests(self):
return [Request("http://kdjw.hnust.cn/kdjw/", meta={"cookiejar": 1}, callback=self.parse)]
def parse(self, response):
captcha = response.xpath('//span[@id="SafeCodeImg"]/img/@src').extract()
#captcha = "http://kdjw.hnust.cn/kdjw/verifycode.servlet"
if len(captcha) > 0:
print "此时有验证码 "
localpath = "D:/project/ScoreQuery/yzm/1.png"
urllib.urlretrieve(captcha[0], filename=localpath)
print "请输入验证码: "
captcha_value = input()
data={
"useDogCode": "",
"dlfl": "0",
"USERNAME": "***",
"PASSWORD": "***",
"RANDOMCODE": captcha_value,
"redir": "http://kdjw.hnust.cn/kdjw/xszqcjglAction.do?method=queryxscj"
}
else:
print "此时没有验证码"
data = {
"useDogCode": "",
"dlfl": "0",
"USERNAME": "***",
"PASSWORD": "***",
"redir": "http://kdjw.hnust.cn/kdjw/xszqcjglAction.do?method=queryxscj"
}
print "登录中..."
return [FormRequest.from_response(response, meta={"cookiejar": response.meta["cookiejar"]},
headers=self.header, formdata=data, callback=self.next)]
报错如下:
2017-08-26 21:41:55 [scrapy.core.scraper] ERROR: Spider error processing (referer: None)
Traceback (most recent call last):
File "C:\Python27\lib\site-packages\twisted\internet\defer.py", line 653, in _runCallbacks
current.result = callback(current.result, *args, **kw)
File "D:\project\ScoreQuery\ScoreQuery\jwc_spider\jwc_spider\spiders\scoreQuery.py", line 19, in parse
urllib.urlretrieve(captcha[0], filename=localpath)
File "C:\Python27\lib\urllib.py", line 98, in urlretrieve
return opener.retrieve(url, filename, reporthook, data)
File "C:\Python27\lib\urllib.py", line 245, in retrieve
fp = self.open(url, data)
File "C:\Python27\lib\urllib.py", line 213, in open
return getattr(self, name)(url)
File "C:\Python27\lib\urllib.py", line 469, in open_file
return self.open_local_file(url)
File "C:\Python27\lib\urllib.py", line 483, in open_local_file
raise IOError(e.errno, e.strerror, e.filename)
IOError: [Errno 2] : '\\kdjw\\verifycode.servlet'