1. 要爬取网站使用的可信任证书(默认支持)
只需要在settings.py作如下配置
DOWNLOADER_HTTPCLIENTFACTORY = "scrapy.core.downloader.webclient.ScrapyHTTPClientFactory"
DOWNLOADER_CLIENTCONTEXTFACTORY = "scrapy.core.downloader.contextfactory.ScrapyClientContextFactory"
2. 要爬取网站使用的自定义证书
1.settings.py配置
DOWNLOADER_HTTPCLIENTFACTORY = "scrapy.core.downloader.webclient.ScrapyHTTPClientFactory"
DOWNLOADER_CLIENTCONTEXTFACTORY = "xxx.https.MySSLFactory"
2.文件下自定义https.py
from scrapy.core.downloader.contextfactory import ScrapyClientContextFactory
from twisted.internet.ssl import (optionsForClientTLS, CertificateOptions, PrivateCertificate)
class MySSLFactory(ScrapyClientContextFactory):
def getCertificateOptions(self):
from OpenSSL import crypto
v1 = crypto.load_privatekey(crypto.FILETYPE_PEM, open('xxx/client.key.unsecure', mode='r').read())#证书路径
v2 = crypto.load_certificate(crypto.FILETYPE_PEM, open('xxx/client.pem', mode='r').read())#证书路径
return CertificateOptions(
privateKey=v1, # pKey对象
certificate=v2, # X509对象
verify=False,
method=getattr(self, 'method', getattr(self, '_ssl_method', None))
)