问题描述:
最近写了一个下载图片的 Node.js 爬虫 server,当时直接使用了 http.get,线上测试时发现遇到 https 的请求会直接报错:
_http_client.js:131
throw new Error('Protocol "' + protocol + '" not supported. ' +
^
Error: Protocol "https:" not supported. Expected "http:"
at new ClientRequest (_http_client.js:131:11)
at request (http.js:38:10)
at Object.get (http.js:42:13)
at Server.<anonymous> (/var/www/cgi-bin/download_pic.js:46:25)
at emitTwo (events.js:126:13)
at Server.emit (events.js:214:7)
at Server.recordHandler (/usr/lib/node_modules/npm/node_modules/node-fastcgi/lib/server.js:293:22)
at module.exports.<anonymous> (/usr/lib/node_modules/npm/node_modules/node-fastcgi/lib/server.js:271:34)
at emitTwo (events.js:126:13)
at module.exports.emit (events.js:214:7)
于是,考虑能否根据 URL 的协议自动选择 http 或 https 模块来处理请求。
部分代码:
fcgi.createServer(function(req, res){
if(req.method === 'GET'){
let urlData = urlparser.parse(req.url, true)
let queryData = urlData.query
let url = queryData.url
let option = urlparser.parse(url)
/
//下载图片并保存到文件
/
/*
const options = {
url: url,
dest: 'd:\\',
timeout: 1000
}
download.image(options)
.then(({ filename, image }) => {
console.log('File saved to', filename)
}).catch((err) => {
console.error(err)
});
*/
var request=http.get(option , function (response) {
var chunks = []
response.on('data', function (chunk)
{
chunks.push(chunk);
}).on('end', function() {
var buffer = Buffer.concat(chunks);
var lists = response.headers['content-type'].toString().split('/');
let name = url.split('/').pop()
if(sizeOf(buffer).width<WIDTH_LIMIT || sizeOf(buffer).height<HEIGHT_LIMIT)
{
res.writeHead(200,{ 'Content-Type': 'html/text'});
res.write('Logo Filter');
res.end();
logger.log(LogPrefix()+"[URL]:"+url+" [Info]:Logo Filter")
}
解决方法:
方法一:
var http = require('http');
var https = require('https');
var protocol = (parsedUrl.protocol == 'https:' ? https : http);
protocol.get(parsedUrl, function(res) {
...
});
方法二:
/**
 * Selects the Node.js client module (`http` or `https`) that matches the
 * protocol of a URL, so callers can write `adapterFor(u).get(u, cb)` without
 * caring which scheme the URL uses.
 *
 * @param {string} inputUrl - Absolute URL whose protocol decides the module.
 * @returns {object} The `http` module for "http:" URLs, the `https` module
 *   for "https:" URLs.
 * @throws {Error} If the URL has no protocol or a protocol other than
 *   "http:" / "https:". Previously this case returned `undefined`, which made
 *   the caller fail later with an unrelated "cannot read property 'get'" error.
 */
const adapterFor = (function () {
  const url = require('url');
  // Keys keep the trailing colon because that is how url.parse() reports
  // the protocol (e.g. "https:").
  const adapters = {
    'http:': require('http'),
    'https:': require('https'),
  };

  return function (inputUrl) {
    const protocol = url.parse(inputUrl).protocol;
    // Own-property check so that only the two registered schemes match.
    if (!Object.prototype.hasOwnProperty.call(adapters, protocol)) {
      throw new Error(
        `Protocol "${protocol}" not supported. Expected "http:" or "https:"`
      );
    }
    return adapters[protocol];
  };
}());
adapterFor(url).get(url, ...)
至此,其实已经可以了。不过,如果目标站点的 https 证书没有经过受信任的 CA 认证(例如自己搭建的 https 服务器使用了自签名证书),请求时会出现问题:
[ERROR]:Error: self signed certificate
解决方法:
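在 get 请求语句之前加上下面这一行(注意:它会对整个进程关闭 TLS 证书校验,存在中间人攻击风险,只建议在测试环境使用;生产环境应通过请求的 ca 选项或 NODE_EXTRA_CA_CERTS 环境变量来信任自签名证书):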
process.env.NODE_TLS_REJECT_UNAUTHORIZED = "0";