"use strict";
//to support let-of syntax;
// TCP port this proxy server listens on (passed to server.listen()).
var ProxyPort = 8888;
var http = require('http'),
net = require('net'),
url = require('url'),
httpProxy = require('http-proxy'); // Requires Node 5.0; then install the dependency with: npm install http-proxy --save
// Shared http-proxy instance used to forward plain (non-CONNECT) HTTP requests.
var proxy = httpProxy.createProxyServer({
    // Per the http-proxy docs: rewrite the Location host/port of redirect
    // responses based on the host/port that was requested.
    autoRewrite: true,
});

/**
 * Handles I/O failures reported by http-proxy while forwarding a request.
 *
 * http-proxy leaves the client response open when an 'error' listener is
 * registered, so the response has to be finished here or the browser keeps
 * waiting until its own timeout fires.
 *
 * @param {Error} err - the failure reported by http-proxy.
 * @param {http.IncomingMessage} req - the client request being proxied.
 * @param {http.ServerResponse} res - the client response to finish.
 */
proxy.on('error', function (err, req, res) {
    console.log("httpProxy: error while proxying URL=" + (req && req.url) + ": " + err);

    // Be defensive: only an http.ServerResponse can receive a status line.
    if (!res || typeof res.writeHead !== 'function') {
        return;
    }
    // writeHead() throws once headers are out, so just terminate the body then.
    if (res.headersSent) {
        res.end();
        return;
    }
    res.writeHead(502, {
        'Content-Type': 'text/plain'
    });
    res.end('Proxy I/O error while requesting URL=' + (req && req.url));
});
/**
 * Tells whether a host name is on the built-in list of hosts that are
 * treated as blocked by the GFW, either by exact name or by domain suffix.
 *
 * @param {?string} host - host name to test; anything that is not a
 *     non-empty string (e.g. the null hostname url.parse() produces for a
 *     relative URL) is reported as not blocked instead of throwing.
 * @returns {boolean} true when the host matches the block list.
 */
function isBlockedByGFW(host) {
    if (typeof host !== 'string' || host === '') {
        return false;
    }
    // Host names are case-insensitive, so compare in lower case.
    var normalized_host = host.toLowerCase();

    var exact_blocked_hosts = {
        "ajax.googleapis.com": true,
        "fonts.googleapis.com": true,
        "cdn.datatables.net": true,
        "www.google.com": true,
        "www.slideshare.net": true,
        "twitter.com": true,
        "botanwang.com": true,
        "facebook.com": true,
    };
    // TODO: use regular expressions to match GFW-blocked sites?
    // Own-property check so inherited keys such as "constructor" never match.
    if (Object.prototype.hasOwnProperty.call(exact_blocked_hosts, normalized_host)) {
        return true;
    }

    var suffix_blocked_hosts = [
        ".facebook.com",
        ".google.com",
        ".googleapis.com",
    ];
    for (let suffix of suffix_blocked_hosts) {
        if (normalized_host.endsWith(suffix)) {
            return true;
        }
    }
    return false;
}
/**
 * Looks up a local replacement for a CDN resource that is unreachable from
 * behind the GFW.
 *
 * @param {string} url - the full request URL, matched exactly (including
 *     scheme and any query string).
 * @returns {string|undefined} the replacement URL, or undefined when the
 *     URL has no remapping.
 */
function check_url_remapping_needed(url) {
    var gfw_url_remapping = {
        "http://ajax.googleapis.com/ajax/libs/jquery/1.7.2/jquery.min.js": "http://127.0.0.1/jquery-1.12.3.min.js",
        "http://ajax.googleapis.com/ajax/libs/jquery/1.8/jquery.min.js": "http://127.0.0.1/jquery-1.12.3.min.js",
        // for http://www.menscyzo.com/
    };
    // Only own keys count: a request URL such as "constructor" or "toString"
    // must not resolve to a member inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(gfw_url_remapping, url)) {
        return undefined;
    }
    return gfw_url_remapping[url];
}
/**
 * Handles the HTTP CONNECT method so HTTPS can be tunnelled through this
 * proxy. The tunnelled bytes are relayed verbatim; the content of the HTTPS
 * connection cannot be inspected or modified here.
 *
 * @param {http.IncomingMessage} cReq - the CONNECT request; cReq.url is the
 *     "host:port" authority the client wants to reach.
 * @param {net.Socket} cSock - the socket to the client.
 * @param {Buffer} [head] - bytes the client already sent after the CONNECT
 *     header (third argument of the server's 'connect' event); forwarded to
 *     the upstream host first. Optional for backward compatibility.
 */
function connect(cReq, cSock, head) {
    //console.log("CONNECT: cReq.url="+cReq.url);
    var u = url.parse('http://' + cReq.url);
    var pSock = null;
    var established = false;

    // Without a listener, a client-side reset raises an unhandled 'error'
    // event and takes the whole proxy process down.
    cSock.on('error', function (e) {
        console.log("CONNECT: client socket error! " + e);
        if (pSock) {
            pSock.destroy();
        }
    });

    if (isBlockedByGFW(u.hostname)) {
        //console.log("CONNECT to host="+u.hostname+" is blocked by GFW, fast-return 404 instead");
        // Fail fast instead of letting the client wait for a connect timeout.
        cSock.write('HTTP/1.1 404 CONNECT request blocked to avoid GFW timeout\r\n\r\n');
        cSock.end();
        return;
    }

    pSock = net.connect(u.port || 80, u.hostname, function () {
        established = true;
        cSock.write('HTTP/1.1 200 Connection Established\r\n\r\n');
        pSock.pipe(cSock);
    });
    pSock.on('error', function (e) {
        console.log("CONNECT: error! " + e);
        if (established) {
            cSock.end();
        } else {
            // The tunnel never came up: report it instead of closing silently.
            cSock.end('HTTP/1.1 502 Bad Gateway\r\n\r\n');
        }
    });

    // Writes issued before the upstream connection completes are queued by
    // net.Socket, so early client bytes go out first, followed by the stream.
    if (head && head.length > 0) {
        pSock.write(head);
    }
    cSock.pipe(pSock);
}
/**
 * Answers a request from the local mirror URL that its CDN URL maps to.
 *
 * @param {http.IncomingMessage} req - the client request.
 * @param {http.ServerResponse} res - the client response.
 * @param {string} mapped_local_url - replacement URL to fetch instead.
 */
function serveRemappedUrl(req, res, mapped_local_url) {
    console.log("remap " + req.url + " to " + mapped_local_url);
    var mapped_u = url.parse(mapped_local_url);
    var options = {
        hostname: mapped_u.hostname,
        port: mapped_u.port || 80, // honour an explicit port in the mapping
        path: mapped_u.path,
        method: 'GET',
        // NOTE(review): this forwards the original Host header to the mirror;
        // confirm the local server does not depend on it.
        headers: req.headers,
    };
    var proxy_request = http.request(options, function (res2) {
        // Relay status and headers too; otherwise a 304/404 from the mirror
        // reaches the browser as a 200 without a content type.
        res.writeHead(res2.statusCode, res2.headers);
        res2.pipe(res);
    });
    // An unreachable mirror must not raise an unhandled 'error' event.
    proxy_request.on('error', function (err) {
        console.log("remap: request to " + mapped_local_url + " failed: " + err);
        if (res.headersSent) {
            res.end();
            return;
        }
        res.writeHead(502, {
            'Connection': 'close',
            'Content-Type': 'text/plain'
        });
        res.end('');
    });
    proxy_request.end();
}

/**
 * Dispatches one plain-HTTP proxy request: remapped CDN resources are served
 * from the local mirror, the Baidu analytics script and GFW-blocked hosts
 * are rejected immediately, and everything else is forwarded by http-proxy.
 *
 * @param {http.IncomingMessage} req - the client request; req.url is
 *     expected to be an absolute URL, as sent to a forward proxy.
 * @param {http.ServerResponse} res - the client response.
 */
function handleRequest(req, res) {
    console.log("REQUEST req.url=" + req.url);
    // u.path includes the query string, u.pathname does not.
    var u = url.parse(req.url);

    // Map CDN resources that are unreachable from here to a local path.
    var mapped_local_url = check_url_remapping_needed(req.url);
    if (mapped_local_url) {
        serveRemappedUrl(req, res, mapped_local_url);
        return;
    }

    // A relative URL (a direct, non-proxy request) has no hostname and
    // therefore nothing to forward to.
    if (!u.hostname) {
        res.writeHead(400, {
            'Connection': 'close',
            'Content-Type': 'text/plain'
        });
        res.end('This is a forward proxy: the request URL must be absolute.');
        return;
    }

    if (u.hostname == "hm.baidu.com" && /h\.js$/.test(u.pathname)) {
        // Block Baidu's analytics script.
        res.writeHead(500, {
            'Connection': 'close',
            'Content-Type': 'application/x-javascript'
        });
        res.end('');
        return;
    }

    if (isBlockedByGFW(u.hostname)) {
        console.log("REQUEST host=" + u.hostname + " is blocked by GFW, fast-return 404 instead");
        res.writeHead(404, {
            'Connection': 'close',
        });
        res.end('');
        return;
    }

    proxy.web(req, res, {
        target: req.url, //needs apply patch https://github.com/gagern/node-http-proxy/commit/35000fc0d7dc0a6073ac37db097b73575a861d34
        prependPath: false,
        secure: false
    });
}

var server = http.createServer();
server.on('request', handleRequest)
    .on('connect', connect)
    .on('error', function (err) {
        // The original author took this to be an error on the connection
        // between the client browser and this proxy.
        // NOTE(review): Node reports client connection errors through
        // 'clientError'; confirm which event is intended here.
        console.log("客户端连接错误: " + JSON.stringify(err));
    });
console.log("node-proxy-server: listening on port " + ProxyPort);
server.listen(ProxyPort);
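// Open issue: although the code now returns 404 and closes the connection right away when the CONNECT target is blocked by the GFW, Chrome still seems to keep re-sending CONNECT requests?
// Node's performance feels quite good, at least better than the version I wrote in Python, and I got to try out the let-of and => syntax, haha.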