最近因为项目需求，需要用到全国各地区的区域代码，但网上又下载不到最新的数据。正好在一个网站上看到可以按各个省份去查询，所以就自己用 Node.js 写了一个简单的抓取程序。
var http = require("http");
var cheerio = require("cheerio");
var BufferHelper = require('bufferhelper');
var iconv = require('iconv-lite');
var fs = require("fs");
var url = "http://www.diqudaima.com/";
/**
 * Fetches a page over HTTP and hands its text, decoded as GBK, to `callback`.
 *
 * All response chunks are collected first and decoded in one go, so a
 * multi-byte character that straddles two chunks is decoded intact.
 *
 * @param {string} url - Address of the page to fetch.
 * @param {function(string)} callback - Called once with the decoded body when
 *     the response ends. It is NOT called when the request fails; the failure
 *     is only logged.
 */
function download(url, callback) {
    var request = http.get(url, function (res) {
        var bufferHelper = new BufferHelper();
        res.on("data", function (chunk) {
            bufferHelper.concat(chunk);
        });
        res.on("end", function () {
            callback(iconv.decode(bufferHelper.toBuffer(), "GBK"));
        });
    });

    request.on("error", function (err) {
        // Report which URL failed and why; the original logged only the
        // literal text "err", which made failures impossible to diagnose.
        var detail = err && err.message ? err.message : String(err);
        console.log("Download failed: " + url + " - " + detail);
    });
}
// Entry point: (re)create the output file, then fetch the index page and
// follow every link found under "div.Count>ul>li", handing each linked page
// to getCity (presumably one link per province - confirm against the site).
createTxtFile();
download(url, function (indexHtml) {
    // Starts a console timer labelled "Start!"; no matching console.timeEnd
    // is visible in this file.
    console.time("Start!");
    if (!indexHtml) {
        return;
    }
    var $ = cheerio.load(indexHtml);
    var entries = $("div.Count>ul>li");
    entries.each(function (index, item) {
        var anchor = $(item).find("a");
        // The href is appended to the base URL as-is.
        var targetUrl = url + anchor.attr("href");
        console.log("获取" + anchor.text() + "Url: " + targetUrl);
        download(targetUrl, getCity);
    });
});
/**
 * Creates (or truncates) "code.txt" in the current working directory and
 * writes the CSV header row to it.
 *
 * The write is synchronous so the header is guaranteed to be on disk before
 * any scraped row can be appended, and so the file descriptor is opened and
 * closed by a single call (the previous fs.open/fs.write pair never closed
 * it and ignored a failed open). A failure is logged rather than thrown,
 * matching the original's log-and-continue behaviour.
 */
function createTxtFile() {
    var header = "省份,城市,县/区,地区编码,邮编,电话区号\n";
    try {
        // A string is written as UTF-8 by default, which is what the
        // deprecated `new Buffer(string)` produced.
        fs.writeFileSync("code.txt", header);
    } catch (err) {
        console.log(err);
    }
}
/**
 * Handles one downloaded page: for every "div.all>ul>li" entry, builds the
 * absolute URL of its link and downloads it, passing the result to getArea.
 *
 * @param {string} data - Decoded HTML of the page; a falsy value is ignored.
 */
function getCity(data) {
    if (!data) {
        return;
    }
    var $ = cheerio.load(data);
    var entries = $("div.all>ul>li");
    entries.each(function (index, item) {
        var anchor = $(item).find("a");
        // The href is appended to the module-level base URL as-is.
        var targetUrl = url + anchor.attr("href");
        console.log("获取" + anchor.text() + "Url: " + targetUrl);
        download(targetUrl, getArea);
    });
}
// Number of append callbacks that have completed so far (successful or not);
// used only to number the log messages.
var num = 0;

/**
 * Parses one downloaded page and appends one CSV row per "ul>li" entry to
 * "code.txt". Each row has six columns, matching the header written by
 * createTxtFile: province, city, area name, area code, postcode, phone code.
 *
 * Province and city are segments 1 and 2 of the "div.title>span" text split
 * on "-->". The remaining four fields are tokens 0, 2, 4 and 6 of the entry
 * text after every ":" is replaced by a space and the result is split on
 * single spaces.
 *
 * A field that is missing from the page is written as an empty column, so
 * every row keeps six columns.
 *
 * @param {string} data - Decoded HTML of the page; a falsy value is ignored.
 */
function getArea(data) {
    if (!data) {
        return;
    }
    var $ = cheerio.load(data);

    var province;
    var city;
    // If several spans match, the last one wins (as before).
    $("div.title>span").each(function (i, e) {
        var parts = $(e).text().split("-->");
        province = parts[1];
        city = parts[2];
    });

    $("ul>li").each(function (i, e) {
        // Splitting on a single space (not on runs of whitespace) keeps an
        // empty value in its own slot, so later fields do not shift left.
        var tokens = $(e).text().replace(/:/g, " ").split(" ");
        // Array#join renders undefined entries as empty strings, so missing
        // fields become empty columns instead of the text "undefined".
        var row = [province, city, tokens[0], tokens[2], tokens[4], tokens[6]].join(",");

        fs.appendFile("code.txt", row + "\n", function (err) {
            num += 1;
            if (err) {
                console.log("写入第" + num + "条记录失败! " + err);
            } else {
                console.log("写入第" + num + "条记录成功!");
            }
        });
        console.log(row);
    });
}