最近在做一个数据爬虫小 Demo,但目标网页采用 GB2312 编码,在 Windows 10 下用 superagent 请求时返回的内容会出现乱码。试了一轮其他方法均未成功,最后用以下方法解决了问题。
app.js
const
superagent =
require(
'superagent')
const
cheerio =
require(
'cheerio')
const
iconv =
require(
'iconv-lite');
var
charset =
require(
"superagent-charset");
// Install superagent-charset on superagent so that requests expose the
// .charset() method used below.
charset(superagent);

// Page to scrape; its body is decoded as GBK in the request below.
const reptileUrl = 'http://www.stats.gov.cn/tjsj/tjbz/tjyqhdmhcxhfdm/2016/index.html';

superagent
  .get(reptileUrl)
  .charset('gbk')
  .end((err, res) => {
    // The return value of this callback is ignored by the caller, so returning
    // the error would discard it silently. Report the failure and stop.
    if (err) {
      console.error(`Request to ${reptileUrl} failed:`, err);
      return;
    }

    // NOTE(review): decodeEntities: false is presumably here so that .html()
    // emits the Chinese text as-is rather than as numeric entities — confirm
    // against the cheerio version in use.
    const $ = cheerio.load(res.text, { decodeEntities: false });

    // Anchor elements inside table cells of rows with class "provincetr".
    const citys = $('.provincetr td a');

    // Print the inner HTML of the first matched link.
    const firstLinkHtml = $(citys[0]).html();
    console.log(firstLinkHtml);
  });