node - async 抓取所有车系车型数据

node - async 抓取所有车系车型数据
之前自己用 node 抓取过数据,但在没有使用 async 之前很容易抓崩,出了问题也不好定位。现在改为用 async 串行执行(请求逐个按顺序发出,并非真正的同步)。
代码写完测试OK就发了,没做修饰。见谅。

/*
* 作者@王杨 qq:357466524
* 本代码主要是自己尝试如何获取某站的数据,并非恶意所为。也仅仅作为技术分享,希望不要投入到商业中去。
* 分享 转发请保留作者。
*/

var http = require('http');
//var cheerio = require('cheerio');
var BufferHelper = require('bufferhelper');
var iconv = require('iconv-lite');
//var mysql = require('mysql');
var async = require('async');
/**
 * Process-wide handler for exceptions nothing else caught.
 * Logs the error and its stack trace via console.log.
 */
function reportUncaught(err) {
    // The error value itself
    console.log('global-e:', err);
    // The call stack, to make debugging easier
    console.log('e-stack:', err.stack);
}
process.on('uncaughtException', reportUncaught);
//var connection = mysql.createConnection({
//    host: 'localhost',
//    user: 'root',
//    password: '',
//    database: 'test'
//});
//connection.connect();
// Endpoint queried for all three lists; the query string selects which one.
var CAR_LIST_URL = 'http://i.che168.com/Handler/SaleCar/ScriptCarList_V1.ashx';
// Running row counter, printed before every model line.
var global_i = 0;

/**
 * Convert a flat "id,label,id,label,..." string into brand records.
 * For each pair the first character of the label becomes brandZm and the
 * text from index 2 onward becomes brandName.
 *
 * @param {string} raw - Comma-separated id/label pairs.
 * @returns {Array<{brandName: string, brandId: string, brandZm: string}>}
 */
function parseBrandList(raw) {
    var parts = raw.split(',');
    var brands = [];
    // "i + 1 < length" skips a trailing id that has no label instead of throwing.
    for (var i = 0; i + 1 < parts.length; i += 2) {
        brands.push({
            brandName: parts[i + 1].substring(2),
            brandId: parts[i],
            brandZm: parts[i + 1].substring(0, 1)
        });
    }
    return brands;
}

/**
 * Convert a flat "id,label,id,label,..." string into series records.
 * The first space-separated word of the label becomes factoryName and the
 * remaining words, re-joined with spaces, become className.
 *
 * @param {string} raw - Comma-separated id/label pairs.
 * @returns {Array<{factoryName: string, classId: string, className: string}>}
 */
function parseSeriesList(raw) {
    var parts = raw.split(',');
    var series = [];
    // "i + 1 < length" skips a trailing id that has no label instead of throwing.
    for (var i = 0; i + 1 < parts.length; i += 2) {
        var words = parts[i + 1].split(' ');
        series.push({
            factoryName: words[0],
            classId: parts[i],
            className: words.slice(1).join(' ')
        });
    }
    return series;
}

/**
 * Print one counter line and one data line per model.
 *
 * @param {Object} brand - Record produced by parseBrandList.
 * @param {Object} cls - Record produced by parseSeriesList.
 * @param {Array} groups - Entries carrying `year` and a `spec` array whose
 *     items carry `id` and `name`.
 */
function printModels(brand, cls, groups) {
    for (var i = 0; i < groups.length; i++) {
        var group = groups[i];
        // Skip malformed entries so one bad record cannot abort the whole run.
        if (!group || !Array.isArray(group.spec)) {
            continue;
        }
        for (var j = 0; j < group.spec.length; j++) {
            var model = group.spec[j];
            if (!model) {
                continue;
            }
            console.log(global_i++);
            console.log(brand.brandId, brand.brandZm, brand.brandName, cls.factoryName, cls.classId, cls.className, group.year, model.id, model.name);
        }
    }
}

// Walk brands -> series -> models strictly one request at a time.
// Every path through the iterators below must invoke its completion callback
// exactly once; otherwise async.eachSeries never advances and the run hangs.
get_html(CAR_LIST_URL + '?needData=1', function (html) {
    // SECURITY(review): eval() runs whatever the remote server sends with full
    // process privileges. Kept because the payload is a script that defines
    // `fct`; replace with a real parser if the response format allows it.
    try {
        eval(html);
    } catch (e) {
        console.log('!get brand error', e);
        return;
    }
    if (typeof fct == 'undefined' || fct == null || typeof fct['0'] != 'string' || fct['0'].length == 0) {
        return;
    }
    var brandListArr = parseBrandList(fct['0']);

    async.eachSeries(brandListArr, function (obj, callback) {
        if (typeof obj == 'undefined' || obj == null) {
            console.log('-------------------------1----------------------------');
            callback();
            return;
        }
        var get_class_html = CAR_LIST_URL + '?seriesGroupType=2&needData=2&bid=' + obj.brandId;
        get_html(get_class_html, function (html) {
            var seriesRaw;
            try {
                // SECURITY(review): same caveat as above; this payload defines `br`.
                eval(html);
                seriesRaw = (typeof br == 'undefined' || br == null) ? undefined : br[obj.brandId];
            } catch (e) {
                console.log('!get class error', obj.brandId, e);
                callback();
                return;
            }
            if (typeof seriesRaw != 'string' || seriesRaw.length == 0) {
                // Brand without series: move on to the next brand instead of stalling.
                callback();
                return;
            }
            var classListArr = parseSeriesList(seriesRaw);

            async.eachSeries(classListArr, function (cls, callback2) {
                var get_model_html = CAR_LIST_URL + '?seriesGroupType=2&needData=3&seriesid=' + cls.classId;
                get_html(get_model_html, function (html) {
                    var spcArray;
                    try {
                        // SECURITY(review): the response is evaluated as an expression.
                        spcArray = eval(html);
                    } catch (e) {
                        console.log('!get model error', cls.classId, e);
                        callback2();
                        return;
                    }
                    if (Array.isArray(spcArray)) {
                        printModels(obj, cls, spcArray);
                    }
                    callback2();
                }, function (err) {
                    // Best effort: log the failed series and keep going.
                    console.log('socked-err', err);
                    callback2();
                });
            }, function (err) {
                if (typeof err != 'undefined' && err != null) {
                    console.log('!get model error');
                }
                // Always continue with the next brand, even after a failure.
                callback();
            });
        }, function (err) {
            console.log('socked-err', err);
            callback();
        });
    }, function (err) {
        if (typeof err == 'undefined' || err == null) {
            console.log('finished');
        } else {
            console.log('!get class error');
        }
    });
}, function (err) {
    // Without the brand list there is nothing to iterate over.
    console.log('socked-err', err);
});


/**
 * Fetch a page over HTTP and pass its body, decoded from GBK, to `callback`.
 *
 * @param {string} url - Address handed to http.get.
 * @param {function(string)} callback - Called once with the decoded body
 *     when the response ends.
 * @param {function(Error)} [errCallback] - Optional. Called once when the
 *     request or the response stream emits 'error'.
 */
function get_html(url, callback, errCallback) {
    // Guards against reporting both a failure and a success for one request.
    var settled = false;

    // Log every error; notify the caller at most once. errCallback is optional
    // because several call sites only supply the success callback.
    function fail(e) {
        console.log('e1', e);
        if (settled) {
            return;
        }
        settled = true;
        if (typeof errCallback === 'function') {
            errCallback(e);
        }
    }

    http.get(url, function (res) {
        var bufferHelper = new BufferHelper();
        res.on('data', function (chunk) {
            bufferHelper.concat(chunk);
        });
        // The response stream can fail after headers arrived (e.g. reset mid-body).
        res.on('error', fail);
        res.on('end', function () {
            if (settled) {
                return;
            }
            settled = true;
            // Decode the complete buffer in one go so a multi-byte GBK character
            // split across two chunks is not corrupted.
            callback(iconv.decode(bufferHelper.toBuffer(), 'GBK'));
        });
    }).on('error', fail);
}

// connection.end();

console的输出图如下
这里写图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值