nodejs 爬取热点明星存入mysql数据库

配置文件说明:
/root/Spider/nodejs-server-server的project.json
{
"open_allbaidu_hot": 1, //是否爬取所有百度热点信息
"open_allwangyi_hot": 1, //是否爬取所有网易热点信息
"open_alljinritoujiao_hot": 1, //是否爬取今日头条热点信息
"open_allweibo_hot": 1, //是否爬取微博热点信息
"open_allxinlang_hot": 1, //是否爬取新浪热点信息
"open_allbaidutieba_hot": 1, //是否爬取百度贴吧热点信息
"open_baidu_hot_people": 1, //是否爬取百度热点明星
"open_weibo_hot_people": 1, //是否爬取微博热点明星
"timeInterval": 1, //设置爬虫时间间隔,单位为小时
"db_user": "root", //数据库用户名
"password": "233233", //密码
"database": "nodejs" //数据库名字
}

主类别表:

mid: int 类别id,主键
mclassname: varchar(20) 主类名

次类别表:
Cid int 类别id,主键
Cclassname varchar(20) 次类名

成员表:
Id int 成员id,主键
Name varchar(20) 名字
mClass int 所属主类名id,外键
CClass int 所属次类名id,外键
Source varchar(50) 来源网站
Description varchar(2000) 信息描述
Hot varchar(5) 热度指数
Time varchar(20) 获取时间

登录 MySQL 查看结果:
这里写图片描述

重点内容

// Category labels for the Baidu celebrity rankings (19 entries).
// NOTE(review): there are also 19 URLs in bmsites; presumably the two arrays
// are position-aligned (label i describes URL i) — confirm against the caller.
var sources = [
    "热点人物",
    "娱乐名人",
    "女演员",
    "男演员",
    "演员",
    "女歌手",
    "男歌手",
    "歌手",
    "名家人物",
    "主持人",
    "体坛人物",
    "美女",
    "帅哥",
    "选秀歌手",
    "欧美明星",
    "财经人物",
    "互联网人物",
    "历史人物",
    "公益明星"
];

// Baidu "top buzz" ranking pages; the fetch code below picks one with bmsites[mxn].
var bmsites = [
    "http://top.baidu.com/buzz?b=258&c=9&fr=topcategory_c9",
    "http://top.baidu.com/buzz?b=618&c=9&fr=topbuzz_b258_c9",
    "http://top.baidu.com/buzz?b=18&c=9&fr=topbuzz_b18_c9",
    "http://top.baidu.com/buzz?b=17&c=9&fr=topbuzz_b18_c9",
    "http://top.baidu.com/buzz?b=1395&c=9&fr=topbuzz_b17_c9",
    "http://top.baidu.com/buzz?b=16&c=9&fr=topbuzz_b1395_c9",
    "http://top.baidu.com/buzz?b=15&c=9&fr=topbuzz_b16_c9",
    "http://top.baidu.com/buzz?b=1396&c=9&fr=topbuzz_b15_c9",
    "http://top.baidu.com/buzz?b=260&c=9&fr=topbuzz_b1396_c9",
    "http://top.baidu.com/buzz?b=454&c=9&fr=topbuzz_b260_c9",
    "http://top.baidu.com/buzz?b=255&c=9&fr=topbuzz_b454_c9",
    "http://top.baidu.com/buzz?b=3&c=9&fr=topbuzz_b255_c9",
    "http://top.baidu.com/buzz?b=22&c=9&fr=topbuzz_b3_c9",
    "http://top.baidu.com/buzz?b=493&c=9&fr=topbuzz_b22_c9",
    "http://top.baidu.com/buzz?b=491&c=9&fr=topbuzz_b493_c9",
    "http://top.baidu.com/buzz?b=261&c=9&fr=topbuzz_b491_c9",
    "http://top.baidu.com/buzz?b=257&c=9&fr=topbuzz_b261_c9",
    "http://top.baidu.com/buzz?b=259&c=9&fr=topbuzz_b257_c9",
    "http://top.baidu.com/buzz?b=612&c=9&fr=topbuzz_b259_c9"
];

// Accumulates the scraped {name, hot} records pushed by the fetch code.
var bmingxingentity = [];
// Holds the payload that is serialized and cached, keyed by content type.
var examples = {};

// Download the Baidu ranking page selected by mxn, extract each entry's name
// and hot index, insert one row per entry into the Member table, and cache the
// accumulated list in Redis under 'Hot4' + (mxn + 10000).
// NOTE(review): mxn, http, BufferHelper, iconv, mysql, project, client and
// redis are defined outside this fragment — presumably mxn is the parameter of
// the enclosing function; confirm.
var mxurl = bmsites[mxn];
console.log(bmsites.length)
var req = http.get(mxurl,function(res){
    var buffer = new BufferHelper();
    res.on('data',function(data){
        buffer.concat(data);
    }).on('end',function(){
        // Declared outside the try so the finally clause can always close it.
        var conn = null;
        try{
            // The body is collected as raw bytes and decoded as GBK here.
            var str = iconv.decode(buffer.toBuffer(),'gbk');
            // Named so they no longer shadow the HTTP response `res`.
            var nameMatches = str.match(/href_top.*>.*\</gm);
            var hotMatches = str.match(/span class=\"icon\-.*>.*\</gm);
            if(nameMatches == null || hotMatches == null){
                // Nothing to store; previously this path crashed on an
                // undefined connection and the error was silently swallowed.
                console.log('No ranking entries found in ' + mxurl);
                return;
            }

            conn = mysql.createConnection({
                host: 'localhost',
                user: project.db_user,
                password: project.password,
                database: project.database,
                port: 3306
            });
            console.log("begin to connect");
            conn.connect();

            var CID = mxn + 1;
            var now = new Date();
            // getMonth() is 0-based, so add 1 to store the calendar month.
            var time = now.getFullYear() + "-" + (now.getMonth() + 1) + "-" + now.getDate() + " " + now.getHours() + ":" + now.getMinutes();
            // Only pair up entries that have both a name and a hot value.
            var count = Math.min(nameMatches.length, hotMatches.length);

            for(var i = 0;i < count;i++){
                // Keep the text between the first '>' and the last '<' of the match.
                var r = nameMatches[i].match(/\>.*\</gm)[0];
                var mh = hotMatches[i].match(/\>.*\</gm)[0];
                r = r.replace("<","").replace(">","");
                mh = mh.replace("<","").replace(">","");

                // Placeholders let the driver escape the scraped text instead of
                // splicing it into the SQL string.
                conn.query(
                    "insert into Member (name,hot,mid,source,cid,time) values (?,?,?,?,?,?)",
                    [r, parseFloat(mh), 1, "baidu", CID, time],
                    function(err){
                        if(err) {
                            console.log('ClientConnectionReady Error: ' + err.message);
                        }
                    }
                );

                bmingxingentity.push({
                    'name': r,
                    'hot': parseInt(mh, 10)
                });
            }

            examples['application/json'] = bmingxingentity;
            console.log(10000);
            var data = JSON.stringify(examples[Object.keys(examples)[0]] || {}, null, 2);
            client.set('Hot4' + (mxn+10000), data, redis.print);
        }catch(e){
            // Report failures instead of discarding them.
            console.error('Failed to process ' + mxurl + ': ' + e.message);
        }finally{
            if(conn !== null){
                conn.end();
            }
        }
    }).on('close',function(){
        console.log('Close recevied!');
    });
});
req.on('error',function(error){
    console.error('Request to ' + mxurl + ' failed: ' + error.message);
});
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值