Node.js 爬虫:HTTP 请求中 form data 与 request payload 的区别

参考:
http://www.cnblogs.com/btgyoyo/p/6141480.html

代码片段:

// HTTP client used for every request made by start().
let request = require('request');
// NOTE(review): cheerio is required but not referenced in the visible code.
let cheerio = require('cheerio');
// Control-flow helpers (waterfall / whilst) that sequence the requests in start().
let async=require('async');
// NOTE(review): querystring is required but not referenced in the visible code.
let querystring=require('querystring');
// NOTE(review): login_url is declared but not referenced in the visible code.
let login_url='https://auth2.cityads.com/login/';
// Page fetched by the first (optional) step of start().
let offers_url='https://cityads.com/stat/analytics/offers';
// Endpoint that receives the report filter and answers with a jobHash.
let create_job="https://cityads.com/stat/ds/create_job";
// Endpoint polled with the jobHash until the job reports status "success".
let get_job_status="https://cityads.com/stat/ds/get_job_status";
let fs=require('fs');
let path=require('path');
// Spreadsheet parser used by read_file().
let xlsx = require('node-xlsx');
// Set by start() from the create_job response.
let jobHash='';
// Name of the downloaded report file; set by start(), read by read_file().
let file_path='';
// Base headers (a desktop Chrome User-Agent) for the requests made by start().
let headers={
    'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.113 Safari/537.36'
};

/**
 * Runs the whole report-export flow as an async.waterfall:
 *   1. GET the offers page (the response is ignored; the original author notes
 *      this step can be omitted),
 *   2. POST the report filter to create_job and read `jobHash` from the JSON reply,
 *   3. poll get_job_status until it answers status "success" and yields a `uri`,
 *   4. download "https://cityads.com" + uri into a file next to this script and
 *      wait until the file is completely written,
 *   5. parse that file with read_file().
 *
 * Takes no arguments and returns nothing. Side effects: sets the module-level
 * `jobHash` and `file_path`, writes the downloaded file to disk and logs to the
 * console. Any error that aborts the waterfall is logged by the final callback.
 */
function start() {
    // Session cookie copied from a logged-in browser session.
    // NOTE(review): it is hard-coded, so it presumably has to be refreshed by hand once it expires.
    const cookie="PHPSESSID=8rkhggprglk1an1cg4g2sjahg4; _ym_uid=1505217442926426080; _ym_isad=2; _ga=GA1.2.1435829105.1505217441; _gid=GA1.2.307976531.1505217441; storage_key_stat=106009bbb914944c62857aead94b7b7229d4d5d1";
    const referer="https://cityads.com/stat/conversions";
    // Pause between two status polls so the endpoint is not hit in a tight loop.
    const poll_interval_ms=1000;

    // Builds a fresh header object for one request. The module-level `headers`
    // is copied, never mutated, so a request no longer depends on which steps
    // ran before it; the session cookie is always included.
    function build_headers(extra) {
        return Object.assign({},headers,{"Cookie":cookie},extra);
    }

    // Extra headers that make a POST look like the site's own XHR calls.
    function xhr_headers(extra) {
        return build_headers(Object.assign({
            "origin":"https://cityads.com",
            "referer":referer,
            "x-json":1,
            "x-requested-with":"XMLHttpRequest"
        },extra));
    }

    async.waterfall([
        function (cb) {
            // Optional warm-up request; its response body is not used.
            let param={
                url:offers_url,
                method:"GET",
                headers:build_headers()
            };
            request(param,function (err) {
                // Best-effort step: report the failure but let the flow continue.
                if (err) console.log(err);
                cb();
            });
        },function (cb) {
            let filter={
                "mainGroup":"203",
                "subGroup":"",
                "period":"11.09.2017 - 11.09.2017",
                "dateType":"event_time",
                "sort":"",
                "sdir":"",
                "cols":"is_sale,order_key,subaccount,click_time,lead_time,sale_time,lead_delta,sale_delta,total_delta,action_name,campaign_target_id,status,customer_type,payment_method,basket_count,original_total,currency,order_total,wm_currency,percent,commission_open_adv,commission_rejected_adv,commission_adv",
                "reportPageId":11020001,
                "lang":"cn",
                "keyword":"",
                "complexFilter":[],
                "skin":"",
                "useSkin":"0",
                "jd":"",
                "userCurrency":"rub",
                "format":"xls"
            };
            // NOTE(review): the JSON string is handed to the `form` option while
            // Content-Type is set to text/plain; confirm against the request docs
            // whether `body` is what a raw request payload should use here.
            let param={
                url:create_job,
                method:"POST",
                headers:xhr_headers({"Content-Type":"text/plain;charset=UTF-8"}),
                form:JSON.stringify(filter)
            };
            request(param,function (err,res,body) {
                // On a transport error there is no body to parse.
                if (err) return cb(err);
                try {
                    jobHash=JSON.parse(body)["jobHash"];
                } catch (parse_err) {
                    return cb(parse_err);
                }
                cb(null,jobHash);
            });
        },function (jobHash,callback) {
            let flag=true;
            async.whilst(
                function () {
                    return flag;
                },
                function (cb) {
                    let param={
                        url:get_job_status,
                        method:"POST",
                        form:{
                            json:1,
                            jobHash:jobHash
                        },
                        headers:xhr_headers()
                    };
                    request(param,function (err,res,body) {
                        if (err) return cb(err);
                        let result;
                        try {
                            result=JSON.parse(body);
                        } catch (parse_err) {
                            return cb(parse_err);
                        }
                        if (result["status"]==="success") {
                            flag=false;
                            return cb(null,result.uri);
                        }
                        // Not ready yet: wait before asking again.
                        setTimeout(cb,poll_interval_ms);
                    });
                },
                function (err,uri) {
                    if (err) console.log(err);
                    console.log("uri=",uri);
                    callback(err,uri);
                });
        },function (uri,cb) {
            // The file name is taken from the 4th "/"-separated segment of the uri.
            let _file_path=typeof uri==="string" ? uri.split("/")[3] : undefined;
            if (!_file_path) {
                return cb(new Error("unexpected report uri: "+uri));
            }
            file_path=_file_path;
            let download_url="https://cityads.com"+uri;
            // NOTE(review): accept-encoding is advertised but no `gzip` option is set
            // on the request; confirm the server does not send a compressed body.
            let param={
                url:download_url,
                method:"GET",
                headers:build_headers({
                    "accept":"text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
                    "referer":referer,
                    "accept-encoding":"gzip, deflate, br",
                    "accept-language":"zh-CN,zh;q=0.8",
                    "upgrade-insecure-requests":1
                })
            };
            // Both the request and the write stream can fail; report only once.
            let settled=false;
            function finish(err) {
                if (settled) return;
                settled=true;
                cb(err);
            }
            // Write next to this script, which is where read_file() looks for
            // the file, and continue only once the data is fully on disk.
            request(param)
                .on('error',finish)
                .pipe(fs.createWriteStream(path.join(__dirname,_file_path)))
                .on('error',finish)
                .on('finish',function () {
                    finish();
                });
        },
        function (cb) {
            // The download has finished, so the file can be parsed right away.
            try {
                read_file();
            } catch (read_err) {
                return cb(read_err);
            }
            cb();
        }
    ],function (err) {
        if (err) console.log(err);
    });
}
/**
 * Parses the downloaded spreadsheet and logs the rows of its first sheet.
 *
 * Reads the module-level `file_path` (set by start()), resolves it against
 * this script's directory, parses it with node-xlsx and prints the `data`
 * array of the first sheet. Takes no arguments and returns nothing; any
 * exception from xlsx.parse propagates to the caller.
 */
function read_file() {
    let _path=path.join(__dirname,file_path);
    let obj = xlsx.parse(_path);
    let excelArray = obj[0].data;
    // The original assigned '' to `path` here, which replaced the module-level
    // `path` module with a string and made any later path.join call throw.
    console.log(excelArray);
}

// Run the export flow as soon as the script is loaded.
start();
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值