PHP用curl模拟登录并采集后台数据

PHP用curl模拟登录并采集后台数据的步骤:
1、请求登录接口,获取服务器返回的保存身份信息的cookie,并存入指定的cookie文件。见Gather类的login()方法
2、请求需要身份验证的页面时带上该cookie文件。见Gather类的getcontent()方法

PHP的CURL库中可以设置记录和读取cookie。有三个重点:
1、CURLOPT_COOKIE: 在http头中设置cookie的信息
如:
curl_setopt($ch, CURLOPT_COOKIE, "JSESSIONID=BFC03277CCB4059BDD08510F19E712E5; __qca=P0-2009019170-1433422712728");
2、CURLOPT_COOKIEJAR: 收到的http response中set-cookie的存放路径
设置:curl_setopt($ch, CURLOPT_COOKIEJAR, "temp.cookie");
3、CURLOPT_COOKIEFILE: 发出的http request中cookie从哪里读取
设置:curl_setopt($ch, CURLOPT_COOKIEFILE, "temp.cookie");
其中temp.cookie文件要有读写权限

<?php
/**
 * Logs in to a remote site with cURL and fetches pages that require the
 * session cookie obtained by that login.
 *
 * The cookie-jar file path is remembered between requests by storing it in a
 * browser cookie named "cookie_jar".
 */
class Gather
{
    /** Directory handed to tempnam() when a new cookie-jar file is created. */
    const COOKIE_DIR = './tmp/';

    /** User-Agent header sent with every request. */
    const USER_AGENT = 'Mozilla/5.0 (Windows NT 5.1; rv:9.0.1) Gecko/20100101 Firefox/9.0.1';

    /**
     * Path of the cookie file used by cURL to store and send cookies.
     * Generated by login(); read-only for callers.
     */
    private $cookie_jar;

    /**
     * @return string|null Path of the cookie-jar file, or null before login() has run.
     */
    public function get_cookie_jar()
    {
        return $this->cookie_jar;
    }

    /**
     * Ensures a cookie jar holding a login session exists.
     *
     * When the browser already carries a "cookie_jar" cookie pointing at a
     * cookie file created by this class, that file is reused and no request
     * is made. Otherwise a new cookie file is created, the credentials are
     * POSTed to $url, and the file path is stored in the "cookie_jar" cookie.
     *
     * @param string $url       Login endpoint.
     * @param array  $logininfo Form fields to POST (name => value).
     * @return void
     * @throws RuntimeException When the cookie file cannot be created or the
     *                          login request fails at the transport level.
     */
    public function login($url, $logininfo)
    {
        $candidate = isset($_COOKIE['cookie_jar']) ? $_COOKIE['cookie_jar'] : null;
        if (is_string($candidate) && $this->is_trusted_cookie_jar($candidate)) {
            // A cookie file from an earlier request exists: treat as logged in.
            $this->cookie_jar = $candidate;
            return;
        }

        $cookie_jar = tempnam(self::COOKIE_DIR, 'cookie');
        if ($cookie_jar === false) {
            throw new RuntimeException('unable to create a cookie file');
        }

        $content = $this->execute(array(
            CURLOPT_COOKIEJAR  => $cookie_jar,
            CURLOPT_URL        => $this->with_cache_buster($url),
            CURLOPT_HEADER     => true, // include response headers in the result
            CURLOPT_POST       => true,
            CURLOPT_POSTFIELDS => http_build_query($logininfo, '', '&'),
        ));

        if ($content === false) {
            // Do not leave an empty jar behind: it would look like a valid login.
            if (is_file($cookie_jar)) {
                unlink($cookie_jar);
            }
            throw new RuntimeException('login request failed');
        }

        // Remember the jar only once the login request actually went through.
        setcookie('cookie_jar', $cookie_jar);
        $this->cookie_jar = $cookie_jar;
    }

    /**
     * Fetches a page using the cookies saved by login().
     *
     * @param string $uri Address of the page to fetch.
     * @return string|false Response body, or false when the transfer failed.
     * @throws Exception When no cookie file is available (login() not called).
     */
    public function getcontent($uri)
    {
        if (!(isset($this->cookie_jar) && is_file($this->cookie_jar))) {
            throw new Exception("can't find any cookie file,call login() first!");
        }

        return $this->execute(array(
            CURLOPT_COOKIEFILE => $this->cookie_jar,
            CURLOPT_URL        => $uri,
            CURLOPT_POST       => false,
            CURLOPT_HEADER     => false, // body only
        ));
    }

    /**
     * Runs one cURL transfer with the options shared by every request and
     * releases the handle afterwards.
     *
     * @param array $params Request-specific cURL options (CURLOPT_* => value).
     * @return string|false Response text, or false on failure.
     */
    private function execute($params)
    {
        $ch = curl_init();
        if ($ch === false) {
            return false;
        }

        // Request-specific options win; the union only fills in the defaults.
        $params += array(
            CURLOPT_RETURNTRANSFER => true, // return the response instead of printing it
            CURLOPT_FOLLOWLOCATION => true, // follow redirects
            CURLOPT_USERAGENT      => self::USER_AGENT,
        );

        curl_setopt_array($ch, $params);
        $content = curl_exec($ch);
        // Closing the handle is what flushes received cookies to CURLOPT_COOKIEJAR.
        curl_close($ch);

        return $content;
    }

    /**
     * Appends a random number to the URL so the request is not served from a cache.
     *
     * The separator choice is parenthesised on purpose: "." binds tighter than
     * "!==", so an unparenthesised ternary swallows the URL.
     *
     * @param string $url
     * @return string
     */
    private function with_cache_buster($url)
    {
        $separator = (strpos($url, '?') !== false) ? '&' : '?';

        return $url . $separator . rand(1000, 9999);
    }

    /**
     * Tells whether a client-supplied path is a cookie file this class could
     * have created. The path arrives in a browser cookie and is therefore
     * untrusted; callers may later read or delete the file.
     *
     * @param string $path
     * @return bool
     */
    private function is_trusted_cookie_jar($path)
    {
        $real = realpath($path);
        if ($real === false || !is_file($real)) {
            return false;
        }

        // tempnam() may shorten the 'cookie' prefix (Windows keeps three characters).
        if (strpos(basename($real), 'coo') !== 0) {
            return false;
        }

        // tempnam() falls back to the system temp directory when COOKIE_DIR is unusable.
        $directory = dirname($real);
        foreach (array(self::COOKIE_DIR, sys_get_temp_dir()) as $allowed) {
            $allowed_real = realpath($allowed);
            if ($allowed_real !== false && $directory === $allowed_real) {
                return true;
            }
        }

        return false;
    }
}
    require_once './Classes/PHPExcel.php';


    /**
     * --------- Usage of the Gather class ---------------
     *
     * Logs in, downloads the master list and the detail rows of every entry,
     * and streams the result to the browser as an Excel (.xls) download.
     */

    // Step 1: log in (Gather::login() skips the request when a cookie file already exists).
    $demo = new Gather();
    $login_info = array('user' => 'twm', 'pwd' => '111111');
    $demo->login('http://127.0.0.1:9090/user/doLogin', $login_info);

    // Step 2: fetch the master list; the random suffix defeats caching.
    $uri_list = 'http://127.0.0.1:9090/Bill_list?railReceiveDate=isc_RestDataSource_0&' . rand(1000, 9999);
    $list_result = $demo->getcontent($uri_list);
    $list_obj = json_decode($list_result);

    // A failed fetch or invalid JSON leaves no usable object; guard before dereferencing.
    $list_rows = (is_object($list_obj) && isset($list_obj->response->data))
        ? $list_obj->response->data
        : null;

    // When "data" is not a list the session cookie is taken to be expired, e.g.
    // {"response":{"data":"hello","endRow":null,"errors":null,"startRow":null,"status":-9,"totalRows":null}}
    // Delete the cookie file and reload the page so that login() runs again.
    if (!is_array($list_rows)) {
        $stale_jar = $demo->get_cookie_jar();
        if (is_string($stale_jar) && is_file($stale_jar)) {
            unlink($stale_jar);
        }
        sleep(2);
        header('Location: http://127.0.0.1/gather.php');
        // Stop here: without this the export below would run on invalid data.
        exit;
    }

    // $search_data is not assigned anywhere in this file; default it so the
    // document properties and the file name below do not read an undefined variable.
    // NOTE(review): set this to the intended label if one was meant.
    $search_data = isset($search_data) ? $search_data : '';

    // Step 3: build the workbook.
    /*---------- Excel document properties and header row -------------*/
    $objPHPExcel = new PHPExcel();
    $objPHPExcel->getProperties()->setCreator("TWM")
                         ->setTitle($search_data)
                         ->setSubject($search_data)
                         ->setDescription("管理表格");
    $sheet = $objPHPExcel->setActiveSheetIndex(0);
    $sheet->setCellValue('A1', '名称')
          ->setCellValue('B1', '规格')
          ->setCellValue('C1', '库存')
          ->setCellValue('D1', '单价')
          ->setCellValue('E1', '颜色')
          ->setCellValue('F1', '描述');

    /*---------- Excel body rows -------------*/
    $i = 2; // row 1 holds the header
    // Master/detail loop: one detail request per master row.
    foreach ($list_rows as $value) {
        // The query parameters must be separated by "&"; the bill number is
        // encoded so that special characters cannot break the query string.
        $uri_detail = 'http://127.0.0.1:9090/listDetail?bid='
            . urlencode((string) $value->railReceiveBillNo)
            . '&dataFormat=json';
        $detail_result = $demo->getcontent($uri_detail);
        $detail_obj = json_decode($detail_result);

        // Skip master rows whose detail response is missing or malformed.
        if (!is_object($detail_obj)
            || !isset($detail_obj->response->data)
            || !is_array($detail_obj->response->data)) {
            continue;
        }

        foreach ($detail_obj->response->data as $detailvalue) {
            // NOTE(review): the original wrote the master row ($value) here and
            // never used $detailvalue, repeating the same values for every
            // detail row. The columns are assumed to be detail fields - confirm
            // against the listDetail response schema.
            $sheet->setCellValue('A' . $i, $detailvalue->title)
                  ->setCellValue('B' . $i, $detailvalue->model)
                  ->setCellValue('C' . $i, $detailvalue->store)
                  ->setCellValue('D' . $i, $detailvalue->price)
                  ->setCellValue('E' . $i, $detailvalue->color)
                  ->setCellValue('F' . $i, $detailvalue->remark);
            $i++;
        }
    }

    /*---------- Send as download -------------*/
    header('Content-Type: application/vnd.ms-excel');
    header('Content-Disposition: attachment;filename="管理表格'.$search_data.'.xls"');
    header('Cache-Control: max-age=0');
    // If you're serving to IE 9, then the following may be needed
    header('Cache-Control: max-age=1');

    // If you're serving to IE over SSL, then the following may be needed
    header ('Expires: Mon, 26 Jul 1997 05:00:00 GMT'); // Date in the past
    header ('Last-Modified: '.gmdate('D, d M Y H:i:s').' GMT'); // always modified
    header ('Cache-Control: cache, must-revalidate'); // HTTP/1.1
    header ('Pragma: public'); // HTTP/1.0

    $objWriter = PHPExcel_IOFactory::createWriter($objPHPExcel, 'Excel5');
    $objWriter->save('php://output');
    exit;

  • 1
    点赞
  • 6
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值