PHP 爬虫登录:使用 PHP cURL 爬取需要登录的页面

// Log in with a form POST, then fetch a page that requires the login session,
// reusing one cURL handle so the cookies received at login are sent again.

// Form-encoded login payload.
// NOTE(review): credentials are hard-coded here; move them to configuration
// before using this outside of a demo.
$data = 'username=demo_peter@126.com&password=123qwe&remember=1';

$curlobj = curl_init(); // create the handle shared by both requests below

curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/user/login"); // login endpoint
curl_setopt($curlobj, CURLOPT_RETURNTRANSFER, true); // return the response instead of printing it

// Cookie-related settings; these must be in place before any request is made.
date_default_timezone_set('PRC'); // original author's note: a timezone must be set before cookies are used
curl_setopt($curlobj, CURLOPT_COOKIESESSION, TRUE);
curl_setopt($curlobj, CURLOPT_COOKIEFILE, "cookiefile"); // read cookies from this file
curl_setopt($curlobj, CURLOPT_COOKIEJAR, "cookiefile");  // write cookies to this file when the handle is closed
// NOTE(review): this forwards the local PHP session name/id to the remote
// site; no session is started in this script, so confirm it is needed.
curl_setopt($curlobj, CURLOPT_COOKIE, session_name() . '=' . session_id());
curl_setopt($curlobj, CURLOPT_HEADER, 0);

// CURLOPT_FOLLOWLOCATION is deliberately not enabled: per the original author
// it requires safe mode and open_basedir to be turned off, which weakens
// server security. Redirects are followed by curl_redir_exec() instead.

curl_setopt($curlobj, CURLOPT_POST, 1);
curl_setopt($curlobj, CURLOPT_POSTFIELDS, $data);
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array(
    // Every entry must be a complete "Name: value" header line; the field
    // name was missing here, which made the entry an invalid header.
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
    "Content-length: " . strlen($data),
));

curl_exec($curlobj); // perform the login; the response body itself is not used

// Second request: fetch the page that is only available after logging in.
curl_setopt($curlobj, CURLOPT_URL, "http://www.imooc.com/space/index");
curl_setopt($curlobj, CURLOPT_POST, 0); // back to a non-POST request
curl_setopt($curlobj, CURLOPT_HTTPHEADER, array("Content-type: text/xml")); // replaces the login headers

$output = curl_redir_exec($curlobj); // execute, following redirects manually

curl_close($curlobj); // release the handle

echo $output;

/**
 * Execute a cURL handle and follow HTTP redirects manually.
 *
 * This is a substitute for CURLOPT_FOLLOWLOCATION. The handle is executed
 * with response headers enabled; while the response is a redirect
 * (301, 302, 303, 307 or 308) carrying a usable Location header, the handle's
 * URL is updated and the request is repeated, up to 20 requests in total.
 * The request method and body configured on the handle are not modified
 * between hops.
 *
 * Side effects on $ch: CURLOPT_HEADER and CURLOPT_RETURNTRANSFER are switched
 * on, and CURLOPT_URL is left pointing at the last URL requested.
 *
 * @param resource|CurlHandle $ch    Configured cURL handle.
 * @param string              $debug Unused; kept so existing callers keep working.
 *
 * @return string|false Body of the final response (headers stripped). If a
 *                      redirect response has no usable Location header, the
 *                      body of that redirect response is returned. FALSE is
 *                      returned when the transfer fails or the redirect
 *                      limit is exhausted.
 */
function curl_redir_exec($ch, $debug = "")
{
    $max_requests = 20;
    $redirect_codes = array(301, 302, 303, 307, 308);

    curl_setopt($ch, CURLOPT_HEADER, true); // headers are needed to read the redirect target
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    for ($attempt = 0; $attempt < $max_requests; $attempt++) {
        $response = curl_exec($ch);
        if ($response === false) {
            return false;
        }

        // Split the raw response into header block and body. The body starts
        // exactly at the header size reported by cURL.
        $header_size = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $header = (string) substr($response, 0, $header_size);
        $body = (string) substr($response, $header_size);

        $http_code = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);
        if (!in_array($http_code, $redirect_codes, true)) {
            return $body;
        }

        // Header field names are case-insensitive, hence the "i" modifier.
        $matches = array();
        if (!preg_match('/^Location:[ \t]*(.*?)[ \t\r]*$/mi', $header, $matches) || $matches[1] === '') {
            // Redirect status without a target: nothing to follow.
            return $body;
        }

        $new_url = curl_redir_resolve_location($matches[1], curl_getinfo($ch, CURLINFO_EFFECTIVE_URL));
        if ($new_url === null) {
            // The redirect target could not be turned into a URL.
            return $body;
        }

        curl_setopt($ch, CURLOPT_URL, $new_url);
    }

    // Too many consecutive redirects.
    return false;
}

/**
 * Resolve a Location header value against the URL that produced it.
 *
 * Handles absolute URLs, protocol-relative URLs ("//host/path"),
 * absolute paths ("/path") and relative paths ("page.php"). The port of the
 * authority in use is preserved. Dot segments ("./", "../") are not collapsed
 * and any fragment is dropped.
 *
 * @param string $location Value of the Location header.
 * @param string $base_url URL of the request that returned the redirect.
 *
 * @return string|null The URL to request next, or NULL if none can be built.
 */
function curl_redir_resolve_location($location, $base_url)
{
    $target = parse_url($location);
    if (!is_array($target)) {
        return null;
    }

    $base = parse_url((string) $base_url);
    if (!is_array($base)) {
        $base = array();
    }

    if (isset($target['host'])) {
        // The target names its own host: take host, port and path from it.
        $authority = $target;
        $path = isset($target['path']) ? $target['path'] : '/';
    } else {
        // Same host as the previous request; work out the path.
        $authority = $base;
        $base_path = (isset($base['path']) && $base['path'] !== '') ? $base['path'] : '/';

        if (!isset($target['path']) || $target['path'] === '') {
            $path = $base_path;
        } elseif ($target['path'][0] === '/') {
            $path = $target['path'];
        } else {
            // Relative path: replace the last segment of the previous path.
            $path = substr($base_path, 0, strrpos($base_path, '/') + 1) . $target['path'];
        }
    }

    if (isset($target['scheme'])) {
        $scheme = $target['scheme'];
    } elseif (isset($base['scheme'])) {
        $scheme = $base['scheme'];
    } else {
        return null;
    }

    if (!isset($authority['host'])) {
        return null;
    }

    $new_url = $scheme . '://' . $authority['host'];
    if (isset($authority['port'])) {
        $new_url .= ':' . $authority['port'];
    }
    $new_url .= $path;
    if (isset($target['query'])) {
        $new_url .= '?' . $target['query'];
    }

    return $new_url;
}

// Query the question-list REST endpoint and dump the decoded JSON reply.

// Service endpoint that receives the POST request.
$url = 'http://zhidao.chanjet.com/restserver/zhidao';

// Request parameters, handed to postData() as the POST fields.
$data = array(
    'api_key'   => '3qQ2Edm62Vd4bAVCwNoxgn0l',
    'method'    => 'baidu.zhidao.getQuestionList',
    'call_id'   => '1308713190',
    'cid'       => 59533,
    'qstatus'   => 1,
    'format'    => 'json',
    'page_no'   => 1,
    'page_size' => 25,
    'keywords'  => '财务',
    'bd_sig'    => '2bad1c47bb75e0363a689f4b09743afb',
);

// Send the request and decode the JSON text into nested associative arrays.
$json_data = postData($url, $data);
$array = json_decode($json_data, true);

// Print a separator, then a readable dump of the decoded structure.
echo '

';
print_r($array);

/**
 * Send an HTTP POST request and return the response body.
 *
 * @param string       $url     Target URL.
 * @param array|string $data    POST fields. Per the PHP manual, an array is
 *                              sent as multipart/form-data, while a
 *                              URL-encoded string is sent as
 *                              application/x-www-form-urlencoded.
 * @param int          $timeout Seconds allowed for connecting and, as an
 *                              upper bound, for the whole transfer.
 *
 * @return string|false Response body, or FALSE if the handle could not be
 *                      created or the transfer failed.
 */
function postData($url, $data, $timeout = 300)
{
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_URL, $url);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); // return the body instead of printing it
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // A connect timeout alone does not bound a stalled transfer once the
    // connection is up, so cap the total request time as well.
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout);

    $response = curl_exec($ch);

    curl_close($ch);

    return $response;
}

?>

// Fetch the NBA news page over HTTPS and keep the raw response in $output.
$curlobj = curl_init();

curl_setopt_array($curlobj, array(
    CURLOPT_URL            => "https://news.zhibo8.cc/nba/",
    CURLOPT_POST           => 0,
    CURLOPT_RETURNTRANSFER => 1, // return the page instead of printing it
    CURLOPT_HTTPHEADER     => array("Content-type: text/xml"),
    // NOTE(review): certificate verification is switched off here, so the
    // server's identity is not checked — confirm this is acceptable.
    CURLOPT_SSL_VERIFYPEER => false,
));

$output = curl_exec($curlobj); // run the request

// Report a transfer failure before the handle is released.
if ($output === FALSE) {
    echo "cURL Error: " . curl_error($curlobj);
}

curl_close($curlobj);

//var_dump($output);

// NOTE(review): the call below is truncated — the regular expression and the
// remaining preg_match() arguments are missing, so this fragment cannot run
// as written. Presumably it matched against $output and filled $arMatch;
// restore the pattern from the original source before use.
preg_match('/

var_export($arMatch);

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值