在编写爬虫的过程中，很多网页需要登录后才能访问。Snoopy 类库可以方便地向服务器发送 cookie，从而模拟登录状态。以下代码演示了如何模拟登录新浪微博；出于安全考虑，我的 cookie 字符串没有展示出来，使用时请将 $cookieStr 替换为从浏览器中复制的 cookie 字符串~
<?php
/**
 * Fetches a Sina Weibo home page as a logged-in user by replaying browser
 * cookies through the Snoopy HTTP client, then prints the response body.
 *
 * Usage: paste the raw Cookie header value of a logged-in browser session
 * ("name=value; name2=value2; ...") into $cookieStr before running.
 */
require_once ('Snoopy.class.php');

$snoopy = new Snoopy();

// Raw cookie string copied from the browser; intentionally left blank here.
$cookieStr = " ";

// Request headers that make the request look like it comes from a browser.
$snoopy->host = "weibo.com";
$snoopy->agent = "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:43.0) Gecko/20100101 Firefox/43.0";
$snoopy->referer = "http://weibo.com/";

// Split the cookie string into name/value pairs and register them on the client.
foreach (explode(";", $cookieStr) as $item) {
    // Limit of 2: only the first "=" separates name from value, so values that
    // themselves contain "=" (base64 padding, nested k=v payloads) stay intact.
    $pair = explode("=", $item, 2);

    // Skip blank segments (empty placeholder, trailing ";") and segments
    // without "=", which previously triggered an undefined-offset warning and
    // registered a cookie with an empty name.
    if (count($pair) !== 2) {
        continue;
    }

    $name = trim($pair[0]);
    if ($name === '') {
        continue;
    }

    $snoopy->cookies[$name] = urldecode(trim($pair[1]));
}

$url = "http://weibo.com/u/2901542563/home";

// NOTE(review): Snoopy::fetch() is expected to return false and populate
// $snoopy->error on failure - confirm against the bundled Snoopy version.
if (!$snoopy->fetch($url)) {
    // Log only; the page output below is unchanged from the original script.
    error_log("Snoopy fetch failed for {$url}: {$snoopy->error}");
}

$content = $snoopy->results;
echo $content;
?>