复制直接可用
注意:正则匹配仅限于示例中的网址
// Script entry point: run the scraper immediately. index() is declared further
// down in this file; PHP resolves top-level function declarations before
// executing the script, so calling it ahead of its definition works.
index();
/**
 * Scrapes the article list from the DFRobot community home page and dumps it.
 *
 * Downloads the page with httpCurl(), extracts the thumbnail, title/link and
 * description blocks from the HTML with regular expressions, joins them by
 * position into one record per description block, prints the records with
 * var_dump() and then terminates the script.
 *
 * The patterns are written for the markup of https://mc.dfrobot.com.cn/ only.
 *
 * Each record has the keys 'content', 'img', 'url' and 'title'. A field whose
 * source block is missing or does not match is set to an empty string instead
 * of triggering an undefined-offset warning.
 *
 * @return void Does not return normally: the script ends after the dump.
 */
function index()
{
    // Page to scrape; also used as the prefix for the extracted image paths.
    $baseUrl = 'https://mc.dfrobot.com.cn/';
    $result = httpCurl($baseUrl, [], 'GET');
    // A failed request yields a non-string result; scan an empty document then.
    $html = is_string($result) ? $result : '';

    // Thumbnail containers
    preg_match_all('/<div class="imgBox".*?>.*?<\/div>/s', $html, $avatar);
    // Title containers
    preg_match_all('/<div class="titleBox".*?>.*?<\/div>/s', $html, $title);
    // Descriptions; capture group 1 already holds the inner text, so no second
    // match against each block is needed.
    preg_match_all('/<div class="descBox">([^<>]*)<\/div>/s', $html, $content);

    // Patterns applied inside a single thumbnail / title container.
    $imgRule = '/<img.*?src=[\"|\']?(.*?)[\"|\']?\s.*?>/i';
    $linkRule = '/<a href="([^<>]*)">([^<>]*)<\/a>/s';

    $descriptions = isset($content[1]) ? $content[1] : [];
    $insert = [];
    foreach ($descriptions as $k => $description) {
        // Image: only look for one when a thumbnail block exists at this position.
        $imgs = [];
        if (isset($avatar[0][$k])) {
            preg_match($imgRule, $avatar[0][$k], $imgs);
        }

        // Link and title: same positional pairing with the title blocks.
        $urlTitle = [];
        if (isset($title[0][$k])) {
            preg_match($linkRule, $title[0][$k], $urlTitle);
        }

        $insert[] = [
            // Description
            'content' => trim($description),
            // Image URL; left empty rather than emitting a bare base URL
            'img' => isset($imgs[1]) ? $baseUrl . $imgs[1] : '',
            // Link address; the href capture can swallow the target attribute
            'url' => isset($urlTitle[1]) ? str_replace('" target="_blank', '', $urlTitle[1]) : '',
            // Title
            'title' => isset($urlTitle[2]) ? $urlTitle[2] : '',
        ];
    }

    var_dump($insert);
    die;
}
/**
 * Performs an HTTP GET or POST request with cURL and returns the response body.
 *
 * @param string $url    Target URL. For GET it may already contain a query string.
 * @param array  $params Query parameters (GET) or form fields (POST).
 * @param string $method 'GET' or 'POST', case-insensitive.
 * @param array  $header Request header lines handed to CURLOPT_HTTPHEADER.
 * @param bool   $multi  POST only: when true, $params is given to cURL as-is
 *                       instead of being URL-encoded (used for file transfer).
 *
 * @return string|false Response body, or false when the method is unsupported,
 *                      cURL cannot be initialised, or the request fails.
 */
function httpCurl($url, $params, $method = 'POST', $header = array(), $multi = false){
    // Kept from the original implementation: sets the process-wide default timezone.
    date_default_timezone_set('PRC');

    // Reject unknown methods up front instead of firing a request without a URL.
    $method = strtoupper($method);
    if ($method !== 'GET' && $method !== 'POST') {
        echo '不支持的请求方式!';
        return false;
    }

    $opts = array(
        CURLOPT_TIMEOUT => 30,
        CURLOPT_RETURNTRANSFER => 1,
        // NOTE(review): TLS peer and host verification are switched off, so the
        // remote server's identity is not checked. Left as-is for compatibility.
        CURLOPT_SSL_VERIFYPEER => false,
        CURLOPT_SSL_VERIFYHOST => false,
        CURLOPT_HTTPHEADER => $header,
        CURLOPT_COOKIESESSION => true,
        CURLOPT_FOLLOWLOCATION => 1,
    );

    // Forward the local session cookie only when a session actually exists,
    // so no empty "name=" cookie is sent.
    // NOTE(review): this hands the local session id to whatever host $url
    // points at - confirm that is intended for third-party URLs.
    $sessionId = session_id();
    if ($sessionId !== false && $sessionId !== '') {
        $opts[CURLOPT_COOKIE] = session_name() . '=' . $sessionId;
    }

    /* Method-specific options */
    if ($method === 'GET') {
        $query = http_build_query($params);
        if ($query !== '') {
            // Use "&" when the URL already carries a query string, "?" otherwise.
            $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
        }
        $opts[CURLOPT_URL] = $url;
    } else {
        // For file transfer the fields must reach cURL unencoded.
        $opts[CURLOPT_URL] = $url;
        $opts[CURLOPT_POST] = 1;
        $opts[CURLOPT_POSTFIELDS] = $multi ? $params : http_build_query($params);
    }

    /* Initialise and execute the cURL request */
    $ch = curl_init();
    if ($ch === false) {
        return false;
    }
    curl_setopt_array($ch, $opts);
    $data = curl_exec($ch);
    curl_close($ch);

    return $data;
}