PHP利用正则表达式抓取页面数据
(记录一下,00后程序员第一天写博客,2021/4/1)
(抓取招投标网站中的数据)
<?php
/*
 * Downloads the page at $url, normalises it to UTF-8, and prints every link
 * found on it as "href title<br>", one link per line.
 *
 * Only anchors written exactly as <a href='...'>...</a> (single-quoted href,
 * no other attributes, link text on one line) are matched by the pattern.
 */

// Content-Type and Cache-Control are two separate headers; packing both into
// one Content-Type value yields a malformed header. Skip them when output has
// already started, otherwise header() only raises a warning.
if (!headers_sent()) {
    header('Content-Type: text/html; charset=utf-8');
    header('Cache-Control: private');
}
// The mbstring.internal_encoding ini setting is deprecated; use the function.
mb_internal_encoding('UTF-8');
date_default_timezone_set('Asia/Shanghai');

// Fetch the page.
$url = "http://gpcgd.gd.gov.cn/bsfw/cgxx/cgxxgg/index.html";
$contents = file_get_contents($url);
if ($contents === false) {
    // Nothing to parse; stop instead of running the rest on a boolean.
    exit('Failed to fetch ' . $url);
}

// Strict detection only reports an encoding the bytes are actually valid in;
// it returns false when none of the candidates fit.
$encode = mb_detect_encoding($contents, array("ASCII", "UTF-8", "GB2312", "GBK", "BIG5"), true);
if ($encode !== false && $encode !== 'UTF-8' && $encode !== 'ASCII') {
    // mb_detect_encoding() returns mbstring encoding names, so convert with
    // mbstring as well. ASCII is already valid UTF-8 and needs no conversion.
    $contents = mb_convert_encoding($contents, 'UTF-8', $encode);
}

// $array[1] holds the hrefs, $array[2] the link texts.
$preg = '/<a href=\'(.*?)\'>(.*?)<\/a>/';
if (preg_match_all($preg, $contents, $array) === false) {
    exit('Failed to parse ' . $url);
}

foreach ($array[1] as $key => $val) {
    // The scraped markup is untrusted: drop nested tags from the link text and
    // escape both values. double_encode = false keeps entities that are
    // already present in the source page (e.g. &amp;) from being escaped twice.
    $link = htmlspecialchars($val, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    $title = htmlspecialchars(strip_tags($array[2][$key]), ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
    echo $link . ' ' . $title . '<br>';
}
exit();