php获取页面div内容,php获取页面并切割页面div内容

最新推荐文章于 2021-04-16 08:51:31 发布

搁浅的鲎

最新推荐文章于 2021-04-16 08:51:31 发布

阅读量245

点赞数

文章标签： php获取页面div内容

$data,$pre_matches,PREG_OFFSET_CAPTURE); //获取所有div前缀

preg_match_all('/$data,$suf_matches,PREG_OFFSET_CAPTURE); //获取所有div后缀

$hit = strpos($data,$div_id);if($hit == -1) return false; //未命中

$divs = array(); //合并所有div

foreach($pre_matches[0] as $index=>$pre_div){$divs[(int)$pre_div[1]] = 'p';$divs[(int)$suf_matches[0][$index][1]] = 's';

}//对div进行排序

$sort = array_keys($divs);asort($sort);$count = count($pre_matches[0]);foreach($pre_matches[0] as $index=>$pre_div){//if(($pre_matches[0][$index][1] < $hit) && ($hit < $pre_matches[0][$index+1][1])){$deeper = 0;//弹出被命中div前的div

while(array_shift($sort) != $pre_matches[0][$index][1] && ($count--)) continue;//对剩余div进行匹配，若下一个为前缀，则向下一层，$deeper加1，

//否则后退一层，$deeper减1，$deeper为0则命中匹配，计算div长度

foreach($sort as $key){if($divs[$key] == 'p') $deeper++;else if($deeper == 0) {$length = $key-$pre_matches[0][$index][1];break;

}else{$deeper--;

}

}$hitDivString = substr($data,$pre_matches[0][$index][1],$length).'';break;

}

}return $hitDivString;

}echo getWebDiv('id="taglist"','http://www.cnblogs.com/Zjmainstay/tag/');//End_php

考虑到不同页面中 id 属性的引号写法不一致（双引号、单引号等），查找用的完整属性字符串（例如 id="u"）由用户自己填写。

声明：此段 PHP 代码只适用于读取带有 id 属性的 div 的内容。

test.jsp?url=http%3A%2F%2Fimages.cnblogs.com%2FOutliningIndicators%2FContractedBlock.gif&refer=http%3A%2F%2Fwww.cnblogs.com%2FZjmainstay%2Farchive%2F2012%2F08%2F06%2Fphp_getDivContain.html

test.jsp?url=http%3A%2F%2Fimages.cnblogs.com%2FOutliningIndicators%2FExpandedBlockStart.gif&refer=http%3A%2F%2Fwww.cnblogs.com%2FZjmainstay%2Farchive%2F2012%2F08%2F06%2Fphp_getDivContain.html View Code

<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Extract one HTML element, including its nested same-name children, from a page.
 *
 * The element is chosen by a plain substring search: the first occurrence of
 * $tag_id is located, and the nearest `<$tag` opening tag at or before that
 * position is taken as the start of the element. Opening and closing tags of
 * the same name are then counted until the matching closing tag is reached.
 *
 * All offsets are byte offsets (strpos / preg_* without the `u` modifier /
 * substr), so they stay consistent with each other on multi-byte text.
 *
 * @param string       $tag_id Marker text to search for, e.g. 'id="nav"'. The caller
 *                             supplies the exact attribute text, including quote style.
 * @param string|false $url    Page to download; when not false it overrides $data.
 * @param string       $tag    Tag name to extract (case-insensitive), default 'div'.
 * @param string|false $data   HTML to search when $url is false.
 *
 * @return string|false The element from its opening `<$tag` up to the matching
 *                      closing tag, terminated with `</$tag>`; false when the page
 *                      cannot be read, the marker is absent, or no balanced
 *                      element encloses it.
 */
function getWebTag($tag_id, $url = false, $tag = 'div', $data = false)
{
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    if (!is_string($data) || $data === ''
        || !is_string($tag_id) || $tag_id === ''
        || !is_string($tag) || $tag === ''
    ) {
        return false;
    }

    // Convert GB-encoded pages to UTF-8. A page that declares charset=utf-8 is
    // left alone: re-encoding UTF-8 to UTF-8 gains nothing and iconv() returns
    // false on a malformed byte, which would discard the whole document.
    $charsetPos = stripos($data, 'charset');
    if ($charsetPos !== false && stripos($data, 'charset=utf-8', $charsetPos) === false) {
        foreach (array('gb2312', 'gbk') as $charset) {
            if (stripos($data, 'charset=' . $charset, $charsetPos) !== false) {
                $converted = iconv($charset, 'utf-8', $data);
                if ($converted !== false) {
                    // Keep the original bytes when the conversion fails.
                    $data = $converted;
                }
                break;
            }
        }
    }

    // strpos() signals "not found" with false, never with -1.
    $hit = strpos($data, $tag_id);
    if ($hit === false) {
        return false;
    }

    // Collect opening and closing tags in document order with one scan.
    // The look-ahead makes the tag name match exactly, so searching for "p"
    // does not also pick up "<pre" or "<param".
    $pattern = '/<(\/?)' . preg_quote($tag, '/') . '(?=[\s\/>])/i';
    if (!preg_match_all($pattern, $data, $tags, PREG_SET_ORDER | PREG_OFFSET_CAPTURE)) {
        return false;
    }

    // Nearest opening tag at or before the marker. Walking every tag in order
    // also covers the last opening tag in the document.
    $startIndex = -1;
    foreach ($tags as $i => $match) {
        if ($match[0][1] > $hit) {
            break;
        }
        if ($match[1][0] === '') {
            $startIndex = $i;
        }
    }
    if ($startIndex < 0) {
        return false;
    }

    // Each nested opening tag goes one level deeper; a closing tag seen at
    // depth 0 is the one that terminates the selected element.
    $start = $tags[$startIndex][0][1];
    $depth = 0;
    $total = count($tags);
    for ($i = $startIndex + 1; $i < $total; $i++) {
        if ($tags[$i][1][0] === '') {
            $depth++;
            continue;
        }
        if ($depth === 0) {
            return substr($data, $start, $tags[$i][0][1] - $start) . '</' . $tag . '>';
        }
        $depth--;
    }

    // Unbalanced markup: the selected element is never closed.
    return false;
}

// Demo: print three elements taken from the same page. The page is downloaded
// once and handed to getWebTag() through its $data argument instead of being
// fetched again for every call.
$introUrl = 'http://mail.163.com/html/mail_intro/';
$introHtml = file_get_contents($introUrl);
if ($introHtml !== false) {
    echo getWebTag('id="nav"', false, 'ul', $introHtml);
    echo getWebTag('id="homeBanners"', false, 'div', $introHtml);
    echo getWebTag('id="performance"', false, 'div', $introHtml);
}

//End_php

修复：stripos($data,'charset=utf-8',$charset_pos) 加入charset=，避免有些gb2312格式的网页中包含utf-8造成错误。或者用户可以自行修改函数传入一个确定的charset参数。

演示地址：parseDiv

本文原创发布php中文网，转载请注明出处，感谢您的尊重！

php获取页面div内容,php获取页面并切割页面div内容

“相关推荐”对你有帮助么？