$data,$pre_matches,PREG_OFFSET_CAPTURE); //获取所有div前缀
preg_match_all('/$data,$suf_matches,PREG_OFFSET_CAPTURE); //获取所有div后缀
$hit = strpos($data,$div_id);if($hit == -1) return false; //未命中
$divs = array(); //合并所有div
foreach($pre_matches[0] as $index=>$pre_div){$divs[(int)$pre_div[1]] = 'p';$divs[(int)$suf_matches[0][$index][1]] = 's';
}//对div进行排序
$sort = array_keys($divs);asort($sort);$count = count($pre_matches[0]);foreach($pre_matches[0] as $index=>$pre_div){//if(($pre_matches[0][$index][1] < $hit) && ($hit < $pre_matches[0][$index+1][1])){$deeper = 0;//弹出被命中div前的div
while(array_shift($sort) != $pre_matches[0][$index][1] && ($count--)) continue;//对剩余div进行匹配,若下一个为前缀,则向下一层,$deeper加1,
//否则后退一层,$deeper减1,$deeper为0则命中匹配,计算div长度
foreach($sort as $key){if($divs[$key] == 'p') $deeper++;else if($deeper == 0) {$length = $key-$pre_matches[0][$index][1];break;
}else{$deeper--;
}
}$hitDivString = substr($data,$pre_matches[0][$index][1],$length).'';break;
}
}return $hitDivString;
}echo getWebDiv('id="taglist"','http://www.cnblogs.com/Zjmainstay/tag/');//End_php
考虑到不同网页中 id 属性的引号写法可能不同(双引号、单引号或不带引号),用于匹配的 id 字符串(例如 id="u")需要由用户按照目标网页的实际写法自行填写。
声明:此段 PHP 代码仅适用于读取带有 id 属性的 div 的内容。
View Code
<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Extract one element from an HTML document, located by a literal marker
 * string (typically an id attribute such as 'id="nav"').
 *
 * The first occurrence of $tag_id in the document is located, the closest
 * preceding "<$tag" opener is taken as the start of the element, and nested
 * openers/closers of the same tag name are counted until the matching closer
 * is reached.
 *
 * Limitations: this is offset/regex based, not a real HTML parser. The marker
 * is not checked to lie inside the opening tag itself, and comments, scripts
 * and unclosed tags of the same name can confuse the nesting count.
 *
 * @param string       $tag_id Literal text to search for, e.g. 'id="nav"'.
 * @param string|false $url    URL to download; false to use $data as given.
 * @param string       $tag    Tag name of the element to extract.
 * @param string|false $data   HTML source, used when $url is false.
 *
 * @return string|false The element from its opener up to its matching closer,
 *                      terminated with "</$tag>", or false when the document
 *                      is unavailable, the marker is not found, no opener
 *                      precedes it, or no matching closer exists.
 */
function getWebTag($tag_id, $url = false, $tag = 'div', $data = false)
{
    if ($url !== false) {
        $data = file_get_contents($url);
    }
    // Covers a failed download as well as a missing/empty $data argument.
    if (!is_string($data) || $data === '' || (string) $tag_id === '') {
        return false;
    }

    // Convert GB-encoded pages to UTF-8. The needle includes "charset=" so a
    // GB2312 page that merely mentions "utf-8" in its body is not mistaken
    // for a UTF-8 page.
    $charset_pos = stripos($data, 'charset');
    if ($charset_pos !== false
        && stripos($data, 'charset=utf-8', $charset_pos) === false
    ) {
        foreach (['gb2312', 'gbk'] as $charset) {
            if (stripos($data, 'charset=' . $charset, $charset_pos) !== false) {
                $converted = iconv($charset, 'utf-8', $data);
                // Keep the raw bytes if the conversion fails.
                if ($converted !== false) {
                    $data = $converted;
                }
                break;
            }
        }
    }

    // strpos() signals "not found" with false, never -1.
    $hit = strpos($data, (string) $tag_id);
    if ($hit === false) {
        return false;
    }

    // The lookahead stops "div" from matching "<divider" or "<div-x".
    $name = preg_quote($tag, '/');
    preg_match_all('/<' . $name . '(?![\w-])/i', $data, $pre_matches, PREG_OFFSET_CAPTURE);
    preg_match_all('/<\/' . $name . '(?![\w-])/i', $data, $suf_matches, PREG_OFFSET_CAPTURE);

    // Start of the element: the last opener at or before the marker.
    $start = false;
    foreach ($pre_matches[0] as $opener) {
        if ($opener[1] > $hit) {
            break;
        }
        $start = $opener[1];
    }
    if ($start === false) {
        return false;
    }

    // Merge openers ('p') and closers ('s') into one offset-ordered map.
    // Each list is added independently so unequal counts cannot corrupt it.
    $marks = [];
    foreach ($pre_matches[0] as $opener) {
        $marks[$opener[1]] = 'p';
    }
    foreach ($suf_matches[0] as $closer) {
        $marks[$closer[1]] = 's';
    }
    ksort($marks);

    // Walk the tags after the chosen opener: a nested opener goes one level
    // deeper, a closer comes back up, and a closer at depth 0 ends the element.
    $deeper = 0;
    foreach ($marks as $offset => $kind) {
        if ($offset <= $start) {
            continue;
        }
        if ($kind === 'p') {
            $deeper++;
        } elseif ($deeper === 0) {
            return substr($data, $start, $offset - $start) . '</' . $tag . '>';
        } else {
            $deeper--;
        }
    }

    // No matching closer: the markup is unbalanced.
    return false;
}

// Demo: print three elements taken from the same page, in order: the
// <ul id="nav"> list, the homeBanners element (default tag) and the
// performance <div>.
echo getWebTag('id="nav"', 'http://mail.163.com/html/mail_intro/', 'ul'),
    getWebTag('id="homeBanners"', 'http://mail.163.com/html/mail_intro/'),
    getWebTag('id="performance"', 'http://mail.163.com/html/mail_intro/', 'div');

// End of PHP script
修复:在 stripos($data,'charset=utf-8',$charset_pos) 的匹配串中加入了 charset= 前缀,避免某些 gb2312 编码的网页因内容中包含 utf-8 字样而被误判为 UTF-8 编码。用户也可以自行修改函数,传入一个确定的 charset 参数。
演示地址:parseDiv
本文原创发布php中文网,转载请注明出处,感谢您的尊重!
相关文章
相关视频