PHP 得到HTML中的文本信息
转载:
/**
 * Convert an HTML fragment to plain text.
 *
 * Processing steps, in order:
 *  1. <style> and <script> elements and HTML comments are removed together
 *     with their contents.
 *  2. Every remaining tag is replaced by a single space.
 *  3. Control bytes (ord <= 31, which includes newlines and tabs) are dropped.
 *  4. Entity-like "&...;" sequences are deleted (not decoded).
 *  5. Runs of spaces are collapsed into one space.
 *
 * The scan is byte-wise; bytes above 127 are copied through unchanged, so
 * multi-byte text is not split or altered.
 *
 * @param string $str HTML source.
 * @return string Extracted text.
 */
function SpHtml2Text($str)
{
    // Strip blocks whose *content* is not visible text. Lazy quantifiers stop
    // at the first closing tag; "s" lets "." span newlines, "i" ignores case.
    $stripped = preg_replace('~<style\b.*?</style>|<script\b.*?</script>|<!--.*?-->~is', '', $str);
    if ($stripped !== null) {
        // preg_replace() returns null on a PCRE error (e.g. backtrack limit);
        // in that case keep scanning the unstripped input instead of losing it.
        $str = $stripped;
    }

    $alltext = '';
    $inTag = false;
    $length = strlen($str);
    for ($i = 0; $i < $length; $i++) {
        $char = $str[$i];
        if ($inTag) {
            // Skip everything inside a tag until its closing '>'.
            if ($char === '>') {
                $inTag = false;
            }
            continue;
        }
        if ($char === '<') {
            // A tag starts: emit a space so adjacent text nodes stay separated.
            $inTag = true;
            $alltext .= ' ';
        } elseif (ord($char) > 31) {
            $alltext .= $char;
        }
    }

    // Normalise the ideographic (full-width) space U+3000, written as its
    // UTF-8 bytes. NOTE(review): assumes UTF-8 input — confirm for other encodings.
    $alltext = str_replace("\xE3\x80\x80", ' ', $alltext);
    // Remove "&name;"-style entities; a sequence is also ended by the next '&'.
    $alltext = preg_replace("/&([^;&]*)(;|&)/", "", $alltext);
    $alltext = preg_replace("/[ ]+/s", " ", $alltext);
    return $alltext;
}
/**
 * Extract the text of an HTML string via SpHtml2Text().
 *
 * @param string $str HTML source.
 * @param int    $r   When loosely equal to 0 (the default) the input is
 *                    converted as-is. Otherwise the input is passed through
 *                    stripslashes() before conversion and the result through
 *                    addslashes() afterwards.
 * @return string Extracted text.
 */
function Html2Text($str,$r=0)
{
    // Plain mode: no slash handling around the conversion.
    if ($r == 0) {
        return SpHtml2Text($str);
    }

    // Slash-aware mode: unescape, convert, then escape the result again.
    $unescaped = stripslashes($str);
    $plainText = SpHtml2Text($unescaped);

    return addslashes($plainText);
}
// Demo: fetch a page over HTTP and print its text content.
$html = file_get_contents('http://www.baidu.com');
if ($html === false) {
    // file_get_contents() returns false when the request fails; stop here
    // rather than passing a boolean to the converter.
    exit("Failed to fetch http://www.baidu.com\n");
}
echo Html2Text($html);
?>