/**
 * Fetch a page and collect the href of every link found directly inside a
 * table cell (`tr/td`) whose class attribute contains "L".
 *
 * @param string $url Location of the page; anything file_get_contents() can read.
 * @return array List of href values in document order. Empty when the page
 *               cannot be read, is empty, or has no matching links.
 */
function zhangjie($url){
    // Start from an empty list so callers can always iterate the return value.
    $result = array();

    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        // loadHTML() rejects an empty source, so bail out before parsing.
        return $result;
    }

    // Scraped HTML is rarely well-formed: buffer libxml's parse warnings
    // instead of emitting them, and restore the caller's setting afterwards.
    $previousLibxmlSetting = libxml_use_internal_errors(true);
    $doc = new DOMDocument();
    $doc->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousLibxmlSetting);

    $lpath = new DOMXPath($doc);
    $list = $lpath->query("//tr/td[contains(@class,'L')]/a");
    foreach ($list as $node) {
        // getAttribute() yields '' for a missing href; skip such anchors.
        $href = $node->getAttribute('href');
        if ($href !== '') {
            $result[] = $href;
        }
    }

    return $result;
}
// Walk the numbered /top/lastupdate_N.html listing pages (N = 1..1145) and
// collect the link found in the first "L"-classed cell of every table row.
$result = array();

// Scraped HTML is rarely well-formed: buffer libxml's parse warnings instead
// of emitting one per malformed tag.
$previousLibxmlSetting = libxml_use_internal_errors(true);

for ($i = 1; $i < 1146; $i++) {
    $url = 'http://www.23us.so/top/lastupdate_'.$i.'.html';
    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        // A failed or empty download must not abort the whole crawl
        // (loadHTML() rejects an empty source); move on to the next page.
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($content);
    libxml_clear_errors();

    $xpath = new DOMXPath($doc);
    $list = $xpath->query("//tr/td[contains(@class,'L')][1]/a");
    foreach ($list as $node) {
        // getAttribute() yields '' for a missing href; skip such anchors.
        $href = $node->getAttribute('href');
        if ($href === '') {
            continue;
        }
        echo $href.'<br>';
        $result[] = $href;
    }
}

libxml_use_internal_errors($previousLibxmlSetting);

// array_unique() preserves the original keys; reindex so the list is contiguous.
$result = array_values(array_unique($result));
// For every collected detail-page link: download the page, store the novel's
// metadata in the `xiao` table, then download each chapter and store it in
// the `content` table, linked to the novel through `wid`.

// Scraped HTML is rarely well-formed: buffer libxml's parse warnings instead
// of emitting one per malformed tag.
$previousLibxmlSetting = libxml_use_internal_errors(true);

// Iterate the list itself. The previous `$i<$result` compared an integer with
// an array, which PHP always evaluates as true, so the loop never terminated.
foreach ($result as $i => $pageUrl) {
    if (!is_string($pageUrl) || $pageUrl === '') {
        continue;
    }

    // Download the detail page for this entry; previously the last listing
    // page left over in $content was re-parsed on every iteration.
    $content = file_get_contents($pageUrl);
    if ($content === false || $content === '') {
        // loadHTML() rejects an empty source; skip pages that failed to load.
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($content);
    libxml_clear_errors();
    $xpath = new DOMXPath($doc);

    // Keys are kept in the same order as the column list given to DB::insert()
    // below — presumably values() binds positionally; verify against the DB layer.
    $data[$i] = array(
        'title' => $xpath->evaluate("string(//h1)"),
        'type' => $xpath->evaluate("string(//tr[1]//td[1]/a)"),
        'author' => $xpath->evaluate("string(//tr[1]//td[2])"),
        'length' => $xpath->evaluate("string(//tr[2]//td[2])"),
        'updatedate' => $xpath->evaluate("string(//tr[2]//td[3])"),
        'click' => $xpath->evaluate("string(//tr[3]//td[1])"),
        'new' => $xpath->evaluate("string(//a[contains(@class,'read')]/@href)"),
        'image' => $xpath->evaluate("string(//div[contains(@class,'fl')][1]/a/img/@src)"),
    );

    // Store the record and keep the id of the new row for the chapter rows.
    list($insert_id, $total_rows) = DB::insert('xiao', array('title','type','author','length','updatedate','click','new','image'))->values($data[$i])->execute();

    // The 'new' link leads to the chapter index; zhangjie() returns the chapter links on it.
    $chapterIndexUrl = $data[$i]['new'];
    $nr = $chapterIndexUrl === '' ? array() : zhangjie($chapterIndexUrl);
    if (!is_array($nr)) {
        // Nothing iterable came back; treat it as "no chapters".
        $nr = array();
    }

    foreach ($nr as $k => $v) {
        if (!is_string($v) || $v === '') {
            continue;
        }

        // $v is a chapter link, not markup: download the page before parsing
        // it (previously the URL string itself was parsed as HTML).
        // NOTE(review): assumes the href is an absolute URL — confirm against the site.
        $chapterHtml = file_get_contents($v);
        if ($chapterHtml === false || $chapterHtml === '') {
            continue;
        }

        $doc = new DOMDocument();
        $doc->loadHTML($chapterHtml);
        libxml_clear_errors();
        $lpath = new DOMXPath($doc);

        $arr[$k] = array(
            'chapter' => $lpath->evaluate("string(//title)"),
            'content' => $lpath->evaluate("string(//dd[contains(@id,'contents')])"),
            'wid' => $insert_id,
        );
        DB::insert('content', array('chapter','content','wid'))->values($arr[$k])->execute();
        echo $data[$i]['title']."已爬去成功>";
    }
}

libxml_use_internal_errors($previousLibxmlSetting);
// NOTE: in testing this crawler ran very slowly; it needs to be optimized.
// NOTE(review): the statements below repeat the body of zhangjie() line for
// line, but no `function zhangjie($url){` header precedes them here, so the
// `return` and the closing brace have no enclosing definition. This looks like
// a duplicated paste — confirm and remove.
// Download the page at $url and parse it as HTML.
$content = file_get_contents($url);
$doc=new DomDocument();
$doc->loadHTML($content);
$lpath=new DOMXpath($doc);
// Select every link directly inside a table cell whose class contains "L".
$list = $lpath->query("//tr/td[contains(@class,'L')]/a");
foreach ($list as $node) {
// Collect the href attribute value of each matched link.
$result[] = $node->attributes->getNamedItem('href')->nodeValue;
}
return $result;
}
// Walk the numbered /top/lastupdate_N.html listing pages (N = 1..1145) and
// collect the link found in the first "L"-classed cell of every table row.
$result = array();

// Scraped HTML is rarely well-formed: buffer libxml's parse warnings instead
// of emitting one per malformed tag.
$previousLibxmlSetting = libxml_use_internal_errors(true);

for ($i = 1; $i < 1146; $i++) {
    $url = 'http://www.23us.so/top/lastupdate_'.$i.'.html';
    $content = file_get_contents($url);
    if ($content === false || $content === '') {
        // A failed or empty download must not abort the whole crawl
        // (loadHTML() rejects an empty source); move on to the next page.
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($content);
    libxml_clear_errors();

    $xpath = new DOMXPath($doc);
    $list = $xpath->query("//tr/td[contains(@class,'L')][1]/a");
    foreach ($list as $node) {
        // getAttribute() yields '' for a missing href; skip such anchors.
        $href = $node->getAttribute('href');
        if ($href === '') {
            continue;
        }
        echo $href.'<br>';
        $result[] = $href;
    }
}

libxml_use_internal_errors($previousLibxmlSetting);

// array_unique() preserves the original keys; reindex so the list is contiguous.
$result = array_values(array_unique($result));
// For every collected detail-page link: download the page, store the novel's
// metadata in the `xiao` table, then download each chapter and store it in
// the `content` table, linked to the novel through `wid`.

// Scraped HTML is rarely well-formed: buffer libxml's parse warnings instead
// of emitting one per malformed tag.
$previousLibxmlSetting = libxml_use_internal_errors(true);

// Iterate the list itself. The previous `$i<$result` compared an integer with
// an array, which PHP always evaluates as true, so the loop never terminated.
foreach ($result as $i => $pageUrl) {
    if (!is_string($pageUrl) || $pageUrl === '') {
        continue;
    }

    // Download the detail page for this entry; previously the last listing
    // page left over in $content was re-parsed on every iteration.
    $content = file_get_contents($pageUrl);
    if ($content === false || $content === '') {
        // loadHTML() rejects an empty source; skip pages that failed to load.
        continue;
    }

    $doc = new DOMDocument();
    $doc->loadHTML($content);
    libxml_clear_errors();
    $xpath = new DOMXPath($doc);

    // Keys are kept in the same order as the column list given to DB::insert()
    // below — presumably values() binds positionally; verify against the DB layer.
    $data[$i] = array(
        'title' => $xpath->evaluate("string(//h1)"),
        'type' => $xpath->evaluate("string(//tr[1]//td[1]/a)"),
        'author' => $xpath->evaluate("string(//tr[1]//td[2])"),
        'length' => $xpath->evaluate("string(//tr[2]//td[2])"),
        'updatedate' => $xpath->evaluate("string(//tr[2]//td[3])"),
        'click' => $xpath->evaluate("string(//tr[3]//td[1])"),
        'new' => $xpath->evaluate("string(//a[contains(@class,'read')]/@href)"),
        'image' => $xpath->evaluate("string(//div[contains(@class,'fl')][1]/a/img/@src)"),
    );

    // Store the record and keep the id of the new row for the chapter rows.
    list($insert_id, $total_rows) = DB::insert('xiao', array('title','type','author','length','updatedate','click','new','image'))->values($data[$i])->execute();

    // The 'new' link leads to the chapter index; zhangjie() returns the chapter links on it.
    $chapterIndexUrl = $data[$i]['new'];
    $nr = $chapterIndexUrl === '' ? array() : zhangjie($chapterIndexUrl);
    if (!is_array($nr)) {
        // Nothing iterable came back; treat it as "no chapters".
        $nr = array();
    }

    foreach ($nr as $k => $v) {
        if (!is_string($v) || $v === '') {
            continue;
        }

        // $v is a chapter link, not markup: download the page before parsing
        // it (previously the URL string itself was parsed as HTML).
        // NOTE(review): assumes the href is an absolute URL — confirm against the site.
        $chapterHtml = file_get_contents($v);
        if ($chapterHtml === false || $chapterHtml === '') {
            continue;
        }

        $doc = new DOMDocument();
        $doc->loadHTML($chapterHtml);
        libxml_clear_errors();
        $lpath = new DOMXPath($doc);

        $arr[$k] = array(
            'chapter' => $lpath->evaluate("string(//title)"),
            'content' => $lpath->evaluate("string(//dd[contains(@id,'contents')])"),
            'wid' => $insert_id,
        );
        DB::insert('content', array('chapter','content','wid'))->values($arr[$k])->execute();
        echo $data[$i]['title']."已爬去成功>";
    }
}

libxml_use_internal_errors($previousLibxmlSetting);
// NOTE: in testing this crawler ran very slowly; it needs to be optimized.