eechen
2016/07/11 14:17
可以看看PHP的几个DOM操作库: Simple-HTML-DOM, phpQuery, Ganon
比如轻松抓取PHP官方首页新闻的标题和发布时间:
<?php
// Scrape the publication date and headline of every news entry on the
// php.net front page with the Simple HTML DOM library, then dump the
// collected records with var_export().
require dirname(__FILE__).'/simple_html_dom.php';

$url = 'http://cn2.php.net';
$html = file_get_html($url);
// file_get_html() yields false instead of a DOM object when the page could
// not be loaded; stop here rather than calling find() on a non-object.
if (!$html) {
    exit("Unable to load or parse {$url}\n");
}

$news = array();
foreach ($html->find('article.newsentry') as $article) {
    // find($selector, 0) returns null when nothing matches, so look the
    // nodes up first and skip entries that lack either field.
    $timeNode = $article->find('time', 0);
    $titleNode = $article->find('h2.newstitle', 0);
    if ($timeNode === null || $titleNode === null) {
        continue;
    }

    // Build a fresh record per article so no value can leak over from a
    // previous iteration.
    $news[] = array(
        'time' => trim($timeNode->plaintext),
        'title' => trim($titleNode->plaintext),
        // Optional: uncomment to capture the body text as well.
        //'content' => trim($article->find('div.newscontent', 0)->plaintext),
    );
}

var_export($news);
//输出
array (
0 =>
array (
'time' => '07 Jul 2016',
'title' => 'PHP 7.1.0 Alpha 3 Released',
),
1 =>
array (
'time' => '27 Jun 2016',
'title' => 'PHP 7.1.0 Alpha 2 Released',
),
2 =>
array (
'time' => '23 Jun 2016',
'title' => 'PHP 5.5.37 is released',
),
3 =>
array (
'time' => '23 Jun 2016',
'title' => 'PHP 5.6.23 is released',
),
4 =>
array (
'time' => '23 Jun 2016',
'title' => 'PHP 7.0.8 Released',
),
5 =>
array (
'time' => '09 Jun 2016',
'title' => 'PHP 7.1.0 Alpha 1 Released',
),
)