一、说明
二、PHP 代码
<?php
/**
 * Demo of scraping with the simple_html_dom library.
 *
 * Two hard-coded HTML fragments are parsed: one with article headings/links
 * and one with images. The extracted values are printed with var_export().
 */
class Test
{
    /**
     * Loads the simple_html_dom library that start() depends on.
     */
    public function __construct()
    {
        require_once 'simple_html_dom.php';
    }

    /**
     * Extracts the article data and image URLs from the sample fragments and
     * echoes both result arrays inside a <pre> block.
     *
     * @return void
     */
    public function start()
    {
        $htmlObj = new simple_html_dom();

        $data = $this->extractArticles($htmlObj, $this->getPageHtml());
        $imgData = $this->extractImageUrls($htmlObj, $this->getImgHtml());

        echo "<pre>【文章数据】<br/>";
        var_export($data);
        echo "<br/>【图片数据】<br/>";
        var_export($imgData);
    }

    /**
     * Collects the link target and heading text found inside div#content.
     *
     * The n-th <a> is paired with the n-th <h3> of the container, which is
     * the pairing the sample markup relies on (one link per heading).
     *
     * @param simple_html_dom $htmlObj Parser instance; its document is replaced.
     * @param string          $html    Markup to parse.
     *
     * @return array List of ['href' => ..., 'h3_text' => ...] rows; empty when
     *               the container is not present in the markup.
     */
    private function extractArticles($htmlObj, $html)
    {
        $htmlObj->load($html);

        $contentObj = $htmlObj->find('div#content', 0);
        if ($contentObj === null) {
            // Without this guard a missing container is a fatal
            // "call to a member function find() on null".
            return [];
        }

        // Run each selector once instead of once per loop iteration.
        $anchors = $contentObj->find('a');
        $headings = $contentObj->find('h3');

        $data = [];
        foreach ($anchors as $index => $anchor) {
            // A link may have no heading at the same position; report null
            // for its text rather than reading a property of null.
            $heading = isset($headings[$index]) ? $headings[$index] : null;

            $data[] = [
                'href' => $anchor->href,
                'h3_text' => $heading !== null ? $heading->plaintext : null,
            ];
        }

        return $data;
    }

    /**
     * Collects the src attribute of every <img> located under div.content.
     *
     * @param simple_html_dom $htmlObj Parser instance; its document is replaced.
     * @param string          $html    Markup to parse.
     *
     * @return array List of src attribute values in document order.
     */
    private function extractImageUrls($htmlObj, $html)
    {
        $htmlObj->load($html);

        $imgData = [];
        // One selector evaluation; iterate the matches directly.
        foreach ($htmlObj->find('div.content img') as $img) {
            $imgData[] = $img->src;
        }

        return $imgData;
    }

    /**
     * Returns the sample article markup: a div#content holding seven
     * <h3 class="yh"> headings, each wrapping one link.
     *
     * Nowdoc is used so the fixture is never subject to variable
     * interpolation. The closing marker stays at column 0 so the literal is
     * returned without any indentation being stripped.
     *
     * @return string
     */
    public function getPageHtml()
    {
        return <<<'EOF'
<div id="content">
<h3 class="yh"><a target="_blank" href="http://www.***.com/0">同是过路同做过梦 本应是一对</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/1">人在少年梦中不觉 醒后要归去</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/2">三餐一宿也共一双 到底会是谁</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/3">但凡未得到 但凡是过去</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/4">总是最登对</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/5">台下你望台上我做 你想做的戏</a></h3>
<h3 class="yh"><a target="_blank" href="http://www.***.com/6">前事故人忘忧的你 可曾记得起</a></h3>
</div>
EOF;
    }

    /**
     * Returns the sample image markup: a div.content holding six wrapper
     * divs, each containing one <img>.
     *
     * Nowdoc is used so the fixture is never subject to variable
     * interpolation. The closing marker stays at column 0 so the literal is
     * returned without any indentation being stripped.
     *
     * @return string
     */
    public function getImgHtml()
    {
        return <<<'EOF'
<div class="content">
<div class="image img_wrap"><img src='http://*******.net/images/2022/01.png' /></div>
<div class="image img_wrap"><img src='http://*******.net/images/2022/02.png' /></div>
<div class="image img_wrap"><img src='http://*******.net/images/2022/03.png' /></div>
<div class="image img_wrap"><img src='http://*******.net/images/2022/04.png' /></div>
<div class="image img_wrap"><img src='http://*******.net/images/2022/05.png' /></div>
<div class="image img_wrap"><img src='http://*******.net/images/2022/06.png' /></div>
</div>
EOF;
    }
}
三、浏览器输出
【文章数据】
array (
0 =>
array (
'href' => 'http://www.***.com/0',
'h3_text' => '同是过路同做过梦 本应是一对',
),
1 =>
array (
'href' => 'http://www.***.com/1',
'h3_text' => '人在少年梦中不觉 醒后要归去',
),
2 =>
array (
'href' => 'http://www.***.com/2',
'h3_text' => '三餐一宿也共一双 到底会是谁',
),
3 =>
array (
'href' => 'http://www.***.com/3',
'h3_text' => '但凡未得到 但凡是过去',
),
4 =>
array (
'href' => 'http://www.***.com/4',
'h3_text' => '总是最登对',
),
5 =>
array (
'href' => 'http://www.***.com/5',
'h3_text' => '台下你望台上我做 你想做的戏',
),
6 =>
array (
'href' => 'http://www.***.com/6',
'h3_text' => '前事故人忘忧的你 可曾记得起',
),
)
【图片数据】
array (
0 => 'http://*******.net/images/2022/01.png',
1 => 'http://*******.net/images/2022/02.png',
2 => 'http://*******.net/images/2022/03.png',
3 => 'http://*******.net/images/2022/04.png',
4 => 'http://*******.net/images/2022/05.png',
5 => 'http://*******.net/images/2022/06.png',
)