- 使用 Yii2 框架,在命令行执行:`php yii hello/html`
- 开箱即食
/**
 * Console action: scrape poems from so.gushiwen.cn into the FaPoetry table.
 *
 * Reads the author links from the sidebar of a seed listing page, skips every
 * author that already has rows in the table, then walks up to 10 listing pages
 * for each remaining author and inserts the poems found in batches of 100 rows.
 *
 * @return int Exit status: 0 when the crawl ran to completion, 1 when the seed
 *             page could not be fetched or its author sidebar was not found.
 */
public function actionHtml()
{
    require_once __DIR__ . DIRECTORY_SEPARATOR .'../common/simple_html_dom.php';

    $baseUrl = 'https://so.gushiwen.cn/shiwens/default.aspx';
    $columns = ['title', 'author', 'dynasty', 'content', 'createtime'];
    $batchSize = 100;
    $maxPage = 10; // the site only allows access up to page 10

    // Writes the given rows with a single multi-row INSERT; no-op for an empty list.
    $flush = function (array $rows) use ($columns) {
        if (empty($rows)) {
            return;
        }
        \Yii::$app->db->createCommand()
            ->batchInsert(FaPoetry::tableName(), $columns, $rows)
            ->execute();
    };

    // Seed page: only its sidebar is used, as the source of author names.
    $seedUrl = $baseUrl . '?page=1&tstr=&astr=%e6%9d%9c%e7%94%ab&cstr=&xstr=';
    $seedHtml = file_get_contents($seedUrl);
    if ($seedHtml === false) {
        fwrite(STDERR, "Failed to fetch the author list page: {$seedUrl}\n");
        return 1;
    }

    $seedDom = new \simple_html_dom();
    $seedDom->load($seedHtml);
    $sidebar = $seedDom->find('#type2 .sright', 0);
    if (!is_object($sidebar)) {
        $seedDom->clear();
        fwrite(STDERR, "Author sidebar '#type2 .sright' not found in {$seedUrl}\n");
        return 1;
    }

    // Copy the names out as plain strings so the seed DOM can be released
    // before the long crawl starts.
    $authors = [];
    foreach ($sidebar->find('a') as $link) {
        $authors[] = trim($link->plaintext);
    }
    $seedDom->clear();
    unset($seedDom, $sidebar);

    // Authors that already have at least one stored poem are skipped entirely.
    $knownAuthors = FaPoetry::find()->select('author')->groupBy('author')->column();

    $insertData = [];
    foreach ($authors as $author) {
        if ($author === '' || in_array($author, $knownAuthors, true)) {
            continue;
        }

        $encodedAuthor = urlencode($author);
        for ($page = 1; $page <= $maxPage; $page++) {
            $url = $baseUrl . '?page=' . $page . '&tstr=&astr=' . $encodedAuthor . '&cstr=&xstr=';
            $html = file_get_contents($url);
            if ($html === false) {
                // One unreachable page should not abort the whole crawl.
                fwrite(STDERR, "Failed to fetch {$url}; skipping this page\n");
                continue;
            }

            $dom = new \simple_html_dom();
            $dom->load($html);

            $authorDone = false;
            foreach ($dom->find('#leftZhankai .sons .cont') as $cont) {
                $titleNode = $cont->find('p a b', 0);
                $contentNode = $cont->find('.contson', 0);
                if (!is_object($titleNode) || !is_object($contentNode)) {
                    // Entry without a title or body: nothing usable to store.
                    continue;
                }

                // First '.source' link is the author; the last of any further
                // links is stored as the dynasty.
                $sourceLinks = $cont->find('.source a');
                $linkCount = count($sourceLinks);
                $poemTitle = trim($titleNode->plaintext);
                $poemAuthor = $linkCount > 0 ? trim($sourceLinks[0]->plaintext) : '';
                $poemDynasty = $linkCount > 1 ? trim($sourceLinks[$linkCount - 1]->plaintext) : '';

                if ($linkCount > 0) {
                    // Match on title and author together; a title alone is not
                    // unique across authors. A hit ends the crawl for this author.
                    // NOTE(review): rows still buffered in $insertData are not
                    // visible to this lookup until they are flushed.
                    $existing = FaPoetry::findOne(['title' => $poemTitle, 'author' => $poemAuthor]);
                    if (!empty($existing)) {
                        $authorDone = true;
                        break;
                    }
                }

                // Keys are listed in the same order as $columns; every row
                // always carries all five values.
                $insertData[] = [
                    'title' => $poemTitle,
                    'author' => $poemAuthor,
                    'dynasty' => $poemDynasty,
                    'content' => trim($contentNode->plaintext),
                    'createtime' => time(),
                ];

                if (count($insertData) >= $batchSize) {
                    $flush($insertData);
                    $insertData = [];
                }
            }

            // Release the parsed page explicitly so memory does not accumulate
            // across the many pages fetched in one run.
            $dom->clear();
            unset($dom);

            if ($authorDone) {
                break;
            }
        }
    }

    // Write whatever is left in the final, partially filled batch.
    $flush($insertData);

    return 0;
}