PHP 怎么获取 <a> 标签的 URL?Goutte 怎么获取 a 标签里面的 URL?或者有没有好用的 PHP 爬虫库,谢谢...

Goutte怎么获取a标签里面的url?或者好用的PHP爬虫库,谢谢

[],

'link' => [],

'content' => [],

'source' => [],

'date' => [],

];

/**
 * Creates the HTTP client and fetches the news listing page, keeping the
 * resulting crawler for the getter methods.
 *
 * NOTE(review): `Client` is presumably Goutte\Client; its import is not
 * visible in this part of the file.
 * NOTE(review): no request timeout is configured here.
 *
 * @throws \Exception With code 1 when creating the client or fetching the page
 *                    fails; the original exception is attached as "previous".
 */
public function __construct()
{
    try {
        $this->_client = new Client();
        $this->_crawler = $this->_client->request(
            'GET',
            'http://www.ningshan.gov.cn/Category_90/Index.aspx'
        );
    } catch (\Exception $e) {
        // Fully-qualified so the catch still matches inside a namespace;
        // chain the cause so the original stack trace is not lost.
        throw new \Exception($e->getMessage(), 1, $e);
    }
}

/**
 * Appends the text of every `div#list>ul>li>span` node on the fetched page
 * to $this->_news['date'], in document order.
 */
public function getDate()
{
    $spans = $this->_crawler->filter('div#list>ul>li>span');

    // each() hands back the callback's return values as an array.
    $texts = $spans->each(function ($span) {
        return $span->text();
    });

    foreach ($texts as $text) {
        $this->_news['date'][] = $text;
    }
}

/**
 * Collects title, absolute URL and source for every `div#list>ul>li>a`
 * anchor on the fetched page, skipping the anchor whose text is the
 * category name itself.
 *
 * Results are appended to $this->_news['title'], ['link'] and ['source'].
 */
public function getTitle()
{
    $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {
        $title = $node->text();

        // The category anchor appears in the same list; it is not a news item.
        if ($title === '宁陕要闻') {
            return;
        }

        $this->_news['title'][] = $title;
        // link() yields a Link object; getUri() is a method (parentheses
        // required) and gives the URL string, which is what json_encode needs.
        $this->_news['link'][] = $node->link()->getUri();
        $this->_news['source'][] = '宁陕要闻';
    });
}

}

//-----------------------------------

// Driver: build the spider, collect dates and titles, then print the
// gathered news as JSON with non-ASCII characters left unescaped.
try {
    $spider = new Spider();

    $spider->getDate();
    $spider->getTitle();

    $encoded = json_encode($spider->_news, JSON_UNESCAPED_UNICODE);
    echo $encoded;
} catch (Exception $error) {
    // On any failure, print only the exception message.
    echo $error->getMessage();
}

回复内容:

Goutte怎么获取a标签里面的url?或者好用的PHP爬虫库,谢谢

[],

'link' => [],

'content' => [],

'source' => [],

'date' => [],

];

/**
 * Creates the HTTP client and fetches the news listing page, keeping the
 * resulting crawler for the getter methods.
 *
 * NOTE(review): `Client` is presumably Goutte\Client; its import is not
 * visible in this part of the file.
 * NOTE(review): no request timeout is configured here.
 *
 * @throws \Exception With code 1 when creating the client or fetching the page
 *                    fails; the original exception is attached as "previous".
 */
public function __construct()
{
    try {
        $this->_client = new Client();
        $this->_crawler = $this->_client->request(
            'GET',
            'http://www.ningshan.gov.cn/Category_90/Index.aspx'
        );
    } catch (\Exception $e) {
        // Fully-qualified so the catch still matches inside a namespace;
        // chain the cause so the original stack trace is not lost.
        throw new \Exception($e->getMessage(), 1, $e);
    }
}

/**
 * Appends the text of every `div#list>ul>li>span` node on the fetched page
 * to $this->_news['date'], in document order.
 */
public function getDate()
{
    $spans = $this->_crawler->filter('div#list>ul>li>span');

    // each() hands back the callback's return values as an array.
    $texts = $spans->each(function ($span) {
        return $span->text();
    });

    foreach ($texts as $text) {
        $this->_news['date'][] = $text;
    }
}

/**
 * Collects title, absolute URL and source for every `div#list>ul>li>a`
 * anchor on the fetched page, skipping the anchor whose text is the
 * category name itself.
 *
 * Results are appended to $this->_news['title'], ['link'] and ['source'].
 */
public function getTitle()
{
    $this->_crawler->filter('div#list>ul>li>a')->each(function ($node) {
        $title = $node->text();

        // The category anchor appears in the same list; it is not a news item.
        if ($title === '宁陕要闻') {
            return;
        }

        $this->_news['title'][] = $title;
        // link() yields a Link object; getUri() is a method (parentheses
        // required) and gives the URL string, which is what json_encode needs.
        $this->_news['link'][] = $node->link()->getUri();
        $this->_news['source'][] = '宁陕要闻';
    });
}

}

//-----------------------------------

// Driver: build the spider, collect dates and titles, then print the
// gathered news as JSON with non-ASCII characters left unescaped.
try {
    $spider = new Spider();

    $spider->getDate();
    $spider->getTitle();

    $encoded = json_encode($spider->_news, JSON_UNESCAPED_UNICODE);
    echo $encoded;
} catch (Exception $error) {
    // On any failure, print only the exception message.
    echo $error->getMessage();
}

刚找到的解决办法:先用 selectLink() 按链接文字选中 a 标签,再调用 link() 得到链接对象,最后调用 getUri() 取得 URL(注意 getUri() 是方法,必须带括号):

// Request the page; assumes $client is an already-constructed Goutte client
// (its creation is not shown in this snippet).
$crawler = $client->request('GET', 'http://www.symfony.com/blog/');

// Select the anchor by its visible link text and obtain its link object.
$link = $crawler->selectLink('Security Advisories')->link();

// getUri() is a method call (note the parentheses); print the link's URI.
print_r($link->getUri());

手册:http://symfony.com/doc/curren...

GIT:https://github.com/FriendsOfP...

采集类参考:http://flc.ren/2016/06/528.html

相关标签:php

本文原创发布php中文网,转载请注明出处,感谢您的尊重!

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值