PHPQuery
include 'phpQuery.php';

// Load a local XHTML file as a phpQuery document and select its <p> elements,
// then query for <ul> elements with pq().
phpQuery::newDocumentFileXHTML('my-xhtml.html')->find('p');
$ul = pq('ul');

// Hostname fixed: the original literal 'wwwbaidu.com' was missing the dot after "www".
$url = 'http://www.baidu.com';

// Plain download of the page markup. The original called file_git_content(),
// which does not exist; file_get_contents() is the built-in.
$data = file_get_contents($url);

// Alternative: let phpQuery fetch and parse the page itself. This overwrites
// $data with the phpQuery document that the pq() calls below are meant to query.
$data = phpQuery::newDocumentFile($url);

echo pq("title")->text(); // Get the page title
echo pq("div#header")->html();
// Build one record per <li> of the article list: link, fetched content, title, timestamp.
// $doc, $base and getContent() are expected to be defined elsewhere.
$proxyArray = array();
foreach (pq('.articleList2 ul li', $doc) as $item) {
    $record = array();

    // Every <a> writes the same three keys, so the last anchor in the item wins.
    foreach (pq('a', $item) as $anchor) {
        $label = pq($anchor)->text();
        $target = $base . $anchor->getAttribute('href');

        $record['href'] = $target;
        $record['content'] = getContent($target);
        $record['title'] = trim($label);
    }

    // The <span> text is stripped of surrounding '[' / ']' characters
    // before being converted to a Unix timestamp.
    foreach (pq('span', $item) as $stamp) {
        $rawTime = pq($stamp)->text();
        $record['time'] = strtotime(trim($rawTime, '[]'));
    }

    $proxyArray[] = $record;
}
phpQuery::newDocument($html, $contentType = null) 根据标记(markup)字符串新建一个文档。如果 $contentType 为空, 则根据标记内容自动检测类型和编码; 检测失败时, 按 utf-8 编码的 text/html 文档处理。
phpQuery::newDocumentFile($file, $contentType = null) 根据文件新建一个文档。类似于newDocument()
phpQuery::newDocumentHTML($html, $charset = 'utf-8')
phpQuery::newDocumentXHTML($html, $charset = 'utf-8')
phpQuery::newDocumentXML($html, $charset = 'utf-8')
phpQuery::newDocumentPHP($html, $contentType = null)
phpQuery::newDocumentFileHTML($file, $charset = 'utf-8')
phpQuery::newDocumentFileXHTML($file, $charset = 'utf-8')
phpQuery::newDocumentFileXML($file, $charset = 'utf-8')
phpQuery::newDocumentFilePHP($file, $contentType)
pq($param, $context = null);
pq() 相当于 jQuery 的 $()。它主要完成三件事情:
1. 载入标记资源:
// 载入到当前选中的文档(输入字符串的开头不能是文本节点): pq('<div/>')
// 载入到 ID 为 $pq->getDocumentID() 的文档: pq('<div/>', $pq->getDocumentID())
// 根据DOM节点的归属将同样的文档载入:pq('<div/>', DOMNode)
// 从phpQuery 对象载入文档: pq('<div/>', $pq)
2. 运行查询
// 根据最后一个选择的文档执行查询:pq('div.myClass')
// 根据$pq->getDocumentID()的ID从文档中进行查询:pq('div.myClass', $pq->getDocumentID())
// 在同样的文档上根据DOM节点的归属进行查询并且使用节点作为查询的根节点:pq('div.myClass', DOMNode)
// 在 phpQuery 对象所属的文档上进行查询, 并使用该对象的节点栈作为查询的根节点: pq('div.myClass', $pq)
3. 用 phpQuery 对象包装 DOM 节点
foreach(pq('li') as $li) // $li 是纯 DOM 节点, 将它变为 phpQuery 对象: $li = pq($li);
/**
 * Fetch a URL with cURL and return the response body.
 *
 * Sends a fixed desktop Chrome User-Agent, accepts gzip/deflate encoding and,
 * when the URL contains a host, a Referer of "http://<host>".
 *
 * @param string $url    URL to request.
 * @param bool   $https  When exactly true, TLS peer and host verification are disabled.
 * @param bool   $proxy  When exactly true, the request goes through the hard-coded proxy.
 * @param string $method 'post' (case-insensitive) sends $data as the request body;
 *                       any other value leaves cURL's default method in place.
 * @param mixed  $data   Payload passed to CURLOPT_POSTFIELDS for POST requests.
 *
 * @return string|false Response body, or false when the handle cannot be created
 *                      or the transfer fails.
 */
function request($url, $https = true, $proxy = false, $method = 'get', $data = null)
{
    $ch = curl_init($url);
    if ($ch === false) {
        return false;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');

    // Ask parse_url() for the host only: it yields null/false instead of an
    // array without a 'host' key, so URLs lacking a host no longer trigger an
    // undefined-index notice or send a bare "http://" referer.
    $host = parse_url($url, PHP_URL_HOST);
    if (is_string($host) && $host !== '') {
        curl_setopt($ch, CURLOPT_REFERER, 'http://' . $host);
    }

    curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/33.0.1750.152 Safari/537.36');

    if ($proxy === true) {
        curl_setopt($ch, CURLOPT_PROXY, '61.191.41.130');
        curl_setopt($ch, CURLOPT_PROXYPORT, 80);
    }

    if ($https === true) {
        // NOTE(review): this turns certificate checking OFF, which allows
        // man-in-the-middle interception. Kept for backward compatibility;
        // do not use for sensitive traffic.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    }

    // Compare case-insensitively so 'POST' is not silently treated as GET.
    if (is_string($method) && strtolower($method) === 'post') {
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }

    $content = curl_exec($ch);
    curl_close($ch);

    return $content;
}