哎~~ 不想只拿点死工资,就跑去买基金,结果一直亏损。工作之余准备抓点数据研究一下,结果某某平台提供的接口居然还要收钱,无奈~只好跑到别人的网站上去找请求……
(有兴趣的朋友可以留下 email,一起设计一个适合自己的基金投资算法。
目前我正在写一个自己用的帮助系统,感兴趣的小伙伴可以联系我: http://www.zhengyang0818.cn )
由于是在 Windows 下写的,抓取多个内容时没有使用 PHP 的子进程,而是用了 curl_multi,速度还可以,将就用吧……
<?php
/**
* 基金数据捕获.
* User: zyy
* Date: 2017/12/22
* Time: 16:13
*/
// Fund categories: all, index funds (zs), stock funds (gp), hybrid funds (hh).
$urlOrderBy = [
    'all' => 'all',
    'zs'  => 'zs',
    'gp'  => 'gp',
    'hh'  => 'hh',
];
// Gain periods: 1 week (zzf), 1 month (1yzf), 3 months (3yzf),
// 6 months (6yzf), 1 year (1nzf), 3 years (3nzf).
$urlZf = [
    'zzf'  => 'zzf',
    '1yzf' => '1yzf',
    '3yzf' => '3yzf',
    '6yzf' => '6yzf',
    '1nzf' => '1nzf',
    '3nzf' => '3nzf',
];
/**
 * Build the fund-ranking request URL for fund.eastmoney.com.
 *
 * @param string $order     Fund category code, placed in the `ft` query parameter (see $urlOrderBy).
 * @param string $zf        Gain-period code used for the `sc` query parameter (see $urlZf).
 *                          A falsy value falls back to 'zzf' with no suffix.
 * @param int    $zfPaiMing Appended to $zf after a comma, only when $zf is set. The original author
 *                          describes it as the number of entries wanted for the gain ranking and notes
 *                          that it overrides $needNum for any period other than the weekly gain.
 *                          NOTE(review): that server-side behaviour is not verifiable from this file.
 * @param int    $needNum   Value of the `pn` (page size) query parameter.
 * @return string The complete request URL.
 */
function buildJiJinUrl($order,$zf,$zfPaiMing,$needNum=4000){
    // Sort column: "<period>,<rank count>" when a period is given, plain 'zzf' otherwise.
    $sortColumn = $zf ? $zf.','.$zfPaiMing : 'zzf';

    // The date window must run forwards: `sd` is the start (one year ago) and `ed` the end (today).
    // The previous version passed them the other way round, producing an inverted range.
    $startDate = date('Y-m-d', strtotime('-1 year'));
    $endDate = date('Y-m-d');

    return 'http://fund.eastmoney.com/data/rankhandler.aspx?op=ph&dt=kf&ft='
        . $order
        . '&rs=&gs=0&sc='
        . $sortColumn
        . "&st=desc&sd={$startDate}&ed={$endDate}&qdii=|&tabSubtype=,,,,,&pi=1&pn={$needNum}&dx=1&v=0.5339311461808629";
}
/**
 * Perform a single cURL request and return the response body.
 *
 * cURL reports failures by returning false rather than throwing, so failures are
 * detected explicitly and routed to the same "request failed" exit path the
 * function has always declared.
 *
 * @param string $url         Target URL.
 * @param string $method      'post' (case-insensitive) sends a POST; anything else sends a GET.
 * @param array  $requestData POST fields; only used for POST requests.
 * @return mixed The response body as a string. On failure the script terminates via exit().
 */
function curlRequest($url,$method='get',$requestData=[])
{
    try {
        $curlHandle = curl_init();
        if ($curlHandle === false) {
            throw new \RuntimeException('curl_init failed');
        }
        curl_setopt($curlHandle, CURLOPT_URL, $url);
        // NOTE(review): peer certificate verification is disabled, as in the original code.
        curl_setopt($curlHandle, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($curlHandle, CURLOPT_RETURNTRANSFER, true);
        if (is_string($method) && strtolower($method) === 'post') {
            curl_setopt($curlHandle, CURLOPT_POST, true);
            curl_setopt($curlHandle, CURLOPT_POSTFIELDS, $requestData);
        }

        $response = curl_exec($curlHandle);
        // Read the error text before the handle is closed.
        $failure = ($response === false) ? curl_error($curlHandle) : null;
        curl_close($curlHandle);

        if ($failure !== null) {
            throw new \RuntimeException($failure);
        }
        return $response;
    } catch (\Exception $e) {
        exit('请求失败:'.$e->getMessage());
    }
}
/**
 * Perform several cURL requests concurrently through curl_multi.
 *
 * Each response is stored under the key of the URL that produced it, and the
 * result follows the order of $url, so callers can match responses to requests.
 * (Previously responses were appended in completion order.)
 *
 * @param array $url         URLs to request; keys are preserved in the result.
 * @param array $method      Methods by position (0-based, in $url iteration order);
 *                           'post' (case-insensitive) sends a POST, anything else or a missing entry a GET.
 * @param array $requestData POST fields by position; a missing entry is treated as no fields.
 * @return array Response bodies keyed like $url. If curl_multi_exec reports an error,
 *               only the responses collected so far are returned.
 */
function curlRequests(array $url, array $method,array $requestData)
{
    try {
        // Prepare one easy handle per URL.
        $allCurlHandle = [];
        $curlNum = 0;
        foreach ($url as $key => $value) {
            $handle = curl_init();
            if ($handle === false) {
                throw new \RuntimeException('curl_init failed');
            }
            curl_setopt($handle, CURLOPT_URL, $value);
            // NOTE(review): peer certificate verification is disabled, as in the original code.
            curl_setopt($handle, CURLOPT_SSL_VERIFYPEER, false);
            curl_setopt($handle, CURLOPT_RETURNTRANSFER, true);
            $isPost = isset($method[$curlNum])
                && is_string($method[$curlNum])
                && strtolower($method[$curlNum]) === 'post';
            if ($isPost) {
                curl_setopt($handle, CURLOPT_POST, true);
                curl_setopt(
                    $handle,
                    CURLOPT_POSTFIELDS,
                    isset($requestData[$curlNum]) ? $requestData[$curlNum] : []
                );
            }
            $allCurlHandle[$key] = $handle;
            $curlNum++;
        }

        // Register every easy handle with a multi handle.
        $mh = curl_multi_init();
        foreach ($allCurlHandle as $handle) {
            curl_multi_add_handle($mh, $handle);
        }
        // Handles still registered with $mh; used for cleanup at the end.
        $attached = $allCurlHandle;

        $collected = [];
        $running = 0;
        do {
            // Older libcurl versions ask to be called again immediately.
            do {
                $status = curl_multi_exec($mh, $running);
            } while ($status == CURLM_CALL_MULTI_PERFORM);

            // Harvest every transfer that has finished so far.
            while ($done = curl_multi_info_read($mh)) {
                $key = array_search($done['handle'], $allCurlHandle, true);
                if ($key !== false) {
                    $collected[$key] = curl_multi_getcontent($done['handle']);
                    unset($attached[$key]);
                }
                curl_multi_remove_handle($mh, $done['handle']);
            }

            // A real multi-stack error: stop and return what has been collected.
            if ($status != CURLM_OK) {
                break;
            }

            // Block until there is activity instead of spinning at 100% CPU.
            if ($running && curl_multi_select($mh, 1) == -1) {
                usleep(1000);
            }
        } while ($running);

        // Release all handles explicitly.
        foreach ($attached as $handle) {
            curl_multi_remove_handle($mh, $handle);
        }
        foreach ($allCurlHandle as $handle) {
            curl_close($handle);
        }
        curl_multi_close($mh);

        // Return the responses in the order of the input URLs.
        $response = [];
        foreach ($allCurlHandle as $key => $unused) {
            if (array_key_exists($key, $collected)) {
                $response[$key] = $collected[$key];
            }
        }
        return $response;
    } catch (\Exception $e) {
        exit('请求失败:'.$e->getMessage());
    }
}
// Example: this URL would return the top 10 funds ranked by weekly gain.
//var_dump(curlRequest(buildJiJinUrl($urlOrderBy['all'],$urlZf['zzf'],10,10)));

// Weekly-gain ranking URLs: one each for index, stock and hybrid funds,
// plus a second index-fund URL that asks for 3 entries.
$url1 = buildJiJinUrl($urlOrderBy['zs'], $urlZf['zzf'], 1, 1);
$url2 = buildJiJinUrl($urlOrderBy['gp'], $urlZf['zzf'], 1, 1);
$url3 = buildJiJinUrl($urlOrderBy['hh'], $urlZf['zzf'], 1, 1);
$url4 = buildJiJinUrl($urlOrderBy['zs'], $urlZf['zzf'], 3, 3);

$urls = [$url1, $url2, $url3, $url4];
// Every request is a plain GET.
$methods = array_fill(0, count($urls), 'get');

// Fetch all URLs in a single batch and dump the raw responses.
$info = curlRequests($urls, $methods, []);
var_dump($info);