PHP 数据采集

<?php


  /**
   * Fetch a page over plain HTTP through a raw socket and return its content.
   *
   * Only the http:// scheme is supported (the socket is unencrypted). Redirects
   * (301/302/303/307/308) are followed up to $maxRedirects times.
   *
   * @param string $url          Absolute URL, with or without a leading "http://".
   * @param int    $maxRedirects Maximum number of redirects to follow.
   * @return string For a 200 response: the body (chunked transfer coding decoded).
   *                For any other status: the raw response (status line, headers, body).
   *                Empty string when the URL is unusable or the connection fails.
   */
  function get_page_content($url, $maxRedirects = 5){
	$target = preg_replace('/^http:\/\//i', '', trim((string)$url));
	// CR/LF inside the URL would let a caller inject extra request headers.
	if ($target === '' || preg_match('/[\r\n]/', $target)){
		return '';
	}
	// Any scheme still present here (https://, ftp://, ...) cannot be served by a plain socket.
	if (preg_match('/^[a-z][a-z0-9+.\-]*:\/\//i', $target)){
		return '';
	}

	// Split "host[:port]/path" into its parts.
	$segments = explode('/', $target);
	$authority = array_shift($segments);
	$path = '/'.implode('/', $segments);
	$hostParts = explode(':', $authority);
	$host = $hostParts[0];
	$port = (isset($hostParts[1]) && $hostParts[1] !== '') ? (int)$hostParts[1] : 80;
	if ($host === ''){
		return '';
	}

	$errno = 0;
	$errstr = '';
	// Failure is reported through the return value; @ only silences the duplicate warning.
	$fp = @fsockopen($host, $port, $errno, $errstr, 30);
	if (!$fp){
		return '';
	}

	// The Host header must carry the port when it is not the default one.
	$hostHeader = ($port === 80) ? $host : $host.':'.$port;
	fwrite($fp, "GET $path HTTP/1.1\r\nHost: $hostHeader\r\nAccept: */*\r\nReferer: http://$target\r\nUser-Agent: Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1)\r\nConnection: Close\r\n\r\n");
	// "Connection: Close" makes the server end the stream, so read until EOF.
	$Content = stream_get_contents($fp);
	fclose($fp);
	if ($Content === false){
		return '';
	}

	$parts = explode("\r\n\r\n", $Content, 2);
	$head = $parts[0];
	$body = isset($parts[1]) ? $parts[1] : '';

	// Redirect: look for Location in the header block only, never in the body.
	if ($maxRedirects > 0
		&& preg_match('/^HTTP\/\d\.\d 30[12378]\b/i', $head)
		&& preg_match('/^Location:\s*(\S+)/im', $head, $match)){
		$location = $match[1];
		if (substr($location, 0, 2) === '//'){
			// Protocol-relative redirect.
			$location = 'http:'.$location;
		} elseif ($location[0] === '/'){
			// Host-relative redirect: stay on the same host and port.
			$location = 'http://'.$hostHeader.$location;
		}
		return get_page_content($location, $maxRedirects - 1);
	}

	// Successful response: hand back the body only.
	if (preg_match('/^HTTP\/\d\.\d 200\b/i', $head)){
		if (preg_match('/^Transfer-Encoding:\s*chunked\s*$/im', $head)){
			$body = _decode_chunked_body($body);
		}
		return $body;
	}

	// Any other status: return the raw response unchanged.
	return $Content;
  }

  /**
   * Decode an HTTP/1.1 "chunked" message body into the plain payload.
   *
   * @param string $body Chunk-framed body.
   * @return string Decoded payload, or $body untouched when the framing is invalid.
   */
  function _decode_chunked_body($body){
	$decoded = '';
	$offset = 0;
	$length = strlen($body);
	while ($offset < $length){
		$lineEnd = strpos($body, "\r\n", $offset);
		if ($lineEnd === false){
			break;
		}
		// The size line may carry extensions after ';' which are ignored.
		$sizeParts = explode(';', substr($body, $offset, $lineEnd - $offset), 2);
		$sizeField = trim($sizeParts[0]);
		if (!preg_match('/^[0-9a-f]+$/i', $sizeField)){
			// Not valid chunk framing: return the payload as-is rather than corrupt it.
			return $body;
		}
		$size = (int)hexdec($sizeField);
		if ($size < 1){
			// Zero-length chunk terminates the body; trailers are ignored.
			break;
		}
		$decoded .= substr($body, $lineEnd + 2, $size);
		// Skip size line CRLF, chunk data and the CRLF that follows the data.
		$offset = $lineEnd + 2 + $size + 2;
	}
	return $decoded;
  }

/**
 * Scrape one member-listing page of www.csto.com and collect the e-mail
 * address shown on each listed member's profile page.
 *
 * For every member card on the listing page the member name is taken from the
 * first link, the profile page is fetched, and the first e-mail address inside
 * the profile's "contbox" section is recorded. Members whose card, profile
 * section or e-mail cannot be found are skipped.
 *
 * @param int|string $page Listing page identifier appended after "pg:".
 * @return array List of array("name" => string, "email" => string).
 */
function go($page){
	// Encode the page value so it cannot alter the request line or headers.
	$listing = (string)get_page_content("http://www.csto.com/member/pg:".rawurlencode((string)$page));
	$members = array();

	if (!preg_match_all('/<span class="authentication fr">(.*?)<span class="status">/s', $listing, $cards)){
		return $members;
	}

	foreach ($cards[1] as $cardHtml){
		// First link in the card holds the member name.
		if (!preg_match('/ <a href=".*?">(.*?)<\/a>/s', $cardHtml, $link)){
			continue;
		}
		$name = $link[1];

		$profile = (string)get_page_content("http://www.csto.com/u/".urlencode($name)."/profile");
		if (!preg_match('/<div class="contbox">(.*?)<div class="talent_right">/s', $profile, $box)){
			continue;
		}

		// {2,} instead of {2,4}: longer TLDs (.online, .email, ...) must not be truncated.
		if (preg_match('/[\w.%-]+@[\w.-]+\.[a-z]{2,}/i', $box[1], $mail)){
			$members[] = array("name"=>$name, "email"=>$mail[0]);
		}
	}
	return $members;
}
// Entry point: print a leading comma followed by the JSON-encoded result for
// the requested listing page.
// NOTE(review): the leading comma is presumably there so a caller can
// concatenate several responses — confirm with the consumer.
echo ",";
// Fall back to page 1 when the parameter is missing or not a scalar (e.g. ?page[]=1).
$page = (isset($_GET['page']) && is_scalar($_GET['page'])) ? $_GET['page'] : 1;
echo json_encode(go($page));
?>

 

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值