大家都知道,采集别人的网站时,对方可能会封掉你的IP地址,所以不能直接使用自己的真实IP地址;今天我们就来浅谈如何用PHP伪装IP地址来进行数据采集!
首先,我用的是ThinkPHP 5(TP5)框架,并把IP伪装及数据采集的逻辑封装到了应用公共文件common.php中,代码如下:
<?php // +---------------------------------------------------------------------- // | ThinkPHP [ WE CAN DO IT JUST THINK ] // +---------------------------------------------------------------------- // | Copyright (c) 2006-2016 http://thinkphp.cn All rights reserved. // +---------------------------------------------------------------------- // | Licensed ( http://www.apache.org/licenses/LICENSE-2.0 ) // +---------------------------------------------------------------------- // | Author: 流年 <liu21st@gmail.com> // +---------------------------------------------------------------------- // 应用公共文件 use think\Db; /************************************* 封装公共方法 *****************************************/ /** * 伪装IP 地址 - 抓取数据 * GET 请求 * @param $url * @return mixed */ function pretendIpData($url){ // 给与IP 段 $data = array( 119.120.'.'.rand(1,255).'.'.rand(1,255), 124.174.'.'.rand(1,255).'.'.rand(1,255), 116.249.'.'.rand(1,255).'.'.rand(1,255), 118.125.'.'.rand(1,255).'.'.rand(1,255), 42.175.'.'.rand(1,255).'.'.rand(1,255), 124.162.'.'.rand(1,255).'.'.rand(1,255), 211.167.'.'.rand(1,255).'.'.rand(1,255), 58.206.'.'.rand(1,255).'.'.rand(1,255), 117.24.'.'.rand(1,255).'.'.rand(1,255), 203.93.'.'.rand(1,255).'.'.rand(1,255), ); //随机获取一个IP地址 $ip = $data[array_rand($data)]; //模拟来源网址 $referUrl = "http://www.baidu.com"; $agentArray=[ //PC端的UserAgent "safari 5.1 – MAC"=>"Mozilla/5.0 (Windows NT 6.1) AppleWebKit/536.11 (KHTML, like Gecko) Chrome/20.0.1132.57 Safari/536.11", "safari 5.1 – Windows"=>"Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50", "Firefox 38esr"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; rv:38.0) Gecko/20100101 Firefox/38.0", "IE 11"=>"Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; .NET4.0C; .NET4.0E; .NET CLR 2.0.50727; .NET CLR 3.0.30729; .NET CLR 3.5.30729; InfoPath.3; rv:11.0) like Gecko", "IE 9.0"=>"Mozilla/5.0 (