// Declare the response as HTML encoded in GBK; sent first, before anything is echoed.
header('Content-Type:text/html;charset=gbk');
// Remove the execution time limit so the crawl can run for as long as it needs.
set_time_limit(0);
// Start the crawl, fetching posts in batches of 100 URLs.
// NOTE(review): getMobiles() as visible in this file has no return statement, so
// $mobiles is expected to be null; the method reports progress by echoing instead.
$mobiles = Wwtx::getMobiles(100);
/**
 * Crawler for bbs.wwtx.cn: walks the forum boards, downloads posts and stores
 * phone-number-like strings found in them through the Mysql helper class.
 */
class Wwtx {
/**
 * Crawl every board linked from the forum home page and record new numbers.
 *
 * For each board the listing pages are fetched to collect post URLs; the posts
 * are then downloaded in batches with curl_multi, scanned for 11-digit numbers
 * beginning with 1 and a second digit from a fixed set, and every number not
 * already present in the `mobile` table is inserted and echoed.
 *
 * @param int $chunk_count Number of post URLs per concurrent batch (passed to array_chunk).
 * @return void Nothing is returned; results are echoed and written to the database.
 */
public static function getMobiles($chunk_count=100){
// Fetch the forum home page, which carries the links to the boards.
$forums = file_get_contents('http://bbs.wwtx.cn');
// Use a regex to pick out the first-page URL (forum-*-1.html) of every board.
preg_match_all('/"(http:\/\/bbs.wwtx.cn\/forum-.*?-1.html)">/', $forums, $matches);
// Board URLs (capture group 1) with duplicate values removed.
$all_forums = array_unique($matches[1]);
foreach ($all_forums as $key => $value) {
// $value is the URL of one board's first page; download it (warnings suppressed by @).
$forum_first_page_content = @file_get_contents($value);
// Intended to extract the board's total number of listing pages into $totalpages[1].
// NOTE(review): the pattern below is empty ('//') and has no capture group, so
// $totalpages has no index 1 and the page loop cannot run as written. The original
// pattern appears to have been lost (possibly stripped markup) - restore it.
preg_match('//',$forum_first_page_content,$totalpages);
// Collect the post URLs from every listing page of this board.
$posts = array();
for ($i=1; $i<=$totalpages[1] ; $i++) {
// Build the URL of listing page $i by rewriting the "-<n>.html" suffix of the first-page URL.
$page_url = preg_replace('/-\d+.html/', "-$i.html", $value);
// Download that listing page (warnings suppressed by @).
$page_content = @file_get_contents($page_url);
// Match all post URLs on the page, de-duplicate them and append them to $posts.
// NOTE(review): the preg_match_all call on the next line is cut off right after the
// opening quote of its pattern; the rest of the statement is missing and must be
// restored. Until then, everything from that quote down to the apostrophe in the
// "read post's content" comment lexes as a single string literal, so those lines
// are deliberately left untouched here.
preg_match_all('/
$posts = array_merge($posts, array_unique($matches[1]));
}
//more threads 多线程
$split_chunk = array_chunk($posts, $chunk_count);
foreach ($split_chunk as $chunk) {
$phones = array();
$mh = curl_multi_init();
// var_dump($chunk);die;
foreach ($chunk as $ikey => $post_url) {
// 初始化一个 cURL 对象
$conn[$ikey]=curl_init($post_url);
curl_setopt($conn[$ikey],CURLOPT_RETURNTRANSFER,1);
curl_setopt($conn[$ikey], CURLOPT_TIMEOUT, 0);
//curl_easy_setopt(curl, CURLOPT_NOSIGNAL, 1L);
curl_multi_add_handle ($mh,$conn[$ikey]);
}
do {
$mrc = curl_multi_exec($mh,$active);
usleep(250000);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
while ($active and $mrc == CURLM_OK) {
//if (curl_multi_select($mh) != -1) {
do {
$mrc = curl_multi_exec($mh, $active);
usleep(250000);
} while ($mrc == CURLM_CALL_MULTI_PERFORM);
//}
}
foreach ($chunk as $ikey => $post_url) {
//read post's content 阅读帖子的内容
$res[$ikey]=curl_multi_getcontent($conn[$ikey]);
// echo $res[$ikey];exit;
curl_close($conn[$ikey]);//close the handle
// Look in the post body for 1 + one of 3/4/5/7/8 + nine digits, preceded by at least one non-digit.
preg_match_all('/[^0-9]+(1[34578]\d{9})/i', $res[$ikey], $matches);
$temp_phones = array_unique($matches[1]);
// print_r($temp_phones);
if(empty($temp_phones)){
// Nothing in the post body: take the first space-uid-<id>.html link (the thread
// starter's page, according to the original comment) and scan that page instead.
preg_match('/http:\/\/bbs.wwtx.cn\/space-uid-(\d+).html/',$res[$ikey],$matches);
if(!empty($matches)) {
$uid = $matches[1];
$content = @file_get_contents($matches[0]);
// echo $content;exit;
preg_match_all('/[^0-9]+(1[34578]\d{9})/i', $content, $matches);
// var_dump();
$temp_phones = array_unique($matches[1]);
}
}
if(empty($temp_phones)){
// Still nothing: fall back to the user's profile-information page.
// NOTE(review): $uid is only assigned when the space-uid link matched above;
// otherwise it is undefined here or left over from an earlier post. This pattern
// also uses 1[3578] where the two above use 1[34578] - confirm whether leaving
// out 4 is intended.
$content = @file_get_contents("http://bbs.wwtx.cn/home.php?mod=space&uid={$uid}&do=profile");
preg_match_all('/[^0-9]+(1[3578]\d{9})/i', $content, $matches);
$temp_phones = array_unique($matches[1]);
}
// Done with this post: add its numbers to the batch result.
$phones = array_merge($phones,$temp_phones);
}
curl_multi_close($mh);
$phones = array_unique($phones);
// Running count of inserted numbers; declared static, so it carries over between batches and boards.
static $l = 0;
// NOTE(review): the commented-out line below holds a remote host and credentials;
// consider removing it from the source and rotating that password.
//$db = new Mysql('101.200.154.25','admin','kunya1818','test');
// A new Mysql object is created for every batch.
$db = new Mysql('localhost','root','','test');
foreach ($phones as $key => $value) {
// Skip numbers that already have a row in the mobile table.
$mobile=$db->getOne("select * from mobile where mobile='$value'");
if($mobile) continue;
$data['mobile'] = $value;
$data['from'] = 'bbs.wwtx.cn';
$data['create_at'] = time();
$db->insert('mobile',$data);
// Report the newly inserted number, followed by "|".
echo "$value|";
$l++;
// After every tenth inserted number, emit the string below.
// NOTE(review): that string is a bare line break, which will not show as a line
// break under the text/html Content-Type sent at the top of the script - possibly
// a stripped HTML break tag; confirm.
if($l%10==0){echo '
';}
// Push the output to the client right away, then pause one second per inserted number.
ob_flush();flush();sleep(1);
}
// Pause one second after each batch.
sleep(1);
}
}
}
}
/**
 * Minimal MySQL helper built on the mysqli extension.
 *
 * The ext/mysql functions (mysql_pconnect, mysql_query, ...) were removed in
 * PHP 7.0, so this class talks to the server through mysqli and always passes
 * its own connection explicitly instead of relying on the "last opened link".
 * Failures are reported through return values (or echo + exit in the
 * constructor); mysqli exceptions are caught so that holds under every
 * mysqli_report() mode.
 */
class Mysql
{
    /** @var mysqli|null Open connection, or null when none was established. */
    private $conn;

    /**
     * Open a persistent connection, select the database and set the charset.
     *
     * Echoes 'could not connect' or 'could not select db' and terminates the
     * script when the corresponding step fails.
     *
     * @param string $hostname Server host; "host:port" and ":/path/to/socket"
     *                         forms are accepted as they were by mysql_pconnect.
     * @param string $username Login name.
     * @param string $password Login password.
     * @param string $dbname   Database to select.
     * @param string $charset  Connection character set.
     */
    public function __construct($hostname, $username, $password, $dbname, $charset = 'utf8')
    {
        list($host, $port, $socket) = $this->splitHost($hostname);

        try {
            // The "p:" host prefix asks mysqli for a persistent connection.
            $conn = @mysqli_connect('p:' . $host, $username, $password, '', $port, $socket);
        } catch (mysqli_sql_exception $e) {
            $conn = false;
        }
        if (!$conn) {
            echo 'could not connect';
            exit;
        }
        $this->conn = $conn;

        try {
            $selected = mysqli_select_db($this->conn, $dbname);
        } catch (mysqli_sql_exception $e) {
            $selected = false;
        }
        if (!$selected) {
            echo 'could not select db';
            exit;
        }

        try {
            mysqli_set_charset($this->conn, $charset);
        } catch (mysqli_sql_exception $e) {
            // A charset failure is not fatal: the connection stays usable.
        }
    }

    /**
     * Close the connection, if one was opened.
     */
    public function __destruct()
    {
        // The constructor may have exited before a connection existed.
        if ($this->conn instanceof mysqli) {
            mysqli_close($this->conn);
            $this->conn = null;
        }
    }

    /**
     * Run a query and return its first row.
     *
     * @param string $sql Complete SQL statement; the caller is responsible for escaping.
     * @return array First row as an associative array, or an empty array when the
     *               query fails, produces no result set, or matches no rows.
     */
    public function getOne($sql)
    {
        $data = array();
        $result = $this->run($sql);
        if ($result instanceof mysqli_result) {
            $row = mysqli_fetch_assoc($result);
            if (is_array($row)) {
                $data = $row;
            }
            mysqli_free_result($result);
        }
        return $data;
    }

    /**
     * Insert one row.
     *
     * @param string $table Table name.
     * @param array  $data  Column name => value map; values are escaped and quoted as strings.
     * @return int|string|false The insert id when a row was written, false otherwise.
     */
    public function insert($table, $data)
    {
        if (empty($data)) {
            // Without columns the statement would be "() VALUES ()", which MySQL
            // accepts as an all-defaults row; treat it as a failed insert instead.
            return false;
        }

        $columns = array();
        $values = array();
        foreach ($data as $column => $value) {
            // Double any backtick so a column name cannot break out of its quoting.
            $columns[] = '`' . str_replace('`', '``', $column) . '`';
            $values[] = "'" . mysqli_real_escape_string($this->conn, (string) $value) . "'";
        }

        $sql = 'INSERT INTO `' . str_replace('`', '``', $table) . '` '
            . '(' . implode(',', $columns) . ') '
            . ' VALUES '
            . '(' . implode(',', $values) . ')';

        $res = $this->run($sql);
        if ($res && mysqli_affected_rows($this->conn) > 0) {
            return mysqli_insert_id($this->conn);
        }
        return false;
    }

    /**
     * Execute a statement on this object's connection, mapping mysqli exceptions to false.
     *
     * @param string $sql Statement to execute.
     * @return mysqli_result|bool Result set, true for statements without one, false on failure.
     */
    private function run($sql)
    {
        try {
            return mysqli_query($this->conn, $sql);
        } catch (mysqli_sql_exception $e) {
            return false;
        }
    }

    /**
     * Split a mysql_connect-style server string into host, port and socket.
     *
     * Only strings with exactly one colon are split, so IPv6 literals pass
     * through unchanged. Missing parts fall back to the mysqli ini defaults.
     *
     * @param string $hostname "host", "host:port" or ":/path/to/socket".
     * @return array Three elements: host (string), port (int), socket (string).
     */
    private function splitHost($hostname)
    {
        $host = (string) $hostname;
        $port = (int) ini_get('mysqli.default_port');
        $socket = (string) ini_get('mysqli.default_socket');

        if (substr_count($host, ':') === 1) {
            $sep = strpos($host, ':');
            $suffix = (string) substr($host, $sep + 1);
            $host = (string) substr($host, 0, $sep);
            if ($suffix !== '' && ctype_digit($suffix)) {
                $port = (int) $suffix;
            } elseif ($suffix !== '') {
                $socket = $suffix;
            }
        }
        if ($host === '') {
            // A bare "p:" would not be recognised as the persistent prefix.
            $host = 'localhost';
        }
        return array($host, $port, $socket);
    }
}