抓取vk.com粉丝页用户id

<?php


// Usage: php vk.php config_path [offset] [limit]
// Example contents of the file at config_path (one club URL per line):
/*
  https://vk.com/realmadrid_news
  https://vk.com/i.madrid.real
  https://vk.com/news_madridista
  https://vk.com/live.cristiano
  https://vk.com/live_madrid
  https://vk.com/realmadrid_rf
  https://vk.com/i_madridista_fans
  https://vk.com/rm_the_best_club
  https://vk.com/realmadridkz
 */
// ---------------------------------------------------------------------------
// Command-line handling.
//   $argv[1]  path of a text file listing one club URL per line (required)
//   $argv[2]  index of the first line to process (optional, default 0)
//   $argv[3]  number of lines to process (optional, 0 means "up to the end")
// After this section $offset is the first line index to scrape and $limit is
// the exclusive end index, so the loop handles lines in [$offset, $limit).
// ---------------------------------------------------------------------------
if (!isset($argv[1])) {
    echo 'required to special file path that include the urls of club to scrap';
    return;
}

// Optional window used to scrape only a slice of the URL list.
$offset = isset($argv[2]) ? (int) $argv[2] : 0;
$limit = isset($argv[3]) ? (int) $argv[3] : 0;

// file() returns false when the path cannot be read. This must be checked
// before count(): count(false) raises a warning on PHP 7.2+ and a TypeError
// on PHP 8, which would hide the message below.
$lines = file($argv[1]);
if ($lines === false) {
    echo 'file does not exist';
    return;
}

// Turn the "how many" limit into an exclusive end index.
if ($limit == 0) {
    $limit = count($lines);
} else {
    $limit += $offset;
}
// ---------------------------------------------------------------------------
// Main loop: for every selected club URL, fetch the club page, extract the
// club id and member count from it, then page through the "members" box and
// write every member id found to "<club-slug>.txt" (one id per CRLF line).
// ---------------------------------------------------------------------------
foreach ($lines as $line_num => $url) {
    // Only lines inside [$offset, $limit) are processed.
    if ($line_num < $offset || $line_num >= $limit) {
        continue;
    }

    $url = trim($url);
    if ($url === '') {
        // A blank line would otherwise make curl_exec() fail on an empty URL
        // and abort the whole run.
        continue;
    }

    // Output file is named after the part of the URL following the vk.com host.
    $file_name = trim(str_replace("https://vk.com/", '', $url)) . '.txt';
    echo "start {$url} $file_name\n";

    $curl = curl_init();
    curl_setopt($curl, CURLOPT_URL, $url);
    $header = ['user-agent:Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.89 Safari/537.36'];
    curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
    // NOTE(review): TLS peer/host verification is disabled while a session
    // cookie is being sent. Kept as in the original so the script keeps working
    // on machines without a CA bundle, but this should be re-enabled.
    curl_setopt($curl, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($curl, CURLOPT_SSL_VERIFYHOST, false);
    // NOTE(review): hard-coded session cookie; consider loading it from
    // configuration instead of keeping credentials in source.
    curl_setopt($curl, CURLOPT_COOKIE, 'remixstid=733344069_79104f06be6d06c989; remixlhk=afd63ce76580029ac7; remixdt=18000; remixtst=8e8fdb0a; remixsid=c213ff63618ee65ea76fac08c73a725d8d4b02f06dbaf8b7918af; remixlang=18; remixflash=29.0.0; remixscreen_depth=24; tmr_detect=0%7C1521453754938; remixseenads=0');

    $content = curl_exec($curl);
    if ($content === false || curl_errno($curl)) {
        // Double quotes so that \n is an actual newline.
        echo 'connect error ' . curl_error($curl) . "\n";
        curl_close($curl);
        return;
    }

    // The people-search link on the club page carries the numeric group id,
    // and the counter span inside it carries the member count.
    $club_pattern = '/<a href="\/search\?c\[section\]=people&c\[group\]=(\d+)".+<span class="header_count fl_l">(.+)<\/span>.+<\/a>/msU';
    if (preg_match($club_pattern, $content, $matches)) {
        $club_id = $matches[1];
        echo "club_id {$club_id} ";
        // The counter uses "," as thousands separator.
        $members_count = (int) str_replace(',', '', $matches[2]);
        echo "members_count {$members_count} \n";

        // Fetch the member ids page by page through the "members" box.
        curl_setopt($curl, CURLOPT_URL, 'https://vk.com/al_page.php');
        curl_setopt($curl, CURLOPT_POST, true);

        // The first successful write truncates the file; later ones append.
        // Tying this to the first write (not to page 0) avoids appending to a
        // stale file from a previous run when the first page yields nothing.
        $file_started = false;

        // Steps of 60: presumably the endpoint returns 60 members per page.
        for ($page_offset = 0; $page_offset < $members_count; $page_offset += 60) {
            $post_data = [
                'act' => 'box',
                'al' => 1,
                'al_ad' => 0,
                'offset' => $page_offset == 0 ? null : $page_offset,
                'oid' => '-' . $club_id,
                'tab' => 'members',
            ];
            curl_setopt($curl, CURLOPT_POSTFIELDS, $post_data);

            $content = curl_exec($curl);
            if ($content === false) {
                // Report the failed page and keep going with the next one.
                echo 'connect error ' . curl_error($curl) . "\n";
                continue;
            }

            if (preg_match_all('/data-id="(\d+)"/', $content, $matches)) {
                // One write per page instead of one per id; output is the same.
                $page_ids = implode("\r\n", $matches[1]) . "\r\n";
                file_put_contents($file_name, $page_ids, $file_started ? FILE_APPEND : 0);
                $file_started = true;
            }
        }
    } else {
        echo "it's failed to get club_id and members_count\n";
    }

    curl_close($curl);
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值