爬Q群好友

爬取QQ群信息和好友信息,可根据不同需求选择和改动。
ps: 爬取数据时尽量放慢速度,否则容易触发反爬而得不到响应数据。因为这不是正式项目,我不清楚还有哪些反爬措施,也没有检测过它的反爬机制。

# -*- coding: utf-8 -*-
"""
-------------------------------------------------
    File Name   :   collect_QQ_group_data.py
    date        :   19-3-29
    Author      :    Hebel
-------------------------------------------------
    Description:
    note: 
-------------------------------------------------
"""

import requests

class Collent_Group_info():
    """Collect QQ group lists and group member lists from qun.qq.com.

    The instance keeps the login cookies it was constructed with and uses
    them for every request; nothing is read from module-level globals.
    """

    def __init__(self, cookies, Gp_num=None):
        """Store the login cookies and an optional default group number.

        :param cookies: dict of cookies from a logged-in session; must
            contain a non-empty ``skey`` entry.
        :param Gp_num: default group number used by
            ``get_group_friends_info`` when none is passed explicitly.
        :raises ValueError: if ``cookies`` has no usable ``skey``.
        """
        self.Gp_num = Gp_num
        self.cookies = cookies
        self.skey = cookies.get("skey")
        if not self.skey:
            raise ValueError("无效的cookie")

    def GetBkn(self, skey):
        """Build the ``bkn`` request parameter from ``skey``.

        Starts from 5381 and, for each character, adds ``(value << 5)`` plus
        the character's code point; the result is masked to 31 bits.

        :param skey: the ``skey`` cookie value.
        :return: the token as a decimal string.
        """
        bkn = 5381
        for char in skey:
            bkn += (bkn << 5) + ord(char)
        return str(2147483647 & bkn)

    def pass_repeat(self, data_list):
        """Drop entries whose ``user_code`` has already been seen.

        The first occurrence of each ``user_code`` is kept and the original
        order is preserved. An empty list is returned unchanged.

        :param data_list: list of dicts, each read via ``.get("user_code")``.
        :return: list with duplicates removed.
        """
        assert isinstance(data_list, list)
        if not data_list:
            return data_list
        # A set gives O(1) membership tests; assumes user_code values are
        # hashable (they come from the "uin" field of the member data).
        seen_codes = set()
        unique_entries = []
        for entry in data_list:
            code = entry.get("user_code")
            if code not in seen_codes:
                seen_codes.add(code)
                unique_entries.append(entry)
        return unique_entries

    def set_form_data(self, Gp_num, cookies):
        """Build the form body for the group-member search request.

        :param Gp_num: group number; sent as a string under ``gc``.
        :param cookies: unused; kept so existing callers keep working.
        :return: dict with ``gc``, ``bkn``, ``end`` and ``sort`` fields.
        """
        return {
            "gc": str(Gp_num),
            "bkn": self.GetBkn(self.skey),
            "end": "20",
            "sort": "0",
        }

    def set_headers(self):
        """Return the HTTP request headers sent with every request."""
        headers = {
            'accept': "application/json, text/javascript, */*; q=0.01",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9",
            # NOTE(review): fixed length, presumably copied from a captured
            # browser request; confirm the HTTP client recomputes it.
            'content-length': "45",
            'content-type': "application/x-www-form-urlencoded; charset=UTF-8",
            'origin': "https://qun.qq.com",
            'referer': "https://qun.qq.com/member.html",
            'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) snap Chromium/73.0.3683.75 Chrome/73.0.3683.75 Safari/537.36",
            'x-requested-with': "XMLHttpRequest"}
        return headers

    def parse_group_friends_data(self, dict_data):
        """Extract member records from a group-member response.

        :param dict_data: decoded JSON response.
        :return: list of dicts with ``user_code`` (the ``uin`` field) and
            ``user_name`` (the ``nick`` field); empty if ``mems`` is absent.
        :raises ValueError: if the ``ec`` field is not 0.
        """
        # Value comparison, not identity: `is not 0` only works by accident
        # of small-int caching.
        if dict_data.get("ec") != 0:
            raise ValueError("请求失败cookie可能已过期")
        members = dict_data.get("mems") or []
        return [
            {"user_code": member.get("uin"), "user_name": member.get("nick")}
            for member in members
        ]

    def parse_group_info_data(self, dict_data):
        """Extract group records from a group-list response.

        :param dict_data: decoded JSON response.
        :return: list of dicts with ``group_num`` (``gc``), ``group_name``
            (``gn``) and ``have_num`` (``owner``); empty if ``join`` is absent.
        :raises ValueError: if the ``errcode`` field is not 0.
        """
        if dict_data.get("errcode") != 0:
            raise ValueError("请求失败cookie可能已过期")
        groups = dict_data.get("join") or []
        return [
            {
                "group_num": group.get("gc"),
                "group_name": group.get("gn"),
                "have_num": group.get("owner"),
            }
            for group in groups
        ]

    def _post_json(self, url, form_data):
        """POST ``form_data`` to ``url`` with this instance's cookies and decode the JSON reply.

        :raises IOError: if the request fails or the status code is not 200.
        """
        try:
            resp = requests.post(url=url, data=form_data, headers=self.set_headers(),
                                 cookies=self.cookies, timeout=6)
        except Exception as err:
            raise IOError("请求失败:{err}".format(err=err))
        if resp.status_code != 200:
            raise IOError("请求失败:status_code:{code}".format(code=resp.status_code))
        return resp.json()

    def get_group_friends_info(self, Gp_num=None):
        """Fetch the member list of one group.

        :param Gp_num: group number; falls back to the one given to the
            constructor when omitted.
        :return: list of member dicts (see ``parse_group_friends_data``).
        :raises IOError: on request failure or a non-200 status code.
        :raises ValueError: if the response reports an error code.
        """
        Gp_num = Gp_num if Gp_num else self.Gp_num
        assert Gp_num
        assert self.cookies
        url = "https://qun.qq.com/cgi-bin/qun_mgr/search_group_members"
        form_data = self.set_form_data(Gp_num=Gp_num, cookies=self.cookies)
        return self.parse_group_friends_data(self._post_json(url, form_data))

    def get_group_number_all(self):
        """Fetch the list of groups returned for the logged-in account.

        :return: list of group dicts (see ``parse_group_info_data``).
        :raises IOError: on request failure or a non-200 status code.
        :raises ValueError: if the response reports an error code.
        """
        url = "https://qun.qq.com/cgi-bin/qun_mgr/get_group_list"
        form_data = {"bkn": self.GetBkn(self.skey)}
        return self.parse_group_info_data(self._post_json(url, form_data))

    def collect_group_all_friends(self):
        """Fetch the members of every listed group, de-duplicated by ``user_code``.

        Each member dict is extended with the fields of the group it was
        found in. A group whose request fails is printed, recorded and
        skipped instead of aborting the whole run.

        :return: tuple ``(members, failed_groups)``.
        """
        all_members = []
        failed_groups = []
        # Use this instance rather than a module-level object.
        for group_info in self.get_group_number_all():
            try:
                members = self.get_group_friends_info(Gp_num=group_info.get("group_num"))
            except Exception as err:
                print(err)
                failed_groups.append(group_info)
                continue
            for member in members:
                member.update(group_info)
            all_members.extend(members)
        return self.pass_repeat(all_members), failed_groups

if __name__ == '__main__':

    # Cookies taken from a logged-in session. The values are left blank here;
    # the collector rejects an empty 'skey', so fill them in before running.
    cookies = {
        'pgv_pvi': '',
        'RK': '',
        'ptcz': '',
        'pgv_pvid': '',
        'pgv_si': '',
        'ptisp': 'cnc',
        'uin': '',
        'skey': '',
        'p_uin': '',
        'pt4_token': '',
        'p_skey': '',
        'traceid': '',
    }

    # Group number, used only when fetching one specific group.
    # Group_num = "377012112"

    cg = Collent_Group_info(cookies=cookies)

    # Usage examples (uncomment the one you need):
    #
    # Fetch the info of all groups.
    # info_list = cg.get_group_number_all()
    #
    # Fetch the members of the specified group.
    # info_list = cg.get_group_friends_info(Gp_num=Group_num)
    # for i in info_list:
    #     print(i)
    #
    # Fetch all groups together with their members.
    # friends_info_all_list, get_defeated_list = cg.collect_group_all_friends()
    #
    # for i in friends_info_all_list:
    #     print(i)


  • 0
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

_JackSparrow

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值