爬取QQ群信息和好友信息,可根据不同需求选择和改动。
ps: 爬取数据时尽量放慢速度,否则容易触发反爬而得不到响应数据;因为这不是正式项目,没有检测过它的反爬机制,也不清楚还有哪些反爬措施。
# -*- coding: utf-8 -*-
"""
-------------------------------------------------
File Name : collect_QQ_group_data.py
date : 19-3-29
Author : Hebel
-------------------------------------------------
Description:
note:
-------------------------------------------------
"""
import requests
class Collent_Group_info():
    """Collect QQ group lists and group member lists from qun.qq.com.

    The instance stores the login cookies and the ``skey`` cookie value; the
    ``bkn`` request parameter sent with every request is derived from ``skey``.

    Args:
        cookies: dict of cookies from a logged-in session; must contain a
            non-empty ``"skey"`` entry.
        Gp_num: optional default group number used by
            ``get_group_friends_info`` when none is passed explicitly.

    Raises:
        ValueError: if ``cookies`` has no usable ``"skey"`` value.
    """

    def __init__(self, cookies, Gp_num=None):
        self.Gp_num = Gp_num
        self.cookies = cookies
        self.skey = cookies.get("skey")
        if not self.skey:
            raise ValueError("无效的cookie")

    def GetBkn(self, skey):
        """Return the ``bkn`` parameter computed from ``skey``.

        Starting from 5381, each character contributes
        ``value += (value << 5) + ord(char)``; the total is masked to the low
        31 bits and returned as a decimal string.
        """
        bkn = 5381
        for char in skey:
            bkn += (bkn << 5) + ord(char)
        return str(bkn & 2147483647)

    def pass_repeat(self, data_list):
        """Drop entries whose ``"user_code"`` was already seen.

        The first entry for each ``user_code`` is kept and the input order is
        preserved. An empty list is returned unchanged.

        Raises:
            TypeError: if ``data_list`` is not a list.
        """
        if not isinstance(data_list, list):
            # Explicit raise instead of assert so the check survives ``python -O``.
            raise TypeError("data_list必须是list")
        if not data_list:
            return data_list
        seen_codes = set()  # set membership keeps de-duplication linear
        unique_entries = []
        for entry in data_list:
            code = entry.get("user_code")
            if code not in seen_codes:
                seen_codes.add(code)
                unique_entries.append(entry)
        return unique_entries

    def set_form_data(self, Gp_num, cookies):
        """Build the form payload for the member-search request.

        ``cookies`` is not used (``bkn`` comes from ``self.skey``); the
        parameter is kept so existing callers keep working.
        """
        # NOTE(review): "end" and "sort" are fixed values taken over unchanged;
        # presumably the last member index and the sort order - confirm against
        # the endpoint before relying on more than this page of results.
        return {
            "gc": str(Gp_num),
            "bkn": self.GetBkn(self.skey),
            "end": "20",
            "sort": "0",
        }

    def set_headers(self):
        """Return the request headers sent with every POST."""
        # NOTE(review): the fixed 'content-length' looks copied from a browser
        # capture; HTTP clients normally compute it from the body - confirm
        # whether this entry is needed at all.
        return {
            'accept': "application/json, text/javascript, */*; q=0.01",
            'accept-encoding': "gzip, deflate, br",
            'accept-language': "zh-CN,zh;q=0.9",
            'content-length': "45",
            'content-type': "application/x-www-form-urlencoded; charset=UTF-8",
            'origin': "https://qun.qq.com",
            'referer': "https://qun.qq.com/member.html",
            'user-agent': "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) snap Chromium/73.0.3683.75 Chrome/73.0.3683.75 Safari/537.36",
            'x-requested-with': "XMLHttpRequest",
        }

    def parse_group_friends_data(self, dict_data):
        """Extract member records from a member-search response.

        Returns:
            A list of ``{"user_code": <uin>, "user_name": <nick>}`` dicts, one
            per entry in ``dict_data["mems"]``; empty when the key is missing.

        Raises:
            ValueError: if ``dict_data["ec"]`` is not 0.
        """
        if dict_data.get("ec") != 0:
            raise ValueError("请求失败cookie可能已过期")
        members = dict_data.get("mems") or []  # tolerate a missing/empty list
        return [
            {
                "user_code": member.get("uin"),   # QQ number
                "user_name": member.get("nick"),  # nickname
            }
            for member in members
        ]

    def parse_group_info_data(self, dict_data):
        """Extract group records from a group-list response.

        Only the ``"join"`` list of the response is read.

        Returns:
            A list of ``{"group_num", "group_name", "have_num"}`` dicts built
            from each entry's ``gc``, ``gn`` and ``owner`` fields; empty when
            ``"join"`` is missing.

        Raises:
            ValueError: if ``dict_data["errcode"]`` is not 0.
        """
        if dict_data.get("errcode") != 0:
            raise ValueError("请求失败cookie可能已过期")
        groups = dict_data.get("join") or []  # tolerate a missing/empty list
        return [
            {
                "group_num": group.get("gc"),    # group number
                "group_name": group.get("gn"),   # group name
                "have_num": group.get("owner"),  # owner's QQ number
            }
            for group in groups
        ]

    def _post_json(self, url, form_data):
        """POST ``form_data`` to ``url`` with the stored cookies and return the decoded JSON body.

        Raises:
            IOError: if the request itself fails or the status code is not 200.
        """
        try:
            resp = requests.post(
                url=url,
                data=form_data,
                headers=self.set_headers(),
                cookies=self.cookies,  # instance cookies, not a module global
                timeout=6,
            )
        except Exception as err:
            raise IOError("请求失败:{err}".format(err=err))
        if resp.status_code != 200:
            raise IOError("请求失败:status_code:{code}".format(code=resp.status_code))
        return resp.json()

    def get_group_friends_info(self, Gp_num=None):
        """Fetch the member list of one group.

        Args:
            Gp_num: group number; falls back to ``self.Gp_num`` when falsy.

        Returns:
            The list produced by ``parse_group_friends_data``.

        Raises:
            ValueError: if no group number or no cookies are available, or the
                response reports an error code.
            IOError: if the HTTP request fails.
        """
        Gp_num = Gp_num or self.Gp_num
        if not Gp_num:
            raise ValueError("缺少群号")
        if not self.cookies:
            raise ValueError("缺少cookie")
        url = "https://qun.qq.com/cgi-bin/qun_mgr/search_group_members"
        form_data = self.set_form_data(Gp_num=Gp_num, cookies=self.cookies)
        return self.parse_group_friends_data(self._post_json(url, form_data))

    def get_group_number_all(self):
        """Fetch the list of groups for the logged-in account.

        Returns:
            The list produced by ``parse_group_info_data``.

        Raises:
            ValueError: if the response reports an error code.
            IOError: if the HTTP request fails.
        """
        url = "https://qun.qq.com/cgi-bin/qun_mgr/get_group_list"
        form_data = {"bkn": self.GetBkn(self.skey)}
        return self.parse_group_info_data(self._post_json(url, form_data))

    def collect_group_all_friends(self):
        """Fetch the members of every group returned by ``get_group_number_all``.

        Each member record is merged with the record of the group it was
        fetched from, then the combined list is de-duplicated by
        ``user_code`` (first occurrence wins).

        Returns:
            ``(members, failed_groups)`` where ``failed_groups`` holds the
            group records whose member request raised.
        """
        all_members = []
        failed_groups = []
        for group_info in self.get_group_number_all():
            Gp_num = group_info.get("group_num")
            try:
                members = self.get_group_friends_info(Gp_num=Gp_num)
            except Exception as err:
                # Best effort: report the failure, remember the group, move on.
                print(err)
                failed_groups.append(group_info)
                continue
            for member in members:
                member.update(group_info)
            all_members.extend(members)
        return self.pass_repeat(all_members), failed_groups
if __name__ == '__main__':
    # Cookies taken from a logged-in session (the values here are empty placeholders).
    cookies = {
        'pgv_pvi': '',
        'RK': '',
        'ptcz': '',
        'pgv_pvid': '',
        'pgv_si': '',
        'ptisp': 'cnc',
        'uin': '',
        'skey': '',
        'p_uin': '',
        'pt4_token': '',
        'p_skey': '',
        'traceid': '',
    }

    # Group number; only used when fetching one specific group.
    # Group_num = "377012112"

    cg = Collent_Group_info(cookies=cookies)

    # Usage: fetch the list of all groups.
    # info_list = cg.get_group_number_all()

    # Usage: fetch the members of one specific group.
    # info_list = cg.get_group_friends_info(Gp_num=Group_num)
    # for i in info_list:
    #     print(i)

    # Usage: fetch every group together with its members.
    # friends_info_all_list, get_defeated_list = cg.collect_group_all_friends()
    #
    # for i in friends_info_all_list:
    #     print(i)