"""Fetch the member list of a QQ group from qun.qq.com and export it to Excel.

Put a logged-in qun.qq.com cookie string into ``COOKIE`` and the group number
into ``gc``, then run the script.
"""
from email import header
import json
import math
import re
import time

import pandas as pd  # converts the list of member dicts into a DataFrame

# The network stack is only needed by load_data(); guarding the imports keeps
# the offline helpers (filter_emoji, export_excel, ...) importable without it.
try:
    import requests
except ImportError:
    requests = None
try:
    from hyper.contrib import HTTP20Adapter  # HTTP/2 transport adapter for requests
except ImportError:
    HTTP20Adapter = None

# --- user settings ---------------------------------------------------------
COOKIE = '您的cookie'  # cookie string of a logged-in qun.qq.com session
gc = '599500635'  # group number; also used as the Excel file name

# Accumulates one dict per group member; filled by get_qq_member_list().
qq_qun_info = []

_MEMBER_SEARCH_URL = 'https://qun.qq.com/cgi-bin/qun_mgr/search_group_members'
# Members requested per call: st..end inclusive, i.e. 0-20, 21-41, 42-62, ...
_PAGE_SIZE = 21
_TIME_FORMAT = "%Y-%m-%d %H:%M:%S"
# Gender code in the 'g' field -> label (0 male, 1 female, -1 unknown).
_SEX_LABELS = {0: '男', 1: '女', -1: '未知'}
# Column order of the exported sheet and the Chinese title of each column.
_EXPORT_ORDER = ['num', 'qq_name', 'qq_qun_name', 'qq_number', 'sex',
                 'qq_age', 'join_qun_time', 'last_speak_time']
_EXPORT_TITLES = {
    'num': '序号',
    'qq_name': 'qq昵称',
    'qq_qun_name': 'qq群昵称',
    'qq_number': 'qq号码',
    'sex': '性别',
    'qq_age': 'Q龄',
    'join_qun_time': '入群时间',
    'last_speak_time': '最近发言时间',
}
# Astral-plane characters (emoji) plus lone UTF-16 surrogates; compiled once.
_EMOJI_PATTERN = re.compile('[\U00010000-\U0010ffff\\uD800-\\uDBFF\\uDC00-\\uDFFF]')


def cookie_to_dict(cookie):
    """Parse a ``name=value; name=value`` cookie string into a dict.

    Only the first ``=`` of each pair separates name from value, so values
    that themselves contain ``=`` are kept intact. Fragments without ``=``
    are ignored.
    """
    cookie_dict = {}
    for pair in cookie.split(';'):
        name, sep, value = pair.strip().partition('=')
        if sep:
            cookie_dict[name] = value
    return cookie_dict


def get_bkn(skey):
    """Return the ``bkn`` token derived from the cookie's ``skey`` value.

    Starting from 5381, each character adds ``(t << 5) + ord(ch)`` to the
    running value; the result is masked to 31 bits.
    """
    t = 5381
    for ch in skey:
        t += (t << 5) + ord(ch)
    return 2147483647 & t


def load_data(st, end):
    """Request members ``st``..``end`` of group ``gc`` and return the body text.

    Args:
        st: Index of the first member to request.
        end: Index of the last member to request (get_qq_member_list treats
            the range as inclusive).

    Returns:
        The response body as text, ready to be passed to ``json.loads``.

    Raises:
        ImportError: If ``requests`` or ``hyper`` is not installed.
        KeyError: If ``COOKIE`` contains no ``skey`` entry.
    """
    if requests is None or HTTP20Adapter is None:
        raise ImportError(
            "load_data() needs the 'requests' and 'hyper' packages installed")

    headers = {
        ":authority": "qun.qq.com",
        ":method": "POST",
        ":path": "/cgi-bin/qun_mgr/search_group_members",
        ":scheme": "https",
        "accept": "application/json, text/javascript, */*; q=0.01",
        "accept-encoding": "gzip, deflate, br",
        "accept-language": "zh-CN,zh;q=0.9",
        "content-length": "45",
        "content-type": "application/x-www-form-urlencoded; charset=UTF-8",
        "cookie": COOKIE,
        "origin": "https://qun.qq.com",
        "referer": "https://qun.qq.com/member.html",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3947.100 Safari/537.36",
        "x-requested-with": "XMLHttpRequest",
    }
    data = {
        "gc": gc,
        "st": st,
        "end": end,
        "sort": "0",
        "bkn": get_bkn(cookie_to_dict(COOKIE)['skey']),
    }

    # The context manager closes the session (and its connections) on exit.
    with requests.Session() as session:
        session.mount('https://qun.qq.com', HTTP20Adapter())
        response = session.post(_MEMBER_SEARCH_URL, headers=headers, data=data)
        # Return the text untouched: json.loads decodes \uXXXX escapes itself,
        # whereas a 'unicode_escape' round trip would also unescape \" and \n
        # (producing invalid JSON) and garble raw non-ASCII characters.
        return response.text


def get_qq_member_count():
    """Return the number of members in the group (the response's ``count``)."""
    # An empty 0..0 range is enough; only the 'count' field is read.
    return json.loads(load_data(0, 0))['count']


def _format_timestamp(seconds):
    """Format a Unix timestamp in seconds as local ``YYYY-mm-dd HH:MM:SS``."""
    return time.strftime(_TIME_FORMAT, time.localtime(seconds))


def _member_to_record(num, member):
    """Turn one raw ``mems`` entry into the flat dict kept in ``qq_qun_info``."""
    return {
        'num': num,
        # Emoji are replaced with '???' in both nickname fields.
        'qq_name': filter_emoji(member['nick'], '???'),
        'qq_qun_name': filter_emoji(member['card'], '???'),
        'qq_number': str(member['uin']),
        'sex': _SEX_LABELS.get(member['g'], '错误'),
        'qq_age': member['qage'],
        'join_qun_time': _format_timestamp(member['join_time']),
        'last_speak_time': _format_timestamp(member['last_speak_time']),
    }


def get_qq_member_list():
    """Download every member of the group into ``qq_qun_info`` and print it.

    Members are fetched page by page (``_PAGE_SIZE`` per request) and numbered
    from 1 in the order they are returned. Records are appended to the
    module-level ``qq_qun_info`` list; nothing is returned.
    """
    page_count = math.ceil(get_qq_member_count() / _PAGE_SIZE)
    num = 1  # running sequence number stored with each record
    for page in range(page_count):
        first = page * _PAGE_SIZE
        payload = json.loads(load_data(first, first + _PAGE_SIZE - 1))
        # Tolerate a page without a 'mems' list instead of raising KeyError.
        for member in payload.get('mems', []):
            qq_qun_info.append(_member_to_record(num, member))
            num += 1

    print(qq_qun_info)
    # The Excel export stays disabled here, as in the original script; call
    # export_excel(qq_qun_info) to actually write the file.
    print('导出表格完成')


def export_excel(export):
    """Write member records to ``<gc>.xlsx`` with Chinese column titles.

    Args:
        export: Iterable of dicts keyed like the records in ``qq_qun_info``.
    """
    # Passing the column list to the constructor fixes the column order and
    # also copes with an empty input.
    pf = pd.DataFrame(list(export), columns=_EXPORT_ORDER)
    pf = pf.rename(columns=_EXPORT_TITLES)
    # Blank out empty cells.
    pf = pf.fillna(' ')
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # and to_excel(encoding=...) no longer exist in pandas 2.x.
    with pd.ExcelWriter(gc + '.xlsx') as writer:
        pf.to_excel(writer, index=False)


def filter_emoji(desstr, restr=''):
    """Replace emoji and lone surrogate characters in ``desstr``.

    Args:
        desstr: The string to clean.
        restr: Replacement for each matched character (default: remove it).

    Returns:
        The cleaned string.
    """
    return _EMOJI_PATTERN.sub(restr, desstr)


if __name__ == '__main__':
    get_qq_member_list()