收到的QQ坦白说通常会提供两条信息,比如‘一个天蝎座的男生’,‘一个认识5年的女生’,那如何找出符合以上条件的好友呢?如果你有几百甚至几千个好友,一个一个找不现实。自动寻找方法当然是通过网络爬虫获取信息再分析过滤实现。
思路:
1. 登录自己QQ空间,获取所有好友QQ号
2. 通过好友QQ号,爬好友空间,获得好友的星座、性别、年龄等个人信息
3. 通过自己QQ号和好友QQ号,获取认识天数,因为认识天数是由你和好友的两个QQ号共同决定的
4. 如果遇到好友空间不让你访问,这个不影响以上信息,因为就算你不能访问好友空间,也可以看见好友的个人信息
5. 如果好友没有填写信息,你当然获取不到相关信息
编程语言:Python
用到的库:
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re
import datetime
用到的知识:
1. 通过网址访问云端获取好友信息
2. 用selenium和浏览器配合抓取网页
3. 用正则表达式等处理网页数据,获得有效信息
注意:频繁抓取网页会被腾讯当成违规操作,导致封号两个小时。我的QQ已经被封号两次了,现在还在封号中。每获取5个好友信息就退出登录,等两分钟后再登录,也许(只是也许)可以解决问题。
代码如下(改进登录时间):
下载代码也可以在链接:https://download.csdn.net/download/gengli2017/10619273
对于想了解代码含义的初学者,我会尽快补充一份讲解说明;
我自己初学时也找不到相关资料。
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
import re
import datetime
def frankSpeak(account, password):
    """Scrape every friend's profile and print the ones matching a hint.

    Logs in to Qzone, fetches the friend list, then for each friend collects
    the profile fields and the "known time" and stores them as a tuple
    ``(QQ, remark, age, gender, constellation, knownTime, province, city)``.
    Finally prints the remark of every friend whose constellation is '双鱼座'
    and whose gender is '0', followed by the full table.

    Args:
        account: QQ number used to log in; also used as the owner's QQ number
            in the friend-list and friendship requests.
        password: Password for ``account``.
    """
    myQQ = account
    driver = loginQQ(account, password)
    friendInfoVector = []
    try:
        gtk, g_qzonetoken = getGtk_Token(driver)
        friendsDict = getFriends(driver, myQQ, gtk, g_qzonetoken)
        total = len(friendsDict)
        for count, friendQQ in enumerate(friendsDict, start=1):
            age, city, constellation, gender, province = getFriendInfo(
                driver, friendQQ, gtk, g_qzonetoken)
            knownTime = getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken)
            # Per the original notes: male -> gender 1, female -> gender 0,
            # unspecified -> gender 2.
            friendInfoVector.append(
                (friendQQ, friendsDict[friendQQ], age, gender, constellation,
                 knownTime, province, city))
            # To reduce the risk of the account being locked, log out after
            # every 5 friends, wait two minutes, then log in again. Skipped
            # after the last friend because nothing is left to fetch.
            if count % 5 == 0 and count < total:
                driver.quit()
                driver = None
                time.sleep(120)
                driver = loginQQ(account, password)
                gtk, g_qzonetoken = getGtk_Token(driver)
    finally:
        # Always close the browser, including when a request fails midway.
        if driver is not None:
            driver.quit()

    print('一个双鱼座的女生')
    for info in friendInfoVector:
        # Index 4 is the constellation, index 3 the gender of *this* record
        # (not the value left over from the last scraped friend).
        if info[4] == '双鱼座' and info[3] == '0':
            print(info[1])
    print('所有好友信息')
    print('(QQ号, 备注, 年龄, 性别, 星座, 认识时间, 省份, 城市)')
    print('女:性别=0 男:性别=1 未注明:性别=2')
    for info in friendInfoVector:
        print(info)
def loginQQ(account, password):
    """Open Chrome, log in to Qzone with account/password, return the driver.

    Args:
        account: QQ number typed into the login form.
        password: Password typed into the login form.

    Returns:
        The Selenium Chrome driver after the login button has been clicked
        and a 2-second wait has elapsed.
    """
    chrome_options = Options()
    chrome_options.add_argument("--disable-infobars")
    # Pass the options to Chrome; previously they were built but never used.
    driver = webdriver.Chrome(options=chrome_options)
    driver.get('https://qzone.qq.com/')
    # The login form lives inside an iframe.
    driver.switch_to.frame('login_frame')
    # Switch from QR-code login to account/password login.
    # find_element('id', ...) is used instead of find_element_by_id because
    # the latter was removed in Selenium 4.
    driver.find_element('id', 'switcher_plogin').click()
    user_box = driver.find_element('id', 'u')
    user_box.clear()
    user_box.send_keys(account)
    password_box = driver.find_element('id', 'p')
    password_box.clear()
    password_box.send_keys(password)
    driver.find_element('id', 'login_button').click()
    # Give the page a moment to complete the login redirect.
    time.sleep(2)
    return driver
def getGTKFromCookie(cookie):
    """Compute the g_tk value from the ``p_skey`` cookie.

    Args:
        cookie: Mapping of cookie name to value; must contain ``'p_skey'``.

    Returns:
        The hash as a non-negative int masked to 31 bits.

    Raises:
        KeyError: If ``cookie`` has no ``'p_skey'`` entry.
    """
    hashes = 5381
    for letter in cookie['p_skey']:
        hashes += (hashes << 5) + ord(letter)
    return hashes & 0x7fffffff
def getGtk_Token(driver):
    """Return ``(gtk, g_qzonetoken)`` for the current browser session.

    ``gtk`` is computed from the session cookies via ``getGTKFromCookie``;
    ``g_qzonetoken`` is extracted from the current page's source.

    Args:
        driver: Selenium driver positioned on a logged-in Qzone page.

    Returns:
        Tuple ``(gtk, g_qzonetoken)``.

    Raises:
        RuntimeError: If the page source does not contain the
            ``window.g_qzonetoken`` snippet (e.g. the login did not succeed).
    """
    # Flatten Selenium's list of cookie dicts into {name: value}.
    cookie = {elem['name']: elem['value'] for elem in driver.get_cookies()}
    gtk = getGTKFromCookie(cookie)

    html = driver.page_source
    match = re.search(
        r'window\.g_qzonetoken = \(function\(\)\{ try\{return (.*?);\} catch\(e\)',
        html)
    if match is None:
        raise RuntimeError('g_qzonetoken not found in page source')
    # The token is the first double-quoted string inside the matched snippet.
    g_qzonetoken = match.group(0).split('"')[1]
    return gtk, g_qzonetoken
def getFriends(driver, myQQ, gtk, g_qzonetoken):
    """Fetch the friend list of ``myQQ``.

    Args:
        driver: Selenium driver of a logged-in session.
        myQQ: QQ number whose friend list is requested.
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        Dict mapping each friend's QQ number (str) to the ``realname`` string
        found next to it in the response.
    """
    friendUrl = (
        'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/'
        'friend_hat_get.cgi?hat_seed=1&uin=' + str(myQQ)
        + '&fupdate=1&g_tk=' + str(gtk)
        + '&qzonetoken=' + str(g_qzonetoken)
        + '&g_tk=' + str(gtk)
    )
    driver.get(friendUrl)
    friend_list = str(driver.page_source)
    # Raw string: same pattern as before, without invalid escape sequences.
    # Each entry looks like  "<qq>":{<newline>"realname":"<name>"}
    pattern = re.compile(r'"(.\d*)":\{\n"realname":"(.*?)"}', re.S)
    return {str(qq): str(name) for qq, name in pattern.findall(friend_list)}
def getFriendInfo(driver, friendQQ, gtk, g_qzonetoken):
    """Fetch a friend's profile fields by QQ number.

    Args:
        driver: Selenium driver of a logged-in session.
        friendQQ: QQ number of the friend.
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        Tuple of strings ``(age, city, constellation, gender, province)``.
        Per the original notes, gender is '1' for male, '0' for female and
        '2' otherwise. When the fields cannot be found in the response the
        placeholder ``('-1', 'NULL', 'NULL', '2', 'NULL')`` is returned.
    """
    friendInfoUrl = (
        'https://mobile.qzone.qq.com/profile_get?qzonetoken=' + str(g_qzonetoken)
        + '&g_tk=' + str(gtk)
        + '&format=json&hostuin=' + str(friendQQ)
    )
    driver.get(friendInfoUrl)
    friendInfo = str(driver.page_source)
    # The fields must appear in this order on a single line for a match.
    pattern = re.compile(
        r'"age":(\d*).*"city":"(\w*)".*"constellation":"(\w*).*"gender":(-?\d*).*"province":"(\w*)"')
    usefulInfo = pattern.findall(friendInfo)
    if usefulInfo:
        # usefulInfo[0] == (age, city, constellation, gender, province)
        return usefulInfo[0]
    return ('-1', 'NULL', 'NULL', '2', 'NULL')
def getKnownTime(driver, myQQ, friendQQ, gtk, g_qzonetoken):
    """Return how long ``myQQ`` and ``friendQQ`` have been friends.

    The response's ``addFriendTime`` and ``systemTime`` timestamps are
    converted to local dates and compared at the coarsest differing unit.

    Args:
        driver: Selenium driver of a logged-in session.
        myQQ: The owner's QQ number.
        friendQQ: The friend's QQ number.
        gtk: g_tk value for the session.
        g_qzonetoken: qzonetoken value for the session.

    Returns:
        A string such as '6年', '3月' or '11日' (difference plus one, in
        years, months or days respectively), or 'NULL' when either timestamp
        is missing from the response.
    """
    knownDaysUrl = (
        'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/friendship/'
        'cgi_friendship?activeuin=' + str(myQQ)
        + '&passiveuin=' + str(friendQQ)
        + '&situation=1&isCalendar=1&g_tk=' + str(gtk)
        + '&qzonetoken=' + str(g_qzonetoken)
        + '&g_tk=' + str(gtk)
    )
    driver.get(knownDaysUrl)
    knownDaysInfo = str(driver.page_source)

    beginMatch = re.search(r'"addFriendTime":(\d+)', knownDaysInfo)
    lastMatch = re.search(r'"systemTime":(\d+)', knownDaysInfo)
    if beginMatch is None or lastMatch is None:
        # Same placeholder getFriendInfo uses for unavailable data, so one
        # friend without friendship data does not abort the whole run.
        return 'NULL'

    beginTime = datetime.date.fromtimestamp(int(beginMatch.group(1)))
    lastTime = datetime.date.fromtimestamp(int(lastMatch.group(1)))

    if lastTime.year > beginTime.year:
        return str(lastTime.year - beginTime.year + 1) + '年'
    # Compare against the *begin* month; the original compared lastM with
    # itself, which made this branch unreachable.
    if lastTime.month > beginTime.month:
        return str(lastTime.month - beginTime.month + 1) + '月'
    return str(lastTime.day - beginTime.day + 1) + '日'
# Run the program: replace 'QQNumber' with the QQ account to log in with and
# 'password' with its password. Guarded so that importing this module does
# not start a browser session.
if __name__ == '__main__':
    frankSpeak('QQNumber', 'password')