注意:只能下载空间允许访问的用户的图片;对于设置了访问权限(必须加为好友才能访问)的用户,无法获取其私人信息。
1.配置环境:
python2.7
selenium-2.40.0
beautifulsoup4-4.6.0
2.代码解析:
1.生成QQ号,对应的文件夹;
2.登录自己的QQ;
3.访问QQ用户相册,判断是否需要加好友;
4.若相册没有设置访问权限,则下载相册内容;
5.访问QQ用户说说,下载说说图片内容;
6.释放资源;
7.删除空文件夹。
小白一枚,有不足的地方请指教
#-*- coding:utf-8 -*-
from selenium import webdriver
from time import sleep
import requests
import os
from PIL import Image
from bs4 import BeautifulSoup
import sys
# Root directory for downloaded pictures; downQQPic appends the QQ number
# to this prefix to build one sub-folder per user.
save_file = 'E:/qq_friend/'
def login(driver, username='x', password='x'):
    """Log in to Qzone through the page currently loaded in ``driver``.

    When the page shows the login box (element id ``login_div``) the function
    switches into the ``login_frame`` iframe, selects the account/password
    form, types the credentials and clicks the login button.  Whether or not
    a login was needed, the presence of the ``QM_OwnerInfo_Icon`` element is
    then used as the success signal.

    Args:
        driver: Selenium WebDriver that has already opened the Qzone page.
        username: QQ account typed into the ``u`` field.  Defaults to the
            placeholder ``'x'`` that used to be hard-coded here.
        password: Password typed into the ``p`` field.  Same placeholder
            default.

    Returns:
        True if ``QM_OwnerInfo_Icon`` was found, otherwise False.

    Raises:
        Whatever the driver raises if one of the login form elements is
        missing; only the two presence probes are treated as best effort.
    """
    try:
        driver.find_element_by_id('login_div')
        needs_login = True
    except Exception:
        # Best effort: any lookup failure means "no login box shown".
        # Catching Exception instead of using a bare except keeps Ctrl-C
        # and SystemExit working.
        needs_login = False

    if needs_login:
        driver.switch_to.frame('login_frame')
        driver.find_element_by_id('switcher_plogin').click()
        account_box = driver.find_element_by_id('u')
        account_box.clear()
        account_box.send_keys(username)
        password_box = driver.find_element_by_id('p')
        password_box.clear()
        password_box.send_keys(password)
        # Raise the implicit-wait timeout before looking up the button.
        driver.implicitly_wait(100)
        driver.find_element_by_id('login_button').click()
        # Leave the login iframe again so later lookups run against the
        # top-level document (get_shuoshuo resets focus the same way).
        driver.switch_to.default_content()

    driver.implicitly_wait(60)
    try:
        driver.find_element_by_id('QM_OwnerInfo_Icon')
    except Exception:
        print('此QQ不允许访问空间')
        return False
    print('允许访问空间')
    return True
def loadPic(qq, driver):
    """Download the photos of every accessible album of Qzone user ``qq``.

    Opens the user's album page, switches into the ``app_canvas_frame``
    iframe, clicks each album matched by the access XPath in turn and saves
    every ``.j-pl-photoitem-img`` image as
    ``<save_file><qq>/<album index>_<photo index>_pic.jpg``.

    Args:
        qq: QQ number of the user whose albums are scraped.
        driver: Logged-in Selenium WebDriver.

    Raises:
        Whatever the driver raises when the owner icon, the iframe or an
        album/photo element cannot be found.  Focus is returned to the
        top-level document even in that case.
    """
    print('====正在访问相册=====')
    try:
        driver.get('http://user.qzone.qq.com/{}/4'.format(qq))
    except Exception:
        # Best effort: report and carry on; the lookups below will raise
        # if the page really is unusable.
        print('不予许访问相册')
    driver.implicitly_wait(60)
    # Lookup used only as a guard: it raises when the owner icon is absent.
    driver.find_element_by_id('QM_OwnerInfo_Icon')
    # PhantomJS has good support for locating elements by XPath.
    iframe = driver.find_element_by_xpath("//iframe[@name='app_canvas_frame']")
    driver.switch_to.frame(iframe)
    try:
        album_xpath = "//div[@data-allowaccess='1' and @data-total>'1']"
        album_count = len(driver.find_elements_by_xpath(album_xpath))
        for i in range(album_count):
            driver.implicitly_wait(30)
            # Elements are looked up again on every pass - presumably
            # because references found before click()/back() go stale.
            album_links = driver.find_elements_by_xpath(album_xpath + "/div/a")
            # TODO: click() reportedly does not work in headless mode.
            album_links[i].click()
            print('点击可访问的第'+str(i+1)+'个相册')
            photo_count = len(
                driver.find_elements_by_css_selector(".j-pl-photoitem-img"))
            for j in range(photo_count):
                driver.implicitly_wait(30)
                photos = driver.find_elements_by_css_selector(".j-pl-photoitem-img")
                link = photos[j].get_attribute('src')
                print('link==== %s' % link)
                try:
                    dowmloadPic(link, save_file+str(qq)+'/' + str(i) + '_' + str(j) + '_pic.jpg')
                except Exception:
                    print('picture url error')
            driver.implicitly_wait(30)
            # Return to the album list before opening the next album.
            driver.back()
    finally:
        # Always leave the iframe, even if a lookup above raised, so the
        # next page visit starts from the top-level document.
        driver.switch_to.default_content()
def get_shuoshuo(qq, driver):
    """Download the pictures attached to the "shuoshuo" posts of ``qq``.

    Opens the user's shuoshuo page, switches into the ``app_canvas_frame``
    iframe and, for every ``<a>`` inside an ``img-attachments-inner``
    element, saves the linked resource as ``<save_file><qq>/<n>.jpg`` where
    ``n`` counts the successful downloads.

    Args:
        qq: QQ number of the user whose posts are scraped.
        driver: Logged-in Selenium WebDriver.
    """
    print('====开始下载说说=====')
    try:
        driver.get('http://user.qzone.qq.com/{}/311'.format(qq))
    except Exception:
        # Best effort: report and carry on with whatever was loaded.
        print('不予许访问说说')
    driver.switch_to.default_content()
    driver.switch_to.frame('app_canvas_frame')
    try:
        img_number = 0
        for container in driver.find_elements_by_class_name('img-attachments-inner'):
            for anchor in container.find_elements_by_tag_name('a'):
                try:
                    href = anchor.get_attribute('href')
                    # The last character of the href is dropped before
                    # downloading.
                    # NOTE(review): reason not visible here - confirm.
                    target = href[0:-1]
                    # Single formatted argument: the two-argument print()
                    # form prints a tuple repr on Python 2.
                    print('linkF==== %s' % target)
                    dowmloadPic(target, save_file+str(qq)+'/' + str(img_number) + '.jpg')
                    img_number += 1
                except Exception:
                    # Skip this attachment but say so, as loadPic does,
                    # instead of failing silently.
                    print('picture url error')
    finally:
        # Leave the iframe again, mirroring loadPic.
        driver.switch_to.default_content()
def dowmloadPic(url, save_file):
    """Fetch ``url`` and write the response body to ``save_file``.

    Args:
        url: Address of the picture to download.
        save_file: Destination path as a UTF-8 byte string; it is re-encoded
            to cp936 before being handed to ``open``.

    A ``requests`` connection error is swallowed and nothing is written.
    Any other error (read timeout, bad path, ...) propagates to the caller.
    The HTTP status is not checked, so whatever body the server returns is
    saved.
    """
    try:
        # Only the network call can raise ConnectionError, so keep the try
        # body down to it.
        pic = requests.get(url, timeout=20)
    except requests.exceptions.ConnectionError:
        return
    # The with-block closes the file even if the write fails.
    with open(save_file.decode('utf-8').encode('cp936'), 'wb') as fp:
        fp.write(pic.content)
def createfile(path):
    """Create directory ``path`` (including parents) unless it exists.

    Surrounding whitespace and trailing backslashes are stripped from
    ``path`` first.

    Args:
        path: Directory to create.

    Returns:
        True if the directory was created by this call, False if the path
        already existed.

    Raises:
        OSError: if the directory could not be created for any reason other
            than the path already existing.
    """
    path = path.strip().rstrip("\\")
    try:
        # Try first, then inspect the failure: this avoids the gap between
        # an existence check and the creation.
        os.makedirs(path)
    except OSError:
        if not os.path.exists(path):
            raise
        print(path + ' 目录已存在')
        return False
    print(path + ' 创建成功')
    return True
def _can_open_image(path):
    """Return True if PIL can open the file at ``path`` as an image."""
    try:
        # Open the file ourselves so the handle is closed as soon as PIL
        # has looked at it; otherwise every checked image stays open.
        with open(path, 'rb') as handle:
            Image.open(handle)
    except Exception:
        return False
    return True


def delcantopen(dir):
    """Delete unreadable images and empty directories below ``dir``.

    Walks ``dir`` top-down.  Every file PIL cannot open is removed, then an
    attempt is made to remove the directory itself, which only succeeds when
    it is empty.

    Args:
        dir: Root directory to clean up.
    """
    for root, _subdirs, names in os.walk(dir):
        for name in names:
            full_path = os.path.join(root, name)
            if not _can_open_image(full_path):
                os.remove(full_path)
        try:
            os.rmdir(root)
        except OSError:
            # Directory still has content (or cannot be removed): keep it.
            pass
def downQQPic(begin, end,
              phantomjs_path='E:\\phantomjs-2.1.1-windows\\bin\\phantomjs.exe'):
    """Scrape album and shuoshuo pictures for QQ numbers in [begin, end).

    Starts PhantomJS, logs in once, then for every QQ number whose folder
    did not exist yet downloads the album pictures followed by the shuoshuo
    pictures.  Numbers whose folder already exists are skipped.

    Args:
        begin: First QQ number to visit (inclusive).
        end: QQ number at which to stop (exclusive).
        phantomjs_path: Path to the PhantomJS executable.  Defaults to the
            location that used to be hard-coded here.
    """
    driver = webdriver.PhantomJS(phantomjs_path)
    try:
        driver.get('http://qzone.qq.com')
        if not login(driver):
            return
        for qq in range(begin, end):
            if not createfile(save_file + str(qq)):
                continue
            try:
                # Album pictures first, then shuoshuo pictures.
                loadPic(qq, driver)
                get_shuoshuo(qq, driver)
            except Exception:
                # Deliberate best effort: a failure for one QQ number must
                # not stop the crawl.  Catching Exception rather than using
                # a bare except keeps Ctrl-C working during the long loop.
                continue
    finally:
        # Shut the browser down even if opening the page or logging in
        # raised, so no PhantomJS process is left behind.
        try:
            driver.close()
        finally:
            driver.quit()
if __name__ == '__main__':
    # Progress notes kept from the author: randomly generated QQ numbers
    # (1164867801----1164870024) 1164869068 ===== No. 1223, already checked
    # up to 1164870223 1164880025.
    qq_range = (1164880465, 1164890025)
    downQQPic(*qq_range)
    # Remove empty directories and pictures that cannot be opened.
    delcantopen(save_file)