利用 Python 爬取 QQ 个性网图片
该网站各类头像页面的布局大同小异,稍微修改代码即可爬取想要的头像。
闲话少说,直接上代码。
import requests
from parsel import Selector
import time
# Build and return the listing-page URLs
def get_url(page_count=10):
    """Build the listing-page URLs to crawl.

    The first page is ``index.html``; every later page N is ``index_N.html``.

    Args:
        page_count: Number of listing pages to generate, starting at page 1.
            Defaults to 10, the value that used to be hard-coded.

    Returns:
        A list of URL strings in page order; empty when ``page_count`` < 1.
    """
    base = 'https://www.woyaogexing.com/touxiang/z/ktnan/index'
    return [
        f'{base}.html' if page == 1 else f'{base}_{page}.html'
        for page in range(1, page_count + 1)
    ]
# Fetch the HTML of each listing page and extract the links to the avatar-set pages
def get_img_html():
    """Fetch every listing page and collect the links to the avatar-set pages.

    Each URL from ``get_url()`` is requested in turn. Pages that cannot be
    fetched (timeout, connection error, non-2xx status) are reported and
    skipped instead of aborting the whole crawl or parsing an error page.

    Returns:
        A set of the ``href`` values found in the ``txList`` blocks under
        ``div.pMain``; using a set drops links that occur more than once.
    """
    detail_links = set()
    for page_no, html_url in enumerate(get_url(), start=1):
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(html_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'第{page_no}页请求失败,已跳过: {html_url} ({exc})')
            continue
        response.encoding = 'utf-8'
        # Parse the page and pull the hrefs out of both list variants.
        selector = Selector(text=response.text)
        selectors = selector.xpath("//div[@class='pMain']")
        # NOTE: the trailing space in 'txList ' is kept from the original
        # selector; it presumably mirrors the site's markup.
        left_url = selectors.xpath("./div[@class='txList ']/a/@href").getall()
        right_url = selectors.xpath("./div[@class='txList listRight']/a/@href").getall()
        detail_links.update(left_url)
        detail_links.update(right_url)
        print(f'第{page_no}页成功爬取url,正在放入列表,休息3s--->')
        # Pause between listing pages to avoid hammering the site.
        time.sleep(3)
    print('-->爬取url完成')
    return detail_links
# Collect the address of every individual image and return them as a list
def get_img_url():
    """Visit every avatar-set page and collect the image links it contains.

    The links returned by ``get_img_html()`` are appended to the site root
    before being requested. Pages that cannot be fetched are reported and
    skipped.

    Returns:
        A flat list of the ``href`` values found under ``li.tx-img`` inside
        ``ul.artCont.cl`` on each page, in the order the pages were visited.
    """
    img_real = []
    for page_no, url in enumerate(get_img_html(), start=1):
        # Build the full address of the avatar-set page.
        http_url = 'https://www.woyaogexing.com' + url
        try:
            # A timeout keeps one stalled connection from hanging the crawl.
            response = requests.get(http_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'第{page_no}页请求失败,已跳过: {http_url} ({exc})')
            continue
        response.encoding = 'utf-8'
        selector = Selector(text=response.text)
        # Extract the image links from the article content list.
        selectors = selector.xpath("//ul[@class='artCont cl']")
        img_real.extend(selectors.xpath("./li[@class='tx-img']/a/@href").getall())
        print(f'解析第{page_no}页url-->')
    print('解析url完成-->')
    return img_real
# Download the images into the target directory
def download_pic():
    """Download every collected image into the local ``pic`` directory.

    Image links come from ``get_img_url()``. Links without a scheme get
    ``https:`` prepended (the original always did this). Successfully
    downloaded images are written as ``1.jpeg``, ``2.jpeg``, ... in download
    order; failed downloads are reported and skipped so no error page is
    saved as an image.
    """
    import os  # local import: this file's import block does not provide os

    out_dir = os.path.join('.', 'pic')
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    count = 0
    for img_url in get_img_url():
        # Build the image address; leave already-absolute URLs untouched.
        if img_url.startswith(('http://', 'https://')):
            real_url = img_url
        else:
            real_url = 'https:' + img_url
        print(real_url)
        try:
            response = requests.get(real_url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'下载失败,已跳过: {real_url} ({exc})')
            continue
        count += 1
        # Save the image locally. The original name pattern had a stray
        # leading space (' 1.jpeg'); that is dropped here. The with-block
        # closes the file, so no explicit close() is needed.
        with open(os.path.join(out_dir, f'{count}.jpeg'), 'wb') as f:
            f.write(response.content)
        print('第%s张照片成功下载照片到指定路径' % count)
# Script entry point: run the full crawl-and-download pipeline only when the
# file is executed directly, not when it is imported.
if __name__ == '__main__':
    download_pic()
爬取结果
欢迎各位大神交流。