前言
本文的文字及图片来源于网络,仅供学习、交流使用,不具有任何商业用途,如有问题请及时联系我们以作处理。
步骤
将代码中的cookie替换成自己的cookie
方式:打开彼岸图网(https://pic.netbian.com/)并登录,按 F12 打开控制台,然后在请求头中查看并复制 cookie
全部代码:
"""
爬取彼岸图网
"""
import requests
import random
import re
import time
from lxml import etree
from fake_useragent import FakeUserAgent
# Request headers shared by every HTTP call in this script: a User-Agent
# string taken from FakeUserAgent().random, the Cookie of the user's own
# logged-in session (the placeholder below must be replaced by hand), and a
# Referer pointing at the site's home page.
headers = {
    "User-Agent": FakeUserAgent().random,
    "Cookie": "注意:这里替换成自己的cookie",
    "Referer": "https://pic.netbian.com/",
}
def _extract_image_id(href):
    """Return the numeric id embedded in a detail-page link."""
    # Assumes links look like "/tupian/<digits>.html" (consistent with the
    # 8-character prefix the fixed slice relied on) -- TODO confirm.
    match = re.search(r'(\d+)\.html', href)
    # Keep the fixed-offset slice as a fallback for links of any other shape.
    return match.group(1) if match else href[8:13]


def get_image_info(page):
    """Scrape one listing page and return the id and name of each image.

    Args:
        page: Listing page number. Page 1 is fetched from ``index.html``;
            any other value from ``index_{page}.html``.

    Returns:
        A list of ``{"id": ..., "name": ...}`` dicts, one per link found
        under ``div.slist``. ``id`` is the digit run before ``.html`` in the
        link target, so ids of any length are handled. ``name`` is the
        link's ``title`` (or, when the page has no titles at all, the
        thumbnail's ``alt`` text) with ``*`` replaced by ``_``. If the page
        yields fewer names than ids, the unmatched ids are dropped instead
        of raising ``IndexError``.

    Raises:
        requests.RequestException: If the page cannot be fetched (including
            when the server does not respond within the timeout).
        UnicodeDecodeError: If the response body is not valid GBK.
    """
    if page == 1:
        url = 'https://pic.netbian.com/index.html'
    else:
        url = f'https://pic.netbian.com/index_{page}.html'

    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # The body is decoded as GBK (presumably the site's declared encoding).
    webpage = response.content.decode("gbk")
    html = etree.HTML(webpage)

    image_id = [
        _extract_image_id(href)
        for href in html.xpath('//div[@class="slist"]/ul/li/a/@href')
    ]
    image_name = [i.replace("*", "_") for i in html.xpath('//div[@class="slist"]/ul/li/a/@title')]
    if not image_name:
        # Some pages carry no title attributes; fall back to the image alt text.
        image_name = [i.replace("*", "_") for i in html.xpath('//div[@class="slist"]/ul/li/a/img/@alt')]

    # zip() stops at the shorter list, so a length mismatch cannot raise.
    return [
        {"id": pic_id, "name": pic_name}
        for pic_id, pic_name in zip(image_id, image_name)
    ]
def get_token(image_id):
    """Request the download token for one image.

    Args:
        image_id: Image id as returned in the ``"id"`` field of
            ``get_image_info``.

    Returns:
        The first ``token=<word characters>`` value found in the response
        text.

    Raises:
        IndexError: If the response contains no ``token=...`` value.
        requests.RequestException: If the request fails or times out.
    """
    # random.random must be *called*; without the parentheses the f-string
    # embeds the repr of the function object instead of a number.
    # NOTE(review): ``t`` is presumably a cache-busting value -- confirm.
    t = random.random()
    url = f'https://pic.netbian.com/e/extend/downpic.php?id={image_id}&t={t}'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    return re.findall(r'token=(\w+)', response.text)[0]
def save_picture(token, image_name, save_dir=r"D:\pictures"):
    """Download the image behind ``token`` and write it to disk as a .jpg.

    Args:
        token: Download token obtained from ``get_token``.
        image_name: Display name of the image; used as the file name after
            characters that Windows forbids in file names are replaced
            with ``_``.
        save_dir: Directory the file is written to. Defaults to
            ``D:\\pictures``; it is created if it does not exist.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never saved as a picture.
        requests.RequestException: If the request fails or times out.
        OSError: If the directory or file cannot be created.
    """
    import os  # local import so this function does not rely on file-level imports

    url = f'https://pic.netbian.com/e/extend/downpic.php?token={token}'
    # A timeout keeps the script from hanging forever on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    data = response.content

    os.makedirs(save_dir, exist_ok=True)
    # Titles come straight from the web page and may contain characters
    # that are not allowed in Windows file names.
    safe_name = re.sub(r'[\\/:*?"<>|]', "_", image_name)
    with open(os.path.join(save_dir, f"{safe_name}.jpg"), "wb") as file:
        file.write(data)
    print(image_name, "下载成功")
if __name__ == '__main__':
    # Script entry point: collect the image entries of listing page 4, then
    # for each one fetch its token, download the picture, and pause for two
    # seconds before moving on.
    # NOTE(review): the source's indentation was lost; the pause is assumed
    # to sit inside the loop -- confirm.
    dict_list = get_image_info(4)
    total = len(dict_list)
    print("开始下载,共", total, "张")
    for position, entry in enumerate(dict_list, start=1):
        token = get_token(entry['id'])
        print("正在下载第", position, "张")
        save_picture(token, entry['name'])
        time.sleep(2)