# Website: https://pic.netbian.com
# Crawler code
# For reference only
# 获取图片数据
import os.path
import time
from urllib.parse import urljoin

import requests
from lxml import etree
# Running counter used as the file name of the next saved image. It is shared
# across calls to start() so that successive pages do not overwrite each
# other's files.
pic_name = 0


def start(url, timeout=10):
    """Download every image listed on one gallery page into ``./result``.

    The page at *url* is fetched and every ``img`` ``src`` found under
    ``div.slist > ul > li > a`` is downloaded.  Each image is written to
    ``./result/<pic_name>.jpg`` and the module-level ``pic_name`` counter is
    advanced by one per saved file.

    Failed requests (network error, timeout, non-2xx status) are reported
    with ``print`` and skipped, so one bad page or image does not abort the
    rest of the crawl and no error page is ever saved as a ``.jpg``.

    Args:
        url: Address of the gallery listing page to scrape.
        timeout: Seconds to wait on each HTTP request before giving up.
    """
    global pic_name

    print(f'开始爬取{url}')
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.45 Safari/537.36'}

    # Make sure the output directory exists even when start() is called
    # without the script's __main__ setup having run.
    os.makedirs("./result", exist_ok=True)

    # Fetch the listing page; without a timeout a stalled connection would
    # block the whole crawl indefinitely.
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f'Skipping page {url}: {exc}')
        return

    # Parse the listing markup.
    tree = etree.HTML(response.text)
    if tree is None:
        # NOTE(review): guards against a body with no parseable markup;
        # confirm whether the lxml version in use returns None or raises here.
        print(f'Skipping page {url}: no parseable HTML')
        return

    for src in tree.xpath("//div[@class='slist']/ul/li/a/img/@src"):
        if not src:
            # An empty src would resolve to the listing page itself.
            continue

        # urljoin resolves site-relative paths against the page URL and
        # leaves already-absolute image URLs untouched.
        img_url = urljoin(url, src)
        try:
            img_response = requests.get(img_url, headers=headers, timeout=timeout)
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f'Skipping image {img_url}: {exc}')
            continue

        # Persist the raw bytes; the counter only advances for saved files.
        with open(f"./result/{pic_name}.jpg", "wb") as fp:
            fp.write(img_response.content)
        pic_name += 1
if __name__ == '__main__':
    # Create the folder that receives the downloaded images; exist_ok avoids
    # the check-then-create race of a separate os.path.exists() test.
    os.makedirs("./result", exist_ok=True)

    # First listing page of the category to crawl.
    url = "https://pic.netbian.com/4kdongman/"
    start(url)

    # Follow-up listing pages are derived from the same base URL so the whole
    # crawl stays inside one category (previously these pointed at a
    # different category than the first page).
    # NOTE(review): whether index_1.html exists on this site is not visible
    # here; confirm the first valid paginated index.
    for i in range(1, 30):
        next_url = f"{url}index_{i}.html"
        # Pause between pages to keep the request rate low.
        time.sleep(1)
        start(next_url)