一、确定要爬取的网页
下面以 pic.netbian.com 的 4K 动漫分类为例;当然,你也可以爬取其他分类(例如 4K 美女)的图片,只需把 URL 中的分类路径换掉即可。
二、发起请求并获取响应数据
# Request headers: send a desktop Chrome User-Agent string with every request
# (presumably so the site responds as it would to a regular browser).
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}
# Fetch the page. `url` is the listing-page address; it is assigned in the
# full script further down, not in this excerpt.
response=requests.get(url=url,headers=headers)
三、用 XPath 解析出想要的数据
# Select every <li> that sits under a <ul> inside <div class="slist">.
# `tree` is the lxml element tree built from the page HTML in the full script
# further down, not in this excerpt.
li_list=tree.xpath('//div[@class="slist"]/ul/li')
完整源码(注意缩进:Python 依靠缩进划分代码块,复制时请保持缩进不变)
import os
from urllib.parse import urljoin

import requests
from lxml import etree
# Site root used to resolve the image paths found in the listing pages.
BASE_URL = "https://pic.netbian.com/"
# Directory the downloaded images are written to.
SAVE_DIR = "./4k动漫"
# Seconds to wait for a response before giving up; without a timeout a
# stalled connection would block the script forever.
REQUEST_TIMEOUT = 10

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}

# Characters that are rejected in file names on common file systems, plus
# ASCII control characters; each one is replaced by an underscore.
_UNSAFE_CHARS = str.maketrans(
    {
        **{ch: "_" for ch in '\\/:*?"<>|'},
        **{chr(code): "_" for code in range(32)},
    }
)


def build_page_url(page: int) -> str:
    """Return the listing-page URL for the 1-based page number *page*.

    The first page is ``index.html``; later pages are ``index_<page>.html``.
    """
    if page == 1:
        return "https://pic.netbian.com/4kdongman/index.html"
    return f"https://pic.netbian.com/4kdongman/index_{page}.html"


def image_url(src: str) -> str:
    """Resolve the ``src`` attribute of an ``<img>`` against the site root.

    ``urljoin`` is used instead of string concatenation so that a
    root-relative ``src`` (one starting with ``/``) does not produce a
    double slash after the host name.
    """
    return urljoin(BASE_URL, src)


def safe_filename(name: str, fallback: str = "image") -> str:
    """Return *name* with characters unsuitable for a file name replaced.

    Surrounding whitespace is removed and every character in
    ``_UNSAFE_CHARS`` becomes ``_``. If nothing is left, *fallback* is
    returned so the caller always gets a non-empty name.
    """
    cleaned = name.strip().translate(_UNSAFE_CHARS).strip()
    return cleaned or fallback


def download_page(session, page: int) -> None:
    """Download every thumbnail listed on listing page *page*.

    *session* is a ``requests.Session`` (or any object with a compatible
    ``get`` method). Failures on the listing page or on a single image are
    reported and skipped instead of aborting the whole run.
    """
    url = build_page_url(page)
    print(url)
    try:
        response = session.get(url, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"{url} 请求失败: {exc}")
        return

    # Decode the page as GBK, as the original script does.
    response.encoding = 'gbk'
    tree = etree.HTML(response.text)
    if tree is None:
        # Defensive guard: nothing to query if the parser produced no tree.
        return

    for li in tree.xpath('//div[@class="slist"]/ul/li'):
        src_list = li.xpath('./a/img/@src')
        if not src_list:
            # Not every <li> necessarily wraps an image; skip instead of
            # raising IndexError.
            continue
        src = src_list[0]
        post_url = image_url(src)

        alt_list = li.xpath('.//a/img/@alt')
        # Fall back to the image's own base name when there is no alt text.
        fallback = os.path.splitext(os.path.basename(src))[0] or "image"
        title = alt_list[0] if alt_list else fallback
        img_name = safe_filename(title, fallback=fallback) + ".jpg"
        img_path = os.path.join(SAVE_DIR, img_name)

        try:
            img_response = session.get(post_url, timeout=REQUEST_TIMEOUT)
            # Do not save an HTTP error page as if it were an image.
            img_response.raise_for_status()
        except requests.RequestException as exc:
            print(f"{img_name}下载失败: {exc}")
            continue

        with open(img_path, "wb") as fp:
            fp.write(img_response.content)
        print(f"{img_name}下载完成")


def main() -> None:
    """Create the output directory and download listing pages 1 and 2."""
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(SAVE_DIR, exist_ok=True)
    # One session reuses the connection and carries the headers for all requests.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(1, 3):
            download_page(session, page)


if __name__ == "__main__":
    main()