下面这段简单的代码用于把某网站第 1–100 页的图片爬取到本地。
以下是我的一些代码,小白轻喷
import os.path
from urllib.parse import urljoin, urlsplit

import lxml
from lxml import etree
import requests
# Browser-like User-Agent sent with every request.
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/118.0.0.0 Safari/537.36 Edg/118.0.2088.46'
    ),
}
# Listing-page URL; the page number is substituted for %s.
PAGE_URL_TEMPLATE = 'https://pic.netbian.com/4kdongman/index_%s.html'
# NOTE(review): the site appears to serve its first listing page at
# index.html rather than index_1.html -- confirm. It is only used as a
# fallback when the templated page-1 URL answers 404.
FIRST_PAGE_URL = 'https://pic.netbian.com/4kdongman/index.html'
# Directory the downloaded images are written to.
IMAGE_DIR = './image'
# Seconds to wait for a response before giving up, so that one stalled
# connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def image_filename(img_url):
    """Return the last path segment of *img_url*.

    The query string and fragment are ignored so they never end up in the
    file name. Returns an empty string when the URL path ends with '/'.
    """
    return urlsplit(img_url).path.rsplit('/', 1)[-1]


def fetch_page_image_urls(session, page):
    """Return absolute image URLs found on listing page number *page*.

    Args:
        session: a ``requests.Session`` used to issue the request.
        page: 1-based listing page number.

    Returns:
        A list of absolute URLs built from the ``src`` attributes matched
        by ``//div/ul/li/a/img/@src``; empty if the page has no matches.

    Raises:
        requests.RequestException: on a network error, a timeout, or a
            non-success HTTP status.
    """
    page_url = PAGE_URL_TEMPLATE % page
    resp = session.get(page_url, timeout=REQUEST_TIMEOUT)
    if page == 1 and resp.status_code == 404:
        # Retry the first page under its un-numbered name (see FIRST_PAGE_URL).
        page_url = FIRST_PAGE_URL
        resp = session.get(page_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()

    tree = etree.HTML(resp.text)
    if tree is None:
        # Guard: a None tree (e.g. from an empty body) has no .xpath().
        return []
    # Resolve each src against the page URL so root-relative, relative and
    # already-absolute values all produce a valid URL.
    return [urljoin(page_url, src)
            for src in tree.xpath("//div/ul/li/a/img/@src")]


def download_image(session, img_url, directory=IMAGE_DIR):
    """Download *img_url* into *directory*.

    Args:
        session: a ``requests.Session`` used to issue the request.
        img_url: absolute URL of the image.
        directory: existing directory to write the file into.

    Returns:
        True if a file was written, False if the URL has no usable file name.

    Raises:
        requests.RequestException: on a network error, a timeout, or a
            non-success HTTP status (so error pages are never saved as images).
        OSError: if the file cannot be written.
    """
    filename = image_filename(img_url)
    if not filename:
        return False
    resp = session.get(img_url, timeout=REQUEST_TIMEOUT)
    resp.raise_for_status()
    with open(os.path.join(directory, filename), 'wb') as out:
        out.write(resp.content)
    return True


def main(first_page=1, last_page=100):
    """Download the images of listing pages *first_page*..*last_page* (inclusive).

    A page that cannot be fetched is reported and skipped; an image that
    cannot be downloaded is reported and the rest of its page continues.
    """
    os.makedirs(IMAGE_DIR, exist_ok=True)
    # One session reuses connections across the many requests to one host.
    with requests.Session() as session:
        session.headers.update(HEADERS)
        for page in range(first_page, last_page + 1):
            try:
                image_urls = fetch_page_image_urls(session, page)
            except requests.RequestException as err:
                print("第{}页爬取失败: {}".format(page, err))
                continue
            for img_url in image_urls:
                try:
                    download_image(session, img_url)
                except requests.RequestException as err:
                    print("图片下载失败 {}: {}".format(img_url, err))
            print("第{}页爬取成功".format(page))


if __name__ == '__main__':
    main()