import requests
from lxml import etree
import time
# HTTP headers attached to every request this script makes; the User-Agent
# is a desktop Chrome browser string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/80.0.3987.132 Safari/537.36'
    ),
}

# Module-level counter used by save_img to number the saved image files.
name = 1
# Download a web page
def get_html(link, page=None):
    """Fetch ``link`` and return the response body decoded as UTF-8 text.

    Args:
        link: URL of the page to download.
        page: Optional page number shown in the progress message. When it
            is omitted, the URL is shown instead.

    Returns:
        The response body as a ``str``.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The page number is not derivable from the arguments unless the caller
    # passes it, so never rely on a module-level ``page`` being defined.
    if page is None:
        print("正在获取数据: {}".format(link))
    else:
        print("正在获取第{}页数据".format(page))
    # A timeout keeps a stalled connection from blocking the script forever.
    response = requests.get(link, headers=headers, timeout=10)
    return response.content.decode('utf-8')
# Parse the page and extract the real image URLs
def parse_html(html):
    """Return the ``src`` values of the images listed in ``html``.

    Args:
        html: HTML source of a listing page.

    Returns:
        The result of the XPath query selecting the ``src`` attribute of
        every ``<img>`` nested as ``div[@class="il_img"]/a/img``.
    """
    tree = etree.HTML(html)
    return tree.xpath('//div[@class="il_img"]/a/img/@src')
# Save images to local disk
def save_img(img_urls, save_dir=r'C:\Users\DELL\Desktop\python_wd\mig\天堂'):
    """Download each image in ``img_urls`` and save it as ``<n>.jpg``.

    ``<n>`` comes from the module-level counter ``name``, which is
    incremented after every saved file and persists across calls.

    Args:
        img_urls: Iterable of image URLs. Protocol-relative entries
            (starting with ``//``) get an ``https:`` prefix; anything else
            is requested unchanged.
        save_dir: Directory the files are written to. It is created when
            missing. Defaults to the folder the script originally used.

    Raises:
        requests.exceptions.RequestException: If a download fails or
            times out.
        OSError: If the directory or a file cannot be written.
    """
    import os  # local import: only this function needs it

    global name
    # Make sure the target folder exists instead of failing on open().
    os.makedirs(save_dir, exist_ok=True)
    for img_url in img_urls:
        # Only protocol-relative URLs need a scheme; prefixing an already
        # absolute URL would corrupt it.
        if img_url.startswith('//'):
            new_link = 'https:' + img_url
        else:
            new_link = img_url
        time.sleep(3)  # pause 3 s before each request
        response = requests.get(new_link, headers=headers, timeout=10)
        if response.status_code != 200:
            # Do not store an error page under a .jpg name.
            print("跳过下载失败的图片: {} (HTTP {})".format(
                new_link, response.status_code))
            continue
        print("正在下载{}张图片".format(name))
        with open(os.path.join(save_dir, str(name) + '.jpg'), 'wb') as f:
            f.write(response.content)
        name += 1
def main(page):
    """Scrape one listing page and save its images.

    Builds the listing URL for ``page``, downloads it, extracts the image
    URLs and hands them to ``save_img``.

    Args:
        page: Page number substituted into the listing URL.
    """
    page_url = 'https://www.ivsky.com/tupian/meinv_t50/index_{}.html'.format(page)
    save_img(parse_html(get_html(page_url)))
if __name__ == '__main__':
    # Crawl listing pages 1, 2 and 3 in order.
    for page in (1, 2, 3):
        main(page)
# 运行代码数据部分结果:
# ————————END