# Haven't learned regular expressions yet, so this script practices with str.find() instead.
import requests
import os
# Create the "ooxx" folder that downloaded images are saved into.
# exist_ok=True keeps a second run from failing with FileExistsError.
os.makedirs("ooxx", exist_ok=True)
def get_page(html):
    """Download every image linked from a page's HTML into ``ooxx/``.

    Scans ``html`` for ``class="view_img_link"`` markers, rebuilds each
    image URL from the ``href="`` that precedes the marker, fetches it with
    ``requests`` and writes the response bytes to ``ooxx/<name>.png``.

    Args:
        html: Page source as a string.

    Returns:
        None. The results are the files written and the progress lines
        printed.
    """
    marker = 'class="view_img_link"'
    marker_pos = 0
    while True:
        # Each search resumes 500 characters past the previous hit (so the
        # first 500 characters of the page are never scanned).
        marker_pos = html.find(marker, marker_pos + 500)
        if marker_pos == -1:
            # No more image links. Stop here: continuing with -1 would turn
            # the href search below into a scan of the page's last ~100
            # characters and could "download" an unrelated link.
            break

        # marker_pos is at least 500 here, so the start index is never negative.
        href_pos = html.find('href="', marker_pos - 100)
        if href_pos == -1 or href_pos > marker_pos:
            # No href in front of this marker; nothing usable to download.
            continue

        # Skips the 8 characters of 'href="//' and stops 18 characters before
        # the marker -- presumably the length of '" target="_blank" ' in the
        # site's markup; verify against a live page.
        url = "http://" + html[href_pos + 8:marker_pos - 18]
        response = requests.get(url)

        # url[28:-4] drops a fixed-length 28-character prefix and a
        # 4-character suffix (presumably scheme+host+directory and the
        # extension -- TODO confirm); the file is always saved as .png.
        with open("ooxx/" + url[28:-4] + ".png", "wb") as image_file:
            image_file.write(response.content)  # save the image bytes

        # Report success only after the file has actually been written.
        print(url + "-----下载成功")
def find_url(html, count):
    """Extract the next-page URL from ``html``, print it, and return it.

    Locates the ``class="previous-comment-page">下一页</a>`` marker, then the
    first ``href="`` found from 100 characters before that marker, and takes
    the text between the two as a scheme-relative link.

    Args:
        html: Page source as a string.
        count: Page number shown in the printed progress line.

    Returns:
        ``"http:"`` followed by the extracted link text.
    """
    marker_at = html.find('class="previous-comment-page">下一页</a>')
    href_at = html.find('href="', marker_at - 100)
    # +6 steps over 'href="'; -2 stops before the closing quote and the
    # space that precede the marker.
    next_url = "http:" + html[href_at + 6:marker_at - 2]
    print(f"第{count!s}页: {next_url} =====================================")
    return next_url
def get_html(url):
    """Request ``url`` and return the response body decoded as text.

    The GET request is sent with a desktop Chrome ``User-Agent`` header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page source as a string (``Response.text``).
    """
    request_headers = {"User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3528.4 Safari/537.36"}
    return requests.get(url, headers=request_headers).text
if __name__ == '__main__':
    # Script entry point: walk `num` consecutive pages starting from the
    # landing page, downloading the images found on each one.
    num = 10  # number of pages to fetch
    url = "http://jandan.net/ooxx"
    print("第一页: "+url+" =======================================================")
    for i in range(num):
        html = get_html(url)  # fetch the current page's source
        get_page(html)  # download the images linked from it
        # Look up the next page unless this was the last one requested.
        # Tied to `num` rather than a literal 9 so changing `num` keeps working.
        if i != num - 1:
            url = find_url(html, i + 2)  # i + 2 is the 1-based number of the next page