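'''
Approach: crawl the ringtone list pages of i4.cn and download every track.
1) Fetch each list page and collect its ringtone entries (div.list.ring_list).
2) Read each entry's title (.title) and audio URL (.audio_play, data-mp3).
3) Download the audio file into the local "音乐" directory.
'''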
import requests
from bs4 import BeautifulSoup
import os
# Make sure the download directory exists before anything is saved into it.
# exist_ok=True makes this a no-op when the directory is already there and
# avoids the race between a separate existence check and the creation.
os.makedirs("音乐", exist_ok=True)
import lxml
from urllib import request
url = "https://www.i4.cn/ring_22_0_1.html"


def ring_filename(title):
    """Build the local file name used to store a ringtone.

    Surrounding whitespace is dropped and path separators are replaced with
    underscores, so a title can never address a sub-directory of the download
    folder. The ``.mp3`` extension is then appended.

    Args:
        title: Ringtone title text taken from the list page.

    Returns:
        The cleaned title followed by ``.mp3``.
    """
    cleaned = title.strip()
    for separator in ("/", "\\"):
        cleaned = cleaned.replace(separator, "_")
    return cleaned + ".mp3"


def download_rings(first_page=1, last_page=4, save_dir="音乐"):
    """Download the ringtones listed on a range of i4.cn ring list pages.

    For every page the entries (``div`` elements with class
    ``list ring_list``) are scanned; the first ``.title`` element supplies the
    name and the ``data-mp3`` attribute of the first ``.audio_play`` element
    supplies the audio URL. Entries missing either piece are skipped, and a
    failed download is reported without aborting the remaining ones.

    Args:
        first_page: Number of the first list page to fetch (inclusive).
        last_page: Number of the last list page to fetch (inclusive).
        save_dir: Directory the audio files are written to; created if absent.
    """
    os.makedirs(save_dir, exist_ok=True)
    for page in range(first_page, last_page + 1):
        page_url = "https://www.i4.cn/ring_22_0_%d.html" % page
        # A timeout keeps one unresponsive page from hanging the whole run.
        resp = requests.get(url=page_url, timeout=10)
        resp.encoding = 'utf-8'
        print("响应码: ", resp.status_code)
        if resp.status_code != 200:
            # An error response carries no usable entries; try the next page.
            continue
        soup = BeautifulSoup(resp.text, "lxml")
        for item in soup.find_all('div', class_="list ring_list"):
            # Query the tag directly; re-parsing str(item) is unnecessary.
            titles = item.select(".title")
            players = item.select(".audio_play")
            if not titles or not players:
                continue
            # get_text() also works when the title tag has nested children,
            # where .string would be None.
            title_text = titles[0].get_text(strip=True)
            mp3_url = players[0].get("data-mp3")
            if not title_text or not mp3_url:
                continue
            name = ring_filename(title_text)
            print(mp3_url)
            try:
                # Save under the .mp3 name only (no extra ".jpg" suffix).
                request.urlretrieve(mp3_url, filename=os.path.join(save_dir, name))
            except OSError as err:
                # URLError/HTTPError derive from OSError; keep going with the
                # remaining entries instead of aborting the crawl.
                print("%s下载失败: %s" % (name, err))
                continue
            print("%s下载成功" % name + '>' * 10)


if __name__ == '__main__':
    download_rings(1, 4)