1.元素获取:
from tqdm import tqdm
import requests
from selenium import driver
from bs4 import BeautifulSoup
def get_cont(target):
    """Download the audio referenced by a song page.

    Opens ``target`` in an Opera WebDriver session, parses the rendered page
    and fetches the ``src`` of the first ``<audio>`` element that has one.

    Args:
        target: URL of the song page to open.

    Returns:
        The raw bytes of the audio file, or ``None`` when the page contains
        no ``<audio>`` element with a ``src`` attribute.
    """
    # selenium exposes the browser classes under ``webdriver``; there is no
    # ``selenium.driver`` module, so import the right name locally.
    # NOTE(review): webdriver.Opera may be missing in recent Selenium
    # releases -- confirm against the installed version.
    from selenium import webdriver

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko)',
    }

    webbrowser = webdriver.Opera()
    try:
        webbrowser.get(url=target)
        page = BeautifulSoup(webbrowser.page_source, 'lxml')
    finally:
        # Always release the browser process, even if loading the page fails.
        webbrowser.quit()

    for audio in page.find_all('audio'):
        music_url = audio.get('src')
        if music_url:
            return requests.get(music_url, headers=headers).content
    return None
if __name__ == '__main__':
    # Script entry point: open the playlist page, collect every link that
    # points at a song page (*.html) and save each song as
    # 音乐/<title>.mp3 using get_cont().
    import os

    # selenium exposes the browser classes under ``webdriver``, not ``driver``.
    from selenium import webdriver

    server = 'https://www.kugou.com/yy/special/single/3672076.html'
    output_dir = '音乐'
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    browser = webdriver.Opera()
    try:
        browser.get(server)
        soup = BeautifulSoup(browser.page_source, 'lxml')
    finally:
        # The rendered HTML is all that is needed; close the browser.
        browser.quit()

    song_list = soup.find('div', class_='list1')
    # find() returns None when the container is absent from the page.
    links = song_list.find_all('a') if song_list is not None else []

    for song_url in tqdm(links):
        url = (song_url.get('href') or '').strip()
        name = song_url.get('title')
        # Only links to song pages (*.html) that carry a title are usable.
        if url.split('.')[-1] != 'html' or not name:
            continue
        response = get_cont(url)
        if response is None:
            # The song page exposed no downloadable audio source.
            continue
        with open(os.path.join(output_dir, name + '.mp3'), 'wb') as f:
            f.write(response)
2.选取目标,网页解析:
直接打开对应的下载页面即可下载(mp3 的网页地址已被加密):
3.分析网页要获取的元素id:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
# HTTP request headers used for requests to music.163.com.
# The CSRF token and cookie are intentionally left as empty strings here.
HEADERS = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'referer': 'https://music.163.com/',
    'x-csrf-token': '',
    'x-requested-with': 'XMLHttpRequest',
    'cookie': '',
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/90.0.4430.212 Safari/537.36'
    ),
}
# Scrape target: the chart page whose song list is extracted.
POPULAR_URL = "https://music.163.com/discover/toplist?id=19723756"
# Prefix of the per-song media URL; the song id and '.mp3' are appended later.
server = 'http://music.163.com/song/media/outer/url?id='
# NOTE(review): webdriver.Opera may be missing in recent Selenium releases --
# confirm against the installed version.
browser = webdriver.Opera()
try:
    # Fetch the page and parse the rendered HTML.
    browser.get(POPULAR_URL)
    soup = BeautifulSoup(browser.page_source, 'lxml')
finally:
    # Only the page source is needed; do not leave the browser running.
    browser.quit()
4.获取id[:10]:
# The parsed page from the previous step is bound to `soup`; `webtext` is not
# defined at this point. Each <a> inside the hidden <ul class="f-hide"> list
# carries a song link in `href` and the song title as its text.
texts = soup.select('ul.f-hide > li > a')
for text in texts:
    # After the loop, `id` and `name` hold the values of the last entry only.
    id = text.get('href')
    name = text.string
5.下载:
import os

# open() does not create missing directories, so make sure the target exists.
os.makedirs('音乐', exist_ok=True)

# Download every song in `texts` to 音乐/<title>.mp3.
for text in texts:
    name = text.string
    if not name:
        # No plain-text title to build a file name from; skip this entry.
        continue
    # Take the id from this entry's own link (the part after the last '=')
    # instead of reusing a value left over from an earlier loop.
    url = server + text.get('href').split('=')[-1] + '.mp3'
    print(name, url)
    response = requests.get(url, headers=HEADERS)
    with open(os.path.join('音乐', name + '.mp3'), 'wb') as f:
        f.write(response.content)
6.完整代码:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from tqdm import tqdm
# Headers attached to every HTTP request made by this script.
# 'x-csrf-token' and 'cookie' are deliberately empty strings.
HEADERS = {
    'Accept': 'application/json, text/javascript, */*; q=0.01',
    'referer': 'https://music.163.com/',
    'x-csrf-token': '',
    'x-requested-with': 'XMLHttpRequest',
    'cookie': '',
    'User-Agent':
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
        ' Chrome/90.0.4430.212 Safari/537.36',
}

# Chart page that lists the songs to download.
server_url = 'https://music.163.com/discover/toplist?id=19723756'

# Prefix of the per-song media URL; the song id and '.mp3' are appended to it.
server = 'http://music.163.com/song/media/outer/url?id='
if __name__ == '__main__':
    # Script entry point: fetch the chart page with requests, read the song
    # links from the hidden <ul class="f-hide"> list and save each song as
    # 音乐/<title>.mp3.
    import os

    output_dir = '音乐'
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(output_dir, exist_ok=True)

    response = requests.get(server_url, headers=HEADERS)
    response.encoding = 'utf-8'
    webtext = BeautifulSoup(response.text, 'lxml')
    texts = webtext.select('ul.f-hide > li > a')

    for text in tqdm(texts):
        name = text.string
        if not name:
            # No plain-text title to build a file name from; skip this entry.
            continue
        # The song id is the part of the link after the last '='.
        url = server + text.get('href').split('=')[-1] + '.mp3'
        print(name, url)
        try:
            song = requests.get(url, headers=HEADERS)
            with open(os.path.join(output_dir, name + '.mp3'), 'wb') as f:
                f.write(song.content)
        except FileNotFoundError as err:
            # The path could not be opened (presumably a title containing a
            # path separator); report it and move on to the next song.
            print('skipped %s: %s' % (name, err))
            continue