# coding:utf-8
from bs4 import BeautifulSoup
import re
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
import csv
# Module-level Selenium session shared by every function below.
browser = webdriver.Chrome()
wait = WebDriverWait(browser, 5) # explicit-wait helper, 5 s timeout — NOTE(review): never used below; kept for compatibility
# 返回歌手名字和歌手id
def get_singer(url):
    """Scrape a NetEase Music artist-category page and return singer data.

    Parameters
    ----------
    url : str
        Artist-category page URL (e.g. .../discover/artist/cat?id=...).

    Returns
    -------
    tuple[list[str], list[str]]
        Parallel lists of singer display names and singer ids
        (the numeric id string taken from each link's ``href``).
    """
    browser.get(url)
    # The artist list is rendered inside an iframe; switch into it first.
    browser.switch_to.frame('g_iframe')
    html = browser.page_source  # full rendered page source from selenium
    soup = BeautifulSoup(html, 'lxml')
    # With-picture singers: m-artist-box > li > p > a.nm.nm-icn.f-thide.s-fc0
    # No-picture singers:   m-artist-box > li > a
    sname = []
    songids = []
    for tag in soup.select('.nm.nm-icn.f-thide.s-fc0'):
        sname.append(tag.get_text())
        # href looks like '/artist?id=12345' — keep the id after '='.
        # (Replaces the old str(re.findall(...)).split(...) chain, which
        # string-ified a Python list and parsed the repr — fragile.)
        href = tag.get('href', '')
        songids.append(href.split('=')[-1])
    return sname, songids
def song_url():
    """Build the artist-page (top-50 songs) URL for every singer.

    Reads the module-global ``url`` (set in ``__main__``) to look up the
    singer ids, then formats one artist URL per id.

    Returns
    -------
    list[str]
        One ``http://music.163.com/#/artist?id=...`` URL per singer.
    """
    # Only the ids are needed here; names are ignored.
    _, songids = get_singer(url)
    # Avoid shadowing the builtin ``id`` that the original loop used.
    return ['http://music.163.com/#/artist?id={}'.format(sid) for sid in songids]
def song_name():
    """Collect each singer's top-50 song titles.

    Visits every URL produced by :func:`song_url` with the shared browser,
    switches into the ``g_iframe`` frame, and pulls song titles out of the
    rendered page.

    Returns
    -------
    list[list[str]]
        One list of song titles per singer, in the same order as
        :func:`song_url` / :func:`get_singer`.
    """
    songnames = []
    for top50url in song_url():
        browser.get(top50url)
        browser.switch_to.frame('g_iframe')  # song table also lives in the iframe
        soup = BeautifulSoup(browser.page_source, 'lxml')
        # Titles sit in the ``title`` attribute of <b> tags nested under spans.
        songinfo = soup.select('div div div span a b')
        songnames.append(re.findall('title="(.*?)"', str(songinfo)))
    return songnames
def get_data():
    """Flatten scraped results into one CSV row per (singer, song) pair.

    Returns
    -------
    list[dict]
        Dicts with keys ``'歌手名字'`` (singer name) and ``'top50歌曲'``
        (one song title), ready for :class:`csv.DictWriter`.

    Note
    ----
    The original implementation iterated ``info[i]`` for the name key too,
    which walked the singer's NAME character by character and emitted dicts
    holding only a single key — producing half-empty CSV rows. Each row now
    carries both fields, pairing the singer with each of their songs.
    """
    sname, _ = get_singer(url)
    songnames = song_name()  # per-singer lists of top-50 titles
    data = []
    for singer, songs in zip(sname, songnames):
        # One complete row per song (one-to-one relation).
        for song in songs:
            data.append({'歌手名字': singer, 'top50歌曲': song})
    return data
def download2csv(path='E:\\歌手top50.csv'):
    """Write the scraped (singer, song) rows to a CSV file.

    Parameters
    ----------
    path : str, optional
        Output CSV path. Defaults to the original hard-coded location,
        so existing callers are unaffected.
    """
    print('保存歌手信息中...请稍后查看')
    # utf-8-sig BOM keeps Excel happy with Chinese text; newline='' is the
    # csv-module requirement to avoid blank lines on Windows.
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        fieldnames = ['歌手名字', 'top50歌曲']
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()  # header row first, otherwise the file has no column names
        data = get_data()     # rows to persist
        writer.writerows(data)
    print('保存成功')
# Category id prefixes: 1xxx = Chinese; 2xxx = Western; 4 = other; 6 = Japanese; 7 = Korean
if __name__ == '__main__':
    idlist = [1001]
    for cat_id in idlist:  # renamed from ``id`` to avoid shadowing the builtin
        # ``url`` is intentionally a module global — get_singer()/get_data() read it.
        url = 'http://music.163.com/#/discover/artist/cat?id={}&initial=-1'.format(cat_id)
        download2csv()
# 有啥不懂的可以留言问我啊 (If anything is unclear, feel free to leave a comment and ask.)