#酷狗网络红歌榜
import requests
from bs4 import BeautifulSoup
# Request headers: send a desktop-browser User-Agent with every request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
}
def getdata(url):
    """Fetch one chart page and extract its song entries.

    Downloads ``url`` using the module-level ``headers``, parses the HTML
    with BeautifulSoup's ``lxml`` parser, and pairs up the rank, title and
    duration elements located by CSS selectors.

    Args:
        url: Address of the chart page to download.

    Returns:
        A list of dicts, one per song, with the keys '排名' (rank),
        '歌手' (singer), '歌名' (song title) and '时长' (duration).
    """
    # A timeout keeps the crawl from hanging forever on a stalled connection.
    wb_data = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(wb_data.text, 'lxml')  # parse the page
    ranks = soup.select('span.pc_temp_num')
    titles = soup.select('div.pc_temp_songlist > ul > li > a')
    times = soup.select('span.pc_temp_tips_r > span')

    records = []
    for rank, title, duration in zip(ranks, titles, times):
        # The link text is assumed to look like "singer - song" (TODO confirm
        # against the live page). partition() splits on the first '-' only,
        # so a song name containing a dash stays intact, and a title with no
        # dash yields an empty song name instead of raising IndexError.
        singer, _, song = title.get_text().partition('-')
        records.append({
            '排名': rank.get_text().strip(),
            '歌手': singer.strip(),
            '歌名': song.strip(),
            '时长': duration.get_text().strip(),
        })
    # Hand the rows back to the caller; previously they were built and dropped.
    return records
# Script entry point: crawl chart pages 1 through 11.
if __name__ == '__main__':
    # Build the URL of every chart page; the page number fills the {} slot.
    urls = [
        'https://www.kugou.com/yy/rank/home/{}-23784.html'.format(i)
        for i in range(1, 12)
    ]
    for url in urls:
        getdata(url)
# Question (course project, help wanted): how can the scraped data be exported to Excel?