import requests
from bs4 import BeautifulSoup
# Request headers sent with every page fetch.
# BUG FIX: the correct HTTP header name is 'User-Agent' (with a hyphen).
# The original key 'UserAgent' is not a recognized header, so the server
# saw requests' default "python-requests/x.y" UA instead of this one.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36'
}
def get_songs(url):
    """Scrape one Kugou ranking page (22 songs per page) and return its songs.

    Parameters:
        url: URL of one page of the Kugou TOP500 ranking.

    Returns:
        A list of dicts, one per song, each with the stripped-string keys
        'rank', 'title' and 'time' (play duration).

    Raises:
        requests.HTTPError: if the server answers with an error status.
    """
    res = requests.get(url, headers=headers)
    res.raise_for_status()  # fail loudly instead of silently parsing an error page
    soup = BeautifulSoup(res.text, 'html.parser')
    # Three parallel element lists from the ranking table: rank numbers,
    # song titles, and play durations.
    ranks = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_num')
    titles = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > a')
    times = soup.select('#rankWrap > div.pc_temp_songlist > ul > li > span.pc_temp_tips_r > span')
    # Zip the parallel lists element-wise into one dict per song.
    return [
        {'rank': rank.get_text().strip(),
         'title': title.get_text().strip(),
         'time': duration.get_text().strip()}
        for rank, title, duration in zip(ranks, titles, times)
    ]
# Crawl all 23 ranking pages (22 songs per page, ~TOP500 in total) and
# collect one list of song dicts per page.
totals = []
for page in range(1, 24):
    page_url = 'http://www.kugou.com/yy/rank/home/{}-8888.html?from=rank'.format(page)
    totals.append(get_songs(page_url))  # one page -> one list of dicts
print(totals)

import pandas

# Build one DataFrame per page, then concatenate them.
# ignore_index=True gives the combined frame a single continuous 0..N-1
# index directly, replacing the manual deal2.index reassignment.
frames = list(map(pandas.DataFrame, totals))
combined = pandas.concat(frames, ignore_index=True)
# NOTE(review): writing legacy .xls needs the xlwt engine, which pandas
# removed in 2.0 — switch to 'kougouTOP500.xlsx' (openpyxl) if this fails.
combined.to_excel('kougouTOP500.xls')  # save the full ranking to Excel
# 爬取酷狗top500 — crawl the Kugou TOP500 chart (blog title; converted to a comment so the file parses)
# 最新推荐文章于 2024-03-29 18:02:41 发布 (blog metadata, not part of the program)