# Scrape the Douban Music Top 250 chart and write the results to a file.
from urllib.request import Request, urlopen
import bs4
import requests
import re
import json
import xlwt
# URL template for one page of the chart; ``start`` is the zero-based offset
# of the first entry shown on that page.
BASE_URL = 'https://music.douban.com/top250?start={}'
# Browser-like User-Agent sent with every request.
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36'}
# NOTE(review): assumes the chart lists 25 entries per page and 250 entries
# in total -- confirm against the live site.
PAGE_SIZE = 25
TOTAL_ENTRIES = 250
# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 30
# Header row of the spreadsheet: title, artist, date, genre, rating.
COLUMN_TITLES = ('歌名', '歌手', '时间', '类型', '评分')
OUTPUT_FILE = 'yinyue.xls'


def split_details(text):
    """Split a '/ '-separated detail line into (singer, release_date, genre).

    The first field is taken as the singer, the second as the release date
    and the last as the genre.  Fields that are absent come back as empty
    strings instead of raising ``IndexError`` or repeating another field.
    """
    fields = [part.strip() for part in text.split('/ ')]
    singer = fields[0]
    release_date = fields[1] if len(fields) > 1 else ''
    # Only treat the last field as the genre when it is not already the
    # singer or the release date.
    genre = fields[-1] if len(fields) > 2 else ''
    return singer, release_date, genre


def tag_text(tag):
    """Return the text of *tag* with whitespace runs collapsed, or '' for None."""
    if tag is None:
        return ''
    # get_text() keeps the newlines and indentation of the HTML source.
    return ' '.join(tag.get_text().split())


def fetch_page(start):
    """Download the chart page beginning at offset *start* and parse it."""
    request = Request(url=BASE_URL.format(start), headers=HEADERS)
    # The context manager closes the HTTP response even if reading fails.
    with urlopen(request, timeout=REQUEST_TIMEOUT) as response:
        return bs4.BeautifulSoup(response.read(), 'html.parser')


def parse_entries(soup):
    """Yield (name, singer, release_date, genre, score) for each entry in *soup*."""
    for entry in soup.find_all('div', {'class': 'pl2'}):
        music_name = tag_text(entry.find('a'))
        singer, release_date, genre = split_details(tag_text(entry.find('p')))
        # The rating element may be missing; tag_text() then yields ''.
        score = tag_text(entry.find('span', {'class': 'rating_nums'}))
        yield music_name, singer, release_date, genre, score


def main():
    """Scrape every chart page and save the rows to ``OUTPUT_FILE``."""
    workbook = xlwt.Workbook(encoding='utf-8')
    worksheet = workbook.add_sheet('sheet1', cell_overwrite_ok=True)
    for column, title in enumerate(COLUMN_TITLES):
        worksheet.write(0, column, label=title)

    row = 1  # row 0 holds the header
    for start in range(0, TOTAL_ENTRIES, PAGE_SIZE):
        for record in parse_entries(fetch_page(start)):
            for column, value in enumerate(record):
                worksheet.write(row, column, value)
            row += 1

    workbook.save(OUTPUT_FILE)


if __name__ == '__main__':
    main()