这篇文章是上一篇的续集( 爬虫第五关----狂热粉丝),即将爬取到的歌曲信息存储到Excel里面。
理论知识
csv写入与读取(引用csv模块)
CSV写入步骤
代码实现
#csv写入的代码:
import csv
csv_file = open('demo.csv','w',newline='')
writer = csv.writer(csv_file)
writer.writerow(['电影','豆瓣评分'])
csv_file.close()
CSV读取步骤
代码实现
#csv读取的代码:
import csv
csv_file = open('demo.csv','r',newline='')
reader=csv.reader(csv_file)
for row in reader:
print(row)
Excel写入与读取(引用openpyxl模块)
Excel写入步骤
代码实现
#Excel写入的代码:
import openpyxl
wb = openpyxl.Workbook()
sheet = wb.active
sheet.title ='new title'
sheet['A1'] = '漫威宇宙'
rows = [['美国队长','钢铁侠','蜘蛛侠','雷神'],['是','漫威','宇宙', '经典','人物']]
for i in rows:
sheet.append(i)
print(rows)
wb.save('Marvel.xlsx')
Excel读取步骤
代码实现
#Excel读取的代码:
import openpyxl
wb = openpyxl.load_workbook('Marvel.xlsx')
sheet = wb['new title']
sheetname = wb.sheetnames
print(sheetname)
A1_value = sheet['A1'].value
print(A1_value)
代码实现
import requests,openpyxl
# 引用requests模块和openpyxl
wb=openpyxl.Workbook()
#创建工作簿
sheet=wb.active
#获取工作簿的活动表
sheet.title='restaurants'
#工作表重命名
sheet['A1'] ='歌曲名' #加表头,给A1单元格赋值
sheet['B1'] ='所属专辑' #加表头,给B1单元格赋值
sheet['C1'] ='播放时长' #加表头,给C1单元格赋值
sheet['D1'] ='播放链接' #加表头,给D1单元格赋值
url = 'https://c.y.qq.com/soso/fcgi-bin/client_search_cp'
headers = {
'origin':'https://y.qq.com',
# 请求来源,本案例中其实是不需要加这个参数的,只是为了演示
'referer':'https://y.qq.com/n/yqq/song/004Z8Ihr0JIu5s.html',
# 请求来源,携带的信息比“origin”更丰富,本案例中其实是不需要加这个参数的,只是为了演示
'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
# 标记了请求从什么设备,什么浏览器上发出
}
#伪装请求头
for x in range(20):
params = {
'ct':'24',
'qqmusic_ver': '1298',
'new_json':'1',
'remoteplace':'sizer.yqq.song_next',
'searchid':'48335540534966617',
't':'0',
'aggr':'1',
'cr':'1',
'catZhida':'1',
'lossless':'0',
'flag_qc':'0',
'p':str(x+1),
'n':'20',
'w':'周杰伦',
'g_tk':'5381',
'loginUin':'0',
'hostUin':'0',
'format':'json',
'inCharset':'utf8',
'outCharset':'utf-8',
'notice':'0',
'platform':'yqq.json',
'needNewCode':'0'
}
# 将参数封装为字典
res_music = requests.get(url,headers=headers,params=params)
# 调用get方法,下载这个字典
json_music = res_music.json()
print(json_music)
# 使用json()方法,将response对象,转为列表/字典
list_music = json_music['data']['song']['list']
#print(list_music)
# 一层一层地取字典,获取歌单列表
for music in list_music:
# list_music是一个列表,music是它里面的元素
name = music['name']
# 以name为键,查找歌曲名,把歌曲名赋值给name
album = music['album']['name']
# 查找专辑名,把专辑名赋给album
time = music['interval']
# 查找播放时长,把时长赋值给time
link = 'https://y.qq.com/n/yqq/song/' + str(music['mid']) + '.html\n\n'
# 查找播放链接,把链接赋值给link
sheet.append([name, album, time, link])
# 把name、album、time和link写成列表,用append函数多行写入Excel
print('歌曲名:' + name + '\n' + '所属专辑:' + album + '\n' + '播放时长:' + str(time) + '\n' + '播放链接:' + link)
wb.save('Jay.xlsx')