import csv
import json
import time

import pandas as pd
import requests
# HTTP headers passed to requests.get() by get_comments(): the target host and
# a desktop Chrome User-Agent string.
headers = {
    'Host': 'music.163.com',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; WOW64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/63.0.3239.132 Safari/537.36'
    ),
}
def get_comments(page):
    """Fetch one page of song comments and append them to ``music_comments.csv``.

    Each comment is printed and written as a ``user_id, comment`` row. Rows
    are written with the ``csv`` module so that commas or quotes inside a
    comment are quoted instead of breaking the column layout.

    Args:
        page: Value sent as the ``offset`` query parameter (index of the
            first comment to return; the request asks for 20 comments).

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx HTTP status.
        KeyError: If the JSON payload lacks the expected ``comments`` /
            ``user`` / ``content`` fields.
    """
    url = 'http://music.163.com/api/v1/resource/comments/R_SO_4_1313354324?limit=20&offset=' + str(page)
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url=url, headers=headers, timeout=10)
    response.raise_for_status()

    # Parse the response body as JSON.
    result = json.loads(response.text)
    items = result['comments']

    rows = []
    for item in items:
        # User ID
        user_id = str(item['user']['userId'])
        # Comment text, flattened to a single line
        comment = item['content'].strip().replace('\n', '')
        print(user_id, comment)
        rows.append([user_id, comment])

    if not rows:
        # Nothing to record; do not touch the output file.
        return

    # Open the file once per page rather than once per comment. newline=''
    # is what the csv module expects so it can control line endings itself.
    with open('music_comments.csv', 'a', newline='', encoding='utf-8-sig') as f:
        csv.writer(f).writerows(rows)
def main(start=0, stop=5000):
    """Download comment pages for every offset in ``range(start, stop, 20)``.

    The defaults reproduce the original hard-coded crawl (offsets 0 to 4980,
    i.e. 250 pages of 20 comments). The alternative range that used to live
    in a commented-out line can now be requested as ``main(210000, 230000)``.

    Args:
        start: First comment offset to fetch (inclusive).
        stop: Offset at which to stop (exclusive).
    """
    # Must stay equal to the ``limit=20`` query parameter in get_comments().
    page_size = 20
    for offset in range(start, stop, page_size):
        print('\n---------------第 ' + str(offset // page_size + 1) + ' 页---------------')
        get_comments(offset)
if __name__ == '__main__':
    main()

    # Build the spreadsheet from the CSV file that get_comments() appends to.
    # (The previous code referenced ``items``, which only exists as a local
    # variable inside get_comments() and raised NameError here.)
    rows = []
    with open('music_comments.csv', newline='', encoding='utf-8-sig') as f:
        for record in csv.reader(f):
            if len(record) < 2:
                continue  # blank or malformed line
            # A line may end in a delimiter (yielding a trailing empty field)
            # and may contain unquoted commas inside the comment text; fold
            # everything after the first field back into one comment.
            comment_fields = record[1:]
            if len(comment_fields) > 1 and comment_fields[-1] == '':
                comment_fields = comment_fields[:-1]
            rows.append([record[0], ','.join(comment_fields)])

    df = pd.DataFrame(rows, columns=['用户id', '评论内容'])
    # NOTE(review): pandas 2.0+ no longer includes a writer for the legacy
    # .xls format; if to_excel rejects this path, switch the extension to
    # .xlsx (requires openpyxl).
    df.to_excel(r'C:\Users\tanongg\Desktop\评论内容.xls')
# [Untitled]
# Latest recommended article published on 2024-07-13 20:36:14