网易云音乐评论爬取、情感分析一体化

开局一张图(原文此处为配图,抓取时仅保留了占位文字“在这里插入图片描述”)

网易云诞生了很多励志鸡汤,那么多的伤感流行句式,那么多微甜情话,今天我们就看他个天翻地覆,话不多说直接上个干货。

导入包、相关库

import requests
import math
import random
from Crypto.Cipher import AES
import codecs
import base64
import tkinter
from snownlp import SnowNLP
import matplotlib.pyplot as plt
import numpy as np

获取窗体内输入的内容

def go():
    """Button callback: read ``entry1`` and keep its text as the global ``song_id``.

    The text is echoed to stdout before it is stored.
    """
    global song_id
    typed = str(entry1.get())  # current content of the text box
    print(typed)
    song_id = typed
def go1():
    """Button callback: read ``entry2`` and keep its text as the global ``path2``.

    The text is echoed to stdout before it is stored.
    """
    global path2
    typed = str(entry2.get())  # current content of the text box
    print(typed)
    path2 = typed
def go2():
    """Button callback: read ``entry3`` and keep its text as the global ``file_name``.

    The text is echoed to stdout before it is stored.
    """
    global file_name
    typed = str(entry3.get())  # current content of the text box
    print(typed)
    file_name = typed
def go3():
    """Button callback: read ``entry4`` and keep its text as the global ``cleaning_file``.

    The text is echoed to stdout before it is stored.
    """
    global cleaning_file
    typed = str(entry4.get())  # current content of the text box
    print(typed)
    cleaning_file = typed
def go4():
    """Button callback: read ``entry5`` and keep its text as the global ``cleaned_file``.

    The text is echoed to stdout before it is stored.
    """
    global cleaned_file
    typed = str(entry5.get())  # current content of the text box
    print(typed)
    cleaned_file = typed
def go5():
    """Button callback: read ``entry6`` and keep its text as the global ``analysis_path``.

    The text is echoed to stdout before it is stored.
    """
    global analysis_path
    typed = str(entry6.get())  # current content of the text box
    print(typed)
    analysis_path = typed

设置窗体大小、标题

# Create the root Tk window that hosts every input widget built below.
win = tkinter.Tk()
win.geometry('500x500')  # initial window size
win.title('网易云音乐情感分析')  # text shown in the window title bar

创建窗体的按钮,附带提示信息如输入规范

# Each input is an Entry plus a Button; clicking the button runs the matching
# go*() callback, which copies the entry text into a module-level global.
# A value is only stored when its button is clicked, so every button must be
# pressed before the window is closed.

# Song id -> go() -> song_id
entry1 = tkinter.Entry(win, width=50, fg="black")
entry1.pack()
button = tkinter.Button(win, text="请输入id,输入后请点击我", command=go)  # clicking runs go()
button.pack()  # add the button to the window

# Output directory -> go1() -> path2
# The backslash in the label is escaped ("\\") so the literal no longer relies
# on an invalid escape sequence; the displayed text is unchanged.
button1 = tkinter.Button(win, text="请输入存储文件的路径(以\\结束,路径就可以,下一个输入框输入名称),输入后请点击我", command=go1, bg='yellow')  # clicking runs go1()
entry2 = tkinter.Entry(win, width=50, fg="black", bg='yellow')
entry2.pack()
button1.pack()  # add the button to the window

# Output file name (".txt" is appended later) -> go2() -> file_name
entry3 = tkinter.Entry(win, width=50, fg="black", bg='green')
entry3.pack()
button2 = tkinter.Button(win, text="请输入存储文件的名称(名称就可以,我们自动为您生成txt文件),输入后请点击我", command=go2, bg='green')  # clicking runs go2()
button2.pack()  # add the button to the window

# File to clean -> go3() -> cleaning_file
entry4 = tkinter.Entry(win, width=50, fg="black", bg='gray')
entry4.pack()
button3 = tkinter.Button(win, text="请输入将要清洗文件的路径(具体到格式),输入后请点击我", command=go3, bg='gray')  # clicking runs go3()
button3.pack()  # add the button to the window

# Destination of the cleaned file -> go4() -> cleaned_file
entry5 = tkinter.Entry(win, width=50, fg="black", bg='pink')
entry5.pack()
button4 = tkinter.Button(win, text="请输入清洗完毕文件的路径(具体到格式),输入后请点击我", command=go4, bg='pink')  # clicking runs go4()
button4.pack()  # add the button to the window

# File to run sentiment analysis on -> go5() -> analysis_path
entry6 = tkinter.Entry(win, width=50, fg="black", bg='orange')
entry6.pack()
button5 = tkinter.Button(win, text="请输入进行情感分析文件的路径(具体到格式),输入后请点击我", command=go5, bg='orange')  # clicking runs go5()
button5.pack()  # add the button to the window

# "Done" button closes the window. pack() returns None, so it is called
# separately to keep button6 bound to the widget itself.
button6 = tkinter.Button(win, text='全部输入完毕请点击我', command=win.destroy)
button6.pack()
# Blocks until the window is destroyed; the rest of the script runs afterwards.
win.mainloop()

构造函数获取歌曲评论数据(JSON格式)

def get_comments_json(url, data):
    """POST the form ``data`` to ``url`` and return the decoded JSON reply.

    Args:
        url: Endpoint to post to.
        data: Form fields for the request body (callers in this file pass a
            dict with ``params`` and ``encSecKey``).

    Returns:
        The parsed JSON body when the server answers with HTTP 200.
        ``None`` when the status is not 200, when the request itself fails,
        or when the body cannot be decoded as JSON.
    """
    headers = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN,zh;q=0.9',
        'Connection': 'keep-alive',
        # Author's note: Province/City are region codes (e.g. 0530 / 0531 for
        # Jinan, Shandong); replace the **** placeholders, preferably with the
        # values captured from your own browser session.
        # NOTE(review): the remaining cookie values are a hard-coded capture
        # of one browser session and are presumably stale — confirm whether
        # the endpoint still accepts them.
        'Cookie': (
            'Province=****; City=****; '
            '_ntes_nnid=861aff69ca0c1a71635a5ed2a0243acb,1544690244162; _ntes_nuid=861aff69ca0c1a71635a5ed2a0243acb; UM_distinctid=167a6b6a0a83f1-00127701049a17-335a497c-e1000-167a6b6a0a9517; usertrack=ezq0o1wSGjl5etI0BD5JAg==; vjuids=2a1d704ac.167a6b6cb26.0.b0103dac2188b; vjlast=1544690257.1544690257.30; nteslogger_exit_time=1544692086080; vinfo_n_f_l_n3=8a31831dccfd73bf.1.0.1544690256693.0.1544692154822; JSESSIONID-WYYY=HYgCofY5xb%5Cbn0UObOx4nvEqF1Akb3e%2Fh%2FzcPVbhWyj1KaJZnTusNDfyT5mWEBuSWSJ9uNs5G%2BTpVkenwYj1V7CpefhlP9FP6RtFWxFrbWIbsKPMFQo8lV58%2FrH%2BsHf42oU20b1lqMfoHApESJqjCDM9Mtgs2WRkXWs4Qbb4WTmcIipY%3A1545471673818; _iuqxldmzr_=32; __utma=94650624.195620955.1545469875.1545469875.1545469875.1; __utmc=94650624; __utmz=94650624.1545469875.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; WM_NI=HehEClh%2F'
            '%2FQUZj98wglZfRgNpbsu1q9m2HxBPcS9UkOXXysR7gOXojWNn82ueE5kAzm4tLz3eUvdfIZTqY5%2BVheKLttjo3RnK9Bho7dWiyA6FIqm7%2BVm5tA61RUEIYGa%2BQ3k%3D; WM_NIKE=9ca17ae2e6ffcda170e2e6eeb4d7639bb9f7bad16ea7b88fa7c54f879f9a85bc5cb686a2b5d364bab287bac72af0fea7c3b92a8cbba8a6c268acf1e1b2d852aae7f898e6689aba9e88cd7ca2adbbd2f433afee8899c15ca18df7b6ea459c88b794d13da5919890e95d8eb2f8d3f86f87eba5a2f967f8ac849bb26f97aaac87cc5298af97d6aa5eacbb85adf780aab1fdb3ed41fb9ea890b67095b7b7a2b54e8bafa3d8aa5eafebbca2b53b928b8baadc4da3f59fd4ea37e2a3; WM_TID=U0nsAG4m95hEAFVVFVZ8fqJzHf1jkqZC; __utmb=94650624.7.10.1545469875'
        ),
        'Host': 'music.163.com',
        'Referer': 'http://music.163.com/',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/66.0.3359.181 Safari/537.36',
    }
    try:
        # A timeout keeps one stalled connection from hanging the whole crawl.
        r = requests.post(url, headers=headers, data=data, timeout=10)
        r.encoding = "utf-8"
        if r.status_code == 200:
            # Body decoded as JSON.
            return r.json()
    except (requests.RequestException, ValueError):
        # RequestException: connection/timeout/HTTP-layer failures.
        # ValueError: the body was not valid JSON.
        print("爬取失败!")
    return None

生成16个随机字符

def generate_random_strs(length):
    """Return ``length`` characters picked at random from ``[a-zA-Z0-9]``.

    Each character is chosen by scaling ``random.random()`` to the alphabet
    size and flooring the result to get an index.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
    picked = []
    # Keep drawing until enough characters have been collected.
    while len(picked) < length:
        index = math.floor(random.random() * len(alphabet))
        picked.append(alphabet[index])
    return "".join(picked)

AES加密

def AESencrypt(msg, key):
    """Encrypt ``msg`` with AES in CBC mode and return the result as Base64 text.

    Args:
        msg: Plaintext, as ``str`` (encoded to UTF-8 here) or ``bytes``.
        key: AES key, as ``str`` (encoded to UTF-8 here) or ``bytes``.

    Returns:
        The ciphertext, Base64-encoded and decoded to a ``str``.
    """
    # The cipher works on bytes, so text input is encoded first. The padding
    # length must be computed on the encoded bytes, not on characters.
    if isinstance(msg, str):
        msg = msg.encode('utf-8')
    if isinstance(key, str):
        key = key.encode('utf-8')
    # Pad up to a multiple of 16 bytes; every padding byte carries the padding
    # length. Input that is already aligned gets a full extra 16-byte block.
    padding = 16 - len(msg) % 16
    msg = msg + bytes([padding]) * padding
    # Fixed 16-byte initialisation vector.
    iv = b'0102030405060708'

    cipher = AES.new(key, AES.MODE_CBC, iv)
    encryptedbytes = cipher.encrypt(msg)
    # Base64 yields bytes; decode them so callers get text.
    return base64.b64encode(encryptedbytes).decode('utf-8')

RSA加密

def RSAencrypt(randomstrs, key, f):
    """Raise the reversed ``randomstrs`` to the power ``key`` modulo ``f``.

    The reversed string is UTF-8 encoded and its bytes are read as one
    big-endian integer. No padding scheme is applied.

    Args:
        randomstrs: Text to encrypt.
        key: Exponent as a hexadecimal string.
        f: Modulus as a hexadecimal string.

    Returns:
        The result as lowercase hex, left-padded with zeros to 256 characters.
    """
    # Reverse the string, then read its bytes as a big-endian integer.
    text = randomstrs[::-1].encode('utf-8')
    base = int.from_bytes(text, 'big')
    # Three-argument pow() reduces modulo f at every step, so it never builds
    # the enormous intermediate that ``base ** exponent`` would.
    seckey = pow(base, int(key, 16), int(f, 16))
    return format(seckey, 'x').zfill(256)

获取参数

def get_params(page):
    """Build the two encrypted form fields for one page of comments.

    The request body is a small JSON string carrying ``offset`` and ``limit``
    (20 per page, so ``offset = (page - 1) * 20``). It is AES-encrypted twice:
    first with a fixed key, then with a freshly generated 16-character random
    key. The random key itself is passed through ``RSAencrypt``.

    Author's notes, unverified here: ``offset`` and ``limit`` are the only
    required fields, and ``limit`` may be at most 100.

    Returns:
        A tuple ``(encText, encSecKey)``: the doubly AES-encrypted body and
        the RSA-encrypted random key.
    """
    fixed_key = '0CoJUm6Qyw8W8jud'
    modulus_hex = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7'
    exponent_hex = '010001'

    # Page number -> number of comments to skip.
    skip = (page - 1) * 20
    payload = ''.join(['{"offset":', str(skip), ',"total":"True","limit":"20","csrf_token":""}'])

    # First AES pass uses the fixed key.
    first_pass = AESencrypt(payload, fixed_key)
    # Second AES pass uses a one-off random 16-character key.
    session_key = generate_random_strs(16)
    second_pass = AESencrypt(first_pass, session_key)
    # The random key is sent along, RSA-encrypted.
    wrapped_key = RSAencrypt(session_key, exponent_hex, modulus_hex)
    return second_pass, wrapped_key

最主要的部分来了

def comments(html, songname, i, pages, total, filepath):
    """Append the text of every comment on one result page to ``filepath``.

    Prints ``i/pages`` as a progress indicator, then writes each entry's
    ``content`` field on its own line (UTF-8, append mode).

    Args:
        html: Decoded JSON for one page; its ``comments`` entry is iterated.
            ``None`` or a dict without ``comments`` is reported and skipped.
        songname: Unused; kept so existing callers keep working.
        i: Number of the page being written (for the progress line).
        pages: Total number of pages (for the progress line).
        total: Unused; kept so existing callers keep working.
        filepath: File the comment texts are appended to.
    """
    print("{}/{}\n".format(i, pages))
    if not html or 'comments' not in html:
        # A failed request yields no usable page; skip it instead of crashing.
        print("第{}页没有可用的评论数据,已跳过".format(i))
        return
    page_comments = html['comments']
    if not page_comments:
        return
    # Open the file once per page rather than once per comment; the ``with``
    # block closes it, so no explicit close() is needed.
    with open(filepath, 'a', encoding='utf-8') as f:
        for item in page_comments:
            f.write(item['content'])
            f.write('\n')

歌曲id号

# Copy the values collected by the GUI callbacks into the names used by the
# crawler. song_id and path2 only exist if go() / go1() ran, i.e. if their
# buttons were clicked; otherwise these lines raise NameError.
songid = song_id
filepath = path2

歌曲名字

# Base name of the output file; file_name is set by go2() from the third
# input box.
songname = file_name
# Echo the collected inputs so they can be checked in the console.
print(songid)
print(filepath)
print(songname)

文件存储路径

# Full output path: directory + name + ".txt". This is plain string
# concatenation, so the directory typed in the GUI must already end with a
# path separator.
filepath = filepath  + songname + ".txt"
page = 1
# Encrypted form fields for the first page of comments.
params, encSecKey = get_params(page)

获取第一页评论

# Comment endpoint for the chosen song.
url = 'https://music.163.com/weapi/v1/resource/comments/R_SO_4_' + str(songid) + '?csrf_token='
data = {'params': params, 'encSecKey': encSecKey}
# Fetch the first page of comments.
html = get_comments_json(url, data)
# get_comments_json() returns None when the request fails or the status is
# not 200. Stop with a clear message instead of an opaque TypeError/KeyError
# on the subscript below.
if not html or 'total' not in html:
    raise SystemExit("爬取失败!未能获取第一页评论")
# Total number of comments reported by the server.
total = html['total']
# Number of pages at 20 comments per page.
pages = math.ceil(total / 20)
comments(html, songname, page, pages, total, filepath)

获取全部评论

# Fetch pages 2..pages; the first page has already been written.
for page in range(2, pages + 1):
    params, encSecKey = get_params(page)
    data = {'params': params, 'encSecKey': encSecKey}
    html = get_comments_json(url, data)
    # get_comments_json() returns None when the request fails or the status
    # is not 200. Skip that page so one bad response does not abort the rest
    # of the crawl.
    if not html or 'comments' not in html:
        print("第{}页评论获取失败,已跳过".format(page))
        continue
    comments(html, songname, page, pages, total, filepath)

数据预处理

# Copy cleaning_file to cleaned_file, dropping every blank line.
filepath1 = cleaning_file
filepath2 = cleaned_file
# Both files are managed by ``with`` so they are closed even if reading or
# writing fails, and the input is not leaked if the output cannot be opened.
with open(filepath1, 'r', encoding='utf-8') as file1, \
        open(filepath2, 'w', encoding='utf-8') as file2:
    print("转换中......")
    for line in file1:
        # A line consisting only of a newline is blank: skip it.
        if line != '\n':
            file2.write(line)
# Printed only after the copy finished without an exception.
print("执行完毕!")

情感分析SnowNLP及可视化

# Load the comments to analyse, one per line, keeping only the first
# occurrence of each distinct line (original order preserved).
comment = []
_seen = set()
with open(analysis_path, mode='r', encoding='utf-8') as f:
    for row in f:
        # Strip the newline BEFORE the duplicate check. Comparing the raw row
        # (which still ends in "\n") against already-stripped entries would
        # almost never match, so duplicates would slip through.
        text = row.strip('\n')
        if text not in _seen:
            _seen.add(text)
            comment.append(text)
def snowanalysis(self):
    """Score each text with SnowNLP and plot the results.

    Shows two figures: a histogram of the raw sentiment scores, then a bar
    chart with the number of negative and positive texts. A score above 0.5
    counts as positive, anything else as negative. Every text, every score
    and the intermediate lists are printed to stdout.

    Args:
        self: Iterable of texts to score. Despite the name, this is a plain
            function argument, not an instance.
    """
    sentimentslist = []
    for li in self:
        print(li)
        score = SnowNLP(li).sentiments
        print(score)
        sentimentslist.append(score)
    # 101 edges give 100 equal-width bins over [0, 1] with the upper edge
    # included, so scores above 0.99 are counted too.
    # NOTE(review): assumes SnowNLP scores lie in [0, 1] — confirm.
    plt.hist(sentimentslist, bins=np.linspace(0, 1, 101))
    plt.show()
    print(sentimentslist)

    # Map each score to a label: 1 = positive (> 0.5), -1 = negative.
    labels = [1 if score > 0.5 else -1 for score in sentimentslist]
    print(labels)
    positive = labels.count(1)
    negative = len(labels) - positive
    # Order matches the axis labels below: negative first, then positive.
    info = [negative, positive]
    print(info)
    info2 = ['negative', 'positive']
    plt.bar(info2, info, tick_label=info2, color='#2FC25B')
    plt.show()
# Score and plot the module-level ``comment`` list.
snowanalysis(comment)
  • 注:本帖只用于学习交流,不得用于商业活动
根据引用内容[1][2][3],通过使用Python爬虫和数据分析工具,可以对网易云音乐上的评论进行情感分析。具体步骤如下:

1. 使用PyCharm开发环境(IDE)和Python3.7环境来进行开发。
2. 使用Binaryify的网易云音乐API来获取评论数据。可以通过API接口获取评论用户ID、昵称、位置、评论内容、点赞数、用户头像地址和评论时间等信息。
3. 通过分析评论数据,可以进行中文分词、命名实体识别、关键词提取、句法分析、文本向量化、情感分析以及舆情分析等数据处理和应用。
4. 对评论进行情感分析可以了解用户对肖战《红梅赞》的评论情感倾向。可以使用自然语言处理技术,如情感词典、机器学习模型等方法,将评论文本分类为正面、负面或中性情感。这样可以统计出正面评论、负面评论以及中性评论的数量,并分析用户的情感倾向。

以上就是关于如何通过PyCharm、Python爬虫和数据可视化工具进行网易云音乐评论情感分析的步骤。

引用:
- [1][2] [网易云音乐爬虫实战——肖战《红梅赞》下评论数据挖掘与分析](https://blog.csdn.net/JAVA_wangyi/article/details/107068244)
- [3] [Python爬虫之网易云音乐数据爬取(十五)](https://blog.csdn.net/weixin_42555080/article/details/90105330)
评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值