爬取网易云指定歌曲评论,生成词云

爬取网易云指定歌曲评论,生成词云

环境

python3.8

使用工具

pycharm,jieba分词,wordcloud词云生成,Crypto加密工具

代码

from Crypto.Cipher import AES
import base64
import requests
import json
from wordcloud import WordCloud,ImageColorGenerator
import jieba
from imageio import imread
import os
import matplotlib.pyplot as plt
import random
import time

# HTTP headers sent with every comment request (passed to requests.post below).
headers = {
    'origin': 'https://music.163.com',
    'referer': 'https://music.163.com/song?id=1901371647',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
}

# NOTE(review): `e` and `f` are not referenced anywhere in the visible code.
# Presumably the RSA public exponent used to derive encSecKey -- TODO confirm.
e = '010001'

# Presumably the matching RSA modulus (hex) -- TODO confirm.
# NOTE(review): the literal ends with a trailing space before the closing quote.
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7 '

# Key for the first encryption pass
g = '0CoJUm6Qyw8W8jud'

# IV used for both encryption passes
df = '0102030405060708'

# Random value: used to generate encSecKey, and as the key of the second encryption pass
i = "91TtJZeGZiCOC1Gp"

# Sent as the 'encSecKey' form field; presumably precomputed from `i` -- TODO confirm.
key = '74ed05cc3d4ab24dc71b989aa2b282b6a81490e0499326b2021e598b2c7b0fcee5ab11b721b465332d412b94cb3480a59358c7816adccae8e67cae09e7cc333246ece7235390a354fe690068ed75e6ff90f7b8533c74c212f4d3520eb1e60d784ccee3281305f46b9757f9bc4a5dc8a60a8393578156f0918785c31b6cb35e8e'

# Comment endpoint that the encrypted payload is POSTed to.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="


# AES encryption, CBC mode
def encText(data, key, iv):
    """Encrypt *data* with AES-CBC and return the ciphertext as Base64 text.

    The plaintext is padded PKCS#7-style: N bytes of value N are appended so
    the length becomes a multiple of the AES block size (a full extra block
    is added when the input is already aligned).

    Args:
        data: Plaintext to encrypt (``str`` is UTF-8 encoded; bytes-like is
            used as is).
        key: AES key (``str`` is UTF-8 encoded; bytes-like is used as is).
        iv: CBC initialisation vector, one AES block long once encoded.

    Returns:
        The Base64-encoded ciphertext as a ``str``.
    """

    def to_bytes(value):
        # The cipher API works on bytes, not text, so encode str inputs once here.
        return value.encode('utf-8') if isinstance(value, str) else bytes(value)

    bs = AES.block_size

    # Pad on the encoded bytes so multi-byte characters are counted correctly.
    raw = to_bytes(data)
    pad_len = bs - len(raw) % bs
    padded = raw + bytes([pad_len]) * pad_len

    cipher = AES.new(to_bytes(key), AES.MODE_CBC, to_bytes(iv))

    # encrypt() returns raw bytes; Base64 makes them safe to send as a form field.
    encrypt_str = base64.b64encode(cipher.encrypt(padded)).decode('utf-8')
    print(encrypt_str)
    return encrypt_str


def fill_data_for_aes(data):
    """Right-pad *data* with NUL characters to a multiple of the AES block size.

    When the length is already a multiple of the block size, one whole extra
    block of NUL characters is appended.
    """
    block_len = AES.block_size
    missing = block_len - len(data) % block_len
    return data + chr(0) * missing


if __name__ == '__main__':

    # Request payload for the comment API; "pageNo" is overwritten per page below.
    data = {
        "rid": "R_SO_4_1901371647",
        "threadId": "R_SO_4_1901371647",
        "pageNo": 1,
        "pageSize": 20,
        "cursor": -1,
        "offset": 0,
        "orderType": 1
    }

    print(json.dumps(data))

    # Distinct words in first-seen order; the companion set keeps the
    # duplicate check O(1) instead of scanning the list for every word.
    word_set = []
    seen_words = set()
    print(time.time())
    for pageNum in range(1, 2):
        data['pageNo'] = pageNum
        # Two AES-CBC passes: first with the fixed key `g`, then with `i`.
        param = encText(json.dumps(data), g, df)
        param = encText(param, i, df)
        # A timeout keeps the script from hanging forever on a stalled connection.
        post = requests.post(url, data={'params': param, 'encSecKey': key}, headers=headers, timeout=10)
        # Pause between requests.
        time.sleep(3)
        loads = json.loads(post.text)
        # Fall back to an empty list in case the field is null in the response.
        comments = loads['data']['hotComments'] or []
        for comment in comments:
            for word in jieba.cut(comment['content'], cut_all=False):
                # Skip single-character tokens and words already collected.
                if len(word) >= 2 and word not in seen_words:
                    seen_words.add(word)
                    word_set.append(word)

    print(time.time())
    liststr = ' '.join(word_set)
    print(liststr)

    # Mask image for the word cloud, read from the current working directory.
    orign_path = os.getcwd()
    image_word_path = os.path.join(orign_path, 'cat.jpg')
    image_background = imread(image_word_path)
    wc = WordCloud(
        # Raw string: the backslashes are path separators, not escape sequences.
        font_path=r'C:\Windows\Fonts\微软雅黑\msyh.ttc',
        background_color="white",
        width=1080,
        max_words=2000,
        mask=image_background,
        max_font_size=200,
        random_state=42,
        height=860,
        margin=2,
    )
    wc.generate(liststr)
    # Create the colouring from the mask image.
    image_colors_byImg = ImageColorGenerator(image_background)

    # recolor() is applied before saving, so the saved file uses the image colours.
    # NOTE(review): plt.show() is never called, so no figure is displayed.
    plt.imshow(wc.recolor(color_func=image_colors_byImg), interpolation="bilinear")
    plt.axis("off")
    plt.figure()
    plt.axis("off")
    # Save the word cloud under a random 8-letter file name in the working directory.
    output_name = ''.join(random.sample('zyxwvutsrqponmlkjihgfedcba', 8)) + '.jpg'
    wc.to_file(os.path.join(orign_path, output_name))

所使用的图片:生成词云的模板图片

最终效果

词云效果图

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值