环境
python3.8
使用工具
PyCharm、jieba 分词、wordcloud 词云生成、Crypto 加密工具
代码
from Crypto.Cipher import AES
import base64
import requests
import json
from wordcloud import WordCloud,ImageColorGenerator
import jieba
from imageio import imread
import os
import matplotlib.pyplot as plt
import random
import time
# HTTP headers sent with the comment request (passed to requests.post below).
headers = {
'origin': 'https://music.163.com',
'referer': 'https://music.163.com/song?id=1901371647',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.71 Safari/537.36'
}
# Hex strings that are not referenced anywhere else in the visible code.
# NOTE(review): presumably the RSA public exponent / modulus behind encSecKey — confirm.
e = '010001'
# NOTE(review): this literal ends with a trailing space — confirm whether that is intentional.
f = '00e0b509f6259df8642dbc35662901477df22677ec152b5ff68ace615bb7b725152b3ab17a876aea8a5aa76d2e417629ec4ee341f56135fccf695280104e0312ecbda92557c93870114af6c9d05c4f7f0c3685b7a46bee255932575cce10b424d813cfe4875d3e82047b97ddef52741d546b8e289dc6935b3ece0462db0a22b8e7 '
# Key for the first encryption pass
g = '0CoJUm6Qyw8W8jud'
# IV used for both encryption passes
df = '0102030405060708'
# Random value: used to generate encSecKey, and as the key for the second encryption pass
i = "91TtJZeGZiCOC1Gp"
# Value sent as the 'encSecKey' form field of the request.
key = '74ed05cc3d4ab24dc71b989aa2b282b6a81490e0499326b2021e598b2c7b0fcee5ab11b721b465332d412b94cb3480a59358c7816adccae8e67cae09e7cc333246ece7235390a354fe690068ed75e6ff90f7b8533c74c212f4d3520eb1e60d784ccee3281305f46b9757f9bc4a5dc8a60a8393578156f0918785c31b6cb35e8e'
# Endpoint the encrypted comment request is POSTed to.
url = "https://music.163.com/weapi/comment/resource/comments/get?csrf_token="
# AES encryption, CBC mode
def encText(data, key, iv):
    """Encrypt *data* with AES-CBC and return the ciphertext as Base64 text.

    The plaintext is padded so its byte length is a multiple of the AES
    block size: each padding byte holds the number of padding bytes added
    (PKCS#7 style), and a full block is added when the input is already
    aligned.

    Args:
        data: Plaintext string to encrypt.
        key: AES key as a string; its UTF-8 encoding must be a valid AES
            key length (16, 24 or 32 bytes).
        iv: Initialisation vector as a string; its UTF-8 encoding must be
            one AES block (16 bytes) long.

    Returns:
        The Base64-encoded ciphertext as a ``str``.
    """
    # The cipher operates on bytes, so convert every text input exactly once.
    data_bytes = data.encode('utf-8')
    key_bytes = key.encode('utf-8')
    iv_bytes = iv.encode('utf-8')
    # Pad on the *byte* length so multi-byte characters cannot misalign the
    # final block.
    bs = AES.block_size
    pad_len = bs - len(data_bytes) % bs
    padded = data_bytes + bytes([pad_len]) * pad_len
    # Create the AES object and encrypt; the result is raw bytes.
    cipher = AES.new(key_bytes, AES.MODE_CBC, iv_bytes)
    encrypted = cipher.encrypt(padded)
    # Base64-encode the ciphertext and hand it back as text.
    encrypt_str = base64.b64encode(encrypted).decode('utf-8')
    print(encrypt_str)
    return encrypt_str
def fill_data_for_aes(data):
    """Return *data* extended with NUL characters to a multiple of the AES block size.

    A string whose length is already a multiple of the block size still
    gains one full block of padding.
    """
    block = AES.block_size
    missing = block - len(data) % block
    return data + chr(0) * missing
if __name__ == '__main__':
    # Request payload for the comment endpoint; 'pageNo' is overwritten for
    # every page requested in the loop below.
    data = {
        "rid": "R_SO_4_1901371647",
        "threadId": "R_SO_4_1901371647",
        "pageNo": 1,
        "pageSize": 20,
        "cursor": -1,
        "offset": 0,
        "orderType": 1
    }
    print(json.dumps(data))
    liststr = ''
    # word_set keeps first-seen order for the output string; seen_words makes
    # the duplicate check O(1) instead of scanning the list each time.
    word_set = []
    seen_words = set()
    print(time.time())
    for pageNum in range(1, 2):
        data['pageNo'] = pageNum
        # Two encryption passes: first with the fixed key g, then with the
        # random key i; both use the same IV.
        param = encText(json.dumps(data), g, df)
        param = encText(param, i, df)
        # A timeout keeps the script from hanging forever on a stalled connection.
        post = requests.post(
            url,
            data={'params': param, 'encSecKey': key},
            headers=headers,
            timeout=10,
        )
        # Pause after each request before moving on.
        time.sleep(3)
        loads = json.loads(post.text)
        # Guard against a reply without 'data' or with a null 'hotComments'
        # so the loop below does not raise on it.
        commnets = (loads.get('data') or {}).get('hotComments') or []
        for comment in commnets:
            for word in jieba.cut(comment['content'], cut_all=False):
                # Skip single-character tokens and words already collected.
                if len(word) < 2 or word in seen_words:
                    continue
                seen_words.add(word)
                word_set.append(word)
    print(time.time())
    liststr += ' '.join(word_set)
    print(liststr)
    if not word_set:
        # Stop with a clear message instead of failing later inside the
        # word-cloud generation on empty text.
        raise SystemExit('No hot comments were returned; nothing to render.')
    # Mask image for the word cloud, read from the current working directory.
    orign_path = os.getcwd()
    image_word_path = os.path.join(orign_path, 'cat.jpg')
    image_background = imread(image_word_path)
    wc = WordCloud(
        # Raw string: the backslashes are path separators, not escape sequences.
        font_path=r'C:\Windows\Fonts\微软雅黑\msyh.ttc',
        background_color="white",
        width=1080,
        max_words=2000,
        mask=image_background,
        max_font_size=200,
        random_state=42,
        height=860,
        margin=2,
    )
    wc.generate(liststr)
    # create coloring from image
    image_colors_byImg = ImageColorGenerator(image_background)
    # show
    # we could also give color_func=image_colors directly in the constructor
    # NOTE(review): plt.show() is never called in this block, so the figures
    # are not displayed — confirm whether that is intended.
    plt.imshow(wc.recolor(color_func=image_colors_byImg), interpolation="bilinear")
    plt.axis("off")
    plt.figure()
    plt.axis("off")
    # save wordcloud under a random 8-letter file name in the working directory
    output_name = ''.join(random.sample('zyxwvutsrqponmlkjihgfedcba', 8)) + '.jpg'
    wc.to_file(os.path.join(orign_path, output_name))
所使用的图片:生成词云的模板图片