制作哔哩哔哩弹幕词云

1 篇文章 0 订阅
1 篇文章 0 订阅

先展示一下实物图吧

词云制作
S8英雄联盟全球总决赛
不知道有没有人能猜出来这是哪个视频的弹幕

直接上代码

先是爬取视频弹幕

import re
import requests
def main():
    """Prompt for a Bilibili video URL, download its danmaku and save them.

    Reads the URL from standard input, extracts the ``cid`` (the id the
    danmaku are stored under) from the page source, downloads
    ``https://comment.bilibili.com/{cid}.xml`` and passes the decoded XML
    to ``savelist``.

    Raises:
        ValueError: if no ``"cid"`` field is found in the page source.
    """
    print("输入想爬取的b站视频网址:")
    url = input().strip()
    # Fetch the page through get_Html so this request carries the same
    # browser User-Agent as the danmaku request below.
    page = get_Html(url)
    cids = re.findall(r'"cid":(.*?),', page.text)
    if not cids:
        # Fail with a clear message instead of an IndexError on [0].
        raise ValueError("未能在页面中找到cid,请检查视频网址")
    dmurl = f'https://comment.bilibili.com/{cids[0]}.xml'
    datalist = get_Html(dmurl)
    xml_text = datalist.content.decode("utf-8")
    savelist(xml_text)
def get_Html(url, timeout=10):
    """Send a GET request with a browser User-Agent and return the response.

    Args:
        url: address to request.
        timeout: seconds to wait for the server before giving up; passed
            straight to ``requests.get`` so a stalled connection cannot
            block the program forever.

    Returns:
        The ``requests`` response object, unmodified (status is not checked).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }  # replace with your own User-Agent
    return requests.get(url, headers=headers, timeout=timeout)
def savelist(list):
    """Extract the danmaku text from comment XML and write it to 弹幕.txt.

    Every ``<d p="...">text</d>`` element contributes one line to the
    output file, which is created or overwritten in the current directory.

    Args:
        list: the danmaku XML document as a string. The name shadows the
            builtin but is kept so existing callers keep working.
    """
    # Local import so the block does not depend on the file's import section.
    from html import unescape

    danmu = re.compile(r'<d p=".*?">(.*?)</d>')
    # The context manager closes the file even if a write fails.
    with open("弹幕.txt", "w", encoding="utf-8") as out:
        # The text is taken from XML, so entities such as &amp; are decoded
        # before saving; otherwise they end up as bogus words in the cloud.
        out.writelines(f"{unescape(text)}\n" for text in danmu.findall(list))
# Run the crawler only when this file is executed as a script (not when it
# is imported), then print a completion message.
if __name__ =="__main__":
    main()
    print("爬取完毕")

然后是制作词云代码

可能出现的报错:
缺少stoplist.txt文件,自行创建即可。它的作用是在制作词云时排除你不想要的词,一个词占一行

如果你想制作一个特定图案的词云,就可以将第六步中的两行代码的注释去掉,并添加你的图片文件,最好是颜色相差较大的图片,不然制作出来的词云效果不好,如果效果不好的话可以用PS把图片背景去掉,留下你想要的图案

# 1 导入相关库
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread

import warnings
warnings.filterwarnings("ignore")


# 2 Read the danmaku file and tokenize every entry with jieba.lcut()
with open("弹幕.txt", encoding="utf-8") as f:
    txt = f.read()
txt = txt.split()
# Upper-case so Latin words that differ only in case are counted together.
txt = [i.upper() for i in txt]
data_cut = [jieba.lcut(x) for x in txt]

# 3 Load the stop words (whitespace-separated in stoplist.txt)
with open("stoplist.txt", encoding="utf-8") as f:
    stop = f.read()
# A set makes the per-token membership test below O(1) instead of a scan
# over the whole stop list.
stop = {" ", *stop.split()}

# 4 Drop the stop words and flatten the per-entry token lists
all_words = [word for words in data_cut for word in words if word not in stop]

# 5 Word frequencies
word_count = pd.Series(all_words).value_counts()

# 6 Draw the word cloud
# 1) Optional background picture: uncomment the next line and the
#    `mask=` argument below to shape the cloud with an image.
# back_picture = imread("1.jpg")

# 2) Word-cloud parameters
wc = WordCloud(font_path="simhei.ttf",
               background_color="white",
               max_words=1000,
            #    mask=back_picture,
               max_font_size=150,
               random_state=42
              )
wc2 = wc.fit_words(word_count)

# 3) Show the picture, then save it as ciyun.png
plt.figure(figsize=(16,8))
plt.imshow(wc2)
plt.axis("off")
plt.show()
wc.to_file("ciyun.png")

之后就要制作ui界面了

可以跟着这个视频学习一下点我
我这个只是一个示例,以作参考,如果需要做出一个界面的话还是得自己学习一下
文件类型为 .ui

<?xml version="1.0" encoding="UTF-8"?>
<ui version="4.0">
 <class>Form</class>
 <widget class="QWidget" name="Form">
  <property name="geometry">
   <rect>
    <x>0</x>
    <y>0</y>
    <width>645</width>
    <height>496</height>
   </rect>
  </property>
  <property name="minimumSize">
   <size>
    <width>0</width>
    <height>0</height>
   </size>
  </property>
  <property name="windowTitle">
   <string>弹幕词云</string>
  </property>
  <property name="styleSheet">
   <string notr="true">QPushButton:hover{
	font-family:微软雅黑;
	font-size:15px;
	color:#1d649c;
}
*{
	background-image:url(bi.jpg);
}</string>
  </property>
  <layout class="QVBoxLayout" name="verticalLayout" stretch="0,0,0,0,0">
   <item>
    <spacer name="verticalSpacer">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeType">
      <enum>QSizePolicy::Expanding</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>150</height>
      </size>
     </property>
    </spacer>
   </item>
   <item>
    <layout class="QHBoxLayout" name="horizontalLayout_2" stretch="1,0,20">
     <item>
      <spacer name="horizontalSpacer">
       <property name="orientation">
        <enum>Qt::Horizontal</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>40</width>
         <height>20</height>
        </size>
       </property>
      </spacer>
     </item>
     <item>
      <widget class="QTextEdit" name="textEdit">
       <property name="minimumSize">
        <size>
         <width>280</width>
         <height>10</height>
        </size>
       </property>
       <property name="maximumSize">
        <size>
         <width>16777215</width>
         <height>30</height>
        </size>
       </property>
       <property name="placeholderText">
        <string>请输入b站视频网址:</string>
       </property>
      </widget>
     </item>
     <item>
      <spacer name="horizontalSpacer_2">
       <property name="orientation">
        <enum>Qt::Horizontal</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>40</width>
         <height>20</height>
        </size>
       </property>
      </spacer>
     </item>
    </layout>
   </item>
   <item>
    <spacer name="verticalSpacer_2">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeType">
      <enum>QSizePolicy::Expanding</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>89</height>
      </size>
     </property>
    </spacer>
   </item>
   <item>
    <layout class="QHBoxLayout" name="horizontalLayout" stretch="2,1,7">
     <item>
      <spacer name="horizontalSpacer_4">
       <property name="orientation">
        <enum>Qt::Horizontal</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>40</width>
         <height>20</height>
        </size>
       </property>
      </spacer>
     </item>
     <item>
      <widget class="QPushButton" name="pushButton">
       <property name="sizePolicy">
        <sizepolicy hsizetype="Maximum" vsizetype="Fixed">
         <horstretch>0</horstretch>
         <verstretch>0</verstretch>
        </sizepolicy>
       </property>
       <property name="layoutDirection">
        <enum>Qt::LeftToRight</enum>
       </property>
       <property name="text">
        <string>制作词云</string>
       </property>
      </widget>
     </item>
     <item>
      <spacer name="horizontalSpacer_3">
       <property name="orientation">
        <enum>Qt::Horizontal</enum>
       </property>
       <property name="sizeHint" stdset="0">
        <size>
         <width>40</width>
         <height>20</height>
        </size>
       </property>
      </spacer>
     </item>
    </layout>
   </item>
   <item>
    <spacer name="verticalSpacer_3">
     <property name="orientation">
      <enum>Qt::Vertical</enum>
     </property>
     <property name="sizeType">
      <enum>QSizePolicy::Expanding</enum>
     </property>
     <property name="sizeHint" stdset="0">
      <size>
       <width>20</width>
       <height>89</height>
      </size>
     </property>
    </spacer>
   </item>
  </layout>
 </widget>
 <resources/>
 <connections/>
</ui>

最后是汇总的代码

这里如果你直接复制粘贴的话必出错,就算你用了我上面的 .ui文件。因为我在 .ui文件里还设置了背景图片,所以这段代码也是用于参考。

汇总的原因是方便打包文件,制作成 .exe文件,可以发送给别人运行,即使电脑上没有python环境。可以根据这篇文章制作 .exe点我

from PySide2.QtWidgets import QApplication,QMessageBox
from PySide2.QtUiTools import QUiLoader
from PySide2.QtGui import QIcon
import re
import requests
import pandas as pd
import jieba
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from imageio import imread
import warnings
warnings.filterwarnings("ignore")

class Stats:
    """Controller for the danmaku word-cloud window.

    Loads the Qt Designer form ``bilibili.ui``. When the form's button is
    clicked it downloads the danmaku of the video whose URL is typed into
    the text box, saves them to ``弹幕.txt`` and renders a word cloud.
    """

    def __init__(self):
        """Load the form and connect the button's ``clicked`` signal."""
        self.ui = QUiLoader().load('bilibili.ui')
        self.ui.pushButton.clicked.connect(self.handleCalc)

    def handleCalc(self):
        """Button slot: crawl the danmaku for the entered URL and draw the cloud.

        An empty URL, a failed request or a page without a ``cid`` is
        reported in a message box rather than raised out of the slot.
        """
        url = self.ui.textEdit.toPlainText().strip()
        if not url:
            QMessageBox.warning(self.ui, "提示", "请输入b站视频网址")
            return
        try:
            # Use get_Html for the page as well, so this request carries the
            # same browser User-Agent as the danmaku request.
            page = self.get_Html(url)
        except requests.RequestException as err:
            QMessageBox.warning(self.ui, "提示", f"网络请求失败:{err}")
            return
        # cid is the id the danmaku of this video are stored under.
        cids = re.findall(r'"cid":(.*?),', page.text)
        if not cids:
            QMessageBox.warning(self.ui, "提示", "未能在页面中找到cid,请检查视频网址")
            return
        dmurl = f'https://comment.bilibili.com/{cids[0]}.xml'
        try:
            datalist = self.get_Html(dmurl)
        except requests.RequestException as err:
            QMessageBox.warning(self.ui, "提示", f"网络请求失败:{err}")
            return
        self.savelist(datalist.content.decode("utf-8"))
        self.ciyun()

    def get_Html(self, url, timeout=10):
        """Send a GET request with a browser User-Agent and return the response.

        Args:
            url: address to request.
            timeout: seconds to wait before giving up, so a stalled
                connection cannot freeze the window forever.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
        }  # replace with your own User-Agent
        return requests.get(url, headers=headers, timeout=timeout)

    def savelist(self, list):
        """Extract the danmaku text from comment XML and write it to 弹幕.txt.

        Args:
            list: the danmaku XML document as a string. The name shadows the
                builtin but is kept for backward compatibility.
        """
        # Local import so the class does not depend on the file's import section.
        from html import unescape

        danmu = re.compile(r'<d p=".*?">(.*?)</d>')
        # The context manager closes the file even if a write fails.
        with open("弹幕.txt", "w", encoding="utf-8") as out:
            # Decode XML entities (&amp; etc.) so they do not become bogus words.
            out.writelines(f"{unescape(text)}\n" for text in danmu.findall(list))

    def ciyun(self):
        """Build a word cloud from 弹幕.txt, show it and save it as ciyun.png.

        Requires ``stoplist.txt`` (whitespace-separated stop words) in the
        current directory.
        """
        with open("弹幕.txt", encoding="utf-8") as f:
            # One token list per whitespace-separated fragment; upper-cased so
            # Latin words that differ only in case are counted together.
            data_cut = [jieba.lcut(piece.upper()) for piece in f.read().split()]

        with open("stoplist.txt", encoding="utf-8") as f:
            # A set gives O(1) membership tests in the filter below.
            stop = {" ", *f.read().split()}

        all_words = [word for words in data_cut for word in words if word not in stop]
        word_count = pd.Series(all_words).value_counts()

        # 1) Optional background picture: uncomment the next line and the
        #    `mask=` argument below to shape the cloud with an image.
        # back_picture = imread("EDG.jpg")

        # 2) Word-cloud parameters
        wc = WordCloud(font_path="simhei.ttf",
                       background_color="white",
                       max_words=1000,
                       #    mask=back_picture,
                       max_font_size=150,
                       random_state=42
                       )
        wc2 = wc.fit_words(word_count)

        # 3) Show the picture, then save it
        plt.figure(figsize=(16, 8))
        plt.imshow(wc2)
        # Hide the axes: ticks around a word-cloud image carry no meaning.
        plt.axis("off")
        plt.show()
        wc.to_file("ciyun.png")

# Application start-up: create the Qt application, set its window icon,
# build the controller (which loads the form), show the form and enter the
# Qt event loop.
app=QApplication([])
app.setWindowIcon(QIcon('li.jpg'))
stats=Stats()
stats.ui.show()
app.exec_()

文章到这里就结束了,后面是一些废话

之前因为个人习惯原因丢失的代码现在基本上补回来了(能想起来的),之后如果能想起来还有哪些代码的话随缘补上吧,这次其实不是很想补,得花几天的时间,但想着顺便复习一下Qt designer的用法,就补了这次代码

  • 2
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值