#创作灵感#
之前的代码思路已经失效,我已于5.4更新了代码,现在可以正常使用了。
这是我学习 Python 时做的新手项目,经过逐步完善,最终做成了一个思路全新、运行速度也比较快的脚本。
脚本还添加了 UI 界面,可以自动判断地址是否正确、视频或弹幕是否存在,最后生成弹幕词云。
下面是完整的代码,仅供参考,大家可以自行完善和优化。
import re
import requests
import xml.etree.ElementTree as ET
from collections import Counter
from wordcloud import WordCloud
import matplotlib.pyplot as plt
import tkinter as tk
# Seconds to wait for Bilibili before giving up, so a stalled connection
# cannot freeze the Tk callback indefinitely.
_REQUEST_TIMEOUT = 10


def _extract_cid(video_url, headers):
    """Return the CID of the first page of the video, or None on any failure.

    The ID following ``BV`` in *video_url* is sent to Bilibili's ``pagelist``
    API. Network errors, non-JSON replies and unexpected payload shapes all
    yield ``None`` so that the caller can simply retry.
    """
    bv = re.search(r'BV(\w+)', video_url)
    if not bv:
        return None
    # NOTE(review): group(1) drops the "BV" prefix from the bvid parameter;
    # kept as in the original -- confirm the API accepts the ID in this form.
    api_url = f'https://api.bilibili.com/x/player/pagelist?bvid={bv.group(1)}'
    try:
        response = requests.get(api_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        data = response.json()
        if data['code'] == 0 and data['data']:
            return data['data'][0]['cid']
    except (requests.RequestException, ValueError, KeyError, TypeError, IndexError):
        # ValueError covers a non-JSON body; the lookup errors cover a JSON
        # body that does not have the expected structure.
        return None
    return None


def generate_wordcloud(event=None):
    """Build and display a word cloud from the danmaku of the entered video.

    Reads the video URL from the module-level ``entry`` widget, resolves the
    video's CID, downloads and parses the danmaku XML, counts how often each
    danmaku text occurs and renders the counts as a word cloud. Progress and
    errors are reported through ``status_label``.

    Args:
        event: Tk event object when triggered by a key binding; unused, and
            ``None`` when invoked as a button command.
    """
    # Read the user's input; surrounding whitespace from copy/paste would
    # otherwise fail the format check below.
    video_url = entry.get().strip()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
        'Referer': 'https://www.bilibili.com/'
    }

    # Validate the address format.
    pattern = r'^https://www.bilibili.com/video/BV.*$'
    if not re.match(pattern, video_url):
        status_label.config(text="地址格式不正确,请重新输入")
        return
    status_label.config(text="地址格式正确,正在解析视频弹幕,请稍等...")
    window.update()

    # The CID lookup sometimes comes back empty, so try up to five times
    # before concluding that the address is wrong.
    cid = None
    for _ in range(5):
        cid = _extract_cid(video_url, headers)
        if cid is not None:
            break
    if not cid:
        status_label.config(text="无法获取视频的 CID,请检查视频地址是否正确")
        return

    # Download the danmaku XML for this CID.
    danmaku_url = f'https://comment.bilibili.com/{cid}.xml'
    try:
        response = requests.get(danmaku_url, headers=headers, timeout=_REQUEST_TIMEOUT)
        # Treat HTTP error statuses as a download failure rather than letting
        # an error page reach the XML parser.
        response.raise_for_status()
    except requests.RequestException:
        status_label.config(text="无法获取弹幕数据")
        return

    # Parse the danmaku. The raw bytes are handed to the parser so that it
    # applies the encoding declared in the XML itself instead of a guess.
    try:
        danmaku_root = ET.fromstring(response.content)
    except ET.ParseError:
        status_label.config(text="解析弹幕数据时发生错误")
        return
    danmakus = danmaku_root.findall('d')

    # Count occurrences of each danmaku, skipping elements with no text
    # (``.text`` is None for an empty <d/> element).
    counter = Counter(
        d.text.strip() for d in danmakus if d.text and d.text.strip()
    )
    if not counter:
        status_label.config(text="弹幕列表为空")
        return
    status_label.config(text="弹幕解析成功,正在生成云图...")
    window.update()

    # Render the word cloud. The font file must be resolvable on this machine.
    font_path = 'msyh.ttc'
    try:
        wordcloud = WordCloud(
            font_path=font_path,
            width=800,
            height=800,
            background_color='white',
            colormap='YlOrBr',
        ).generate_from_frequencies(counter)
    except OSError:
        # Raised when the font file cannot be opened.
        status_label.config(text=f"无法加载字体文件 {font_path},云图生成失败")
        return
    status_label.config(text="云图生成成功!")
    window.update()

    # Show the word cloud, then clear the status line.
    plt.figure(figsize=(10, 5))
    plt.imshow(wordcloud, interpolation='bilinear')
    plt.axis('off')
    plt.show()
    status_label.config(text="")
    window.update()
def on_focus_in(event):
    """Clear the placeholder address when the entry gains focus.

    If the entry still shows the example URL, it is emptied and the text
    colour is switched to black; otherwise nothing is changed.
    """
    showing_placeholder = (
        entry.get() == "https://www.bilibili.com/video/BVXXXXXXXXXX"
    )
    if not showing_placeholder:
        return
    entry.delete(0, tk.END)
    entry.config(fg="black")
def on_focus_out(event):
    """Restore the grey placeholder address when the entry is left empty."""
    if entry.get() != "":
        return
    entry.insert(0, "https://www.bilibili.com/video/BVXXXXXXXXXX")
    entry.config(fg="gray")
# Create the main application window.
window = tk.Tk()
window.title("Bilibili弹幕云图生成器")

# Fixed window size, positioned at the centre of the screen.
window_width, window_height = 300, 130
screen_width, screen_height = (
    window.winfo_screenwidth(),
    window.winfo_screenheight(),
)
x = screen_width // 2 - window_width // 2
y = screen_height // 2 - window_height // 2
window.geometry(f"{window_width}x{window_height}+{x}+{y}")
# Window stacking: request the topmost flag at start-up and clear it again
# from the <Map> handler below.
def on_window_open(event):
    """Clear the window's ``-topmost`` attribute when a <Map> event arrives."""
    window.attributes('-topmost', False)


window.bind("<Map>", on_window_open)
window.attributes('-topmost', True)
# Prompt label shown above the input field.
label = tk.Label(window, text="\n请输入需要解析视频的地址")
label.pack()

# URL input field, pre-filled with a grey example address. The focus handlers
# manage the placeholder text; pressing Enter starts the generation.
entry = tk.Entry(window, width=50, fg="gray")
entry.insert(0, "https://www.bilibili.com/video/BVXXXXXXXXXX")
entry.bind('<FocusIn>', on_focus_in)
entry.bind('<FocusOut>', on_focus_out)
entry.bind('<Return>', generate_wordcloud)
entry.pack(pady=10)

# Button that starts the generation.
button = tk.Button(window, text="开始生成", command=generate_wordcloud)
button.pack()

# Status line used for progress and error messages.
status_label = tk.Label(window, text="")
status_label.pack()

# Enter the Tk event loop.
window.mainloop()