先自我介绍一下,小编浙江大学毕业,去过华为、字节跳动等大厂,目前阿里P7
深知大多数程序员想要提升技能,往往只能自己摸索成长,但不成体系的自学效率低、耗时长,而且极易碰到天花板,导致技术停滞不前!
因此收集整理了一份《2024年最新大数据全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友。
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新
如果你需要这些资料,可以添加V获取:vip204888 (备注大数据)
正文
4、爬虫代码
import pymysql
import requests
from lxml import etree
import csv
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.charts import Bar
# Connect to the MySQL database that stores the scraped videos.
db = pymysql.connect(host='localhost', port=3306, user='root', password='123456', database='BiliBili')
cursor = db.cursor()
# Crawler setup: search URL, query parameters filled in per page, and a
# browser-like user agent.
base_url = 'https://search.bilibili.com/all?vt=77434542&keyword=%E7%BC%96%E7%A8%8B%E8%AF%BE%E7%A8%8B&from_source=webtop_search&spm_id_from=333.1007&search_source=5'
params = dict()
headers = {"user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"}
# Paging state: page index and result offset.
page_number, o_number = 1, 0
# 将包含“万”的字符串转换为实际数字的函数
def convert_to_actual_number(string_with_unit):
    """Convert a count string such as ``'1.5万'`` or ``'320'`` to a float.

    Args:
        string_with_unit: Text of the counter, optionally containing the
            unit ``万`` (x 10,000) or ``亿`` (x 100,000,000).

    Returns:
        The numeric value as a float, or ``None`` when the text cannot be
        converted (including when the argument is not a string).
    """
    # Multipliers for the supported magnitude units.
    units = (('亿', 100000000), ('万', 10000))
    try:
        for unit, factor in units:
            if unit in string_with_unit:
                return float(string_with_unit.replace(unit, '')) * factor
        # No unit present: the text is expected to be a plain number.
        return float(string_with_unit)
    except (ValueError, TypeError):
        # Unparseable text or a non-string argument such as None.
        return None
# Links that were already stored; used to skip duplicate search results.
unique_links = set()
# Category keywords. A list keeps the matching order stable, so a title that
# contains several keywords is always filed under the first matching one.
video_data_by_keyword = ['C语言', 'C++', 'Python', 'PHP', '算法', 'Java', 'go语言', 'Mysql', 'C#', 'Scratch', 'web', '计算机']
k = 0
# Number of stored videos per keyword; this feeds the pie chart.
keyword_count = {keyword: 0 for keyword in video_data_by_keyword}
# Write the scraped rows to a CSV file and to the database. If the CSV already
# exists or a "permission denied" error is reported, delete the CSV file and
# the database rows first.
with open('video_data.csv', 'w', newline='', encoding='utf-8-sig') as csvfile:
    csv_writer = csv.writer(csvfile)
    # Header row.
    csv_writer.writerow(['视频链接', '视频名称', '作者', '类别', '播放量', '评论数', '时长'])
    while True:
        # Paging parameters.
        params['page'] = str(page_number)
        params['o'] = str(o_number)
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(base_url, params=params, headers=headers, timeout=10)
        html = response.content.decode('utf-8')
        print(response.url)
        doc = etree.HTML(html)
        # Video link
        contentPath = doc.xpath('//div[@class="bili-video-card__info--right"]/a/@href')
        # Video title
        contentname = doc.xpath('//div[@class="bili-video-card__info--right"]//h3[@class="bili-video-card__info--tit"]/@title')
        # Video author
        contentauthor = doc.xpath('//div[@class="bili-video-card__info--right"]//span[@class="bili-video-card__info--author"]/text()')
        # Video View (VV): play count, first stats item
        contentVV = doc.xpath('//div[@class="bili-video-card__stats--left"]/span[@class="bili-video-card__stats--item"][1]/span/text()')
        # Comment (CM): comment count, second stats item
        contentCM = doc.xpath('//div[@class="bili-video-card__stats--left"]/span[@class="bili-video-card__stats--item"][2]/span/text()')
        # Duration (DR): video length
        contentDR = doc.xpath('//div[@class="bili-video-card__stats"]/span[@class="bili-video-card__stats__duration"]/text()')
        for link, name, author, vv, cm, dr in zip(contentPath, contentname, contentauthor, contentVV, contentCM, contentDR):
            # Case-insensitive match of the title against the keyword list.
            lower_name = name.lower()
            matched_keyword = next(
                (keyword for keyword in video_data_by_keyword if keyword.lower() in lower_name),
                None,
            )
            # Skip videos without a category and links that were stored before.
            if matched_keyword is None or link in unique_links:
                continue
            vv = convert_to_actual_number(vv)
            cm = convert_to_actual_number(cm)
            csv_writer.writerow([link, name, author, matched_keyword, vv, cm, dr])
            sql = "INSERT INTO videos(VideoName, VideoAuther, Category, VideoView, Comment, Duration) VALUES (%s, %s, %s, %s, %s, %s )"
            values = (name, author, matched_keyword, vv, cm, dr)
            cursor.execute(sql, values)
            db.commit()
            unique_links.add(link)
            # Count exactly what was stored so the chart agrees with the CSV/DB.
            keyword_count[matched_keyword] += 1
        # Move on to the next page.
        page_number += 1
        o_number += 24
        print(page_number)
        print(o_number)
        # An empty result page means there is nothing left to fetch.
        if not contentPath:
            break
# Render the per-keyword pie chart once, after crawling, instead of rewriting
# the HTML file for every single video.
if any(keyword_count.values()):
    keyword_list = list(keyword_count.items())
    pie = (
        Pie()
        .add("", keyword_list)
        .set_colors(['red', 'blue', 'green', 'purple', 'orange', 'cyan', 'pink', 'brown'])
        .set_series_opts(label_opts=opts.LabelOpts(formatter="{b}: {c}"))
        .set_global_opts(title_opts=opts.TitleOpts(title="主要视频统计"))
    )
    pie.render('keyword_pie_chart.html')
# Query the 20 rows with the highest play counts.
sql_query = "SELECT Category, VideoView FROM videos ORDER BY VideoView DESC LIMIT 20"
cursor.execute(sql_query)
result = cursor.fetchall()
# Split the rows into categories (x axis) and play counts (y axis).
video_category = [row[0] for row in result]
vv_values = [row[1] for row in result]
# Vertical bar chart of the play counts, with rotated category labels.
bar = (
    Bar()
    .add_xaxis(video_category)
    .add_yaxis("Views", vv_values)
    .set_global_opts(
        title_opts=opts.TitleOpts(title="播放量最高的前20条视频的类型"),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(
                rotate=-45,
                font_size=11,
                interval=0,
            )
        ),
    )
)
bar.render("videoview.html")
# Categories of the 20 most-commented videos.
sql_query1 = "SELECT Category, Comment FROM videos ORDER BY Comment DESC LIMIT 20"
cursor.execute(sql_query1)
result = cursor.fetchall()
# All queries are done: release the cursor and close the connection.
cursor.close()
db.close()
# Split the rows into categories and comment counts.
video_category = [row[0] for row in result]
vv_comment = [row[1] for row in result]
# Horizontal bar chart of the comment counts.
bar = (
    Bar()
    .add_xaxis(video_category)  # categories
    .add_yaxis("Comments", vv_comment)  # comment counts (series was mislabelled "Views")
    .reversal_axis()  # swap the x and y axes
    .set_global_opts(
        title_opts=opts.TitleOpts(title="评论量最高的前20条视频"),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(
                font_size=11,  # label font size
                interval=0,  # label display interval
            )
        ),
    )
)
bar.render("comment.html")
这个代码包括了爬取(requests模块)和对数据进行标签分类、生成图表(pyecharts模块),同时将数据保存在了对应的数据库中,并且还生成了一个csv文件保存对应数据。但是代码仍有缺陷,可能会导致爬取错误。
爬取并分类好的数据大致如下:
5、界面设计
使用QtDesigner进行界面设计
设计界面如下:
1、采集数据界面
2、数据可视化界面
3、数据分析界面
如果对QtDesigner不熟悉,可以看看我的这篇文章:《利用PySide2模块以及Qt设计师(Qt-Designer)设计简易的系统窗体》。
6、QThread多线程设计
想要实现爬取数据动态显示到界面上,我们需要使用Qt中的QThread类实现对于线程的创建与管理。
我们需要新建一个派生类DataThread(名称任意),该派生类由QThread这个基类派生,可以使用QThread中的相关成员函数,同时我们可以在DataThread这个由我们自己定义的派生类中进行相关修改。
DataThread派生类大致如下:
from PyQt5.QtCore import *
class DataThread(QThread):
    """Skeleton of the worker thread that will host the crawler."""

    # One emission per scraped video; the seven fields are four strings,
    # two ints and a final string.
    signal = pyqtSignal(str, str, str, str, int, int, str)

    def __init__(self):
        super().__init__()
        # Thread state flag: 1 = running, 0 = stopped.
        self.state = 1

    def run(self):
        """Thread body; intentionally empty in this skeleton."""
        pass

    def Stop(self):
        """Mark the thread as stopped by clearing the state flag."""
        self.state = 0
1、其中 signal = pyqtSignal(str, str, str, str, int, int, str) 对应的每一个类型为我们所爬取的数据的类型,比如我使用的数据为:
VideoID(视频编号)、VideoName(视频名称)、VideoAuther(视频作者)、Category(视频类型)、VideoView(观看量)、Comment(评论数)、Duration(视频时长),对应的数据类型为字符串(str),字符串(str),字符串(str),字符串(str),整型(int),整型(int),字符串(str)。
2、其中 def __init__(self): 这个是初始化函数,可以用来设置全局变量,例如 self.state 表示的是当前线程的状态,1为进行中,0为停止,初始化为1。
3、其中 def run(self): 这个函数比较重要,我们需要将爬虫代码写在run函数中,并且每爬取一条数据,进行数据处理、分类后都要将数据传递出去
4、其中 def Stop(self): 这个函数用来控制线程的停止,当我需要停止的时候就调用该函数,那么线程就会停止,爬虫也会随之停止。
加上爬虫代码并根据个人的需求完善代码,完整DataThread类的代码如下:
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
import requests
from lxml import etree
class DataThread(QThread):
    """Worker thread that scrapes Bilibili search pages.

    Every video whose title contains one of the category keywords is emitted
    once through ``signal``. Calling ``Stop()`` makes ``run()`` return at the
    next video; a later ``start()`` resumes from the page that was in progress
    and skips links that were already emitted.
    """

    # VideoID, VideoName, VideoAuther, Category, VideoView, Comment, Duration
    signal = pyqtSignal(str, str, str, str, int, int, str)

    def __init__(self):
        super().__init__()
        self.state = 1  # 1 = running, 0 = stop requested
        self.page_number = 1
        self.o_number = 0
        self.key = []
        self.value = []
        # Running counter used as VideoID; initialised here so run() does not
        # depend on the caller assigning it first.
        self.data = 1
        # Links already emitted; kept on the instance so a restart does not
        # emit the same video twice.
        self.seen_links = set()

    @staticmethod
    def _parse_count(text):
        """Convert a counter such as ``'1.5万'`` or ``'320'`` to an int.

        Returns 0 when the text cannot be parsed, because the signal requires
        an int in that position.
        """
        text = text.strip()
        factor = 1
        if text.endswith('万'):
            factor, text = 10000, text[:-1]
        elif text.endswith('亿'):
            factor, text = 100000000, text[:-1]
        try:
            return int(round(float(text) * factor))
        except ValueError:
            return 0

    def run(self):
        """Fetch search result pages up to page 34 and emit matching videos."""
        # Local import: this snippet's import block does not bring in `time`.
        import time

        # Crawler setup.
        base_url = 'https://search.bilibili.com/all?vt=77434542&keyword=%E7%BC%96%E7%A8%8B%E8%AF%BE%E7%A8%8B&from_source=webtop_search&spm_id_from=333.1007&search_source=5'
        params = {}
        headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0"
        }
        video_data_by_keyword = ['C语言', 'C++', 'Python', 'PHP', '算法', 'Java', 'go语言', 'Mysql', 'C#', 'Scratch', 'web', '计算机']
        while self.state and self.page_number <= 34:
            params['page'] = str(self.page_number)
            params['o'] = str(self.o_number)
            # A timeout keeps a stalled connection from blocking the thread forever.
            response = requests.get(base_url, params=params, headers=headers, timeout=10)
            doc = etree.HTML(response.content.decode('utf-8'))
            contentPath = doc.xpath('//div[@class="bili-video-card__info--right"]/a/@href')
            contentname = doc.xpath('//div[@class="bili-video-card__info--right"]//h3[@class="bili-video-card__info--tit"]/@title')
            contentauthor = doc.xpath('//div[@class="bili-video-card__info--right"]//span[@class="bili-video-card__info--author"]/text()')
            contentVV = doc.xpath('//div[@class="bili-video-card__stats--left"]/span[@class="bili-video-card__stats--item"][1]/span/text()')
            contentCM = doc.xpath('//div[@class="bili-video-card__stats--left"]/span[@class="bili-video-card__stats--item"][2]/span/text()')
            contentDR = doc.xpath('//div[@class="bili-video-card__stats"]/span[@class="bili-video-card__stats__duration"]/text()')
            for link, name, auther, vv, cm, dr in zip(contentPath, contentname, contentauthor, contentVV, contentCM, contentDR):
                if not self.state:
                    # Stop requested: leave the page counters untouched so the
                    # next start() resumes from this page.
                    return
                # First keyword contained in the title, compared case-insensitively.
                lower_name = name.lower()
                Category = next(
                    (keyword for keyword in video_data_by_keyword if keyword.lower() in lower_name),
                    None,
                )
                if Category is None or link in self.seen_links:
                    continue
                self.seen_links.add(link)
                self.signal.emit(
                    str(self.data),
                    name,
                    auther,
                    Category,
                    self._parse_count(vv),
                    self._parse_count(cm),
                    dr,
                )
                self.data += 1
                # Short pause between emissions.
                time.sleep(0.1)
            self.page_number += 1
            self.o_number += 24

    def Stop(self):
        """Request the crawl to stop; run() checks the flag before each video."""
        self.state = 0
在这个代码中,爬虫代码不断爬取数据,通过 self.signal.emit(VideoID, VideoName, VideoAuther, Category, VideoView, Comment, Duration) 这行代码将数据传递到界面上。
7、通过UI界面实时显示数据
到最为关键的一步了,我们需要对设计好的UI界面进行初始化,并利用爬取好传递过来的数据来生成可视化图表,并实现实时自动刷新图表以及手动刷新图表。
还是和DataThread派生类一样,我们可以创建一个自己的窗口类MyWindow,继承于我们通过QtDesigner设计好的界面生成的类Ui_MainWindow,同时还要继承于PyQt5中的QMainWindow类。
class MyWindow(Ui_MainWindow, QMainWindow):
    """Main window: builds the designer UI, clears the videos table in the
    database and owns the crawler thread whose rows arrive in ``DoWork``."""

    # Same argument types as DataThread.signal; connected to DataThread.Stop
    # in CreateThread.
    signal = pyqtSignal(str, str, str, str, int, int, str)

    def __init__(self):
        QMainWindow.__init__(self)
        self.setupUi(self)
        # Set the instance state before Operate() wires the worker thread, so
        # no slot can run against a half-initialised window.
        self.dt = {'C语言': 0, 'C++': 0, 'Python': 0, 'PHP': 0, '算法': 0, 'Java': 0, 'go语言': 0, 'Mysql': 0, 'C#': 0, 'Scratch': 0, 'web': 0, '计算机': 0}
        self.showflag = 0
        self.Operate()

    def Operate(self):
        """Run the start-up steps: table setup, database reset, thread creation."""
        self.InitTable()
        self.ConnectDB()
        self.CreateThread()

    def InitTable(self):
        """Configure the columns and behaviour of the result table."""
        table = self.tableWidget_show_all_datas
        # Number of columns.
        table.setColumnCount(7)
        # Column titles.
        table.setHorizontalHeaderLabels(
            ['视频ID号', '视频名称', '视频作者', '相关分类', '视频观看量', '评论数', '视频时长'])
        # Select whole rows (the original passed True, i.e. the value 1).
        table.setSelectionBehavior(QAbstractItemView.SelectRows)
        # Column widths, by column index.
        for column, width in enumerate((147, 630, 247, 147, 120, 120, 147)):
            table.setColumnWidth(column, width)
        # Clicking a header sorts by that column.
        table.setSortingEnabled(True)
        # Hide the default row numbers.
        table.verticalHeader().setHidden(True)

    def ConnectDB(self):
        """Open the database connection and delete all rows from ``videos``."""
        self.con = GetConn()
        self.cur = self.con.cursor()
        sql = "delete from videos"
        try:
            self.cur.execute(sql)
            self.con.commit()
            print('清空数据成功')
        except Exception as e:
            print('清空数据失败', e)

    def ShowSelWindow(self):
        """Re-apply the tab widget's current index."""
        # NOTE(review): this sets the index to the value it already has, so it
        # looks like a no-op - confirm the intent.
        selindex = self.tabWidget.currentIndex()
        self.tabWidget.setCurrentIndex(selindex)

    def CreateThread(self):
        """Create the crawler thread and connect its signal to ``DoWork``."""
        self.datathread = DataThread()
        # The ID counter starts at 1.
        self.datathread.data = 1
        self.datathread.signal.connect(self.DoWork)
        self.signal.connect(self.datathread.Stop)

    def StartThread(self):
        """Toggle collection: start the thread or ask it to stop.

        NOTE(review): nothing in the visible code connects this method to the
        button's ``clicked`` signal - confirm it is wired up elsewhere.
        """
        text = self.pButton_data_collection.text()
        if text == '开始采集':
            self.pButton_data_collection.setText('停止采集')
            self.datathread.state = 1
            self.datathread.start()
        else:
            self.pButton_data_collection.setText('开始采集')
            self.datathread.Stop()

    def DoWork(self, VideoID, VideoName, VideoAuther, Category, VideoView, Comment, Duration):
        """Slot for ``DataThread.signal``; left empty in this outline."""
        pass
我的MyWindow类大体架构就是这样。在这个类中,最关键的成员函数是DoWork函数:第6步多线程中通过爬虫代码爬取的每一条数据都会依次传递到DoWork函数中。线程处于运行状态时会不断调用DoWork函数,线程停止后才停止对DoWork函数的调用。
8、效果展示
b站编程课程爬虫多线程客户端示例展示视频
9.完整代码
bilibili_UI.py
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'bilibili_UI.ui'
#
# Created by: PyQt5 UI code generator 5.15.9
#
# WARNING: Any manual changes made to this file will be lost when pyuic5 is
# run again. Do not edit this file unless you know what you are doing.
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
    """UI definition generated by pyuic5 from 'bilibili_UI.ui'.

    ``setupUi`` builds the widget tree on the given main window and
    ``retranslateUi`` assigns the visible texts.
    """

    def setupUi(self, MainWindow):
        """Create all widgets and attach them to *MainWindow*."""
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(1450, 853)
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        # --- Title bar: a centred label between two expanding spacers ---
        self.layoutWidget = QtWidgets.QWidget(self.centralwidget)
        self.layoutWidget.setGeometry(QtCore.QRect(0, 0, 1451, 29))
        self.layoutWidget.setObjectName("layoutWidget")
        self.horizontalLayout = QtWidgets.QHBoxLayout(self.layoutWidget)
        self.horizontalLayout.setContentsMargins(0, 0, 0, 0)
        self.horizontalLayout.setObjectName("horizontalLayout")
        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout.addItem(spacerItem)
        self.label = QtWidgets.QLabel(self.layoutWidget)
        font = QtGui.QFont()
        font.setFamily("华文楷体")
        font.setPointSize(15)
        font.setBold(True)
        font.setWeight(75)
        self.label.setFont(font)
        self.label.setObjectName("label")
        self.horizontalLayout.addWidget(self.label)
        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout.addItem(spacerItem1)
        # --- Top-level tab widget ---
        self.tabWidget = QtWidgets.QTabWidget(self.centralwidget)
        self.tabWidget.setGeometry(QtCore.QRect(0, 30, 1451, 791))
        font = QtGui.QFont()
        font.setBold(True)
        font.setWeight(75)
        self.tabWidget.setFont(font)
        self.tabWidget.setObjectName("tabWidget")
        # --- Tab 1: collection button, data table, status label, pie view ---
        self.tab = QtWidgets.QWidget()
        self.tab.setObjectName("tab")
        self.pButton_data_collection = QtWidgets.QPushButton(self.tab)
        self.pButton_data_collection.setGeometry(QtCore.QRect(0, 10, 121, 41))
        font = QtGui.QFont()
        font.setFamily("华文仿宋")
        font.setPointSize(9)
        font.setBold(True)
        font.setItalic(False)
        font.setUnderline(False)
        font.setWeight(75)
        font.setStrikeOut(False)
        font.setKerning(False)
        self.pButton_data_collection.setFont(font)
        self.pButton_data_collection.setObjectName("pButton_data_collection")
        self.tableWidget_show_all_datas = QtWidgets.QTableWidget(self.tab)
        self.tableWidget_show_all_datas.setGeometry(QtCore.QRect(0, 50, 1451, 331))
        self.tableWidget_show_all_datas.setObjectName("tableWidget_show_all_datas")
        self.tableWidget_show_all_datas.setColumnCount(0)
        self.tableWidget_show_all_datas.setRowCount(0)
        self.Label_current_collection_data = QtWidgets.QLabel(self.tab)
        self.Label_current_collection_data.setGeometry(QtCore.QRect(0, 380, 1441, 31))
        self.Label_current_collection_data.setText("")
        self.Label_current_collection_data.setObjectName("Label_current_collection_data")
        self.WebEngineView_show_data_pie = QtWebEngineWidgets.QWebEngineView(self.tab)
        self.WebEngineView_show_data_pie.setGeometry(QtCore.QRect(0, 420, 811, 351))
        self.WebEngineView_show_data_pie.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_data_pie.setObjectName("WebEngineView_show_data_pie")
        self.label_pie = QtWidgets.QLabel(self.tab)
        self.label_pie.setGeometry(QtCore.QRect(820, 420, 621, 341))
        font = QtGui.QFont()
        font.setFamily("华文仿宋")
        font.setPointSize(17)
        font.setBold(True)
        font.setWeight(75)
        self.label_pie.setFont(font)
        self.label_pie.setText("")
        self.label_pie.setObjectName("label_pie")
        self.tabWidget.addTab(self.tab, "")
        # --- Tab 2: four web views plus the button that fills them ---
        self.tab_2 = QtWidgets.QWidget()
        self.tab_2.setObjectName("tab_2")
        self.WebEngineView_show_vv_Bar = QtWebEngineWidgets.QWebEngineView(self.tab_2)
        self.WebEngineView_show_vv_Bar.setGeometry(QtCore.QRect(0, 50, 701, 361))
        self.WebEngineView_show_vv_Bar.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_vv_Bar.setObjectName("WebEngineView_show_vv_Bar")
        self.WebEngineView_show_cm_Bar = QtWebEngineWidgets.QWebEngineView(self.tab_2)
        self.WebEngineView_show_cm_Bar.setGeometry(QtCore.QRect(0, 410, 701, 341))
        self.WebEngineView_show_cm_Bar.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_cm_Bar.setObjectName("WebEngineView_show_cm_Bar")
        self.pButton_show_four_picture = QtWidgets.QPushButton(self.tab_2)
        self.pButton_show_four_picture.setGeometry(QtCore.QRect(10, 0, 121, 41))
        self.pButton_show_four_picture.setObjectName("pButton_show_four_picture")
        self.WebEngineView_show_ca_Tunnel = QtWebEngineWidgets.QWebEngineView(self.tab_2)
        self.WebEngineView_show_ca_Tunnel.setGeometry(QtCore.QRect(700, 50, 751, 361))
        self.WebEngineView_show_ca_Tunnel.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_ca_Tunnel.setObjectName("WebEngineView_show_ca_Tunnel")
        self.WebEngineView_show_ca_Cloud = QtWebEngineWidgets.QWebEngineView(self.tab_2)
        self.WebEngineView_show_ca_Cloud.setGeometry(QtCore.QRect(700, 410, 751, 341))
        self.WebEngineView_show_ca_Cloud.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_ca_Cloud.setObjectName("WebEngineView_show_ca_Cloud")
        self.tabWidget.addTab(self.tab_2, "")
        # --- Tab 3: nested tab widget with three analysis pages ---
        self.tab_3 = QtWidgets.QWidget()
        self.tab_3.setObjectName("tab_3")
        self.tabWidget1 = QtWidgets.QTabWidget(self.tab_3)
        self.tabWidget1.setGeometry(QtCore.QRect(0, 0, 1441, 761))
        font = QtGui.QFont()
        font.setBold(True)
        font.setWeight(75)
        self.tabWidget1.setFont(font)
        self.tabWidget1.setObjectName("tabWidget1")
        # Analysis page 1: web view, button and text label.
        self.tab_4 = QtWidgets.QWidget()
        self.tab_4.setObjectName("tab_4")
        self.WebEngineView_show_vv_Bar2 = QtWebEngineWidgets.QWebEngineView(self.tab_4)
        self.WebEngineView_show_vv_Bar2.setGeometry(QtCore.QRect(0, 60, 921, 661))
        self.WebEngineView_show_vv_Bar2.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_vv_Bar2.setObjectName("WebEngineView_show_vv_Bar2")
        self.pButton_show1 = QtWidgets.QPushButton(self.tab_4)
        self.pButton_show1.setGeometry(QtCore.QRect(20, 10, 111, 41))
        self.pButton_show1.setObjectName("pButton_show1")
        self.label_vv = QtWidgets.QLabel(self.tab_4)
        self.label_vv.setGeometry(QtCore.QRect(920, 60, 511, 661))
        font = QtGui.QFont()
        font.setFamily("华文仿宋")
        font.setPointSize(15)
        font.setBold(True)
        font.setWeight(75)
        self.label_vv.setFont(font)
        self.label_vv.setText("")
        self.label_vv.setObjectName("label_vv")
        self.tabWidget1.addTab(self.tab_4, "")
        # Analysis page 2: button, web view and text label.
        self.tab_5 = QtWidgets.QWidget()
        self.tab_5.setObjectName("tab_5")
        self.pButton_show2 = QtWidgets.QPushButton(self.tab_5)
        self.pButton_show2.setGeometry(QtCore.QRect(30, 10, 111, 41))
        self.pButton_show2.setObjectName("pButton_show2")
        self.WebEngineView_show_cm_Bar2 = QtWebEngineWidgets.QWebEngineView(self.tab_5)
        self.WebEngineView_show_cm_Bar2.setGeometry(QtCore.QRect(0, 60, 981, 661))
        self.WebEngineView_show_cm_Bar2.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_cm_Bar2.setObjectName("WebEngineView_show_cm_Bar2")
        self.label_cm = QtWidgets.QLabel(self.tab_5)
        self.label_cm.setGeometry(QtCore.QRect(990, 60, 441, 661))
        font = QtGui.QFont()
        font.setFamily("华文仿宋")
        font.setPointSize(15)
        font.setBold(True)
        font.setWeight(75)
        self.label_cm.setFont(font)
        self.label_cm.setText("")
        self.label_cm.setObjectName("label_cm")
        self.tabWidget1.addTab(self.tab_5, "")
        # Analysis page 3: button, two web views and text label.
        self.tab_6 = QtWidgets.QWidget()
        self.tab_6.setObjectName("tab_6")
        self.pButton_show3 = QtWidgets.QPushButton(self.tab_6)
        self.pButton_show3.setGeometry(QtCore.QRect(20, 10, 111, 41))
        self.pButton_show3.setObjectName("pButton_show3")
        self.WebEngineView_show_ca_Tunnel2 = QtWebEngineWidgets.QWebEngineView(self.tab_6)
        self.WebEngineView_show_ca_Tunnel2.setGeometry(QtCore.QRect(0, 60, 961, 341))
        self.WebEngineView_show_ca_Tunnel2.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_ca_Tunnel2.setObjectName("WebEngineView_show_ca_Tunnel2")
        self.WebEngineView_show_ca_Cloud2 = QtWebEngineWidgets.QWebEngineView(self.tab_6)
        self.WebEngineView_show_ca_Cloud2.setGeometry(QtCore.QRect(0, 400, 961, 321))
        self.WebEngineView_show_ca_Cloud2.setStyleSheet("border: 1px solid black;")
        self.WebEngineView_show_ca_Cloud2.setObjectName("WebEngineView_show_ca_Cloud2")
        self.label_ca = QtWidgets.QLabel(self.tab_6)
        self.label_ca.setGeometry(QtCore.QRect(970, 60, 451, 661))
        font = QtGui.QFont()
        font.setFamily("华文仿宋")
        font.setPointSize(15)
        font.setBold(True)
        font.setWeight(75)
        self.label_ca.setFont(font)
        self.label_ca.setText("")
        self.label_ca.setObjectName("label_ca")
        self.tabWidget1.addTab(self.tab_6, "")
        self.tabWidget.addTab(self.tab_3, "")
        # --- Main window chrome: central widget, menu bar, status bar ---
        MainWindow.setCentralWidget(self.centralwidget)
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 1450, 26))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)
        self.retranslateUi(MainWindow)
        # Both tab widgets start on their third page (index 2).
        self.tabWidget.setCurrentIndex(2)
        self.tabWidget1.setCurrentIndex(2)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Assign the window title, button captions and tab titles."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow"))
        self.label.setText(_translate("MainWindow", "B站编程课程采集与分析"))
        self.pButton_data_collection.setText(_translate("MainWindow", "开始采集"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab), _translate("MainWindow", "采集课程数据"))
        self.pButton_show_four_picture.setText(_translate("MainWindow", "数据可视化"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_2), _translate("MainWindow", "数据可视化"))
        self.pButton_show1.setText(_translate("MainWindow", "显示分析"))
        self.tabWidget1.setTabText(self.tabWidget1.indexOf(self.tab_4), _translate("MainWindow", "分析点1"))
        self.pButton_show2.setText(_translate("MainWindow", "显示分析"))
        self.tabWidget1.setTabText(self.tabWidget1.indexOf(self.tab_5), _translate("MainWindow", "分析点2"))
        self.pButton_show3.setText(_translate("MainWindow", "显示分析"))
        self.tabWidget1.setTabText(self.tabWidget1.indexOf(self.tab_6), _translate("MainWindow", "分析点3"))
        self.tabWidget.setTabText(self.tabWidget.indexOf(self.tab_3), _translate("MainWindow", "数据分析"))
from PyQt5 import QtWebEngineWidgets
main.py
import sys
from PyQt5.QtWidgets import *
from Function import *
if __name__ == '__main__':
    # Pass the real command line to Qt instead of an empty argument list.
    app = QApplication(sys.argv)
    mywin = MyWindow()
    mywin.show()
    # Run the Qt event loop and use its result as the process exit code.
    sys.exit(app.exec_())
DB.py
import pymysql
def GetConn():
    """Open a connection to the local ``bilibili`` MySQL database.

    Returns:
        The pymysql connection on success, or ``None`` when connecting
        failed (the error is printed).
    """
    try:
        conn = pymysql.connect(host='localhost', user='root', password='123456', database='bilibili')
    except Exception as e:
        print("连接失败!\n", e)
        print()
        # Return explicitly so a failure never reaches an unbound `conn`.
        return None
    print("连接成功!\n")
    return conn
def CloseConn(cur, conn):
    """Close a cursor and a connection, tolerating missing objects.

    Args:
        cur: Cursor to close; skipped when falsy (e.g. ``None``).
        conn: Connection to close; skipped when falsy (e.g. ``None``).

    Each object is closed independently, so an error while closing the cursor
    does not leave the connection open. Errors are printed, not raised.
    """
    for resource in (cur, conn):
        if not resource:
            continue
        try:
            resource.close()
        except Exception as e:
            print("操作异常!!!\n", e)
Function.py
from PyQt5.QtWidgets import *
from PyQt5.QtCore import *
from PyQt5.QtGui import *
from PyQt5.QtWebEngineWidgets import *
import datetime
import time
from bilibili_UI import *
from DB import *
import pymysql
import requests
from lxml import etree
from pyecharts import options as opts
from pyecharts.charts import Funnel
from pyecharts.render import make_snapshot
from pyecharts.globals import ThemeType
from pyecharts import options as opts
from pyecharts.charts import Pie
from pyecharts.charts import Bar
from pyecharts.charts import WordCloud
class DataThread(QThread):
signal = pyqtSignal(str, str, str, str, int, int, str)
def __init__(self):
QThread.__init__(self)
self.state = 1
self.page_number = 1
self.o_number = 0
self.key = []
self.value = []
def run(self):
# while(self.state):
# 爬虫准备工作
base_url = 'https://search.bilibili.com/all?vt=77434542&keyword=%E7%BC%96%E7%A8%8B%E8%AF%BE%E7%A8%8B&from_source=webtop_search&spm_id_from=333.1007&search_source=5'
params = {}
headers = {
**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**需要这份系统化的资料的朋友,可以添加V获取:vip204888 (备注大数据)**
![img](https://img-blog.csdnimg.cn/img_convert/220cf4e2c7ddbdb0b9c826a71d2baf77.png)
**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**
ort WordCloud
class DataThread(QThread):
signal = pyqtSignal(str, str, str, str, int, int, str)
def __init__(self):
QThread.__init__(self)
self.state = 1
self.page_number = 1
self.o_number = 0
self.key = []
self.value = []
def run(self):
# while(self.state):
# 爬虫准备工作
base_url = 'https://search.bilibili.com/all?vt=77434542&keyword=%E7%BC%96%E7%A8%8B%E8%AF%BE%E7%A8%8B&from_source=webtop_search&spm_id_from=333.1007&search_source=5'
params = {}
headers = {
**网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。**
**需要这份系统化的资料的朋友,可以添加V获取:vip204888 (备注大数据)**
[外链图片转存中...(img-VgHr2oNU-1713186009243)]
**一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!**