爬取房价网站结合PYQT实现翻页功能,并绘制相关图表

本文涉及网络爬虫、PyQt5、界面表格显示与上下翻页,以及在界面中嵌入图表显示。下面进入分享环节:

1、在PYQT的designer中设计界面,如下:

 2、将ui文件转换为test1.py文件,如下:

from PyQt5 import QtCore, QtGui, QtWidgets

class Ui_MainWindow(object):
    """Widget tree and signal wiring for the main window.

    According to the surrounding article this file is the Python conversion
    of a Qt Designer ``.ui`` file, so the statement order below mirrors the
    designer layout and should not be reshuffled by hand.

    The object passed as ``MainWindow`` must provide the slot methods
    ``first``, ``show_plot``, ``save``, ``clear``, ``bey``, ``beforpage``,
    ``nextpage`` and ``last``, because ``setupUi`` connects button clicks to
    them.
    """

    def setupUi(self, MainWindow):
        """Create all widgets and layouts on ``MainWindow`` and connect the button signals.

        Every created widget/layout is stored as an attribute on ``self``.
        """
        MainWindow.setObjectName("MainWindow")
        MainWindow.resize(800, 600)
        # Central widget with a vertical layout that stacks the rows below.
        self.centralwidget = QtWidgets.QWidget(MainWindow)
        self.centralwidget.setObjectName("centralwidget")
        self.verticalLayout = QtWidgets.QVBoxLayout(self.centralwidget)
        self.verticalLayout.setObjectName("verticalLayout")
        # Row 1: lineEdit (city abbreviation, see placeholder text) + pushButton.
        self.horizontalLayout = QtWidgets.QHBoxLayout()
        self.horizontalLayout.setObjectName("horizontalLayout")
        self.lineEdit = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit.setObjectName("lineEdit")
        self.horizontalLayout.addWidget(self.lineEdit)
        self.pushButton = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton.setObjectName("pushButton")
        self.horizontalLayout.addWidget(self.pushButton)
        self.verticalLayout.addLayout(self.horizontalLayout)
        # Row 2: lineEdit_2 (shows the page URL, see placeholder text) + pushButton_2.
        self.horizontalLayout_2 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_2.setObjectName("horizontalLayout_2")
        self.lineEdit_2 = QtWidgets.QLineEdit(self.centralwidget)
        self.lineEdit_2.setObjectName("lineEdit_2")
        self.horizontalLayout_2.addWidget(self.lineEdit_2)
        self.pushButton_2 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_2.setObjectName("pushButton_2")
        self.horizontalLayout_2.addWidget(self.pushButton_2)
        self.verticalLayout.addLayout(self.horizontalLayout_2)
        # Row 3: pushButton_3 / pushButton_4 / pushButton_5 separated by expanding spacers.
        self.horizontalLayout_3 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_3.setObjectName("horizontalLayout_3")
        self.pushButton_3 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_3.setObjectName("pushButton_3")
        self.horizontalLayout_3.addWidget(self.pushButton_3)
        spacerItem = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_3.addItem(spacerItem)
        self.pushButton_4 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_4.setObjectName("pushButton_4")
        self.horizontalLayout_3.addWidget(self.pushButton_4)
        spacerItem1 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_3.addItem(spacerItem1)
        self.pushButton_5 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_5.setObjectName("pushButton_5")
        self.horizontalLayout_3.addWidget(self.pushButton_5)
        self.verticalLayout.addLayout(self.horizontalLayout_3)
        # Main area: a grid with the table at (0, 0) and a container widget at (0, 1).
        self.gridLayout_2 = QtWidgets.QGridLayout()
        self.gridLayout_2.setObjectName("gridLayout_2")
        self.tableWidget = QtWidgets.QTableWidget(self.centralwidget)
        sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Minimum, QtWidgets.QSizePolicy.Expanding)
        sizePolicy.setHorizontalStretch(0)
        sizePolicy.setVerticalStretch(0)
        sizePolicy.setHeightForWidth(self.tableWidget.sizePolicy().hasHeightForWidth())
        self.tableWidget.setSizePolicy(sizePolicy)
        self.tableWidget.setObjectName("tableWidget")
        # The table starts empty; rows and columns are set later by the application code.
        self.tableWidget.setColumnCount(0)
        self.tableWidget.setRowCount(0)
        self.gridLayout_2.addWidget(self.tableWidget, 0, 0, 1, 1)
        # Container widget holding groupBox (titled in retranslateUi).
        self.widget = QtWidgets.QWidget(self.centralwidget)
        self.widget.setObjectName("widget")
        self.gridLayout = QtWidgets.QGridLayout(self.widget)
        self.gridLayout.setObjectName("gridLayout")
        self.groupBox = QtWidgets.QGroupBox(self.widget)
        self.groupBox.setObjectName("groupBox")
        self.gridLayout.addWidget(self.groupBox, 0, 0, 1, 1)
        self.gridLayout_2.addWidget(self.widget, 0, 1, 1, 1)
        self.verticalLayout.addLayout(self.gridLayout_2)
        # Row 4: pushButton_6 .. pushButton_9 (paging buttons, see retranslateUi) with spacers between them.
        self.horizontalLayout_4 = QtWidgets.QHBoxLayout()
        self.horizontalLayout_4.setObjectName("horizontalLayout_4")
        self.pushButton_6 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_6.setObjectName("pushButton_6")
        self.horizontalLayout_4.addWidget(self.pushButton_6)
        spacerItem2 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_4.addItem(spacerItem2)
        self.pushButton_7 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_7.setObjectName("pushButton_7")
        self.horizontalLayout_4.addWidget(self.pushButton_7)
        spacerItem3 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_4.addItem(spacerItem3)
        self.pushButton_8 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_8.setObjectName("pushButton_8")
        self.horizontalLayout_4.addWidget(self.pushButton_8)
        spacerItem4 = QtWidgets.QSpacerItem(40, 20, QtWidgets.QSizePolicy.Expanding, QtWidgets.QSizePolicy.Minimum)
        self.horizontalLayout_4.addItem(spacerItem4)
        self.pushButton_9 = QtWidgets.QPushButton(self.centralwidget)
        self.pushButton_9.setObjectName("pushButton_9")
        self.horizontalLayout_4.addWidget(self.pushButton_9)
        self.verticalLayout.addLayout(self.horizontalLayout_4)
        MainWindow.setCentralWidget(self.centralwidget)
        # Menu bar and status bar.
        self.menubar = QtWidgets.QMenuBar(MainWindow)
        self.menubar.setGeometry(QtCore.QRect(0, 0, 800, 23))
        self.menubar.setObjectName("menubar")
        MainWindow.setMenuBar(self.menubar)
        self.statusbar = QtWidgets.QStatusBar(MainWindow)
        self.statusbar.setObjectName("statusbar")
        MainWindow.setStatusBar(self.statusbar)

        self.retranslateUi(MainWindow)
        # Button -> slot wiring; each slot is looked up on MainWindow, so it must exist there.
        # pushButton and pushButton_6 both trigger MainWindow.first.
        self.pushButton.clicked.connect(MainWindow.first)
        self.pushButton_2.clicked.connect(MainWindow.show_plot)
        self.pushButton_6.clicked.connect(MainWindow.first)
        self.pushButton_3.clicked.connect(MainWindow.save)
        self.pushButton_4.clicked.connect(MainWindow.clear)
        self.pushButton_5.clicked.connect(MainWindow.bey)
        self.pushButton_7.clicked.connect(MainWindow.beforpage)
        self.pushButton_8.clicked.connect(MainWindow.nextpage)
        self.pushButton_9.clicked.connect(MainWindow.last)
        QtCore.QMetaObject.connectSlotsByName(MainWindow)

    def retranslateUi(self, MainWindow):
        """Set the window title, placeholder texts, button captions and group-box title."""
        _translate = QtCore.QCoreApplication.translate
        MainWindow.setWindowTitle(_translate("MainWindow", "二手房价信息"))
        # NOTE(review): the placeholder below contains a stray space inside the example city name.
        self.lineEdit.setPlaceholderText(_translate("MainWindow", "请输入城市名缩写:如合 肥,输入hf"))
        self.pushButton.setText(_translate("MainWindow", "确定"))
        self.lineEdit_2.setPlaceholderText(_translate("MainWindow", "用于显示网页"))
        self.pushButton_2.setText(_translate("MainWindow", "显示房价图"))
        self.pushButton_3.setText(_translate("MainWindow", "保存"))
        self.pushButton_4.setText(_translate("MainWindow", "清除"))
        self.pushButton_5.setText(_translate("MainWindow", "退出"))
        self.groupBox.setTitle(_translate("MainWindow", "房价图"))
        self.pushButton_6.setText(_translate("MainWindow", "首页"))
        self.pushButton_7.setText(_translate("MainWindow", "上一页"))
        self.pushButton_8.setText(_translate("MainWindow", "下一页"))
        self.pushButton_9.setText(_translate("MainWindow", "尾页"))

 3、编写主程序house_APP.py。绘图相关的内容我在上一篇文章中已经讲过(https://blog.csdn.net/qq_33267306/article/details/119863582?spm=1001.2014.3001.5502),本文主要实现上下翻页:我加入了一个lineEdit_2来显示当前页的网址,翻页时从该网址中解析出页码,加一或减一后重新抓取并显示。代码如下:

import sys
from PyQt5 import QtCore, QtGui, uic, QtWidgets
from PyQt5.QtWidgets import QGridLayout,QApplication, QWidget, QMainWindow, QFileDialog, QMessageBox, QTableWidget, QTableWidgetItem
from PyQt5.QtGui import QIcon
import pandas as pd
import requests   #对网页进行请求

from lxml import etree
import numpy as np
from test1 import Ui_MainWindow #导入界面文件

import matplotlib
matplotlib.use("Qt5Agg")  # 声明使用QT5
from matplotlib.backends.backend_qt5agg import FigureCanvasQTAgg as FigureCanvas
from matplotlib.figure import Figure
import matplotlib.pyplot as plt

class MyFigure(FigureCanvas):
    """Canvas widget wrapping one matplotlib Figure with a single subplot.

    After construction ``fig`` holds the Figure and ``axes`` holds the
    subplot created with ``add_subplot(111)``.
    """

    def __init__(self,width, height, dpi):
        """Build the figure with the given size (``figsize``) and ``dpi`` and attach it to the canvas."""
        # Use matplotlib.figure.Figure directly rather than a pyplot-managed figure.
        figure = Figure(figsize=(width, height), dpi=dpi)
        # The canvas base class must be initialised with the figure,
        # otherwise nothing can be displayed.
        super().__init__(figure)
        self.fig = figure
        # Single subplot that callers draw on.
        self.axes = figure.add_subplot(111)
                
class Window(QMainWindow,Ui_MainWindow):
    """Main window: downloads one listing page, shows it in the table and plots unit prices.

    The page URL is built from the city abbreviation typed into ``lineEdit``
    and written to ``lineEdit_2``; the paging slots read the current page
    number back from that URL.
    """

    # Highest page number the pager will request.
    LAST_PAGE = 60
    # Seconds to wait for the web server before giving up.
    REQUEST_TIMEOUT = 10
    # Column order of the DataFrame returned by start().
    COLUMNS = ['售卖标题','小区名','地址','房间布局','面积','朝向','装修情况','楼层高度','建筑时间','发帖时间','单价','总价','贴图地址']

    def __init__(self,parent=None):
        """Build the UI and embed a MyFigure canvas inside ``groupBox``."""
        super(Window, self).__init__(parent)
        self.setupUi(self)
        # Data of the page currently shown; empty until a page has been loaded.
        self.table=pd.DataFrame()
        self.F = MyFigure(width=3, height=2, dpi=100)
        # Layout installed on groupBox so the canvas is displayed inside it.
        self.gridlayout = QGridLayout(self.groupBox)
        self.gridlayout.addWidget(self.F,0,1)

    @staticmethod
    def _unit_prices(frame):
        """Return the '单价' column of ``frame`` as numbers (NaN where it cannot be parsed)."""
        text = frame['单价'].astype(str).str.replace(',', '', regex=False).str.strip()
        return pd.to_numeric(text, errors='coerce')

    @staticmethod
    def _parse_listing(li):
        """Extract the 13 fields of one ``<li>`` listing node, or None if any field is missing."""
        info = './div[@class="info clear"]'
        try:
            title = li.xpath(info + '/div[@class="title"]/a/text()')[0]
            # positionInfo holds two links: estate name, then address.
            position = li.xpath(info + '/div[@class="flood"]/div[@class="positionInfo"]/a/text()')
            # houseInfo is one '|'-separated string; evaluate the XPath once and index the parts.
            house = li.xpath(info + '/div[@class="address"]/div[@class="houseInfo"]/text()')[0].split('|')
            follow = li.xpath(info + '/div[@class="followInfo"]/text()')[0].split('/')
            unit = li.xpath(info + '/div[@class="priceInfo"]/div[@class="unitPrice"]/span/text()')[0]
            total = li.xpath(info + '/div[@class="priceInfo"]/div[@class="totalPrice"]/span/text()')[0]
            picture = li.xpath('./a[@class="noresultRecommend img LOGCLICKDATA"]/img[@class="lj-lazy"]/@data-original')[0]
            return [
                title,
                position[0],
                position[1],
                house[0],   # room layout
                house[1],   # area
                house[2],   # orientation
                house[3],   # renovation
                house[4],   # floor
                house[5],   # build time
                follow[1],  # posting time
                # Keep the text between '价' and '元'; [-1] also tolerates text without a '价' prefix.
                unit.split('价')[-1].split('元')[0],
                total + '万',
                picture,
            ]
        except IndexError:
            # Node without the expected structure: skip it instead of aborting the whole page.
            return None

    def plot(self):
        """Redraw the unit-price line chart for the rows currently in ``self.table``."""
        if self.table.empty or '单价' not in self.table.columns:
            QMessageBox.about(self,'提示信息','请先获取房价数据')
            return
        # These two settings are needed so Chinese text and minus signs render correctly.
        plt.rcParams['font.sans-serif']=['SimHei']
        plt.rcParams['axes.unicode_minus'] = False
        names = self.table['小区名'].tolist()
        prices = self._unit_prices(self.table).tolist()
        # Plot against integer positions: estate names may repeat, and the
        # tick labels must match the ticks one-to-one.
        positions = list(range(len(names)))
        axes = self.F.axes
        axes.clear()  # drop the previous page's line instead of stacking lines
        axes.plot(positions, prices)
        axes.set_xticks(positions)
        axes.set_xticklabels(names, rotation=90, fontsize='small')  # vertical x labels
        self.F.fig.suptitle("房价图")
        axes.set_xlabel('XLabel0')
        axes.set_ylabel('YLabel0')
        # Explicitly repaint the canvas so the new chart is shown.
        self.F.draw()

    def show_plot(self):
        """Slot for the plot button: draw the chart."""
        self.plot()

    def start(self,page):
        """Download listing page ``page`` for the entered city and parse it.

        Returns a DataFrame with the columns in ``COLUMNS`` (possibly with
        zero rows), or None when no city was entered or the request failed;
        in both None cases a message box has already been shown.
        """
        city = self.lineEdit.text().strip()
        if not city:
            QMessageBox.about(self,'提示信息','你倒是输入信息啊')
            return None
        url = 'https://'+city+'.lianjia.com/ershoufang/pg'+str(page)+'/'
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
        }
        try:
            # NOTE(review): this call is synchronous, so the caller waits until
            # the response arrives or the timeout expires.
            response = requests.get(url, headers=headers, timeout=self.REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as exc:
            QMessageBox.warning(self,'警示信息','网页请求失败:'+str(exc))
            return None
        # Only publish the URL once the page was fetched, so the pager keeps
        # pointing at the page that is actually displayed.
        self.lineEdit_2.setText(url)
        html = response.content.decode('utf-8', errors='replace')
        tree = etree.HTML(html)
        nodes = tree.xpath('//ul[@class="sellListContent"]/li') if tree is not None else []
        rows = [row for row in map(self._parse_listing, nodes) if row is not None]
        return pd.DataFrame(rows, columns=self.COLUMNS)

    def sho(self,pa):
        """Load page ``pa``, sort it by numeric unit price and fill the table widget."""
        frame = self.start(pa)
        if frame is None:
            return
        # Sort numerically (the scraped prices are strings); a stable sort
        # keeps the page order for equal prices, unparsable prices go last.
        order = self._unit_prices(frame).sort_values(kind='mergesort').index
        self.table = frame.loc[order].reset_index(drop = True)
        if self.table.empty:
            QMessageBox.about(self,'提示信息','该页没有解析到房源信息')
        row_count, column_count = self.table.shape
        self.tableWidget.setColumnCount(column_count)
        self.tableWidget.setRowCount(row_count)
        self.tableWidget.setHorizontalHeaderLabels(self.table.columns.values.tolist())
        for r, record in enumerate(self.table.itertuples(index=False, name=None)):
            for c, value in enumerate(record):
                self.tableWidget.setItem(r, c, QTableWidgetItem(str(value)))

    def first(self):
        """Slot: show page 1."""
        self.sho(1)

    def last(self):
        """Slot: show the last page (``LAST_PAGE``)."""
        self.sho(self.LAST_PAGE)

    def clear(self):
        """Slot: clear the table cells but keep the header row."""
        self.tableWidget.clearContents()

    def _current_page(self):
        """Return the page number embedded in the ``lineEdit_2`` URL, or None if there is none."""
        url = self.lineEdit_2.text()
        try:
            # rsplit: take the last 'pg', in case the city part also contains 'pg'.
            return int(url.rsplit('pg', 1)[1].split('/')[0])
        except (IndexError, ValueError):
            return None

    def nextpage(self):
        """Slot: show the page after the current one, up to ``LAST_PAGE``."""
        page = self._current_page()
        if page is None:
            QMessageBox.about(self,'提示信息','请先点击确定或首页获取数据')
            return
        if page + 1 <= self.LAST_PAGE:
            self.sho(page + 1)
        else:
            QMessageBox.warning(self,'警示信息','已经是最后一页了')

    def beforpage(self):
        """Slot: show the page before the current one, down to page 1."""
        page = self._current_page()
        if page is None:
            QMessageBox.about(self,'提示信息','请先点击确定或首页获取数据')
            return
        if page - 1 >= 1:
            self.sho(page - 1)
        else:
            QMessageBox.warning(self,'警示信息','已经没有上一页了')

    def save(self):
        """Slot: ask for a file name and write ``self.table`` to an Excel file."""
        # Patterns inside the filter parentheses are separated by spaces.
        path, _ = QFileDialog.getSaveFileName(self, '选择保存路径', '','Excel files (*.xlsx *.xls)')
        if not path:
            # Dialog was cancelled.
            return
        if not path.lower().endswith(('.xlsx', '.xls')):
            # Without a known extension pandas cannot choose a writer.
            path += '.xlsx'
        try:
            self.table.to_excel(path,na_rep="NULL",index=False)
        except (OSError, ValueError, ImportError) as exc:
            QMessageBox.warning(self,'警示信息','保存失败:'+str(exc))

    def bey(self):
        """Slot: close the window."""
        self.close()
    
        
if __name__ == '__main__':
    # Script entry point: create the application, show the main window and
    # hand control to the Qt event loop.
    app = QtWidgets.QApplication(sys.argv)
    # NOTE(review): absolute, machine-specific icon path.
    app.setWindowIcon(QIcon('D:/Python/2021/pyqt/翻页显示/house.ico'))
    # Window is the real main window; the extra bare QMainWindow that used to
    # be created here was never shown or used, so it is no longer built.
    ui = Window()
    ui.show()
    sys.exit(app.exec_())

4、运行house_APP.py,即可看到带翻页功能的房价表格和房价图。

 如果您喜欢,请帮忙点赞,谢谢~

  • 0
    点赞
  • 7
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值