最近不知不觉迷上了PyQt这个东东。今天分享一个爬取天气数据的小工具。
废话不多说,直接上代码。
1、用 Qt Designer(Qt 设计师)创建界面,并保存为 ui 文件。
2、用命令 pyuic5 ui文件名 -o py文件名 将 ui 文件转成 py 文件(本文生成的文件为 spider_data.py,主程序会从中导入 Ui_spider_data 类)。
3、写主程序代码
'''
Create on 2022-05-25
@author: 藝海朝阳
encoding=utf8
'''
import plotly.express as px
import requests
from lxml import etree
import json
import os
import sys
import pandas as pd
from spider_data import Ui_spider_data
from PyQt5.QtWidgets import (
QApplication,QMessageBox,QDialog,QFileDialog,QTableWidgetItem,QHBoxLayout
)
from PyQt5 import QtWebSockets
from PyQt5 import (
QtWebEngineWidgets,QtCore
)
'''
url = 'http://tianqi.2345.com/'
url1 = 'http://tianqi.2345.com/Pc/GetHistory?areaInfo%5BareaId%5D=72039&areaInfo%5BareaType%5D=2&date%5Byear%5D=年份&date%5Bmonth%5D=月份'
'''
class TianQi(Ui_spider_data, QDialog):
    """Dialog that scrapes one month of weather history from tianqi.2345.com.

    The user picks a year and a month, the data is downloaded and shown in
    the result table, a min/max temperature line chart is rendered in the
    embedded web view, and the data can be exported to an Excel file.
    """

    # Columns copied into the result table, in display order.
    _TABLE_COLUMNS = ["日期", "最高温", "最低温", "天气", "风力风向", "空气质量指数"]
    # Seconds to wait for the weather site; the request runs on the GUI
    # thread, so it must not be allowed to block forever.
    _REQUEST_TIMEOUT = 10

    def __init__(self) -> None:
        """Build the UI, fill the year/month selectors and wire the buttons."""
        super().__init__()
        self.setupUi(self)
        self.url = 'http://tianqi.2345.com/'
        self.header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'}
        years = [str(i) for i in range(2017, 2030 + 1)]
        self.comboBox_choose_year.addItems(years)
        months = [str(i) for i in range(1, 12 + 1)]
        self.comboBox_choose_month.addItems(months)
        self.show()
        # Most recently fetched month as a DataFrame; None until the first
        # successful fetch.
        self.df = None
        # The chart is rendered as HTML inside a web view placed on tab_2.
        hbox = QHBoxLayout()
        self.webview = QtWebEngineWidgets.QWebEngineView()
        hbox.addWidget(self.webview)
        self.tab_2.setLayout(hbox)
        self.pushButton_spider_data.clicked.connect(self.get_data)
        self.pushButton_download_to_excel.clicked.connect(self.save_data)

    def get_data(self):
        """Fetch the selected month, then refresh the table and the chart.

        Stores the selection in ``self.year`` / ``self.month`` and the
        result in ``self.df``. Network and parsing failures are reported in
        a message box instead of being raised out of the Qt slot.
        """
        self.year = int(self.comboBox_choose_year.currentText())
        self.month = int(self.comboBox_choose_month.currentText())
        url = f'{self.url}Pc/GetHistory?areaInfo%5BareaId%5D=72039&areaInfo%5BareaType%5D=2&date%5Byear%5D={self.year}&date%5Bmonth%5D={self.month}'
        try:
            df = self._fetch_month(url)
        except (requests.RequestException, ValueError, KeyError, TypeError) as err:
            QMessageBox.warning(self, "错误", f"获取天气数据失败:{err}")
            return
        if df.empty:
            QMessageBox.information(self, "提示", "未获取到该月份的天气数据")
            return
        self.df = df
        self._fill_table(self.df)
        self.show_chart(self.df)

    def _fetch_month(self, url):
        """Download *url* and return the month's weather table as a DataFrame.

        Raises:
            requests.RequestException: on network errors or a non-200 reply.
            ValueError / KeyError / TypeError: when the response is not the
                expected JSON-with-HTML payload or the columns are ragged.
        """
        # The header dict must be passed by keyword: the second positional
        # argument of requests.get is ``params`` (the query string).
        res = requests.get(url, headers=self.header, timeout=self._REQUEST_TIMEOUT)
        if res.status_code != 200:
            raise requests.HTTPError(f"HTTP {res.status_code}", response=res)
        res.encoding = 'utf-8'
        items = json.loads(res.text)
        # The JSON "data" field holds an HTML fragment, so it is queried
        # with XPath.
        html = etree.HTML(items['data'])
        if html is None:
            raise ValueError("empty history table")
        # Header cells become the DataFrame column names.
        title = html.xpath('//th/text()')
        datas = []
        # XPath positions are 1-based, so column idx is read from td[idx];
        # index 0 of the header list is skipped for that reason.
        for idx in range(1, len(title)):
            data = html.xpath(f'//td[{idx}]/text()')
            if idx == 1:
                # Drop the last three characters of the first column
                # (presumably a weekday suffix after the date - confirm
                # against the live page).
                data = [cell[:-3] for cell in data]
            if data:
                datas.append(data)
        # The air-quality value sits in a <span> inside its cell.
        datas.append(html.xpath('//td/span/text()'))
        return pd.DataFrame(dict(zip(title, datas)))

    def _fill_table(self, df):
        """Copy the rows of *df* into the result table widget."""
        # The row count must be set first, otherwise setItem shows nothing.
        self.tableWidget_show_result.setRowCount(len(df))
        for row_num, (_, row) in enumerate(df.iterrows()):
            for col, name in enumerate(self._TABLE_COLUMNS):
                self.tableWidget_show_result.setItem(
                    row_num, col, QTableWidgetItem(str(row[name]))
                )

    @staticmethod
    def _parse_temperature(value):
        """Convert a scraped temperature such as ``"25°"`` to an int.

        Empty values become 0. Values that are already numeric pass through
        unchanged, so the conversion can safely run twice on the same frame.
        """
        if not value:
            return 0
        return int(str(value).replace("°", ""))

    def show_chart(self, df):
        """Render the min/max temperature line chart for *df* in the web view.

        Note: *df* is modified in place - it is sorted by date and its two
        temperature columns are converted to integers.
        """
        df.sort_values(by="日期", ascending=True, inplace=True)
        df["最高温"] = df["最高温"].map(self._parse_temperature)
        df["最低温"] = df["最低温"].map(self._parse_temperature)
        fig = px.line(
            df,
            x="日期", y=["最低温", "最高温"], title=f'{self.year}-{self.month}温度数据')
        self.webview.setHtml(fig.to_html(include_plotlyjs="cdn"))

    def save_data(self):
        """Ask for a target path and export the fetched month to Excel."""
        if self.df is None:
            # Nothing has been fetched yet, so there is nothing to export.
            QMessageBox.information(self, "提示", "请先爬取数据")
            return
        path, _ = QFileDialog.getSaveFileName(self, "选择保存路径", "E:/", "xlsx(*.xlsx)")
        if not path:
            # The user cancelled the dialog.
            return
        if not path.lower().endswith(".xlsx"):
            # pandas picks the Excel writer from the file extension.
            path += ".xlsx"
        try:
            # Name the sheet after the month that was actually fetched, not
            # whatever the combo boxes currently show.
            self.df.to_excel(path, sheet_name=f"{self.year}-{self.month}", index=False)
        except (OSError, ValueError) as err:
            QMessageBox.warning(self, "错误", f"保存失败:{err}")
if __name__ == "__main__":
    # Script entry point: create the Qt application, open the dialog (its
    # constructor calls show()), and run the event loop until it exits.
    app = QApplication(sys.argv)
    # Keep a reference to the dialog for the lifetime of the event loop.
    spider_data = TianQi()
    sys.exit(app.exec_())
4、最终效果
5、如果想要打包成exe,可以用命令 pyinstaller -F -w py文件名(-F 表示打包成单个可执行文件,-w 表示运行时不显示控制台窗口)。