首先是爬取代码。
代码功能都很简单,只是想作为以后可以直接 copy 的模板。
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
# Module-level accumulator shared by the scraper: parse_page() appends one
# {'城市': ..., '最低气温': ...} dict per table row, and main() sorts it in place.
ALL_DATA = []
def parse_page(url, timeout=10):
    """Download one forecast page and record each row's city and minimum temperature.

    The page is fetched, decoded as UTF-8 and parsed with html5lib. Every
    ``<table>`` inside the ``<div class="conMidtab">`` container is walked,
    and one ``{'城市': city, '最低气温': min_temp}`` dict per data row is
    appended to the module-level ``ALL_DATA`` list. Nothing is returned.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script forever.

    Raises:
        requests.RequestException: On connection problems, a timeout, or a
            non-2xx HTTP status (via ``raise_for_status``).
        AttributeError: If the page has no ``div.conMidtab`` element.
        IndexError, ValueError: If a row lacks the expected cells or the
            temperature cell does not hold an integer.
    """
    headers = {'User-Agent': 'Mozilla/5.0(Windows NT 10.0;Win64;x64) AppleWebkit/537.36 (KHTML, like Geoko) Chrome/70.0.3538.102 safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')
    # NOTE(review): html5lib is presumably chosen for its lenient handling of
    # malformed markup -- confirm before switching parsers.
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    tables = conMidtab.find_all('table')
    for table in tables:
        # The first two rows of each table are skipped (presumably headers).
        trs = table.find_all('tr')[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # In the first remaining row the city name sits in the second
            # cell; in every later row it is the first cell.
            # NOTE(review): presumably the first cell of that row holds a
            # region/province label -- verify against the live page.
            city_td = tds[1] if index == 0 else tds[0]
            # stripped_strings is a generator; take its first string.
            city = list(city_td.stripped_strings)[0]
            # The second-to-last cell is stored as the minimum temperature.
            temp_td = tds[-2]
            min_temp = list(temp_td.stripped_strings)[0]
            ALL_DATA.append({'城市': city, '最低气温': int(min_temp)})
def main():
    """Scrape every configured page and return the ten coldest entries.

    Each URL is passed to ``parse_page``, which fills the module-level
    ``ALL_DATA`` list. That list is then sorted in place by ascending
    '最低气温', and its first ten items are returned as a new list.
    """
    # Pages to request, visited in this order.
    region_pages = (
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    )
    for page in region_pages:
        parse_page(page)

    def by_min_temp(entry):
        """Sort key: the entry's minimum temperature."""
        return entry['最低气温']

    # Lowest temperature first, so the coldest entries lead the list.
    ALL_DATA.sort(key=by_min_temp)
    return ALL_DATA[:10]
if __name__ == '__main__':
    # Run the scraper, then split its result into parallel label/value lists.
    datas = main()
    city = [entry['城市'] for entry in datas]
    temp = [entry['最低气温'] for entry in datas]
    # One bar per returned entry, labelled with the city name.
    plt.bar(range(len(city)), temp, tick_label=city)
    plt.show()
下面是简单的数据可视化代码。
import pandas as pd
import matplotlib.pyplot as plt
from pyecharts import options as opts
from pyecharts.charts import Bar
# Input data set and the two CSV files derived from it.
path = './china_city_AQI.csv'
path_AQI_top = './china_city_AQI_top10.csv'
path_AQI_tail = './china_city_AQI_tail10.csv'

data_AQI = pd.read_csv(path)

# Maximum, minimum and mean of the 'AQI' column.
AQI_max, AQI_min, AQI_mean = (
    data_AQI['AQI'].max(),
    data_AQI['AQI'].min(),
    data_AQI['AQI'].mean(),
)

# The ten rows with the smallest AQI, written out with to_csv defaults.
AQI_top = data_AQI.sort_values(by='AQI', ascending=True).head(10)
AQI_top.to_csv(path_AQI_top)

# The ten rows with the largest AQI.
AQI_tail = data_AQI.sort_values(by='AQI', ascending=False).head(10)
AQI_tail.to_csv(path_AQI_tail)
def matplotlib_top50():
    """Show a matplotlib bar chart of the 50 rows with the lowest AQI.

    Reads the module-level ``data_AQI`` frame, sorts it by ascending 'AQI',
    keeps the first 50 rows, and plots one bar per row labelled with the
    row's 'City' value. The chart is displayed with ``plt.show()``; nothing
    is returned.
    """
    AQI_top = data_AQI.sort_values(by='AQI')[:50]
    # Pull whole columns instead of looping over iterrows(): iterrows builds
    # a Series per row and does not preserve the columns' dtypes.
    city = AQI_top['City'].tolist()
    temp = AQI_top['AQI'].tolist()
    plt.bar(range(len(city)), temp, tick_label=city)
    plt.show()
def pyecharts_top50() -> Bar:
    """Build a pyecharts bar chart of the 50 rows with the lowest AQI.

    Reads the module-level ``data_AQI`` frame, sorts it by ascending 'AQI'
    and keeps the first 50 rows. The 'City' values form the x axis and the
    'AQI' values the single series. Per-bar labels are hidden, and mark
    lines are drawn at the series minimum, maximum and average.

    Returns:
        The configured ``Bar`` chart; the caller decides how to render it.
    """
    AQI_top = data_AQI.sort_values(by='AQI')[:50]
    # Column access with tolist() replaces the iterrows() loop: iterrows does
    # not preserve dtypes, and tolist() yields native Python values.
    city = AQI_top['City'].tolist()
    temp = AQI_top['AQI'].tolist()
    c = (
        Bar()
        .add_xaxis(city)
        .add_yaxis("中国AQI", temp)
        .set_global_opts(title_opts=opts.TitleOpts(title="城市AQI值top50"))
        .set_series_opts(
            label_opts=opts.LabelOpts(is_show=False),
            markline_opts=opts.MarkLineOpts(
                data=[
                    opts.MarkLineItem(type_="min", name="最小值"),
                    opts.MarkLineItem(type_="max", name="最大值"),
                    opts.MarkLineItem(type_="average", name="平均值"),
                ]
            ),
        )
    )
    return c
if __name__ == '__main__':
    # Build the chart, then render it with pyecharts' default settings.
    chart = pyecharts_top50()
    chart.render()