__author__ = '田明博'
__date__ = '2019/10/9 21:23'
'''
获取所有城市的天气预报,按最低温度排名
'''
import requests
import operator
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
def get_page(link, timeout=10):
    '''
    Download one regional forecast page and extract a temperature per city.

    Only the first ``div`` with class ``conMidtab`` is parsed; per the original
    author's note the page holds one such block per day of the week and the
    first one is the current day.

    :param link: URL of the regional forecast page.
    :param timeout: seconds to wait for the server before giving up.
    :return: list of ``{'city': str, 'temp': int}`` dicts, one per parsed row;
             empty when the page contains no forecast block.
    :raises requests.RequestException: on network failure, timeout or an
             HTTP error status.
    '''
    # Without a timeout a stalled server would block the script forever.
    resp = requests.get(link, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    resp.raise_for_status()
    resp.encoding = 'utf-8'

    # html5lib parser; slower than lxml (original author's note).
    soup = BeautifulSoup(resp.text, 'html5lib')
    forecast_block = soup.find('div', attrs={'class': 'conMidtab'})
    if forecast_block is None:
        # Page layout changed or the page is empty: nothing to report.
        return []

    all_temp = []
    for table in forecast_block.find_all('table'):
        # The first two rows are skipped -- presumably header rows; confirm
        # against the live page markup.
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            if len(tds) < 2:
                # Both the city cell and tds[-2] need at least two cells.
                continue

            # In the first data row of each table the city name sits in the
            # second cell -- presumably because a row-spanning leading cell
            # is only present there; TODO confirm.
            city_td = tds[1] if index == 0 else tds[0]
            city_strings = list(city_td.stripped_strings)
            # Second-to-last cell holds the temperature that is ranked
            # (presumably the daily low -- verify against the page).
            temp_strings = list(tds[-2].stripped_strings)
            if not city_strings or not temp_strings:
                continue

            try:
                temp = int(temp_strings[0])
            except ValueError:
                # Non-numeric placeholder: skip this row, keep the rest.
                continue

            all_temp.append({'city': city_strings[0], 'temp': temp})
    return all_temp
def show_charset(min_temp_citys):
    '''
    Draw a bar chart of the given city temperatures and display it.

    :param min_temp_citys: iterable of dicts; each one is read through its
                           'city' and 'temp' keys.
    :return: None
    '''
    # Walk the records once so label and value stay paired and any bad
    # record fails at the same point as a plain loop would.
    pairs = [(record['city'], int(record['temp'])) for record in min_temp_citys]
    cities = [city for city, _ in pairs]
    temps = [temp for _, temp in pairs]

    rc = plt.rcParams
    rc['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
    rc['axes.unicode_minus'] = False  # make the minus sign render correctly

    plt.title('全国温度最低的前十城市')
    plt.xlabel('城市')  # x axis: city
    plt.ylabel('温度℃')  # y axis: temperature
    plt.bar(cities, temps)
    plt.show()
def main():
    '''
    Scrape every regional page, then print and plot the ten lowest readings.

    Records from all regions are merged, sorted ascending by their 'temp'
    value, and the first ten are printed and passed to ``show_charset``.

    :return: None
    '''
    # Regional forecast pages to scrape.
    links = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/hd.shtml',
        'http://www.weather.com.cn/textFC/hz.shtml',
        'http://www.weather.com.cn/textFC/hn.shtml',
        'http://www.weather.com.cn/textFC/xb.shtml',
        'http://www.weather.com.cn/textFC/xn.shtml',
    ]

    all_infos = []
    for link in links:
        # extend() grows the list in place; the previous code rebuilt it with
        # `+` on every pass and bound the result of get_page to the name
        # `all`, shadowing the builtin.
        all_infos.extend(get_page(link))

    # sorted() is stable, so ties keep their scrape order.
    min_temp_ten = sorted(all_infos, key=operator.itemgetter('temp'))[:10]
    print(min_temp_ten)
    show_charset(min_temp_ten)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
运行截图:
采用matplotlib库绘制。