使用 BeautifulSoup 获取各城市的最低气温数据,然后使用 matplotlib 生成数据图表。
教程源于知了课堂。自己重新码了一遍。
# coding: utf-8
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
# Use the SimHei font so Chinese text in the chart renders correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']

# Request headers sent with every page fetch (browser-style User-Agent).
HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    ),
}

# Module-level accumulator: parse_page() appends one
# {'city': <name>, 'temp': <int>} dict per parsed table row.
ALL_DATE = []
def parse_page(url, timeout=10):
    """Scrape one forecast page and record each city's minimum temperature.

    Downloads ``url``, locates the first ``<div class="conMidtab">`` and walks
    every table inside it. For each data row a ``{'city': str, 'temp': int}``
    dict is appended to the module-level ``ALL_DATE`` list.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot hang the script indefinitely.

    Raises:
        requests.RequestException: On network failure, timeout, or an HTTP
            error status (via ``raise_for_status``).
        ValueError: If the page contains no ``conMidtab`` container.
    """
    resp = requests.get(url, headers=HEADERS, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    resp.raise_for_status()
    text = resp.content.decode('utf-8')

    # html5lib is used because the Hong Kong/Macao/Taiwan page has irregular
    # table markup (per the original author's note).
    soup = BeautifulSoup(text, 'html5lib')
    container = soup.find('div', class_='conMidtab')
    if container is None:
        raise ValueError(
            "no <div class='conMidtab'> found in page: {}".format(url)
        )

    for table in container.find_all('table'):
        # The first two rows of each table are skipped; data starts at row 3.
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            # Both tds[1] (first row) and tds[-2] need at least two cells.
            if len(tds) < 2:
                continue

            # In the first data row the leading cell holds the province name,
            # so the city is in the second cell; later rows start with the city.
            city_cell = tds[1] if index == 0 else tds[0]
            city = next(iter(city_cell.stripped_strings), None)
            temp_text = next(iter(tds[-2].stripped_strings), None)
            if city is None or temp_text is None:
                continue

            # Stored as int so records can be sorted numerically; rows whose
            # temperature cell is not an integer are skipped rather than
            # aborting the whole scrape.
            try:
                temp = int(temp_text)
            except ValueError:
                continue

            ALL_DATE.append({'city': city, 'temp': temp})
def main():
    """Scrape every regional page and plot the 20 lowest minimum temperatures.

    Calls ``parse_page`` once per region slug (which fills ``ALL_DATE``),
    sorts ``ALL_DATE`` in place by ascending temperature, and draws a line
    chart of the first 20 records with matplotlib.
    """
    # Kept under a name other than ``url`` so the template is never
    # overwritten by a formatted address (original author's caveat).
    url_template = 'http://www.weather.com.cn/textFC/{}.shtml'
    region_slugs = ['hb', 'db', 'hz', 'hn', 'xb', 'xn', 'gat']
    for slug in region_slugs:
        parse_page(url_template.format(slug))

    # In-place sort, lowest temperature first; ALL_DATE remains sorted after
    # this call.
    ALL_DATE.sort(key=lambda record: record['temp'])
    coldest = ALL_DATE[:20]

    # Split the records into parallel x (city) and y (temperature) series.
    cities = [record.get('city') for record in coldest]
    min_temp = [record['temp'] for record in coldest]

    plt.plot(cities, min_temp, linewidth=5)
    plt.title('中国最低温排行', fontsize=24)
    plt.xlabel('City', fontsize=14)
    plt.ylabel('Min_temp', fontsize=14)
    plt.tick_params(axis='both', labelsize=14)
    plt.show()
# Run the scraper and show the chart only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()