前言:
爬取网站 www.tianqihoubao.com 上各城市的历史天气数据，并利用 matplotlib 将数据可视化。
代码实现:
1 import asyncio 2 import aiohttp 3 from lxml import etree 4 import re 5 from collections import namedtuple 6 import matplotlib.pyplot as plt 7 8 #用来正常显示中文标签 9 plt.rcParams['font.sans-serif']=['SimHei'] 10 11 Args = namedtuple('Args',['city','year','month']) 12 13 #获取给定城市列表在给定年列表的以月为单位的每天平均温度字典, 14 #返回格式 {Args(city='wuhan', year=2018, month=4): [21.5, 22.5, ...], Args(city='shanghai', year=2018, month=2): [3.5, 2.5, ...], ...} 15 def get_weather(citys, years): 16 tdata = {} 17 wdata = {} 18 async def work(args): 19 url = "http://www.tianqihoubao.com/lishi/%s/month/%d%02d.html" % (args.city, args.year, args.month) 20 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'} 21 async with aiohttp.ClientSession() as session: 22 async with session.get(url, headers=headers, timeout=1000, verify_ssl=False) as response: 23 html = await response.text() 24 ethtml = etree.HTML(html) 25 26 #从html页面提取数据,数据格式如: '10℃/-2℃' 27 result = [re.sub("\r\n *","",item.text) for item in ethtml.xpath("//table[@class='b']/tr/td")