需要安装的库:requests,bs4,pyecharts[版本0.1.9.4],lxml,html5lib
代码复制粘贴即可用,2019年8月9日测试通过
爬取全国城市的最高温度,并导出一张柱状图:
import requests
from bs4 import BeautifulSoup
from pyecharts import Bar
# Accumulates one {"city": ..., "temp": ...} dict per scraped table row; filled by parse_page().
DATA = []
def parse_page(url):
    """Scrape one forecast page and append its city temperatures to ``DATA``.

    Downloads ``url``, locates the first ``div`` with class ``conMidtab`` and
    walks every table inside it. For each data row the city name is read from
    the 8th cell counting from the end and the temperature from the 5th cell
    counting from the end; each pair is appended to the module-level ``DATA``
    list as ``{"city": <str>, "temp": <int>}``.

    Rows that have fewer than eight cells, an empty name/temperature cell, or
    a temperature that is not an integer are skipped instead of aborting the
    whole scrape.

    Args:
        url: Address of the page to fetch.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        ValueError: If the page contains no ``conMidtab`` container.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36"
    }
    # A timeout keeps a stalled server from hanging the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail early on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode('utf-8')

    soup = BeautifulSoup(text, "html5lib")
    container = soup.find('div', class_="conMidtab")
    if container is None:
        raise ValueError("no 'conMidtab' container found in {}".format(url))

    for table in container.find_all('table'):
        # The first two rows of every table are not data rows and are skipped.
        for row in table.find_all('tr')[2:]:
            cells = row.find_all('td')
            # Cells are addressed from the end of the row -- presumably because
            # some rows carry extra leading cells; confirm against the markup.
            if len(cells) < 8:
                continue
            city_name = next(cells[-8].stripped_strings, None)
            max_temp = next(cells[-5].stripped_strings, None)
            if city_name is None or max_temp is None:
                continue
            try:
                temperature = int(max_temp)
            except ValueError:
                # Non-numeric cell (e.g. a placeholder); nothing to rank.
                continue
            DATA.append({"city": city_name, "temp": temperature})
def main():
    """Scrape every region, rank the results and chart the ten highest.

    Calls ``parse_page`` once per region code, sorts the shared ``DATA`` list
    in place by temperature (descending), prints the full sorted list, and
    renders a bar chart of the first ten entries to ``temperature.html``.
    """
    url_template = "http://www.weather.com.cn/textFC/{}.shtml"
    for region in ('hb', 'db', 'hz', 'hd', 'hn', 'xb', 'xn', 'gat'):
        parse_page(url_template.format(region))

    # Rank all collected records, hottest first.
    DATA.sort(key=lambda record: record['temp'], reverse=True)
    print(DATA)

    # Only the ten leading records go into the chart.
    top_ten = DATA[:10]
    city_names = [record['city'] for record in top_ten]
    temperatures = [record['temp'] for record in top_ten]

    chart = Bar("中国城市最高气温排行榜")
    chart.add("温度", city_names, temperatures)
    chart.render('temperature.html')
# Run the scraper only when this file is executed directly, not when imported.
if __name__ == "__main__":
    main()
运行截图:
自动导出可视化数据: