爬虫中国天气网
目的得到中国天气网中气温最低的十个城市
采用requests库获取页面、BeautifulSoup解析数据,然后用matplotlib进行可视化
#coding:utf-8
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
all_data=[]
def parse_temp(url):
    """Scrape one regional text-forecast page of weather.com.cn and append
    each city's lowest temperature to the module-level ``all_data`` list.

    Args:
        url: URL of a regional forecast page, e.g.
            "http://www.weather.com.cn/textFC/hb.shtml".

    Side effects:
        Appends ``{'city': str, 'low_temp': int}`` dicts to ``all_data``.
    """
    # Bug fix: the header key was misspelled "Use-Agent" in the original,
    # so the browser User-Agent was never actually sent to the server.
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.102 Safari/537.36"}
    resp = requests.get(url, headers=headers)
    text = resp.content.decode('utf-8')
    soup = BeautifulSoup(text, 'lxml')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        # Page layout changed or the request was blocked; skip this region
        # instead of crashing with AttributeError on .find_all.
        return
    for table in conMidtab.find_all('table'):
        # The first two <tr> rows are column headers, not city data.
        trs = table.find_all('tr')[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # In the first data row of each table the province name occupies
            # td[0], so the city name shifts to td[1].
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]
            # Second-to-last column holds the night-time low temperature.
            # (Reuse tds instead of re-running tr.find_all as the original did.)
            low_temp = list(tds[-2].stripped_strings)[0]
            all_data.append({'city': city, 'low_temp': int(low_temp)})
def main():
    """Scrape all seven regional forecast pages, then bar-plot the ten
    cities with the lowest temperatures."""
    # One text-forecast page per region of China (hb, db, hd, hz, hn, xb, xn).
    urls = [
        "http://www.weather.com.cn/textFC/hb.shtml",
        "http://www.weather.com.cn/textFC/db.shtml",
        "http://www.weather.com.cn/textFC/hd.shtml",
        "http://www.weather.com.cn/textFC/hz.shtml",
        "http://www.weather.com.cn/textFC/hn.shtml",
        "http://www.weather.com.cn/textFC/xb.shtml",
        "http://www.weather.com.cn/textFC/xn.shtml",
    ]
    for url in urls:
        parse_temp(url)
    # Sort ascending by low temperature and keep the ten coldest cities.
    # (Removed the dead `cities = []` assignment and the commented-out loop
    # from the original; comprehensions replace map(lambda ...).)
    all_data.sort(key=lambda d: d['low_temp'])
    coldest = all_data[:10]
    cities = [d['city'] for d in coldest]
    temps = [d['low_temp'] for d in coldest]
    plt.rcParams['font.sans-serif'] = ['SimHei']  # render Chinese city labels
    plt.bar(cities, temps)
    plt.show()


if __name__ == '__main__':
    main()
结果如下图所示