分析部分见https://blog.csdn.net/weixin_44586452/article/details/113757005
python对中国天气网进行爬取并且对爬取到的数据进行数据可视化的显示:
- 爬取全国各个城市的气温(最高气温,最低气温);
- 获取全国城市中气温最低和最高的前十名城市(地区)并进行可视化(以柱状图的形式展示,输出为 .html 文件)。
网页分析
首先打开中国天气网http://www.weather.com.cn/,找到全国各个区域(华北、东北、华东、华中、华南、西北、西南、港澳台)天气信息。
以华北地区为例分析网页源代码
打开谷歌浏览器,访问华北地区天气预报页面 http://www.weather.com.cn/textFC/hb.shtml ,点击鼠标右键选择"检查",找到 Elements 面板。
如图所示:
代码
# 爬取全国所有城市名称和最低气温
# http://www.weather.com.cn/textFC/hb.shtml
import requests
from bs4 import BeautifulSoup
from pyecharts.charts import Bar
# HTTP request headers sent with every page fetch; the User-Agent string
# identifies the client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/88.0.4324.150 Safari/537.36"
    ),
}

# Shared accumulator: parse_page() appends one
# {"城市": <city name>, "最低气温": <int>} record per scraped table row.
ALL_DATA = []
def parse_page(url, timeout=10):
    """Scrape one regional forecast page and append its rows to ALL_DATA.

    For every data row of every table inside the first ``div.conMidtab``
    element, the city name and the lowest temperature are extracted, stored
    in ``ALL_DATA`` as ``{"城市": city, "最低气温": int}`` and printed.

    Args:
        url: Address of a regional text-forecast page.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # The headers must be passed by keyword: the second positional parameter
    # of requests.get() is `params`, which would send them as a query string.
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    text = response.content.decode('utf-8')  # decode explicitly to avoid garbled text

    # html5lib is the more fault-tolerant parser; lxml breaks on the
    # Hong Kong / Macau / Taiwan (gat) page.
    soup = BeautifulSoup(text, 'html5lib')

    con_mid_tab = soup.find('div', class_='conMidtab')
    if con_mid_tab is None:
        # Unexpected page layout: report it instead of failing on None.
        print('no conMidtab block found, skipping:', url)
        return

    for table in con_mid_tab.find_all('table'):
        # The first two rows of each table are skipped (header rows).
        rows = table.find_all('tr')[2:]
        for index, tr in enumerate(rows):
            tds = tr.find_all('td')
            if len(tds) < 2:
                continue  # not enough cells to hold a city and a temperature

            # In the first data row the city is taken from the second cell
            # (presumably the first one holds the province name); in all
            # other rows it is the first cell.
            city_td = tds[1] if index == 0 else tds[0]
            city = next(city_td.stripped_strings, None)

            # The lowest temperature is read from the second-to-last cell.
            temp = next(tds[-2].stripped_strings, None)
            if city is None or temp is None:
                continue  # empty cell, nothing to record

            try:
                lowest = int(temp)
            except ValueError:
                continue  # non-numeric placeholder instead of a temperature

            ALL_DATA.append({"城市": city, "最低气温": lowest})
            print(city, temp)
def main():
    """Scrape every regional page and render the coldest cities as a bar chart.

    Calls parse_page() once per region, sorts ALL_DATA in place by lowest
    temperature (ascending), takes the first 20 records and writes a
    pyecharts bar chart to "最低气温.html".
    """
    # One page code per region, each listed exactly once so that no city is
    # scraped twice: North, Northeast, East, Central, South, Northwest,
    # Southwest, and Hong Kong / Macau / Taiwan.
    areas = ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']
    for area in areas:
        parse_page(f'http://www.weather.com.cn/textFC/{area}.shtml')

    # Ascending sort puts the coldest cities first.
    ALL_DATA.sort(key=lambda data: data["最低气温"])
    min_data = ALL_DATA[:20]
    cities_min = [record["城市"] for record in min_data]
    min_temp = [record["最低气温"] for record in min_data]

    bar1 = Bar()
    bar1.add_xaxis(cities_min)
    bar1.add_yaxis("气温/℃", min_temp)
    bar1.set_global_opts(title_opts={"text": "中国城市气温排行榜", "subtext": "最低气温"})
    bar1.render("最低气温.html")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()