Weather Forecast Retrieval Module

A cleaned-up scraper for http://www.weather.com.cn/. Each function now has a single, clearly defined job, which makes the pieces easier to integrate on their own. The interface has also been tidied up, so it improves on the source code that keeps getting copied from blog to blog.

Improvements: weather retrieval is split into several functions, and saving to the CSV files repeatedly no longer rewrites the header row.

1. Weather Retrieval Module

1.1 Dependencies
Package | Purpose | Installation
---|---|---
*Standard library* | |
os | Check whether a CSV file already exists | bundled with Python
csv | Write the result rows to the CSV files | bundled with Python
json | Parse the hourly-forecast JSON embedded in the page | bundled with Python
*Third-party* | |
requests | HTTP client (scraping module) | `pip install requests`
bs4.BeautifulSoup | Parsed-page object | `pip install beautifulsoup4`
bs4.element.Tag | Page tag object (used in type hints) | installed with beautifulsoup4
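The third-party packages can be installed with `pip install requests beautifulsoup4`; `os`, `csv` and `json` ship with the standard library. A quick optional check that the imports resolve (not part of the module itself):

```python
# Optional sanity check: confirm the third-party dependencies are importable
# and print their versions.
import requests
import bs4

print(requests.__version__, bs4.__version__)
```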
1.2 Global variables

None.

1.3 Functions
Function | Purpose | Interface?
---|---|---
get_html_text | Fetch page content (no proxy, no custom headers) | No
get_today_weather | Today's hourly weather | No
get_1_7_weather | This week's weather (days 1-7) | No
get_today_and_week | Today's and this week's weather | No
get_8_14_weather | Next week's weather (days 8-14) | No
write_to_csv | Save results to a CSV file | No
1.4 Interface function

Function | Purpose
---|---
get_two_weak_weather | Fetch two weeks of weather for a city and save it to CSV files

Parameter | Meaning | Type
---|---|---
city_code | weather.com.cn city code | str

Returns: None; the data is written to `weather14.csv` and `weather1.csv`.
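A minimal usage sketch for the interface function. It assumes the code below has been saved as `weather.py` (the file name is an assumption, not stated in this document); the city code is the one used in the `__main__` block at the end of the listing.

```python
# Usage sketch, assuming the module below is saved as weather.py.
from weather import get_two_weak_weather

get_two_weak_weather("101280701")
# -> writes weather14.csv (14-day daily forecast) and weather1.csv (today's hourly data)
```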
1.5 Code
```python
import os
import csv
import json

import requests
from bs4 import BeautifulSoup
from bs4.element import Tag


def get_html_text(url):
    """Fetch the raw HTML of a page (no proxy, no custom request headers)."""
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        print("Page fetched successfully")
        return r.text
    except Exception as e:
        print(e)
        print("Failed to fetch page")
        return " "

def get_today_weather(body_tag: Tag):
    """Parse the 24-hour observation data for the current day."""
    td_wea_list = []
    count = 0

    def get_today_json(_tag: Tag):
        # The hourly data sits in a <script> tag inside one of the page's
        # 'left-div' blocks as a JS assignment; strip the prefix up to '='
        # and the trailing characters to leave plain JSON.
        weather_div = _tag.find_all('div', {'class': 'left-div'})
        observe24h_data = weather_div[2].find('script').string
        begin_index = observe24h_data.index('=') + 1
        end_index = -2
        observe24h_data = observe24h_data[begin_index:end_index]
        observe24h_json = json.loads(observe24h_data)
        t_json = observe24h_json['od']['od2']
        # The newest hour sometimes has an empty air-quality value; reuse the previous one.
        if t_json[0]['od28'] == "":
            t_json[0]['od28'] = t_json[1]['od28']
        return t_json

    today_json = get_today_json(body_tag)
    for i in today_json:
        od_wea_list = []
        if count <= 23:  # keep at most 24 hourly records
            # Field order matches the hourly CSV header written in write_to_csv.
            od_wea_list.append(i['od21'])  # hour
            od_wea_list.append(i['od22'])  # temperature
            od_wea_list.append(i['od24'])  # wind direction
            od_wea_list.append(i['od25'])  # wind scale
            od_wea_list.append(i['od26'])  # precipitation
            od_wea_list.append(i['od27'])  # relative humidity
            od_wea_list.append(i['od28'])  # air quality
            print(od_wea_list)
            td_wea_list.append(od_wea_list)
            count = count + 1
    print(td_wea_list)
    return td_wea_list

def get_1_7_weather(body_tag: Tag):
    """Parse the 7-day forecast block (the div with id '7d')."""
    week_wea_list = list()
    data = body_tag.find('div', {'id': '7d'})
    ul = data.find_all('ul')[0]
    li = ul.find_all('li')
    for day in li:
        od_wea_list = list()
        # Keep only the day number before the '日' character.
        date = day.find('h1').string
        date = date[0:date.index('日')]
        od_wea_list.append(date)
        inf = day.find_all('p')
        od_wea_list.append(inf[0].string)  # weather description
        # Low temperature is in <i>, high temperature in <span>; either may be missing.
        if inf[1].find('i') is None:
            tem_low = None
        else:
            tem_low = inf[1].find('i').string
        if inf[1].find('span') is None:
            tem_high = None
        else:
            tem_high = inf[1].find('span').string
        # Strip the trailing '℃' if present (guard against missing values).
        if tem_low is not None and tem_low[-1] == '℃':
            od_wea_list.append(tem_low[:-1])
        else:
            od_wea_list.append(tem_low)
        if tem_high is not None and tem_high[-1] == '℃':
            od_wea_list.append(tem_high[:-1])
        else:
            od_wea_list.append(tem_high)
        wind = inf[2].find_all('span')
        for j in wind:
            od_wea_list.append(j['title'])  # wind direction(s)
        # Wind scale: take the single digit right before '级'.
        wind_scale = inf[2].find('i').string
        index1 = wind_scale.index('级')
        od_wea_list.append(int(wind_scale[index1 - 1:index1]))
        week_wea_list.append(od_wea_list)
    return week_wea_list

def get_today_and_week(html: str):
    """Parse the page and return (today's hourly data, this week's daily data)."""
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    td_wea_list = get_today_weather(body)
    week_wea_list = get_1_7_weather(body)
    return td_wea_list, week_wea_list

def get_8_14_weather(html):
    """Parse the 8-14 day forecast block (the div with id '15d')."""
    week_wea_list = []
    i = 0
    bs = BeautifulSoup(html, "html.parser")
    body = bs.body
    data = body.find('div', {'id': '15d'})
    ul = data.find('ul')
    li = ul.find_all('li')
    for day in li:
        if i < 7:
            od_wea_list = list()
            # Keep the day number inside the brackets, dropping the trailing '日)'.
            date = day.find('span', {'class': 'time'}).string
            date = date[date.index('(') + 1:-2]
            od_wea_list.append(date)
            weather = day.find('span', {'class': 'wea'}).string
            print(weather)
            od_wea_list.append(weather)
            # The temperature text holds both values separated by '/';
            # the low temperature comes after the slash, the high before it.
            tem = day.find('span', {'class': 'tem'}).text
            print(tem)
            od_wea_list.append(tem[tem.index('/') + 1:-1])  # low temperature
            od_wea_list.append(tem[:tem.index('/') - 1])    # high temperature
            # Wind direction may read "X转Y" (changing from X to Y); always store two values.
            wind = day.find('span', {'class': 'wind'}).string
            if '转' in wind:
                od_wea_list.append(wind[:wind.index('转')])
                od_wea_list.append(wind[wind.index('转') + 1:])
            else:
                od_wea_list.append(wind)
                od_wea_list.append(wind)
            # Wind scale: single digit right before '级'.
            wind_scale = day.find('span', {'class': 'wind1'}).string
            index1 = wind_scale.index('级')
            od_wea_list.append(int(wind_scale[index1 - 1:index1]))
            week_wea_list.append(od_wea_list)
            i = i + 1  # only the first 7 entries (days 8-14) are kept
    return week_wea_list

def write_to_csv(file_name, data, day=14):
    """Append rows to a CSV file; the header is written only when the file is first created."""
    if not os.path.exists(file_name):
        with open(file_name, 'w', errors='ignore', newline='') as f:
            if day == 14:
                header = ['Date', 'Weather', 'Low temp', 'High temp',
                          'Wind direction 1', 'Wind direction 2', 'Wind scale']
            else:
                header = ['Hour', 'Temperature', 'Wind direction', 'Wind scale',
                          'Precipitation', 'Relative humidity', 'Air quality']
            f_csv = csv.writer(f)
            f_csv.writerow(header)
            f_csv.writerows(data)
    else:
        with open(file_name, 'a', errors='ignore', newline='') as f:
            f_csv = csv.writer(f)
            f_csv.writerows(data)

def get_two_weak_weather(city_code: str):
    """Fetch two weeks of weather for the given city code and save it to CSV files."""
    url_head = "http://www.weather.com.cn/weather"
    # Days 1-7:  http://www.weather.com.cn/weather/<city_code>.shtml
    url_1_7 = "".join([url_head, "/", city_code, ".shtml"])
    # Days 8-14: http://www.weather.com.cn/weather15d/<city_code>.shtml
    url_8_14 = "".join([url_head, "15d", "/", city_code, ".shtml"])
    html_1_7 = get_html_text(url_1_7)
    data1, data1_7 = get_today_and_week(html_1_7)
    html8_14 = get_html_text(url_8_14)
    data8_14 = get_8_14_weather(html8_14)
    data14 = data1_7 + data8_14
    write_to_csv('weather14.csv', data14, 14)  # 14-day daily forecast
    write_to_csv('weather1.csv', data1, 1)     # today's hourly data

if __name__ == '__main__':
    get_two_weak_weather("101280701")
```
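Once `get_two_weak_weather` has run, the saved files can be read back with the standard `csv` module. A minimal sketch, assuming `weather14.csv` already exists in the working directory:

```python
import csv

# Read back the 14-day forecast written by write_to_csv.
with open('weather14.csv', newline='') as f:
    reader = csv.reader(f)
    header = next(reader)  # ['Date', 'Weather', 'Low temp', ...]
    for row in reader:
        print(dict(zip(header, row)))
```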