天气查询:http://www.tianqihoubao.com/
参考:https://blog.csdn.net/qq_38440882/article/details/82829865 (原文是将数据保存到数据库,具体做法可以参考原文。)
在上面参考文章的基础上做了修改:将抓取结果整理为DataFrame并保存为CSV文件,同时把温度拆分为上限和下限两列,代码见下文。
from bs4 import BeautifulSoup
import requests
import pandas as pd
import warnings
from pypinyin import pinyin, lazy_pinyin
import pypinyin
warnings.filterwarnings("ignore")
def _cell_parts(cell):
    """Return the "/"-separated pieces of a table cell's text, each stripped."""
    text = cell.text.strip().replace("\n", "")
    return [part.strip() for part in text.split("/")]


def get_temperature(url, city, timeout=30):
    """Download one weather-history page and extract its table rows.

    Every ``<div class="wdetail">`` in the page is scanned. Within each, the
    first ``<tr>`` is skipped and every remaining row with at least four
    ``<td>`` cells yields one record.

    Args:
        url: Address of the page to download.
        city: Not used by this function; kept so existing callers that pass
            it keep working.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection raises instead of hanging forever.

    Returns:
        A tuple of five parallel lists of strings, one entry per parsed row:
        ``(date_all, weather_all, temperature_up_all, temperature_down_all,
        wind_all)``. The weather and wind entries are the part of the cell
        before the first "/". The temperature cell supplies its first
        "/"-separated part as "up" and its second as "down" (an empty string
        when the cell contains no "/").
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=timeout).content
    # Raw bytes are handed to BeautifulSoup so it can work out the encoding.
    soup = BeautifulSoup(response, "lxml")

    date_all = []
    weather_all = []
    temperature_up_all = []
    temperature_down_all = []
    wind_all = []
    for info in soup.find_all('div', class_='wdetail'):
        # [1:] drops the first row (presumably the table header).
        for tr in info.find_all('tr')[1:]:
            td_list = tr.find_all('td')
            if len(td_list) < 4:
                # Rows without the four expected cells cannot be parsed.
                continue
            temperatures = _cell_parts(td_list[2])
            date_all.append(td_list[0].text.strip().replace("\n", ""))
            weather_all.append(_cell_parts(td_list[1])[0])
            temperature_up_all.append(temperatures[0])
            # A cell without "/" has only one part; record the second as empty.
            temperature_down_all.append(
                temperatures[1] if len(temperatures) > 1 else "")
            wind_all.append(_cell_parts(td_list[3])[0])
    return date_all, weather_all, temperature_up_all, temperature_down_all, wind_all
if __name__ == '__main__':
    # Script entry point: fetch the listed months for every city, combine
    # the rows into one DataFrame and write it to weather_gz.csv.
    citys1 = ["广州市"]
    months = ["201809", "201810", "201811"]

    # Collect one frame per page and concatenate once at the end, instead of
    # re-copying the accumulated data on every iteration.
    frames = []
    for city in citys1:
        # The last character of the city name is dropped before the pinyin
        # conversion; the result is the city segment of the URL.
        city1 = ''.join(lazy_pinyin(city[:-1]))
        print(city1)
        urls = ['http://www.tianqihoubao.com/lishi/' + city1 + '/month/' + month + '.html'
                for month in months]
        for url in urls:
            date_all, weather_all, temperature_up_all, temperature_down_all, wind_all = get_temperature(url, city)
            frames.append(pd.DataFrame({
                "date": date_all,
                "weather": weather_all,
                "temperature_up": temperature_up_all,
                "temperature_down": temperature_down_all,
                "wind": wind_all,
            }))

    # pd.concat raises on an empty list, so fall back to an empty frame.
    data_all = pd.concat(frames, axis=0) if frames else pd.DataFrame()
    print(data_all.shape, data_all.columns)
    data_all.to_csv("weather_gz.csv", index=False)