结果:
![](https://i-blog.csdnimg.cn/blog_migrate/a666161f9165138506a1ea78801b8fa9.png)
代码如下:
import requests
from bs4 import BeautifulSoup
import IO as ios
class item:
    """Accumulator for parsed daily-weather records, kept as parallel lists.

    Index i across all six lists describes the same day.
    """

    def __init__(self):
        self.date = []            # date string, e.g. "20150101"
        self.max_temp = []        # daily high temperature
        self.min_temp = []        # daily low temperature
        self.weather = []         # weather description
        self.wind_direction = []  # wind direction
        self.wind_force = []      # wind force / strength


# Module-level accumulator shared by get_datas().
Data_Box = item()
def get_url(city, start_year=2015, end_year=2020):
    """Yield one "<url> <city>" string per month for the given city.

    The original comment/docstring claimed a ``year`` parameter and a
    Beijing-2018 default; in fact the function generated 2015-2020 for
    whatever city it was given.  The year range is now an explicit,
    backward-compatible pair of parameters.

    Args:
        city: pinyin spelling of the city, used in the URL path.
        start_year: first year to generate, inclusive (default 2015).
        end_year: last year to generate, inclusive (default 2020).

    Yields:
        str: "http://lishi.tianqi.com/{city}/{yyyymm}.html {city}" — the
        trailing city is kept so callers can recover it after splitting.
    """
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # {:02d} zero-pads the month, replacing the original
            # str-concat/int round-trip used to build "yyyymm".
            url = "http://lishi.tianqi.com/{}/{}{:02d}.html".format(city, year, month)
            yield url + " " + city
def get_datas():
    """Scrape daily weather history for every city listed in CityEn.txt.

    Each line of CityEn.txt is expected to be "<chinese-name> <pinyin>"
    separated by a space (assumption — confirm against the data file).
    For every month URL produced by get_url(), the page's daily rows are
    appended to the module-level Data_Box and the rows NEW to that page are
    printed and written to weather.txt via ios.cw().

    Returns:
        str: the completion message "数据获取完毕".
    """
    # The cookie and header never change — build them once, not per city.
    cookie = {
        "cityPy": "UM_distinctid=171f2280ef23fb-02a4939f3c1bd4-335e4e71-144000-171f2280ef3dab; Hm_lvt_ab6a683aa97a52202eab5b3a9042a8d2=1588905651; CNZZDATA1275796416=871124600-1588903268-%7C1588990372; Hm_lpvt_ab6a683aa97a52202eab5b3a9042a8d2=1588994046"}
    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3756.400 QQBrowser/10.5.4039.400"}
    # BUG FIX: the file handle was never closed; use a context manager.
    with open("CityEn.txt", encoding='utf-8') as city_file:
        for line in city_file:
            city_cn = str(line.split(" ")[0])
            city_en = str(line.split(" ")[1]).strip("\n")
            print(city_en)
            for url in get_url(city_en):
                # BUG FIX: the original re-printed and re-wrote EVERY row
                # accumulated so far after each successful page, duplicating
                # all earlier data in weather.txt.  Remember where this
                # page's rows begin so only the new ones are emitted.
                first_new = len(Data_Box.date)
                try:
                    html = requests.get(url=url.split(" ")[0], headers=header, cookies=cookie)
                    soup = BeautifulSoup(html.text, 'html.parser')
                    ul = soup.find_all("ul", class_='thrui')[0]
                    # The last <li> is not weather data — drop it.
                    for li in ul.find_all("li")[:-1]:
                        div = li.find_all("div")
                        # div[0] text starts with "YYYY-MM-DD"; store "YYYYMMDD".
                        date_parts = div[0].text.split("-")
                        Data_Box.date.append(date_parts[0] + date_parts[1] + date_parts[2])
                        Data_Box.max_temp.append(div[1].text)
                        Data_Box.min_temp.append(div[2].text)
                        Data_Box.weather.append(div[3].text)
                        wind = div[4].text.split(" ")
                        Data_Box.wind_direction.append(wind[0])
                        Data_Box.wind_force.append(wind[1])
                # BUG FIX: bare `except:` also swallowed KeyboardInterrupt /
                # SystemExit; catch only real errors.
                except Exception:
                    # BUG FIX: a mid-page failure left the six parallel lists
                    # at unequal lengths; roll back this page's partial rows.
                    del Data_Box.date[first_new:]
                    del Data_Box.max_temp[first_new:]
                    del Data_Box.min_temp[first_new:]
                    del Data_Box.weather[first_new:]
                    del Data_Box.wind_direction[first_new:]
                    del Data_Box.wind_force[first_new:]
                    print("该页面爬取失败!")
                else:
                    print("该页面爬取成功!")
                    for i in range(first_new, len(Data_Box.date)):
                        print(city_cn, url.split(" ")[1], Data_Box.date[i],
                              Data_Box.min_temp[i], Data_Box.max_temp[i],
                              Data_Box.weather[i], Data_Box.wind_direction[i],
                              Data_Box.wind_force[i])
                        ios.cw("weather.txt",
                               city_cn + " " + url.split(" ")[1] + " " + Data_Box.date[i] + " " +
                               Data_Box.min_temp[i] + " " + Data_Box.max_temp[i] + " " +
                               Data_Box.weather[i] + " " + Data_Box.wind_direction[i] + " " +
                               Data_Box.wind_force[i] + "\n")
    return "数据获取完毕"
# Script entry point: run the scraper when executed directly.
if __name__ == "__main__":
    get_datas()