# Fetch historical weather data from lishi.tianqi.com and save it to an Excel file.
# -*- coding: utf-8 -*-
#获取天气信息并存入excel
import requests
from bs4 import BeautifulSoup as bs
import pandas as pd
import datetime
import time
from dateutil.relativedelta import relativedelta
import xlsxwriter as xlw
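# pd.date_range(start_date, end_date) produces a sequence of timestamps.
# datetime.strftime(x, '%Y%m') formats a datetime object as a string for display, e.g. 201801.
# list.sort() is a method available only on lists; sorted() works on any iterable.
# list(set(x)) removes duplicates but does NOT order the result; sorted() is what does the ordering.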
def date_range(start_date, end_date):
    """Return the calendar months covered by a date span as 'YYYYMM' strings.

    Args:
        start_date: First date of the span; anything ``pd.period_range``
            accepts (the caller in this file passes 'YYYY-MM' strings).
        end_date: Last date of the span, inclusive.

    Returns:
        list[str]: One 'YYYYMM' string per month, from the month containing
        ``start_date`` through the month containing ``end_date``, in
        ascending order. Empty when the end month precedes the start month.
    """
    # Step month by month instead of generating every single day of the span
    # and then de-duplicating the formatted strings.
    months = pd.period_range(start_date, end_date, freq='M')
    return [month.strftime('%Y%m') for month in months]
def date_day_range(start_date, end_date):
    """Count the days from the first day of the start month to the end of the end month.

    Args:
        start_date: First month of the span as a 'YYYY-MM' string.
        end_date: Last month of the span as a 'YYYY-MM' string (inclusive).

    Returns:
        int: Days between the first day of ``start_date`` and the first day
        of the month following ``end_date``. Zero or negative when
        ``end_date`` is earlier than ``start_date``.

    Raises:
        ValueError: If either argument does not match the '%Y-%m' format.
    """
    parse = datetime.datetime.strptime
    first_day = parse(start_date, "%Y-%m")
    last_month = parse(end_date, "%Y-%m")
    # Roll over to the first day of the month after end_date so the whole end
    # month is counted; divmod carries December into January of the next year.
    year_carry, month_index = divmod(last_month.month, 12)
    day_after_span = last_month.replace(
        year=last_month.year + year_carry, month=month_index + 1)
    days = (day_after_span - first_day).days
    print(days)  # echo the computed day count to the console
    return days
def get_data_city(city, start_date, end_date, timeout=10):
    """Scrape daily weather rows for a city and write them to an Excel file.

    For every month between ``start_date`` and ``end_date`` the page
    ``https://lishi.tianqi.com/<city>/<YYYYMM>.html`` is downloaded, each
    ``<li>`` inside the first ``<ul class="thrui">`` is split into its
    non-empty text lines, and the collected rows are handed to
    ``list_to_excel``.

    Args:
        city: City name as it appears in the URL; also used as the output
            file name by ``list_to_excel``.
        start_date: First month to fetch, as a 'YYYY-MM' string.
        end_date: Last month to fetch (inclusive), as a 'YYYY-MM' string.
        timeout: Seconds to wait for each HTTP response before giving up.

    Raises:
        ValueError: If a date does not match 'YYYY-MM' (raised by
            ``date_day_range`` before any request is sent).
        requests.exceptions.RequestException: On network failure or timeout.
    """
    # Called up front so a malformed date fails before any request is made;
    # it also prints the number of days the span covers.
    date_day_range(start_date, end_date)

    headers = {'User-Agent': 'Mozilla/5.0'}
    weather_data = []  # one list of text fields per scraped day
    for month in date_range(start_date, end_date):
        url = 'https://lishi.tianqi.com/' + city + '/' + month + '.html'
        # Without a timeout a single stalled response would hang the whole run.
        res = requests.get(url, headers=headers, timeout=timeout)
        soup = bs(res.text, 'html.parser')
        tables = soup.find_all('ul', class_='thrui')
        if not tables:
            # No data list on the page (e.g. blocked request or error page):
            # report it and continue so the months already scraped are kept.
            print('no weather table at %s (HTTP %s), skipped'
                  % (url, res.status_code))
            continue
        for item in tables[0].find_all('li'):
            # Keep only the non-empty text lines of this day's entry.
            fields = [line for line in item.get_text().splitlines() if line]
            # Append instead of indexing into a pre-sized list, so the row
            # count never has to match the calendar-day count exactly.
            weather_data.append(fields)
    print('geted weather_data')
    list_to_excel(city, start_date, end_date, weather_data)
def list_to_excel(city, start_date, end_date, data):
    """Write scraped weather rows to ``<city>.xlsx``.

    The workbook gets a single sheet named ``<start_date>_<end_date>`` with a
    bold header row followed by one spreadsheet row per entry of ``data``.

    Args:
        city: Used as the workbook file name (without the .xlsx extension).
        start_date: First half of the sheet name.
        end_date: Second half of the sheet name.
        data: Iterable of rows; each row is an iterable of strings written
            left to right starting at column 0. An empty row leaves a blank
            spreadsheet row.
    """
    filename = city
    print(filename)
    title = ['日期', '最高气温', '最低气温', '天气', '风向', '风力']  # header row
    # The context manager closes the workbook even if a write below raises.
    with xlw.Workbook('%s.xlsx' % filename) as workbook:
        sheet = workbook.add_worksheet(start_date + '_' + end_date)
        # One shared bold format for all header cells.
        bold = workbook.add_format({'bold': True})
        for col, heading in enumerate(title):
            sheet.write_string(0, col, heading, bold)
        # Data rows start at spreadsheet row 1, directly below the header.
        for row, item in enumerate(data, start=1):
            for col, text in enumerate(item):
                sheet.write_string(row, col, text)
def main():
    """Download Chongqing's weather history for 2011-01 through 2021-02."""
    print('start')
    get_data_city('chongqing', '2011-01', '2021-02')
    print('end')


# Run the scrape only when executed as a script, not when imported.
if __name__ == '__main__':
    main()