import io

import pandas
import requests
# History-query endpoint that get_tianqi() sends its GET request to.
url = "http://tianqi.2345.com/Pc/GetHistory"
def get_tianqi(year: int, month: int) -> pandas.DataFrame:
    """Fetch one month of weather history and return it as a DataFrame.

    Sends a GET request to the module-level ``url`` with the area and date
    query parameters, then parses the HTML carried in the ``data`` field of
    the JSON response.

    :param year: Year to query, sent as ``date[year]``.
    :param month: Month to query, sent as ``date[month]``.
    :return: The first table found in the returned HTML.
    :raises Exception: If the response status code is not 200. Errors raised
        by ``requests`` (including timeouts) and by response parsing are not
        caught here and propagate to the caller.
    """
    params = {
        # NOTE(review): 54511 is presumably Beijing's area id (the script
        # saves its output as Beijing weather data) -- confirm.
        "areaInfo[areaId]": 54511,
        "areaInfo[areaType]": 2,
        # Previously spelled "data[year]", which did not match the
        # "date[month]" key below, so the year was sent under the wrong name.
        "date[year]": year,
        "date[month]": month,
    }
    headers = {
        # NOTE(review): "xxx" looks like a placeholder; substitute a real
        # browser User-Agent string if the server rejects this one.
        "User-Agent": "xxx".encode('utf-8'),
    }
    # A timeout keeps a stalled connection from hanging the crawl forever.
    resp = requests.get(url, headers=headers, params=params, timeout=10)
    if resp.status_code != 200:
        raise Exception("请求失败")
    data = resp.json()["data"]
    # read_html() returns every table found in the markup; take the first.
    # The HTML string is wrapped in StringIO because passing literal HTML
    # directly to read_html() is deprecated in recent pandas versions.
    df = pandas.read_html(io.StringIO(data))[0]
    return df
# Crawl every month of every year in the range and collect the monthly tables.
# NOTE(review): range(2012, 2023) covers 2012-2022, i.e. eleven years, while
# the output file name says ten -- confirm the intended range.
df_list = []
for year in range(2012, 2023):
    for month in range(1, 13):
        # Pass the loop values through; the original call was hard-coded to
        # (2021, 10), so every iteration fetched the same month.
        df = get_tianqi(year, month)
        df_list.append(df)
        # Preview the first rows of each fetched month as progress output.
        print(df.head())
# Stack all monthly tables and write them to a single Excel workbook.
pandas.concat(df_list).to_excel("北京十年天气数据.xlsx")
# Python 爬虫实战 —— 爬取北京天气数据
# 于 2023-09-20 21:24:37 首次发布