# 爬取往年天气信息
import requests
from lxml import etree
from pypinyin import lazy_pinyin # 将中文转换为拼音
from datetime import datetime
import csv,time
# Column names of the output CSV; also the keys of each per-day record.
FIELDNAMES = ['日期','最高温度','最低温度','天气状况','风向以及风级']

# Oldest year the script accepts.
EARLIEST_YEAR = 2011

# Absolute XPath of the <li> elements on the month page; one <li> per day.
# NOTE(review): tied to the page layout at the time of writing
# (/html/body/div[7]/div[1]/div[4]/ul/li[n]) -- confirm if the site changes.
DAY_ROWS_XPATH = '/html/body/div[7]/div[1]/div[4]/ul/li'

# Browser-like User-Agent sent with the request.
HEADERS = {
    "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
}

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10


def is_valid_year(year, today):
    """Return True if *year* lies between EARLIEST_YEAR and the current year."""
    return EARLIEST_YEAR <= year <= today.year


def is_valid_month(year, month, today):
    """Return True if *month* of *year* is a real month that is not in the future.

    Only months of the current year are capped at the current month; every
    month 1-12 of an earlier year is accepted.
    """
    if not 1 <= month <= 12:
        return False
    return year < today.year or month <= today.month


def prompt_int(prompt, error_message, is_valid):
    """Ask for an integer until the input parses and satisfies *is_valid*.

    Non-numeric input is treated like an out-of-range value: *error_message*
    is printed and the same *prompt* is shown again.
    """
    while True:
        try:
            value = int(input(prompt))
        except ValueError:
            value = None
        if value is not None and is_valid(value):
            return value
        print(error_message)


def build_url(place, year, month):
    """Return the month-page URL, e.g. .../shanghai/202405.html.

    *place* is the city name already converted to pinyin; *month* is
    zero-padded to two digits.
    """
    return f"https://lishi.tianqi.com/{place}/{year}{month:02d}.html"


def row_to_record(texts):
    """Map the text nodes of one day row onto FIELDNAMES.

    Returns None when the row has fewer text nodes than there are columns,
    so a malformed row is skipped rather than raising IndexError. Extra
    text nodes beyond the fifth are ignored.
    """
    if len(texts) < len(FIELDNAMES):
        return None
    return dict(zip(FIELDNAMES, texts))


def main():
    """Prompt for city/year/month, fetch that month's page and save it as CSV."""
    today = datetime.now()

    # Convert the Chinese city name to the pinyin used in the URL.
    place = "".join(lazy_pinyin(input("请输入要查找的城市地点:")))
    year = prompt_int(
        "请输入要查找的年份:",
        "输入的年份范围有误",
        lambda value: is_valid_year(value, today),
    )
    month = prompt_int(
        "请输入要查找的月份:",
        "输入的月份范围有误",
        lambda value: is_valid_month(year, value, today),
    )

    url = build_url(place, year, month)
    response = requests.get(url=url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
    tree = etree.HTML(response.text)
    rows = tree.xpath(DAY_ROWS_XPATH)
    print(len(rows))  # number of day rows found on the page

    datas = []
    # The file is created only once there is something to write, and the
    # context manager guarantees it is flushed and closed.
    with open(f"{int(time.time())}.csv", "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, FIELDNAMES)
        writer.writeheader()
        for row in rows:
            record = row_to_record(row.xpath('./div/text()'))
            if record is None:
                continue
            datas.append(record)
            writer.writerow(record)
    print(datas)


if __name__ == "__main__":
    main()
# Article title: Scraping weather forecasts with XPath
# Latest recommended article published 2024-06-28 17:08:16