# Scrape weather information and store it in txt files.
import requests
from bs4 import BeautifulSoup
import xpinyin
def getHtml(url, header=None, timeout=10):
    """Fetch *url* and return the response body decoded as UTF-8.

    Args:
        url: Address to request.
        header: Optional dict of HTTP request headers.
        timeout: Seconds to wait for the server before ``requests`` gives up
            and raises a timeout error. Without a timeout a stalled
            connection would block the script indefinitely.

    Returns:
        The response text when the status code is 200, otherwise ``None``.
    """
    # ``headers=None`` is the default of requests.get, so a single call covers
    # both the "with headers" and the "without headers" case.
    res = requests.get(url, headers=header, timeout=timeout)
    # Force UTF-8 decoding instead of relying on the encoding requests guesses.
    res.encoding = 'utf8'
    if res.status_code == 200:
        return res.text
    return None
# Request headers sent with every page download.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
    'Host': 'lishi.tianqi.com',
    # Only advertise encodings that requests can always decode. Brotli ("br")
    # is decoded only when an optional brotli package is installed; without it
    # a br-compressed response would come back as unreadable text.
    'Accept-Encoding': "gzip, deflate",
    'Connection': "keep-alive",
    'cache-control': "max-age=0",
}
def getURL(city, month, year=2022):
    """Build the monthly history page URL on lishi.tianqi.com for a city.

    Args:
        city: City name; it is converted with xpinyin and every "-" in the
            result is removed.
        month: Month number; values below 10 are prefixed with "0".
        year: Year placed directly before the month in the URL.

    Returns:
        A string of the form
        "https://lishi.tianqi.com/<pinyin>/<year><month>.html".
    """
    slug = xpinyin.Pinyin().get_pinyin(city).replace("-", "")
    month_part = "0" + str(month) if month < 10 else str(month)
    pieces = ["https://lishi.tianqi.com/", slug, "/", str(year), month_part, ".html"]
    return "".join(pieces)
# Weather description -> integer code written to the output files.
dic = {
    "晴": 0,
    "阴": 1,
    "多云": 2,
    "风": 3,
    "霾": 4,
    "雾": 5,
    # snow
    "小雪": 6,
    "中雪": 7,
    "大雪": 8,
    # rain
    "小雨": 9,
    "中雨": 10,
    "大雨": 11,
    "阵雨": 12,
    "暴雨": 13,
}

# Names of the places to download; one output file is written per entry.
citys = [
    "海南",
    "晋城",
]
# For every city, download the twelve monthly pages and write one line per
# listed day: "<month> <day index> <field 3> <field 4> <weather code>".
for city in citys:
    with open(city + "2022天气数据.txt", 'w', encoding='utf8') as f:
        for month in range(1, 13):
            url = getURL(city, month)
            html = getHtml(url, headers)
            if html is None:
                # getHtml returns None for any non-200 response; skip the
                # month instead of handing None to BeautifulSoup.
                print("skipped " + url + ": page could not be downloaded")
                continue

            soup = BeautifulSoup(html, 'html.parser')
            weather_list = soup.find("ul", class_="thrui")
            if weather_list is None:
                print("skipped " + url + ": no <ul class=\"thrui\"> found")
                continue

            month_text = str(month).rjust(2, "0")
            # Entries are separated by blank lines in the list's text; the
            # chunk after the last separator is dropped.
            chunks = weather_list.text.split("\n\n")[:-1]
            day = 0
            for chunk in chunks:
                if chunk == "":
                    continue
                # Count every non-empty entry so later days keep their
                # position even when an earlier entry is skipped below.
                day += 1
                fields = chunk.replace("\n", " ").split(" ")
                print(fields)
                if len(fields) < 6:
                    print("skipped malformed entry " + str(day) + " of " + url)
                    continue

                # Keep only the first condition of compound descriptions
                # joined with "转" or "到".
                condition = fields[5].split("转")[0].split("到")[0]
                code = dic.get(condition)
                if code is None:
                    print("skipped entry " + str(day) + " of " + url
                          + ": unknown weather " + repr(condition))
                    continue

                # fields[3] and fields[4] lose their last character
                # (presumably the temperature unit suffix - confirm against
                # the page markup).
                row = [month_text, str(day), fields[3][:-1], fields[4][:-1], str(code)]
                f.write(" ".join(row) + "\n")