# Weather-history scraper: pages through 2345.com's monthly history API
# and writes each month's data to a CSV file.
#https://tianqi.2345.com/Pc/GetHistory?areaInfo%5BareaId%5D=57516&areaInfo%5BareaType%5D=2&date%5Byear%5D=2018&date%5Bmonth%5D=3
import requests
import json
from bs4 import BeautifulSoup
import csv
def craw_json_html(url):
    """Fetch one month of weather history from the 2345.com JSON API and save it to CSV.

    The endpoint returns JSON whose ``data`` field is an HTML fragment
    containing a ``<table class="history-table">``; each ``<tr>`` holds one
    day's record (date, high, low, weather, wind, air quality). The parsed
    rows are handed to ``loadinfo_to_csv`` for writing.

    :param url: full GetHistory API URL for a single year/month.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36'}
    try:
        response = requests.get(url=url, headers=headers)
        print(response.status_code)
        # The API serves UTF-8; set it explicitly instead of trusting detection.
        response.encoding = 'utf-8'
        payload = json.loads(response.text)
        html_fragment = payload["data"]
    except (requests.RequestException, ValueError, KeyError) as exc:
        # Was a bare `except: pass` — report the failure instead of hiding it.
        # (ValueError covers json.JSONDecodeError.)
        print("failed to fetch {}: {}".format(url, exc))
        return

    soup = BeautifulSoup(html_fragment, 'html.parser')
    table = soup.find('table', class_='history-table')
    if table is None:
        # Guard: original code raised AttributeError here and swallowed it.
        print("no history table in response from {}".format(url))
        return

    # One list of cell texts per <tr>; blank strings from the split are dropped.
    months = []
    for tr in table.find_all('tr'):
        cells = list(filter(None, tr.text.split('\n')))
        months.append(cells)

    days = months[1:]  # drop the header row
    for day in days:
        # First cell looks like "2018-01-01 周一" — keep only the date part.
        day[0] = day[0].split(' ')[0]

    # Persist the month's rows to a CSV file.
    loadinfo_to_csv(days)
def index_all():
    """Crawl the weather history for January-March 2018, one request per month."""
    url_template = 'https://tianqi.2345.com/Pc/GetHistory?areaInfo%5BareaId%5D=57516&areaInfo%5BareaType%5D=2&date%5Byear%5D=2018&date%5Bmonth%5D={}'
    for month in (1, 2, 3):
        craw_json_html(url_template.format(month))
def loadinfo_to_csv(infor):
    """Write one month of daily weather rows to a CSV file.

    :param infor: one inner list per day, ordered as
        [date, high, low, weather, wind, air quality]. The first row's date
        (e.g. "2018-01-01") provides the "YYYY-MM" prefix for the filename.
    """
    if not infor:
        # Nothing was fetched (e.g. the request failed) — original code
        # raised IndexError here; skip instead of creating an empty file.
        return
    file_name = infor[0][0][:7]  # "YYYY-MM"
    with open("{}月的天气数据.csv".format(file_name), "w", encoding="utf-8", newline="") as f:
        csv_writer = csv.writer(f)
        csv_writer.writerow(["日期", "最高温", "最低温", "天气", "风力风向", "空气质量"])
        # Write the six expected columns of every daily record.
        csv_writer.writerows(row[:6] for row in infor)
    print("{}月的天气数据写入成功".format(file_name))
if __name__ == '__main__':
    # Crawl every configured month (Jan-Mar 2018) and export each to CSV.
    index_all()
# Example result: one file per month, e.g. "2018-01月的天气数据.csv".