from lxml import etree
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
import csv
# Write the CSV column header row
# Browser-like User-Agent sent with every page request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.51 Safari/537.36 Edg/99.0.1150.30'}

# Destination of the scraped table.
csv_path = r"C:\Users\hk\Desktop\opencv\杨陵天气2021.csv"


def _two_fields(text):
    """Split *text* on whitespace and return exactly two fields.

    Missing fields are returned as empty strings so that every CSV row keeps
    the same number of columns even when a cell holds a single token.
    """
    parts = text.split()
    return (parts + ['', ''])[:2]


def _rows_from_cells(cells):
    """Group the flat list of scraped cell texts into 7-column rows.

    Cells are consumed five at a time. The first and the fifth cell of each
    group are split on whitespace into two fields each; the three cells in
    between are copied unchanged. A trailing group with fewer than five
    cells is dropped.
    """
    rows = []
    for start in range(0, len(cells) - 4, 5):
        first, second, third, fourth, fifth = cells[start:start + 5]
        rows.append([*_two_fields(first), second, third, fourth, *_two_fields(fifth)])
    return rows


# Scrape the data with XPath.
# The file is opened once for both the header and the data rows. UTF-8 is
# requested explicitly so the Chinese text does not depend on the platform's
# locale encoding and can be read back with pandas' default settings.
with open(csv_path, 'w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(['日期', '星期', 'max', 'min', '天气', '风向', '风级'])

    for month in range(1, 13):
        url = f"https://lishi.tianqi.com/yangling/2021{month:02d}.html"
        # Plain page fetch: use GET, never wait forever, and fail loudly on
        # an HTTP error instead of parsing an error page.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        tree = etree.HTML(response.text)
        # NOTE(review): assumes every <li> yields exactly five text cells
        # (two-part cell, three plain cells, two-part cell) — confirm
        # against the live page markup.
        cells = tree.xpath('//ul[@class="thrui"]/li/div/text()')

        for row in _rows_from_cells(cells):
            writer.writerow(row)
            print(row)
# Load the scraped table back from disk.
df = pd.read_csv(r"C:\Users\hk\Desktop\opencv\杨陵天气2021.csv")

# Convert the temperature values to int.
raw_max = df['max']
raw_min = df['min']
print(raw_max)
print(raw_min)

# Drop the last character of every cell (presumably the degree unit scraped
# from the page — confirm) and cast each column in one vectorised step.
# This handles any number of rows and yields real integer Series rather than
# object Series patched element by element.
yMax = raw_max.str[:-1].astype(int)
yMin = raw_min.str[:-1].astype(int)
print(yMax)
print(yMin)
# Create the plot and save it
# Select the SimHei font for the Chinese title and axis labels, and turn off
# the Unicode minus glyph for the axes.
plt.rcParams.update({
    "font.sans-serif": ["SimHei"],
    "axes.unicode_minus": False,
})

# 365 consecutive daily timestamps starting at 2021-01-01.
x = pd.date_range('20210101', periods=365)

fig, ax = plt.subplots(figsize=(30, 15))

# Daily maximum in red, daily minimum in blue.
ax.plot(x, yMax.values, 'r-')
ax.plot(x, yMin.values, 'b-')

ax.grid()
ax.set_title('杨陵2021气温')
ax.set_xlabel('时间', fontsize=20)
ax.set_ylabel('温度', fontsize=20)
ax.legend(['max', 'min'], loc='upper right', frameon=False, fontsize=15)

# Save the figure to disk, then display it.
fig.savefig(r'./WeatherData.png')
plt.show()