import numpy as np
import pandas as pd
import time
import math
import warnings
warnings.filterwarnings("ignore")
# Sentinel the raw file uses for a missing observation. It must pass through
# the scaling step unchanged so that later steps can still detect it.
MISSING_VALUE = 32766

# Raw daily matrix: one row per station, one column per day (it is transposed
# below so that dates become the DataFrame index and stations the columns).
pre_txt = np.loadtxt(r"F:\09_climate_site_data\03_日降水集中指数\02_data\pre-day.txt")
print(pre_txt)
print(pre_txt.shape)

# Scale every valid reading by 0.1 (presumably the file stores tenths of a
# unit -- TODO confirm) while leaving the missing-value sentinel as it is.
# A single vectorised np.where replaces the per-cell Python loop; `ravel()`
# keeps `pre_txt1` as the flat, row-major array the original loop produced.
pre_txt1 = np.where(pre_txt != MISSING_VALUE, pre_txt * 0.1, MISSING_VALUE).ravel()

# Back to (stations, days), then transpose to (days, stations). The shape is
# taken from the loaded data instead of being hard-coded.
pre_txt2 = pre_txt1.reshape(pre_txt.shape).T
print(pre_txt2.shape)

pre_cols_txt = np.loadtxt(r"F:\09_climate_site_data\id-value.txt")
pre_cols = np.array(pre_cols_txt[:, 0])  # station numbers (first column of the id file)
# One daily timestamp per row, starting on 1960-01-01.
pre_rows = pd.date_range(start='1960-1-1', freq="D", periods=pre_txt2.shape[0])
pre_csv = pd.DataFrame(pre_txt2, columns=pre_cols, index=pre_rows)

# Years used for the analysis: 1981..2010 inclusive.
year = list(range(1981, 2011))

# Keep only the analysis period. Slicing by date label (inclusive on both
# ends) selects the same rows as the former positional slice [7671:18628]
# without relying on a hand-counted day offset.
pre_csv = pre_csv.loc[str(year[0]) + "-01-01":str(year[-1]) + "-12-31"]
print(pre_csv)
[[ 0. 0. 12. ... 12. 10. 0.]
[ 3. 1. 18. ... 28. 5. 0.]
[ 2. 2. 22. ... 15. 2. 0.]
...
[ 0. 0. 0. ... 99. 90. 0.]
[ 0. 0. 0. ... 44. 2. 0.]
[ 0. 0. 0. ... 0. 4. 1.]]
(1672, 19359)
(19359, 1672)
50353.0 50425.0 50431.0 50434.0 50445.0 50468.0 50514.0 \
1981-01-01 0.0 0.0 0.1 0.2 0.0 0.0 0.0
1981-01-02 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1981-01-03 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1981-01-04 0.0 0.1 0.0 0.0 0.0 0.0 0.0
1981-01-05 0.0 0.1 0.0 0.1 0.4 0.2 0.0
... ... ... ... ... ... ... ...
2010-12-27 0.2 1.0 1.2 1.5 0.8 0.7 1.2
2010-12-28 0.0 0.3 0.2 0.0 0.1 1.2 0.0
2010-12-29 1.0 0.5 0.5 1.3 1.9 1.3 0.0
2010-12-30 0.0 0.5 0.5 1.1 0.4 0.0 1.3
2010-12-31 0.0 0.2 0.0 0.0 0.0 0.0 0.0
50524.0 50525.0 50526.0 ... 59843.0 59848.0 59849.0 \
1981-01-01 0.1 0.2 0.4 ... 0.0 0.0 0.0
1981-01-02 0.0 0.0 0.1 ... 0.0 0.1 0.0
1981-01-03 0.0 0.1 0.1 ... 0.0 0.0 0.0
1981-01-04 0.0 0.1 0.1 ... 0.0 0.0 0.0
1981-01-05 0.0 0.1 0.1 ... 0.0 0.0 0.0
... ... ... ... ... ... ... ...
2010-12-27 2.7 2.7 2.7 ... 0.0 0.0 0.0
2010-12-28 0.7 0.9 2.6 ... 0.0 0.0 0.0
2010-12-29 0.6 0.3 0.7 ... 0.0 0.0 0.0
2010-12-30 1.1 0.8 1.5 ... 0.0 0.0 0.0
2010-12-31 0.2 0.6 0.1 ... 0.0 0.0 0.0
59854.0 59855.0 59941.0 59948.0 59951.0 59954.0 59981.0
1981-01-01 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1981-01-02 0.0 0.0 0.0 0.0 0.1 0.0 1.5
1981-01-03 0.0 0.2 0.0 0.0 0.1 0.0 0.1
1981-01-04 0.0 0.4 0.0 0.0 0.0 0.0 0.1
1981-01-05 0.0 0.0 0.0 0.1 11.6 0.7 0.0
... ... ... ... ... ... ... ...
2010-12-27 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2010-12-28 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2010-12-29 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2010-12-30 0.0 0.0 0.0 0.0 0.0 0.0 0.1
2010-12-31 0.0 0.0 0.0 0.0 0.0 0.0 0.0
[10957 rows x 1672 columns]
# NOTE: this section relies on `pre_csv` (rows = daily dates on a
# DatetimeIndex, columns = stations) and on `year`, both defined earlier in
# the script.


def _fill_missing_with_day_mean(frame, years, missing=32766):
    """Replace missing readings with the same-calendar-day mean of that station.

    For every cell equal to ``missing``, the replacement is the mean of the
    same column's values on the same month and day across ``years``, ignoring
    cells that are themselves ``missing``. When no valid value exists for
    that calendar day in any of ``years`` the result is NaN.

    Args:
        frame: DataFrame indexed by a DatetimeIndex, one column per station.
        years: iterable of calendar years that contribute to the means.
        missing: sentinel value that marks a missing reading.

    Returns:
        A float ndarray with the same shape as ``frame`` (dates x stations).
    """
    dates = frame.index
    data = np.asarray(frame, dtype=float)
    is_missing = data == missing

    # Valid readings only; NaN where the sentinel was, so that mean() skips them.
    observed = pd.DataFrame(np.where(is_missing, np.nan, data))

    # Month*100 + day identifies a calendar day independent of the year, so
    # Feb 29 simply forms its own group drawn from the leap years.
    day_key = (dates.month * 100 + dates.day).to_numpy()
    in_years = np.asarray(dates.year.isin(list(years)))

    # Per-station mean for each calendar day over the selected years.
    day_mean = observed[in_years].groupby(day_key[in_years]).mean()

    # Expand the per-calendar-day means back to one row per date.
    replacement = day_mean.reindex(day_key).to_numpy()

    # Only sentinel cells are replaced; every other value is passed through.
    return np.where(is_missing, replacement, data)


pre_rows = pre_csv.index  # daily dates of the analysis period

# Transposed so that each row is one station and each column one day, which
# is the layout written to the output file.
pre_fill = _fill_missing_with_day_mean(pre_csv, year).T
np.savetxt(r'F:\09_climate_site_data\01_precipitation\pre_站点\spcluster.txt', pre_fill)
需要注意年份的索引以及闰年平年的天数…
1981年至2010年共10957天(30年×365天=10950天,再加上1984、1988、1992、1996、2000、2004、2008这7个闰年各多出的1天)。
整体思路就是:
1.将txt文件中的降水数据先加上日期索引;
2.取出一个站点所有时间的降水数据;
3.接下来检索该站点中的空缺值(即32766),得到每个空缺值对应的日期索引;然后取该站点历年同月同日的数据,剔除其中的32766后求平均,用这个平均值替换该空缺值;
4.遍历所有站点。
需要注意的是:
1.numpy中的reshape功能很好用!
2.List、Numpy中的array、Pandas中的DataFrame可以相互转化。