# -*- coding: utf-8 -*-
"""
Created on Fri Aug 12 15:48:40 2022
Task:导出填充率大于75%的sites
"""
import os
import pandas as pd
import numpy as np
import shutil
# Directory whose files are scanned to compute a per-site filling rate.
path = r'D:\Fluxnet\要插补的LE_Driversqc=0'


def _filling_rate(frame):
    """Return the fraction of rows that are complete after interpolation.

    The ``LE_F_MDS_QC`` column is dropped, every ``-9999`` is treated as a
    missing value, gaps are filled with ``DataFrame.interpolate()`` using
    pandas' defaults, and rows that still contain a NaN are discarded.
    The result is ``remaining_rows / original_rows``.

    Returns 0.0 for a frame without rows so that an empty file cannot
    trigger a division by zero (and never passes a ">= threshold" test).

    Raises:
        KeyError: if ``frame`` has no ``LE_F_MDS_QC`` column.
    """
    cleaned = frame.drop(columns=['LE_F_MDS_QC']).replace(-9999, np.nan)
    total_rows = cleaned.shape[0]
    if total_rows == 0:
        return 0.0
    # NOTE(review): with pandas' default (linear, forward) interpolation,
    # gaps located before the first valid value presumably stay NaN and are
    # therefore removed by dropna() -- confirm this is the intended metric.
    usable_rows = cleaned.interpolate().dropna().shape[0]
    return usable_rows / total_rows


site_name = []
Filling_Rate = []
for i in sorted(os.listdir(path)):
    file_path = os.path.join(path, i)
    # Skip sub-directories and other non-file entries; only files are parsed.
    if not os.path.isfile(file_path):
        continue
    rate = _filling_rate(pd.read_csv(file_path))
    # The site identifier is the second underscore-separated token of the
    # file name in this directory.
    site_name.append(i.split('_', 6)[1])
    Filling_Rate.append(rate)

# Summary table: one row per input file, exported/used by the steps below.
dic = {'site': site_name,
       'Filling_Rate': Filling_Rate}
a = pd.DataFrame(dic)
# Source directory holding the per-site training files, and the destination
# directory that receives the files of the selected sites.
path1 = r'D:\Fluxnet\要训练的LE_qc=0_Driversqc=0'
outpath = r'D:\Fluxnet\填充率大于75%的可训练站点_Drivers_Before_Filled'

# Sites whose filling rate reaches the threshold.
# NOTE(review): the module docstring and the output folder name mention 75%,
# but the selection here uses 0.80 -- confirm which value is intended.
goal_site_name = a.loc[a['Filling_Rate'] >= 0.80, 'site'].tolist()
print(len(goal_site_name))

# shutil.copy does not create missing parent directories, so make sure the
# destination exists before copying.
os.makedirs(outpath, exist_ok=True)

# Set lookup avoids rescanning the list for every file name.
goal_sites = set(goal_site_name)
for i in os.listdir(path1):
    src = os.path.join(path1, i)
    # Only regular files can be copied with shutil.copy.
    if not os.path.isfile(src):
        continue
    # In this directory the site identifier is the first underscore-separated
    # token of the file name.
    if i.split('_', 3)[0] in goal_sites:
        shutil.copy(src, os.path.join(outpath, i))
# ---- Condensed form of the selection-and-copy step: really just these lines ----
# The summary column is named 'Filling_Rate' (case-sensitive), and the files
# listed from path1 must also be copied from path1.
goal_site = a.loc[a['Filling_Rate'] >= 0.80, 'site'].tolist()
for i in os.listdir(path1):
    if i.split('_', 3)[0] in goal_site:
        shutil.copy(os.path.join(path1, i), os.path.join(outpath, i))
# [Export the sites whose filling rate exceeds a given value]
# First published on 2022-08-13 12:33:42