请帮我分析下面这段代码：
import os.path
from operator import index
from turtledemo.penrose import start
import pandas as pd
# Split every "output*" CSV found in `file_path` into one CSV per `addr`
# value, with each group re-sampled onto a regular 20-second grid of
# `align_by` timestamps. Groups whose three phase voltages are zero on every
# grid row are not written; they are listed in droped_file.txt instead.
align_by = 'report_time'  # timestamp column used to align the samples
file_path = './masked_data'  # input directory (e.g. holds output0731.csv)
save_file = './split_new'  # root directory for the per-address output

# Columns every per-address file must contain; any that the input lacks are
# added as empty columns.
table_columns = ['addr', align_by, 'ua', 'ub', 'uc', 'ia', 'ib', 'ic', 'pi', 'pa', 'pb', 'pc', 'qi', 'qa',
                 'qb', 'qc', 'ft', 'fa', 'fb', 'fc', 'qp_power1', 'qp_power0', 'qr_power1', 'qr_power0',
                 'power1', 'power2', 'power3', 'power4']
voltage_columns = ['ua', 'ub', 'uc']

file_list = os.listdir(file_path)
with open('./{}/droped_file.txt'.format(file_path), 'w') as f:
    for file in file_list:
        # Only the "output*" exports are processed; xlsx files are skipped.
        if 'output' not in file or file.endswith('xlsx'):
            continue
        # "output0731.csv" -> "data_0731"
        save_path = 'data_{}'.format(file.split('output')[-1].split('.')[0].strip())
        df = pd.read_csv(os.path.join(file_path, file), encoding="utf-8-sig", index_col=False)

        # The grid spans from the first to the last non-null timestamp in
        # file order. Nulls are ignored at both ends so a blank first row
        # cannot break pd.date_range.
        times = df[align_by].dropna()
        if times.empty:
            print('{} has no {} values, skipped'.format(file, align_by))
            continue
        start = times.iloc[0]
        end = times.iloc[-1]
        print(start, end)

        os.makedirs('./{}/{}'.format(save_file, save_path), exist_ok=True)

        # Convert the key column BEFORE grouping so every group is built
        # from datetime values that are comparable with the grid.
        df[align_by] = pd.to_datetime(df[align_by])
        ts = pd.date_range(start=start, end=end, freq='20s')
        grid = ts.to_frame(name=align_by)
        tolerance = pd.Timedelta("5s")

        # Group by the 'addr' column and write one CSV file per group.
        grouped = df.groupby('addr')
        for addr, group_df in grouped:
            # merge_asof needs a sorted right-hand key without nulls; keep
            # the first row (in file order) for each duplicated timestamp.
            group_df = (group_df.dropna(subset=[align_by])
                        .drop_duplicates(subset=align_by)
                        .sort_values(align_by))

            # For every grid point take the nearest sample within 5 seconds;
            # grid points without such a sample get NaN in all data columns.
            df_merged = pd.merge_asof(grid,
                                      group_df,
                                      on=align_by,
                                      direction="nearest",
                                      tolerance=tolerance)

            # Add the table columns the input lacks. Order: timestamp, the
            # added columns, then the input columns in their original order.
            missing = [col for col in table_columns if col not in df_merged.columns]
            present = [col for col in df_merged.columns if col != align_by]
            df_merged = df_merged.reindex(columns=[align_by] + missing + present)

            # Fill the gaps linearly in both directions. Only numeric
            # columns can be interpolated; text columns are left untouched.
            numeric_cols = df_merged.select_dtypes(include='number').columns
            if len(numeric_cols):
                df_merged[numeric_cols] = df_merged[numeric_cols].interpolate(
                    method='linear', limit_direction='both')

            # Every row of this file belongs to the same address, including
            # the grid rows that had no sample within the tolerance.
            df_merged['addr'] = addr

            # Drop the group only when ALL three voltages are zero on EVERY
            # row. (`series.all() == 0` would instead be true as soon as a
            # column contains a single zero.) An empty grid is still written.
            voltages = df_merged[voltage_columns]
            if not voltages.empty and (voltages == 0).to_numpy().all():
                print('{} is droped'.format(addr))
                # addr may be numeric, so convert before concatenating.
                f.write(file + "_" + str(addr))
                f.write('\n')
                continue

            # NOTE: a leading '0' in the address is kept in the file name.
            safe_addr = str(addr).strip()
            print(len(df_merged), addr)
            df_merged.to_csv(f"./{save_file}/{save_path}/{safe_addr}.csv", index=False)
最新发布