import os
import pandas as pd
# Input folder path and output folder path; fill both in before running.
input_folder = ""
output_folder = ''

# Rows count as duplicates when they match on all of these columns.
distinct_columns = ['table_hive_name', 'column_name']


def is_excel_workbook(file_name):
    """Return True if *file_name* names an .xlsx workbook worth processing.

    Names starting with ``~$`` are skipped: Excel creates such lock files
    next to a workbook while it is open, and they are not readable
    workbooks even though they end in ``.xlsx``.
    """
    return file_name.endswith('.xlsx') and not file_name.startswith('~$')


def drop_duplicate_rows(df, columns):
    """Return *df* without rows that repeat on *columns*.

    The first occurrence of each combination is kept (the pandas
    ``drop_duplicates`` default). Returns ``None`` when any of *columns*
    is missing from *df*, so the caller can report the file instead.
    To compare entire rows, call ``df.drop_duplicates()`` with no subset.
    """
    if not all(column in df.columns for column in columns):
        return None
    return df.drop_duplicates(subset=columns)


def dedupe_excel_folder(src_folder, dst_folder, columns):
    """Deduplicate every .xlsx file in *src_folder* into *dst_folder*.

    Each workbook is read with ``pd.read_excel`` (first sheet only, the
    pandas default), deduplicated on *columns*, and written under the same
    file name in *dst_folder*, which is created if it does not exist.
    Files lacking any of *columns* are reported on stdout and not written.
    """
    os.makedirs(dst_folder, exist_ok=True)
    for file_name in os.listdir(src_folder):
        if not is_excel_workbook(file_name):
            continue
        df = pd.read_excel(os.path.join(src_folder, file_name))
        df_unique = drop_duplicate_rows(df, columns)
        if df_unique is None:
            print(f"{file_name}不存在{columns}")
            continue
        df_unique.to_excel(os.path.join(dst_folder, file_name), index=False)


# Run on execution, as the script always has; unset paths are reported
# instead of failing inside os.makedirs('') / os.listdir('').
if input_folder and output_folder:
    dedupe_excel_folder(input_folder, output_folder, distinct_columns)
else:
    print("Set input_folder and output_folder before running this script.")
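# python对excel文件去重 (Deduplicating Excel files with Python)
# 最新推荐文章于 2024-07-23 14:36:35 发布 (page metadata: latest recommended article published 2024-07-23 14:36:35)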