两个Excel表格根据列名,进行数据对比
1.单条件匹配(只根据一个列名)
import pandas as pd
df1 = pd.read_excel('数据1.xls', dtype='str')
df2 = pd.read_excel('数据2.xlsx', dtype='str')
condition = df1['ID'].isin(df2['ID'])
matched_rows = df1[condition ]
matched_rows.to_excel('匹配数据.xlsx', index=True)
df1 = df1[~df1['ID'].isin(df2['ID'])]
df1.to_excel('更新后本单位创建2024-1-29.xlsx', index=True)
2.多条件匹配(根据多个列名)
import pandas as pd
df1 = pd.read_excel('表格1.xlsx', dtype='str')
df2 = pd.read_excel('表格2.xlsx', dtype='str')
df1['unique_id'] = df1['row1'] + '_' + df1['row2'] + '_' + df1['row3']
df2['unique_id'] = df2['row4'] + '_' + df2['row5'] + '_' + df2['row6']
condition = df1['unique_id'].isin(df2['unique_id'])
df1_matched = df1[condition]
df1_matched.to_excel('匹配数据.xlsx', index=True)