用 pandas 和 openpyxl 实现对两个或多个 Excel 文件的合并与去重;具体代码与解释如下:
#用pandas实现类似vlookup的功能来合并两个表。注意:两个表必须有一个相互对应的列才可以(比如两个表里都有学号)。
import pandas as pd
import openpyxl # pip install openpyxl
# Load the two workbooks that will be joined.
df_1 = pd.read_excel('E:/菌株上架测序结果_新老99和潜在新种大整合结果/管子测序结果20200927.xlsx')
print(df_1)
df_2 = pd.read_excel('E:/菌株上架测序结果_新老99和潜在新种大整合结果/1.xlsx')
print(df_2)
# A third workbook could be loaded the same way:
# df_3 = pd.read_excel('sheet3.xlsx')

# VLOOKUP-style join: match rows where df_1['客户编号'] equals df_2['编号'].
# merge() defaults to an inner join, so rows without a match on either
# side are discarded.
df_combine = df_1.merge(df_2, left_on='客户编号', right_on="编号")
# ...and chained onto the result with another merge:
# df_combine = df_combine.merge(df_3, on='PolicyID')
print(df_combine)

# After the inner join '编号' carries the same values as '客户编号', so it is
# dropped. drop_duplicates() returns a NEW DataFrame; the result must be
# assigned back, otherwise the frame written out afterwards would still
# contain the duplicate rows.
df_combine = df_combine.drop(columns='编号').drop_duplicates()
print(df_combine)
df_combine.to_excel('E:/菌株上架测序结果_新老99和潜在新种大整合结果/df_combine