import pandas as pd
import numpy as np
from collections import Counter
# Default workbook locations used when this file is run as a script.
INPUT_PATH = r'E:\Anaconda3\work\csv.xlsx'
OUTPUT_PATH = r'E:\Anaconda3\work\result.xlsx'


def multiset_difference(left, right):
    """Return the values each column has in excess of the other.

    Both inputs are treated as multisets: a value that occurs 3 times in
    ``left`` and once in ``right`` is reported twice for ``left``; a value
    that occurs only in one column is reported as often as it occurs there.
    Missing cells (NaN/None) are ignored on both sides.

    Args:
        left: A pandas Series (or any list-like) of hashable values.
        right: A pandas Series (or any list-like) of hashable values.

    Returns:
        A ``(left_surplus, right_surplus)`` tuple of lists. Values appear in
        the order they are first seen in their own column.
    """
    # dropna() removes missing cells by value. The previous identity test
    # (`x is not np.nan`) only worked when a cell happened to be the np.nan
    # singleton, and the shared-key pass did not filter NaN at all.
    left_counts = Counter(pd.Series(left).dropna().tolist())
    right_counts = Counter(pd.Series(right).dropna().tolist())

    # Counter subtraction keeps only positive counts, which covers both the
    # "more occurrences on this side" and the "only on this side" cases.
    left_surplus = list((left_counts - right_counts).elements())
    right_surplus = list((right_counts - left_counts).elements())
    return left_surplus, right_surplus


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Compare columns "a" and "b" of a workbook and write the surplus values.

    Reads ``input_path``, computes the multiset difference between columns
    "a" and "b", and writes the result to ``output_path`` with the columns
    'xa' (surplus of "a") and 'xb' (surplus of "b"). The shorter column is
    padded with empty cells.
    """
    df = pd.read_excel(input_path)
    xa, xb = multiset_difference(df["a"], df["b"])

    # Build one row per list and transpose so lists of unequal length are
    # padded rather than rejected.
    ds = pd.DataFrame([xa, xb]).T
    ds.columns = ['xa', 'xb']
    ds.to_excel(output_path, index=False)


if __name__ == "__main__":
    main()
# 来源:知乎 (source: Zhihu)