读取 Excel 文件,并找出 title 列中重复的数据及其对应的 id
# Load Sheet1 of project.xls and build `data`: a mapping from every title
# that occurs more than once to the list of ids of the rows carrying it.
df = pd.read_excel(r'project.xls', sheet_name='Sheet1')

# value_counts() sorts titles by descending frequency and skips missing
# values; keep only the titles that appear more than once.
title_counts = df['title'].value_counts()
dupList = title_counts[title_counts > 1].index.tolist()
print(type(dupList), len(dupList), dupList)

# Group the duplicated rows once instead of re-filtering the whole frame
# for each title; sort=False keeps the ids in their original row order.
dup_rows = df[df['title'].isin(dupList)]
ids_by_title = {
    title: ids.tolist()
    for title, ids in dup_rows.groupby('title', sort=False)['id']
}

# Emit the keys in dupList order (most frequent title first).
data = {title: ids_by_title[title] for title in dupList}
print(data)
处理结果:
{'title1': ['2110251552596668', '2110251913137755', '2110251930146802'], ...}