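In Python, a set is an unordered collection with no duplicate elements. This makes set() very helpful for counting duplicates: the number of duplicates is the total number of entries minus the number of unique entries in the set.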
# Check for duplicates in a given data frame
def check_duplicates(df_check, column_name):
    """Print and return the number of duplicate entries in one column.

    The duplicate count is the total number of rows minus the number of
    distinct values found in the column.

    Args:
        df_check: Table-like object that supports ``df_check[column_name]``
            (yielding an iterable of hashable values) and exposes
            ``shape[0]`` as its row count -- presumably a pandas DataFrame.
        column_name: Label of the column to inspect. It is also interpolated
            into the summary message.

    Returns:
        The number of duplicate entries (total rows minus unique values).
    """
    # NOTE(review): set() relies on ==/hash, so missing values (NaN) may each
    # be counted as distinct rather than as duplicates of one another --
    # confirm whether that matters for the data being checked.
    cnt_unique = len(set(df_check[column_name]))
    print("cnt_unique:", cnt_unique)

    cnt_total = df_check.shape[0]
    print("cnt_total:", cnt_total)

    cnt_dupli = cnt_total - cnt_unique
    print("cnt_dupli:", cnt_dupli)

    # An f-string (rather than "+" concatenation) keeps the message working
    # when the column label is not a string, e.g. an integer label.
    print(
        f"There are {cnt_dupli} duplicate {column_name}s "
        f"for {cnt_total} total entries\n"
    )
    return cnt_dupli