核对两个excel文件内容

任务名称:
在A学院中存在一些信息错误,需要找出来。
具体问题:查看A学院中填写的姓名、学号、证件号和正确汇总文件的姓名、学号、证件号是否相同。
注意事项:根据名字来判断,因为证件号和学号可能有错误,此时有重名的问题!!!

# coding: utf-8
# Name:     process
# Author:   tyler
# Data:     2022/3/26

import pandas as pd

if __name__ == "__main__":
    file1 = "***学院.xlsx"
    df1 = pd.read_excel(file1)
    check1 = df1[["姓名", "证件号", "学号"]]
    # print(check1)

    file_all = "学生总库.xlsx"
    df_all = pd.read_excel(file_all)
    check_all = df_all[["姓名", "证件号", "学号"]]
    # print(check_all)
    error_list = []
    count = 0
    not_find_name = []
    repeated_name = []
    for row in check1.iterrows():
        name = row[1][0]
        ids = row[1][1]
        stu_ids = row[1][2]
        old_data = tuple([name, ids, stu_ids])
        # print(name, ids, stu_ids)
        if name in check_all.values:
            # real_ids = 1
            cur = check_all[check_all["姓名"].str.contains(name)]
            # name, rel_ids, rel_stu_ids = cur[1], cur[2], cur[3]

            cur_values = cur.values
            # print(cur_values)
            # print(type(cur))
            if len(cur) > 1 :
                # 处理同名的问题
                # cur_values = cur.values
                cur_new = check_all[check_all["证件号"].str.contains(ids)]
                # print(cur_new.values)
                if len(cur_new.values)==0:
                    print("身份证有问题")
                    print(old_data)
                    print("=================================================")
                else:   # 身份证没问题
                    cur_new_value = cur_new.values
                    name, rel_ids, rel_stu_ids = cur_new_value[0][0], cur_new_value[0][1], cur_new_value[0][2]
                    # print(rel_ids, rel_stu_ids)
                    if ids == rel_ids and stu_ids == rel_stu_ids:
                        count += 1
                    else:
                        real_data = tuple([name, rel_ids, rel_stu_ids])
                        error_list.append(tuple([old_data, real_data]))
                # continue

            else:
                name, rel_ids, rel_stu_ids = cur_values[0][0], cur_values[0][1], cur_values[0][2]
                # print(rel_ids, rel_stu_ids)
                if ids == rel_ids and stu_ids == rel_stu_ids:
                    count += 1
                else:
                    real_data = tuple([name, rel_ids, rel_stu_ids])
                    error_list.append(tuple([old_data, real_data]))
        else:
            # 没找到名字的
            not_find_name.append(name)


    # print(len(error_list))
    for data in error_list:
        print(data)

    print("下列名称没找到")
    print(not_find_name)




  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值