根据身份证统计各年龄段的性别人数

pandas,大数据

class AgeAndSexStatistics(object):
    """Count people per age bucket and sex from Chinese resident ID numbers.

    The module using this class must have ``datetime`` and pandas (as ``pd``)
    imported.
    """

    # Right-closed bin edges handed to ``pd.cut``; the lowest edge is included
    # explicitly so that age 0 lands in the first bucket.
    _AGE_BINS = (0, 1, 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69,
                 74, 79, 84, 89, 94, 99, 999)

    # One label per bin. This is the single source of truth for both the
    # ``pd.cut`` labels and the keys of the result dictionary, so the two can
    # never drift apart.
    _AGE_LABELS = ('0-1', '1-4', '5-9', '10-14', '15-19', '20-24', '25-29',
                   '30-34', '35-39', '40-44', '45-49', '50-54', '55-59',
                   '60-64', '65-69', '70-74', '75-79', '80-84', '85-89',
                   '90-94', '95-99', '100+')

    def age_and_sex_statistics(self, cer_number_data):
        """Build a two-level dictionary of head counts.

        Args:
            cer_number_data: DataFrame with a ``cer_number`` column holding
                ID numbers. It is passed to ``get_age_and_sex`` and therefore
                gains ``age``, ``sex`` and ``age_range`` columns in place.

        Returns:
            ``{age_label: {'男': male_count, '女': female_count}}`` with one
            entry for every age bucket, including buckets with no people.
            Rows whose age falls outside all buckets are not counted.
        """
        df = self.get_age_and_sex(cer_number_data)
        age_and_sex_dict = {label: {'男': 0, '女': 0}
                            for label in self._AGE_LABELS}

        # Iterate over values rather than ``df[col][i]`` so the count does not
        # depend on the DataFrame having a default 0..n-1 index.
        for age_range, sex in zip(df['age_range'], df['sex']):
            # Ages outside the bins come back from ``pd.cut`` as NaN, which is
            # never a key of the dictionary.
            if age_range in age_and_sex_dict:
                # ``sex`` is the '男'/'女' string from ``get_sex_by_id_card``.
                age_and_sex_dict[age_range][sex] += 1
        return age_and_sex_dict

    def get_age_and_sex(self, cer_number_data):
        """Derive age, sex and age bucket from the ``cer_number`` column.

        The columns ``age``, ``sex`` and ``age_range`` are added to
        ``cer_number_data`` itself, and that same object is returned.
        """
        id_numbers = cer_number_data['cer_number']
        cer_number_data['age'] = id_numbers.map(self.get_age_by_id_card)
        cer_number_data['sex'] = id_numbers.map(self.get_sex_by_id_card)

        # Bins are right-closed, e.g. (1, 4] -> '1-4'; ``include_lowest``
        # closes the first bin on the left so that age 0 is kept.
        cer_number_data['age_range'] = pd.cut(x=cer_number_data['age'],
                                              bins=list(self._AGE_BINS),
                                              labels=list(self._AGE_LABELS),
                                              include_lowest=True)
        return cer_number_data

    def get_age_by_id_card(self, id_card):
        """Return the age as current calendar year minus birth year.

        Month and day of birth are ignored, so a person whose birthday has
        not yet occurred this year is still counted as one year older.
        Accepts 18-digit IDs (4-digit birth year at positions 6-9) and
        15-digit IDs (2-digit birth year at positions 6-7, taken as 19YY).

        Raises:
            ValueError: if the birth-year characters are not digits.
        """
        id_card = str(id_card).strip()
        if len(id_card) == 15:
            birth_year = 1900 + int(id_card[6:8])
        else:
            birth_year = int(id_card[6:10])
        return datetime.date.today().year - birth_year

    def get_sex_by_id_card(self, id_card):
        """Return '男' or '女' from the sequence digit of the ID number.

        For an 18-digit ID this is the second-to-last character; for a
        15-digit ID it is the last character. An odd digit means male, an
        even digit means female.

        Raises:
            IndexError: if the ID is too short to contain the digit.
            ValueError: if the character at that position is not a digit.
        """
        id_card = str(id_card).strip()
        sex_digit = id_card[14] if len(id_card) == 15 else id_card[16]
        if int(sex_digit) % 2 == 0:
            return '女'
        return '男'
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值