# Tags: pandas, big data
class AgeAndSexStatistics(object):
    """Count people per age band and sex from 18-digit resident ID numbers.

    The ID number encodes the birth date as ``YYYYMMDD`` in characters 7-14
    and the sex in the 17th character (odd = male, even = female).
    """

    # Bin edges for ``pd.cut``. Bins are right-closed and the first one also
    # includes its left edge, so they read [0, 1], (1, 4], (4, 9], ...
    # Consequently age 1 is reported under the '0-1' label.
    _AGE_BINS = (0, 1, 4, 9, 14, 19, 24, 29, 34, 39, 44, 49, 54, 59, 64, 69,
                 74, 79, 84, 89, 94, 99, 999)

    # One label per bin. This single tuple feeds both ``pd.cut`` and the
    # result dictionary so the two can never drift apart.
    _AGE_LABELS = ('0-1', '1-4', '5-9', '10-14', '15-19', '20-24', '25-29',
                   '30-34', '35-39', '40-44', '45-49', '50-54', '55-59',
                   '60-64', '65-69', '70-74', '75-79', '80-84', '85-89',
                   '90-94', '95-99', '100+')

    def age_and_sex_statistics(self, cer_number_data):
        """Build a two-level dictionary of head counts per age band and sex.

        Args:
            cer_number_data: DataFrame with a ``cer_number`` column holding
                18-digit ID numbers. The ``age``, ``sex`` and ``age_range``
                columns are added to it in place (see ``get_age_and_sex``).

        Returns:
            ``{age_label: {'男': male_count, '女': female_count}}`` with one
            entry for every age band, including bands with zero people.
            Rows whose age falls outside every band are ignored.
        """
        df = self.get_age_and_sex(cer_number_data)
        age_and_sex_dict = {
            label: {'男': 0, '女': 0} for label in self._AGE_LABELS
        }
        # Iterate over the values rather than positional labels so that a
        # frame with a non-default index is handled correctly.
        for age_range, sex in zip(df['age_range'], df['sex']):
            if age_range not in age_and_sex_dict:
                # NaN: the age did not fall into any bin.
                continue
            # ``get_sex_by_id_card`` yields the strings '男' / '女'.
            if sex == '男':
                age_and_sex_dict[age_range]['男'] += 1
            else:
                age_and_sex_dict[age_range]['女'] += 1
        return age_and_sex_dict

    def get_age_and_sex(self, cer_number_data):
        """Derive age, sex and age band from the ``cer_number`` column.

        Args:
            cer_number_data: DataFrame with a ``cer_number`` column.

        Returns:
            The same DataFrame object, with three columns added in place:
            ``age`` (int), ``sex`` ('男' or '女') and ``age_range``
            (categorical age-band label, NaN when the age is out of range).
        """
        id_numbers = cer_number_data['cer_number']
        cer_number_data['age'] = id_numbers.map(self.get_age_by_id_card)
        cer_number_data['sex'] = id_numbers.map(self.get_sex_by_id_card)
        # include_lowest=True keeps age 0 (infants) in the first band
        # instead of turning it into NaN.
        cer_number_data['age_range'] = pd.cut(
            x=cer_number_data['age'],
            bins=list(self._AGE_BINS),
            labels=list(self._AGE_LABELS),
            include_lowest=True,
        )
        return cer_number_data

    def get_age_by_id_card(self, id_card, today=None) -> int:
        """Return the age in completed years encoded in an ID number.

        Args:
            id_card: 18-digit ID number; characters 7-14 are the birth date
                as ``YYYYMMDD``.
            today: Reference date (``datetime.date``). Defaults to the
                current date.

        Returns:
            Number of full years between the birth date and ``today``.
        """
        id_card = str(id_card)
        if today is None:
            today = datetime.date.today()
        birth_year = int(id_card[6:10])
        birth_month_day = (int(id_card[10:12]), int(id_card[12:14]))
        age = today.year - birth_year
        # The birthday has not come round yet this year: one year younger.
        if (today.month, today.day) < birth_month_day:
            age -= 1
        return age

    def get_sex_by_id_card(self, id_card) -> str:
        """Return '男' or '女' from the 17th character of an 18-digit ID.

        An even digit means female ('女'), an odd digit means male ('男').
        """
        if int(str(id_card)[16]) % 2 == 0:
            return '女'
        return '男'