import datetime as DT
import io
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = 'warn'
content = ''' ssno lname fname pos_title ser gender dob
0 23456789 PLILEY JODY BUDG ANAL 0560 F 031871
1 987654321 NOEL HEATHER PRTG SRVCS SPECLST 1654 F 120852
2 234567891 SONJU LAURIE SUPVY CONTR SPECLST 1102 F 010999
3 345678912 MANNING CYNTHIA SOC SCNTST 0101 F 081692
4 456789123 NAUERTZ ELIZABETH OFF AUTOMATION ASST 0326 F 031387'''
df = pd.read_table(io.BytesIO(content), sep='\s{2,}')
df['dob'] = df['dob'].apply('{:06}'.format)
now = pd.Timestamp(DT.datetime.now())
df['dob'] = pd.to_datetime(df['dob'], format='%m%d%y') # 1
df['dob'] = df['dob'].where(df['dob'] < now, df['dob'] - np.timedelta64(100, 'Y')) # 2
df['age'] = (now - df['dob']).astype('
print(df)
产量
ssno lname fname pos_title ser gender \
0 23456789 PLILEY JODY BUDG ANAL 560 F
1 987654321 NOEL HEATHER PRTG SRVCS SPECLST 1654 F
2 234567891 SONJU LAURIE SUPVY CONTR SPECLST 1102 F
3 345678912 MANNING CYNTHIA SOC SCNTST 101 F
4 456789123 NAUERTZ ELIZABETH OFF AUTOMATION ASST 326 F
dob age
0 1971-03-18 00:00:00 43
1 1952-12-08 18:00:00 61
2 1999-01-09 00:00:00 15
3 1992-08-16 00:00:00 22
4 1987-03-13 00:00:00 27
>看起来你的dob列目前是字符串.第一,
使用pd.to_datetime将它们转换为Timestamps.
>格式’%m%d%y’将最后两位数转换为年,但是
不幸的是假设52意味着2052.因为那可能不是
Heather Noel的出生年份,让我们从dob中减去100年
只要dob比现在大.你可能想要在条件df [‘dob’]
>你可以从现在减去dob获得timedelta64[ns]. To将其转换为年,使用astype(‘< m8 [Y]')或astype('timedelta64 [Y]').