对身份证号码中的日期用*代替
import pandas as pd
import re
# Source workbook. dtype=object asks pandas not to infer column dtypes, and
# keep_default_na=False stops the default NaN markers from being parsed as NaN.
fileName = '打码数据.xlsx'
df = pd.read_excel(
    fileName,
    keep_default_na=False,
    dtype=object,
)
# Splits a value into its first 6 characters, the middle part, and its last 4.
p = re.compile(r'(.{6})(.*)(.{4})')
# Only values whose first character is 1-8 are considered for masking.
p1 = re.compile(r'^[1-8].*')
# 15-digit (first-generation) ID number: 6-digit region code, 6-digit birth
# date (YYMMDD) and 3-digit sequence code.
_p15 = re.compile(r'[1-8][0-9]{14}')


# Mask the birth date
def mask(code):
    """Return ``code`` with the birth-date part replaced by asterisks.

    Args:
        code: A cell value from the ID-number column. Normally a string;
            an ``int`` is masked via its decimal representation, and any
            other non-string value is returned unchanged.

    Returns:
        The masked string, or ``code`` itself when it does not start with a
        digit in 1-8 or is too short (fewer than 10 characters) to split.

    Masking rules:
        * 15 digits: the 6 birth-date digits after the region code are
          replaced with ``******`` (length is preserved).
        * Anything else: everything between the first 6 and the last 4
          characters is replaced with ``********``; for an 18-character ID
          this is exactly the 8-digit birth date.
    """
    if isinstance(code, str):
        text = code
    elif isinstance(code, int) and not isinstance(code, bool):
        # Numeric cells arrive as int; mask them rather than crash or leave
        # the number unmasked.
        text = str(code)
    else:
        return code

    if not p1.search(text):
        return code

    if _p15.fullmatch(text):
        return text[:6] + '******' + text[12:]

    return p.sub(
        lambda m: '{}{}{}'.format(m.group(1), '********', m.group(3)),
        text,
    )
# Mask every value in the ID-number column, then write the result to a new
# workbook without the index column.
df['身份证号'] = df['身份证号'].apply(mask)
# The context manager saves and closes the workbook on exit, even if
# to_excel raises. ExcelWriter.save() was removed in pandas 2.0, so it must
# not be called explicitly.
with pd.ExcelWriter('打码后数据.xlsx') as writer:
    df.to_excel(writer, index=False)
打码前:
打码后: