pandas很强大,前几天公司要求利用已知用户身份证、手机号知道客户的星座、性别、年龄等相关信息做用户画像,刚刚开始想到用R语言来实现,后来想到以后性能的问题,放弃了,由于公司没sas,就用pandas快速实现,参考代码:
# coding: UTF-8
'''
Created on 2015年8月25日
@author: ZHOUMEIXU204
'''
import MySQLdb
import pandas as pd
import time
import datetime
Table_id =pd.read_excel(u'D:\\Users\\zhoumeixu204\\Desktop\\全国身份证归属地数据库.xlsx')
print(Table_id.head())
num_str = lambda x: str(x)
num_sub = lambda x:x[0:6]
id_dict = dict(zip(Table_id['BM'].apply(num_str).apply(num_sub), Table_id['DQ']))
con = MySQLdb.connect(host="202.69.27.239", port=8443, user="root", passwd="Pa123456!", db="analyse", use_unicode=True, charset="utf8")
con_dev = MySQLdb.connect(host="202.69.27.239", port=8443, user="root", passwd="Pa123456!", db="analyse_dev", use_unicode=True, charset="utf8")
table_id_decode = pd.read_sql("select * from table_id", con)
f = lambda x: x[0:6]
table_id_decode['cert_address'] = table_id_decode['cert_id'].apply(f).map(id_dict).fillna(u'无法匹配')
def constellation(x):
if len(x)>=15:
monthday=int(x[10:12]+x[12:14])
if monthday>=321 and monthday<=419:
constellation=u'白羊座'
elif monthday>=420 and monthday<=520:
constellation=u'金牛座'
elif monthday>=521 and monthday<=621:
constellation=u'双子座'
elif monthday>=622 and monthday<=722:
constellation=u'巨蟹座'
elif monthday>=723 and monthday<=822:
constellation=u'狮子座'
elif monthday>=823 and monthday<=922:
constellation=u'处女座'
elif monthday>=823 and monthday<=1023:
constellation=u'天枰座'
elif monthday>=1024 and monthday<=1121:
constellation=u'天蝎座'
elif monthday>=1122 and monthday<=1221:
constellation=u'射手座'
elif (monthday>=1222 and monthday<=1231) or (monthday>=101 and monthday<=119) :
constellation=u'魔蝎座'
elif monthday>=120 and monthday<=218:
constellation=u'水瓶座'
elif monthday>=219 and monthday<=320:
constellation=u'双鱼座'
else:
constellation=u'其他'
else:
constellation=u'无法识别'
return(constellation)
def zodiac(x):
if len(x)>=15:
year=int(x[6:10])
if year==11:
zodiac=u'羊'
elif year==10:
zodiac=u'马'
elif year==9:
zodiac=u'蛇'
elif year==8:
zodiac=u'龙'
elif year==7:
zodiac=u'兔'
elif year==6:
zodiac=u'虎'
elif year==5:
zodiac=u'牛'
elif year==4:
zodiac=u'鼠'
elif year==3:
zodiac=u'猪'
elif year==2:
zodiac=u'狗'
elif year==1:
zodiac=u'鸡'
elif year==0:
zodiac=u'猴'
else:
zodiac=u'其他'
else:
zodiac=u'无法获得'
return(zodiac)
def sex(x):
if len(x)==15:
if int(x[len(x)-1])%2==1:
sex=u'男'
else:
sex=u'女'
elif len(x)==18:
if int(x[len(x)-2])%2==1:
sex=u'男'
else:
sex=u'女'
else:
sex=u'无法识别'
return(sex)
time.sleep(0.5)
def birthday(x):
if len(x)>=15:
birthday=x[6:10]+"-"+x[10:12]+"-"+x[12:14]
else:
birthday=u'无法获得'
return(birthday)
def age(x):
if len(x)>=15:
now = datetime.datetime.now()
otherStyleTime =now.strftime("%Y-%m-%d %H:%M:%S")[0:4]
age=str(int(otherStyleTime)-int(x[6:10]))
else:
age=u'无法获得'
return(age)
table_id_decode['zodiac']=table_id_decode['cert_id'].apply(zodiac)
table_id_decode['constellation']=table_id_decode['cert_id'].apply(constellation)
table_id_decode['sex']=table_id_decode['cert_id'].apply(sex)
table_id_decode['birthday']=table_id_decode['cert_id'].apply(birthday)
table_id_decode['age']=table_id_decode['cert_id'].apply(age)
cert_address = table_id_decode.drop('usr_name',axis=1)
# print(cert_address)
cert_address.to_sql("cert_info", con_dev, flavor="mysql", if_exists='replace', index=False)
con.commit()
con.close()
con_dev.commit()
con_dev.close()
print("suceess")