# -*- coding: utf-8 -*-
"""
Created on Sun Sep 16 09:24:18 2018
@author: wangxihe
"""
import os
import pandas as pd
import datetime
import matplotlib.pyplot as plt
import collections
import numpy as np
# Switch to the project data directory; the CSV files below are opened by
# relative path.
os.chdir(r'E:\spyderwork\评分卡模型\一特征构建')
# Plot configuration: SimHei so Chinese labels render correctly, and
# unicode_minus disabled so the minus sign displays correctly.
plt.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'axes.unicode_minus': False,
})
#%% Load the data
# The three GBK-encoded training-set files are read in the same order as
# the names on the left-hand side: master, login log, user-update log.
MasterData, LoginData, UpdateData = (
    pd.read_csv(csv_name, encoding='gbk')
    for csv_name in (
        'PPD_Training_Master_GBK_3_1_Training_Set.csv',
        'PPD_LogInfo_3_1_Training_Set.csv',
        'PPD_Userupdate_Info_3_1_Training_Set.csv',
    )
)
#%% Parse the date columns
# Both columns are parsed with the explicit '%Y-%m-%d' format. pd.to_datetime
# converts the whole column in one vectorized call instead of running
# datetime.strptime once per row; malformed strings still raise.
LoginData['Listinginfo1'] = pd.to_datetime(LoginData['Listinginfo1'], format='%Y-%m-%d')
LoginData['LogInfo3'] = pd.to_datetime(LoginData['LogInfo3'], format='%Y-%m-%d')
# Login days: whole-day difference Listinginfo1 - LogInfo3, stored as a
# plain day count rather than a timedelta.
LoginData['LogDay'] = (LoginData['Listinginfo1'] - LoginData['LogInfo3']).dt.days
#%%
# Inspect the distribution of login days with a 200-bin histogram.
LoginData['LogDay'].plot.hist(bins=200)
#%% View the distribution of login days
plt.hist(LoginData['LogDay'