library(openxlsx)
library(smbinning)
library(prettyR)
# dat<-read.xlsx("/home/star/balack_namelist/scorecard/CreditScoreModel-master/cs-training.xlsx",1)
dat<-read.xlsx("/home/star/balack_namelist/scorecard/CreditScoreModel-master/credit.xlsx",1)
summary(dat)
head(dat,2)
str(dat)
#去掉最后一列
dat<-dat[,-11]
#dat<-dat[,-which(names(dat)%in%c("X1"))]#删除第一列方法2
#生成新列——婚姻状况列
dat1<-dat
dat1[,4]<-dat1[,4]-dat1[,3]
table(dat1[,4])
dat1[,4]<-factor(dat1[,4],levels=c(1,2),labels=c("其他","已婚"))
#重命名列
colnames(dat1)<-c("age","income","child","marital","dur_live",
"dur_work","housetype","nation","cardtype","loan")
summary(dat1)
dat1[,c(4,7,8,9)]<-lapply(dat1[,c(4,7,8,9)],as.factor)
?lapply
##盖帽法函数
block<-function(x,lower=T,upper=T){
if(lower){
q1<-quantile(x,0.01)
x[x<=q1]<-q1
}
if(upper){
q99<-quantile(x,0.99)
x[x>q99]<-q99
}
return(x)
}
anyNA(dat1)
#y中0和1互换(yuce weiyuegailv)
dat1$loan<-as.numeric(!as.logical(dat1$loan))
#dat1$loan <- 1- dat1$loan
##对每个变量分箱,求IV
##age
boxplot(age~loan,data=dat1,horizontal=T, frame=F,
col="lightgray",main="Distribution")
age<-smbinning(dat1,"loan","age")
write.csv(age$ivtable,"/home/star/balack_namelist/scorecard/CreditScoreModel-master/ivtable.csv",row.names = F)
age$ivtable
par(mfrow=c(2,2))
smbinning.plot(age,option="dist",sub="年龄")
smbinning.plot(age,option="WoE",sub="年龄")
smbinning.plot(age,option="goodrate",sub="年龄")
smbinning.plot(age,option="badrate",sub="年龄")
par(mfrow=c(1,1))
age$iv
cred_iv<-c("年龄"=age$iv)
##income
boxplot(income~loan,data=dat1,horizontal=T, frame=F,
col="lightgray",main="Distribution")
dat1$income<-block(dat1$income)
boxplot(income~loan,data=dat1,horizontal=T, frame=F,
col="lightgray",main="Distribution")
income<-smbinning(dat1,"loan","income")
income$ivtable
smbinning.plot(income,option="WoE",sub="收入")
income$iv
cred_iv<-c(cred_iv,"收入"=income$iv)
##child
boxplot(child~loan,data=dat1,horizontal=T, frame=F,
col="lightgray",main="Distribution")
dat1$child<-block(dat1$child)
child<-smbinning(dat1,"loan","child")
child$ivtable
smbinning.plot(child,option="WoE",sub="孩子数量")
child$iv
cred_iv<-c(cred_iv,"孩子数量"=child$iv)
##marital
xtab(~marital+loan,data=dat1,chisq=T)
marital<-smbinning.factor(dat1,"loan","marital")
marital$ivtable
smbinning.plot(marital,option="WoE",sub="婚姻状况")
marital$iv
cred_iv<-c(cred_iv,"婚姻状况"=marital$iv)
##dur_live
boxplot(dur_live~loan,data=dat1,horizontal=T,
frame=F, col="lightgray",main="Distribution")
t.test(dur_live~loan,data=dat1)
dur_live<-smbinning(dat1,"loan","dur_live")
dur_live
##dur_work
boxplot(dur_work~loan,data=dat1,horizontal=T,
frame=F, col="lightgray",main="Distribution")
t.test(dur_work~loan,data=dat1)
dur_work<-smbinning(dat1,"loan","dur_work")
dur_work$ivtable
smbinning.plot(dur_work,option="WoE",sub="在现工作时间")
dur_work$
cred_iv<-c(cred_iv,"在现工作时间"=dur_work$iv)
##housetype
xtab(~housetype+loan,data=dat1,chisq=T)
#对分类变量最优分值
housetype<-smbinning.factor(dat1,"loan","housetype")
housetype$ivtable
smbinning.plot(housetype,option="WoE",sub="住房类型")
housetype$iv
cred_iv<-c(cred_iv,"住房种类"=housetype$iv)
##nation
xtab(~nation+loan,data=dat1,chisq=T)
nation<-smbinning.factor(dat1,"loan","nation")
nation$ivtable
smbinning.plot(nation,option="WoE",sub="国籍")
nation$iv
cred_iv<-c(cred_iv,"国籍"=nation$iv)
##cardtype
xtab(~cardtype+loan,data=dat1,chisq=T)
cardtype<-smbinning.factor(dat1,"loan","cardtype")
cardtype$ivtable
smbinning.plot(cardtype,option="WoE",sub="信用卡类型")
cardtype$iv
cred_iv<-c(cred_iv,"信用卡类型"=cardtype$iv)
barplot(cred_iv,main="各变量信息量")
#生成分箱后的新列
dat2<-dat1
dat2<-smbinning.gen(dat2,age,"glage")
dat2<-smbinning.gen(dat2,income,"glincome")
dat2<-smbinning.gen(dat2,child,"glchild")
dat2<-smbinning.factor.gen(dat2,marital,"glmarital")
dat2<-smbinning.gen(dat2,dur_work,"gldur_work")
dat2<-smbinning.factor.gen(dat2,housetype,"glhousetype")
dat2<-smbinning.factor.gen(dat2,nation,"glnation")
dat2<-smbinning.factor.gen(dat2,cardtype,"glcardtype")
summary(dat2)
dat3<-dat2[,c(11:18,10)]
dat3
cred_mod<-glm(loan~. ,data=dat3,family=binomial())
summary(cred_mod)
#打分
?smbinning.scaling
cre_scal<-smbinning.scaling(cred_mod,pdo=45,score=800,odds=50)
cre_scal
cre_scal$minmaxscore
cre_scal$logitscaled
#生成每行对应的分值
dat4<-smbinning.scoring.gen(smbscaled=cre_scal, dataset=dat3)
par(mar=c(5,5,5,5)) # the plot is too large
boxplot(Score~loan,data=dat4,horizontal=T, frame=F,
col="lightgray",main="Distribution")
scaledcard<-cre_scal$logitscaled[[1]][-1,c(1,2,6)]
scaledcard[,1]<-c(rep("年龄",5),rep("收入",3),
rep("孩子数量",2),rep("婚否",2),rep("在现工作时间",5),
rep("住房类型",3),rep("国籍",8),rep("信用卡类型",7))
scaledcard
write.csv(dat4,"/home/star/balack_namelist/scorecard/CreditScoreModel-master/card.csv",row.names = F)
Scorecard R
最新推荐文章于 2021-01-08 11:28:35 发布