R语言——读取数据bankloan,按要求进行相关图的绘制

加载相关包并应用

install.packages("xlsx")
install.packages("ggplot2")
install.packages("gridExtra")
install.packages("dplyr")
library(xlsx)
library(ggplot2)
library(gridExtra)
library(dplyr)

读取数据bankloan

  1. 数据预处理,调整数据类型,将年龄、工龄分组

年龄分组:每隔10岁为一组。

工龄分组:0, 1, 3, 5, 10, 15, 20, 30, 40

# Load the first worksheet of the bank-loan workbook into a data frame.
# The path is machine-specific: point it at your own copy of bankloan.xlsx.
bankloan <- read.xlsx(file = "E:\\bankloan.xlsx",
                      sheetIndex = 1)

# Age bands of 10 years. The last break is derived from the data (never
# below 50) so that customers older than 50 are not silently turned into NA.
age_upper <- max(50, 10 * ceiling(max(bankloan$年龄, na.rm = TRUE) / 10))
bankloan$age_group <- cut(bankloan$年龄,
                          breaks = seq(0, age_upper, by = 10),
                          include.lowest = TRUE)

# Seniority bands. cut() builds right-closed intervals by default, so without
# include.lowest = TRUE a value of exactly 0 would fall outside (0, 1] and
# become NA.
bankloan$seniority_group <- cut(bankloan$工龄,
                                breaks = c(0, 1, 3, 5, 10, 15, 20, 30, 40),
                                include.lowest = TRUE)

注:请将代码中的文件路径改为自己电脑中该文件所在的路径。

       2.绘制违约与不违约客户的年龄的条形图

(1表示违约,0表示不违约)

# p1: bar chart of age against default status (1 = default, 0 = no default).
# The layers must be chained with `+`; otherwise geom_bar() and labs() are
# evaluated on their own and p1 is left as an empty plot.
p1 <- ggplot(data = bankloan, mapping = aes(x = 违约, y = 年龄)) +
  geom_bar(stat = 'identity',
           color = 'black',
           fill = 'steelblue') +
  labs(x = '')

# p2: the same bars with the x categories reordered by descending age,
# a text label per row and a dashed red line at the overall mean age.
p2 <- ggplot(data = bankloan,
             mapping = aes(x = reorder(违约, -年龄), y = 年龄)) +
  geom_bar(stat = 'identity', color = 'black', fill = 'steelblue') +
  labs(x = '') +
  geom_text(mapping = aes(x = 违约, y = 年龄, label = 年龄, vjust = -0.2)) +
  geom_hline(yintercept = mean(bankloan$年龄), color = 'red', lty = 'dashed')

# Show both charts side by side.
grid.arrange(p1, p2, ncol = 2)

      3.绘制不同年龄、教育和工龄的客户收入的直方图

# Small helper frame holding the age-band boundaries; `default` is recycled
# to the length of `age_group`.
# NOTE(review): the histograms below are drawn from these six boundary values,
# not from bankloan$年龄 -- confirm that this is intended.
new_bankloan <- data.frame(age_group = c(0, 10, 20, 30, 40, 50),
                           default = c('0', '1'))

# p3: plain count histogram of age_group (built here but not plotted).
p3 <- ggplot(data = new_bankloan,
             mapping = aes(x = age_group)) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 5)

# Grid of x values and the matching normal density, used as a reference curve.
# `length.out` is spelled out in full instead of relying on partial matching.
X <- seq(from = min(new_bankloan$age_group),
         to = max(new_bankloan$age_group),
         length.out = 10000)
X_dnorm <- dnorm(x = X,
                 mean = mean(new_bankloan$age_group),
                 sd = sd(new_bankloan$age_group))

# p4: density-scaled histogram with a kernel density estimate (red) and the
# fitted normal curve (dashed black). after_stat(density) replaces the
# deprecated ..density.. notation.
p4 <- ggplot(data = new_bankloan,
             mapping = aes(x = age_group,
                           y = after_stat(density))) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 5) +
  geom_density(color = 'red',
               lwd = 1) +
  geom_line(mapping = aes(x = X, y = X_dnorm), data = data.frame(X, X_dnorm),
            color = 'black',
            lwd = 1,
            lty = 2)
plot(p4)
# p5: count histogram of education level (built here but not plotted).
# binwidth = 1 gives one bar per level; the previous width of 5 lumped the
# whole variable into one or two bars.
# NOTE(review): assumes 受教育程度 is a small integer code -- confirm.
p5 <- ggplot(data = bankloan,
             mapping = aes(x = 受教育程度)) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 1)

# Grid of x values and the matching normal density, used as a reference curve.
# `length.out` is spelled out in full instead of relying on partial matching.
X <- seq(from = min(bankloan$受教育程度),
         to = max(bankloan$受教育程度),
         length.out = 10000)
X_dnorm <- dnorm(x = X,
                 mean = mean(bankloan$受教育程度),
                 sd = sd(bankloan$受教育程度))

# p6: density-scaled histogram with a kernel density estimate (red) and the
# fitted normal curve (dashed black). after_stat(density) replaces the
# deprecated ..density.. notation.
p6 <- ggplot(data = bankloan,
             mapping = aes(x = 受教育程度,
                           y = after_stat(density))) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 1) +
  geom_density(color = 'red',
               lwd = 1) +
  geom_line(mapping = aes(x = X, y = X_dnorm), data = data.frame(X, X_dnorm),
            color = 'black',
            lwd = 1,
            lty = 2)
plot(p6)
# p7: count histogram of seniority in 5-unit bins (built here but not plotted).
p7 <- ggplot(data = bankloan,
             mapping = aes(x = 工龄)) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 5)

# Grid of x values and the matching normal density, used as a reference curve.
# `length.out` is spelled out in full instead of relying on partial matching.
X <- seq(from = min(bankloan$工龄),
         to = max(bankloan$工龄),
         length.out = 10000)
X_dnorm <- dnorm(x = X,
                 mean = mean(bankloan$工龄),
                 sd = sd(bankloan$工龄))

# p8: density-scaled histogram with a kernel density estimate (red) and the
# fitted normal curve (dashed black). after_stat(density) replaces the
# deprecated ..density.. notation.
p8 <- ggplot(data = bankloan,
             mapping = aes(x = 工龄,
                           y = after_stat(density))) +
  geom_histogram(color = 'black',
                 fill = 'steelblue',
                 binwidth = 5) +
  geom_density(color = 'red',
               lwd = 1) +
  geom_line(mapping = aes(x = X, y = X_dnorm), data = data.frame(X, X_dnorm),
            color = 'black',
            lwd = 1,
            lty = 2)
plot(p8)

        4.绘制不同年龄、教育和工龄的客户收入的核密度图

# p9: income against age -- translucent scatter overlaid with 2-D kernel
# density contours. `h` and `n` are passed through to the density estimate
# (bandwidths for x and y, and the number of grid points).
p9 <- ggplot(data = bankloan, mapping = aes(x = 年龄, y = 收入)) +
  geom_point(alpha = 0.3, color = 'steelblue') +
  geom_density_2d(color = 'black',
                  lwd = 1,
                  h = c(0.4, 0.6),
                  n = 300) +
  # "none" is the supported way to suppress a legend; FALSE is deprecated.
  guides(color = "none")
plot(p9)
# p10: income against education level -- translucent scatter overlaid with
# 2-D kernel density contours. `h` and `n` are passed through to the density
# estimate (bandwidths for x and y, and the number of grid points).
p10 <- ggplot(data = bankloan, mapping = aes(x = 受教育程度, y = 收入)) +
  geom_point(alpha = 0.3, color = 'steelblue') +
  geom_density_2d(color = 'black',
                  lwd = 1,
                  h = c(0.4, 0.6),
                  n = 300) +
  # "none" is the supported way to suppress a legend; FALSE is deprecated.
  guides(color = "none")
plot(p10)
# p11: income against seniority -- translucent scatter overlaid with 2-D
# kernel density contours. `h` and `n` are passed through to the density
# estimate (bandwidths for x and y, and the number of grid points).
p11 <- ggplot(data = bankloan, mapping = aes(x = 工龄, y = 收入)) +
  geom_point(alpha = 0.3, color = 'steelblue') +
  geom_density_2d(color = 'black',
                  lwd = 1,
                  h = c(0.4, 0.6),
                  n = 300) +
  # "none" is the supported way to suppress a legend; FALSE is deprecated.
  guides(color = "none")
plot(p11)

        5.绘制不同年龄、教育和工龄的客户收入的箱线图

# p12: box plot of income for every distinct age, with outliers drawn as red
# triangles and the group mean marked by a black point.
p12 <- ggplot(data = bankloan,
              mapping = aes(x = factor(年龄), y = 收入)) +
  geom_boxplot(fill = 'gray',
               color = 'steelblue',
               outlier.fill = 'red',
               outlier.color = 'red',
               outlier.shape = 24) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean',
               geom = 'point',
               colour = 'black') +
  # The x axis shows age, so label it as age (it was mislabelled '工龄').
  labs(x = '年龄')
plot(p12)
# p13: box plot of income for every education level, with outliers drawn as
# red triangles and the group mean marked by a black point.
p13 <- ggplot(data = bankloan,
              mapping = aes(x = factor(受教育程度), y = 收入)) +
  geom_boxplot(fill = 'gray',
               color = 'steelblue',
               outlier.fill = 'red',
               outlier.color = 'red',
               outlier.shape = 24) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean',
               geom = 'point',
               colour = 'black') +
  labs(x = '受教育程度')
plot(p13)
# p14: box plot of income for every distinct seniority value, with outliers
# drawn as red triangles and the group mean marked by a black point.
p14 <- ggplot(data = bankloan,
              mapping = aes(x = factor(工龄), y = 收入)) +
  geom_boxplot(fill = 'gray',
               color = 'steelblue',
               outlier.fill = 'red',
               outlier.color = 'red',
               outlier.shape = 24) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean',
               geom = 'point',
               colour = 'black') +
  labs(x = '工龄')
plot(p14)

          6.根据客户的年龄、教育和工龄对客户收入分组, 绘制不同年龄、教育和工龄的客户收入的小提琴图

# Age bands of 10 years. The last break is derived from the data (never
# below 50) so that customers older than 50 are not silently turned into NA.
age_upper <- max(50, 10 * ceiling(max(bankloan$年龄, na.rm = TRUE) / 10))
bankloan$age_group <- cut(bankloan$年龄,
                          breaks = seq(0, age_upper, by = 10),
                          include.lowest = TRUE)

# Seniority bands. include.lowest = TRUE keeps a value of exactly 0 in the
# first band; with the default right-closed intervals it would become NA.
bankloan$seniority_group <- cut(bankloan$工龄,
                                breaks = c(0, 1, 3, 5, 10, 15, 20, 30, 40),
                                include.lowest = TRUE)

# Education is already a discrete code, so each distinct value becomes its
# own group. The previous cut() with breaks 1..4 produced (1,2], (2,3] and
# (3,4], turning level 1 and anything above 4 into NA.
bankloan$education_group <- factor(bankloan$受教育程度)
# p15: violin plot of income per age band (violin area scaled by group size),
# with a narrow box plot inside and the group mean as an orange diamond.
p15 <- ggplot(data = bankloan,
              mapping = aes(x = age_group, y = 收入)) +
  geom_violin(fill = 'steelblue',
              scale = 'count') +
  geom_boxplot(width = 0.2, outlier.color = 'red') +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean', geom = 'point', size = 3,
               shape = 18, colour = 'orange') +
  labs(x = '年龄')
plot(p15)
# p16: violin plot of income per education group (violin area scaled by group
# size), with a narrow box plot inside and the group mean as an orange diamond.
p16 <- ggplot(data = bankloan,
              mapping = aes(x = education_group, y = 收入)) +
  geom_violin(fill = 'steelblue',
              scale = 'count') +
  geom_boxplot(width = 0.2, outlier.color = 'red') +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean', geom = 'point', size = 3,
               shape = 18, colour = 'orange') +
  labs(x = '教育')
plot(p16)
# p17: violin plot of income per seniority band (violin area scaled by group
# size), with a narrow box plot inside and the group mean as an orange diamond.
p17 <- ggplot(data = bankloan,
              mapping = aes(x = seniority_group, y = 收入)) +
  geom_violin(fill = 'steelblue',
              scale = 'count') +
  geom_boxplot(width = 0.2, outlier.color = 'red') +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = 'mean', geom = 'point', size = 3,
               shape = 18, colour = 'orange') +
  labs(x = '工龄')
plot(p17)

      7.绘制不同年龄、教育和工龄下客户的收入与负债的散点图

# p18: income against debt ratio, coloured and shaped by age band.
p18 <- ggplot(data = bankloan,
              mapping = aes(x = 收入, y = 负债率,
                            color = age_group, shape = age_group)) +
  geom_point() +
  # Unnamed values are assigned to the factor levels in order. The previous
  # names ('0-9', ...) matched no level produced by cut() and every colour
  # was an empty string. Swap in any palette you prefer; it needs at least
  # as many colours as there are age bands.
  scale_color_manual(values = c('steelblue', 'orange', 'forestgreen',
                                'firebrick', 'purple', 'gray40')) +
  scale_shape_manual(values = 15:20) +
  theme(legend.title = element_blank(),
        legend.background = element_blank()) +
  guides(color = guide_legend(nrow = 1))
plot(p18)
# p19: income against debt ratio, coloured and shaped by education group.
# Shape now follows education_group as well; it was mapped to age_group,
# which did not match the education colours.
p19 <- ggplot(data = bankloan,
              mapping = aes(x = 收入, y = 负债率,
                            color = education_group,
                            shape = education_group)) +
  geom_point() +
  # Unnamed values are assigned to the factor levels in order. Five entries
  # cover up to five education groups; extend both scales if there are more.
  scale_color_manual(values = c('steelblue', 'orange', 'forestgreen',
                                'firebrick', 'purple')) +
  # The trailing `+` was missing here, so theme() and guides() were never
  # applied to the plot.
  scale_shape_manual(values = 15:19) +
  theme(legend.title = element_blank(),
        legend.background = element_blank()) +
  guides(color = guide_legend(nrow = 1))
plot(p19)
# p20: income against debt ratio, coloured and shaped by seniority band.
# NOTE(review): the original mapped age_group again (including a non-existent
# `age_group` aesthetic), duplicating p18. The section covers age, education
# and seniority, so this plot is taken to be the seniority one -- confirm.
p20 <- ggplot(data = bankloan,
              mapping = aes(x = 收入, y = 负债率,
                            color = seniority_group,
                            shape = seniority_group)) +
  geom_point() +
  # Unnamed values are assigned to the factor levels in order. Eight entries
  # match the eight seniority bands defined by the cut() breaks.
  scale_color_manual(values = c('steelblue', 'orange', 'forestgreen',
                                'firebrick', 'purple', 'gray40',
                                'goldenrod', 'black')) +
  # The trailing `+` was missing here, so theme() and guides() were never
  # applied to the plot.
  scale_shape_manual(values = c(15:20, 3, 4)) +
  theme(legend.title = element_blank(),
        legend.background = element_blank()) +
  guides(color = guide_legend(nrow = 1))
plot(p20)

注:scale_color_manual 函数中的颜色值需根据需要自行指定。

  • 4
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值