r语言c5.0编程步骤,机器学习与R语言:C5.0(示例代码)

#----------------------------------------

# 功能描述:演示C50建模过程

# 数据集:汉堡大学信贷模型,信贷数据

#

#----------------------------------------

## Step 1: Collecting the data ----

# Import the CSV file.
# NOTE(review): the right-hand side of this assignment was lost when the
# listing was extracted; the file name "credit.csv" is an assumption -- confirm.
# stringsAsFactors = TRUE is passed explicitly because read.csv() stopped
# converting strings to factors by default in R 4.0.
credit <- read.csv("credit.csv", stringsAsFactors = TRUE)

# C5.0() needs the class as a factor; this is a no-op if it already is one.
credit$default <- as.factor(credit$default)

## Step 2: Exploring and preparing the data ----

# Inspect two categorical features.
table(credit$checking_balance)
table(credit$savings_balance)

# Five-number summaries of two numeric features.
summary(credit$months_loan_duration)
summary(credit$amount)

# Distribution of the class variable.
table(credit$default)

# Shuffle the rows so the train/test split below is random. The seed makes the
# shuffle (and therefore the whole analysis) reproducible.
# NOTE(review): the shuffle expression was lost in extraction; ordering by a
# vector of uniform random numbers is a reconstruction -- confirm.
set.seed(12345)
credit_rand <- credit[order(runif(nrow(credit))), ]

# Compare the original and shuffled data: the summaries should be identical,
# while the first few values should differ.
summary(credit$amount)
summary(credit_rand$amount)
head(credit$amount)
head(credit_rand$amount)

# Split the shuffled data into training and test sets.
# NOTE(review): the split sizes were lost in extraction; a 90% / 10% split is
# assumed -- confirm. A logical mask is used instead of 1:n indexing so the
# split stays correct for any number of rows.
n_train <- floor(0.9 * nrow(credit_rand))
is_train <- seq_len(nrow(credit_rand)) <= n_train
credit_train <- credit_rand[is_train, ]
credit_test <- credit_rand[!is_train, ]

# Check that the class proportions are similar in both sets.
prop.table(table(credit_train$default))
prop.table(table(credit_test$default))

## Step 3: Training a model on the data ----

library(C50)

#---------------------------------------------
# Building the classifier:
#   m <- C5.0(train, class, trials = 1, costs = NULL)
#     train:  a data frame containing the training data
#     class:  a factor vector with the class of each row of the training data
#     trials: optional number of boosting iterations (default 1)
#     costs:  optional matrix giving the cost associated with each type of
#             error
#   Returns a C5.0 model object that can be used for prediction.
#
# Making predictions:
#   p <- predict(m, test, type = "class")
#     m:    a model trained by C5.0(train, class, trials = 1, costs = NULL)
#     test: a data frame of test data with the same features as the training
#           data used to build the classifier
#     type: "class" or "prob" -- whether to return the most likely class
#           value or the raw predicted probabilities
#   Returns the predicted class values (a factor vector) or the raw predicted
#   probabilities, depending on `type`.
#
# Example:
#   credit_model <- C5.0(credit_train, loan_default)
#   credit_prediction <- predict(credit_model, credit_test)
#----------------------------------------------

# Build the decision tree model.
# NOTE(review): the right-hand side of this assignment was lost when the
# listing was extracted; it is reconstructed from the usage notes above.
# The class column is excluded from the predictors by name rather than by
# position, so the call does not depend on the column order of the data.
predictor_cols <- setdiff(names(credit_train), "default")
credit_model <- C5.0(credit_train[predictor_cols], credit_train$default)

# Display basic facts about the tree.
credit_model

# Display detailed information about the tree.
summary(credit_model)

## Step 4: Evaluating model performance ----

# Create a factor vector of predictions on the test data.
# NOTE(review): the right-hand side of this assignment was lost when the
# listing was extracted; predict() on the fitted model is a reconstruction.
credit_pred <- predict(credit_model, credit_test)

# Cross tabulation of predicted versus actual classes.
library(gmodels)

# The labels use plain ASCII quotes: the typographic quotes in the extracted
# listing are not valid R string delimiters.
CrossTable(credit_test$default, credit_pred,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("actual default", "predicted default")
)

## Step 5: Improving model performance ----

# NOTE(review): every assignment in this section lost its right-hand side when
# the listing was extracted. Only the trailing arguments (`trials = 10`,
# `trials = 100`, `costs = error_cost`) survived; the rest is reconstructed
# and should be confirmed against the original material.

# Predictors are all columns except the class column, selected by name so the
# calls do not depend on column order.
boost_predictors <- setdiff(names(credit_train), "default")

## Boosting the accuracy of decision trees

# Boosted decision tree with 10 trials.
credit_boost10 <- C5.0(credit_train[boost_predictors], credit_train$default,
  trials = 10
)
credit_boost10
summary(credit_boost10)

credit_boost_pred10 <- predict(credit_boost10, credit_test)
CrossTable(credit_test$default, credit_boost_pred10,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("actual default", "predicted default")
)

# Boosted decision tree with 100 trials (not shown in text).
credit_boost100 <- C5.0(credit_train[boost_predictors], credit_train$default,
  trials = 100
)
credit_boost_pred100 <- predict(credit_boost100, credit_test)
CrossTable(credit_test$default, credit_boost_pred100,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("actual default", "predicted default")
)

## Making some mistakes more costly than others

# Create a cost matrix.
# NOTE(review): the matrix values were lost in extraction. The values below
# are placeholders that assume two class levels, rows = predicted class,
# columns = actual class, and a missed second-level case costing four times a
# false alarm -- TODO confirm both the values and the orientation.
default_levels <- levels(credit_train$default)
error_cost <- matrix(c(0, 1, 4, 0),
  nrow = 2,
  dimnames = list(predicted = default_levels, actual = default_levels)
)
error_cost

# Apply the cost matrix to the tree.
credit_cost <- C5.0(credit_train[boost_predictors], credit_train$default,
  costs = error_cost
)
credit_cost_pred <- predict(credit_cost, credit_test)
CrossTable(credit_test$default, credit_cost_pred,
  prop.chisq = FALSE, prop.c = FALSE, prop.r = FALSE,
  dnn = c("actual default", "predicted default")
)

#### Part 2: Rule Learners -------------------

## Example: Identifying Poisonous Mushrooms ----

# NOTE(review): every assignment in this part lost its right-hand side when the
# listing was extracted; the calls below are reconstructions to be confirmed.

## Step 2: Exploring and preparing the data ----

# NOTE(review): the file name "mushrooms.csv" is an assumption -- confirm.
# stringsAsFactors = TRUE keeps the categorical columns as factors, which
# read.csv() no longer does by default since R 4.0.
mushrooms <- read.csv("mushrooms.csv", stringsAsFactors = TRUE)

# Examine the structure of the data frame.
str(mushrooms)

# Drop the veil_type feature.
mushrooms$veil_type <- NULL

# Examine the class distribution.
table(mushrooms$type)

## Step 3: Training a model on the data ----

library(RWeka)

# Train OneR() on the data, predicting `type` from all other features.
mushroom_1R <- OneR(type ~ ., data = mushrooms)

## Step 4: Evaluating model performance ----

mushroom_1R
summary(mushroom_1R)

## Step 5: Improving model performance ----

# Train the JRip() rule learner on the same formula.
mushroom_JRip <- JRip(type ~ ., data = mushrooms)
mushroom_JRip
summary(mushroom_JRip)

# Rule Learner Using C5.0 Decision Trees (not in text)

library(C50)

# rules = TRUE asks C5.0() for a rule-based model instead of a tree.
# NOTE(review): the original predictor set is unknown; all features are used
# here -- confirm.
mushroom_c5rules <- C5.0(type ~ ., data = mushrooms, rules = TRUE)
summary(mushroom_c5rules)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值