AdaBoost和随机森林算法实现(sklearn调用)

代码实现:AdaBoost

from numpy import *
import numpy as np
import cv2
import matplotlib.pyplot as plt

import pandas as pd
import time
from numpy import *
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

def loadDataSet(fileName):
    """Load a four-space-delimited dataset file.

    Each non-empty line must contain four fields separated by four spaces:
    three float features followed by an integer class label.

    Args:
        fileName: path to the text file.

    Returns:
        (dataMat, labelMat): a list of [f0, f1, f2] float feature rows and
        the parallel list of int labels.
    """
    dataMat = []
    labelMat = []
    with open(fileName) as fr:
        for line in fr:
            line = line.strip()
            if not line:
                # Robustness: skip blank lines (e.g. a trailing newline),
                # which would otherwise crash float('').
                continue
            # map(float, ...) already yields floats -- no need to re-cast
            # each field as the original did.
            fields = list(map(float, line.split('    ')))
            dataMat.append(fields[0:3])
            labelMat.append(int(fields[3]))
    return dataMat, labelMat

# Load the training set: three float32 features per row plus an int label.
train_data,train_label = loadDataSet('img/txt/left.txt')   # 1. load a txt dataset
train_data = mat(train_data)
train_data=np.array(train_data, dtype='float32')
# train_label = mat(train_label)
# print(train_data.shape,train_label)

# Load the test set (same format as the training set).
test_data,test_label = loadDataSet('img/txt/right.txt')   # 1. load a txt dataset
test_data = mat(test_data)
test_data=np.array(test_data, dtype='float32')
# test_label=mat(test_label)



# n_estimators: number of boosting rounds (default 50). Too large and the
# model tends to overfit; too small and it underfits.
# algorithm: 'SAMME' or 'SAMME.R' (default 'SAMME.R'). They differ in how the
# weak learners are weighted: SAMME uses the predicted class labels
# (misclassification rate), SAMME.R uses the predicted class probabilities.
clf = AdaBoostClassifier(n_estimators=100,algorithm='SAMME.R',learning_rate=0.1)
clf.fit(train_data,train_label)


# Evaluate accuracy on the test set.
# The original looped sample-by-sample, calling predict() and
# accuracy_score() once per row (each call yielding 0.0 or 1.0, then
# averaging). A single vectorized predict() plus one accuracy_score()
# computes the identical mean accuracy in two sklearn calls.
# Note accuracy_score's signature is (y_true, y_pred); accuracy is
# symmetric, but keeping the documented order avoids confusion.
pred = clf.predict(test_data)
acc = accuracy_score(test_label, pred)
print(acc)

代码实现:随机森林

from numpy import *
import numpy as np
import cv2
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score

def loadDataSet(fileName):
    """Read a four-space-delimited text file into features and labels.

    Every line holds three float features followed by one integer label.

    Returns:
        (dataMat, labelMat): parallel lists of [f0, f1, f2] rows and labels.
    """
    features, labels = [], []
    with open(fileName) as fh:
        for row in fh.readlines():
            values = [float(tok) for tok in row.strip().split('    ')]
            features.append(values[:3])
            labels.append(int(values[3]))
    return features, labels

# Load the training set: three float32 features per row plus an int label.
train_data,train_label = loadDataSet('img/txt/left.txt')   # 1. load a txt dataset
train_data = mat(train_data)
train_data=np.array(train_data, dtype='float32')
# train_label = mat(train_label)
# print(train_data.shape,train_label)

# Load the test set (same format as the training set).
test_data,test_label = loadDataSet('img/txt/right.txt')   # 1. load a txt dataset
test_data = mat(test_data)
test_data=np.array(test_data, dtype='float32')
# test_label=mat(test_label)


# NOTE(review): only the right.txt set is split here (70% train / 30% test);
# train_data/train_label loaded from left.txt above are never used below --
# confirm whether train_test_split was meant to receive train_data instead.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(test_data,test_label,test_size=0.3)

# --- Single decision tree baseline ---
clf = DecisionTreeClassifier(random_state=0)
clf = clf.fit(Xtrain,Ytrain)
score_c = clf.score(Xtest,Ytest)
print("Single Tree:{}".format(score_c))

# --- Random forest ---
# oob_score=True: estimate generalization accuracy from the out-of-bag
# samples (rows each tree did not see during bootstrap sampling) -- this is
# OOB scoring, not cross-validation.
# criterion: split-quality measure, 'gini' or 'entropy', same as for a
# single decision tree.
# max_features='sqrt': consider sqrt(n_features) candidate features per
# split. The legacy value 'auto' (an alias of 'sqrt' for classifiers) was
# deprecated in scikit-learn 1.1 and removed in 1.3, so it now raises on
# current versions; 'sqrt' is the drop-in equivalent.
rfc = RandomForestClassifier(criterion='gini', oob_score=True, n_jobs=1,
                             random_state=1, max_features='sqrt')
rfc = rfc.fit(Xtrain,Ytrain)
score_r = rfc.score(Xtest,Ytest)
print("Random Forest:{}".format(score_r))

results = rfc.predict(Xtest)
# predict_proba gives one probability per class; each row sums to 1.
results_proba = rfc.predict_proba(Xtest)
print(results)
# print(results_proba)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值