AdaBoost算法
算法原理
数学原理
#!/usr/bin/env python
# encoding: utf-8
from __future__ import print_function
from numpy import *
import matplotlib.pyplot as plt
def loadSimData():
    '''
    Build the small hard-coded toy data set used to exercise the classifier.

    :return:
        datMat : 5x2 numpy matrix, one sample per row and one feature per column
        classLabels : list of 5 float labels (+1.0 / -1.0), one per row of datMat
    '''
    datMat = matrix([
        [1., 2.1],
        [2., 1.1],
        [1.3, 1.],
        [1., 1.],
        [2., 1.],
    ])
    classLabels = [1.0, 1.0, -1.0, -1.0, 1.0]
    return datMat, classLabels
def loadDataSet(fileName):
    '''
    Load a tab-separated text file into feature rows and labels.

    Each non-blank line holds the feature values followed by the label in
    the last column. The column count is taken from the first line of the
    file; every value is converted with float().

    :param fileName: path of the tab-delimited file to read
    :return:
        dataArr : list of rows, each a list of float feature values
        labelArr : list of float labels, one per row of dataArr
    '''
    dataArr = []
    labelArr = []
    # A single context-managed handle guarantees the file is closed, even
    # if a conversion below raises.
    with open(fileName) as fr:
        lines = fr.readlines()
    if not lines:
        return dataArr, labelArr
    # Total number of columns (features + label), taken from the first line.
    numFeat = len(lines[0].split('\t'))
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Blank lines (e.g. a trailing newline at end of file) carry no
            # sample; float('') would raise on them.
            continue
        curLine = stripped.split('\t')
        dataArr.append([float(curLine[i]) for i in range(numFeat - 1)])
        labelArr.append(float(curLine[-1]))
    return dataArr, labelArr
def stumpClassify(dataMatrix, dimen, threshVal, threshIneq):
'''
:param dataMatrix:
:param dimen: 维数
:param threshVal: 比较值
:param threshIneq: 表示比较大小符号
:return:
retArray 结果集
'''
# 形成一个m*1的向量
retArray = ones((shape(dataMatrix)[0], 1