类别:机器学习个人笔记
参考书籍:《统计学习》、《机器学习实战》、周志华大佬的西瓜书
相关数学公式推导见我上传的手写PDF
任务:
学习《机器学习实战》P78页及P79页程序清单5-1和5-2,完成以下问题:
1)导入数据集 'testSet.txt',用 logistic 回归训练数据集,并画出决策边界。
2)导入数据集 'data.txt',用 logistic 回归训练数据集,并画出决策边界。
# 导包
from numpy import *
import pandas as pd
import numpy as np
from math import exp
import matplotlib.pyplot as plt
def loadDataSet(fileName):
    """Load a whitespace-delimited data set of two features plus a label.

    Every non-blank line must contain at least three fields: the first two
    are parsed as floats and the third as an int label. A constant 1.0 is
    prepended to each sample row (a bias/intercept column).

    Args:
        fileName: Path of the text file to read.

    Returns:
        A ``(dataMat, labelMat)`` tuple, where ``dataMat`` is a list of
        ``[1.0, x1, x2]`` rows and ``labelMat`` is the list of int labels,
        both in file order.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field cannot be converted to a number.
        IndexError: If a non-blank line has fewer than three fields.
    """
    dataMat = []
    labelMat = []
    # The context manager guarantees the handle is closed, even when a
    # malformed line raises part-way through the file.
    with open(fileName) as fr:
        for line in fr:
            lineArr = line.split()
            if not lineArr:
                # Skip blank lines instead of failing with IndexError.
                continue
            dataMat.append([1.0, float(lineArr[0]), float(lineArr[1])])
            labelMat.append(int(lineArr[2]))
    return dataMat, labelMat
def sigmoid(inX):
    """Evaluate the logistic function ``1 / (1 + exp(-inX))``.

    ``inX`` is negated and passed to ``np.exp``, so it may be a scalar or a
    NumPy array/matrix; array inputs are evaluated element-wise.
    """
    decay = np.exp(-inX)
    return 1.0 / (1 + decay)
def gradAscent(dataMatIn, classLabels, alpha=0.001, maxCycles=500):
    """Fit logistic-regression weights with batch gradient ascent.

    Starting from all-ones weights, each iteration computes the sigmoid of
    ``dataMatrix * weights`` for every sample at once and moves the weights
    by ``alpha * dataMatrix.T * (labels - prediction)``.

    Args:
        dataMatIn: 2-D array-like of samples, one row per sample and one
            column per feature.
        classLabels: 1-D sequence with one label per row of ``dataMatIn``
            (presumably 0/1 class labels -- confirm against the data files).
        alpha: Step size applied to each update. Defaults to 0.001.
        maxCycles: Number of update iterations. Defaults to 500.

    Returns:
        An ``(n, 1)`` ``numpy.matrix`` of fitted weights, where ``n`` is the
        number of columns in ``dataMatIn``.
    """
    # np.asmatrix is used instead of the bare ``mat`` alias: that alias was
    # removed in NumPy 2.0, so it is no longer supplied by ``from numpy import *``.
    dataMatrix = np.asmatrix(dataMatIn)
    labelMat = np.asmatrix(classLabels).transpose()  # row vector -> column vector
    n = dataMatrix.shape[1]
    # Kept as a matrix so the return type is the same even when maxCycles == 0.
    weights = np.asmatrix(np.ones((n, 1)))
    for _ in range(maxCycles):
        # ``*`` between matrices is matrix multiplication, not element-wise.
        h = sigmoid(dataMatrix * weights)
        error = labelMat - h
        weights = weights + alpha * dataMatrix.transpose() * error
    return weights
def plotBestFit(weights,filename):
dataMat,labelMat = loadDataSet(filename)
dataArr = array(dataMat)
n = shape(dataArr)[0]
xcord1 = []; ycord1 = []