# coding: utf-8
"""Logistic-regression experiments on a two-feature data file.

The script reads whitespace-separated rows of ``x1 x2 label``, fits
logistic-regression weights with three optimisers (batch gradient ascent,
one-pass stochastic gradient ascent, and a multi-epoch stochastic variant
with a decaying step size) and plots the resulting decision line for each.

Importing this module has no side effects; run it as a script to execute
the whole pipeline (see ``main``).
"""
import random

import numpy as np

try:
    import matplotlib.pyplot as plt
except ImportError:
    # Plotting is optional: the loaders and optimisers only need NumPy.
    plt = None


# Scatter styles for the standardized plots: (label value, scatter kwargs).
# A label of None means "every sample not matched by an earlier entry".
_STANDARDIZED_GROUPS = (
    (0, dict(s=68, c='green', marker='s')),
    (1, dict(s=77, c='darkviolet')),
    (None, dict(s=38, c='red')),
)

# Scatter styles for the raw-data plot.
_RAW_GROUPS = (
    (1, dict(s=30, c='red', marker='s')),
    (None, dict(s=30, c='green')),
)


def loadDataSet(filename='iris.txt'):
    """Read the data file and return ``(dataMat, labelMat)``.

    Each non-blank line must hold at least three whitespace-separated
    fields: two float features followed by an integer label.

    Args:
        filename: Path of the data file. Defaults to ``'iris.txt'``, the
            path the original script hard-coded.

    Returns:
        dataMat: list of ``[1.0, x1, x2]`` rows; the leading 1.0 is the
            bias term.
        labelMat: list of int labels, one per row.
    """
    dataMat = []
    labelMat = []
    # The context manager closes the file even if a row fails to parse.
    with open(filename) as fr:
        for line in fr:
            fields = line.strip().split()
            if not fields:
                continue  # tolerate blank lines (e.g. a trailing newline)
            dataMat.append([1.0, float(fields[0]), float(fields[1])])
            labelMat.append(int(fields[2]))
    return dataMat, labelMat


def countLab(lab):
    """Count the labels in ``lab`` that are 0 and those that are not.

    Args:
        lab: Sequence of labels.

    Returns:
        ``(count1, count2)``: number of labels equal to 0, and number of
        all other labels.
    """
    count1 = sum(1 for value in lab if value == 0)
    return count1, len(lab) - count1


def _zscore(values):
    """Column-wise z-score using the sample standard deviation (ddof=1).

    Columns with zero standard deviation (such as the constant bias
    column) are returned unchanged rather than divided by zero.
    """
    values = np.asarray(values, dtype=float)
    mean = values.mean(axis=0)
    std = values.std(axis=0, ddof=1)
    constant = std == 0
    scaled = (values - mean) / np.where(constant, 1.0, std)
    return np.where(constant, values, scaled)


def standarDataSet(filename='iris.txt'):
    """Load the data file and z-score both the features and the labels.

    Args:
        filename: Path passed through to ``loadDataSet``.

    Returns:
        ``(sx, sy)``: standardized feature array (the constant bias
        column is kept as is) and standardized label array.
    """
    dataArr, labelMat = loadDataSet(filename)
    return _zscore(dataArr), _zscore(labelMat)


def standarDataSet_x(filename='iris.txt'):
    """Load the data file and return only the z-scored feature array.

    Args:
        filename: Path passed through to ``loadDataSet``.
    """
    dataArr, _ = loadDataSet(filename)
    return _zscore(dataArr)


def Max_Min(dataVal, labelVal):
    """Return the column-wise maximum and minimum of ``dataVal``.

    Args:
        dataVal: 2-D array-like of feature rows.
        labelVal: Unused; kept so existing callers keep working.

    Returns:
        ``(max_x, min_x)`` as 1 x n ``numpy.matrix`` objects.
    """
    # np.asmatrix replaces np.mat, which NumPy 2.0 removed.
    dataVal = np.asmatrix(dataVal)
    return dataVal.max(0), dataVal.min(0)


def sigmoid(inz):
    """Logistic function ``1 / (1 + exp(-inz))`` for scalars or arrays.

    Evaluated as ``exp(-log(1 + exp(-inz)))`` through ``np.logaddexp`` so
    that large negative inputs do not overflow inside ``exp``.
    """
    return np.exp(-np.logaddexp(0, -inz))


def gradAscent(dataArr, classLabels, alpha=0.00001, maxCycles=200):
    """Fit logistic-regression weights by batch gradient ascent.

    Args:
        dataArr: 2-D array-like of feature rows (m x n).
        classLabels: Sequence of m labels.
        alpha: Step size.
        maxCycles: Number of full-batch updates.

    Returns:
        n x 1 ``numpy.matrix`` of weights, starting from zeros.
    """
    features = np.asarray(dataArr, dtype=float)
    targets = np.asarray(classLabels, dtype=float).reshape(-1, 1)
    weights = np.zeros((features.shape[1], 1))
    for _ in range(maxCycles):
        # z = w0*x0 + w1*x1 + ... + wn*xn for every row at once.
        error = targets - sigmoid(features.dot(weights))
        weights = weights + alpha * features.T.dot(error)
    # Callers use matrix methods such as .getA(), so keep the matrix type.
    return np.asmatrix(weights)


def yhGradAscent(dataMatrix, classLabels, alpha=0.00001):
    """Fit weights with one pass of stochastic gradient ascent.

    The weights start at ones and are updated after every sample, in
    file order. (The original wrote each update to a separate variable,
    so only the last sample ever influenced the result.)

    Args:
        dataMatrix: 2-D array-like of feature rows (m x n).
        classLabels: Sequence of m labels.
        alpha: Step size.

    Returns:
        1-D ndarray of n weights.
    """
    dataMatrix = np.asarray(dataMatrix, dtype=float)
    _, n = np.shape(dataMatrix)
    weighs = np.ones(n)
    for row, label in zip(dataMatrix, classLabels):
        error = label - sigmoid(np.dot(row, weighs))
        weighs = weighs + alpha * error * row
    return weighs


def stocGradAscent(dataMatrix, classLabels, numIter=150):
    """Fit weights with multi-epoch stochastic gradient ascent.

    Every epoch visits each sample exactly once in a fresh random order
    (sampling without replacement), with a step size that decays as
    ``4 / (1 + epoch + step) + 0.01``. Seed the ``random`` module for
    reproducible results.

    Args:
        dataMatrix: 2-D array-like of feature rows (m x n).
        classLabels: Sequence of m labels.
        numIter: Number of epochs.

    Returns:
        1-D ndarray of n weights, starting from ones.
    """
    dataMatrix = np.asarray(dataMatrix, dtype=float)
    m, n = np.shape(dataMatrix)
    weighs = np.ones(n)
    for j in range(numIter):
        order = list(range(m))
        random.shuffle(order)
        for i, index in enumerate(order):
            alpha = 4 / (1.0 + j + i) + 0.01
            row = dataMatrix[index]
            error = classLabels[index] - sigmoid(np.dot(row, weighs))
            weighs = weighs + alpha * error * row
    return weighs


def _plot_decision_boundary(points, labels, groups, boundary, title):
    """Scatter columns 1 and 2 of ``points`` by class and draw a line.

    ``groups`` is a sequence of ``(label, scatter_kwargs)`` pairs handled
    in order; ``boundary`` maps the x grid to the y values of the line.
    """
    if plt is None:
        raise ImportError('matplotlib is required for plotting')
    points = np.asarray(points, dtype=float)
    labels = np.asarray(labels).astype(int)
    fig = plt.figure()
    ax = fig.add_subplot(111)
    remaining = np.ones(len(labels), dtype=bool)
    for label, style in groups:
        chosen = remaining if label is None else remaining & (labels == label)
        ax.scatter(points[chosen, 1], points[chosen, 2], **style)
        remaining = remaining & ~chosen
    x = np.arange(-2.0, 3, 0.01)
    ax.plot(x, boundary(x))
    plt.title(title)
    plt.xlabel('x1')
    plt.ylabel('x2')
    plt.show()


def plotBestFit_bz(weighs):
    """Plot the standardized data with the line ``w0 + w1*x1 + w2*x2 = 0``.

    Args:
        weighs: Indexable holding three weights (1-D or n x 1 array).
    """
    dataArr, labelMat = loadDataSet()
    _plot_decision_boundary(
        _zscore(dataArr), labelMat, _STANDARDIZED_GROUPS,
        lambda x: (-weighs[0] - weighs[1] * x) / weighs[2],
        'B_zData')


def plotBestFit(weighs):
    """Plot the raw data with the line ``w0 + w1*x1 + w2*x2 = 0``.

    Args:
        weighs: Indexable holding three weights (1-D or n x 1 array).
    """
    dataArr, labelMat = loadDataSet()
    _plot_decision_boundary(
        dataArr, labelMat, _RAW_GROUPS,
        lambda x: (-weighs[0] - weighs[1] * x) / weighs[2],
        'yuanshiData')


def plotBestFit_bz01(weighs):
    """Plot the standardized data for the one-pass stochastic weights.

    Args:
        weighs: Indexable holding three weights.
    """
    dataArr, labelMat = loadDataSet()
    # NOTE(review): the sign of the line is flipped relative to
    # plotBestFit_bz. Kept as in the original; presumably a visual
    # adjustment -- confirm before relying on this plot.
    _plot_decision_boundary(
        _zscore(dataArr), labelMat, _STANDARDIZED_GROUPS,
        lambda x: -((-weighs[0] - weighs[1] * x) / weighs[2]),
        'B_zData_suiji')


def plotBestFit_bz02(weighs):
    """Plot the standardized data for the multi-epoch stochastic weights.

    Args:
        weighs: Indexable holding three weights.
    """
    dataArr, labelMat = loadDataSet()
    # NOTE(review): 0.08 and 2.45 are unexplained constants kept from the
    # original. They look hand-tuned to place a line fitted on raw
    # features onto standardized axes -- confirm or derive them from the
    # feature means and standard deviations.
    _plot_decision_boundary(
        _zscore(dataArr), labelMat, _STANDARDIZED_GROUPS,
        lambda x: (-0.08 * weighs[0] - 2.45 * weighs[1] * x) / weighs[2],
        'B_zData_suiji_gaijin')


def main():
    """Run the full pipeline: load, summarise, train three ways, plot."""
    dataArr, labelMat = loadDataSet()
    print('dataArr= %s' % (dataArr,))
    print('labelMat %s' % (labelMat,))

    count1, count2 = countLab(labelMat)
    print('biaoqian %s %s' % (count1, count2))

    dataArr_b, labelMat_b = standarDataSet()
    print('bz== %s' % (dataArr_b,))
    print('bz %s' % (labelMat_b,))

    max_x, min_x = Max_Min(dataArr, labelMat)
    print('==== %s %s' % (max_x, min_x))

    # Batch gradient ascent, drawn over the standardized data.
    weighs = gradAscent(dataArr, labelMat)
    print('weighs_min= %s' % (weighs.min(0),))
    print('weighs %s' % (weighs,))
    plotBestFit_bz(weighs.getA())

    # One-pass stochastic gradient ascent.
    weighs_new = yhGradAscent(np.array(dataArr), labelMat)
    print('weighs_new: %s' % (weighs_new,))
    plotBestFit_bz01(weighs_new)

    # Multi-epoch stochastic gradient ascent with decaying step size.
    weighs_new01 = stocGradAscent(np.array(dataArr), labelMat)
    print('weighs_new01: %s' % (weighs_new01,))
    plotBestFit_bz02(weighs_new01)


if __name__ == '__main__':
    main()
# Logistic回归代码测试
# 最新推荐文章于 2024-02-28 17:12:01 发布