Writing a KNN Algorithm from Scratch
1. Implement the KNN algorithm by hand; 2. load the test data from scikit-learn; 3. merge the feature data with the label data, so that returning a predicted label no longer requires a second pass over all samples.
import numpy as np
from scipy.spatial.distance import euclidean
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

def getPredLabel(trainData, testFeature):
    # Return the label of the closest training sample (the k = 1 case of KNN)
    distanceList = []
    for row in trainData:
        # All columns except the last are the four features
        trainFeature = row[:-1]
        # Euclidean distance to the test sample
        distance = euclidean(trainFeature, testFeature)
        distanceList.append(distance)
    # Index of the nearest training sample
    pos = np.argmin(distanceList)
    # The label sits in the last column of the merged array
    preLabel = trainData[pos][-1]
    return preLabel
if __name__ == '__main__':
    # Load the iris dataset
    iris = load_iris()
    # Feature matrix
    data = iris.data
    # Feature (column) names
    col = iris.feature_names
    # Class labels: 0, 1, 2
    label = iris.target
    # Merge features and labels into a single array
    data = np.c_[data, label]
    trainData, testData = train_test_split(data, test_size=0.2, random_state=12)
    accurateAccount = 0
    for row in testData:
        # The four feature values
        testFeature = row[:-1]
        # Predicted label
        predLabel = getPredLabel(trainData, testFeature)
        # True label
        trueLabel = row[-1]
        if predLabel == trueLabel:
            accurateAccount += 1
    testSampleNumbers = testData.shape[0]
    print("Accuracy: {:.1f}%".format(accurateAccount / testSampleNumbers * 100))
Classifying Wine with a scikit-learn Dataset
Classify the wines in scikit-learn's wine dataset, calling the scikit-learn library functions directly.
import numpy as np
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
import matplotlib.pyplot as plt

if __name__ == '__main__':
    wine = load_wine()
    X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.3, random_state=66)
    # Try k = 1..9 and record train/test accuracy for each
    neighbors = np.arange(1, 10)
    train_accuracy = np.empty(len(neighbors))
    test_accuracy = np.empty(len(neighbors))
    for i, k in enumerate(neighbors):
        knn = KNeighborsClassifier(n_neighbors=k)
        knn.fit(X_train, y_train)
        train_accuracy[i] = knn.score(X_train, y_train)
        test_accuracy[i] = knn.score(X_test, y_test)
    # %matplotlib inline  (uncomment when running in a Jupyter notebook)
    plt.plot(neighbors, test_accuracy, label='Test Accuracy')
    plt.plot(neighbors, train_accuracy, label='Train Accuracy')
    plt.legend()
    plt.show()
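With the accuracy arrays filled in, the best value of k on the held-out split can be read off directly. A small follow-up snippet, not in the original:

best_k = neighbors[np.argmax(test_accuracy)]
print("Best k on the test split: {} (accuracy {:.3f})".format(best_k, test_accuracy.max()))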
Writing and Testing a Steepest Descent Program in Python
Write a Python implementation of the steepest descent (gradient descent) method and test it.
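Both programs below implement the fixed-step gradient descent iteration

$$\theta_{k+1} = \theta_k - \alpha \, \nabla f(\theta_k)$$

where $\alpha$ is learningRate; the loop stops once the gradient (norm) falls below precision or after maxIteration steps.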
# One-dimensional steepest descent implementation
'''
@name: objective function
@return: value of the objective at theta
'''
def objectiveFunction1D(theta):
    return theta**2 + 1

'''
@name: gradient function
@return: gradient of the objective at theta
'''
def gradientFunction1D(theta):
    return 2 * theta

'''
@name: steepest descent
@params:
    theta -- starting point, default -100
    learningRate -- step size
    precision -- stop once |gradient| falls below this threshold
    maxIteration -- maximum number of iterations
'''
def gradientDescent(objFun, gradFun, theta=-100, learningRate=0.2, precision=1e-4, maxIteration=10000):
    for i in range(maxIteration):
        gradValue = gradFun(theta)
        theta = theta - learningRate * gradValue
        if abs(gradValue) < precision:
            break
        print("Value after iteration {}: {:.8f}".format(i + 1, theta))
    print("\nAfter {} iterations, local optimum: {:.8f}, objective value: {}".format(i + 1, theta, objFun(theta)))

if __name__ == "__main__":
    gradientDescent(objectiveFunction1D, gradientFunction1D)
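With f(θ) = θ² + 1 and learningRate 0.2, each update is θ ← θ − 0.2 · 2θ = 0.6θ, so the iterates shrink geometrically from the starting point −100 toward the unique minimizer θ* = 0, where f(0) = 1.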
# Two-dimensional steepest descent implementation
import math
import numpy as np

'''
@name: objective function
@return: value of the objective at theta
'''
def objectiveFunction2D(theta):
    return -math.exp(-theta[0]**2 - theta[1]**2)

'''
@name: gradient function
@return: gradient of the objective at theta
'''
def gradientFunction2D(theta):
    # d/dx of -exp(-x^2 - y^2) is 2x * exp(-x^2 - y^2), and likewise for y
    derivative0 = 2 * theta[0] * math.exp(-theta[0]**2 - theta[1]**2)
    derivative1 = 2 * theta[1] * math.exp(-theta[0]**2 - theta[1]**2)
    return np.array([derivative0, derivative1])

'''
@name: steepest descent
@params:
    theta -- starting point, default [1, 1]
    learningRate -- step size
    precision -- stop once the gradient norm falls below this threshold
    maxIteration -- maximum number of iterations
'''
def gradientDescent(objFun, gradFun, theta=np.array([1.0, 1.0]), learningRate=0.2, precision=1e-4, maxIteration=10000):
    for i in range(maxIteration):
        gradValue = gradFun(theta)
        theta = theta - learningRate * gradValue
        if np.linalg.norm(gradValue) < precision:
            break
        print("Value after iteration {}: {}".format(i + 1, theta))
    print("\nAfter {} iterations, local optimum: {}, objective value: {}".format(i + 1, theta, objFun(theta)))

if __name__ == "__main__":
    gradientDescent(objectiveFunction2D, gradientFunction2D)
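As a sanity check, the result can be compared against a library optimizer. A minimal sketch using scipy.optimize.minimize, not part of the original program:

from scipy.optimize import minimize

result = minimize(objectiveFunction2D, x0=np.array([1.0, 1.0]))
print("scipy minimizer:", result.x, "objective value:", result.fun)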
Writing a Linear Regression Program Using Steepest Descent
Write, by hand, a program that fits a linear regression with the steepest descent method.
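The program below fits the standard simple-linear-regression model; the cost and gradients it computes are

$$h_\theta(x) = \theta_0 + \theta_1 x, \qquad J(\theta) = \frac{1}{2m} \sum_{i=1}^{m} \bigl(h_\theta(x_i) - y_i\bigr)^2$$

$$\frac{\partial J}{\partial \theta_0} = \frac{1}{m} \sum_{i=1}^{m} \bigl(h_\theta(x_i) - y_i\bigr), \qquad \frac{\partial J}{\partial \theta_1} = \frac{1}{m} \sum_{i=1}^{m} \bigl(h_\theta(x_i) - y_i\bigr) x_i$$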
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from matplotlib import cm

# Generate random training data around the line y = 5x + 3
m = 100
pa, pb = -3, 3
X = np.random.uniform(pa, pb, size=m).reshape(-1, 1)
y = 5 * X + 3 + np.random.normal(0, (pb - pa) * 0.23, size=m).reshape(-1, 1)

# Inspect the generated data
# %matplotlib inline  (uncomment when running in a Jupyter notebook)
plt.scatter(X[:, -1], y)
plt.axis([pa, pb, 5 * pa + 3, 5 * pb + 3])
plt.show()
# Hypothesis function: h(theta, x) = theta0 + theta1 * x
def h(theta, X):
    return theta[0] + theta[1] * X

# Loss for a single sample (squared error)
def L(theta, X, y):
    return (h(theta, X) - y)**2

# Cost function: halved mean squared error over all samples
def J(theta, X, y):
    m = len(y)
    jSum = 0
    for i in range(m):
        jSum += L(theta, X[i], y[i])
    return jSum / (2 * m)

# Gradient of the cost with respect to theta0 and theta1
def GD(theta, X, y):
    m = len(y)
    gd = np.zeros((2, 1), dtype=float)
    for i in range(m):
        gd[0] += h(theta, X[i]) - y[i]
        gd[1] += (h(theta, X[i]) - y[i]) * X[i]
    return gd / m
# Gradient descent algorithm (the original listing breaks off mid-signature;
# the remaining parameters are completed to match the gradientDescent pattern above)
def GDA(X, y, thetaInit=[0, 0], learningRate=0.2, precision=1e-4, maxIteration=10000):
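    # NOTE: the body below is a reconstruction, not the author's original code,
    # which is cut off at this point. It mirrors the fixed-step loop used by
    # the gradientDescent functions above.
    theta = np.array(thetaInit, dtype=float).reshape(-1, 1)
    for i in range(maxIteration):
        gradValue = GD(theta, X, y)
        theta = theta - learningRate * gradValue
        if np.linalg.norm(gradValue) < precision:
            break
    print("After {} iterations: theta = {}, cost = {}".format(i + 1, theta.ravel(), J(theta, X, y).item()))
    return theta

# Hypothetical usage: on the data generated above, the fit should come out
# close to the true intercept 3 and slope 5
thetaHat = GDA(X, y)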