KNN----python
def euclidean_dist_squared(X, Xtest):
    """Return all pairwise squared Euclidean distances between two row sets.

    Uses the expansion ``||x - z||^2 = ||x||^2 + ||z||^2 - 2 * x.z`` so the
    whole distance matrix comes from one matrix product rather than a
    Python-level double loop.

    Parameters
    ----------
    X : array-like of shape (n, d)
        First set of points, one point per row.
    Xtest : array-like of shape (t, d)
        Second set of points, one point per row.

    Returns
    -------
    numpy.ndarray of shape (n, t)
        Entry ``[i, j]`` is the squared distance between ``X[i]`` and
        ``Xtest[j]``.
    """
    X = np.asarray(X)
    Xtest = np.asarray(Xtest)

    # Squared norms are broadcast as a column (n, 1) and a row (1, t).
    X_sq_norms = np.sum(X**2, axis=1)[:, None]
    Xtest_sq_norms = np.sum(Xtest**2, axis=1)[None]

    # NOTE: floating-point cancellation can leave tiny negative values where
    # the true distance is zero; take care before applying a square root.
    return X_sq_norms + Xtest_sq_norms - 2 * np.dot(X, Xtest.T)
# -*- coding: utf-8 -*-
"""
Created on Fri May 4 09:47:05 2018
@author: sun_y
"""
import numpy as np
from scipy import stats
import utils
class my_knn:
    """k-nearest-neighbours classifier based on squared Euclidean distance.

    Each prediction is the most common label (``utils.mode``) among the
    ``k`` training points closest to the query point.
    """

    def __init__(self, k):
        """Store ``k``, the number of neighbours that vote on a prediction."""
        self.k = k

    def fit(self, X, y):
        """Memorise the training data; no computation happens at fit time.

        Parameters
        ----------
        X : training features, one example per row.
        y : training labels, one per row of ``X``.
        """
        self.X = X
        self.y = y

    def predict(self, Xtest):
        """Predict a label for every row of ``Xtest``.

        Parameters
        ----------
        Xtest : numpy.ndarray
            Query points, one per row.

        Returns
        -------
        numpy.ndarray
            Float array with one predicted label per row of ``Xtest``.
        """
        # Kept as an attribute because earlier versions exposed it.
        self.Xtest = Xtest

        # Assumes utils.euclidean_dist_squared returns one row per test point
        # and one column per training point.
        D = utils.euclidean_dist_squared(self.Xtest, self.X)

        # Column indices of the k smallest distances in each row.
        nearest = D.argsort()[:, :self.k]

        # Look up all neighbour labels in one indexing operation. Labels are
        # cast to float, as before, so utils.mode sees the same dtype.
        neighbour_labels = np.asarray(self.y)[nearest].astype(float)

        yhat = np.zeros(self.Xtest.shape[0])
        for i, votes in enumerate(neighbour_labels):
            yhat[i] = utils.mode(votes)
        return yhat
【定义数组】
x=np.array([[1,2],[3,4]])
【numpy.shape】 x.shape[0]=2 np.shape(x)=(2,2)
【numpy.sort】 x.sort()
x.argsort()
***************************************************************************************************************************
【load CSV file】
import os  # os module: used here to build the file path

import pandas as pd  # pandas module: used here to read the CSV file

# Read fluTrends.csv from the assignment's data directory into a DataFrame.
df = pd.read_csv(os.path.join('E:\\assignment 2018 with code\\p2o1b_a1-master\\p2o1b_a1-master\\data','fluTrends.csv'))
import numpy as np
from sklearn.datasets import load_iris
# sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

from my_knn import my_knn

# Load the iris dataset; n is the number of samples, d the number of features.
iris = load_iris()
n, d = iris.data.shape

# Handy calls for inspecting the dataset:
# print(iris.keys())
# print((n, d))
# print(iris.data[0])
# print(iris.target.shape)
# print(iris.target)
# print(iris.target_names)
# print("feature_names:", iris.feature_names)

# Hold out 10% of the samples for testing (the split is random on each run).
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.1)

KValue = 4
model = my_knn(k=KValue)
model.fit(X_train, y_train)

# Error rate = fraction of samples whose predicted label differs from the truth.
y_pred = model.predict(X_train)
train_error = np.mean(y_pred != y_train)
print("train error is %f" % train_error)

y_pred_test = model.predict(X_test)
test_error = np.mean(y_pred_test != y_test)
print("test error is %f" % test_error)
【测试结果:】
k=4时
train error is 0.029630
test error is 0.066667
***************************************分割线******************************************************************
【python 自带KNN】
import numpy as np
from sklearn import neighbors

# Reference run: scikit-learn's built-in KNN classifier, constructed with no
# arguments, trained and scored on the same split as above.
model2 = neighbors.KNeighborsClassifier()
model2.fit(X_train, y_train)

# Training error: fraction of training samples that are misclassified.
y_pred = model2.predict(X_train)
train_mismatch = y_pred != y_train
train_error = np.mean(train_mismatch)
print("train error is %f" % train_error)

# Test error: fraction of held-out samples that are misclassified.
y_pred_test = model2.predict(X_test)
test_mismatch = y_pred_test != y_test
test_error = np.mean(test_mismatch)
print("test error is %f" % test_error)
train error is 0.022222
test error is 0.066667
测试误差一致(训练误差略有不同;注意 sklearn 的 KNeighborsClassifier 默认 n_neighbors=5,而上面的 my_knn 使用 k=4)
【返回 true 和 false】
return True # 注意大写