Python 代码
# coding=utf-8
import random
from sklearn.datasets import load_iris
# Load the iris dataset: X holds the feature rows, y the matching class labels.
iris = load_iris()
X, y = iris.data, iris.target
# 评分公式
from sklearn.metrics import accuracy_score
class MyClf(object):
    """Throwaway baseline classifier that guesses labels at random.

    Every prediction is a label drawn with ``random.choice`` from the
    training targets given to ``fit``; the features are never inspected.
    With three balanced classes the expected accuracy is about one third.
    """

    def fit(self, train_data, train_target):
        """Remember the training samples and their labels.

        train_data: sequence of training samples (stored, never read).
        train_target: sequence of labels, one per training sample.

        Returns ``self`` so that calls can be chained.
        """
        self.train_data = train_data
        self.train_target = train_target
        return self

    def predict(self, test_data):
        """Return a list with one randomly chosen training label per test row."""
        # Only the number of rows in test_data matters; their content is ignored.
        return [random.choice(self.train_target) for _ in test_data]
# 使用k-neighbors原理写的分类器
# 此处为了简单,k取值1
from scipy.spatial import distance
def euc(a, b):
    """Return the Euclidean distance between the points *a* and *b*."""
    return distance.euclidean(a, b)
class MyClf2(object):
    """Nearest-neighbour classifier with k fixed at 1.

    A test sample receives the label of the single training sample that
    lies closest to it under the configured distance function.
    """

    def __init__(self, metric=None):
        """Create the classifier.

        metric: optional callable ``metric(a, b)`` returning the distance
            between two samples. When omitted, the module-level ``euc``
            helper is used.
        """
        self.metric = metric

    def fit(self, train_data, train_target):
        """Remember the training samples and their labels.

        train_data: indexable sequence of training samples.
        train_target: indexable sequence of labels, aligned with train_data.

        Returns ``self`` so that calls can be chained.
        """
        self.train_data = train_data
        self.train_target = train_target
        return self

    def predict(self, test_data):
        """Return a list holding the nearest training label for each test row."""
        return [self.closest(row) for row in test_data]

    def closest(self, row):
        """Return the label of the training sample nearest to *row*.

        Ties are resolved in favour of the earliest training sample.
        Indexing an empty training set raises IndexError.
        """
        # Resolve the default lazily so the helper is only needed when used.
        metric = euc if self.metric is None else self.metric
        best_idx = 0
        best_dis = metric(row, self.train_data[0])
        # range() exists on both Python 2 and 3, unlike xrange().
        for idx in range(1, len(self.train_data)):
            curr_dis = metric(row, self.train_data[idx])
            # Strict comparison keeps the first of several equally close samples.
            if curr_dis < best_dis:
                best_idx, best_dis = idx, curr_dis
        return self.train_target[best_idx]
# Import from sklearn.model_selection instead of sklearn.cross_validation:
# the latter was deprecated in scikit-learn 0.18 and removed in 0.20.
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing; the split is random on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Random baseline: the score is around 0.3 because the target has only
# three distinct values.
clf = MyClf()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
# Pre-formatting the message makes print emit the same text on Python 2 and 3.
print('myclf score: %s' % accuracy_score(y_test, predictions))

# Hand-written nearest-neighbour classifier (k = 1).
clf = MyClf2()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)
print('myclf2 score: %s' % accuracy_score(y_test, predictions))
截图
我们自己写的 k 近邻分类器(MyClf2)的正确率达到了约 97%(由于 train_test_split 每次随机划分数据,具体数值会略有浮动)