# Kaggle的Digit Recognizer题目实现

290人阅读 评论(0)

机器学习看了有一阵子了，对一些常用的算法已经有了一定的了解，应该拿个项目/比赛练习一下，看看机器学习到底是如何应用的。Kaggle是个非常不错的机器学习和数据挖掘比赛网站，网站提供数据，可以拿来练习算法。下面是Kaggle入门（101）类比赛里的第一道题Digit Recognizer（手写数字识别），用Python实现，应用了KNN、SVM和RF（随机森林）三种算法，对每个样本取三者中预测概率最高的结果作为最终预测。今天先贴上代码，过几天把相应的算法原理也写上，方便深入理解。这段代码参照了别的大神写的代码，并进行了一定的修改和优化。数据源可以从如下网址获取：https://www.kaggle.com/c/digit-recognizer/data

import numpy as np
import operator
import csv
import scipy
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier

def read_data(filename, header=True, test=False, rows=0):
    """Load digit samples (and, for training files, their labels) from a CSV.

    NOTE(review): the ``def`` line and the file-reading loop were missing from
    the published listing; the signature is reconstructed from the call sites
    ``read_data('train.csv', rows=100)`` and
    ``read_data('test.csv', test=True, rows=100)`` -- confirm the defaults.

    Args:
        filename: path of the CSV file to read.
        header: when true, the first line of the file is skipped.
        test: when false, the first column of every row is parsed as the
            integer label and removed from the sample; when true, every
            column is kept as sample data and no labels are collected.
        rows: if greater than 0, stop after this many lines of the file.
            The header line counts towards the limit.

    Returns:
        A ``(data, labels)`` tuple. ``data`` is a list with one ``int64``
        NumPy array per row; ``labels`` is a list of ints (empty when
        ``test`` is true).
    """
    data = []
    labels = []
    with open(filename) as csv_file:
        # Line numbers start at 1 so that `rows` and the header test read naturally.
        for index, line in enumerate(csv.reader(csv_file), 1):
            # `and`, not `&`: `&` binds tighter than the comparisons, which
            # turned the row limit into a no-op and made the header test
            # skip every odd-numbered line.
            if rows > 0 and index > rows:
                break
            if header and index == 1:
                continue
            if not test:
                labels.append(int(line[0]))
                line = line[1:]
            data.append(np.array([int(value) for value in line], dtype=np.int64))
    return data, labels

def predictKNN(train, labels, test):
    """Classify ``test`` with a default-configured k-nearest-neighbours model.

    Args:
        train: training samples, passed unchanged to
            ``KNeighborsClassifier.fit``.
        labels: target labels for ``train``.
        test: samples to classify.

    Returns:
        A ``(predict, max_pre_pro)`` tuple: the result of ``predict(test)``
        and, for each row of ``predict_proba(test)``, its largest value.
    """
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('KNN starts...')
    knn = KNeighborsClassifier()
    knn.fit(train, labels)
    predict = knn.predict(test)
    # Keep only the highest class probability of every test sample.
    max_pre_pro = knn.predict_proba(test).max(axis=1)
    print('KNN ends...')
    return predict, max_pre_pro


# Alias in the "predic*" spelling shared by predicSVC / predicRF, so that
# either spelling of the KNN helper resolves.
predicKNN = predictKNN

def predicSVC(train, labels, test):
    """Classify ``test`` with a probability-enabled support vector classifier.

    Args:
        train: training samples, passed unchanged to ``SVC.fit``.
        labels: target labels for ``train``.
        test: samples to classify.

    Returns:
        A ``(predict, max_pre_pro)`` tuple: the result of ``predict(test)``
        and, for each row of ``predict_proba(test)``, its largest value.
    """
    print('SVC starts...')
    # probability=True is what makes predict_proba available below.
    model = SVC(probability=True)
    model.fit(train, labels)
    predicted_labels = model.predict(test)
    class_probabilities = model.predict_proba(test)
    top_probability = class_probabilities.max(axis=1)
    print('SVC ends...')
    return predicted_labels, top_probability

def predicRF(train, labels, test, label=None):
    """Classify ``test`` with a 200-tree random forest.

    Args:
        train: training samples, passed unchanged to
            ``RandomForestClassifier.fit``.
        labels: target labels for ``train``.
        test: samples to classify.
        label: unused. Kept (now optional) so existing four-argument calls
            keep working while the signature lines up with the other
            ``predic*`` helpers.

    Returns:
        A ``(predict, max_pre_pro)`` tuple: the result of ``predict(test)``
        and, for each row of ``predict_proba(test)``, its largest value.
    """
    # Parenthesised single-argument print works on both Python 2 and 3.
    print('RF starts...')
    forest = RandomForestClassifier(n_estimators=200, n_jobs=2)
    forest.fit(train, labels)
    predict = forest.predict(test)
    # Keep only the highest class probability of every test sample.
    max_pre_pro = forest.predict_proba(test).max(axis=1)
    print('RF ends...')
    return predict, max_pre_pro

class PredicScore(object):
    """Pair one classifier's predicted label with its confidence score.

    Instances are compared by ``score`` (e.g. with
    ``max(items, key=operator.attrgetter('score'))``) to pick the most
    confident prediction for a sample.
    """

    # Class-level fallbacks; every instance shadows them in __init__.
    predict = -1
    score = 0

    def __init__(self, predict, score):
        """Store the predicted label and the score attached to it."""
        self.predict = predict
        self.score = score

    def __repr__(self):
        """Return a debugging representation showing both fields."""
        return '%s(predict=%r, score=%r)' % (
            type(self).__name__, self.predict, self.score)

# Script entry point: train KNN, SVC and RF on the first lines of train.csv,
# classify the first lines of test.csv with each, and for every test sample
# keep the label of whichever model reported the highest class probability.
if __name__ == '__main__':
    print('test begins...')
    train, labels = read_data('train.csv', rows=100)
    test, label = read_data('test.csv', test=True, rows=100)
    predict_RF, max_pre_pro_RF = predicRF(train, labels, test, label)
    predict_KNN, max_pre_pro_KNN = predictKNN(train, labels, test)
    predict_SVC, max_pre_pro_SVC = predicSVC(train, labels, test)

    result = []
    # Walk the three models' outputs in lockstep, one tuple per test sample.
    per_sample = zip(predict_KNN, max_pre_pro_KNN,
                     predict_SVC, max_pre_pro_SVC,
                     predict_RF, max_pre_pro_RF)
    for knn_label, knn_score, svc_label, svc_score, rf_label, rf_score in per_sample:
        # Order matters on ties: max() returns the first of equal scores,
        # so KNN wins over SVC, which wins over RF.
        scoreArray = [
            PredicScore(knn_label, knn_score),
            PredicScore(svc_label, svc_score),
            PredicScore(rf_label, rf_score),
        ]
        max_score_obj = max(scoreArray, key=operator.attrgetter('score'))
        result.append(max_score_obj.predict)

    # One predicted label per line, no header row.
    # NOTE(review): Kaggle may expect an "ImageId,Label" header -- confirm
    # against the competition's sample submission before uploading.
    np.savetxt('submission.csv', result, fmt='%i', delimiter=',')
    print('done')



0
0

* 以上用户言论只代表其个人观点，不代表CSDN网站的观点或立场
个人资料
• 访问：8925次
• 积分：172
• 等级：
• 排名：千里之外
• 原创：8篇
• 转载：1篇
• 译文：0篇
• 评论：0条
文章分类
文章存档