# kaggle练习——手写体识别

# -*- coding: utf-8 -*-
"""
Created on Sun Apr 22 10:25:14 2018

@author: zhangsh
"""

import csv
import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Convert a list into a NumPy array
def listToArray(dataList):
    """Convert a flat sequence of integer-like values into a 1 x m array.

    Each element is passed through ``int()`` (so numeric strings such as
    ``'255'`` are accepted) and stored in a float64 array, matching what
    ``np.zeros`` would have produced.

    Args:
        dataList: Sized sequence whose items are accepted by ``int()``.

    Returns:
        numpy.ndarray of shape ``(1, len(dataList))`` and dtype float64.

    Raises:
        ValueError: If an element cannot be converted by ``int()``.
    """
    m = len(dataList)  # number of elements, i.e. columns of the result
    # fromiter converts in a single pass without building an intermediate list.
    flat = np.fromiter(map(int, dataList), dtype=np.float64, count=m)
    return flat.reshape(1, m)

# Binarize pixel data: every non-zero value becomes 1
def dataNormalize(data):
    """Binarize an array in place by setting every non-zero entry to 1.

    Args:
        data: numpy.ndarray of pixel values; it is modified in place.

    Returns:
        The same array object that was passed in, now containing only
        zeros and ones.
    """
    # One boolean-mask assignment replaces the per-cell Python loops.
    data[data != 0] = 1
    return data

# Load the training set
def loadTrainDataSet():
    """Read ``train.csv`` and return binarized features with their labels.

    The first row of the file is treated as a header and skipped. In every
    following row the first column is taken as the label and the remaining
    columns as feature values.

    Returns:
        A tuple ``(features, labels)`` where ``features`` is the output of
        ``dataNormalize`` on an array of shape ``(rows, columns - 1)`` and
        ``labels`` is an array of shape ``(rows, 1)``.

    Raises:
        ValueError: If ``train.csv`` contains no header row.
    """
    trainData = []
    trainLabel = []

    # newline='' is the mode the csv module documents for reading files.
    with open('train.csv', 'r', newline='') as file:
        readCSV = csv.reader(file)
        header = next(readCSV, None)  # first row is a description row
        if header is None:
            raise ValueError('train.csv is empty: missing header row')
        n = len(header) - 1  # feature columns = all columns minus the label
        for line in readCSV:
            if not line:  # csv.reader yields [] for blank lines
                continue
            trainLabel.append(line[0])
            trainData.extend(line[1:])

    rows = len(trainLabel)
    trainLabelArray = listToArray(trainLabel).reshape((rows, 1))
    trainDataArray = listToArray(trainData).reshape((rows, n))

    return dataNormalize(trainDataArray), trainLabelArray

# Load the test set
def loadTestDataSet():
    """Read ``test.csv`` and return its rows as a binarized feature array.

    The first row of the file is treated as a header and skipped; every
    column of the following rows is used as a feature value.

    Returns:
        The output of ``dataNormalize`` on an array of shape
        ``(rows, columns)``.

    Raises:
        ValueError: If ``test.csv`` contains no header row.
    """
    testData = []
    rows = 0

    # newline='' is the mode the csv module documents for reading files.
    with open('test.csv', 'r', newline='') as file:
        readCSV = csv.reader(file)
        header = next(readCSV, None)  # first row is a description row
        if header is None:
            raise ValueError('test.csv is empty: missing header row')
        n = len(header)  # number of feature columns
        for line in readCSV:
            if not line:  # csv.reader yields [] for blank lines
                continue
            rows += 1
            testData.extend(line)

    testDataArray = listToArray(testData).reshape((rows, n))
    return dataNormalize(testDataArray)

# Build the model, train it, and predict the test set
def knnClassifier():
    """Train a KNN classifier on the training set and write test predictions.

    Loads data through ``loadTrainDataSet`` and ``loadTestDataSet``, fits a
    ``KNeighborsClassifier`` using the ball-tree algorithm, predicts each
    test row in turn (printing progress per row), and writes the results to
    ``result.csv`` with the header ``ImageId,Label``.
    """
    trainingData, trainingLabel = loadTrainDataSet()  # load the training set
    testingData = loadTestDataSet()  # load the test set
    print(testingData.shape)

    knn = KNeighborsClassifier(algorithm='ball_tree')  # build the KNN model
    # The labels arrive as a column vector; scikit-learn expects a 1-D target
    # and emits a DataConversionWarning otherwise, so flatten before fitting.
    knn.fit(trainingData, np.ravel(trainingLabel))  # train the model

    # Rows to be written out; the first row is the CSV header.
    testResult = [('ImageId', 'Label')]

    # Image ids in the output are 1-based.
    for i, line in enumerate(testingData, start=1):
        predictLabel = knn.predict(line.reshape((1, -1)))
        testResult.append((i, int(predictLabel[0])))
        print('预测第%d条数据' % i)

    with open('result.csv', 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerows(testResult)
# Run the full train/predict pipeline only when executed as a script.
if __name__ == "__main__":
    knnClassifier()


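# NOTE: The lines below are web-page residue (a "report this post" reason
# list and a counter) captured when this script was copied; they are not code.
# • Advertisement
# • Plagiarism
# • Copyright
# • Politics
# • Pornography
# • Meaningless
# • Other
#
# 120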