# KNN (k-nearest neighbours) classifier

import os
import cv2
import numpy as np

# public leaderboard
# k   score
# 1   0.62718
# 3   0.64014
# 5   0.64081
# 7   0.64022
# 9   0.63733

# Dataset locations (Windows-style absolute paths). Note that test_path ends
# with a path separator while train_path does not.
train_path = 'F:\\ESL\\ESL-master\\ESL-master\\quickdraw-data\\train'
test_path = 'F:\\ESL\\ESL-master\\ESL-master\\quickdraw-data\\released_test\\'

# Category name -> integer class id; the id is the name's position in the
# tuple below (0 for 'airplane' ... 14 for 'truck').
category_mapping = {
    name: index
    for index, name in enumerate((
        'airplane', 'ant', 'bear', 'bird', 'bridge',
        'bus', 'calendar', 'car', 'chair', 'dog',
        'dolphin', 'door', 'flower', 'fork', 'truck',
    ))
}

# Ids of the test images (file name up to the first '.'), appended by
# load_data when it is called with train=False.
id_list = []
 
def _read_images(paths):
    """Read each image as grayscale, resize to 28x28 and flatten to one row.

    Returns a float32 array of shape (len(paths), 784) with pixel values
    divided by 255.

    Raises:
        IOError: if OpenCV cannot read one of the files.
    """
    data_x = np.empty((len(paths), 784), dtype="float32")
    for row, path in enumerate(paths):
        img = cv2.imread(path, 0)  # flag 0: load as single-channel grayscale
        if img is None:
            # cv2.imread reports failure by returning None instead of raising;
            # fail here with the offending path rather than inside cv2.resize.
            raise IOError('cannot read image: %s' % path)
        img = cv2.resize(img, (28, 28))
        data_x[row, :] = np.asarray(img, dtype="float32").flatten()
    return data_x / 255


def load_data(data_path, train=True):
    """Load images from ``data_path`` as flattened, normalised feature rows.

    With ``train=True`` the directory is expected to contain one
    sub-directory per category, named after a key of ``category_mapping``;
    every file inside is loaded and labelled with the mapped class id.

    With ``train=False`` every file directly inside ``data_path`` is loaded,
    and the part of each file name before the first '.' is appended to the
    module-level ``id_list`` (in the same order as the returned rows).

    Args:
        data_path: directory to read; a trailing path separator is optional.
        train: select the labelled (True) or unlabelled (False) layout.

    Returns:
        ``(data_x, data_y)`` where ``data_x`` is a float32 array of shape
        (n_images, 784) scaled by 1/255, and ``data_y`` holds the class ids
        (train) or the row indices 0..n-1 (test).

    Raises:
        KeyError: if a training sub-directory is not in ``category_mapping``.
        IOError: if an image file cannot be read.
    """
    paths = []

    if train:
        labels = []
        for label in os.listdir(data_path):
            # os.path.join keeps this working with any platform separator.
            label_dir = os.path.join(data_path, label)
            for fname in os.listdir(label_dir):
                paths.append(os.path.join(label_dir, fname))
                labels.append(category_mapping[label])
        return _read_images(paths), np.asarray(labels)

    for fname in os.listdir(data_path):
        paths.append(os.path.join(data_path, fname))
        id_list.append(fname.split('.')[0])

    # Test images have no labels; the second value is just the row index.
    return _read_images(paths), np.asarray(list(range(len(paths))))

def distance(X_test, X_train):
    """Return the pairwise Euclidean distances between two sets of rows.

    Uses the expansion ||a - b||^2 = ||a||^2 + ||b||^2 - 2 * a.b so the whole
    matrix is computed with one matrix product.

    Args:
        X_test: 2-D array, one sample per row.
        X_train: 2-D array with the same number of columns as ``X_test``.

    Returns:
        Array of shape (X_test.shape[0], X_train.shape[0]) whose entry
        [i, j] is the distance between X_test[i] and X_train[j].
    """
    dist1 = np.multiply(np.dot(X_test, X_train.T), -2)
    dist2 = np.sum(np.square(X_test), axis=1, keepdims=True)
    dist3 = np.sum(np.square(X_train.T), axis=0, keepdims=True)
    squared = dist1 + dist2 + dist3
    # Rounding error can leave tiny negative values for (near-)identical
    # rows; clamp them so np.sqrt does not produce NaN.
    return np.sqrt(np.maximum(squared, 0))

def predict(X_test, X_train, Y_train, k = 1):
    """Classify each test row by majority vote among its k nearest rows.

    Args:
        X_test: 2-D array of samples to classify, one per row.
        X_train: 2-D array of reference samples, one per row.
        Y_train: 1-D array of non-negative integer labels for ``X_train``
            (required by ``np.bincount``).
        k: number of nearest neighbours that vote; must be at least 1.
            Values larger than the number of training rows use all rows.

    Returns:
        1-D float array with one predicted label per test row. When several
        labels tie for the most votes, the smallest label wins.

    Raises:
        ValueError: if ``k`` is smaller than 1.
    """
    if k < 1:
        # A negative k would make the [:k] slice below silently select almost
        # every neighbour, and k == 0 would fail inside np.argmax.
        raise ValueError('k must be a positive integer, got %r' % (k,))

    distances = distance(X_test, X_train)
    num_test = X_test.shape[0]
    Y_pred = np.zeros(num_test)
    for i in range(num_test):
        nearest = np.argsort(distances[i])[:k]
        votes = np.bincount(Y_train[nearest])
        Y_pred[i] = np.argmax(votes)

    return Y_pred


if __name__ == "__main__":
    # Load the labelled training images and the unlabelled test images;
    # loading the test split also fills id_list.
    X_train, Y_train = load_data(train_path, train=True)
    X_test, _ = load_data(test_path, train=False)

    Y_pred = predict(X_test, X_train, Y_train, k=3)

    # Write the predictions to stdout as CSV: header first, then one
    # "id,label" line per test image.
    print('id,categories')
    for row, label in enumerate(Y_pred):
        print('%s,%d' % (id_list[row], label))

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值