3.4 10折交叉验证和留一法对率回归的错误率

"""
Author: Victoria
Created on: 2017.9.15 11:00
"""
import numpy as np
import matplotlib.pyplot as plt

def readData(path="../data/bezdekIris.txt"):
    """
    Read the Iris data from a comma-separated txt file.

    Each non-empty line is expected to hold four numeric attributes followed
    by the class name. Blank lines (e.g. a trailing empty line at the end of
    the file) are skipped instead of crashing on float('').
    Input:
        path: location of the data file. Defaults to the path the script
              has always used.
    Return:
        X1, y1, X2, y2, X3, y3: X is a list of 4-element float lists,
                                y is the list of matching labels.
                                "Iris-setosa" rows go to X1 (label 1),
                                "Iris-versicolor" rows go to X2 (label 2),
                                every other row goes to X3 (label 3).
    """
    X1 = []
    y1 = []
    X2 = []
    y2 = []
    X3 = []
    y3 = []
    #read data from txt file
    with open(path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                #nothing to parse on an empty line
                continue
            iris = line.split(",")
            x = [float(attr) for attr in iris[0:4]]

            if iris[4]=="Iris-setosa":
                X1.append(x)
                y1.append(1)
            elif iris[4]=="Iris-versicolor":
                X2.append(x)
                y2.append(2)
            else:
                #any remaining class name is treated as the third class
                X3.append(x)
                y3.append(3)
    return X1, y1, X2, y2, X3, y3

def tenFoldData(X1, X2):
    """
    Split two classes into 10 folds for cross validation.

    Fold k takes instances 5k..5k+4 of X1 followed by instances 5k..5k+4 of
    X2, so with 50 instances per class every fold has 5 positive and
    5 negative examples.
    Input:
        X1: list with shape [50, 4]. Instances of the positive class.
        X2: list with shape [50, 4]. Instances of the negative class.
    Return:
        folds: list with shape [10, 10, 4].
        y: list with shape [10, 10]. Each entry is five 1s then five 0s.
    """
    print (len(X1))
    print (len(X2))
    per_class = 5
    starts = range(0, 10 * per_class, per_class)
    folds = [X1[s:s + per_class] + X2[s:s + per_class] for s in starts]
    #one independent label list per fold
    y = [[1] * per_class + [0] * per_class for _ in starts]
    return folds, y

def LR(X, y, lr=0.001, n_iter=150):
    """
    Fit logistic regression with batch gradient descent.

    Starting from beta = 0.1 for every coefficient, the parameters are
    updated n_iter times along the negative gradient of the negative
    log-likelihood, using a fixed step size. No second-order (Newton)
    information is used.
    Input:
        X: np.array with shape [N, d]. Input.
        y: np.array with shape [N, 1] (or [N]). Labels in {0, 1}.
        lr: step size of each gradient update.
        n_iter: number of gradient updates.
    Return:
        beta: np.array with shape [1, d]. Params after the last update.
    """
    _, d = X.shape
    #force a column vector so that (y - p1) never broadcasts to [N, N]
    y = np.asarray(y).reshape(-1, 1)
    #initialization
    beta = np.ones((1, d)) * 0.1

    for _ in range(n_iter):
        #shape [N, 1]
        z = X.dot(beta.T)
        #sigmoid written with tanh: equal to exp(z) / (1 + exp(z)) but it
        #cannot overflow to inf/inf = nan when |z| is large
        p1 = 0.5 * (1.0 + np.tanh(0.5 * z))
        #gradient of the negative log-likelihood, shape [1, d]
        first_order = -np.sum(X * (y - p1), 0, keepdims=True)

        #update
        beta -= first_order * lr

    return beta


def testing(beta, X, y):
    """
    Given trained LR model, return error number in input X.
    Input:
        beta: np.array with shape [1, d]. params of LR model
        X: np.array with shape [N, d]. Testing instances.
        y: np.array with shape [N, 1]. Testing labels.
    Return:
        error_num: Error num of LR on X.
    """
    predicts = ( X.dot(beta.T) >= 0 )
    error_num = np.sum(predicts != y)
    return error_num

def tenFoldCrossValidation(folds, y):
    """
    Return the error num of 10-fold cross validation.

    Each of the 10 folds is held out once; a model is trained with LR on the
    other nine folds and the misclassified held-out instances are counted
    with testing.
    Input:
        folds: list with shape [10, 10, 4].
        y: list with shape [10, 10]
    Return:
        ten_fold_error_nums: errors summed over the 10 held-out folds.
    """
    total_errors = 0
    for held_out in range(10):
        #held-out fold used for validation
        check_X = folds[held_out]
        check_y = y[held_out]
        #the nine remaining folds, flattened to [90, 4] / [90, 1]
        fit_X = np.array(folds[:held_out] + folds[held_out+1:]).reshape(-1, 4)
        fit_y = np.array(y[:held_out] + y[held_out+1:]).reshape([-1, 1])
        check_X = np.array(check_X)
        check_y = np.array(check_y).reshape([-1, 1])
        model = LR(fit_X, fit_y)
        total_errors += testing(model, check_X, check_y)
    return total_errors

def LOO(X, y):
    """
    Return the error num of leave-one-out validation.

    Each of the 100 instances is held out once; a model is trained with LR
    on the other 99 and the held-out instance is checked with testing.
    Input:
        X: list with shape [100, 4].
        y: list with shape [100]
    Return:
        loo_error_nums: number of held-out instances that were misclassified.
    """
    misses = 0
    for left_out in range(100):
        #the single held-out instance and its label
        check_X = X[left_out]
        check_y = y[left_out]
        #everything else is training data
        fit_X = np.array(X[:left_out] + X[left_out+1:]).reshape(-1, 4)
        fit_y = np.array(y[:left_out] + y[left_out+1:]).reshape([-1, 1])
        #check_X stays 1-D with shape [4]; check_y becomes shape [1, 1]
        check_X = np.array(check_X)
        check_y = np.array(check_y).reshape([-1, 1])
        model = LR(fit_X, fit_y)
        misses += testing(model, check_X, check_y)
    return misses

if __name__=="__main__":
    #data read from txt file. The stored labels y1..y3 are not used below:
    #every round relabels its two classes as 1 (positive) / 0 (negative).
    X1, y1, X2, y2, X3, y3 = readData()

    #10-fold cross validation, one round per pair of classes
    print ("10-fold cross validation...")
    #X1 (positive), X2 (negative)
    folds, y = tenFoldData(X1, X2)
    round1_ten_fold_error_nums = tenFoldCrossValidation(folds, y)
    #X1 (positive), X3 (negative)
    folds, y = tenFoldData(X1, X3)
    round2_ten_fold_error_nums = tenFoldCrossValidation(folds, y)
    #X3 (positive), X2 (negative)
    folds, y = tenFoldData(X3, X2)
    round3_ten_fold_error_nums = tenFoldCrossValidation(folds, y)
    ten_fold_error_nums = round1_ten_fold_error_nums + round2_ten_fold_error_nums \
                          + round3_ten_fold_error_nums

    #LOO, same three pairs. The label vector is built from the lengths of
    #the two classes actually concatenated in that round.
    print ("LOO ...")
    #X1 (positive), X2 (negative)
    X = X1 + X2
    y = [1]*len(X1) + [0]*len(X2)
    round1_loo_error_nums = LOO(X, y)
    #X1 (positive), X3 (negative)
    X = X1 + X3
    y = [1]*len(X1) + [0]*len(X3)
    round2_loo_error_nums = LOO(X, y)
    #X3 (positive), X2 (negative)
    X = X3 + X2
    y = [1]*len(X3) + [0]*len(X2)
    round3_loo_error_nums = LOO(X, y)
    loo_error_nums = round1_loo_error_nums + round2_loo_error_nums \
                     + round3_loo_error_nums

    #per-round counts, then totals over the 3 rounds x 100 instances
    print (round1_ten_fold_error_nums, round2_ten_fold_error_nums, round3_ten_fold_error_nums)
    print ("10-fold cross validation error num: {}/300".format(ten_fold_error_nums))
    print (round1_loo_error_nums, round2_loo_error_nums, round3_loo_error_nums)
    print ("LOO error num: {}/300".format(loo_error_nums))


10-fold cross validation error num: 4/300
LOO error num: 4/300

最终输出:10折交叉验证和留一法的错误数相同,均为 4/300,即两者的错误率一样。


查看完整代码及数据集

  • 1
    点赞
  • 9
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值