机器学习基石作业一(NTU-林轩田) Python代码部分

机器学习基石作业一(NTU-林轩田) Python代码部分

Q15

这题是写PLA演算法

import numpy as np
import pandas as pd
def PLA(data):
    """Run the perceptron learning algorithm with a fixed, cyclic visiting order.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  The weights start at zero
    and the rows are swept in index order until one full sweep makes no
    update.  A score of exactly zero triggers an update only when the label
    equals 1; with any other label the sample is left alone.

    After each sweep the number of samples that needed no update is printed.

    Returns:
        ``[w, counts]`` where ``w`` is the final weight column of shape
        ``(m - 1, 1)`` and ``counts`` is the total number of updates made.
    """
    n_samples, n_cols = data.shape
    dim = n_cols - 1
    w = np.zeros([dim, 1])
    counts = 0
    while True:
        untouched = 0
        for row in data:
            features = row[:dim]
            target = row[dim]
            score = features.reshape(1, dim).dot(w)
            # Both mistake cases lead to the same correction step.
            if (score == 0 and target == 1) or score * target < 0:
                w += target * features.reshape(dim, 1)
                counts += 1
            else:
                untouched += 1
        print(untouched)
        if untouched == n_samples:
            return [w, counts]

调用函数、读取数据和结果部分:

# Labels for the four features and the label column; not passed to read_csv below.
colname = ['x1','x2','x3','x4','target']
# The separator is a regex; it is written as a raw string because a plain
# '\s' literal is an invalid escape sequence in Python source.
PLA_data = pd.read_csv('/Downloads/ntumlone_hw1_hw1_15_train.dat.txt', header=None, sep=r'\s+', engine='python')
# Prepend the constant x0 = 1 column, sized from the data instead of a fixed 400 rows.
PLA(np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data))))[1]

45次

Q16

在15题的基础上把样本的访问顺序改为随机排列,之后运行2000次取平均值,可以近似为期望。
代码:

函数
import random
def random_PLA(data):
    """Run the perceptron learning algorithm over a random, fixed visiting cycle.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  A random permutation of the
    row indices is drawn once with ``random.sample`` and reused for every
    sweep.  Sweeps continue until one of them makes no update.  A score of
    exactly zero triggers an update only when the label equals 1.

    Returns:
        ``[w, counts]`` where ``w`` is the final weight column of shape
        ``(m - 1, 1)`` and ``counts`` is the total number of updates made.
    """
    n_samples, n_cols = data.shape
    dim = n_cols - 1
    w = np.zeros([dim, 1])
    counts = 0
    visit_order = random.sample(list(range(n_samples)), n_samples)
    while True:
        untouched = 0
        for idx in visit_order:
            row = data[idx]
            features = row[:dim]
            target = row[dim]
            score = features.reshape(1, dim).dot(w)
            # Both mistake cases lead to the same correction step.
            if (score == 0 and target == 1) or score * target < 0:
                w += target * features.reshape(dim, 1)
                counts += 1
            else:
                untouched += 1
        if untouched == n_samples:
            return [w, counts]
调用:
# Build the design matrix once: it does not change between runs, and the
# column of ones is sized from the data instead of a fixed 400 rows.
train_matrix = np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data)))
mean = 0
for i in range(2000):
    # Each run draws a fresh random visiting order; accumulate its update count.
    mean = random_PLA(train_matrix)[1] + mean
mean = mean/2000
mean

结果为40.1395 次

Q17

在16题的基础上再加了一个学习率,也就是增加了一个超参数。

代码
函数
import random
def rate_PLA(data, rate=0.5):
    """Run the randomly ordered perceptron algorithm with a scaled update step.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  A random permutation of the
    row indices is drawn once and reused for every sweep.  Each correction
    adds ``rate * label * features`` to the weights.  Sweeps continue until
    one of them makes no update.  A score of exactly zero triggers an update
    only when the label equals 1.

    Args:
        data: 2-D array of samples as described above.
        rate: Step size applied to every correction.  Defaults to 0.5, the
            value that was previously hard-coded.

    Returns:
        ``[w, counts]`` where ``w`` is the final weight column of shape
        ``(m - 1, 1)`` and ``counts`` is the total number of updates made.

    Raises:
        ValueError: If ``rate`` is not positive; a zero step can never
            correct a misclassified sample.
    """
    if rate <= 0:
        raise ValueError("rate must be positive")
    n_samples, n_cols = data.shape
    dim = n_cols - 1
    w = np.zeros([dim, 1])
    counts = 0
    visit_order = random.sample(list(range(n_samples)), n_samples)
    while True:
        untouched = 0
        for idx in visit_order:
            row = data[idx]
            features = row[:dim]
            target = row[dim]
            score = features.reshape(1, dim).dot(w)
            # Both mistake cases lead to the same correction step.
            if (score == 0 and target == 1) or score * target < 0:
                w += rate * target * features.reshape(dim, 1)
                counts += 1
            else:
                untouched += 1
        if untouched == n_samples:
            return [w, counts]
调用与运行结果:
# Build the design matrix once: it does not change between runs, and the
# column of ones is sized from the data instead of a fixed 400 rows.
train_matrix = np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data)))
mean = 0
for i in range(2000):
    # Each run draws a fresh random visiting order; accumulate its update count.
    mean = rate_PLA(train_matrix)[1] + mean
mean = mean/2000
mean

结果为40.239

Q18

接下来是对于同一个模型使用另外的算法,这个算法算是补充了前一个算法要求数据必须是线性可分的缺点。

代码
函数
import numpy
def acc_rate(data,w):
    """Return the fraction of rows in ``data`` that ``w`` classifies correctly.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  A sample counts as correct
    when ``score * label`` is strictly positive, so a score of exactly zero
    is always counted as wrong.

    Args:
        data: 2-D array of samples as described above.
        w: Weights, either a column of shape ``(m - 1, 1)`` or a 1-D vector
            of length ``m - 1``.  A 1-D vector is reshaped to a column so it
            cannot broadcast against the label column into an (n, n) matrix.

    Returns:
        A float array with one accuracy per weight column, i.e. shape
        ``(1,)`` for a single weight vector.

    Raises:
        ZeroDivisionError: If ``data`` has no rows.
    """
    n, m = data.shape
    if n == 0:
        raise ZeroDivisionError("acc_rate() needs at least one sample")
    w = np.asarray(w)
    if w.ndim == 1:
        w = w.reshape(-1, 1)
    target = data[:, m - 1].reshape(n, 1)
    x = data[:, 0:m - 1]
    correct = (x.dot(w) * target) > 0
    # Vectorised column sum replaces the builtin sum() over rows.
    return correct.sum(axis=0) / n
def Pocket_PLA(data, max_updates=100):
    """Run the pocket variant of the perceptron algorithm.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  Samples are visited in a
    random permutation that is drawn once and cycled.  A sample with
    ``score * label <= 0`` triggers an update.  After each update the new
    weights replace the pocket weights only if their accuracy on ``data``
    (as reported by ``acc_rate``) is strictly higher.

    Args:
        data: 2-D array of samples as described above.
        max_updates: Number of updates after which the search stops.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        ``[w_best, acc]`` where ``w_best`` is the pocket weight column of
        shape ``(m - 1, 1)`` and ``acc`` is its accuracy on ``data``.
    """
    step = 0
    n, m = data.shape
    w = np.zeros([m - 1, 1])
    w_best = w
    order = random.sample(list(range(n)), n)
    acc = acc_rate(data, w)
    # Accuracy of the current weights; it only changes when w changes, so it
    # is refreshed after an update instead of on every visited sample.
    current_acc = acc
    while 1:
        for i in order:
            features = data[i, 0:m - 1]
            target = data[i, m - 1]
            if features.reshape(1, m - 1).dot(w) * target <= 0:
                step += 1
                # Rebinding (not +=) keeps w_best pointing at the old array.
                w = w + target * features.reshape(m - 1, 1)
                current_acc = acc_rate(data, w)
                if current_acc > acc:
                    w_best = w
                    acc = current_acc
            if step == max_updates or current_acc == 1:
                return [w_best, acc]

第一个函数用于计算正确率(分类正确的样本所占比例),第二个就是pocket演算法。

数据读取与函数调用和结果
import pandas
# Labels for the four features and the label column; not passed to read_csv below.
colname = ['x1','x2','x3','x4','target']
# The separator is a regex; it is written as a raw string because a plain
# '\s' literal is an invalid escape sequence in Python source.
PLA_data = pd.read_csv('/Users/zhangdi/Downloads/ntumlone_hw1_hw1_18_train.dat.txt', header=None, sep=r'\s+', engine='python')
PLA_test = pd.read_csv('/Users/zhangdi/Downloads/ntumlone_hw1_hw1_18_test.dat.txt', header=None, sep=r'\s+', engine='python')
# Build both design matrices once; the column of ones is sized from each
# data set instead of a fixed 500 rows.
train_matrix = np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data)))
test_matrix = np.column_stack((np.ones([len(PLA_test), 1]), np.array(PLA_test)))
mean = 0
for i in range(2000):
    # Train on the training set, then score the pocket weights on the test set.
    mean = acc_rate(test_matrix, Pocket_PLA(train_matrix)[0]) + mean
mean = mean/2000
mean

可以得到结果,正确率为0.868,错误率为0.132

Q19

意思应该是作一个对比,如果不用pocket,而是采取和前一种算法一样的方式,只是增加了一个最大次数作为限制,可以发现,2000次的平均比pocket差特别大

def Q19_PLA(data, max_updates=50):
    """Run the perceptron algorithm with an update budget and no pocket.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  Samples are visited in a
    random permutation that is drawn once and cycled.  A sample with
    ``score * label <= 0`` triggers an update.  The search stops once the
    budget is used up or the current weights reach an accuracy of 1 on
    ``data`` (as reported by ``acc_rate``), and the current weights are
    returned whether or not earlier weights scored better.

    Args:
        data: 2-D array of samples as described above.
        max_updates: Number of updates after which the search stops.
            Defaults to 50, the value that was previously hard-coded.

    Returns:
        ``[w, acc]`` where ``w`` is the last weight column of shape
        ``(m - 1, 1)`` and ``acc`` is its accuracy on ``data``.
    """
    step = 0
    n, m = data.shape
    w = np.zeros([m - 1, 1])
    order = random.sample(list(range(n)), n)
    # Accuracy of the current weights; it only changes when w changes, so it
    # is refreshed after an update instead of on every visited sample.
    current_acc = acc_rate(data, w)
    while 1:
        for i in order:
            features = data[i, 0:m - 1]
            target = data[i, m - 1]
            if features.reshape(1, m - 1).dot(w) * target <= 0:
                step += 1
                w = w + target * features.reshape(m - 1, 1)
                current_acc = acc_rate(data, w)
            if step == max_updates or current_acc == 1:
                return [w, current_acc]
# Build both design matrices once; the column of ones is sized from each
# data set instead of a fixed 500 rows.
train_matrix = np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data)))
test_matrix = np.column_stack((np.ones([len(PLA_test), 1]), np.array(PLA_test)))
# Reset the accumulator so the average does not start from an earlier result.
mean = 0
for i in range(2000):
    print(i)
    mean = acc_rate(test_matrix, Q19_PLA(train_matrix)[0]) + mean
mean = mean/2000
mean

结果为0.63479

Q20

最后一题,不过是将50次改为了100次,可以看得出增加了迭代次数是有助于提高在测试集上的正确率的( 测试集如果与训练集同分布)

代码
函数
def acc_rate(data,w):
    """Return the fraction of rows in ``data`` that ``w`` classifies correctly.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  A sample counts as correct
    when ``score * label`` is strictly positive, so a score of exactly zero
    is always counted as wrong.

    Args:
        data: 2-D array of samples as described above.
        w: Weights, either a column of shape ``(m - 1, 1)`` or a 1-D vector
            of length ``m - 1``.  A 1-D vector is reshaped to a column so it
            cannot broadcast against the label column into an (n, n) matrix.

    Returns:
        A float array with one accuracy per weight column, i.e. shape
        ``(1,)`` for a single weight vector.

    Raises:
        ZeroDivisionError: If ``data`` has no rows.
    """
    n, m = data.shape
    if n == 0:
        raise ZeroDivisionError("acc_rate() needs at least one sample")
    w = np.asarray(w)
    if w.ndim == 1:
        w = w.reshape(-1, 1)
    target = data[:, m - 1].reshape(n, 1)
    x = data[:, 0:m - 1]
    correct = (x.dot(w) * target) > 0
    # Vectorised column sum replaces the builtin sum() over rows.
    return correct.sum(axis=0) / n
def Pocket_PLA(data, max_updates=100):
    """Run the pocket variant of the perceptron algorithm.

    Every row of ``data`` is one sample: all columns except the last are
    features and the last column is the label.  Samples are visited in a
    random permutation that is drawn once and cycled.  A sample with
    ``score * label <= 0`` triggers an update.  After each update the new
    weights replace the pocket weights only if their accuracy on ``data``
    (as reported by ``acc_rate``) is strictly higher.

    Args:
        data: 2-D array of samples as described above.
        max_updates: Number of updates after which the search stops.
            Defaults to 100, the value that was previously hard-coded.

    Returns:
        ``[w_best, acc]`` where ``w_best`` is the pocket weight column of
        shape ``(m - 1, 1)`` and ``acc`` is its accuracy on ``data``.
    """
    step = 0
    n, m = data.shape
    w = np.zeros([m - 1, 1])
    w_best = w
    order = random.sample(list(range(n)), n)
    acc = acc_rate(data, w)
    # Accuracy of the current weights; it only changes when w changes, so it
    # is refreshed after an update instead of on every visited sample.
    current_acc = acc
    while 1:
        for i in order:
            features = data[i, 0:m - 1]
            target = data[i, m - 1]
            if features.reshape(1, m - 1).dot(w) * target <= 0:
                step += 1
                # Rebinding (not +=) keeps w_best pointing at the old array.
                w = w + target * features.reshape(m - 1, 1)
                current_acc = acc_rate(data, w)
                if current_acc > acc:
                    w_best = w
                    acc = current_acc
            if step == max_updates or current_acc == 1:
                return [w_best, acc]
# The separator is a regex; it is written as a raw string because a plain
# '\s' literal is an invalid escape sequence in Python source.
PLA_test = pd.read_csv('/Downloads/ntumlone_hw1_hw1_18_test.dat.txt', header=None, sep=r'\s+', engine='python')
# Build both design matrices once; the column of ones is sized from each
# data set instead of a fixed 500 rows.
train_matrix = np.column_stack((np.ones([len(PLA_data), 1]), np.array(PLA_data)))
test_matrix = np.column_stack((np.ones([len(PLA_test), 1]), np.array(PLA_test)))
mean = 0
for i in range(2000):
    print(i)
    # Train on the training set, then score the pocket weights on the test set.
    mean = acc_rate(test_matrix, Pocket_PLA(train_matrix)[0]) + mean
mean = mean/2000
mean

结果为0.113424。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值