李宏毅 Machine Learning & Deep Learning Notes - Exercise 2: Binary Classification

Task: train a binary classification model on the given dataset. The code below implements logistic regression trained with mini-batch gradient descent.

#1. Load X_train, Y_train, and X_test from the data paths
import numpy as np

np.random.seed(0)
X_train_fpath = 'E:/BaiduNetdiskDownload/course_LeeML20-datasets/course_LeeML20-datasets/hw2/data/X_train'
Y_train_fpath = 'E:/BaiduNetdiskDownload/course_LeeML20-datasets/course_LeeML20-datasets/hw2/data/Y_train'
X_test_fpath = 'E:/BaiduNetdiskDownload/course_LeeML20-datasets/course_LeeML20-datasets/hw2/data/X_test'
output_fpath = 'E:/BaiduNetdiskDownload/course_LeeML20-datasets/course_LeeML20-datasets/hw2/output_{}.csv'

with open(X_train_fpath) as f:
    next(f)  # skip the header row
    X_train = np.array([line.strip("\n").split(",")[1:] for line in f], dtype=float)  # drop the id column
with open(Y_train_fpath) as f:
    next(f)
    Y_train = np.array([line.strip("\n").split(",")[1] for line in f], dtype=float)  # keep only the label column
with open(X_test_fpath) as f:
    next(f)
    X_test = np.array([line.strip("\n").split(",")[1:] for line in f], dtype=float)
#2. Define the normalization function _normalize(X, train=True, specified_column=None, X_mean=None, X_std=None)
def _normalize(X, train=True, specified_column=None, X_mean=None, X_std=None):
    if specified_column is None:
        specified_column = np.arange(X.shape[1])  # if no columns are specified, normalize every column
    if train:
        X_mean = np.mean(X[:, specified_column], 0)
        X_std = np.std(X[:, specified_column], 0)
    X[:, specified_column] = (X[:, specified_column] - X_mean) / (X_std + 1e-8)  # 1e-8 guards against division by zero
    return X, X_mean, X_std
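Each selected column is standardized with the training-set statistics:

$$X' = \frac{X - \mu}{\sigma + 10^{-8}}$$

where $\mu$ and $\sigma$ are the column-wise mean and standard deviation of the training data.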
#3. Define the train/dev split function _train_dev_split(X, Y, dev_ratio = 0.25)
def _train_dev_split(X, Y, dev_ratio=0.25):
    trainSize = int(len(X) * (1 - dev_ratio))  # first (1 - dev_ratio) of the rows go to train
    return X[:trainSize], Y[:trainSize], X[trainSize:], Y[trainSize:]
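Note that this split is sequential: the first portion of the rows becomes the training set. If the file happens to be ordered (for example by label), the dev set will not be representative. A minimal sketch of a shuffled variant (the name _train_dev_split_shuffled is my addition, not part of the original code):

def _train_dev_split_shuffled(X, Y, dev_ratio=0.25):
    idx = np.random.permutation(len(X))  # random row order, applied to X and Y together
    X, Y = X[idx], Y[idx]
    trainSize = int(len(X) * (1 - dev_ratio))
    return X[:trainSize], Y[:trainSize], X[trainSize:], Y[trainSize:]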


X_train, X_mean, X_std = _normalize(X_train, train=True)
X_test, _, _ = _normalize(X_test, train=False, specified_column=None, X_mean=X_mean, X_std=X_std)  # reuse the training statistics

dev_ratio = 0.1  # hold out 10% of the training set as the dev set
X_train, Y_train, X_dev, Y_dev = _train_dev_split(X_train, Y_train, dev_ratio = dev_ratio)

train_size = X_train.shape[0]
dev_size = X_dev.shape[0]
test_size = X_test.shape[0]
data_dim = X_train.shape[1]
print('Size of training set: {}'.format(train_size))
print('Size of development set: {}'.format(dev_size))
print('Size of testing set: {}'.format(test_size))
print('Dimension of data: {}'.format(data_dim))
#4. Define _shuffle(X, Y), _sigmoid, _f(X, w, b), _predict(X, w, b), _accuracy(Y_pred, Y_label)
def _shuffle(X, Y):
    # shuffle X and Y with the same permutation so rows stay aligned
    indices = np.arange(len(X))
    np.random.shuffle(indices)
    return X[indices], Y[indices]

def _sigmoid(z):
    return np.clip(1 / (1.0 + np.exp(-z)), 1e-8, 1 - 1e-8)  # clip so log() in the loss never sees exactly 0 or 1
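This is the logistic sigmoid, clipped away from 0 and 1:

$$\sigma(z) = \frac{1}{1 + e^{-z}}$$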

def _f(X, w, b):
    return _sigmoid(np.matmul(X, w) + b)  # returns a vector: one probability per row of X

def _predict(X, w, b):
    return np.round(_f(X, w, b)).astype(int)  # round probabilities to hard 0/1 labels (np.int was removed in NumPy 1.24)

def _accuracy(Y_pred, Y_label):
    return 1 - np.mean(np.abs(Y_pred - Y_label))  # mean absolute difference is the error rate; accuracy is its complement
#5. Define _cross_entropy_loss(y_pred, Y_label) and _gradient(X, Y_label, w, b)
def _cross_entropy_loss(y_pred, Y_label):
    # binary cross-entropy summed over all examples
    return -np.dot(Y_label, np.log(y_pred)) - np.dot(1 - Y_label, np.log(1 - y_pred))
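For reference, this computes

$$L(w, b) = -\sum_n \left[ y_n \ln f(x_n) + (1 - y_n) \ln\big(1 - f(x_n)\big) \right]$$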

def _gradient(X, Y_label, w, b):
    # gradients of the cross-entropy loss with respect to w and b, as derived in the course
    y_pred = _f(X, w, b)
    error = Y_label - y_pred
    w_grad = -np.sum(error * X.T, 1)  # a vector with one partial derivative per feature dimension
    b_grad = -np.sum(error)
    return w_grad, b_grad
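Written out, these are the standard logistic-regression partial derivatives:

$$\frac{\partial L}{\partial w_i} = -\sum_n \big(y_n - f(x_n)\big)\, x_{n,i}, \qquad \frac{\partial L}{\partial b} = -\sum_n \big(y_n - f(x_n)\big)$$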
# Start training
# Initialize w and b to zeros
w = np.zeros((data_dim,))
b = np.zeros((1,))

iters = 10  # number of full passes over the training set (epochs)
batchSize = 8  # number of records per gradient update
learningRate = 0.2  # initial learning rate
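The loop below decays the learning rate by the square root of the update count $t$ (the step variable), so each update is

$$w \leftarrow w - \frac{\eta}{\sqrt{t}}\, \nabla_w L, \qquad b \leftarrow b - \frac{\eta}{\sqrt{t}}\, \frac{\partial L}{\partial b}$$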

# Record the loss and accuracy after each epoch
train_loss = []
dev_loss = []
train_acc = []
dev_acc = []

# Count of parameter updates; drives the learning-rate decay
step = 1

for i in range(iters):
    # Reshuffle the training records at the start of each epoch
    X_train, Y_train = _shuffle(X_train, Y_train)
    
    for j in range(int(np.floor(train_size/batchSize))):
        X = X_train[j*batchSize : (j+1)*batchSize]
        Y = Y_train[j*batchSize : (j+1)*batchSize]
        w_grad, b_grad = _gradient(X, Y, w, b)
        print("w_grad, b_grad",w_grad[0], b_grad)
        w = w - learningRate/np.sqrt(step) * w_grad
        b = b - learningRate/np.sqrt(step) * b_grad
        step = step + 1
        
    # After each full pass over the training set, record the accuracy and loss on the train and dev sets
    trainP = _f(X_train, w, b)
    acc = _accuracy(np.round(trainP), Y_train)
    train_acc.append(acc)
    err = _cross_entropy_loss(trainP, Y_train) / train_size
    train_loss.append(err)

    devP = _f(X_dev, w, b)
    acc = _accuracy(np.round(devP), Y_dev)
    dev_acc.append(acc)
    err = _cross_entropy_loss(devP, Y_dev) / dev_size
    dev_loss.append(err)
    
print('Training loss: {}'.format(train_loss[-1]))
print('Development loss: {}'.format(dev_loss[-1]))
print('Training accuracy: {}'.format(train_acc[-1]))
print('Development accuracy: {}'.format(dev_acc[-1]))
import matplotlib.pyplot as plt

# Loss curve
plt.plot(train_loss)
plt.plot(dev_loss)
plt.title('Loss')
plt.legend(['train', 'dev'])
plt.savefig('loss.png')
plt.show()

# Accuracy curve
plt.plot(train_acc)
plt.plot(dev_acc)
plt.title('Accuracy')
plt.legend(['train', 'dev'])
plt.savefig('acc.png')
plt.show()
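One step is still missing: X_test and output_fpath are defined at the top but never used. A minimal sketch for writing test predictions, assuming the submission format of this homework is a CSV with an id,label header (the 'logistic' tag used to fill output_fpath is my placeholder):

predictions = _predict(X_test, w, b)
with open(output_fpath.format('logistic'), 'w') as f:
    f.write('id,label\n')
    for i, label in enumerate(predictions):
        f.write('{},{}\n'.format(i, label))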

Summary: this exercise deepens the understanding of how gradient descent is computed, with one gradient update per batch of batchSize records. Also, the accuracy of the model trained by this code is not high, and it needs further improvement.
