Logistic Regression: Parameter Training and Model Validation

Parameter training on the training data

Using the dataset from 唐宇迪's course; the code walkthrough follows.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time

import os
path = 'data' + os.sep + 'LogiReg_data.txt'
pdData = pd.read_csv(path, header=None, names=['Exam 1', 'Exam 2', 'Admitted'])
pdData.head()

# raw data distribution
zero_set = pdData[pdData.Admitted == 0]
one_set = pdData[pdData.Admitted == 1]
plt.scatter(zero_set['Exam 1'], zero_set['Exam 2'], c='b', label='Not admitted')
plt.scatter(one_set['Exam 1'], one_set['Exam 2'], c='r', label='Admitted')
plt.xlabel('Exam 1')
plt.ylabel('Exam 2')
plt.legend()
plt.show()

Data standardization and train/test split (no cross-validation set for now); training uses NumPy's ndarray format.

from sklearn import preprocessing
from sklearn.model_selection import train_test_split
# mat = np.mat(pdData)
# X_train, X_test, y_train, y_test = train_test_split(mat[:,:2], mat[:,2], test_size=0.3, random_state=0)
# Notes: 1. pdData.iloc[:,2] would give a 1-D y_train, while pdData.iloc[:,2:] gives a 2-D one;
#        2. np.mat takes more than twice as long as np.array here

X_train, X_test, y_train, y_test = train_test_split(pdData.iloc[:,:2], pdData.iloc[:,2:], test_size=0.3, random_state=0)
y_train = np.array(y_train)
y_test = np.array(y_test)
scaler = preprocessing.StandardScaler().fit(X_train)
norm_X_train = scaler.transform(X_train)    # the test data will also go through this scaler, so it follows the training-data distribution
norm_X_train.mean(axis=0), norm_X_train.std(axis=0)    # sanity check: mean ≈ 0, std ≈ 1
norm_X_train = np.insert(norm_X_train, 0, values=1, axis=1)    # prepend a bias column of ones
norm_X_train[:3]
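As the comment above notes, the test data should go through the same scaler fitted on the training data. A minimal sketch of that step (the name norm_X_test is my own, not from the original code):

# transform the held-out test set with the scaler fitted on the training data,
# then prepend the same bias column (norm_X_test is an assumed name)
norm_X_test = scaler.transform(X_test)
norm_X_test = np.insert(norm_X_test, 0, values=1, axis=1)
norm_X_test[:3]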

Building the model functions

def sigmoid(z):
    return 1/(1 + np.exp(-z) )

def model(X,theta):
    return sigmoid(np.dot(X,theta))

def cost(X, theta, y):
    # cross-entropy loss: y is paired with model(X, theta); you could instead pair 1-y with
    # model(X, theta), in which case the gradient's error term becomes model(X, theta) - (1 - y)
    return -(np.multiply(y, np.log(model(X, theta))) + np.multiply(1-y, np.log(1 - model(X, theta)))).mean()
# def cost(X, theta, y):
#     # this variant classifies h(x) > 0.5 as class 0: 1-y is paired with model(X, theta),
#     # and the gradient's error term becomes model(X, theta) - (1 - y)
#     return -(np.multiply(1-y, np.log(model(X, theta))) + np.multiply(y, np.log(1 - model(X, theta)))).mean()

def gradient(X,theta,y):
    grad = np.dot(X.T, (model(X, theta) - y))/(len(X))
    return grad
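A quick sanity check of these functions (my own check, not in the original; theta0 is just a throwaway name): with all-zero parameters the model predicts 0.5 for every sample, so the cost should be -ln(0.5) ≈ 0.693, and the gradient should have the same shape as theta.

theta0 = np.zeros((3, 1))
print(cost(norm_X_train, theta0, y_train))              # expect roughly 0.693 (= ln 2)
print(gradient(norm_X_train, theta0, y_train).shape)    # (3, 1), same shape as theta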

def shuffleData(X,y):
    # shuffle X and y together so the sample-label pairs stay aligned
    X_y = np.concatenate((X,y), axis=1 )
    np.random.shuffle(X_y)
    X_sample = X_y[:,:X.shape[1]]
    y_sample = X_y[:,X.shape[1]:]
    return X_sample, y_sample

def stop_policy(stop_type, value, threshold):
    if stop_type == "stop_iter": return value == threshold
    if stop_type == "stop_cost": return value < threshold
    if stop_type == "stop_grad": return np.linalg.norm(value) < threshold

def descent(X, theta, y, batch_size, stop_type, alpha, threshold):
    # alpha : learning rate (step size)
    # batch_size : number of samples used per gradient step
    # stop_type / threshold : stopping rule, see stop_policy above
    k = 0
    i = 0  # iteration counter
    total_samples = len(X)
    cost_lst = [cost(X, theta, y)]   # note: the cost is always evaluated on all samples
    init_time = time.time()
    while True:
        i += 1
        grad = gradient(X[k:k+batch_size], theta, y[k:k+batch_size])
        k += batch_size
        if k >= total_samples:
            k = 0
            X, y = shuffleData(X, y)   # reshuffle once an epoch is exhausted
        theta = theta - alpha * grad   # take one small step along the negative gradient
        cost_lst.append(cost(X, theta, y))
        if stop_type == 'stop_iter': value = i
        elif stop_type == 'stop_cost': value = abs(cost_lst[-1] - cost_lst[-2])
        elif stop_type == 'stop_grad': value = grad
        if stop_policy(stop_type, value, threshold): break
    print('{}: {} iterations, theta = {}, final cost: {:.2f}, elapsed {:03.2f}s'.format(stop_type, i, theta, cost_lst[-1], time.time() - init_time))
    plt.plot(np.arange(len(cost_lst)), cost_lst)
    return cost_lst

Computing the parameters with the standardized data

# use only a single sample as input: norm_X_train[:1,:]
theta = np.zeros((3,1))
init_time = time.time()
y_train[:1,:] = np.array([[1]])   # overwrite the first label with 1 for this single-sample demo
cost_lst = descent(norm_X_train[:1,:], theta, y_train[:1,:], 1, 'stop_iter', alpha=0.001, threshold=5000)   # gradient descent on a single sample
# as the plot shows, the cost keeps approaching 0 as iterations increase: with only one sample, a perfect fit can always be found

(In the output figure that originally appeared here, the printed theta should actually be 3×1, not 1×3.)

Now feed in all training samples (norm_X_train). The cost is still computed over all samples; the batch sizes 1, 20 and 70 are only used for the gradient step.

cost_lst = descent(norm_X_train,  theta, y_train, 1, 'stop_iter', alpha=0.001,threshold=5000)  # stochastic gradient descent (batch size 1)
cost_lst = descent(norm_X_train,  theta, y_train, 20, 'stop_iter', alpha=0.001,threshold=5000) # mini-batch gradient descent (batch size 20)
cost_lst = descent(norm_X_train,  theta, y_train, 70, 'stop_iter', alpha=0.001,threshold=5000) # full-batch gradient descent (all 70 training samples)

cost_lst = descent(norm_X_train,  theta, y_train, 70, 'stop_grad', alpha=0.001,threshold=0.02)

More iterations drive the cost down further!

cost_lst = descent(norm_X_train,  theta, y_train, 1, 'stop_grad', alpha=0.001,threshold=0.0005)

Stochastic gradient descent is faster per iteration, but it needs far more iterations to converge, so mini-batch gradient descent is usually the better choice!

cost_lst = descent(norm_X_train,  theta, y_train, 15, 'stop_grad', alpha=0.001,threshold=0.002)

cost_lst = descent(norm_X_train,  theta, y_train, 12, 'stop_cost', alpha=0.001,threshold=0.000001)

Without standardization the descent is slow: with 'stop_iter' the learning rate has to be set smaller and the batch size larger to keep the cost curve smooth. In general, just use the standardized data.

raw_data = pdData.values
raw_data = np.insert(raw_data, 0, values=1, axis=1)   # prepend the bias column
cost_lst1 = descent(raw_data[:,:3], theta, raw_data[:,3:], 100, 'stop_iter', alpha=0.00001, threshold=5000)   # raw features need a much smaller alpha

cost_lst1 = descent(raw_data[:,:3], theta, raw_data[:,3:], 100, 'stop_grad', alpha=0.001, threshold=0.05)
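
The held-out test set has not been used yet. Below is a minimal validation sketch, assuming descent() is extended to also return the fitted theta, and reusing the norm_X_test prepared with the training scaler earlier; the predict helper and the 0.5 decision threshold are my own additions, not part of the original code.

# assumption: descent() is modified to end with `return theta, cost_lst`, e.g.:
# theta, cost_lst = descent(norm_X_train, np.zeros((3,1)), y_train, 20, 'stop_grad', alpha=0.001, threshold=0.002)

def predict(X, theta, threshold=0.5):
    # classify as 1 when the predicted probability reaches the decision threshold
    return (model(X, theta) >= threshold).astype(int)

y_pred = predict(norm_X_test, theta, 0.5)      # norm_X_test was prepared with the training scaler above
accuracy = (y_pred == y_test).mean()
print('test accuracy: {:.2%}'.format(accuracy))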
