1. Gradient Descent
The code is as follows (example):
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import make_blobs  # generates Gaussian "blobs" of data points
import matplotlib.pyplot as plt
import numpy as np
import argparse
# sigmoid (S-shaped) non-linear activation function
def sigmoid_activation(x):
    return 1.0 / (1 + np.exp(-x))

def predict(X, W):
    preds = sigmoid_activation(X.dot(W))
    # threshold the outputs to binary class labels
    preds[preds <= 0.5] = 0
    preds[preds > 0.5] = 1
    # return the predictions
    return preds
# parse command-line arguments
ap = argparse.ArgumentParser()
# number of epochs
ap.add_argument('-e', '--epochs', type=int, default=100, help='# of epochs')
# learning rate; 0.1, 0.01, and 0.001 are common initial values
ap.add_argument('-a', '--alpha', type=float, default=0.001, help='learning rate')
args = vars(ap.parse_args())
# generate the dataset: a 2-class classification problem with 1,000 data points,
# where each data point is a 2D feature vector
"""n_features is the number of features per sample
n_samples is the number of samples
centers is the number of cluster centers, i.e. the number of label classes
random_state is the random seed, which fixes the generated data
cluster_std sets the standard deviation of each cluster"""
(X, y) = make_blobs(n_samples=1000, n_features=2, centers=2, cluster_std=1.5, random_state=1)
y = y.reshape((y.shape[0], 1))
# insert a column of 1s into the feature matrix X (the bias trick),
# so the bias b can be learned as one more entry of W
X = np.c_[X, np.ones((X.shape[0]))]
# X now has shape 1000x3
# split the data 50/50 into training and testing sets
(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.5, random_state=42)
# initialize the weight matrix
print("[INFO] training...")
W = np.random.randn(X.shape[1], 1)
# keep track of the loss at every epoch so we can plot it afterwards
losses = []
# training loop: vanilla (batch) gradient descent
for epoch in np.arange(0, args['epochs']):
    # pass trainX.W through the sigmoid to get predictions under the current W
    preds = sigmoid_activation(trainX.dot(W))
    error = preds - trainY
    # sum-of-squared-errors loss
    loss = np.sum(error**2)
    losses.append(loss)
    # gradient descent update
    gradient = trainX.T.dot(error)
    # gradient has shape 3x1
    W += -args['alpha'] * gradient
    # display an update every 5 epochs
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print("[INFO] epoch={}, loss={:.7f}".format(int(epoch + 1), loss))
# evaluate the model
print("[INFO] evaluating...")
preds = predict(testX, W)
print(classification_report(testY, preds))
# plot the (testing) classification data
plt.style.use("ggplot")  # set the plot style
plt.figure()
plt.title("Data")
plt.scatter(testX[:, 0], testX[:, 1], marker="o", c=testY.ravel(), s=30)
# construct a figure that plots the loss over time
plt.figure()
plt.plot(np.arange(0, args["epochs"]), losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()
"""
Output:
[INFO] training...
[INFO] epoch=1, loss=261.5620584
[INFO] epoch=5, loss=6.3345808
[INFO] epoch=10, loss=3.3345147
[INFO] epoch=15, loss=2.0877242
[INFO] epoch=20, loss=1.3531976
[INFO] epoch=25, loss=0.9139176
[INFO] epoch=30, loss=0.6475280
[INFO] epoch=35, loss=0.4803836
[INFO] epoch=40, loss=0.3711498
[INFO] epoch=45, loss=0.2968840
[INFO] epoch=50, loss=0.2445672
[INFO] epoch=55, loss=0.2065554
[INFO] epoch=60, loss=0.1781933
[INFO] epoch=65, loss=0.1565438
[INFO] epoch=70, loss=0.1396917
[INFO] epoch=75, loss=0.1263503
[INFO] epoch=80, loss=0.1156313
[INFO] epoch=85, loss=0.1069061
[INFO] epoch=90, loss=0.0997199
[INFO] epoch=95, loss=0.0937372
[INFO] epoch=100, loss=0.0887063
[INFO] evaluating...
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       250
           1       1.00      1.00      1.00       250

    accuracy                           1.00       500
   macro avg       1.00      1.00      1.00       500
weighted avg       1.00      1.00      1.00       500"""
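For reference, the weight update in the listing above is the standard gradient descent rule. One point worth making explicit: trainX.T.dot(error) is exactly the gradient of the binary cross-entropy loss for a sigmoid output; the sum-of-squared-errors value tracked in losses would strictly require an extra sigmoid-derivative factor in its gradient, so the listing uses the common simplified form:

W \leftarrow W - \alpha \, X^{\top}\bigl(\sigma(XW) - y\bigr), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}

where \alpha is the learning rate passed via --alpha.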
2. Stochastic Gradient Descent
The code is as follows (example):
"""
随机梯度下降率(SGD):算法计算梯度并对小批训练数据更新权重矩阵W,而不是整个训练集
while True:
batch = next_training_batch(data, 256)
Wgradient = evaluate_gradient(loss, batch, W)
W += -alpha * Wgradient
典型的批处理尺寸包括32、64、128和256
"""
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
import numpy as np
import argparse
# sigmoid activation and prediction helpers (same as in the first listing)
def sigmoid_activation(x):
    return 1.0 / (1 + np.exp(-x))

def predict(X, W):
    preds = sigmoid_activation(X.dot(W))
    # threshold the outputs to binary class labels
    preds[preds <= 0.5] = 0
    preds[preds > 0.5] = 1
    return preds
# mini-batch generator
def next_batch(X, y, batchSize):
    # loop over the dataset in batchSize-sized slices
    for i in np.arange(0, X.shape[0], batchSize):
        # yield the corresponding subsets of X and y
        yield (X[i:i + batchSize], y[i:i + batchSize])
# parse command-line arguments
ap = argparse.ArgumentParser()
ap.add_argument('-e', '--epochs', type=int, default=100, help='# of epochs')
ap.add_argument('-a', '--alpha', type=float, default=0.01, help='learning rate')
ap.add_argument('-b', '--batch-size', type=int, default=32, help='size of SGD mini-batches')
args = vars(ap.parse_args())
# generate the dataset and split it 50/50 into training and testing sets
(X, y) = make_blobs(n_samples=1000, n_features=2, centers=2, cluster_std=1.5, random_state=1)
y = y.reshape(y.shape[0],1)
X = np.c_[X,np.ones(X.shape[0])]
(trainX, testX, trainY, testY) = train_test_split(X, y, test_size=0.5, random_state=42)
print("[INFO] training....")
W = np.random.randn(X.shape[1], 1)
losses = []
# training loop: mini-batch SGD
for epoch in np.arange(0, args["epochs"]):
    epochLoss = []
    # loop over the training data in mini-batches
    for (batchX, batchY) in next_batch(trainX, trainY, args['batch_size']):
        preds = sigmoid_activation(batchX.dot(W))
        error = preds - batchY
        epochLoss.append(np.sum(error**2))
        # gradient update
        gradient = batchX.T.dot(error)
        W += -args['alpha'] * gradient
    # average the loss across all mini-batches in the epoch
    loss = np.average(epochLoss)
    losses.append(loss)
    # display an update every 5 epochs
    if epoch == 0 or (epoch + 1) % 5 == 0:
        print('[INFO] epoch={},loss={:.7f}'.format(int(epoch + 1), loss))
print("[INFO] evaluating...")
preds = predict(testX, W)
print(classification_report(testY, preds))
# plot the (testing) classification data
plt.style.use("ggplot")
plt.figure()
plt.title("Data")
plt.scatter(testX[:, 0], testX[:, 1], marker="o", c=testY.ravel(), s=30)
# construct a figure that plots the loss over time
plt.figure()
plt.plot(np.arange(0, args["epochs"]), losses)
plt.title("Training Loss")
plt.xlabel("Epoch #")
plt.ylabel("Loss")
plt.show()
"""输出:
[INFO] training....
[INFO] epoch=1,loss=0.9169706
[INFO] epoch=5,loss=0.0173029
[INFO] epoch=10,loss=0.0140692
[INFO] epoch=15,loss=0.0122936
[INFO] epoch=20,loss=0.0109826
[INFO] epoch=25,loss=0.0099539
[INFO] epoch=30,loss=0.0091151
[INFO] epoch=35,loss=0.0084128
[INFO] epoch=40,loss=0.0078131
[INFO] epoch=45,loss=0.0072932
[INFO] epoch=50,loss=0.0068370
[INFO] epoch=55,loss=0.0064328
[INFO] epoch=60,loss=0.0060717
[INFO] epoch=65,loss=0.0057468
[INFO] epoch=70,loss=0.0054528
[INFO] epoch=75,loss=0.0051853
[INFO] epoch=80,loss=0.0049408
[INFO] epoch=85,loss=0.0047164
[INFO] epoch=90,loss=0.0045097
[INFO] epoch=95,loss=0.0043186
[INFO] epoch=100,loss=0.0041415
[INFO] evaluating...
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       250
           1       1.00      1.00      1.00       250

    accuracy                           1.00       500
   macro avg       1.00      1.00      1.00       500
weighted avg       1.00      1.00      1.00       500
"""