# -*- coding: utf-8 -*-
# Original English notebook: https://nbviewer.jupyter.org/github/BVLC/caffe/blob/master/examples/brewing-logreg.ipynb
# Task: use Caffe as a generic SGD optimizer to train logistic regression on non-image HDF5 data.
from caffe.proto import caffe_pb2
from caffe import params as P
from caffe import layers as L
import numpy as np
import matplotlib.pyplot as plt
import os
import sys
import caffe
import h5py
import shutil
import tempfile
import sklearn
import sklearn.datasets
import sklearn.linear_model
import sklearn.model_selection  # needed for train_test_split below
import sklearn.metrics          # needed for accuracy_score below
import pandas as pd
caffe_root = '/home/zjhao/caffe/'
# Generate a random n-class classification problem
X, y = sklearn.datasets.make_classification(
    n_samples=10000,         # number of samples
    n_features=4,            # number of features
    n_redundant=0,           # number of redundant features
    n_informative=2,         # number of informative features
    n_clusters_per_class=2,  # number of clusters per class
    # If True, clusters are placed on the vertices of a hypercube;
    # if False, on the vertices of a random polytope.
    hypercube=False,
    random_state=0)          # seed for the dataset's random number generation
# Split the data into training and test sets
X, Xt, y, yt = sklearn.model_selection.train_test_split(X, y)
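# train_test_split holds out 25% of the samples for testing by default, so a
# quick shape check (an illustrative addition, not in the original notebook):
print(X.shape, Xt.shape)  # expect (7500, 4) and (2500, 4)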
# Visualize a sample of the data
inp = np.random.permutation(X.shape[0])[:1000]  # random subsample of 1000 indices
df = pd.DataFrame(X[inp])
# Draw a matrix of pairwise scatter plots
_ = pd.plotting.scatter_matrix(df,
                               figsize=(9, 9),  # (width, height) in inches
                               # 'kde' draws kernel density estimates on the
                               # diagonal; 'hist' would draw histograms
                               diagonal='kde',
                               marker='o',      # matplotlib marker type
                               s=40,            # marker size
                               alpha=.4,        # transparency
                               c=y[inp])        # color points by class label
# plt.show()
# Train with stochastic gradient descent (SGD), first in scikit-learn as a baseline
clf = sklearn.linear_model.SGDClassifier(
    loss='log',              # loss function; 'log' gives logistic regression
    n_iter=1000,             # number of passes over the data ('max_iter' in newer scikit-learn)
    penalty='l2',            # regularization type
    alpha=5e-4,              # constant that multiplies the regularization term
    # 'balanced' adjusts weights inversely proportional to class frequencies
    # in the input data, as n_samples / (n_classes * np.bincount(y))
    class_weight='balanced') # weights associated with classes; the default is weight 1 for every class
clf.fit(X, y)
yt_pred = clf.predict(Xt)
print('Accuracy: {:.3f}'.format(sklearn.metrics.accuracy_score(yt, yt_pred)))
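# Logistic regression is a linear model, so the fitted classifier exposes its
# weights and bias; printing them is an illustrative addition, not part of
# the original notebook:
print('weights: {}, bias: {}'.format(clf.coef_, clf.intercept_))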
# Save the dataset to HDF5 for loading in Caffe
dirname = os.path.abspath(caffe_root+'examples/hdf5_classification/data')
if not os.path.exists(dirname):
    os.makedirs(dirname)
train_filename = os.path.join(dirname, 'train.h5')
test_filename = os.path.join(dirname, 'test.h5')
# The HDF5Data layer's source should be a text file listing HDF5 filenames.
# Here each data file is listed once (the original notebook lists the train
# file twice, just to show that the list may repeat files).
# 1: write the training data uncompressed
with h5py.File(train_filename, 'w') as f:
    f['data'] = X
    f['label'] = y.astype(np.float32)
with open(os.path.join(dirname, 'train.txt'), 'w') as f:
    f.write(train_filename+'\n')
# HDF5 is pretty efficient, but it can be compressed further.
# 2: write the test data with gzip compression
comp_kwargs = {'compression': 'gzip', 'compression_opts': 1}
with h5py.File(test_filename, 'w') as f:
    f.create_dataset('data', data=Xt, **comp_kwargs)
    f.create_dataset('label', data=yt.astype(np.float32), **comp_kwargs)
with open(os.path.join(dirname, 'test.txt'), 'w') as f:
    f.write(test_filename+'\n')
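# A small sanity check (not in the original notebook): read the files back
# with h5py to confirm that 'data' and 'label' have the expected shapes.
with h5py.File(train_filename, 'r') as f:
    print(f['data'].shape, f['label'].shape)  # (7500, 4) and (7500,)
with h5py.File(test_filename, 'r') as f:
    print(f['data'].shape, f['label'].shape)  # (2500, 4) and (2500,)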
# Logistic regression: data, matrix multiplication, and 2-class softmax loss
def logreg(hdf5, batch_size):
    n = caffe.NetSpec()
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
    n.ip1 = L.InnerProduct(n.data, num_output=2,
                           weight_filler=dict(type='xavier'))
    n.accuracy = L.Accuracy(n.ip1, n.label)
    n.loss = L.SoftmaxWithLoss(n.ip1, n.label)
    return n.to_proto()
train_net_path = caffe_root+'examples/hdf5_classification/logreg_auto_train.prototxt'
with open(train_net_path, 'w') as f:
    f.write(str(logreg(caffe_root+'examples/hdf5_classification/data/train.txt', 10)))
test_net_path = caffe_root+'examples/hdf5_classification/logreg_auto_test.prototxt'
with open(test_net_path, 'w') as f:
    f.write(str(logreg(caffe_root+'examples/hdf5_classification/data/test.txt', 10)))
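# To inspect what NetSpec generated, the prototxt can simply be printed; this
# is an optional check, not part of the training flow:
with open(train_net_path) as f:
    print(f.read())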
# Define the solver (named make_solver here so the function is not shadowed
# by the solver instance created below)
def make_solver(train_net_path, test_net_path):
    s = caffe_pb2.SolverParameter()
    # Specify the locations of the train and test networks.
    s.train_net = train_net_path
    s.test_net.append(test_net_path)
    s.test_interval = 1000   # test after every 1000 training iterations
    s.test_iter.append(250)  # test on 250 batches each time we test
    s.max_iter = 10000       # total number of training iterations
    s.base_lr = 0.01         # initial learning rate for SGD
    # Set lr_policy to define how the learning rate changes during training.
    # Here, the rate is multiplied by a factor gamma after every stepsize
    # iterations; for other lr_policy choices see
    # https://www.cnblogs.com/laowangxieboke/p/10282096.html
    s.lr_policy = 'step'
    s.gamma = 0.1
    s.stepsize = 5000
    # Set other SGD hyperparameters. A nonzero momentum takes a weighted
    # average of the current and previous gradients to make learning more
    # robust. L2 weight decay regularizes learning to help prevent overfitting.
    s.momentum = 0.9
    s.weight_decay = 5e-4
    # Display the current training loss and accuracy every 1000 iterations.
    s.display = 1000
    # Take a snapshot every 10000 iterations.
    s.snapshot = 10000
    # Snapshots are written under /tmp; setting the prefix below caused an
    # error in this setup, so it is left commented out.
    # s.snapshot_prefix = 'examples/hdf5_classification/data/train'
    s.solver_mode = caffe_pb2.SolverParameter.GPU
    return s
solver_path = caffe_root+'examples/hdf5_classification/logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(make_solver(train_net_path, test_net_path)))
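# As in the original notebook, the solver can also be run from the shell with
# the caffe binary (the path assumes a standard in-tree build):
#   ./build/tools/caffe train -solver examples/hdf5_classification/logreg_solver.prototxt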
# Train and evaluate our simple Caffe-based logistic regression
caffe.set_mode_gpu()
solver = caffe.get_solver(solver_path)
solver.solve()
accuracy = 0
batch_size = solver.test_nets[0].blobs['data'].num
test_iters = int(len(Xt)/batch_size)
for i in range(test_iters):
    solver.test_nets[0].forward()
    accuracy += solver.test_nets[0].blobs['accuracy'].data
accuracy /= test_iters
print('Accuracy: {:.3f}'.format(accuracy))
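# A minimal sketch of stepping the solver manually instead of calling
# solve(), e.g. to record the loss at every iteration; the names here are
# illustrative and the block is disabled by default:
if False:
    step_solver = caffe.get_solver(solver_path)
    train_loss = np.zeros(100)
    for it in range(100):
        step_solver.step(1)  # run a single SGD iteration
        train_loss[it] = step_solver.net.blobs['loss'].data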
# Nonlinear logistic regression with a two-layer network
def nonlinear_net(hdf5, batch_size):
    n = caffe.NetSpec()
    n.data, n.label = L.HDF5Data(batch_size=batch_size, source=hdf5, ntop=2)
    # define a 40-dimensional hidden layer
    n.ip1 = L.InnerProduct(n.data, num_output=40,
                           weight_filler=dict(type='xavier'))
    # a ReLU provides the nonlinearity
    n.relu1 = L.ReLU(n.ip1, in_place=True)
    # score the (now nonlinear) features
    n.ip2 = L.InnerProduct(n.ip1, num_output=2,
                           weight_filler=dict(type='xavier'))
    # the same accuracy and loss as before
    n.accuracy = L.Accuracy(n.ip2, n.label)
    n.loss = L.SoftmaxWithLoss(n.ip2, n.label)
    return n.to_proto()
train_net_path = caffe_root + \
    'examples/hdf5_classification/nonlinear_auto_train.prototxt'
with open(train_net_path, 'w') as f:
    f.write(str(nonlinear_net(
        caffe_root+'examples/hdf5_classification/data/train.txt', 10)))
test_net_path = caffe_root+'examples/hdf5_classification/nonlinear_auto_test.prototxt'
with open(test_net_path, 'w') as f:
    f.write(
        str(nonlinear_net(caffe_root+'examples/hdf5_classification/data/test.txt', 10)))
solver_path = caffe_root+'examples/hdf5_classification/nonlinear_logreg_solver.prototxt'
with open(solver_path, 'w') as f:
    f.write(str(make_solver(train_net_path, test_net_path)))
caffe.set_mode_gpu()
solver = caffe.get_solver(solver_path)
solver.solve()
accuracy = 0
batch_size = solver.test_nets[0].blobs['data'].num
test_iters = int(len(Xt) / batch_size)
for i in range(test_iters):
    solver.test_nets[0].forward()
    accuracy += solver.test_nets[0].blobs['accuracy'].data
accuracy /= test_iters
print("Accuracy: {:.3f}".format(accuracy))
# Clean up (comment this out if you want to examine the hdf5_classification/data directory)
shutil.rmtree(dirname)