BP_fetch_mnist

最新推荐文章于 2024-09-15 22:37:33 发布

LiuWenChaocsdn

最新推荐文章于 2024-09-15 22:37:33 发布

阅读量183

点赞数

分类专栏：机器学习　文章标签：反向传播 神经网络 手写识别 mnist 梯度下降

本文链接：https://blog.csdn.net/c_air_c/article/details/95597969

版权

机器学习专栏收录该内容

5 篇文章 0 订阅

订阅专栏

%matplotlib inline
import numpy as np
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import matplotlib
import matplotlib.pyplot as plt

mnist = fetch_openml("mnist_784") # load the "mnist_784" dataset

# Print the built-in help text for the sklearn package (output shown below).
help('sklearn')

Help on package sklearn:

NAME
    sklearn

DESCRIPTION
    Machine learning module for Python
    ==================================
    
    sklearn is a Python module integrating classical machine
    learning algorithms in the tightly-knit world of scientific Python
    packages (numpy, scipy, matplotlib).
    
    It aims to provide simple and efficient solutions to learning problems
    that are accessible to everybody and reusable in various contexts:
    machine-learning as a versatile tool for science and engineering.
    
    See http://scikit-learn.org for complete documentation.

PACKAGE CONTENTS
    __check_build (package)
    _build_utils (package)
    _config
    _isotonic
    base
    calibration
    cluster (package)
    compose (package)
    covariance (package)
    cross_decomposition (package)
    datasets (package)
    decomposition (package)
    discriminant_analysis
    dummy
    ensemble (package)
    exceptions
    externals (package)
    feature_extraction (package)
    feature_selection (package)
    gaussian_process (package)
    impute
    isotonic
    kernel_approximation
    kernel_ridge
    linear_model (package)
    manifold (package)
    metrics (package)
    mixture (package)
    model_selection (package)
    multiclass
    multioutput
    naive_bayes
    neighbors (package)
    neural_network (package)
    pipeline
    preprocessing (package)
    random_projection
    semi_supervised (package)
    setup
    svm (package)
    tests (package)
    tree (package)
    utils (package)

FUNCTIONS
    clone(estimator, safe=True)
        Constructs a new estimator with the same parameters.
        
        Clone does a deep copy of the model in an estimator
        without actually copying attached data. It yields a new estimator
        with the same parameters that has not been fit on any data.
        
        Parameters
        ----------
        estimator : estimator object, or list, tuple or set of objects
            The estimator or group of estimators to be cloned
        
        safe : boolean, optional
            If safe is false, clone will fall back to a deep copy on objects
            that are not estimators.
    
    config_context(**new_config)
        Context manager for global scikit-learn configuration
        
        Parameters
        ----------
        assume_finite : bool, optional
            If True, validation for finiteness will be skipped,
            saving time, but leading to potential crashes. If
            False, validation for finiteness will be performed,
            avoiding error.  Global default: False.
        
        working_memory : int, optional
            If set, scikit-learn will attempt to limit the size of temporary arrays
            to this number of MiB (per job when parallelised), often saving both
            computation time and memory on expensive operations that can be
            performed in chunks. Global default: 1024.
        
        Notes
        -----
        All settings, not just those presently modified, will be returned to
        their previous values when the context manager is exited. This is not
        thread-safe.
        
        Examples
        --------
        >>> import sklearn
        >>> from sklearn.utils.validation import assert_all_finite
        >>> with sklearn.config_context(assume_finite=True):
        ...     assert_all_finite([float('nan')])
        >>> with sklearn.config_context(assume_finite=True):
        ...     with sklearn.config_context(assume_finite=False):
        ...         assert_all_finite([float('nan')])
        ... # doctest: +ELLIPSIS
        Traceback (most recent call last):
        ...
        ValueError: Input contains NaN, ...
    
    get_config()
        Retrieve current values for configuration set by :func:`set_config`
        
        Returns
        -------
        config : dict
            Keys are parameter names that can be passed to :func:`set_config`.
    
    set_config(assume_finite=None, working_memory=None)
        Set global scikit-learn configuration
        
        .. versionadded:: 0.19
        
        Parameters
        ----------
        assume_finite : bool, optional
            If True, validation for finiteness will be skipped,
            saving time, but leading to potential crashes. If
            False, validation for finiteness will be performed,
            avoiding error.  Global default: False.
        
            .. versionadded:: 0.19
        
        working_memory : int, optional
            If set, scikit-learn will attempt to limit the size of temporary arrays
            to this number of MiB (per job when parallelised), often saving both
            computation time and memory on expensive operations that can be
            performed in chunks. Global default: 1024.
        
            .. versionadded:: 0.20
    
    show_versions()
        Print useful debugging information

DATA
    __SKLEARN_SETUP__ = False
    __all__ = ['calibration', 'cluster', 'covariance', 'cross_decompositio...

VERSION
    0.20.3

FILE
    /root/anaconda3/lib/python3.7/site-packages/sklearn/__init__.py

# Randomly hold out 15% of the samples as the test set, then print the label-array shapes.
train_X,test_X,train_y,test_y = train_test_split(mnist.data,mnist.target,test_size = 0.15) # split the dataset
print(train_y.shape,test_y.shape)

(59500,) (10500,)

# Preview the first training sample(s) as 28x28 grayscale images.
fig = plt.figure()
k = 0
for i in np.arange(1):
    # Each sample is a flat row of 784 values; fold it back into a 28x28 image.
    grf_mat = train_X[i].reshape((28,28))
    # Three-digit subplot code: 2 rows, 3 columns, panel number i + 1.
    k = 231+ i
    plt.subplot(k)
    plt.imshow(grf_mat,cmap = matplotlib.cm.binary,interpolation = 'nearest')
    plt.axis("off")
    # Use the sample's label as the panel title.
    plt.title(train_y[i])
# Render once, after every panel has been drawn. Calling show() inside the
# loop would flush the figure after the first panel, so raising the sample
# count would scatter the panels across separate figures.
plt.show()

![output_4_0](output_4_0.png)

1、数据规整（假设数据无异常值、空值等清洗问题），仅对数据维度做出改变。

# --- Training set: bring every sample into column-vector form ---
train_X = [np.reshape(sample, (784, 1)) for sample in train_X]

# One-hot encode the training labels: row n gets a 1 in the column of its digit.
train_y_temp = np.zeros((train_y.shape[0], 10))
for row, digit in enumerate(train_y):
    train_y_temp[row][int(digit)] = 1
train_y = [np.reshape(one_hot, (10, 1)) for one_hot in train_y_temp]

# --- Test set: only the inputs are reshaped; the labels are left untouched ---
test_X = [np.reshape(sample, (784, 1)) for sample in test_X]

# Pair every input with its target.
train_data = list(zip(train_X, train_y))
test_data = list(zip(test_X, test_y))

# Report how many (input, target) pairs each split contains.
print(len(test_data))
print(len(train_data))

10500
59500

2、构建神经网络模型

class Network(object):
    """Fully connected feed-forward neural network with sigmoid activations.

    Training uses mini-batch stochastic gradient descent; gradients are
    obtained by backpropagation. ``cost_derivative`` returns ``a - y``, i.e.
    the derivative of the quadratic cost ``0.5 * ||a - y||^2``.

    Attributes:
        size: The ``net_sizes`` sequence given to the constructor.
        layer_num: Number of layers, input layer included.
        weights: One ``(n_out, n_in)`` matrix per pair of adjacent layers.
        baises: One ``(n_out, 1)`` bias column per non-input layer. The
            attribute name (spelling included) is kept for compatibility.
    """

    def __init__(self, net_sizes):
        """Create a network with standard-normal random weights and biases.

        Args:
            net_sizes: Sequence of layer widths, e.g. ``[784, 30, 10]``.
        """
        self.size = net_sizes
        self.layer_num = len(net_sizes)
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(net_sizes[:-1], net_sizes[1:])]
        self.baises = [np.random.randn(x, 1) for x in net_sizes[1:]]

    @staticmethod
    def sigmoid(x):
        """Return the logistic function ``1 / (1 + exp(-x))``, element-wise.

        Only ``exp`` of non-positive values is ever evaluated, so large
        negative inputs cannot overflow (the naive formula does). Declared
        as a static method so it can be called on the class or an instance.
        """
        x = np.asarray(x, dtype=np.float64)
        e = np.exp(-np.abs(x))  # always in (0, 1]
        # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)).
        result = np.where(x >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # [()] turns a 0-d result back into a scalar; n-d arrays are unchanged.
        return result[()]

    @staticmethod
    def sigmoid_prim(x):
        """Return the derivative of the sigmoid, ``s(x) * (1 - s(x))``."""
        y = Network.sigmoid(x)
        return y * (1.0 - y)

    def SGD(self, train_data, test_data, epochs, mini_batch_size, rate):
        """Train the network with mini-batch stochastic gradient descent.

        Args:
            train_data: Mutable sequence of ``(x, y)`` pairs. It is shuffled
                IN PLACE at the start of every epoch.
            test_data: Sequence of ``(x, label)`` pairs scored after each
                epoch, or ``None`` to skip evaluation.
            epochs: Number of passes over ``train_data``.
            mini_batch_size: Number of samples per gradient step.
            rate: Learning rate.
        """
        if test_data is not None:
            test_len = len(test_data)
        n = len(train_data)
        for i in range(epochs):
            np.random.shuffle(train_data)
            mini_batchs = [train_data[k:k + mini_batch_size]
                           for k in range(0, n, mini_batch_size)]
            for mini_batch in mini_batchs:
                self.update_mini_batch(mini_batch, rate)
            if test_data is not None:
                print("Epochs {0} : {1}/{2}".format(i
                                                    , self.evaluate(test_data)
                                                    , test_len))
            else:
                print("Epoch {0} complete".format(i))

    def update_mini_batch(self, mini_batch, rate):
        """Apply one gradient-descent step averaged over ``mini_batch``.

        Args:
            mini_batch: Sequence of ``(x, y)`` pairs. An empty batch is a
                no-op (it would otherwise divide by zero).
            rate: Learning rate.
        """
        if len(mini_batch) == 0:
            return
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.baises]

        # Sum the per-sample gradients over the batch.
        for x, y in mini_batch:
            detal_nabla_w, detal_nabla_b = self.backprop(x, y)
            nabla_w = [nw + dnw for nw, dnw in zip(nabla_w, detal_nabla_w)]
            nabla_b = [nb + dnb for nb, dnb in zip(nabla_b, detal_nabla_b)]

        # Step against the batch-averaged gradient.
        step = rate / len(mini_batch)
        self.weights = [w - step * nw for w, nw in zip(self.weights, nabla_w)]
        self.baises = [b - step * nb for b, nb in zip(self.baises, nabla_b)]

    def backprop(self, x, y):
        """Return the cost gradient for a single sample.

        Args:
            x: Input column vector.
            y: Target column vector.

        Returns:
            ``(nabla_w, nabla_b)``: lists of arrays shaped like
            ``self.weights`` and ``self.baises`` respectively.
        """
        nabla_w = [np.zeros(w.shape) for w in self.weights]
        nabla_b = [np.zeros(b.shape) for b in self.baises]

        # Forward pass: remember every weighted input z and every activation.
        activation = x          # activation of the current layer
        activations = [x]       # activations, layer by layer
        zs = []                 # weighted inputs, layer by layer
        for w, b in zip(self.weights, self.baises):
            z = np.dot(w, activation) + b
            zs.append(z)
            activation = Network.sigmoid(z)
            activations.append(activation)

        # Backward pass.
        # 1. Error term of the output layer.
        detal = self.cost_derivative(activations[-1], y) * Network.sigmoid_prim(zs[-1])
        nabla_w[-1] = np.dot(detal, activations[-2].transpose())
        nabla_b[-1] = detal
        # 2. Propagate the error back through the hidden layers;
        #    i counts layers from the end (i = 2 is the last hidden layer).
        for i in range(2, self.layer_num):
            sp = Network.sigmoid_prim(zs[-i])
            detal = np.dot(self.weights[-i + 1].transpose(), detal) * sp
            nabla_w[-i] = np.dot(detal, activations[-i - 1].transpose())
            nabla_b[-i] = detal
        return nabla_w, nabla_b

    def cost_derivative(self, output_activations, y):
        """Return ``output_activations - y``, both cast to float64."""
        output_activations = output_activations.astype(np.float64)
        y = np.float64(y)
        cost = (output_activations - y)
        return cost

    def evaluate(self, test_data):
        """Return how many samples in ``test_data`` are classified correctly.

        The prediction is the index of the largest output activation; both
        prediction and label are passed through ``int()`` before comparing,
        so labels may be ints or numeric strings.
        """
        test_results = [(np.argmax(self.feedforward(x)), y) for (x, y) in test_data]
        return sum(int(int(x) == int(y)) for (x, y) in test_results)

    def feedforward(self, a):
        """Return the network output for input column vector ``a``."""
        for b, w in zip(self.baises, self.weights):
            a = Network.sigmoid(np.dot(w, a) + b)
        return a