Deep Learning Systems: Algorithms and Implementation - HW0

A few notes from working through the course, along with my homework solutions. I recommend attempting the assignment yourself before reading them.

Hats off to Tianqi Chen and the teaching team.

Course page: https://dlsyscourse.org/lectures/

import gzip
import shutil
import struct

import numpy as np


def add(x, y):
    """ A trivial 'add' function you should implement to get used to the
    autograder and submission system.  The solution to this problem is in the
    homework notebook.

    Args:
        x (Python number or numpy array)
        y (Python number or numpy array)

    Return:
        Sum of x + y
    """
    ### BEGIN YOUR CODE
    return x + y
    ### END YOUR CODE


def parse_mnist(image_filename, label_filename):
    """ Read an images and labels file in MNIST format.  See this page:
    http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Args:
        image_filename (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X,y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded 
                data.  The dimensionality of the data should be 
                (num_examples x input_dim) where 'input_dim' is the full 
                dimension of the data, e.g., since MNIST images are 28x28, it 
                will be 784.  Values should be of type np.float32, and the data 
                should be normalized to have a minimum value of 0.0 and a 
                maximum value of 1.0. The normalization should be applied uniformly
                across the whole dataset, _not_ individual images.

            y (numpy.ndarray[dtype=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values should be of type np.uint8 and
                for MNIST will contain the values 0-9.
    """
    ### BEGIN YOUR CODE
    # Decompress the label file to disk, then read it: a big-endian header
    # (magic number, item count) followed by one uint8 label per example.
    with gzip.open(label_filename, 'rb') as f_in:
        with open(label_filename.split('.gz')[0], 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    with open(label_filename.split('.gz')[0], 'rb') as lbpath:
        magic, n = struct.unpack('>II', lbpath.read(8))
        labels = np.fromfile(lbpath, dtype=np.uint8)

    # Same for the image file: a 16-byte big-endian header (magic, count,
    # rows, cols) followed by raw pixel bytes, reshaped to (num_examples, 784).
    with gzip.open(image_filename, 'rb') as f_in:
        with open(image_filename.split('.gz')[0], 'wb') as f_out:
            shutil.copyfileobj(f_in, f_out)
    with open(image_filename.split('.gz')[0], 'rb') as imgpath:
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        images = np.fromfile(imgpath, dtype=np.uint8).reshape(len(labels), 784)

    # Normalize uniformly over the whole dataset to the range [0.0, 1.0].
    images = images.astype(np.float32)
    min_val, max_val = np.min(images), np.max(images)
    images = (images - min_val) / (max_val - min_val)
    return images, labels
    ### END YOUR CODE
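
A quick sanity check of the loader (a minimal sketch; the data/*.gz paths below are the ones used in the homework notebook and are an assumption about your local layout):

X_tr, y_tr = parse_mnist("data/train-images-idx3-ubyte.gz",
                         "data/train-labels-idx1-ubyte.gz")
print(X_tr.shape, X_tr.dtype, X_tr.min(), X_tr.max())  # expect (60000, 784) float32 0.0 1.0
print(y_tr.shape, y_tr.dtype)                           # expect (60000,) uint8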


def softmax_loss(Z, y):
    """ Return softmax loss.  Note that for the purposes of this assignment,
    you don't need to worry about "nicely" scaling the numerical properties
    of the log-sum-exp computation, but can just compute this directly.

    Args:
        Z (np.ndarray[np.float32]): 2D numpy array of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y (np.ndarray[np.int8]): 1D numpy array of shape (batch_size, )
            containing the true label of each example.

    Returns:
        Average softmax loss over the sample.
    """
    ### BEGIN YOUR CODE
    # One-hot encode the labels, pick out the softmax probability of the true
    # class in each row, and average the negative log-probabilities.
    b = np.zeros((Z.shape[0], Z.shape[1]))
    b[np.arange(Z.shape[0]), y] = 1
    ans = np.exp(Z) * b / (np.exp(Z).sum(axis=1)[:, None])
    return -np.log(ans.sum(axis=1)).mean()
    ### END YOUR CODE
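
For reference, the quantity computed above is the average softmax (cross-entropy) loss over the batch; with B the batch size and k the number of classes:

\ell(Z, y) = \frac{1}{B} \sum_{i=1}^{B} \left( -\log \frac{\exp(Z_{i,y_i})}{\sum_{j=1}^{k} \exp(Z_{ij})} \right)
           = \frac{1}{B} \sum_{i=1}^{B} \left( \log \sum_{j=1}^{k} \exp(Z_{ij}) - Z_{i,y_i} \right)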


def softmax_regression_epoch(X, y, theta, lr = 0.1, batch=100):
    """ Run a single epoch of SGD for softmax regression on the data, using
    the step size lr and specified batch size.  This function should modify the
    theta matrix in place, and you should iterate through batches in X _without_
    randomizing the order.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        theta (np.ndarrray[np.float32]): 2D array of softmax regression
            parameters, of shape (input_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD minibatch

    Returns:
        None
    """
    ### BEGIN YOUR CODE
    num_classes = theta.shape[1]
    for i in range(0, y.shape[0], batch):
        X_batch, y_batch = X[i: i+batch], y[i: i+batch]
        # Forward pass: logits and row-wise softmax probabilities.
        pred = np.matmul(X_batch, theta)
        norm_z = np.exp(pred) / (np.exp(pred).sum(axis=1)[:, None])
        # One-hot encode the labels for this minibatch.
        one_hot = np.zeros((y_batch.shape[0], num_classes), dtype=np.float32)
        one_hot[np.arange(y_batch.shape[0]), y_batch] = 1.
        # Gradient of the average loss w.r.t. the logits, then w.r.t. theta.
        dpred = (norm_z - one_hot) / batch
        dtheta = np.matmul(X_batch.T, dpred)
        theta -= lr * dtheta
    ### END YOUR CODE
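
The in-place update above follows from the gradient of the average softmax loss with respect to \Theta. With X_b the minibatch of B rows, I_y the one-hot label matrix, and the softmax applied row-wise:

\nabla_\Theta \ell = \frac{1}{B} \, X_b^{T} \left( \mathrm{softmax}(X_b \Theta) - I_y \right),
\qquad \Theta \leftarrow \Theta - \mathrm{lr} \cdot \nabla_\Theta \ell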


def nn_epoch(X, y, W1, W2, lr = 0.1, batch=100):
    """ Run a single epoch of SGD for a two-layer neural network defined by the
    weights W1 and W2 (with no bias terms):
        logits = ReLU(X * W1) * W2
    The function should use the step size lr, and the specified batch size (and
    again, without randomizing the order of X).  It should modify the
    W1 and W2 matrices in place.

    Args:
        X (np.ndarray[np.float32]): 2D input array of size
            (num_examples x input_dim).
        y (np.ndarray[np.uint8]): 1D class label array of size (num_examples,)
        W1 (np.ndarray[np.float32]): 2D array of first layer weights, of shape
            (input_dim, hidden_dim)
        W2 (np.ndarray[np.float32]): 2D array of second layer weights, of shape
            (hidden_dim, num_classes)
        lr (float): step size (learning rate) for SGD
        batch (int): size of SGD minibatch

    Returns:
        None
    """
    ### BEGIN YOUR CODE
    num_classes = W2.shape[1]
    for i in range(0, y.shape[0], batch):
        X_batch, y_batch = X[i: i+batch], y[i: i+batch]
        # Forward pass: hidden activations Z1 = ReLU(X W1), then logits.
        Z1 = np.matmul(X_batch, W1)
        layer1_mask = Z1 > 0
        Z1[~layer1_mask] = 0
        pred = np.matmul(Z1, W2)
        norm_z = np.exp(pred) / (np.exp(pred).sum(axis=1)[:, None])
        # One-hot encode the labels for this minibatch.
        one_hot = np.zeros((y_batch.shape[0], num_classes), dtype=np.float32)
        one_hot[np.arange(y_batch.shape[0]), y_batch] = 1.
        # Backward pass: gradient at the logits, then propagate through ReLU.
        G2 = norm_z - one_hot
        G1 = layer1_mask * np.matmul(G2, W2.T)
        W2 -= lr * np.matmul(Z1.T, G2) / batch
        W1 -= lr * np.matmul(X_batch.T, G1) / batch
    ### END YOUR CODE
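
The backward pass in the loop is manual backpropagation through the two layers. With Z_1 = \mathrm{ReLU}(X_b W_1), I_y the one-hot labels, and \circ denoting elementwise multiplication:

G_2 = \mathrm{softmax}(Z_1 W_2) - I_y, \qquad
G_1 = \mathbb{1}\{Z_1 > 0\} \circ \left( G_2 W_2^{T} \right)

\nabla_{W_1} \ell = \frac{1}{B} X_b^{T} G_1, \qquad
\nabla_{W_2} \ell = \frac{1}{B} Z_1^{T} G_2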

Q6: Softmax regression in C++

void softmax_regression_epoch_cpp(const float *X, const unsigned char *y,
								  float *theta, size_t m, size_t n, size_t k,
								  float lr, size_t batch)
{
    /**
     * A C++ version of the softmax regression epoch code.  This should run a
     * single epoch over the data defined by X and y (and sizes m,n,k), and
     * modify theta in place.  Your function will probably want to allocate
     * (and then delete) some helper arrays to store the logits and gradients.
     *
     * Args:
     *     X (const float *): pointer to X data, of size m*n, stored in row
     *          major (C) format
     *     y (const unsigned char *): pointer to y data, of size m
     *     theta (float *): pointer to theta data, of size n*k, stored in row
     *          major (C) format
     *     m (size_t): number of examples
     *     n (size_t): input dimension
     *     k (size_t): number of classes
     *     lr (float): learning rate / SGD step size
     *     batch (int): SGD minibatch size
     *
     * Returns:
     *     (None)
     */

    /// BEGIN YOUR CODE
    
    // Scratch buffers for one minibatch.  Note: variable-length arrays are a
    // compiler extension (accepted by g++/clang++); this also assumes m is a
    // multiple of batch, which holds for the MNIST training set used here.
    float pred[batch][k], norm_z[batch][k], dpred[batch][k], dtheta[n][k];
    for (size_t b_start = 0; b_start < m; b_start += batch)
    {
        // Logits: pred = X_batch * theta, where X_batch is rows
        // [b_start, b_start + batch) of X.
        for (size_t row = 0; row < batch; row++)
        {
            for (size_t col = 0; col < k; col++)
            {
                pred[row][col] = 0;
                for (size_t step = 0; step < n; step++)
                    pred[row][col] += X[(b_start + row) * n + step] * theta[step * k + col];
            }
        }

        // Row-wise softmax of the logits.
        for (size_t row = 0; row < batch; row++)
        {
            float sum = 0;
            for (size_t col = 0; col < k; col++) {
                norm_z[row][col] = exp(pred[row][col]);
                sum += norm_z[row][col];
            }
            for (size_t col = 0; col < k; col++)
                norm_z[row][col] /= sum;
        }

        // Gradient w.r.t. the logits: (softmax - one_hot) / batch.
        for (size_t row = 0; row < batch; row++)
        {
            for (size_t col = 0; col < k; col++)
                dpred[row][col] = norm_z[row][col] / batch;
            unsigned char label = y[b_start + row];
            dpred[row][label] -= 1.0f / batch;
        }

        // Gradient w.r.t. theta: dtheta = X_batch^T * dpred.
        for (size_t row = 0; row < n; row++)
        {
            for (size_t col = 0; col < k; col++)
            {
                dtheta[row][col] = 0;
                for (size_t step = 0; step < batch; step++)
                    dtheta[row][col] += X[(b_start + step) * n + row] * dpred[step][col];
            }
        }

        // SGD update, modifying theta in place.
        for (size_t row = 0; row < n; row++)
        {
            for (size_t col = 0; col < k; col++)
                theta[row * k + col] -= lr * dtheta[row][col];
        }
    }
    
    /// END YOUR CODE
}
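
A minimal way to check the C++ epoch against the NumPy version, assuming the homework's pybind11 wrapper has been built as the simple_ml_ext module exposing softmax_regression_epoch_cpp(X, y, theta, lr, batch); adjust the module and function names if your build differs:

import numpy as np
from simple_ml_ext import softmax_regression_epoch_cpp  # assumed module name from the hw0 build

np.random.seed(0)
X = np.random.randn(500, 5).astype(np.float32)
y = np.random.randint(0, 3, size=500).astype(np.uint8)
theta_py = np.zeros((5, 3), dtype=np.float32)
theta_cpp = np.zeros((5, 3), dtype=np.float32)

# softmax_regression_epoch is the NumPy version defined above.
softmax_regression_epoch(X, y, theta_py, lr=1.0, batch=100)
softmax_regression_epoch_cpp(X, y, theta_cpp, lr=1.0, batch=100)
print(np.linalg.norm(theta_py - theta_cpp))  # should be close to 0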
