hw0
Question 1: A basic add function, and testing/autograding basics
def add(x, y):
    return x + y
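Since this question is mainly about the testing/autograding workflow, a tiny pytest-style check is enough to see it in action (the test name and values below are illustrative, not the actual hw0 test file):

def test_add():
    assert add(1, 2) == 3
    assert add(-1.5, 2.5) == 1.0   # -1.5 + 2.5 is exactly representable, so == is safe here

If the repo is laid out like the handout, something along the lines of python -m pytest -k "add" should pick this up.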
Question 2: Loading MNIST data
import gzip
import struct
import numpy as np

def parse_mnist(image_filename, label_filename):
    with gzip.open(image_filename, 'rb') as f:
        # header: magic, number of images, rows, columns (all big-endian uint32)
        img_magic, img_num, img_h, img_w = struct.unpack('>IIII', f.read(16))
        imgs = np.frombuffer(f.read(img_num * img_h * img_w), dtype=np.uint8).reshape(img_num, img_h*img_w).astype(np.float32)/255
    with gzip.open(label_filename, 'rb') as f:
        labels_magic, labels_num = struct.unpack('>II', f.read(8))
        labels = np.frombuffer(f.read(labels_num), dtype=np.uint8)
    return imgs, labels
Note:
- gzip.open : opens a file with a .gz suffix and reads it as raw bytes.
- struct.unpack : each I is a 4-byte unsigned integer, so four I's read 16 bytes; > means big-endian.
- np.frombuffer : reads the requested number of bytes from a buffer into an array.
- mnist file format :
  - image : the first 16 bytes, taken 4 bytes at a time, are the magic number, the total number of images, and the number of rows and columns of each image; the remaining bytes are the pixel data.
  - label : likewise, the first 8 bytes are the magic number and the total number of labels; the remaining bytes are the label data.
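As a quick sanity check of the header layout just described, one can read only the first 16 bytes of the image file (the path below is a placeholder for wherever the .gz files live):

import gzip, struct

with gzip.open("data/train-images-idx3-ubyte.gz", "rb") as f:   # placeholder path
    magic, num, rows, cols = struct.unpack(">IIII", f.read(16))
print(magic, num, rows, cols)   # for the MNIST training images: 2051 60000 28 28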
Question 3: Softmax loss
First, define a softmax helper outside the loss function:

def softmax(x):
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)

def softmax_loss(Z, y):
    # np.indices(y.shape)[0] is simply np.arange(len(y)): one row index per sample
    return np.mean(-np.log(softmax(Z)[np.indices(y.shape)[0], y]))
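One caveat with this plain softmax: np.exp overflows for large logits. A common variant (a sketch, mathematically equivalent) subtracts the per-row maximum first:

def softmax_stable(x):
    x = x - np.max(x, axis=1, keepdims=True)   # shifting by a constant does not change the softmax value
    return np.exp(x) / np.sum(np.exp(x), axis=1, keepdims=True)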
Question 4: Stochastic gradient descent for softmax regression
def softmax_regression_epoch(X, y, theta, lr=0.1, batch=100):
    # X: m x n, theta: n x k, y: m (class labels)
    # x: batch x n, y_hat: batch
    # Z: batch x k
    for i in range(X.shape[0]//batch):      # split the data into m//batch minibatches
        x = X[i*batch : (i+1)*batch]        # take the next batch rows
        y_hat = y[i*batch : (i+1)*batch]
        Z = softmax(np.matmul(x, theta))
        Z[np.arange(batch), y_hat] -= 1     # subtract 1 at each row's true-class column, i.e. Z - I_y
        grad = np.matmul(x.transpose(), Z) / batch
        theta -= lr * grad
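A rough usage sketch, assuming the MNIST files from Question 2 sit under a data/ directory (the paths and hyperparameters below are just placeholders):

X_tr, y_tr = parse_mnist("data/train-images-idx3-ubyte.gz",
                         "data/train-labels-idx1-ubyte.gz")
theta = np.zeros((X_tr.shape[1], y_tr.max() + 1), dtype=np.float32)
for epoch in range(10):
    softmax_regression_epoch(X_tr, y_tr, theta, lr=0.1, batch=100)
    print(softmax_loss(X_tr @ theta, y_tr))   # training loss should keep dropping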
Question 5: SGD for a two-layer neural network
def nn_epoch(X, y, W1, W2, lr=0.1, batch=100):
    # X: m x n, W1: n x d, W2: d x k, y: m (class labels)
    for i in range(X.shape[0]//batch):
        x = X[i*batch : (i+1)*batch]
        y_hat = y[i*batch : (i+1)*batch]
        z = np.matmul(x, W1)
        np.maximum(0, z, z)                      # in-place ReLU: z = ReLU(x @ W1)
        G2 = softmax(np.matmul(z, W2))
        G2[np.arange(batch), y_hat] -= 1         # G2 = softmax(z @ W2) - I_y
        G1 = np.multiply(np.where(z > 0, 1, 0), G2 @ W2.transpose())   # ReLU mask * (G2 @ W2^T)
        W1 -= lr/batch * (x.transpose() @ G1)
        W2 -= lr/batch * (z.transpose() @ G2)
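To convince yourself that G1 and G2 above really are the gradients, a small finite-difference check on a toy problem works (a sketch; the sizes and seed are arbitrary, and it reuses softmax/softmax_loss from Question 3):

np.random.seed(0)
m, n, d, k = 6, 5, 4, 3                      # samples, input dim, hidden dim, classes
X = np.random.randn(m, n)
y = np.random.randint(0, k, size=m)
W1 = np.random.randn(n, d)
W2 = np.random.randn(d, k)

def loss(W1, W2):
    return softmax_loss(np.maximum(X @ W1, 0) @ W2, y)

# analytic gradient of the loss w.r.t. W1, using the same formulas as the loop above
z = np.maximum(X @ W1, 0)
G2 = softmax(z @ W2)
G2[np.arange(m), y] -= 1
G1 = np.where(z > 0, 1, 0) * (G2 @ W2.T)
grad_W1 = X.T @ G1 / m

# numerical gradient for a single entry of W1
eps = 1e-6
W1p = W1.copy(); W1p[0, 0] += eps
W1m = W1.copy(); W1m[0, 0] -= eps
print(grad_W1[0, 0], (loss(W1p, W2) - loss(W1m, W2)) / (2 * eps))   # should roughly agree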
Question 6: Softmax regression in C++
// Z (m x k) = X (m x n) * Y (n x k); all matrices stored row-major in flat arrays
void mat_mul(const float* X, const float* Y, float* Z, size_t m, size_t n, size_t k)
{
    for(size_t i=0; i<m; ++i)
    {
        for(size_t j=0; j<k; ++j)
        {
            Z[i*k+j] = 0;
            for(size_t l=0; l<n; ++l)
                Z[i*k+j] += X[i*n+l]*Y[l*k+j];
        }
    }
}
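The triple loop is just a row-major matrix multiply on flat arrays. The same indexing written out in Python/NumPy (a throwaway sketch, only for checking the index arithmetic) matches np.matmul:

import numpy as np

def mat_mul_flat(X, Y, m, n, k):
    Z = [0.0] * (m * k)
    for i in range(m):
        for j in range(k):
            for l in range(n):
                Z[i*k + j] += X[i*n + l] * Y[l*k + j]   # same indexing as the C++ version
    return np.array(Z).reshape(m, k)

A, B = np.random.randn(2, 3), np.random.randn(3, 4)
print(np.allclose(mat_mul_flat(A.ravel(), B.ravel(), 2, 3, 4), A @ B))   # True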
// Inside softmax_regression_epoch_cpp(X, y, theta, m, n, k, lr, batch):
// X: m x n, y: m, theta: n x k
for(size_t b=0; b<m/batch; ++b)   // m/batch full minibatches; any remainder is dropped, as in the NumPy version
{
    float* Z = new float[batch*k];
    const float* x = &X[b*batch*n];                      // current minibatch, batch x n
    mat_mul(x, theta, Z, batch, n, k);                   // Z = x * theta
    for(size_t i=0; i<batch*k; ++i) Z[i]=exp(Z[i]);      // exp(Z)
    for(size_t i=0; i<batch; ++i)
    {
        float sum = 0;
        for(size_t j=0; j<k; ++j)
            sum += Z[i*k+j];
        for(size_t j=0; j<k; ++j)
            Z[i*k+j] /= sum;                             // normalize each row: softmax
    }
    for(size_t i=0; i<batch; ++i)
        Z[i*k+y[b*batch+i]] -= 1;                        // Z - I_y
    float* grad = new float[n*k];
    float* x_T = new float[n*batch];
    for(size_t i=0; i<batch; ++i)                        // transpose the minibatch
        for(size_t j=0; j<n; ++j)
            x_T[j*batch+i] = x[i*n+j];
    mat_mul(x_T, Z, grad, n, batch, k);                  // grad = x^T * (Z - I_y)
    for(size_t i=0; i<n*k; ++i)
        theta[i] -= lr/batch * grad[i];
    delete[] Z;
    delete[] x_T;
    delete[] grad;
}
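If the C++ routine is compiled into a Python extension (hw0 wraps it with pybind11; the module name, function name, and exposed signature below are assumptions from the handout and may differ), a quick cross-check against the NumPy version from Question 4 looks like this:

import numpy as np
from simple_ml_ext import softmax_regression_epoch_cpp   # assumed module/function name

np.random.seed(0)
X = np.random.randn(500, 10).astype(np.float32)
y = np.random.randint(0, 3, size=500).astype(np.uint8)
theta_np = np.zeros((10, 3), dtype=np.float32)
theta_cpp = np.zeros((10, 3), dtype=np.float32)

softmax_regression_epoch(X, y, theta_np, lr=1.0, batch=100)
softmax_regression_epoch_cpp(X, y, theta_cpp, lr=1.0, batch=100)   # assumed Python-side signature
print(np.allclose(theta_np, theta_cpp, atol=1e-5))   # expect True if both epochs agree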
ps: finished.