斯坦福CS224N课程_Python简明复习教程_cs224n-python-review-code-updated （第二部分）-CSDN博客

本文是斯坦福CS224N课程中关于Python Numpy库的复习教程，涵盖了创建和操作数组、矩阵变换、数组串联、数组索引、矩阵运算以及速度比较。介绍了Numpy在神经网络数据预处理中的应用，如创建全1矩阵、批量处理等。此外，还简要提及了Matplotlib库的基本可视化功能，如线图、散点图和柱状图。

摘要由CSDN通过智能技术生成

上一篇链接

斯坦福CS224N课程_Python简明复习教程_cs224n-python-review-code-updated （第一部分）_放肆荒原的博客-CSDN博客

0.0.5 4. Numpy

非常强大的python工具，用于处理矩阵和高维数组。

[32]:

import numpy as np

[33]:

# create arrays
# 创建数组
a = np.array([[1,2],[3,4],[5,6]])
print(a)
print(a.shape)
# create all-zero/one arrays
# 创建全0/全1数组
b = np.ones((3,4)) # np.zeros((3,4)) 
print(b)
print(b.shape)
# create identity matrix
# 创建单位矩阵
c = np.eye(5)
print(c)
print(c.shape)

[[1 2]
 [3 4]
 [5 6]]
(3, 2)
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
(3, 4)
[[1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 1.]]
(5, 5)

（请注意上述输出结果的缩进关系）

[34]:

# reshaping arrays
a = np.arange(8)         # shape [8,] -- 用法与for循环中的range()相同
b = a.reshape((4,2))     # shape [4,2]
c = a.reshape((2,2,-1))  # shape [2,2,2] -- -1表示自动填充
d = c.flatten()          # shape [8,]
e = np.expand_dims(a, 0) # [1,8]
f = np.expand_dims(a, 1) # [8,1]
g = e.squeeze()          # shape[8, ] -- 去除不必要的维度 
print(a)
print(b)

[0 1 2 3 4 5 6 7]
[[0 1]
[2 3]
[4 5]
[6 7]]

[35]:

# concatenating arrays
# 串联数组
a = np.ones((4,3))
b = np.ones((4,3))
c = np.concatenate([a,b], 0)
print(c.shape)
d = np.concatenate([a,b], 1)
print(d.shape)

(8, 3)
(4, 6)

[36]:

# one application is to create a batch for NN
# 一个应用是为神经网络（NN）创建一个批次（batch）
x1 = np.ones((32,32,3))
x2 = np.ones((32,32,3))
x3 = np.ones((32,32,3))
# --> to create a batch of shape (3,32,32,3) 
# --> 创建shape (3,32,32,3)的矩阵
x = [x1, x2, x3]
x = [np.expand_dims(xx, 0) for xx in x] # xx 的shape变为(1,32,32,3) 
x = np.concatenate(x, 0)
print(x.shape)

(3, 32, 32, 3)

（相当有用的技巧，数据处理时经常会用到）

[37]:

# access array slices by index
# 按索引访问数组片
a = np.zeros([10, 10])
a[:3] = 1
a[:, :3] = 2
a[:3, :3] = 3
rows = [4,6,7]
cols = [9,3,5]
a[rows, cols] = 4
print(a)

[[3. 3. 3. 1. 1. 1. 1. 1. 1. 1.]
[3. 3. 3. 1. 1. 1. 1. 1. 1. 1.]
[3. 3. 3. 1. 1. 1. 1. 1. 1. 1.]
[2. 2. 2. 0. 0. 0. 0. 0. 0. 0.]
[2. 2. 2. 0. 0. 0. 0. 0. 0. 4.]
[2. 2. 2. 0. 0. 0. 0. 0. 0. 0.]
[2. 2. 2. 4. 0. 0. 0. 0. 0. 0.]
[2. 2. 2. 0. 0. 4. 0. 0. 0. 0.]
[2. 2. 2. 0. 0. 0. 0. 0. 0. 0.]
[2. 2. 2. 0. 0. 0. 0. 0. 0. 0.]]

[38]:

# transposition
# 转置
a = np.arange(24).reshape(2,3,4)
print(a.shape)
print(a)
a = np.transpose(a, (2,1,0)) #交换第0轴和第2轴
print(a.shape)
print(a)

(2, 3, 4)
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]]

 [[12 13 14 15]
  [16 17 18 19]
  [20 21 22 23]]]

(4, 3, 2)
[[[ 0 12]
  [ 4 16]
  [ 8 20]]

 [[ 1 13]
  [ 5 17]
  [ 9 21]]

 [[ 2 14]
  [ 6 18]
  [10 22]]

 [[ 3 15]
  [ 7 19]
  [11 23]]]

（请注意输出结果的缩进和方括号，表达了维度关系）

[39]:

c = np.array([[1,2],[3,4]])
# pinv is pseudo inversion for stability
# 用pinv计算伪逆矩阵，数值上更稳定
print(np.linalg.pinv(c))
# l2 norm by default, read documentation for more options 
# 默认计算l2范数（对矩阵即Frobenius范数），请阅读文档查看更多选项
print(np.linalg.norm(c))
# summing a matrix
# 矩阵求和
print(np.sum(c))
# the optional axis parameter
# 可选的轴参数
print(c)
print(np.sum(c, axis=0)) # 沿轴0求和 
print(np.sum(c, axis=1)) # 沿轴1求和

[[-2.   1. ]
 [ 1.5 -0.5]]
5.477225575051661
10
[[1 2]
 [3 4]]
[4 6]
[3 7]

[40]:

# dot product
# 点乘
c = np.array([1,2])
d = np.array([3,4])
print(np.dot(c,d))

[41]:

# matrix multiplication
# 矩阵乘法
a = np.ones((4,3)) # 4,3
b = np.ones((3,2)) # 3,2 --> 4,2 
print(a @ b)       # 与a.dot(b)相同
c = a @ b          # (4,2)

# automatic repetition along axis
# 沿轴自动重复
d = np.array([1,2,3,4]).reshape(4,1) 
print(c + d)

# handy for batch operation
# 方便批操作
batch = np.ones((3,32))
weight = np.ones((32,10))
bias = np.ones((1,10))
print((batch @ weight + bias).shape)

[[3. 3.]
[3. 3.]
[3. 3.]
[3. 3.]]
[[4. 4.]
[5. 5.]
[6. 6.]
[7. 7.]]
(3, 10)

[42]:

# speed test: numpy vs list
# 速度测试：numpy vs list
a = np.ones((100,100))
b = np.ones((100,100))

def matrix_multiplication(X, Y):
    """Multiply two matrices using plain nested Python loops.

    This is the deliberately naive, non-vectorized baseline used to compare
    against NumPy's ``@`` operator.

    Args:
        X: Left operand, an indexable sequence of rows (e.g. a list of lists
            or a 2-D array). Each row must have at least ``len(Y)`` entries.
        Y: Right operand, an indexable sequence of rows.

    Returns:
        A list of lists with ``len(X)`` rows and ``len(Y[0])`` columns, where
        entry ``[i][j]`` is the sum over ``k`` of ``X[i][k] * Y[k][j]``.
        An empty ``X`` yields an empty list.
    """
    product = []
    for x_row in X:
        out_row = []
        # Column count is read from Y's first row, once per output row.
        for j in range(len(Y[0])):
            acc = 0
            for k in range(len(Y)):
                acc += x_row[k] * Y[k][j]
            out_row.append(acc)
        product.append(out_row)
    return product

import time

# run numpy matrix multiplication for 10 times
# 运行numpy矩阵乘法10次
start = time.time() 
for _ in range(10):
    a@b
end = time.time()
print("numpy spends {} seconds".format(end-start))

# run list matrix multiplication for 10 times
# 运行列表矩阵乘法10次
start = time.time() 
for _ in range(10):
    matrix_multiplication(a,b)
end = time.time()
print("list operation spends {} seconds".format(end-start))

# the difference gets more significant as matrices grow in size!
# 随着矩阵尺寸的增大，差异变得更加显著！

numpy spends 0.001990079879760742 seconds
list operation spends 8.681961059570312 seconds

[43]:

# element-wise operations, for examples
# 元素级运算，如：
np.log(a)
np.exp(a)
np.sin(a)
# operation with scalar is interpreted as element-wise 
# 带有标量的运算被解释为元素级运算
a*3

array([[3., 3., 3., ..., 3., 3., 3.],
[3., 3., 3., ..., 3., 3., 3.],
[3., 3., 3., ..., 3., 3., 3.],
...,
[3., 3., 3., ..., 3., 3., 3.],
[3., 3., 3., ..., 3., 3., 3.],
[3., 3., 3., ..., 3., 3., 3.]])

0.0.6 5. Matplotlib

强大的可视化工具，有许多在线教程。我们这里只讲基本的。

[44]:

import matplotlib.pyplot as plt

[45]:

# line plot
# 画线
x = [1,2,3]
y = [1,3,2]
plt.plot(x,y)

[<matplotlib.lines.Line2D at 0x17b1b50a040>]

[46]:

# scatter plot
# 画点
plt.scatter(x,y)

[47]:

# bar plots
# 柱状图
plt.bar(x,y)

[48]:

# plot configurations
# 绘图配置
x = [1,2,3]
y1 = [1,3,2]
y2 = [4,0,4]

# set figure size
# 设置图形大小
plt.figure(figsize=(5,5))

# set axes
# 设置轴
plt.xlim(0,5)
plt.ylim(0,5)
plt.xlabel("x label")
plt.ylabel("y label")

# add title
# 添加标题
plt.title("My Plot")

plt.plot(x,y1, label="data1", color="red", marker="*")
plt.plot(x,y2, label="data2", color="green", marker=".")
plt.legend()