import numpy as np
t1 = np.array([1, 2, 3])
t1
array([1, 2, 3])
t2 = np.array(range(10))
t2
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
t3 = np.arange(12)
t3
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
t4 = np.arange(4, 8, 2)
t4
array([4, 6])
t4.dtype
dtype('int32')
修改数据类型的方法
t4 = np.array(range(10), dtype='float64')
t4
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
t5 = t4.astype('float32')
t5.dtype
dtype('float32')
import random
t6 = np.array([random.random() for i in range(1, 10)])
t6
array([0.69002913, 0.60309892, 0.57732269, 0.77670478, 0.49087636,
0.05686646, 0.50341497, 0.78348396, 0.97891676])
t7 = np.round(t6, 2)
t7
array([0.69, 0.6 , 0.58, 0.78, 0.49, 0.06, 0.5 , 0.78, 0.98])
生成特殊数组
np.zeros((3, 4))
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
np.ones((3,))
array([1., 1., 1.])
np.random.random((4, 3, 4))
array([[[0.88949406, 0.516348 , 0.9338736 , 0.8761512 ],
[0.80969742, 0.0557918 , 0.84568932, 0.70281596],
[0.97124603, 0.62063392, 0.20347112, 0.60842865]],
[[0.83164314, 0.75154962, 0.55629461, 0.7950253 ],
[0.71041516, 0.46290726, 0.66937629, 0.66167786],
[0.30038413, 0.72894563, 0.42310383, 0.96849788]],
[[0.55985491, 0.60339171, 0.91343052, 0.99101353],
[0.24031093, 0.18791789, 0.13428558, 0.23939572],
[0.19660544, 0.14944018, 0.45764254, 0.57232685]],
[[0.70098855, 0.92525333, 0.19818721, 0.54227531],
[0.37353857, 0.57298138, 0.89641575, 0.13471736],
[0.17977541, 0.18406087, 0.26044415, 0.37039553]]])
np.random.rand(3,2)
array([[0.77886627, 0.20044878],
[0.82558843, 0.74687223],
[0.04423197, 0.82414292]])
np.random.randint(10,size=(5,4))
array([[1, 5, 8, 0],
[4, 2, 7, 6],
[4, 8, 7, 8],
[9, 7, 2, 3],
[3, 4, 5, 5]])
np.random.random_sample()
0.816308250363168
np.random.randint(0,10,5)
array([2, 4, 8, 4, 5])
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)
array([ 0.03436545, -0.08164443, 0.13160285, 0.0430074 , -0.05111399,
0.09617187, -0.10320152, -0.14053349, -0.19079794, -0.0438002 ])
np.set_printoptions(precision=2)
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)
array([ 0.14, 0.02, -0.16, -0.07, -0.03, -0.14, -0.08, 0.03, -0.02,
-0.08])
arr = np.arange(10)
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)
array([-0.13, 0. , 0.04, 0.01, 0.06, 0.04, 0.01, -0.15, -0.04,
0.08])
arr = np.arange(10)
print(arr)
[0 1 2 3 4 5 6 7 8 9]
np.random.shuffle(arr)
arr
array([7, 1, 2, 0, 9, 4, 6, 5, 8, 3])
数组的形状
t1 = np.arange(12)
t1
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
t1.ndim
1
t1.shape
(12,)
t1.reshape((2, 6))
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11]])
t1.reshape((12,))
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
t2 = np.arange(24).reshape((2,3,4))
t2.shape[0],t2.shape[1],t2.shape[2]
(2, 3, 4)
t2.reshape((t2.shape[0]*t2.shape[1],t2.shape[2]))
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]])
t2.flatten()
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23])
axis轴
添加新轴–np.newaxis
ar = np.arange(10)
ar
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ar = ar[np.newaxis,:]
ar
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
ar.shape
(1, 10)
ar = ar[:,np.newaxis]
ar
array([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]])
ar.shape
(1, 1, 10)
压缩–squeeze
ar = ar.squeeze()
ar
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ar.shape
(10,)
文件读取
file_path = './can.csv'
np.loadtxt(file_path, delimiter=',', dtype='int')
array([[ 1, 20, 1, 0, 0],
[ 1, 20, 1, 0, 0],
[ 1, 20, 0, 0, 0],
...,
[ 3, 100, 1, 0, 0],
[ 3, 100, 0, 0, 0],
[ 3, 100, 1, 0, 0]])
np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)
array([[ 1, 1, 1, ..., 3, 3, 3],
[ 20, 20, 20, ..., 100, 100, 100],
[ 1, 1, 0, ..., 1, 0, 1],
[ 0, 0, 0, ..., 0, 0, 0],
[ 0, 0, 0, ..., 0, 0, 0]])
转置
t1 = np.arange(24).reshape(4,6)
t1
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
t1.T
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
np.transpose(t1)
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
t1.swapaxes(1, 0)
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
t1.swapaxes(0, 1)
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
file_path = './can.csv'
t2 = np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)
t2.T
array([[ 1, 20, 1, 0, 0],
[ 1, 20, 1, 0, 0],
[ 1, 20, 0, 0, 0],
...,
[ 3, 100, 1, 0, 0],
[ 3, 100, 0, 0, 0],
[ 3, 100, 1, 0, 0]])
t2[2]
array([1, 1, 0, ..., 1, 0, 1])
t2.T[[2,8,3]]
array([[ 1, 20, 0, 0, 0],
[ 1, 20, 0, 0, 0],
[ 1, 20, 0, 0, 0]])
t2.T[[2,8,3], :]
array([[ 1, 20, 0, 0, 0],
[ 1, 20, 0, 0, 0],
[ 1, 20, 0, 0, 0]])
t2.T[2,3]
0
t2[:,[2,8,2]]
array([[ 1, 1, 1],
[20, 20, 20],
[ 0, 0, 0],
[ 0, 0, 0],
[ 0, 0, 0]])
数据修改
t2[t2<2] = 3
t2
array([[ 3, 3, 3, ..., 3, 3, 3],
[ 20, 20, 20, ..., 100, 100, 100],
[ 3, 3, 3, ..., 3, 3, 3],
[ 3, 3, 3, ..., 3, 3, 3],
[ 3, 3, 3, ..., 3, 3, 3]])
t2[t2>10] = 30
t2
array([[ 3, 3, 3, ..., 3, 3, 3],
[30, 30, 30, ..., 30, 30, 30],
[ 3, 3, 3, ..., 3, 3, 3],
[ 3, 3, 3, ..., 3, 3, 3],
[ 3, 3, 3, ..., 3, 3, 3]])
np.where(t2<4, 100, 200)
array([[100, 100, 100, ..., 100, 100, 100],
[200, 200, 200, ..., 200, 200, 200],
[100, 100, 100, ..., 100, 100, 100],
[100, 100, 100, ..., 100, 100, 100],
[100, 100, 100, ..., 100, 100, 100]])
t2.clip(4, 20)
array([[ 4, 4, 4, ..., 4, 4, 4],
[20, 20, 20, ..., 20, 20, 20],
[ 4, 4, 4, ..., 4, 4, 4],
[ 4, 4, 4, ..., 4, 4, 4],
[ 4, 4, 4, ..., 4, 4, 4]])
t2 = t2.astype(float)
t2
array([[ 3., 3., 3., ..., 3., 3., 3.],
[30., 30., 30., ..., 30., 30., 30.],
[ 3., 3., 3., ..., 3., 3., 3.],
[ 3., 3., 3., ..., 3., 3., 3.],
[ 3., 3., 3., ..., 3., 3., 3.]])
t2[2, 2] = np.nan
t2
array([[ 3., 3., 3., ..., 3., 3., 3.],
[30., 30., 30., ..., 30., 30., 30.],
[ 3., 3., nan, ..., 3., 3., 3.],
[ 3., 3., 3., ..., 3., 3., 3.],
[ 3., 3., 3., ..., 3., 3., 3.]])
np.count_nonzero(t2)
765000
np.count_nonzero(t2 != t2)
1
np.isnan(t2)
array([[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, True, ..., False, False, False],
[False, False, False, ..., False, False, False],
[False, False, False, ..., False, False, False]])
np.sum(t2)
nan
t3 = np.arange(12).reshape(3, 4)
t3
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
np.sum(t3)
66
np.sum(t3, axis=0)
array([12, 15, 18, 21])
np.sum(t3, axis=1)
array([ 6, 22, 38])
t2
np.mean(t2, axis=0)
array([8.4, 8.4, nan, ..., 8.4, 8.4, 8.4])
np.mean(t2, axis=1)
array([ 2.67, 30. , nan, 2.99, 2.99])
np.median(t2, axis=0)
array([ 3., 3., nan, ..., 3., 3., 3.])
np.median(t2, axis=1)
array([ 3., 30., nan, 3., 3.])
np.ptp(t2, axis=0)
array([27., 27., nan, ..., 27., 27., 27.])
np.ptp(t2, axis=1)
array([ 1., 0., nan, 5., 4.])
np.std(t2, axis=0)
array([10.8, 10.8, nan, ..., 10.8, 10.8, 10.8])
np.std(t2, axis=1)
array([0.47, 0. , nan, 0.12, 0.08])
numpy处理缺失值和nan
t1 = np.arange(12).reshape(3,4).astype(float)
t1
array([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]])
t1[1,2:] = np.nan
t1
array([[ 0., 1., 2., 3.],
[ 4., 5., nan, nan],
[ 8., 9., 10., 11.]])
def fill_ndarry(arr=None):
    """Fill NaN entries of a 2-D float array with their column means.

    Each column is handled independently: its NaN entries are replaced by
    the mean of that column's non-NaN entries. The array is modified in
    place and is also returned.

    Args:
        arr: 2-D floating-point ndarray to repair. When omitted, the
            module-level ``t1`` is used, so the original zero-argument
            call keeps working.

    Returns:
        The same array object that was passed in (or ``t1``), after filling.
    """
    if arr is None:
        arr = t1  # backward-compatible default: operate on the global array
    for col_idx in range(arr.shape[1]):
        column = arr[:, col_idx]  # basic slice -> a view, so writes land in arr
        nan_mask = np.isnan(column)
        # Skip clean columns, and columns that are entirely NaN: there is
        # nothing to average, and the mean of an empty selection would only
        # emit a RuntimeWarning and write NaN back.
        if nan_mask.any() and not nan_mask.all():
            column[nan_mask] = column[~nan_mask].mean()
    return arr
# Script entry point: fill the NaN cells and show the repaired array.
if __name__ == '__main__':
    data = fill_ndarry()
    print(data)
[[ 0. 1. 2. 3.]
[ 4. 5. 6. 7.]
[ 8. 9. 10. 11.]]
数组的拼接
a = np.array([[123,456,789], [3214,456,134]])
a
array([[ 123, 456, 789],
[3214, 456, 134]])
b= np.array([[1235,3124,432], [43,13,134]])
b
array([[1235, 3124, 432],
[ 43, 13, 134]])
c = np.concatenate((a, b))
c
array([[ 123, 456, 789],
[3214, 456, 134],
[1235, 3124, 432],
[ 43, 13, 134]])
c.shape
(4, 3)
v = np.concatenate((a,b), axis=0)
v
array([[ 123, 456, 789],
[3214, 456, 134],
[1235, 3124, 432],
[ 43, 13, 134]])
v = np.concatenate((a,b), axis=1)
v
array([[ 123, 456, 789, 1235, 3124, 432],
[3214, 456, 134, 43, 13, 134]])
np.hstack((a,b))
array([[ 123, 456, 789, 1235, 3124, 432],
[3214, 456, 134, 43, 13, 134]])
np.vstack((a,b))
array([[ 123, 456, 789],
[3214, 456, 134],
[1235, 3124, 432],
[ 43, 13, 134]])
行列交换
t = np.arange(12).reshape(3, 4)
t
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
t[[1,2],:] = t[[2,1], :]
t
array([[ 0, 1, 2, 3],
[ 8, 9, 10, 11],
[ 4, 5, 6, 7]])
t[:,[0,2]] = t[:,[2,0]]
t
array([[ 2, 1, 0, 3],
[10, 9, 8, 11],
[ 6, 5, 4, 7]])
np.argmax(t, axis=0)
array([1, 1, 1, 1], dtype=int64)
np.argmin(t,axis=0)
array([0, 0, 0, 0], dtype=int64)
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
np.random.seed(10)
t = np.random.randint(0,20,(3,4))
t
array([[ 9, 4, 15, 0],
[17, 16, 17, 8],
[ 9, 0, 10, 8]])
t_new = t.copy()
t_new
array([[ 9, 4, 15, 0],
[17, 16, 17, 8],
[ 9, 0, 10, 8]])
运算–真值判断
x = np.array([1,1,1,0])
y = np.array([1,1,1,1])
print(x)
y
[1 1 1 0]
array([1, 1, 1, 1])
与
np.logical_and(x,y)
array([ True, True, True, False])
或
np.logical_or(x,y)
array([ True, True, True, True])
非
# logical_not takes a single operand; a second positional argument is the
# `out` buffer and would be overwritten, so y must not be passed here.
np.logical_not(x)
array([False, False, False, True])