Numpy练习
import numpy as np
a = np.array([1,2,3,4,5])
b = np.array(range(1,6))
c = np.arange(1,6)
数组类名
type(a)
type(b)
type(c)
numpy.ndarray
数据类型
a.dtype
dtype('int32')
b.dtype
dtype('int32')
c.dtype
dtype('int32')
int8,uint8–i1,u1(类型代码)
int16,uint16–i2,u2
int32,uint32–i4,u4
int64,uint64–i8,u8
float16 – f2
float32 – f4,f
float64 – f8,d
float128 --f16,g
complex64,complex128 – c8,c16
complex256 --c32
bool – ?
创建数组时指定类型
a = np.array([1,0,1,0,2],dtype='?')
a
array([ True, False, True, False, True], dtype=bool)
a = np.array([1,0,1,0,1],dtype=np.bool)
a
array([ True, False, True, False, True], dtype=bool)
修改数组类型
a.astype('f2')
array([ 1., 0., 1., 0., 1.], dtype=float16)
b = a.astype('f2')
np.round(b,2)
array([ 1., 0., 1., 0., 1.], dtype=float16)
数组的形状
a = np.arange(12)
a.shape #查看形状
(12,)
a
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
a.reshape(3,4)
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
a.shape
(12,)
a
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
reshape不会修改原数组,而是返回改变形状后的结果,所以没有变量接收是没有意义的
b = a.reshape(3,4)
b
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
b = b.reshape(12,)
b
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
b.shape
(12,)
b = a.reshape(3,4)
b
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
b = b.flatten() #不需要知道元素个数直接变1维数组
b.shape
(12,)
数组计算
a = np.arange(16)
a = a.reshape(4,4)
a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
a + 1 #加法
array([[ 1, 2, 3, 4],
[ 5, 6, 7, 8],
[ 9, 10, 11, 12],
[13, 14, 15, 16]])
a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
a * 2 #乘法
array([[ 0, 2, 4, 6],
[ 8, 10, 12, 14],
[16, 18, 20, 22],
[24, 26, 28, 30]])
numpy的广播机制:每个元素都会进行运算
b = np.array([[1,2,3,4],[3,5,7,9]])
b
array([[1, 2, 3, 4],
[3, 5, 7, 9]])
a + b
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-50-f96fb8f649b6> in <module>()
----> 1 a + b
ValueError: operands could not be broadcast together with shapes (4,4) (2,4)
a * b
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-51-50927f39610b> in <module>()
----> 1 a * b
ValueError: operands could not be broadcast together with shapes (4,4) (2,4)
a - b
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-52-a5f966a4b1f4> in <module>()
----> 1 a - b
ValueError: operands could not be broadcast together with shapes (4,4) (2,4)
c = np.array([1,2,3,4])
c
array([1, 2, 3, 4])
a + c
array([[ 1, 3, 5, 7],
[ 5, 7, 9, 11],
[ 9, 11, 13, 15],
[13, 15, 17, 19]])
a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]])
c.shape
(4,)
d = np.arange(32).reshape(2,4,4)
d
array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15]],
[[16, 17, 18, 19],
[20, 21, 22, 23],
[24, 25, 26, 27],
[28, 29, 30, 31]]])
d + a
array([[[ 0, 2, 4, 6],
[ 8, 10, 12, 14],
[16, 18, 20, 22],
[24, 26, 28, 30]],
[[16, 18, 20, 22],
[24, 26, 28, 30],
[32, 34, 36, 38],
[40, 42, 44, 46]]])
只有后缘维度轴长度相符或其中一方长度为1才能进行计算
读取文件
np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)
fname:文件、字符串或产生器,可以是.gz,.bz2
dtype:数据类型,默认float
delimiter:分隔符,默认空格
skiprows:跳过前N行
usecols:读取指定列,索引、元组类型
unpack:True时每列(属性)分别写入不同的数组变量;False(默认)写入同一数组变量
转置
t = np.arange(36).reshape(6,6)
t.transpose()
array([[ 0, 6, 12, 18, 24, 30],
[ 1, 7, 13, 19, 25, 31],
[ 2, 8, 14, 20, 26, 32],
[ 3, 9, 15, 21, 27, 33],
[ 4, 10, 16, 22, 28, 34],
[ 5, 11, 17, 23, 29, 35]])
t.swapaxes(1,0)
array([[ 0, 6, 12, 18, 24, 30],
[ 1, 7, 13, 19, 25, 31],
[ 2, 8, 14, 20, 26, 32],
[ 3, 9, 15, 21, 27, 33],
[ 4, 10, 16, 22, 28, 34],
[ 5, 11, 17, 23, 29, 35]])
t.T
array([[ 0, 6, 12, 18, 24, 30],
[ 1, 7, 13, 19, 25, 31],
[ 2, 8, 14, 20, 26, 32],
[ 3, 9, 15, 21, 27, 33],
[ 4, 10, 16, 22, 28, 34],
[ 5, 11, 17, 23, 29, 35]])
import numpy as np
a = np.arange(12).reshape(3,4)
a
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
索引取行
a[0]
array([0, 1, 2, 3])
a[2]
array([ 8, 9, 10, 11])
取一列
a[:,1]
array([1, 5, 9])
a[:,2]
array([ 2, 6, 10])
切片取多行
a[0:2]
array([[0, 1, 2, 3],
[4, 5, 6, 7]])
切片取多列
a[:,0:2]
array([[0, 1],
[4, 5],
[8, 9]])
a[:,2:4]
array([[ 2, 3],
[ 6, 7],
[10, 11]])
索引列表取多行多列
a[[1,2],:]
array([[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
a[:,[2,3]]
array([[ 2, 3],
[ 6, 7],
[10, 11]])
a = np.arange(24).reshape(4,6)
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
a[:,2:4] = 0
a
array([[ 0, 1, 0, 0, 4, 5],
[ 6, 7, 0, 0, 10, 11],
[12, 13, 0, 0, 16, 17],
[18, 19, 0, 0, 22, 23]])
行列交叉点
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
a[1,[2,3]]
array([8, 9])
a[1:2,2:3]
array([[8]])
a[1:3,2:4]
array([[ 8, 9],
[14, 15]])
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
取特定位置的一个或多个点
a[[1,2],[2,3]]
array([ 8, 15])
a[[0,2],[0,3]]
array([ 0, 15])
a中小于10的替换为0
a < 10
array([[ True, True, True, True, True, True],
[ True, True, True, True, False, False],
[False, False, True, True, False, False],
[False, False, True, True, False, False]])
a[a<10] = 0
a
array([[ 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 10, 11],
[12, 13, 0, 0, 16, 17],
[18, 19, 0, 0, 22, 23]])
三元运算符
把小于10的替换为0,大于等于10的替换为10
a = np.arange(24).reshape(4,6)
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
np.where(a<10,0,10)
array([[ 0, 0, 0, 0, 0, 0],
[ 0, 0, 0, 0, 10, 10],
[10, 10, 10, 10, 10, 10],
[10, 10, 10, 10, 10, 10]])
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
clip裁剪
clip(x,y),小于x的替换为x,大于y的替换为y
a
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
a.clip(5,10)
array([[ 5, 5, 5, 5, 5, 5],
[ 6, 7, 8, 9, 10, 10],
[10, 10, 10, 10, 10, 10],
[10, 10, 10, 10, 10, 10]])
nan:表示不是一个数字。数据缺失时,或出现不合适计算时(0/0,无穷大-无穷大等)。type:float
inf:无穷大。一个非零数字除以0时。type:float
a=a.astype(float)
a[3,3] = np.nan
a
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., nan, 22., 23.]])
a[3,3] = np.inf
a
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., inf, 22., 23.]])
np.nan == np.nan
False
np.count_nonzero(a) #统计非零元素个数
23
a != a
array([[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, False, False, False, False, False],
[False, False, False, True, False, False]])
np.count_nonzero(a!=a) #统计nan
1
np.count_nonzero(np.isnan(a)) #判断nan
1
常用统计函数
1,求和
a
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., nan, 22., 23.]])
a.sum(axis = 0) #列方向求和
array([36., 40., 44., nan, 52., 56.])
np.sum(a,axis = 0)
array([36., 40., 44., nan, 52., 56.])
a.sum() #全部之和
nan
nan与任何值计算都为nan
2,均值
a.mean(axis = 1) #行方向平均值
array([ 2.5, 8.5, 14.5, nan])
np.mean(a,axis = 1)
array([ 2.5, 8.5, 14.5, nan])
3,中值
np.median(a,axis = 0)
/Library/Frameworks/Python.framework/Versions/3.6/lib/python3.6/site-packages/numpy/lib/function_base.py:3400: RuntimeWarning: Invalid value encountered in median for 1 results
r = func(a, **kwargs)
array([ 9., 10., 11., nan, 13., 14.])
4,最大值
a.max(axis = 1)
array([ 5., 11., 17., nan])
5,最小值
a.min(axis = 1)
array([ 0., 6., 12., nan])
6,极差(最大值-最小值)
np.ptp(a,axis = 1)
array([ 5., 5., 5., nan])
7,标准差
a.std(axis = 1)
array([1.70782513, 1.70782513, 1.70782513, nan])
以列均值替换nan
a.shape
(4, 6)
a
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., nan, 22., 23.]])
a[a == a].mean()
11.08695652173913
def fill_nan_mean(t):
    """Replace each NaN in a 2-D array with the mean of its column, in place.

    For every column, the mean of the non-NaN entries is computed and
    written into the NaN positions of that column. Columns without NaN are
    left untouched. Columns consisting only of NaN are also left untouched,
    because there is no value to average (this avoids the "mean of empty
    slice" RuntimeWarning).

    Parameters
    ----------
    t : numpy.ndarray
        Array indexed as ``t[:, i]``; intended for 2-D float arrays.
        It is modified in place.

    Returns
    -------
    numpy.ndarray
        The same array object ``t``, after filling.
    """
    for i in range(t.shape[1]):
        # Basic slicing yields a view, so assigning into it updates ``t``.
        temp_col = t[:, i]
        nan_mask = np.isnan(temp_col)
        # Fill only when the column has NaN and at least one valid value.
        if nan_mask.any() and not nan_mask.all():
            temp_col[nan_mask] = temp_col[~nan_mask].mean()
    return t
a
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., nan, 22., 23.]])
fill_nan_mean(a)
array([[ 0., 1., 2., 3., 4., 5.],
[ 6., 7., 8., 9., 10., 11.],
[12., 13., 14., 15., 16., 17.],
[18., 19., 20., 9., 22., 23.]])
数组拼接
t1 = np.arange(12).reshape(2,6)
t2 = np.arange(25,37).reshape(2,6)
t1
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11]])
t2
array([[25, 26, 27, 28, 29, 30],
[31, 32, 33, 34, 35, 36]])
t3 = np.vstack((t1,t2)) #上下拼接,纵向
t3
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[25, 26, 27, 28, 29, 30],
[31, 32, 33, 34, 35, 36]])
np.hstack((t1,t2)) #横向
array([[ 0, 1, 2, 3, 4, 5, 25, 26, 27, 28, 29, 30],
[ 6, 7, 8, 9, 10, 11, 31, 32, 33, 34, 35, 36]])
行列交换
t3[[1,2]] = t3[[2,1]] #行交换
t3
array([[ 0, 1, 2, 3, 4, 5],
[25, 26, 27, 28, 29, 30],
[ 6, 7, 8, 9, 10, 11],
[31, 32, 33, 34, 35, 36]])
t3[:,[3,4]] = t3[:,[4,3]]
t3 #列交换
array([[ 0, 1, 2, 4, 3, 5],
[25, 26, 27, 29, 28, 30],
[ 6, 7, 8, 10, 9, 11],
[31, 32, 33, 35, 34, 36]])
获取最大最小值位置
np.argmax(t3,axis = 1)
array([5, 5, 5, 5])
np.argmin(t3,axis = 0)
array([0, 0, 0, 0, 0, 0])
创建全0数组
np.zeros((3,4))
array([[0., 0., 0., 0.],
[0., 0., 0., 0.],
[0., 0., 0., 0.]])
创建全1数组
np.ones((3,4))
array([[1., 1., 1., 1.],
[1., 1., 1., 1.],
[1., 1., 1., 1.]])
创建对角线为1的方阵(单位矩阵)
np.eye(3)
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])