numpy数值计算基础
numpy数组
import numpy as np
# Build a 1-D array and a 4x3 2-D array from plain Python sequences.
arr1 = np.array((1, 2, 3, 4))
arr2 = np.array([[start, start + 1, start + 2] for start in range(1, 5)])

# Show the contents together with shape / element count / element type.
print(arr1, arr1.shape, arr1.size, arr1.dtype)
print(arr2, arr2.dtype, arr2.size)

# Two ways to obtain a 3x4 layout: assigning to .shape changes arr2 itself,
# whereas reshape() hands back a separate array object.
arr2.shape = (3, 4)
arr3 = arr2.reshape((3, 4))
print(arr2, '\n', arr3)
[1 2 3 4] (4,) 4 int32
[[1 2 3]
[2 3 4]
[3 4 5]
[4 5 6]] int32 12
[[1 2 3 2]
[3 4 3 4]
[5 4 5 6]]
[[1 2 3 2]
[3 4 3 4]
[5 4 5 6]]
# Integers 1..9 (the stop value 10 is excluded; the step defaults to 1).
arr4 = np.arange(1, 10)
print(arr4)

# Five evenly spaced values from 2 to 10, both endpoints included.
arr5 = np.linspace(2, 10, num=5)
print(arr5)

# A 2x3x2 block of zeros; np.ones and np.diag are related constructors.
zero_shape = (2, 3, 2)
arr6 = np.zeros(zero_shape)
print(arr6)
[1 2 3 4 5 6 7 8 9]
[ 2. 4. 6. 8. 10.]
[[[0. 0.]
[0. 0.]
[0. 0.]]
[[0. 0.]
[0. 0.]
[0. 0.]]]
生成随机值
import numpy as np
import matplotlib.pyplot as plt
# Draw samples with the legacy np.random module-level functions.
random1 = np.random.random(10)  # 10 floats from the half-open interval [0, 1)
print(random1)
random2 = np.random.rand(30)  # 30 uniformly distributed floats in [0, 1)
print(random2)
random3 = np.random.randn(30)  # 30 draws from the standard normal distribution
print(random3)
# Optional visualisation, left disabled. The original comment plotted
# `random4`, which is not defined in this cell; random3 is presumably meant.
# plt.figure()
# plt.plot(random3)

np1 = np.arange(1, 10, 1)
print(np1)
np1 = np1.tolist()
# np.random.shuffle reorders its argument in place and returns None, so
# binding its return value (as this cell used to) always produced None.
# Shuffle np1 first, then keep a copy of the shuffled order in np2.
np.random.shuffle(np1)
np2 = list(np1)
print('np2==', np2)
[0.18182497 0.18340451 0.30424224 0.52475643 0.43194502 0.29122914
0.61185289 0.13949386 0.29214465 0.36636184]
[0.45606998 0.78517596 0.19967378 0.51423444 0.59241457 0.04645041
0.60754485 0.17052412 0.06505159 0.94888554 0.96563203 0.80839735
0.30461377 0.09767211 0.68423303 0.44015249 0.12203823 0.49517691
0.03438852 0.9093204 0.25877998 0.66252228 0.31171108 0.52006802
0.54671028 0.18485446 0.96958463 0.77513282 0.93949894 0.89482735]
[ 0.73846658 0.17136828 -0.11564828 -0.3011037 -1.47852199 -0.71984421
-0.46063877 1.05712223 0.34361829 -1.76304016 0.32408397 -0.38508228
-0.676922 0.61167629 1.03099952 0.93128012 -0.83921752 -0.30921238
0.33126343 0.97554513 -0.47917424 -0.18565898 -1.10633497 -1.19620662
0.81252582 1.35624003 -0.07201012 1.0035329 0.36163603 -0.64511975]
[1 2 3 4 5 6 7 8 9]
np2== None
索引访问数组
# python 索引是从0开始的
import numpy as np
# 1-D indexing: a single element, a leading slice, and negative positions
# (negative indices count from the end of the array).
arr = np.arange(10)
for picked in (arr[5], arr[:5], arr[-1], arr[-4:-1]):
    print(picked)
print("***************")

# 2-D indexing uses one index per axis: [row, column].
arr2 = np.array([
    [1, 2, 3],
    [2, 3, 4],
    [5, 6, 7],
    [6, 7, 8],
])
print(arr2)
third_column = arr2[:, 2]
second_row = arr2[1, :]
print(third_column)
print(second_row)
5
[0 1 2 3 4]
9
[6 7 8]
***************
[[1 2 3]
[2 3 4]
[5 6 7]
[6 7 8]]
[3 4 7 8]
[2 3 4]
变换数组
import numpy as np
# Reshape: lay the integers 0..11 out as 3 rows by 4 columns.
arr = np.arange(12).reshape((3, 4))
print(arr.shape)

# Flatten to 1-D, first row by row ('C' order, the default), then column
# by column ('F' order).
arr1 = arr.flatten(order='C')
arr2 = arr.flatten(order='F')
print(arr1, arr1.shape, arr2, arr2.shape)
print('*****************************')

# Combine arrays with concatenate: axis=1 joins side by side,
# axis=0 stacks one on top of the other.
arr1 = arr
arr2 = arr * 2
print(arr1, arr2, arr1.shape)
pair = (arr1, arr2)
arr_heng = np.concatenate(pair, axis=1)  # horizontal join
print(arr_heng, arr_heng.shape)
arr_zong = np.concatenate(pair, axis=0)  # vertical join
print(arr_zong, arr_zong.shape)

# Split a 4x4 array into two equal parts along each axis.
arr1 = np.arange(16).reshape((4, 4))
arr3 = np.split(arr1, 2, axis=1)  # left / right halves
arr4 = np.split(arr1, 2, axis=0)  # top / bottom halves
(3, 4)
[ 0 1 2 3 4 5 6 7 8 9 10 11] (12,) [ 0 4 8 1 5 9 2 6 10 3 7 11] (12,)
*****************************
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]] [[ 0 2 4 6]
[ 8 10 12 14]
[16 18 20 22]] (3, 4)
[[ 0 1 2 3 0 2 4 6]
[ 4 5 6 7 8 10 12 14]
[ 8 9 10 11 16 18 20 22]] (3, 8)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[ 0 2 4 6]
[ 8 10 12 14]
[16 18 20 22]] (6, 4)
利用numpy进行统计分析
save函数以二进制的格式保存数据
np.save(file,arr,allow_pickle=True,fix_imports=True)
file: 保存的文件名字,需要指定路径,否则保存在默认路径下(当前目录)
arr:要保存的数组。文件扩展名 .npy 由系统自动添加(文件名中未写扩展名时)。
import numpy as np
# Store a 10x10 grid holding the integers 0..99 in NumPy's binary format.
arr = np.arange(100).reshape((10, 10))
# Only a bare name is passed; the file is written relative to the current
# working directory.
np.save('save_arr', arr)
print(arr)
[[ 0 1 2 3 4 5 6 7 8 9]
[10 11 12 13 14 15 16 17 18 19]
[20 21 22 23 24 25 26 27 28 29]
[30 31 32 33 34 35 36 37 38 39]
[40 41 42 43 44 45 46 47 48 49]
[50 51 52 53 54 55 56 57 58 59]
[60 61 62 63 64 65 66 67 68 69]
[70 71 72 73 74 75 76 77 78 79]
[80 81 82 83 84 85 86 87 88 89]
[90 91 92 93 94 95 96 97 98 99]]
如果保存多个数组,使用函数savez,其文件扩展名为.npz
import numpy as np
# Two arrays of different shape and dtype, bundled into one archive file.
arr1 = np.array([(1, 2, 3), (2, 3, 4)])
arr2 = np.arange(0, 1.0, 0.1)
arrays = (arr1, arr2)
# Positional arrays are passed to savez after the archive name.
np.savez('arr_savezz', *arrays)
print(*arrays)
[[1 2 3]
[2 3 4]] [0. 0.1 0.2 0.3 0.4 0.5 0.6 0.7 0.8 0.9]
load函数读取二进制文件
# Read back the array written by np.save('save_arr', arr). np.save appends
# ".npy" to the name it is given, so the file on disk is 'save_arr.npy';
# the previous name 'arr_save.npy' is never written anywhere in these notes.
data = np.load('save_arr.npy')
print(data)
savetxt函数
np.savetxt(fname, x, delimiter=' ', newline='\n', header='', footer='', comments='# ')
fname:文件名
x:数组数据
delimiter: 数据分隔符
loadtxt函数执行的是相反的操作
# Round-trip a 4x3 grid of the integers 0..11 through a comma-separated
# text file.
arr = np.arange(12).reshape((4, 3))
# fmt='%d' writes every value as an integer.
np.savetxt('arr.txt', arr, delimiter=',', fmt='%d')
# The values come back as floats, as the printed result shows.
data = np.loadtxt('arr.txt', delimiter=',')
print(data)
[[ 0. 1. 2.]
[ 3. 4. 5.]
[ 6. 7. 8.]
[ 9. 10. 11.]]
统计分析函数
排序sort
import numpy as np
# Seed the legacy global generator so the draws below are repeatable.
np.random.seed(42)

# Ten random integers from 1..9 (the upper bound 10 is excluded),
# then sorted in place in ascending order.
arr = np.random.randint(low=1, high=10, size=10)
print(arr)
arr.sort()
print(arr)

# A 3x3 grid: sort within each row first (axis=1), then within each
# column (axis=0). Both sorts modify arr2 in place.
arr2 = np.random.randint(low=1, high=10, size=(3, 3))
print(arr2)
for axis in (1, 0):
    arr2.sort(axis=axis)
    print(arr2)
[7 4 8 5 7 3 7 8 5 4]
[3 4 4 5 5 7 7 7 8 8]
[[8 8 3]
[6 5 2]
[8 6 2]]
[[3 8 8]
[2 5 6]
[2 6 8]]
[[2 5 6]
[2 6 8]
[3 8 8]]
数据去重、复制
# De-duplicate: unique() returns the distinct values in sorted order.
data = np.array((1, 2, 3, 4, 3, 1, 5, 6, 7, 8, 9))
data_ = np.unique(data)
print(data_)

# tile repeats the whole sequence; repeat duplicates each element in turn.
data = np.tile(data_, reps=2)
print(data)
data = np.repeat(data_, repeats=2)
print(data)
[1 2 3 4 5 6 7 8 9]
[1 2 3 4 5 6 7 8 9 1 2 3 4 5 6 7 8 9]
[1 1 2 2 3 3 4 4 5 5 6 6 7 7 8 8 9 9]
常用统计函数
# Common reductions on an ndarray: sum, mean, std, var, min, max.
# This cell prints four of them (sum, mean, min, var), each computed over
# every element of a 4x5 grid of the integers 0..19.
arr = np.arange(20).reshape((4, 5))
print(arr)
for reduction in (arr.sum, arr.mean, arr.min, arr.var):
    print(reduction())
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]
[15 16 17 18 19]]
190
9.5
0
33.25
矩阵
# mat matrix
import numpy as np
# Build the same 3x3 matrix twice: once from a MATLAB-style string and once
# from nested lists. np.asmatrix is used because the np.mat alias was
# removed from the main namespace in NumPy 2.0; it accepts the same input.
matr1 = np.asmatrix('1 2 3; 4 5 6; 7 8 9')
print(matr1)
matr2 = np.matrix([[1,2,3],[4,5,6],[7,8,9]])
print(matr2)

# Arithmetic. For np.matrix operands `*` is the matrix product, not the
# element-wise product used by plain ndarrays.
print(matr1*2)
print(matr1*matr2)
print(matr1-matr2)
print(matr1+matr2)
print('************************************')

# Transpose, conjugate transpose, inverse.
print(matr1.T)  # transpose
print(matr1.H)  # conjugate transpose (same as .T for real entries)
# matr1 is singular (row 1 - 2 * row 2 + row 3 == 0), so it has no true
# inverse. Depending on floating-point rounding, .I either returns huge
# meaningless values (around 1e15) or raises LinAlgError; report the
# latter instead of crashing.
try:
    print(matr1.I)  # inverse
except np.linalg.LinAlgError as err:
    print('matr1 is singular:', err)
[[1 2 3]
[4 5 6]
[7 8 9]]
[[1 2 3]
[4 5 6]
[7 8 9]]
[[ 2 4 6]
[ 8 10 12]
[14 16 18]]
[[ 30 36 42]
[ 66 81 96]
[102 126 150]]
[[0 0 0]
[0 0 0]
[0 0 0]]
[[ 2 4 6]
[ 8 10 12]
[14 16 18]]
************************************
[[1 4 7]
[2 5 8]
[3 6 9]]
[[1 4 7]
[2 5 8]
[3 6 9]]
[[ 3.15251974e+15 -6.30503948e+15 3.15251974e+15]
[-6.30503948e+15 1.26100790e+16 -6.30503948e+15]
[ 3.15251974e+15 -6.30503948e+15 3.15251974e+15]]