numpy学习----数组与向量化计算(参考《利用Python进行数据分析》)
#4.1
import numpy as np
# Build a 2x3 array of samples drawn from the standard normal distribution.
data = np.random.randn(2, 3)
data
array([[ 0.08733309, -1.20700919, 0.76849156],
[ 0.38783398, -0.75058736, 0.36083027]])
# Arithmetic between an array and a scalar, or between two arrays of the
# same shape, is applied element by element.
print(data * 10)
print(data + data)
[[ 0.87333085 -12.07009187 7.68491561]
[ 3.87833976 -7.50587364 3.60830273]]
[[ 0.17466617 -2.41401837 1.53698312]
[ 0.77566795 -1.50117473 0.72166055]]
data.shape
(2, 3)
data.dtype
dtype('float64')
4.1.1生成ndarray
# A flat Python list becomes a 1-D array.
data1 = [6, 7, 5, 8, 0, 1]
arr1 = np.array(data1)

# A list of equal-length lists becomes a 2-D array.
data2 = [[1, 2, 3], [4, 5, 6]]
arr2 = np.array(data2)

print(arr2)
arr1
[[1 2 3]
[4 5 6]]
array([6, 7, 5, 8, 0, 1])
#生成一个全部是0的数组
np.zeros(10)
array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
#arange是生成一个递增的数组
np.arange(10)
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
4.1.2ndarray的数据类型,float64是双精度浮点值
# The element type can be chosen explicitly when the array is created.
arr3 = np.array([1, 2, 3], dtype=np.float64)
arr3  # the trailing dots in the repr (1., 2., 3.) mark floating-point values
array([1., 2., 3.])
# An existing array can be cast explicitly to another dtype with astype.
arr4 = np.arange(6)
print(arr4.dtype)
# np.float was only an alias for the builtin float; it was deprecated in
# NumPy 1.20 and removed in 1.24, so name the 64-bit float dtype directly.
float_arr4 = arr4.astype(np.float64)
print(float_arr4.dtype)
# astype can also turn an array of numeric strings into numbers (not shown here).
int32
float64
4.1.3Numpy数组算术
# Arrays support +, -, *, / and comparisons against a scalar.
# Between two arrays the operators work position by position, so `*` is an
# element-wise product rather than a matrix product.
arr5 = np.arange(10)
arr6 = np.arange(10)
print(arr5 * arr6)
print(arr5 > arr6)  # element-wise comparison
[ 0 1 4 9 16 25 36 49 64 81]
[False False False False False False False False False False]
4.1.4索引与切片
arr = np.arange(10)
arr[5]  # a single integer index picks one element
5
arr[1:5]#切片从1到5,左闭右开
array([1, 2, 3, 4])
arr[7:9] = 12
# Note: assigning through a slice writes into the original array; a slice
# is not a copy.
print(arr)
# Call .copy() on the slice to get an independent copy.
print(arr[4:5].copy())
[ 0 1 2 3 4 5 6 12 12 9]
[4]
# Indexing a higher-dimensional array.
arr2d = np.array([[1, 2, 3], [4, 5, 6]])
print(arr2d[0])     # first row
print(arr2d[1, 0])  # row 1, column 0
# Several axes can be sliced at once: rows 0 up to (not including) 2,
# columns 1 to the end.
print(arr2d[:2, 1:])
[1 2 3]
4
[[2 3]
[5 6]]
4.1.5布尔索引
names = np.array(['liuxue', 'qudong', 'hiehie'])
# 7 rows by 4 columns of samples from the standard normal distribution.
data = np.random.randn(7, 4)
data
array([[-1.57305328, 0.09707246, 0.63709895, -0.12350854],
[-1.53336994, 0.24407403, -1.26416064, -0.49466223],
[ 0.08100775, -0.41158703, 0.91988958, -1.9296198 ],
[ 0.63585761, -0.02683479, -0.45008594, -0.76703009],
[ 1.58682373, 1.55437249, 1.38946301, -0.53603832],
[-2.02749675, 0.6593739 , -0.83075286, -0.10332118],
[ 0.79847446, -1.3708947 , 0.95560336, -0.51128584]])
names=='bob'
array([False, False, False])
#利用布尔值索引的时候,是对数据进行拷贝
data[[True, False, False,False, False, False,True]]#把数据为true的显示
array([[-1.57305328, 0.09707246, 0.63709895, -0.12350854],
[ 0.79847446, -1.3708947 , 0.95560336, -0.51128584]])
4.1.6神奇索引
# np.empty allocates an 8x4 array without initialising it; every row is
# assigned in the loop below.
arr = np.empty((8, 4))
# Set row i to the value i (the scalar is broadcast across the 4 columns).
for i in range(8):
    arr[i] = i
arr
array([[0., 0., 0., 0.],
[1., 1., 1., 1.],
[2., 2., 2., 2.],
[3., 3., 3., 3.],
[4., 4., 4., 4.],
[5., 5., 5., 5.],
[6., 6., 6., 6.],
[7., 7., 7., 7.]])
arr[[4,3,0,6]]#是挺神奇的
array([[4., 4., 4., 4.],
[3., 3., 3., 3.],
[0., 0., 0., 0.],
[6., 6., 6., 6.]])
# Create the values 0..31 first, then arrange them as 8 rows by 4 columns.
arr = np.arange(32).reshape(8, 4)
arr
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11],
[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23],
[24, 25, 26, 27],
[28, 29, 30, 31]])
# Two index lists are paired up: this selects the elements at
# (1, 0), (5, 3), (7, 2) and (4, 3) and returns them as a 1-D array.
arr[[1, 5, 7, 4], [0, 3, 2, 3]]
array([ 4, 23, 30, 19])
4.1.7数组转置和换轴
print(arr)
print(arr.T)           # transpose of the array
print(arr.T.dot(arr))  # matrix product of arr.T and arr
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]
[12 13 14 15]
[16 17 18 19]
[20 21 22 23]
[24 25 26 27]
[28 29 30 31]]
[[ 0 4 8 12 16 20 24 28]
[ 1 5 9 13 17 21 25 29]
[ 2 6 10 14 18 22 26 30]
[ 3 7 11 15 19 23 27 31]]
[[2240 2352 2464 2576]
[2352 2472 2592 2712]
[2464 2592 2720 2848]
[2576 2712 2848 2984]]
4.2数组函数
还有好多,看到的时候自己查表
# Integers 0 through 9, used as input for the element-wise functions below.
arr = np.arange(10)
arr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
np.sqrt(arr)#数组的每个元素都开平方
array([0. , 1. , 1.41421356, 1.73205081, 2. ,
2.23606798, 2.44948974, 2.64575131, 2.82842712, 3. ])
np.exp(arr)#计算每一个数的ex值
array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
2.98095799e+03, 8.10308393e+03])
# Two independent vectors of 8 standard-normal samples each.
x = np.random.randn(8)
y = np.random.randn(8)
print(x)
print(y)
[-1.43117964 -1.03580864 0.20196677 -0.36892217 0.93891054 0.52159798
-0.48145093 -0.50254911]
[-4.60382458e-01 -1.28791643e+00 5.02343696e-01 1.27297624e+00
9.15871436e-04 -2.40082981e-01 9.07518518e-01 -8.73266259e-01]
4.3使用数组进行面向数组编程
# Values from -5 up to (not including) 5 in steps of 0.01.
points = np.arange(-5, 5, 0.01)
# meshgrid expands the 1-D points into two 2-D coordinate matrices:
# xs varies along each row, ys varies down each column.
xs, ys = np.meshgrid(points, points)
ys
array([[-5. , -5. , -5. , ..., -5. , -5. , -5. ],
[-4.99, -4.99, -4.99, ..., -4.99, -4.99, -4.99],
[-4.98, -4.98, -4.98, ..., -4.98, -4.98, -4.98],
...,
[ 4.97, 4.97, 4.97, ..., 4.97, 4.97, 4.97],
[ 4.98, 4.98, 4.98, ..., 4.98, 4.98, 4.98],
[ 4.99, 4.99, 4.99, ..., 4.99, 4.99, 4.99]])
xs
array([[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
...,
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99],
[-5. , -4.99, -4.98, ..., 4.97, 4.98, 4.99]])
# Evaluate sqrt(x^2 + y^2) at every point of the grid.
z = np.sqrt(xs ** 2 + ys ** 2)
z
array([[7.07106781, 7.06400028, 7.05693985, ..., 7.04988652, 7.05693985,
7.06400028],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
...,
[7.04988652, 7.04279774, 7.03571603, ..., 7.0286414 , 7.03571603,
7.04279774],
[7.05693985, 7.04985815, 7.04278354, ..., 7.03571603, 7.04278354,
7.04985815],
[7.06400028, 7.05692568, 7.04985815, ..., 7.04279774, 7.04985815,
7.05692568]])
import matplotlib.pyplot as plt#准备画图类,生成网格数据的灰度变化
plt.imshow(z, cmap=plt.cm.gray)  # render z as a greyscale image
plt.colorbar()
<matplotlib.colorbar.Colorbar at 0x17bd2a3d470>
4.3.1将条件逻辑作为数组操作
xarr = np.arange(10)     # 0 .. 9
yarr = np.arange(-5, 5)  # -5 .. 4 (step defaults to 1)
xarr
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
yarr
array([-5, -4, -3, -2, -1, 0, 1, 2, 3, 4])
cond = np.array([True, False, True, True, False, True, False, True, True, False])
# Pick from xarr where cond is True, otherwise from yarr. This pure-Python
# version is slow and inefficient, and the result is a list rather than an array.
result = [
    (x_val if flag else y_val)
    for x_val, y_val, flag in zip(xarr, yarr, cond)
]
result
[0, -4, 2, 3, -1, 5, 1, 7, 8, 4]
# np.where makes the same cond-based selection in a single call and
# returns an array.
result = np.where(cond, xarr, yarr)
result
array([ 0, -4, 2, 3, -1, 5, 1, 7, 8, 4])
# Goal of the next cells: replace every positive value in the array with 2.
arr = np.random.randn(4, 4)
arr > 0  # boolean mask marking the positive entries
array([[False, True, True, False],
[ True, False, False, False],
[False, False, True, False],
[ True, True, True, False]])
np.where(arr>0,2,-2)#如果是正值就变成2否则是-2
array([[-2, 2, 2, -2],
[ 2, -2, -2, -2],
[-2, -2, 2, -2],
[ 2, 2, 2, -2]])
np.where(arr>0,2,arr)
array([[-0.5598994 , 2. , 2. , -0.56522329],
[ 2. , -0.09428299, -0.90856628, -0.39663604],
[-0.92615272, -0.11062836, 2. , -0.48575249],
[ 2. , 2. , 2. , -1.26207953]])
4.3.2数学和统计方法
arr=np.random.randn(5,4)
arr
array([[-0.51644393, 0.79666822, 1.21414109, 1.29154233],
[-1.33620648, 0.13675079, -1.05243122, -0.8174846 ],
[ 1.39762222, 0.32267359, -0.87654034, -1.58227177],
[-0.36803539, -1.47324206, 0.05496088, 0.25541244],
[ 0.40238408, -0.83189796, 0.40920533, -0.99968687]])
print(np.mean(arr))#均值
-0.1786439835176989
print(np.sum(arr))#求和
-3.572879670353978
print(np.std(arr))#标准差
0.9140485163987038
4.3.3布尔值数组的方法
arr = np.random.randn(100)
# True counts as 1 when summed, so this is the number of positive values.
(arr > 0).sum()
58
# any() reports whether at least one element of the array is True.
bools = np.array([False, False, True, False])
bools.any()
True
#是否全部是True
bools.all()
False
4.3.4排序
arr.sort()  # sorts the original array in place and returns None
arr  # echo the array so the sorted values are displayed
array([-2.98524643, -2.63142812, -2.45486723, -1.8076942 , -1.61882378,
-1.44427752, -1.3796187 , -1.08645309, -1.0534991 , -0.97946174,
-0.94816293, -0.89359945, -0.88004669, -0.86972448, -0.86912799,
-0.86413391, -0.79987113, -0.78122383, -0.73298803, -0.70982191,
-0.68343762, -0.64954971, -0.54632708, -0.52503766, -0.45612528,
-0.44697164, -0.43352159, -0.42277377, -0.4202717 , -0.39234742,
-0.37292247, -0.31369547, -0.24877789, -0.21364612, -0.20042959,
-0.16734588, -0.13525561, -0.13097985, -0.09642911, -0.04575062,
-0.04132369, -0.02116476, 0.02834716, 0.03780822, 0.06828828,
0.10123055, 0.1707879 , 0.17384056, 0.21871923, 0.21926709,
0.24215028, 0.25895239, 0.2632248 , 0.40128374, 0.40757036,
0.41084607, 0.41872534, 0.4490776 , 0.45338206, 0.45605221,
0.45766068, 0.49352961, 0.53685214, 0.55999231, 0.57211964,
0.58594505, 0.58741451, 0.61654091, 0.6370439 , 0.67254064,
0.67382018, 0.76068954, 0.77182885, 0.81010822, 0.81991775,
0.84196902, 0.84849718, 0.90116848, 0.90293832, 0.9081463 ,
0.90964332, 1.07269295, 1.11472327, 1.11944295, 1.14475475,
1.23570537, 1.29390305, 1.30999776, 1.31923855, 1.4107488 ,
1.41246593, 1.4248844 , 1.44918818, 1.64979638, 1.6561739 ,
1.73319071, 1.75054115, 2.22711196, 2.56106661, 2.92345958])
4.3.5唯一值与其他集合逻辑
ints = np.array([2, 2, 2, 2, 3, 3, 3, 4, 4, 4])
# unique returns a new, sorted array of the distinct values; ints itself
# is left unchanged, as the echo below shows.
np.unique(ints)
ints
array([2, 2, 2, 2, 3, 3, 3, 4, 4, 4])
# For each element of ints, test whether it is one of the values 2 or 3.
# np.isin replaces np.in1d, which is deprecated as of NumPy 2.0.
np.isin(ints, [2, 3])
array([ True, True, True, True, True, True, True, False, False,
False])
4.4使用数组进行文件输入输出
# In practice most data is handled with pandas; this shows NumPy's own format.
arr = np.arange(10)
np.save('数组数据', arr)   # write the array to disk; the file gets a .npy suffix
np.load("数组数据.npy")    # read the array back from disk
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
4.5线性代数
# x is the 2x3 matrix [[1, 2, 3], [4, 5, 6]] stored as float64.
x = np.arange(1, 7, dtype=np.float64).reshape(2, 3)
# y is the transpose of x with 3 added to every element (a new 3x2 array).
y = x.T + 3
y
array([[4., 7.],
[5., 8.],
[6., 9.]])
# Matrix product of x and y; the method form x.dot(y) is equivalent.
np.dot(x, y)
array([[ 32., 50.],
[ 77., 122.]])
# numpy.linalg provides matrix decompositions and related routines.
from numpy.linalg import inv, qr
X = np.random.randn(5, 5)
mat = np.dot(X.T, X)   # product of X transposed with X
inv(mat)               # inverse of mat
np.dot(mat, inv(mat))  # mat times its inverse
array([[ 1.00000000e+00, 0.00000000e+00, -8.88178420e-16,
8.88178420e-16, 0.00000000e+00],
[-1.04950770e-16, 1.00000000e+00, 6.93889390e-18,
-1.21430643e-15, -1.23165367e-16],
[ 8.32667268e-17, -4.44089210e-16, 1.00000000e+00,
4.44089210e-16, 6.66133815e-16],
[ 5.82867088e-16, -6.21724894e-15, -8.88178420e-16,
1.00000000e+00, 1.11022302e-15],
[-5.55111512e-17, 8.88178420e-16, -8.88178420e-16,
2.66453526e-15, 1.00000000e+00]])
# QR decomposition of mat; r is the upper-triangular factor echoed below.
q, r = qr(mat)
r
array([[-4.74739724, 4.17561413, -2.41605872, 1.4556586 , 2.62883658],
[ 0. , -4.37048078, 0.62420262, -2.38651785, 1.71674838],
[ 0. , 0. , -3.84369948, -4.33709713, 2.94215212],
[ 0. , 0. , 0. , -0.48574305, -1.55517707],
[ 0. , 0. , 0. , 0. , 0.32885184]])
4.6伪随机数
# A 4x4 array of samples drawn from the normal distribution.
sample = np.random.normal(size=(4, 4))
sample
array([[ 1.19522011, 0.2407074 , -0.1234693 , -0.72991516],
[-0.63274475, -0.73563912, 2.04438794, 1.86788497],
[ 0.70277899, 0.16402667, -0.24096592, 1.29678846],
[ 0.35293785, 1.3245612 , 1.34871637, -0.3686893 ]])
# A pseudo-random generator of its own, created with the seed 1234.
rng = np.random.RandomState(1234)
rng.randn(10)
array([ 0.47143516, -1.19097569, 1.43270697, -0.3126519 , -0.72058873,
0.88716294, 0.85958841, -0.6365235 , 0.01569637, -2.24268495])
#还有很多函数
随机漫步例子
import random
position = 0
walk = [position]  # every position visited, starting from the origin
steps = 1000
# Take `steps` random steps of +1 or -1, recording the position after each.
for i in range(steps):
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
plt.plot(walk[:100])#展示前100次
[<matplotlib.lines.Line2D at 0x17bd297b160>]