numpy 学习

学习自:哔哩哔哩 上 Python3 快速入门数据分析 洺宇吖

import  numpy  as  np
import  random
t1 = np.arange(12)
#t1.shape 一维数组是其个数;二维数组 shape两个值;三维列表则shape是三个值
t1
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
# shape gives the size along each dimension; t1 is 1-D, so the tuple holds a single value
t1.shape
(12,)
t2 = np.array([[1,2,3],[4,5,6]])
t2
array([[1, 2, 3],
       [4, 5, 6]])
#这里是二维数组
t2.shape
(2, 3)
# 3-D array: shape returns one number per dimension, so three numbers here
t3 = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
t3
t3.shape
(2, 2, 3)
#改变数据维度
t3.reshape(3,4)
array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])
t5 = np.arange(24).reshape(2,3,4)
t5
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])
t5.reshape(4,6)
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])
#变成一维数组  4*6 = 24 ,统计元素的所有的个数
t5.reshape(24,)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])
#二维数组 24*1
t5.reshape(24,1)
array([[ 0],
       [ 1],
       [ 2],
       [ 3],
       [ 4],
       [ 5],
       [ 6],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [17],
       [18],
       [19],
       [20],
       [21],
       [22],
       [23]])
#二维数组  1*24
t5.reshape(1,24)
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23]])
#查找元素的个数
tt5 = t5.reshape(4,6)
#tt5.shape
t6 = tt5.reshape((tt5.shape[0]*tt5.shape[1]),)
t6
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])
#展开成一维数组
t5.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

t5 = t5.reshape(4,6)
t5

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

#数组与数字计算,应用于数组中每个数字都计算,广播机制
t5+2

array([[ 2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13],
       [14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25]])

t5*2

array([[ 0,  2,  4,  6,  8, 10],
       [12, 14, 16, 18, 20, 22],
       [24, 26, 28, 30, 32, 34],
       [36, 38, 40, 42, 44, 46]])

# 0/0   nan 不是一个数字;   number/0 = inf  无穷
t5/0  

/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: divide by zero encountered in true_divide
  """Entry point for launching an IPython kernel.
/usr/local/lib/python3.6/site-packages/ipykernel_launcher.py:1: RuntimeWarning: invalid value encountered in true_divide
  """Entry point for launching an IPython kernel.

array([[nan, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf],
       [inf, inf, inf, inf, inf, inf]])

t5.shape

(4, 6)

# Second 4x6 array holding 100..123, to be combined element-wise with another 4x6 array
t6 = np.arange(100,124).reshape(4,6)
# Echo the array just built (the original cell echoed `t`, which is not defined at this point)
t6

t5+t6

array([[100, 102, 104, 106, 108, 110],
       [112, 114, 116, 118, 120, 122],
       [124, 126, 128, 130, 132, 134],
       [136, 138, 140, 142, 144, 146]])

t7=np.arange(0,6)
t7

array([0, 1, 2, 3, 4, 5])

t5-t7

array([[ 0,  0,  0,  0,  0,  0],
       [ 6,  6,  6,  6,  6,  6],
       [12, 12, 12, 12, 12, 12],
       [18, 18, 18, 18, 18, 18]])

t8 = np.arange(4).reshape(4,1)
t8

array([[0],
       [1],
       [2],
       [3]])

t5

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

t5-t8

array([[ 0,  1,  2,  3,  4,  5],
       [ 5,  6,  7,  8,  9, 10],
       [10, 11, 12, 13, 14, 15],
       [15, 16, 17, 18, 19, 20]])

#numpy 创建数组  ndarray 类型
a = np.array([1,2,3,4,5,6,7,8,9])
a,a.shape,type(a)

(array([1, 2, 3, 4, 5, 6, 7, 8, 9]), (9,), numpy.ndarray)

# arange(start, stop, step): counts from start and excludes stop
a2 = np.arange(0, 10)
a3 = np.arange(4, 10, 2).copy()
# dtype is the element type; none is requested here, so NumPy picks its default integer type
a2, a3, a2.dtype

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([4, 6, 8]), dtype('int64'))

# Request the element type explicitly through the dtype argument
a4 = np.arange(1, 4, dtype=float)
a4, a4.dtype

(array([1., 2., 3.]), dtype('float64'))

a5 = np.array([1,1,0,0,1,0], dtype=bool)
a5 , a5.dtype

(array([ True,  True, False, False,  True, False]), dtype('bool'))

#修改其中的数据类型
a6 = a5.astype('int8')
a6, a6.dtype

(array([1, 1, 0, 0, 1, 0], dtype=int8), dtype('int8'))

# Ten floats drawn one at a time from the standard library's random.random()
a7 = np.fromiter((random.random() for _ in range(10)), dtype=float, count=10)
a7

array([0.74051059, 0.54355674, 0.71288582, 0.14214711, 0.57256945,
       0.22807275, 0.31526748, 0.24873185, 0.55322492, 0.94061721])

# Round to a fixed number of decimal places (2 for the array, 3 for the scalar).
# NOTE(review): NumPy documents np.round as rounding exact halves to the nearest even
# value, so this is not strictly "round half up" — confirm against the NumPy docs.
a8 = np.round(a7,2)
a8,round(0.28932,3)

(array([0.74, 0.54, 0.71, 0.14, 0.57, 0.23, 0.32, 0.25, 0.55, 0.94]), 0.289)

#axis  块  行  列  顺序对应 0  1   2


a9 = np.arange(20).reshape(4,5)#行数从0开始  
a9

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

a9[2]#  一行  ndarray

array([10, 11, 12, 13, 14])

a9[2:]  # consecutive rows: index 2 through the last row
a9[[0,2],:]   # non-adjacent rows picked by an index list: the 1st and 3rd rows

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

# Before the comma: row selection; after the comma: column selection
a9[1,0:3],a9[1,:]

(array([5, 6, 7]), array([5, 6, 7, 8, 9]))

#取列
a9[:,0]

array([ 0,  5, 10, 15])

#取连续多列
a9[:,2:]

array([[ 2,  3,  4],
       [ 7,  8,  9],
       [12, 13, 14],
       [17, 18, 19]])

#取第三行,第四列
ab = a9[2,3]
ab ,ab.dtype 

(13, dtype('int64'))

# Select several rows and several columns at once.
# Slice ends are exclusive: 2:4 is the 3rd and 4th rows, 1:3 is the 2nd and 3rd columns;
# the result is the intersection of those rows and columns.
a9[2:4,1:3] 

array([[11, 12],
       [16, 17]])

#取多个不相邻的点   取 0,0   2,1 点的值
c = a9[[0,2],[0,1]]
c

array([ 0, 11])

#小于10的值赋值为3  ,为  true的位置赋值  bool 索引
a9[a9<10]=3
a9

array([[ 3,  3,  3,  3,  3],
       [ 3,  3,  3,  3,  3],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

a10 = np.arange(20).reshape(4,5)
a10

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

#numpy  的三元运算符  t<10 0,  否则10
np.where(a10<10,0,10)
a10  #不变 

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

# clip: values below the lower bound (5) become 5, values above the upper bound (15) become 15
a10.clip(5,15)

array([[ 5,  5,  5,  5,  5],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 15, 15, 15, 15]])

a10 = a10.astype(np.float64)
a10.dtype
a10[3,4] = np.nan   #nan 是浮点型
a10

array([[ 0.,  1.,  2.,  3.,  4.],
       [ 5.,  6.,  7.,  8.,  9.],
       [10., 11., 12., 13., 14.],
       [15., 16., 17., 18., nan]])

# Joining arrays: vstack (vertical) and hstack (horizontal)
t1 = np.arange(10).reshape(2,5)
t2 = np.arange(10,20,1).reshape(2,5)
t1,t2
np.vstack((t1,t2))    # vertical stack: t2's rows are appended below t1's last row

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

np.hstack((t1,t2))   # horizontal stack: t2's columns are appended to the right of each row of t1

array([[ 0,  1,  2,  3,  4, 10, 11, 12, 13, 14],
       [ 5,  6,  7,  8,  9, 15, 16, 17, 18, 19]])

# Swap two rows (row 0 and row 1) in place using index lists
t1[[1,0],:] = t1[[0,1],:]
t1

array([[5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4]])

#构造全0 全1   对角线为1数组
a12 = np.zeros((3,5))
a13 = np.ones((3,5))
a14 = np.eye(3)
a12  
a13  
a14

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

#numpy.linspace(start, stop, num=50, endpoint=True, retstep=False, dtype=None)[source]
np.linspace(0,10,50)

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

# Position (index) of the maximum / minimum along an axis — not the value itself
t1 = np.arange(10).reshape(2,5)
np.argmax(t1,axis=0)   # axis=0: one result per column (index of the row holding the max)
#np.argmin(t1,axis=1)  # axis=1: one result per row

array([1, 1, 1, 1, 1])

#随机数
#np.random.rand   np.random.randn  标准正态分布  .randint  .uniform   均匀分布   .normal从正态分布中随机取样   seed 随机数种子
np.random.randint(10,20,(4,5))  #范围  [10-20)  数组维度 (4,5)

array([[18, 12, 15, 13, 10],
       [18, 17, 12, 15, 18],
       [15, 18, 15, 14, 10],
       [10, 15, 12, 15, 12]])

np.random.uniform(2,10,(3,4))

array([[5.9807993 , 8.70762938, 4.3888976 , 8.07379709],
       [4.49501347, 2.66463837, 6.35263396, 6.65691726],
       [2.23461468, 2.76954826, 8.70847978, 9.1583151 ]])

np.random.seed(10)    #加种子之后,每次随机数都是相同的
t = np.random.randint(0,20,(3,4))
t

array([[ 9,  4, 15,  0],
       [17, 16, 17,  8],
       [ 9,  0, 10,  8]])

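# copy vs. view
#  a = b         no copy at all: both names refer to the same array, so a change made through either one is visible through the other
#  a = b[:]      a view: a new array object that shares b's data, so element changes still show up in both
#  a = b.copy()  an independent copy: the two arrays no longer affect each other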

#nan 1. 0/0  2. float  类型,数据有缺失则会为NAN  3. 无穷大-无穷大为NAN
#INF   -inf  ,3/0  python 中报错, numpy中 是 inf   -inf
np.nan == np.nan

False

np.nan != np.nan

True

t2 = t2.astype(np.float64)  # astype returns a new array, so the result must be assigned back to t2
t2[:,0]=np.nan

np.count_nonzero(t2)    # NOTE(review): this counts non-zero elements, not NaNs specifically; to count NaNs use np.count_nonzero(t2!=t2)
t2

array([[nan, 11., 12., 13., 14.],
       [nan, 16., 17., 18., 19.]])

np.count_nonzero(t2!=t2)  # NaN is never equal to itself, so t2!=t2 is True only at NaN positions; this counts the NaNs

2

np.isnan(t2),t2[np.isnan(t2)]
(array([[ True, False, False, False, False],
        [ True, False, False, False, False]]), array([nan, nan]))
t1,np.sum(t1),np.sum(t1,axis=0),np.sum(t1,axis=1)
(array([[0, 1, 2, 3, 4],
        [5, 6, 7, 8, 9]]), 45, array([ 5,  7,  9, 11, 13]), array([10, 35]))
# Aggregations: np.sum, mean, median, max, min, ptp (peak-to-peak, i.e. max - min), std (standard deviation)
ttt1 = np.arange(16).reshape(4,4)
ttt1
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])
np.sum(ttt1)   #不加axis时是计算的所有的
120
np.mean(ttt1) ,np.median(ttt1) , np.max(ttt1) ,np.min(ttt1)  ,np.ptp(ttt1)  , np.std(ttt1)
(7.5, 7.5, 15, 0, 15, 4.6097722286464435)
np.sum(ttt1,axis =0)# 列求和
array([24, 28, 32, 36])
np.sum(ttt1,axis = 1)  #  行求和
array([ 6, 22, 38, 54])
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值