数据处理Numpy学习笔记

import numpy as np
t1 = np.array([1, 2, 3])
t1
array([1, 2, 3])
t2 = np.array(range(10))
t2
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
t3 = np.arange(12)
t3
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
t4 = np.arange(4, 8, 2)
t4
array([4, 6])
t4.dtype
dtype('int32')

修改数据类型的方法

t4 = np.array(range(10), dtype='float64')
t4
array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])
t5 = t4.astype('float32')
t5.dtype
dtype('float32')
import random
t6 = np.array([random.random() for i in range(1, 10)])
t6
array([0.69002913, 0.60309892, 0.57732269, 0.77670478, 0.49087636,
       0.05686646, 0.50341497, 0.78348396, 0.97891676])
# 保留小数
t7 = np.round(t6, 2)
t7
array([0.69, 0.6 , 0.58, 0.78, 0.49, 0.06, 0.5 , 0.78, 0.98])

生成特殊数组

np.zeros((3, 4))
array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])
np.ones((3,))
array([1., 1., 1.])
np.random.random((4, 3, 4))
array([[[0.88949406, 0.516348  , 0.9338736 , 0.8761512 ],
        [0.80969742, 0.0557918 , 0.84568932, 0.70281596],
        [0.97124603, 0.62063392, 0.20347112, 0.60842865]],

       [[0.83164314, 0.75154962, 0.55629461, 0.7950253 ],
        [0.71041516, 0.46290726, 0.66937629, 0.66167786],
        [0.30038413, 0.72894563, 0.42310383, 0.96849788]],

       [[0.55985491, 0.60339171, 0.91343052, 0.99101353],
        [0.24031093, 0.18791789, 0.13428558, 0.23939572],
        [0.19660544, 0.14944018, 0.45764254, 0.57232685]],

       [[0.70098855, 0.92525333, 0.19818721, 0.54227531],
        [0.37353857, 0.57298138, 0.89641575, 0.13471736],
        [0.17977541, 0.18406087, 0.26044415, 0.37039553]]])
np.random.rand(3,2)
array([[0.77886627, 0.20044878],
       [0.82558843, 0.74687223],
       [0.04423197, 0.82414292]])
np.random.randint(10,size=(5,4))  # 随机生成五行四列的整数,取值范围为 [0, 10)(不包含10)
array([[1, 5, 8, 0],
       [4, 2, 7, 6],
       [4, 8, 7, 8],
       [9, 7, 2, 3],
       [3, 4, 5, 5]])
np.random.random_sample()  # 随机采样取值
0.816308250363168
np.random.randint(0,10,5)  # 在 [0, 10) 范围内随机取5个整数(不包含10)
array([2, 4, 8, 4, 5])
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果
array([ 0.03436545, -0.08164443,  0.13160285,  0.0430074 , -0.05111399,
        0.09617187, -0.10320152, -0.14053349, -0.19079794, -0.0438002 ])
np.set_printoptions(precision=2)  # 设置数组的打印精度:小数点后保留2位(对之后所有数组的显示都生效,不只是高斯结果)
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果
array([ 0.14,  0.02, -0.16, -0.07, -0.03, -0.14, -0.08,  0.03, -0.02,
       -0.08])
arr = np.arange(10)

# 数组的形状
mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果
array([-0.13,  0.  ,  0.04,  0.01,  0.06,  0.04,  0.01, -0.15, -0.04,
        0.08])
arr = np.arange(10)
print(arr)
[0 1 2 3 4 5 6 7 8 9]
np.random.shuffle(arr)  # 洗牌--乱序
arr
array([7, 1, 2, 0, 9, 4, 6, 5, 8, 3])

数组的形状

t1 = np.arange(12)
t1
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
t1.ndim  # 查看数组的维度
1
t1.shape
(12,)
t1.reshape((2, 6))   # 一维转二维
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])
t1.reshape((12,))  # reshape 不会原地修改 t1(此时 t1 仍是一维);二维数组同样可以用 reshape((12,)) 转回一维
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])
t2 = np.arange(24).reshape((2,3,4))
t2.shape[0],t2.shape[1],t2.shape[2]
(2, 3, 4)
t2.reshape((t2.shape[0]*t2.shape[1],t2.shape[2]))
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])
t2.flatten()  # 扁平
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

axis轴

添加新轴–np.newaxis

ar = np.arange(10)
ar
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ar = ar[np.newaxis,:]  # 添加在前面
ar
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
ar.shape
(1, 10)
ar = ar[:,np.newaxis]
ar
array([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]])
ar.shape
(1, 1, 10)

压缩–squeeze

ar = ar.squeeze()  # 压缩操作
ar
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
ar.shape
(10,)

文件读取

file_path = './can.csv'
np.loadtxt(file_path, delimiter=',', dtype='int')
array([[  1,  20,   1,   0,   0],
       [  1,  20,   1,   0,   0],
       [  1,  20,   0,   0,   0],
       ...,
       [  3, 100,   1,   0,   0],
       [  3, 100,   0,   0,   0],
       [  3, 100,   1,   0,   0]])
np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)
array([[  1,   1,   1, ...,   3,   3,   3],
       [ 20,  20,  20, ..., 100, 100, 100],
       [  1,   1,   0, ...,   1,   0,   1],
       [  0,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   0,   0,   0]])

转置

t1 = np.arange(24).reshape(4,6)
t1
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])
t1.T
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])
np.transpose(t1)
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])
t1.swapaxes(1, 0)
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])
t1.swapaxes(0, 1)
array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])
file_path = './can.csv'
t2 = np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)
t2.T
array([[  1,  20,   1,   0,   0],
       [  1,  20,   1,   0,   0],
       [  1,  20,   0,   0,   0],
       ...,
       [  3, 100,   1,   0,   0],
       [  3, 100,   0,   0,   0],
       [  3, 100,   1,   0,   0]])
t2[2]
array([1, 1, 0, ..., 1, 0, 1])
t2.T[[2,8,3]]
array([[ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0]])
t2.T[[2,8,3], :]
array([[ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0]])
t2.T[2,3]
0
t2[:,[2,8,2]]
array([[ 1,  1,  1],
       [20, 20, 20],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0]])

数据修改

t2[t2<2] = 3
t2
array([[  3,   3,   3, ...,   3,   3,   3],
       [ 20,  20,  20, ..., 100, 100, 100],
       [  3,   3,   3, ...,   3,   3,   3],
       [  3,   3,   3, ...,   3,   3,   3],
       [  3,   3,   3, ...,   3,   3,   3]])
t2[t2>10] = 30
t2
array([[ 3,  3,  3, ...,  3,  3,  3],
       [30, 30, 30, ..., 30, 30, 30],
       [ 3,  3,  3, ...,  3,  3,  3],
       [ 3,  3,  3, ...,  3,  3,  3],
       [ 3,  3,  3, ...,  3,  3,  3]])
np.where(t2<4, 100, 200)  # 小于4赋值100,反之200
array([[100, 100, 100, ..., 100, 100, 100],
       [200, 200, 200, ..., 200, 200, 200],
       [100, 100, 100, ..., 100, 100, 100],
       [100, 100, 100, ..., 100, 100, 100],
       [100, 100, 100, ..., 100, 100, 100]])
t2.clip(4, 20)   # 小于4的赋值为4,大于20的赋值为20
array([[ 4,  4,  4, ...,  4,  4,  4],
       [20, 20, 20, ..., 20, 20, 20],
       [ 4,  4,  4, ...,  4,  4,  4],
       [ 4,  4,  4, ...,  4,  4,  4],
       [ 4,  4,  4, ...,  4,  4,  4]])
t2 = t2.astype(float)
t2
array([[ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [30., 30., 30., ..., 30., 30., 30.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.]])
t2[2, 2] = np.nan  # 第三行第三列赋值为nan,数组必须为浮点数类型
t2
array([[ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [30., 30., 30., ..., 30., 30., 30.],
       [ 3.,  3., nan, ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.]])
np.count_nonzero(t2)  # 统计不为0的个数
765000
np.count_nonzero(t2 != t2)  # 拿到为nan的个数
1
np.isnan(t2)  # nan处为True
array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])
np.sum(t2)
nan
t3 = np.arange(12).reshape(3, 4)
t3
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
np.sum(t3)
66
np.sum(t3, axis=0)  # 计算列和
array([12, 15, 18, 21])
np.sum(t3, axis=1)  # 计算行和
array([ 6, 22, 38])
t2
np.mean(t2, axis=0)  # 求列均值
array([8.4, 8.4, nan, ..., 8.4, 8.4, 8.4])
np.mean(t2, axis=1)  # 求行均值
array([ 2.67, 30.  ,   nan,  2.99,  2.99])
np.median(t2, axis=0)  # 求列中值
array([ 3.,  3., nan, ...,  3.,  3.,  3.])
np.median(t2, axis=1)  # 求行中值
array([ 3., 30., nan,  3.,  3.])
np.ptp(t2, axis=0)  # 列极大值和极小值之差
array([27., 27., nan, ..., 27., 27., 27.])
np.ptp(t2, axis=1)  # 行极大值和极小值之差
array([ 1.,  0., nan,  5.,  4.])
np.std(t2, axis=0)  # 列标准差
array([10.8, 10.8,  nan, ..., 10.8, 10.8, 10.8])
np.std(t2, axis=1)  # 行标准差
array([0.47, 0.  ,  nan, 0.12, 0.08])

numpy处理缺失值和nan

t1 = np.arange(12).reshape(3,4).astype(float)
t1
array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])
t1[1,2:] = np.nan
t1
array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5., nan, nan],
       [ 8.,  9., 10., 11.]])
def fill_ndarry(arr=None):
    """Replace every NaN in a 2-D array with the mean of its column.

    The array is modified in place and the same object is returned.

    Args:
        arr: 2-D NumPy array to repair. When omitted, the module-level
            array ``t1`` is used, which keeps the original zero-argument
            call ``fill_ndarry()`` working unchanged.

    Returns:
        The array that was processed (``arr`` itself, or ``t1``), with each
        NaN replaced by the mean of the non-NaN values in the same column.
        A column that consists only of NaN has no valid mean and is left
        untouched.
    """
    if arr is None:
        # Backward-compatible default: fall back to the global array.
        arr = t1
    for col_idx in range(arr.shape[1]):
        # Basic slicing yields a view, so assignments below write into ``arr``.
        column = arr[:, col_idx]
        nan_mask = np.isnan(column)
        if not nan_mask.any():
            continue
        if nan_mask.all():
            # Nothing to average; skipping avoids the "mean of empty slice"
            # RuntimeWarning and leaves the column as NaN, as before.
            continue
        column[nan_mask] = column[~nan_mask].mean()
    return arr

if __name__ == '__main__':
    # Script entry point: call fill_ndarry() with no arguments (it then works
    # on the module-level array t1) and print the array it returns.
    data = fill_ndarry()
    print(data)
[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]

数组的拼接

a = np.array([[123,456,789], [3214,456,134]])
a
array([[ 123,  456,  789],
       [3214,  456,  134]])
b= np.array([[1235,3124,432], [43,13,134]])
b
array([[1235, 3124,  432],
       [  43,   13,  134]])
c = np.concatenate((a, b))
c
array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])
c.shape
(4, 3)
v = np.concatenate((a,b), axis=0)  # 沿 axis=0 竖直拼接(行数增加,列数不变)
v
array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])
v = np.concatenate((a,b), axis=1)  # 沿 axis=1 水平拼接(列数增加,行数不变)
v
array([[ 123,  456,  789, 1235, 3124,  432],
       [3214,  456,  134,   43,   13,  134]])
np.hstack((a,b))  # 水平拼接,对二维数组等价于 axis=1
array([[ 123,  456,  789, 1235, 3124,  432],
       [3214,  456,  134,   43,   13,  134]])
np.vstack((a,b))  # 竖直拼接,对二维数组等价于 axis=0
array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])

行列交换

t = np.arange(12).reshape(3, 4)
t
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])
t[[1,2],:] = t[[2,1], :]  # 行交换
t
array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [ 4,  5,  6,  7]])
t[:,[0,2]] = t[:,[2,0]]  # 列交换
t
array([[ 2,  1,  0,  3],
       [10,  9,  8, 11],
       [ 6,  5,  4,  7]])
np.argmax(t, axis=0)  # 获取每一列最大值的位置
array([1, 1, 1, 1], dtype=int64)
np.argmin(t,axis=0)  # 获取每一列最小值的位置
array([0, 0, 0, 0], dtype=int64)
np.eye(3)  # 创建对角线为1的3*3数组
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])
np.random.seed(10)
t = np.random.randint(0,20,(3,4))
t
array([[ 9,  4, 15,  0],
       [17, 16, 17,  8],
       [ 9,  0, 10,  8]])
t_new = t.copy()
t_new
array([[ 9,  4, 15,  0],
       [17, 16, 17,  8],
       [ 9,  0, 10,  8]])

运算–真值判断

x = np.array([1,1,1,0])
y = np.array([1,1,1,1])
print(x)
y
[1 1 1 0]





array([1, 1, 1, 1])

np.logical_and(x,y)  # 对应位置一个为假,就是假
array([ True,  True,  True, False])

np.logical_or(x,y)  # 对应位置一个为真就是真
array([ True,  True,  True,  True])

np.logical_not(x,y)  # 取反;注意第二个参数是 out,结果会被写入 y(所以输出是整数 0/1,且 y 被覆盖),一般应写作 np.logical_not(x)
array([0, 0, 0, 1])
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小刘私坊

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值