数据处理Numpy学习笔记

小刘私坊

已于 2023-05-09 20:16:12 修改

阅读量126

点赞数

分类专栏：数据处理　文章标签：numpy 学习笔记

于 2023-05-09 20:11:28 首次发布

本文链接：https://blog.csdn.net/Python_Ghost/article/details/130587871

版权

数据处理专栏收录该内容

5 篇文章 1 订阅

订阅专栏

import numpy as np
t1 = np.array([1, 2, 3])
t1

array([1, 2, 3])

t2 = np.array(range(10))
t2

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

t3 = np.arange(12)
t3

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

t4 = np.arange(4, 8, 2)
t4

array([4, 6])

t4.dtype

dtype('int32')

修改数据类型的方法

t4 = np.array(range(10), dtype='float64')
t4

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

t5 = t4.astype('float32')
t5.dtype

dtype('float32')

import random
t6 = np.array([random.random() for i in range(1, 10)])
t6

array([0.69002913, 0.60309892, 0.57732269, 0.77670478, 0.49087636,
       0.05686646, 0.50341497, 0.78348396, 0.97891676])

# 保留小数
t7 = np.round(t6, 2)
t7

array([0.69, 0.6 , 0.58, 0.78, 0.49, 0.06, 0.5 , 0.78, 0.98])

生成特殊数组

np.zeros((3, 4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

np.ones((3,))

array([1., 1., 1.])

np.random.random((4, 3, 4))

array([[[0.88949406, 0.516348  , 0.9338736 , 0.8761512 ],
        [0.80969742, 0.0557918 , 0.84568932, 0.70281596],
        [0.97124603, 0.62063392, 0.20347112, 0.60842865]],

       [[0.83164314, 0.75154962, 0.55629461, 0.7950253 ],
        [0.71041516, 0.46290726, 0.66937629, 0.66167786],
        [0.30038413, 0.72894563, 0.42310383, 0.96849788]],

       [[0.55985491, 0.60339171, 0.91343052, 0.99101353],
        [0.24031093, 0.18791789, 0.13428558, 0.23939572],
        [0.19660544, 0.14944018, 0.45764254, 0.57232685]],

       [[0.70098855, 0.92525333, 0.19818721, 0.54227531],
        [0.37353857, 0.57298138, 0.89641575, 0.13471736],
        [0.17977541, 0.18406087, 0.26044415, 0.37039553]]])

np.random.rand(3,2)

array([[0.77886627, 0.20044878],
       [0.82558843, 0.74687223],
       [0.04423197, 0.82414292]])

np.random.randint(10,size=(5,4))  # 5x4 array of random integers in [0, 10); the upper bound 10 is excluded

array([[1, 5, 8, 0],
       [4, 2, 7, 6],
       [4, 8, 7, 8],
       [9, 7, 2, 3],
       [3, 4, 5, 5]])

np.random.random_sample()  # 随机采样取值

0.816308250363168

np.random.randint(0,10,5)  # draw 5 random integers from [0, 10); 10 itself is never returned

array([2, 4, 8, 4, 5])

mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果

array([ 0.03436545, -0.08164443,  0.13160285,  0.0430074 , -0.05111399,
        0.09617187, -0.10320152, -0.14053349, -0.19079794, -0.0438002 ])

np.set_printoptions(precision=2)  # display floats with 2 decimal places; a global print setting for all arrays, stored values are unchanged

mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果

array([ 0.14,  0.02, -0.16, -0.07, -0.03, -0.14, -0.08,  0.03, -0.02,
       -0.08])

arr = np.arange(10)

# 数组的形状

mu,sigma = 0, 0.1
np.random.normal(mu, sigma, 10)  # 创建随机高斯的结果

array([-0.13,  0.  ,  0.04,  0.01,  0.06,  0.04,  0.01, -0.15, -0.04,
        0.08])

arr = np.arange(10)
print(arr)

[0 1 2 3 4 5 6 7 8 9]

np.random.shuffle(arr)  # 洗牌--乱序
arr

array([7, 1, 2, 0, 9, 4, 6, 5, 8, 3])

数组的形状

t1 = np.arange(12)
t1

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

t1.ndim  # 查看数组的维度

t1.shape

(12,)

t1.reshape((2, 6))   # 一维转二维

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11]])

t1.reshape((12,))  # reshape to shape (12,); the earlier (2, 6) result was not assigned, so t1 is still 1-D here

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

t2 = np.arange(24).reshape((2,3,4))
t2.shape[0],t2.shape[1],t2.shape[2]

(2, 3, 4)

t2.reshape((t2.shape[0]*t2.shape[1],t2.shape[2]))

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

t2.flatten()  # 扁平

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

axis轴

添加新轴–np.newaxis

ar = np.arange(10)
ar

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

ar = ar[np.newaxis,:]  # 添加在前面
ar

array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])

ar.shape

(1, 10)

ar = ar[:,np.newaxis]
ar

array([[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]]])

ar.shape

(1, 1, 10)

压缩–squeeze

ar = ar.squeeze()  # 压缩操作
ar

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

ar.shape

(10,)

文件读取

file_path = './can.csv'
np.loadtxt(file_path, delimiter=',', dtype='int')

array([[  1,  20,   1,   0,   0],
       [  1,  20,   1,   0,   0],
       [  1,  20,   0,   0,   0],
       ...,
       [  3, 100,   1,   0,   0],
       [  3, 100,   0,   0,   0],
       [  3, 100,   1,   0,   0]])

np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)

array([[  1,   1,   1, ...,   3,   3,   3],
       [ 20,  20,  20, ..., 100, 100, 100],
       [  1,   1,   0, ...,   1,   0,   1],
       [  0,   0,   0, ...,   0,   0,   0],
       [  0,   0,   0, ...,   0,   0,   0]])

转置

t1 = np.arange(24).reshape(4,6)

t1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

t1.T

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

np.transpose(t1)

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

t1.swapaxes(1, 0)

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

t1.swapaxes(0, 1)

array([[ 0,  6, 12, 18],
       [ 1,  7, 13, 19],
       [ 2,  8, 14, 20],
       [ 3,  9, 15, 21],
       [ 4, 10, 16, 22],
       [ 5, 11, 17, 23]])

file_path = './can.csv'
t2 = np.loadtxt(file_path, delimiter=',', dtype='int', unpack=True)

t2.T

array([[  1,  20,   1,   0,   0],
       [  1,  20,   1,   0,   0],
       [  1,  20,   0,   0,   0],
       ...,
       [  3, 100,   1,   0,   0],
       [  3, 100,   0,   0,   0],
       [  3, 100,   1,   0,   0]])

t2[2]

array([1, 1, 0, ..., 1, 0, 1])

t2.T[[2,8,3]]

array([[ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0]])

t2.T[[2,8,3], :]

array([[ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0],
       [ 1, 20,  0,  0,  0]])

t2.T[2,3]

t2[:,[2,8,2]]

array([[ 1,  1,  1],
       [20, 20, 20],
       [ 0,  0,  0],
       [ 0,  0,  0],
       [ 0,  0,  0]])

数据修改

t2[t2<2] = 3

t2

array([[  3,   3,   3, ...,   3,   3,   3],
       [ 20,  20,  20, ..., 100, 100, 100],
       [  3,   3,   3, ...,   3,   3,   3],
       [  3,   3,   3, ...,   3,   3,   3],
       [  3,   3,   3, ...,   3,   3,   3]])

t2[t2>10] = 30

t2

array([[ 3,  3,  3, ...,  3,  3,  3],
       [30, 30, 30, ..., 30, 30, 30],
       [ 3,  3,  3, ...,  3,  3,  3],
       [ 3,  3,  3, ...,  3,  3,  3],
       [ 3,  3,  3, ...,  3,  3,  3]])

np.where(t2<4, 100, 200)  # 小于4赋值100，反之200

array([[100, 100, 100, ..., 100, 100, 100],
       [200, 200, 200, ..., 200, 200, 200],
       [100, 100, 100, ..., 100, 100, 100],
       [100, 100, 100, ..., 100, 100, 100],
       [100, 100, 100, ..., 100, 100, 100]])

t2.clip(4, 20)   # 小于4的赋值为4，大于20的赋值为20

array([[ 4,  4,  4, ...,  4,  4,  4],
       [20, 20, 20, ..., 20, 20, 20],
       [ 4,  4,  4, ...,  4,  4,  4],
       [ 4,  4,  4, ...,  4,  4,  4],
       [ 4,  4,  4, ...,  4,  4,  4]])

t2 = t2.astype(float)

t2

array([[ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [30., 30., 30., ..., 30., 30., 30.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.]])

t2[2, 2] = np.nan  # set row 3, column 3 to NaN; the array must have a float dtype to hold NaN

t2

array([[ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [30., 30., 30., ..., 30., 30., 30.],
       [ 3.,  3., nan, ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.],
       [ 3.,  3.,  3., ...,  3.,  3.,  3.]])

np.count_nonzero(t2)  # 统计不为0的个数

np.count_nonzero(t2 != t2)  # 拿到为nan的个数

np.isnan(t2)  # nan处为True

array([[False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       [False, False, False, ..., False, False, False],
       [False, False, False, ..., False, False, False]])

np.sum(t2)

nan

t3 = np.arange(12).reshape(3, 4)

t3

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

np.sum(t3)

np.sum(t3, axis=0)  # 计算列和

array([12, 15, 18, 21])

np.sum(t3, axis=1)  # 计算行和

array([ 6, 22, 38])

t2
np.mean(t2, axis=0)  # 求列均值

array([8.4, 8.4, nan, ..., 8.4, 8.4, 8.4])

np.mean(t2, axis=1)  # 求行均值

array([ 2.67, 30.  ,   nan,  2.99,  2.99])

np.median(t2, axis=0)  # 求列中值

array([ 3.,  3., nan, ...,  3.,  3.,  3.])

np.median(t2, axis=1)  # 求行中值

array([ 3., 30., nan,  3.,  3.])

np.ptp(t2, axis=0)  # 列极大值和极小值之差

array([27., 27., nan, ..., 27., 27., 27.])

np.ptp(t2, axis=1)  # 行极大值和极小值之差

array([ 1.,  0., nan,  5.,  4.])

np.std(t2, axis=0)  # 列标准差

array([10.8, 10.8,  nan, ..., 10.8, 10.8, 10.8])

np.std(t2, axis=1)  # 行标准差

array([0.47, 0.  ,  nan, 0.12, 0.08])

numpy处理缺失值和nan

t1 = np.arange(12).reshape(3,4).astype(float)
t1

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5.,  6.,  7.],
       [ 8.,  9., 10., 11.]])

t1[1,2:] = np.nan

t1

array([[ 0.,  1.,  2.,  3.],
       [ 4.,  5., nan, nan],
       [ 8.,  9., 10., 11.]])

def fill_ndarry(arr=None):
    """Fill NaN entries column by column with that column's non-NaN mean.

    Args:
        arr: 2-D float ndarray to repair in place. When omitted, the
            module-level ``t1`` is used, which keeps the original
            zero-argument call working.

    Returns:
        The same array object that was filled (``arr``, or ``t1`` when
        no argument was given).
    """
    if arr is None:
        arr = t1
    for col_idx in range(arr.shape[1]):
        column = arr[:, col_idx]  # basic slice -> view, so writes land in arr
        nan_mask = np.isnan(column)
        # Skip clean columns, and also all-NaN columns: there is no valid
        # value to average, and the mean of an empty selection would only
        # produce NaN plus a RuntimeWarning.
        if nan_mask.any() and not nan_mask.all():
            column[nan_mask] = column[~nan_mask].mean()
    return arr


if __name__ == '__main__':
    data = fill_ndarry()
    print(data)

[[ 0.  1.  2.  3.]
 [ 4.  5.  6.  7.]
 [ 8.  9. 10. 11.]]

数组的拼接

a = np.array([[123,456,789], [3214,456,134]])
a

array([[ 123,  456,  789],
       [3214,  456,  134]])

b= np.array([[1235,3124,432], [43,13,134]])
b

array([[1235, 3124,  432],
       [  43,   13,  134]])

c = np.concatenate((a, b))
c

array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])

c.shape

(4, 3)

v = np.concatenate((a,b), axis=0)  # join along axis 0: b's rows are stacked below a's, giving shape (4, 3)

array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])

v = np.concatenate((a,b), axis=1)  # join along axis 1: b's columns are placed to the right of a's, giving shape (2, 6)

array([[ 123,  456,  789, 1235, 3124,  432],
       [3214,  456,  134,   43,   13,  134]])

np.hstack((a,b))  # horizontal stack: for these 2-D arrays the result matches concatenate with axis=1

array([[ 123,  456,  789, 1235, 3124,  432],
       [3214,  456,  134,   43,   13,  134]])

np.vstack((a,b))  # vertical stack: for these 2-D arrays the result matches concatenate with axis=0

array([[ 123,  456,  789],
       [3214,  456,  134],
       [1235, 3124,  432],
       [  43,   13,  134]])

行列交换

t = np.arange(12).reshape(3, 4)
t

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

t[[1,2],:] = t[[2,1], :]  # 行交换

array([[ 0,  1,  2,  3],
       [ 8,  9, 10, 11],
       [ 4,  5,  6,  7]])

t[:,[0,2]] = t[:,[2,0]]  # 列交换

array([[ 2,  1,  0,  3],
       [10,  9,  8, 11],
       [ 6,  5,  4,  7]])

np.argmax(t, axis=0)  # 获取每一列最大值的位置

array([1, 1, 1, 1], dtype=int64)

np.argmin(t,axis=0)  # 获取每一列最小值的位置

array([0, 0, 0, 0], dtype=int64)

np.eye(3)  # 创建对角线为1的3*3数组

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

np.random.seed(10)
t = np.random.randint(0,20,(3,4))
t

array([[ 9,  4, 15,  0],
       [17, 16, 17,  8],
       [ 9,  0, 10,  8]])

t_new = t.copy()

t_new

array([[ 9,  4, 15,  0],
       [17, 16, 17,  8],
       [ 9,  0, 10,  8]])

运算–真值判断

x = np.array([1,1,1,0])
y = np.array([1,1,1,1])
print(x)
y

[1 1 1 0]





array([1, 1, 1, 1])

与

np.logical_and(x,y)  # 对应位置一个为假，就是假

array([ True,  True,  True, False])

或

np.logical_or(x,y)  # 对应位置一个为真就是真

array([ True,  True,  True,  True])

非

np.logical_not(x)  # element-wise NOT of x; a second positional argument is `out`, so passing y there would overwrite y

array([False, False, False,  True])

小刘私坊

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
打赏
0
评论
数据处理Numpy学习笔记

【代码】数据处理Numpy学习笔记。
复制链接

扫一扫

专栏目录

数据处理Numpy学习笔记

修改数据类型的方法

生成特殊数组

数组的形状

axis轴

添加新轴–np.newaxis

压缩–squeeze

文件读取

转置

数据修改

numpy处理缺失值和nan

数组的拼接

行列交换

运算–真值判断

与

或

非

“相关推荐”对你有帮助么？