Numpy学习笔记

最新推荐文章于 2024-08-12 23:09:11 发布

E_Satellite

最新推荐文章于 2024-08-12 23:09:11 发布

阅读量258

点赞数 4

文章标签： numpy 学习笔记

本文链接：https://blog.csdn.net/E_Satellite/article/details/140912126

版权

"""
"""
# 学习numpy的原因：快速、方便、科学计算的基础库
# numpy是一个在Python中做科学计算的基础库，重在数值计算，也是大部分Python科学计算库的基础库
# 多用在大型、多维数组上执行数值运算

# 创建数组
import numpy as np
import random

# Build an ndarray from a Python list.
t1 = np.array([1,2,3])
print(f"t1的内容是：{t1}")
print(f"t1的类型是：{type(t1)}")

# Build an ndarray from a range object.
t2 = np.array(range(10))
print(f"t2的内容是：{t2}")
print(f"t2的类型是：{type(t2)}")

# np.arange works like range and quickly generates a run of numbers.
t3 = np.arange(4,10,2)
print(f"t3的内容是：{t3}")
print(f"t3的类型是：{type(t3)}")

# Data types in numpy: the dtype argument fixes the element type.
t5 = np.array([1,1,0,1,0,0],dtype=bool)
print(t5)
print(f"t5的数据类型是：{t5.dtype}")

t6 = np.array([1,1,0,1,0,0],dtype=float)
print(t6)
print(f"t6的数据类型是：{t6.dtype}")

t7 = np.array([1,1,0,1,0,0],dtype=int)
print(t7)
print(f"t7的数据类型是：{t7.dtype}")

# Convert to another data type with astype (returns a new array).
t8 = t7.astype("int8")
print(t8)
print(f"t8的数据类型是：{t8.dtype}")

# Floating-point numbers in numpy (t7 and t8 are rebound from here on).
t7 = np.array([random.random() for _ in range(10)])
print(f"t7的内容是：{t7}")
# Fixed: the f prefix was missing, so the placeholder was printed literally.
print(f"t7的数据类型是：{t7.dtype}")

# Round every element to two decimal places.
t8 = np.round(t7,2)
print(f"t8的内容是：{t8}")
print(f"t8的数据类型是：{t8.dtype}")

"""
演示数组的形状
"""
import numpy as np

## Array shape: xx.shape gives the shape of an array
# One-dimensional array with 12 elements.
t1 = np.arange(12)
print(t1)
print(f"t1的形状是：{t1.shape}")
print()

# Two-dimensional array: 2 rows, 3 columns.
t2 = np.array([[1,2,3],[4,5,6]])
print(t2)
print(f"t2的形状是{t2.shape}")
print()

# Three-dimensional array: 2 blocks, each with 2 rows and 3 columns.
t3 = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
print(t3)
print(f"t3的形状是{t3.shape}")
print()

## Change the shape with reshape(); reshape does not modify the array itself
## The shape is (rows, columns) or (blocks, rows, columns)
t4 = np.arange(12)
print(t4)
print(t4.shape)

t5 = t4.reshape((3,4))
print(t5)

# flatten() unrolls the array to one dimension and does not modify the array itself
t6 = t5.flatten()
print(t6)

## Array arithmetic
## Broadcasting: a scalar operand is applied to every element
t = np.arange(24).reshape((4,6))
print(t)
t1 = t + 2
print(t1)

t2 = t * 2
print(t2)

# Dividing by zero does not raise here; numpy produces nan/inf instead.
t3 = t/0
print(t3)
# nan means "not a number", inf means infinity
# Output:
# [[nan inf inf inf inf inf]
#  [inf inf inf inf inf inf]
#  [inf inf inf inf inf inf]
#  [inf inf inf inf inf inf]]

# Arithmetic between arrays of different shapes is applied per whole row or column;
# the shapes must agree along one dimension (or be 1 there).
t1 = np.arange(5)
#[0 1 2 3 4]
t2 = np.arange(5).reshape(5,1)
# [[0]
#  [1]
#  [2]
#  [3]
#  [4]]
t3 = np.arange(10).reshape(2,5)
# [[0 1 2 3 4]
#  [5 6 7 8 9]]
t4 = np.arange(30).reshape(6,5)
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]
#  [15 16 17 18 19]
#  [20 21 22 23 24]
#  [25 26 27 28 29]]
t5 = np.arange(20).reshape(5,4)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]
#  [12 13 14 15]
#  [16 17 18 19]]
# (6,5) - (5,): t1 is subtracted from every row of t4.
print(t4-t1)
# print(t4-t3)  cannot be computed: shapes (6,5) and (2,5) do not broadcast
# (5,4) - (5,1): t2 is subtracted from every column of t5.
print(t5-t2)

"""
演示广播机制
"""
import numpy as np

## NumPy broadcasting: a scalar operand is applied to every element
# Build the values 0..23 and lay them out as 4 rows by 6 columns in one step.
t5 = np.array(range(24)).reshape((4, 6))
print(t5)

# In-place addition: the scalar 2 is added to each element of t5.
t5 += 2
print(t5)

"""
轴可以理解为方向，使用0，1，2.....表示
"""

# 有了轴的概念之后，我们计算会更加方便，比如计算一个2维数组的平均值，必须指定是计算哪个方向上的数字平均值
# (0,1,2...)

"""
演示numpy读取数据,索引和切片操作,数值的修改
"""

# np.loadtxt方法是从文本文件里面读内容
# np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)
# fname：文件、字符串或产生器
# dtype：数据类型，默认float（np.float别名已在NumPy 1.24中移除，请直接使用float）
# delimiter：分隔字符串，默认是任意空白字符；读取csv文件时需改为逗号
# skiprows：跳过前x行，一般跳过第一行表头
# usecols：读取指定的列，索引，元组类型
# unpack：如果True，读入属性将分别写入不同数组变量，默认false

import numpy as np

# CSV columns: views, likes, dislikes, comment count
us_file_path = "./US_video_data_numbers.csv"
uk_file_path = "./GB_video_data_numbers.csv"

# unpack=True swaps rows and columns (the result is transposed).
# An existing numpy array can be transposed with xx.transpose() or xx.swapaxes(1,0).
t1 = np.loadtxt(us_file_path,delimiter=",",dtype="int",unpack=True)
t2 = np.loadtxt(uk_file_path,delimiter=",",dtype="int")

# print(t1)
print(t2)


# Indexing and slicing; indices start at 0.
# One row
print(t2[2])
# Several consecutive rows
print(t2[2:])
# Several non-consecutive rows
print(t2[[2,8,10]])
# Rows and columns together: the part before the comma selects rows, the part after selects columns
print(t2[1,:]) # row 2, every column
print(t2[2:,:]) # from row 3 onward, every column
print(t2[[2,10,3],:]) # rows 3, 11 and 4 (indices 2, 10, 3), every column
print(t2[:,0]) # first column of every row
# Several consecutive columns
print(t2[:,2:]) # from column 3 onward
# Several non-consecutive columns
print(t2[:,[0,2]]) # first and third column of every row
# A single element: row 3, column 4; the result is a numpy integer scalar
print(t2[2,3])
# Rows 3 to 5 and columns 2 to 4 (inclusive).
# Fixed: the slice was 2:4,1:3, which stops one row and one column short,
# because the end index of a slice is exclusive.
b = t2[2:5,1:4]
# Several non-adjacent points; the selected points are (0,0) and (2,1)
c = t2[[0,2],[0,1]]
print(c)

## Modifying values
grid_shape = (4, 6)
t = np.arange(24).reshape(grid_shape)
# Starting layout:
# [[ 0  1  2  3  4  5]
#  [ 6  7  8  9 10 11]
#  [12 13 14 15 16 17]
#  [18 19 20 21 22 23]]

# Zero out the third and fourth columns of every row.
t[:, 2:4] = 0
print(t)

# Element-wise comparison yields a boolean array of the same shape.
t3 = np.less(t, 10)
print(t3)

# Boolean indexing keeps only the values greater than 10.
t4 = t[np.greater(t, 10)]
print(t4)

# numpy's ternary operator: 0 where the value is below 10, otherwise 10.
print(np.where(t3, 0, 10))

# clip(a, b): values below a become a, values above b become b.
# nan is not affected by clip.
t = np.arange(24).reshape(grid_shape)
print(np.clip(t, 10, 18))

# Switch to float so that a nan can be stored in the array.
t = t.astype(float)
t[3, 3] = np.nan
print(t)

"""
numpy中的nan和inf

nan(NAN,Nan):not a number表示不是一个数字
什么时候numpy中会出现nan：
      当我们读取本地的文件为float的时候，如果有缺失，就会出现nan
      当做了一个不合适的计算的时候(比如无穷大(inf)减去无穷大)
inf(-inf,inf):infinity,inf表示正无穷，-inf表示负无穷
什么时候会出现inf（包括-inf、+inf）
      比如一个数字除以0（python中会直接报错，numpy中是一个inf或者-inf）

"""
import numpy as np

## Things to know about nan
# Two nan values are never equal: np.nan != np.nan
print(np.nan==np.nan)
print(np.nan!=np.nan)
# Use that property to count the nan values in an array
t = np.arange(24).reshape(4,6).astype(float)
t[:,0] = 0
print(np.count_nonzero(t)) # np.count_nonzero counts the entries that are not 0
t[3,3] = np.nan
print(np.count_nonzero(t!=t))         # number of nan values (only nan differs from itself)
print(np.count_nonzero(np.isnan(t)))     # number of nan values, via np.isnan

## Why count the nan values?
## Because any arithmetic involving nan yields nan
# sum adds the elements up; t contains a nan here, so the total is nan
print(np.sum(t))
t1 = np.arange(12).reshape((3,4))
print(np.sum(t1))
print(np.sum(t1,axis=0)) # sum of each column
print(np.sum(t1,axis=1)) # sum of each row

# Sum: t.sum(axis=None)
# Mean: t.mean(axis=None)  strongly affected by outliers
# Median: np.median(t,axis=None)
# Maximum: t.max(axis=None)
# Minimum: t.min(axis=None)
# Range (peak to peak): np.ptp(t,axis=None), i.e. maximum minus minimum
# Standard deviation: t.std(axis=None)
# nan values can be replaced with the mean
# Compute the statistics of the data along rows or columns
print(t1)
print(t1.sum(axis=0))   # sum of each column
print(t1.mean(axis=0))  # mean of each column
print(np.median(t1))

"""

"""

import numpy as np

# Sample data: the floats 0..11 laid out as 3 rows by 4 columns.
t1 = np.arange(12, dtype="float").reshape(3, 4)

# Mark the second row as missing from its third column to the end.
t1[1, 2:] = np.nan
def fill_ndarray(t1):
    """Replace every NaN in a 2-D float array with the mean of its column.

    The array is modified in place and is also returned.

    Args:
        t1: Two-dimensional floating-point ndarray in which NaN marks a
            missing value.

    Returns:
        The same array object with the NaN entries filled. A column made up
        entirely of NaN has no valid values to average and is left unchanged.
    """
    for i in range(t1.shape[1]):  # t1.shape[1] is the column count
        temp_col = t1[:, i]  # basic slice: a view, so writes land in t1
        nan_mask = np.isnan(temp_col)
        # Nothing to fill, or nothing to average. Skipping the all-NaN case
        # avoids the "Mean of empty slice" RuntimeWarning.
        if not nan_mask.any() or nan_mask.all():
            continue
        # Fill the NaN positions with the mean of the column's valid values.
        temp_col[nan_mask] = temp_col[~nan_mask].mean()
    return t1

if __name__ == "__main__":
    # Demo run: a 4x6 float grid of 0..23 with part of the second row missing.
    t1 = np.arange(24, dtype="float").reshape(4, 6)
    t1[1, 2:] = np.nan
    print(t1)

    # Fill the missing entries and show the result.
    t1 = fill_ndarray(t1)
    print(t1)

"""
把两个国家的数据放在一起进行研究
"""

# 竖直拼接vertically   方法np.vstack((t1,t2))
# 水平拼接horizontally 方法np.hstack((t1,t2))
# 分割的方向和拼接是一样的

"""
演示数组的行列交换
"""
import numpy as np
# 3x4 array holding the values 12..23.
t = np.arange(12,24).reshape(3,4)
print(t)
# Swap rows 2 and 3. Indexing with a list on the right-hand side produces a
# copy, so the assignment does not read rows it has already overwritten.
# Fixed: the right-hand side was t[[2,1]:,], a slice with a list as its
# start, which numpy rejects at runtime.
t[[1,2],:] = t[[2,1],:]
print(t)
# Swap columns 2 and 3 in the same way.
t[:,[1,2]] = t[:,[2,1]]
print(t)

E_Satellite

关注

4
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
Numpy学习笔记

print(np.count_nonzero(t)) # 方法np.count_nonzero 统计不为0的个数。print(np.count_nonzero(np.isnan(t))) # 计算nan的个数。# 对于numpy数组的转置方法是xx.transpose()和xx.swapaxes(1,0)。print(np.count_nonzero(t!=t)) # 计算nan的个数。print(np.sum(t1,axis=0)) # 计算每一列上的和。print(np.sum(t1,axis=1)) # 计算每一行上的和。
复制链接

扫一扫