numpy

孤舟独钓寒江雪

已于 2022-10-23 16:19:05 修改

阅读量371

点赞数

分类专栏： python 文章标签： numpy python 机器学习

于 2022-07-30 09:19:56 首次发布

本文链接：https://blog.csdn.net/weixin_55000908/article/details/126067423

版权

python 专栏收录该内容

11 篇文章 0 订阅

订阅专栏

numpy数组表示

向量是一维的数组（行向量、列向量）
矩阵是指具有两个维度的数组
三维或更高维度的数组通常用张量（tensor）表示。

numpy中的一维向量既可以作为行向量，也可以作为列向量存在。
np.tile(

a = np.array([1,2,3])  
b = np.array([[-4,2,-3],[-2,-3,1]])

a@b.T  # array([-9, -5]) 此时的a为行向量，结果为行向量
b@a  # array([-9, -5])  此时a为列向量，结果也是列向量


np.where() # 函数的使用

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
import sklearn

一. 基础知识

numpy：支持大量的维度数组与矩阵运算
ndarray：对象（存放同类型元素的多维数组）

1.1 Numpy 基本的数据类型

np.bool_, np.bool8（注意：np.bool8 已在 NumPy 2.0 中移除，请使用 np.bool_）
np.int_, np.intc, np.intp, np.int8, np.int16, np.int32, np.int64
np.uint, np.uint8, np.uint16, np.uint32, np.uint64
np.float_（NumPy 2.0 中已移除，请使用 np.float64）, np.float16, np.float32, np.float64
np.complex_（NumPy 2.0 中已移除，请使用 np.complex128）, np.complex64, np.complex128

1.2 字符串代码

每一个内建类型都有唯一定义它的字符串代码。

b布尔型   i(有符号）整型   u（无符号）整型 
f浮点型   c复数浮点型  
m时间间隔（timedelta） M日期时间（datetime） 
O（python）对象 
S,a（byte）字符串 
U   Unicode 
V  （void）原始数据

1.3 数据类型对象 dtype

dt = np.dtype(np.int32)
dt_1 = np.dtype('i8') # int8, int16, int32, int64 可以使用字符串 'i1','i2','i4','i8'
dt_2 = np.dtype('>f4') # 大端法（高位组放在最前面）
dt_3 = np.dtype([('name','S20'), ('age','<i1'), ("mark", '>f4')])  # 结构化数据类型

students = np.array([("lizhong", 25, 95), ("xiaohong", 19, 89.5), ('zhangjun', 30, 60)], dtype=dt_3)
print(students)
print(students['age'])

1.3 数据类型转换

arr = np.array([1,2,3])
float_arr = arr.astype(np.float64)
arr.tolist() # 转化为list

1.4 Ndarray 属性

a = np.arange(24).reshape(2,4,3)
print(a)
print(a.ndim, a.shape, a.size, a.dtype, a.itemsize)
print(a.flags)  # 内存信息
print(a.real, a.imag)
print(a.data)

二. 创建 Ndarray

array(object, dtype=None, *, copy=True, order='K', subok=False, ndmin=0, like=None)

a = np.array([[1,2,3],[4,5,6]], dtype=float, ndmin=3).reshape(3, 2)  # a.dtype=float64 
b = a.astype(int)  # astype()  改变数据类型
a, a.dtype,  b, b.dtype

2.1 创建固定格式 Ndarray

a = np.empty([3,2], dtype='i1', order='C')  # 随机填充
b = np.empty([2,2,3], dtype=[('x', "S1"), ("y",'>i4'), ('z', np.float32)], order='C')
c = np.zeros([4,5], dtype=float, order='F')  # 以0填充
d = np.ones([1,2,3], dtype=np.int16, order='F')  # 以1填充
e = np.eye(3)  # 对角线为1
f = np.full([2,3], 6)  # 所有恒为6

g = np.random.random((2,2))

# Source matrix (values 1..12 laid out as 4 rows x 3 columns) and an
# uninitialised array with the same shape and dtype to receive the result.
x = np.arange(1, 13).reshape(4, 3)
y = np.empty_like(x)

x, y
# Row k of y is row k of x with the row index k added to every element.
for row_idx, row in enumerate(x):
    y[row_idx] = row + row_idx
x, y

2.2 从list, tuple, buffer, iter创建 Ndarray

x = [(1,2,3),(4,5,6)]
y = np.asarray(x, dtype=np.int16, order="C")

buf = b'Hello World'
from_buf = np.frombuffer(buf, dtype='S1', count=-1, offset=0)

it = iter(range(6))
from_it = np.fromiter(it, dtype=float, count=-1).reshape(2,3)

2.3 从range, linspace, logspace创建 Ndarray

ar = np.arange(0, 10, 2, dtype=float)  # 数值范围
li = np.linspace(1, 10, 10, endpoint=True, retstep=True, dtype=int)  # 等差数列
lg = np.logspace(1.0, 2.0, num=10, endpoint=True, base=10.0, dtype=None)  # 等比数列

三. 切片和索引

3.1 slice

a = np.arange(10)
sl = slice(2,7,2)  # [start, stop, step]
a[sl], a[2:7:2]

## （:）冒号 
b = np.array([[1,2,3],[3,4,5],[4,5,6]])
b[1], b[2], b[1:]  # b[1], b[2]

## 省略号（...）使得选择元组的长度与数组的维度相同
print(b[...,1])  # 第2列元素
print(b[1,...])  # 第2行元素
print(b[...,1:])  # 第2列及剩下的所有元素（第3列）

3.2 数组索引

# 数组索引 (0,0) (1,1) (2,0)
x = np.array([[1,2],[3,4],[5,6]])
y = x[[0,1,2],[0,1,0]]
y

# 获取4x3数组中四个角的元素，行索引是[0,0]和[3,3].而列索引是[0,2]和[0,2]
x = np.arange(12).reshape(4,3)
rows = np.array([[0,0],[3,3]])
cols = np.array([[0,2],[0,2]])
y = x[rows, cols] # 2*2        y = x[[0,0,3,3],[0,2,0,2]] 1*4
y

# 借助省略号（...）与索引数组组合
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
b = a[1:3, 1:3]  # 第2,3行  第2,3列
c = a[1:3, [1,2]] # 第2,3行 第2,3列
d = a[...,1:] # 第2,3列，行与数组维度相同

3.3 布尔索引

x = np.arange(12).reshape(4,3)
print(x[x>5])  # select the elements greater than 5
a = np.array([1, 2+6j, 5, 3.5+5j])
print(a[np.iscomplex(a)])  # keep only the elements with a non-zero imaginary part (drops the non-complex ones)
b = np.array([np.nan, 1, 2, np.nan, 3,4,5])
print(b[~np.isnan(b)])  # drop the NaN entries: the mask is the negation of isnan

a = np.array([[1,2], [3, 4], [5, 6]])
bool_idx = (a > 2)
print(bool_idx)
print(a[bool_idx])  # print(a[a > 2])

3.4 花式索引（利用整数数组进行索引）

x = np.arange(32).reshape((8,4))
print(x[[4,2,1,7]]) # 顺序索引数组（第4,2,1,7行）
print(x[[-4,-2,-1,-7]]) # 传入倒序索引数组（第4,6,7,1行）

四. Broadcast

# 对不同形状的数组进行数值计算的方式。
a = np.arange(12).reshape(4,3)
b = np.array([1,1,1])
c = np.tile(b, [4,1])  # 重复b的各个维度
print(a+b)
print(a+c)

五. Iterable

a = np.arange(6).reshape(2,3)
# By default nditer visits elements following the array's memory layout, not its index order.
for x in np.nditer(a):
    print(x, end=",")
for x in np.nditer(a.T):  # same visiting order as a: the transpose is stored in the same memory order
    print(x, end=",")
for x in np.nditer(a.T.copy(order='C')):  # different order from a: the C-order copy is stored differently in memory
    print(x, end=",")
for x in np.nditer(a.T.copy(order="F")):
    print(x, end=",")
    
print() # force nditer to use a specific order by passing it explicitly
for x in np.nditer(a, order='C'):  
    print(x, end=",")
for x in np.nditer(a, order="F"):
    print(x, end=",")

print()# nditer access mode is chosen with op_flags (read-only, read-write, write-only)
for x in np.nditer(a, op_flags=['readwrite']):
    x[...] = 2*x  # assign through the iterator so a itself is modified (every element doubled)
print(a)

5.1 外部循环

b = np.arange(0, 60, 5).reshape(3,4)
print(b)
for x in np.nditer(b, flags=['external_loop'], order="F"): # 迭代器将每列组合为一维数组
    print(x, end=",")

5.2 广播迭代

c = np.array([1,2,3,4],dtype=int)
for x,y in np.nditer([b, c]):
    print("%d:%d" % (x, y), end=", ")

六. Ndarray 操作

6.1 修改数组形状

## a.reshape: 
a = np.arange(8).reshape(4,2, order='C')

## np.resize()

## a.flat:  A 1-D iterator over the array.
for element in a.flat:
    print(element)
a, a.T, a.flat[3], a.T.flat[3]

## a.flatten:  Return a copy of the array collapsed into one dimension. modification wont affect the original data
a.flatten(), a.flatten(order='F')

## ravel:  Return a contiguous flattened array.  view of the array, modifications affect the original data
a.ravel()

6.2 翻转数组

np.transpose(a, axes=None)

a = np.arange(12).reshape(3,4)
# Reverse or permute（排列） the axes of an array; returns the modified array.
a.T, np.transpose(a)

np.rollaxis(a, axis, start=0)

# Roll the specified axis backwards, until it lies in a given position.
a = np.arange(8).reshape(2,2,2)
a, np.where(a==6), a[1,1,0]

b = np.rollaxis(a, 2, 0)  # 将轴2 滚到轴0（宽度到深度）
b, np.where(b==6), b[0,1,1]

c = np.rollaxis(a, 2,1) # 将轴2 滚动到轴1（宽度到高度）
c, np.where(c==6), c[1,0,1]

np.swapaxes(a, axis1, axis2)

# Interchange two axes of an array.
x = np.array([[[0,1],[2,3]],[[4,5],[6,7]]])
np.swapaxes(x, 2, 0) # 交换轴0（深度方向）到轴2（宽度方向）

6.3 修改数组维度

broadcast()

# Produce an object that mimics broadcasting.
x = np.array([[1],[2],[3]])
y = np.array([4,5,6])
b = np.broadcast(x,y) # 对y广播x

r,c = b.iters
print(next(r), next(c))
print(next(r), next(c))
print(b.shape)  # 3x3

b = np.broadcast(x, y)  # 若以上一步的广播结果+next后，结果不同
c = np.empty(b.shape)
c.flat = [u+v for (u,v) in b]
print(c)
print(x+y)  # built_in broadcasting

np.broadcast_to(array, shape, subok=False)

# Broadcast an array to a new shape.
a = np.arange(4)
b = np.broadcast_to(a,(4,4))
a.shape, b.shape  # (4,) (4, 4)

np.expand_dims(a, axis)

# Insert a new axis that will appear at the `axis` position in the expanded array shape.
x = np.array(([1,2],[3,4]))
y = np.expand_dims(x, axis=0)
x.shape, y.shape  # (2, 2) (1, 2, 2)
y = np.expand_dims(x, axis=1)
x.shape, y.shape # (2, 2) (2, 1, 2)

np.squeeze(a, axis=None)

# Remove single-dimensional entries from the shape of `a`.
a = np.arange(6).reshape(1,2,3)
b = np.squeeze(a, axis=0)
a.shape, b.shape  # (1, 2, 3) (2, 3)

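np.unsqueeze()（注意：NumPy 中没有 unsqueeze 函数，它是 PyTorch 的 torch.unsqueeze；NumPy 中对应的操作是 np.expand_dims(a, axis) 或 a[np.newaxis, ...]）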

6.4 连接数组

a = np.array([1, 2, 3])
b = np.array([4, 5, 6])
np.r_[a,b,a]
np.c_[b,a,b]

np.concatenate((a1, a2, ...), axis=0, out=None) 连接

# Join a sequence of arrays along an existing axis.
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])
c = np.concatenate((a,b),axis=0)
d = np.concatenate((a,b),axis=1)
c.shape,d.shape #((4, 2), (2, 4))  # 秩不变

np.stack(arrays, axis=0, out=None) 堆叠

# Join a sequence of arrays along a new axis.
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])
c = np.stack((a,b), 0)
d = np.stack((a,b), 1)
c.shape, d.shape # ((2, 2, 2), (2, 2, 2))  # 增加秩

np.hstack(tup)

# Stack arrays in sequence horizontally (column wise).
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])
c = np.hstack((a,b))
c, c.shape  # (2, 4))

np.vstack(tup)

# Stack arrays in sequence vertically (row wise).
a = np.array([[1,2],[3,4]])
b = np.array([[5,6],[7,8]])
c = np.vstack((a,b))
c, c.shape # (4, 2))

6.5 分割数组

np.split(ary, indices_or_sections, axis=0)

# Split an array into multiple sub-arrays as views into `ary`.
a = np.arange(9)
b = np.split(a,3) # 平均切分
c = np.split(a, [4, 7])  # 按指定位置分割

a = np.arange(16).reshape(4,4)
b = np.split(a, 2, 0) # 沿水平方向分割
c = np.split(a, 2, 1)  # 沿垂直方向分割

np.hsplit(ary, indices_or_sections)

# Split an array into multiple sub-arrays horizontally (column-wise).
a = np.floor(10*np.random.random((2,6)))
np.hsplit(a, 3)

np.vsplit(ary, indices_or_sections)

# Split an array into multiple sub-arrays vertically (row-wise).
b = np.arange(16).reshape(4,4)
np.vsplit(b, 2)

6.6 添加和删除元素

np.resize(a, new_shape)

# Return a new array with the specified shape.
# When the requested shape holds more elements than the source, np.resize
# fills the result with repeated copies of the flattened input.
a = np.array([[0, 1], [2, 3]])
print(np.resize(a, (2, 3)))  # [[0 1 2] [3 0 1]]
print(np.resize(a, (1, 4)))  # [[0 1 2 3]]
print(np.resize(a, (2, 4)))  # [[0 1 2 3] [0 1 2 3]]

np.append(arr, values, axis=None)

# Append values to the end of an array.
a = np.array([[1,2,3],[4,5,6]])
b = np.append(a, [7,8,9])  # no axis given: a and the values are flattened, result is 1-D
c = np.append(a, [[7,8,9]], axis=0)  # axis=0: appends a new row, result shape (3, 3)
d = np.append(a, [[5,5,5], [7,8,9]], axis=1)  # axis=1: appends new columns, result shape (2, 6)

np.insert(arr, obj, values, axis=None)

# Insert values along the given axis before the given indices.
a = np.array([[1,2,3],[4,5,6]])
b = np.insert(a, 3, [11,12])
c = np.insert(a,1,[11], axis=0)  # 沿轴0 广播
d = np.insert(a,1,11, axis=1)  # 沿轴1 广播
a,b,c,d

np.delete(arr, obj, axis=None)

# Return a new array with sub-arrays along an axis deleted.
a = np.arange(12).reshape(3,4)
b = np.delete(a, 5)
c = np.delete(a, 1, axis=1)  # 删除第二列
d = np.delete(a, np.s_[::2])
a,b,c,d

np.unique(ar, return_index=False, return_inverse=False, return_counts=False, axis=None)

# Returns the sorted unique elements of an array.
a = np.array([5,2,6,2,7,5,6,8,2,9])
u,indics = np.unique(a, return_index=True)  # 去重后的数组，及在原数组中的索引
u,indics = np.unique(a, return_inverse=True)  # 去重后的数组，及原数组中元素在该数组中的索引
u[indics]  # 重构原数组
u,indics = np.unique(a, return_counts=True) # 去重后的数组，及在原数组中的数量

七. 位运算

bitwise_and，bitwise_or，invert，binary_repr

bin(13),bin(17)  # 0b1101 0b10001

bin(np.bitwise_and(13,17))  # 1    0b1
bin(np.bitwise_or(12,17))  # 29   0b11101

np.binary_repr(np.invert(np.array([13], dtype=np.uint8)).item())  # 242  0b11110010

np.binary_repr(13, width=8)  # 00001101  
np.binary_repr(242, width=8)  # 11110010

left_shift，right_shift

np.left_shift(10,2)  # 40
np.right_shift(40,2)  # 10

八. 字符串函数

定义在 numpy.char中

# Return element-wise string concatenation for two arrays of str or unicode.
print(np.char.add(["hello", 'Google'],[" world", ' Search']))

# Return (a * i), that is string multiple concatenation,element-wise.
print(np.char.multiply("RUNOOB ",3))

print(np.char.center("RUNOOB", 20, fillchar='*'))  # Return a copy of `a` with its elements centered in a string of length `width`.

print(np.char.capitalize('runoob'))  # Return a copy of `a` with only the first character of each element capitalized.
print(np.char.title("i like runoob"))  #  Title case words start with uppercase characters, all remaining cased characters are lowercase.

print(np.char.lower(["RUNoob","GOOgle"]))
print(np.char.upper(['runoob','facebook']))

print(np.char.split('i like runoob?'))
print(np.char.split('www.runoob.com', sep='.'))
print(np.char.splitlines('i\nlike\r\nrunoob?'))  # 以换行符作为分隔符来分割字符串
print(np.char.splitlines('i\rlike\n\rrunoob?'))  # "\n","\r","\r\n" 都可用作换行符

print(np.char.strip(["arunooba","admin","java"],"a"))  # 用于移除开头或结尾处的特定字符
print(np.char.join([':','-'],['runoob','google']))  # 通过指定分隔符来连接数组中的元素或字符串

print(np.char.replace('i like runoob','oo','cc'))  # 使用新字符串替换字符串中的所有子字符串

a = np.char.encode('runoob','cp500')
a, np.char.decode(a, 'cp500')

九. 数学函数

9.1 三角函数

a = np.array([0, 30, 45, 60, 90])  # 角度
np.around(np.sin(a*np.pi/180), 1)  # 通过乘 np.pi/180 角度转化为弧度
np.around(np.cos(a*np.pi/180), 1)
np.tan(a*np.pi/180)

9.2 反三角函数

a = np.array([0,30,45,60, 90])  # 角度
sin = np.sin(a*np.pi/180) 
inv = np.arcsin(sin)  # 返回值以弧度为单位
print(np.degrees(inv))  # 将弧度转换为角度

cos = np.cos(a*np.pi/180)
inv = np.arccos(cos)
print(np.degrees(inv))

tan = np.tan(a*np.pi/180)
inv = np.arctan(tan)
print(np.degrees(inv))

9.3 舍入函数

a = np.array([1.0, 15.55, 123, 0.467,-2.532, -1.34])
np.around(a, decimals=1)  # 保留小数点后一位四舍五入
np.around(a, decimals=-1) # 小数点前一位四舍五入
np.floor(a)  # 向下取整
np.ceil(a)  # 向上取整

9.4 随机函数

博文

np.random.seed(0)    # seed the global random number generator so the output is reproducible

np.random.rand(3,2)  # 3x2 floats sampled from the uniform distribution on [0, 1)
np.random.randn(10)  # 10 samples from the standard normal distribution
np.random.randint(1, 100, 10) # 10 integers sampled from [low, high) = [1, 100)


# shuffle 洗牌   在原数组上
# Modify a sequence in-place by shuffling its contents.
arr = np.arange(10)
np.random.shuffle(arr) 
arr # array([8, 1, 5, 6, 3, 4, 2, 7, 0, 9])

# Multi-dimensional arrays are only shuffled along the first axis.
arr = np.arange(9).reshape((3, 3))
np.random.shuffle(arr) 
arr


# permute 排列   不在原数组上
# Randomly permute a sequence, or return a permuted range.
np.random.permutation(10) # array([3, 2, 1, 7, 8, 6, 5, 4, 9, 0])

#  Multi-dimensional array is only shuffled along its first index.
arr = np.arange(9).reshape((3, 3))
np.random.permutation(arr) 


# choice(a, size=None, replace=True, p=None)  按概率选择
# Generates a random sample from a given 1-D array.
# a: 1-D array-like or int    
# size: int or tuple of ints    
# replace: 是否重用元素   p:抽取元素的概率
np.random.choice(5, 3) # Generate a uniform random sample from np.arange(5) of size 3

np.random.choice(5, 3, p=[0.1, 0, 0.3, 0.6, 0])
aa_milne_arr = ['pooh', 'rabbit', 'piglet', 'Christopher']
np.random.choice(aa_milne_arr, 5, p=[0.5, 0.1, 0.1, 0.3])


# uniform(low=0.0, high=1.0, size=None)  取自均匀分布[low,high)
s = np.random.uniform(-1,0,10)
np.all(s>=-1)
np.all(s<0)


# normal(loc=0.0, scale=1.0, size=None)  samples from a normal distribution; loc: mean, scale: standard deviation
mu, sigma = 0, 0.1 # mean and standard deviation
s = np.random.normal(mu, sigma, 1000)
abs(mu - np.mean(s)) # small value close to 0 (sample mean vs. mu), not exactly 0.0
abs(sigma - np.std(s, ddof=1)) # small value close to 0 (sample std with ddof=1 vs. sigma)


# poisson(lam=1.0, size=None)  取自泊松分布 lam: 发生率或发生次数
x = np.random.poisson(lam=5, size=10000)  # lam为λ  size为k

pillar = 15
a = plt.hist(x, bins=pillar, density=True, range=[0, pillar], color='g', alpha=0.5)
# 之前版本density为normed，都是将直方图改为频率分布直方图
plt.plot(a[1][0:pillar], a[0], 'r')
plt.grid()
plt.show()

9.5 算术函数

a = np.array([[1,2], [3,4]])
b = np.array([[2,3], [3,4]])
c = np.array([5,6])
d = np.array([0.25, 1.33, 1, 100])

np.add(a,b)       # element-wise sum, same as a+b; with c instead of b (a+c) the 1-D operand is broadcast
np.subtract(a,b)  # element-wise difference, same as a-b; a-c broadcasts c

# Multiply arguments element-wise.
# a*b is the element-wise product (entries at matching positions are multiplied)
np.multiply(a, b) # same as a*b; a*c broadcasts c

# a@b is the matrix product for 2-D operands, not an element-wise operation
np.dot(a,b)        # same as a@b here; a@c is a matrix-vector product, not a broadcast multiply

np.divide(a,b)     # element-wise true division, same as a/b; a/c broadcasts c

np.sqrt([1, 4, 9])  # element-wise square root
np.power(a, 2)  # element-wise power with a scalar exponent
np.power(a, b)  # element-wise power with per-element exponents taken from b

np.reciprocal(d)  # element-wise reciprocal

np.mod(a, b)  # element-wise remainder; np.mod(a, c) broadcasts c
np.remainder(a,b) # same function as np.mod()
# np.mod is np.remainder => True
# np.mod.__name__ => 'remainder'

9.6 统计函数

a = np.array([[3,7,5],[8,4,3],[2,4,9]])

# Sum of array elements over a given axis.
np.sum(a)
np.sum(a, axis=0)

# Return the maximum of an array or maximum along an axis.
np.amax(a)  # 数组中最大值 9
np.amax(a, 1)  # 各行最大值 [7 8 9]
np.amax(a, 0)  # 各列最大值 [8 7 9]

# Return the minimum of an array or minimum along an axis.
np.amin(a)  # 数组中最小值 2
np.amin(a, 1) # 各行最小值 [3 3 2]
np.amin(a, 0)  # 各列最小值 [2 4 3]

# Returns the indices of the maximum values along an axis.
np.argmax(a)
np.argmax(a, 0)
np.argmax(a, 1)

# Returns the indices of the minimum values along an axis.
np.argmin(a)
np.argmin(a, 0)
np.argmin(a, 1)

# Range of values (maximum - minimum) along an axis.
np.ptp(a)          # 数组中（最大值-最小值）即9-2=7
np.ptp(a, axis=1)  # 各行（最大值-最小值） [4 5 7]
np.ptp(a, axis=0)  # 各列（最大值-最小值） [6 3 6]

# Returns the q-th percentile(s) of the array elements. 分位数值
a = np.array([[10,7,4],[3,2,1]])
print(np.percentile(a, 50))
print(np.percentile(a, 50, axis=1))  # 各行
print(np.percentile(a, 50, axis=0))  # 各列
print(np.percentile(a, 50, axis=1, keepdims=True))  # 保持维度不变

# median(a, axis=None, out=None, overwrite_input=False, keepdims=False)  中位数
a = np.array([[30,65,70],[80,95,10],[50,90,60]])
print(np.median(a))
print(np.median(a, axis=0)) # 轴0（列）的中位数
print(np.median(a, axis=1)) # 轴1（行）的中位数

# mean(a, axis=None, dtype=None, out=None, keepdims=<no value>)  算术平均值
np.mean(a)  # 算术平均值
np.mean(a, axis=0)  # 轴0（列）算术平均值
np.mean(a, axis=1)  # 轴1（行）算术平均值

# average(a, axis=None, weights=None, returned=False)  加权平均值
np.average(a)
np.average(a, weights=[[4,3, 2],[2,1,1], [1, 0, 1]])
np.average(a, weights=[1,2, 3], axis=0) # 轴0（列）加权平均值
np.average(a, weights=[2,1, 1], axis=1) # 轴1（行）加权平均值

# std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<no value>)  标准差
np.std(a)
np.std(a, axis=0) # 轴0（列）标准差
np.std(a, axis=1) # 轴1（行）标准差

# var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=<no value>)  方差
np.var(a)
np.var(a, axis=0)  # 轴0（列）方差
np.var(a, axis=1)  # 轴1（行）方差

9.7 排序函数

# sort(a, axis=-1, kind=None, order=None)
a = np.array([[3,7],[9,1]])
np.sort(a, kind="quicksort") # heapsort, mergesort, timsort
np.sort(a, axis=0)  # 按轴0（列）排序
np.sort(a, axis=1)  # 按轴1（行）排序

dt = np.dtype([('name','S10'),("age", int)])  # 结构化数据类型
a = np.array([('raju',31),("anil",25),('ravi',17),("amar",27)], dtype=dt)
np.sort(a, order=['name','age'])


# argsort(a, axis=-1, kind=None, order=None)
## Returns the indices that would sort an array.
x = np.array([3,1,2])
y = np.argsort(x)
y, x[y]  # 以排序后的顺序重构原数组


# lexsort(keys, axis=-1)
## Perform an indirect stable sort using a sequence of keys.
a = [1,5,1,4,3,4,4] # First column
b = [9,4,0,4,0,2,1] # Second column
ind = np.lexsort((b,a)) # Sort by a, then by b, Return indices 
print(ind)  # [2 0 4 6 5 3 1]
[(a[i],b[i]) for i in ind]  # [(1, 0), (1, 9), (3, 0), (4, 1), (4, 2), (4, 4), (5, 4)]


# Sort a complex array using the real part first, then the imaginary part.
np.sort_complex([1+2j, 2-1j, 3-2j, 3+2j, -1+3j])
# [-1.+3.j  1.+2.j  2.-1.j  3.-2.j  3.+2.j]

9.8 条件筛选函数

# partition(a, kth, axis=-1, kind='introselect', order=None)  分割
##  Return a partitioned copy of an array.
a = np.array([3, 4, 2, 1])
np.partition(a, 3)  # [2, 1, 3, 4]
np.partition(a, (1, 3))  # [1, 2, 3, 4]

# argpartition(a, kth, axis=-1, kind='introselect', order=None)
## Array of indices that partition `a` along the specified axis.
x = np.array([3, 4, 2, 1])
x[np.argpartition(x, 3)]
x[np.argpartition(x, (1, 3))]

# argmax(a, axis=None, out=None)
## Returns the indices of the maximum values along an axis.
a = np.array([[30, 40, 70],[80, 20, 10], [50,90,60]])
np.argmax(a)
np.argmax(a, axis=0) # 沿轴0（列）的最大值索引   第1列，第2行
np.argmax(a, axis=1) # 沿轴1（行）的最大值索引   第2行，第1列

# argmin(a, axis=None, out=None)
## Returns the indices of the minimum values along an axis.
np.argmin(a)
np.argmin(a, axis=0)
np.argmin(a, axis=1)

## Return the indices of the elements that are non-zero.
a = np.array([[30,40,0],[0,20,10],[50,0,60]])
np.nonzero(a)
a[np.nonzero(a)]

# where(condition, [x, y])
## Return elements chosen from `x` or `y` depending on `condition`.
x = np.arange(9.).reshape(3,3)
y = np.where(x>3)
x[y]
np.where(x==6)
# np.argwhere(a==1)

# extract(condition, arr)
## Return the elements of an array that satisfy some condition.
x = np.arange(9.).reshape(3,3)
condition = np.mod(x, 2)==0  # 定义条件，选择偶数
np.extract(condition, x)

9.9 字节交换

data = 0x 00 12 34 56
大端模式：数据的高字节保存在内存的低地址中，数据的低字节保存在内存的高地址中。[00 12 34 56] （与阅读习惯一致）
小端模式：数据的高字节保存在内存的高地址中，数据的低字节保存在内存的低地址中。[56 34 12 00]

# np.ndarray.byteswap()  将 Ndarray 中每个元素中的字节进行大小端转换。
a = np.array([1, -1, 256, 8755], dtype=np.int16)
list(map(hex, a))  # 以十六进制表示内存中的数据
a, a.byteswap(True)
list(map(hex, a))

十. 副本和视图

赋值操作不产生副本

a = np.arange(6).reshape(2,3)
b = a
id(a), id(b)  # id 相同

b[0,0] = 100  # 修改值影响原变量
a, b

视图和浅拷贝

a = np.arange(6).reshape(3,2)
b = a.view()
id(a),id(b)  # id不同

b[0, 0] = 100  # 修改值影响
a, b

b.shape = 1,6
a, b  # 修改维度不影响

切片创建视图（浅拷贝）

a = np.arange(12)
b = a[3:]
c = a[3:]  
id(b), id(c), id(a[3:])  # id 不同（区别于赋值引用）

b[1] = 123
c[2] = 234 # 切片创建视图,，修改数据会影响到原始数组

b,c, b is c  # False

副本和深拷贝

a = np.array([[10, 10], [2,3], [4,5]])
b = a.copy() 

id(a), id(b), b is a  # id 不同   False
b[0, 0] = 100   # 修改副本数据，不会影响原始数据
a, b

十一. 矩阵库 Matrix

ndarray：可以是任意维数据
matrix：只能是2维数据

# 矩阵乘法与逐项乘法不同
# ndarray的dot和a@b是矩阵乘法，a*b和multiply()是逐项相乘
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6], [7, 8]])
print('dot =',np.dot(a, b)) # [[1*5+2*7=19  1*6+2*8=22], [3*5+4*7=43  3*6+4*8=50]] 矩阵乘法
print('@ =',a@b)
print('* =',a*b)
print('mul =', np.multiply(a,b), '\n')


# For np.matrix operands, np.dot, c*d and c@d are all matrix products;
# only np.multiply() is element-wise.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
c = np.asmatrix([[1, 2], [3, 4]])
d = np.asmatrix([[5, 6], [7, 8]])
print('dot =', np.dot(c, d))
print('* =', c*d)
print('@ =', c@d)
print('mul =', np.multiply(c, d))

Matrix 定义

import numpy.matlib as mb
# numpy.matlib mirrors the usual constructors but returns np.matrix objects;
# besides empty() there are mb.zeros(), mb.ones(), mb.eye(), mb.identity(), mb.rand().
a = mb.empty((2,2))  # uninitialised 2x2 matrix
print(a, type(a))

# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
b = np.asmatrix((2, 3))   # a matrix is always 2-D: shape (1, 2)
c = np.array([(2, 3)])    # the equivalent ndarray needs the extra nesting to be 2-D
print(b, type(b))
print(c, type(c))

二者转换

import numpy.matlib as mb

# Build a matrix from its string form (';' separates rows, ',' separates entries).
# np.matrix is used because the np.mat alias was removed in NumPy 2.0.
a = np.matrix('1,2;3,4')
print(a, type(a))
b = np.asarray(a)  # matrix -> ndarray
print(b, type(b))
c = np.asmatrix(b)  # ndarray -> matrix

十二. 线性代数

内积（Inner product）和点积（dot product）的区别：

内积：一般是用在内积空间中的
点积：一般是用在欧几里得空间中的
欧几里得空间是内积空间的一种特殊情况。

dot(a, b, out=None)

# Dot product of two arrays.   两数组点积
# 1-D
a = np.array([1, 2, 3])
b = np.array([3, 2, 1])
print(np.dot(a, b))  # 向量内积 inner product of vectors

# 2-D
a = np.array([[0, 1], [2, 3]])
b = np.array([[4, 5], [6, 7]])
print(np.dot(a, b))  # E.q: np.matmul(a,b) or a@b    矩阵乘积法（与np.inner不同）

# 0-D
a = np.array([[0, 1], [1, 2]])
print(np.dot(a, 2))  # E.q: np.multiply(a, 2) or a*2

vdot(a, b)

# Return the dot product of two vectors.
a = np.array([[0, 1], [2, 3]])
b = np.array([[4, 5], [6, 7]])
print(np.vdot(a, b))  # 0*4 + 1*5 + 2*6 + 3*7 = 38   对应项相乘

inner(a,b)

# Inner product of two arrays.
a = np.array([[0, 1], [2, 3]])
b = np.array([[4, 5], [6, 7]])
print(np.inner(a, b))  # [[0*4+1*5  0*6+1*7], [2*4+3*5  2*6+3*7]   [[5 7],[23 33]]   对应项相乘

matmul() Matrix product of two arrays.

a = np.array([[0, 1], [2, 3]])
b = np.array([[4, 5], [6, 7]])
np.dot(a, b),np.matmul(a, b) # 与 np.dot() 都是矩阵乘积法
# array([[ 6,  7],
#       [26, 31]])

linalg.det(a)

# Compute the determinant of an array . 计算矩阵的行列式值
a = np.array([[1,2],[3,4]])
print(np.linalg.det(a))  # -2

linalg.solve()

# Solve a linear matrix equation, or system of linear scalar equations.  求解线性方程组
a = np.array([[3,1], [1,2]])
b = np.array([9,8])
x = np.linalg.solve(a, b)
print(x)  # array([2.,  3.])
## 3 * x0 + x1 = 9`` and ``x0 + 2 * x1 = 8`

linalg.inv()

# Compute the (multiplicative) inverse of a matrix.
x = np.array([[1,2], [3,4]])
y = np.linalg.inv(x)
print(np.dot(x, y))

十三. IO

save/load

a = np.array([1,2,3,4,5])
np.save('outfile.npy',a)
b = np.load('outfile.npy')

savez()

Save several arrays into a single file in uncompressed .npz format.

a = np.array([[1,2,3],[4,5,6]])
b = np.arange(0,1.0,0.1)
c = np.sin(b)
# Use one name for both save and load: the original loaded "Runoob.npz",
# which does not match the saved "runoob.npz" on case-sensitive filesystems.
npz_path = "runoob.npz"
np.savez(npz_path, a, b, sin_array=c)  # positional arrays are stored as arr_0, arr_1; c is stored under its keyword name
r = np.load(npz_path)
r.files
# r['arr_0']
# r['arr_1']
# r["sin_array"]

savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None)

Save an array to a text file.

a = np.array([1,2,3,4,5])
np.savetxt('out.txt', a)
b = np.loadtxt('out.txt')

a = np.arange(0, 10, 0.5).reshape(4, -1)
np.savetxt('out_1.txt', a, fmt="%d", delimiter=",") # 保存为整数，以逗号分隔
b = np.loadtxt('out_1.txt', delimiter=",")  # load 时也要指定逗号分隔

十四

# histogram(): numerical representation of the frequency distribution of the data
a = np.array([22,87,5,43,56,73,55,54,11,20,51,5,79,31,27])
hist, bins = np.histogram(a, bins=[0, 20, 40, 60, 80, 100])  # hist: count of values per bin; bins: the bin edges

# Converting between torch tensors and ndarrays (the underlying memory can be shared).
import torch  # the original snippet used torch without importing it

a = torch.arange(12, dtype=torch.float32).reshape(3,4)  # tensor
b = a.detach().numpy()  # tensor -> ndarray
c = torch.from_numpy(b)  # ndarray -> tensor
a[0, 1] = 100  # in-place write, printed below through a, b and c to show the sharing
print(a, a.dtype)
print(b, b.dtype)
print(c, c.dtype)

a = np.ones((2,2), dtype=np.float32)  # ndarray
b = torch.Tensor(a)  # ndarray -> Tensor via the legacy constructor
c = b.numpy()  # tensor -> ndarray
d = torch.tensor(a)  # ndarray -> tensor; copies the data, memory is not shared
a[1, 0]=100
print(a, a.dtype)
print(b, b.dtype)
print(c, c.dtype)
print(d, d.dtype)

a = np.ones([2,3])    # float64
b = torch.Tensor(a)    # result is torch.float32: the dtype differs, so the data is copied and memory is not shared
c = torch.from_numpy(a)  # note the dtype of c: torch.float64, memory is shared with a
d = torch.tensor(a)  # torch.tensor() always copies the data regardless of dtype; memory is never shared
a[0,1] = 100
print(a, a.dtype)
print(b, b.dtype)
print(c, c.dtype)
print(d, d.dtype) 

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

a.dtype
b.dtype
c.dtype
d.dtype