文章目录
numpy
为什么学习numpy?
- 快速
- 方便
- 科学计算的基础库
numpy:一个在python中做科学计算的基础库,重在数值运算,也是大部分python科学计算库的基础,多用于在大型、多维数组上执行数值运算
常用方法
import numpy as np
# Create an array from a Python list.
t1 = np.array([1, 2, 3, 4])
print(t1)
print(type(t1))

# Create a sequential (not random) array holding 0..9, in two equivalent ways.
t2 = np.array(range(10))
print(t2)
t3 = np.arange(10)
print(t3)
print(t3.dtype)

# Set the data type at creation time. The np.int / np.float aliases were
# deprecated in NumPy 1.20 and removed in 1.24; the builtins int / float
# select the same dtypes and work on every NumPy version.
t4 = np.array([1, 2, 3, 4], dtype=int)

# Change the data type. astype returns a new array and leaves t4 untouched,
# so the result has to be kept for the conversion to have any effect.
t4_float = t4.astype(float)

# Round to a given number of decimal places.
print(np.round([1.364, 2.31844, 3.2], 2))
# 一维数组
In [13]: t1 = np.array([1,2,3])
In [14]: t1.shape
Out[14]: (3,)
# 二维数组
In [16]: t2 = np.array([[1,2,3],[4,5,6]])
In [17]: t2.shape
Out[17]: (2, 3)
# 三维数组
In [18]: t3 = np.array([[[1,2,3],[4,5,6]],[[7,8,9],[10,11,12]]])
In [19]: t3.shape
Out[19]: (2, 2, 3)
In [20]: t3
Out[20]:
array([[[ 1, 2, 3],
[ 4, 5, 6]],
[[ 7, 8, 9],
[10, 11, 12]]])
# 多维数组不直观,常用于转换为二维数组等
In [24]: t4 = np.arange(12)
In [25]: t4
Out[25]: array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
In [26]: t4.reshape(3,4)
Out[26]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [27]: t5 = np.arange(24).reshape((2,3,4))
In [28]: t5
Out[28]:
array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]],
[[12, 13, 14, 15],
[16, 17, 18, 19],
[20, 21, 22, 23]]])
In [29]: t5.reshape((4,6))
Out[29]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
# 二维数组转一维数组
In [32]: t5.flatten()
Out[32]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23])
In [34]: t5.reshape((24,))
Out[34]:
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23])
# 数组的计算
# 数组与一个数字运算
In [36]: t5
Out[36]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [37]: t5+2
Out[37]:
array([[ 2, 3, 4, 5, 6, 7],
[ 8, 9, 10, 11, 12, 13],
[14, 15, 16, 17, 18, 19],
[20, 21, 22, 23, 24, 25]]) # 数组内每一个值与该数字运算
In [38]: t5/0
G:\python36\Scripts\ipython3:1: RuntimeWarning: divide by zero encountered in true_divide
G:\python36\Scripts\ipython3:1: RuntimeWarning: invalid value encountered in true_divide
Out[38]:
array([[nan, inf, inf, inf, inf, inf],
[inf, inf, inf, inf, inf, inf],
[inf, inf, inf, inf, inf, inf],
[inf, inf, inf, inf, inf, inf]]) # nan:not a number(不是一个数字);inf:infinity(无穷大)
# 数组与数组运算,同种规格
In [40]: t6 = np.arange(100,124).reshape((4,6))
In [41]: t6
Out[41]:
array([[100, 101, 102, 103, 104, 105],
[106, 107, 108, 109, 110, 111],
[112, 113, 114, 115, 116, 117],
[118, 119, 120, 121, 122, 123]])
In [42]: t5 + t6
Out[42]:
array([[100, 102, 104, 106, 108, 110],
[112, 114, 116, 118, 120, 122],
[124, 126, 128, 130, 132, 134],
[136, 138, 140, 142, 144, 146]]) # 对应位置的元素分别进行加减乘除
# 多维数组运算 (4,6)和(4,1)或是(4,6)和(1,6)是可以运算的,其他报错,多维数组:(3,3,3)和(3,2)不能运算,(3,3,2)和(3,2)可以运算,从末位开始比对---后缘维度
In [43]: t7 = np.arange(0,6)
In [44]: t7
Out[44]: array([0, 1, 2, 3, 4, 5])
In [45]: t5 - t7
Out[45]:
array([[ 0, 0, 0, 0, 0, 0],
[ 6, 6, 6, 6, 6, 6],
[12, 12, 12, 12, 12, 12],
[18, 18, 18, 18, 18, 18]])
In [46]: t5
Out[46]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [47]: t8 = np.arange(4).reshape((4,1))
In [48]: t5-t8
Out[48]:
array([[ 0, 1, 2, 3, 4, 5],
[ 5, 6, 7, 8, 9, 10],
[10, 11, 12, 13, 14, 15],
[15, 16, 17, 18, 19, 20]])
In [49]: t8
Out[49]:
array([[0],
[1],
[2],
[3]]) # 相同维度相加减,一排一排运算或者一列一列运算 不同维度,直接报错,长了,短了,都不行
numpy读取数据
一般不使用numpy读取数据,使用pandas。numpy有对应的方法
np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-uWF9ikuU-1573695190777)(C:\Users\weichen\AppData\Local\Temp\1573621278312.png)]
import numpy as np
# Load every row after the header line as strings and show the result.
# No delimiter is passed, so np.loadtxt splits fields on whitespace —
# NOTE(review): confirm that matches the layout of this .csv file.
us_file_path = r"G:\作业\dataAsc4\house_prices.csv"
t1 = np.loadtxt(
    us_file_path,
    dtype="str",
    skiprows=1,
    encoding="utf8",
)
print(t1)
# 转置 # 横轴与竖轴内容互换
In [51]: t2 = np.arange(24).reshape((4,6))
In [52]: t2
Out[52]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
# 方法一
In [53]: t2.transpose()
Out[53]:
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
# 方法二
In [54]: t2.T
Out[54]:
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
# 方法三
In [55]: t2.swapaxes(1,0)
Out[55]:
array([[ 0, 6, 12, 18],
[ 1, 7, 13, 19],
[ 2, 8, 14, 20],
[ 3, 9, 15, 21],
[ 4, 10, 16, 22],
[ 5, 11, 17, 23]])
numpy索引和切片
操作和python中列表差不多
# 连续用:,不连续用 ,
In [61]: t2
Out[61]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
# 取单行
In [56]: t2[1]
Out[56]: array([ 6, 7, 8, 9, 10, 11])
In [57]: t2[0]
Out[57]: array([0, 1, 2, 3, 4, 5])
# 取多行
In [60]: t2[[1,3]]
Out[60]:
array([[ 6, 7, 8, 9, 10, 11],
[18, 19, 20, 21, 22, 23]])
# 取列
In [71]: t2
Out[71]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [72]: t2[:,2]
Out[72]: array([ 2, 8, 14, 20])
In [73]: t2[:,[2]]
Out[73]:
array([[ 2],
[ 8],
[14],
[20]])
In [74]: t2[:,[2,5]]
Out[74]:
array([[ 2, 5],
[ 8, 11],
[14, 17],
[20, 23]])
# 取多行多列
In [65]: t2
Out[65]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [66]: t2[:,2:]
Out[66]:
array([[ 2, 3, 4, 5],
[ 8, 9, 10, 11],
[14, 15, 16, 17],
[20, 21, 22, 23]])
In [67]: t2[:,[0,2]]
Out[67]:
array([[ 0, 2],
[ 6, 8],
[12, 14],
[18, 20]])
In [68]: t2[2,3]
Out[68]: 15
In [69]: t2[2:5,1:4]
Out[69]:
array([[13, 14, 15],
[19, 20, 21]])
# 多个不相邻的点 一一对应
In [78]: t2[[0,2,2],[0,1,3]]
Out[78]: array([ 0, 13, 15])
numpy数值修改
In [85]: t2[t2<10]=6
In [86]: t2
Out[86]:
array([[ 6, 6, 6, 6, 6, 6],
[ 6, 6, 6, 6, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
# 三元运算符
In [88]: np.where(t2<=6,100,300)
Out[88]:
array([[100, 100, 100, 100, 100, 100],
[100, 100, 100, 100, 300, 300],
[300, 300, 300, 300, 300, 300],
[300, 300, 300, 300, 300, 300]])
# clip(n1,n2),将小于n1的替换为n1,大于n2的替换为n2
In [166]: t
Out[166]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [167]: t.clip(5,20)
Out[167]:
array([[ 5, 5, 5, 5],
[ 5, 5, 6, 7],
[ 8, 9, 10, 11]])
numpy数组拼接
In [91]: t1 = np.arange(12).reshape((2,6))
In [92]: t2 = np.arange(12,24).reshape((2,6))
In [93]: t1
Out[93]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11]])
In [94]: t2
Out[94]:
array([[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [95]: np.vstack((t1,t2)) # 竖直拼接
Out[95]:
array([[ 0, 1, 2, 3, 4, 5],
[ 6, 7, 8, 9, 10, 11],
[12, 13, 14, 15, 16, 17],
[18, 19, 20, 21, 22, 23]])
In [96]: np.hstack((t1,t2)) # 水平拼接
Out[96]:
array([[ 0, 1, 2, 3, 4, 5, 12, 13, 14, 15, 16, 17],
[ 6, 7, 8, 9, 10, 11, 18, 19, 20, 21, 22, 23]])
数组的行列交换
In [99]: t1 = np.arange(12).reshape((3,4))
In [100]: t1
Out[100]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [101]: t1[[1,2],:] = t1[[2,1],:]
In [102]: t1
Out[102]:
array([[ 0, 1, 2, 3],
[ 8, 9, 10, 11],
[ 4, 5, 6, 7]])
In [103]: t1[:,[0,2]] = t1[:,[2,0]]
In [104]: t1
Out[104]:
array([[ 2, 1, 0, 3],
[10, 9, 8, 11],
[ 6, 5, 4, 7]])
方法补充
# 创建全为1的数组
In [106]: np.ones((2,3))
Out[106]:
array([[1., 1., 1.],
[1., 1., 1.]])
# 创建全是0的数组
In [107]: np.zeros((2,3))
Out[107]:
array([[0., 0., 0.],
[0., 0., 0.]])
# 创建一个对角线为1的正方形的数组(矩阵):np.eye(3)
In [109]: np.eye(3)
Out[109]:
array([[1., 0., 0.],
[0., 1., 0.],
[0., 0., 1.]])
# 最大值和最小值的位置
In [110]: np.eye(4)
Out[110]:
array([[1., 0., 0., 0.],
[0., 1., 0., 0.],
[0., 0., 1., 0.],
[0., 0., 0., 1.]])
In [111]: t = np.eye(4)
In [112]: np.argmax(t,axis=0) # 每一列最大值所在的位置(索引)
Out[112]: array([0, 1, 2, 3], dtype=int64)
In [113]: t[t==1] = -1
In [114]: np.argmin(t,axis=1) # 每一行最小值所在的位置(索引)
Out[114]: array([0, 1, 2, 3], dtype=int64)
In [115]: t
Out[115]:
array([[-1., 0., 0., 0.],
[ 0., -1., 0., 0.],
[ 0., 0., -1., 0.],
[ 0., 0., 0., -1.]])
numpy生成随机数
[外链图片转存失败,源站可能有防盗链机制,建议将图片保存下来直接上传(img-t9ajOfSE-1573695190779)(C:\Users\weichen\AppData\Local\Temp\1573626400512.png)]
In [121]: np.random.randint(10,20,(4,5))
Out[121]:
array([[17, 15, 12, 17, 11],
[16, 12, 13, 10, 16],
[14, 16, 19, 10, 14],
[14, 10, 17, 18, 17]])
numpy.random.rand(d0,d1,…,dn)
rand函数根据给定维度生成[0,1)之间的数据,包含0,不包含1
dn表示每个维度
返回值为指定维度的array
In [130]: np.random.rand(4,2)
Out[130]:
array([[0.99664546, 0.07568626],
[0.32825647, 0.09504973],
[0.68395415, 0.52404761],
[0.50391448, 0.63193894]])
numpy.random.randn(d0,d1,…,dn)
randn函数返回一个或一组样本,具有标准正态分布。
dn表示每个维度
返回值为指定维度的array
In [131]: np.random.randn(4,2)
Out[131]:
array([[-0.71170214, -1.11190203],
[ 0.04165672, 0.83025189],
[-0.95336471, -3.32257962],
[ 0.119543 , -1.33577682]])
注意点
copy和view
a=b 完全不复制,a和b互相影响
a = b[:],视图的操作,一种切片,会创建新的对象a,但是a的数据完全由b保管,他们两个的数据变化是一致的
a = b.copy(),复制,a和b互相不影响
nan和inf
-
nan:not a number 表示这不是一个数字
什么时候会出现:
- 当我们读取本地文件为float的时候,如果有缺失,就会出现nan
- 当做了一个不合适的计算的时候(比如无穷大(inf)减去无穷大),也会出现nan
-
inf(-inf,inf):infinity,inf表示正无穷,-inf表示负无穷
什么时候会出现inf:
-
比如一个数字除以0,(python中直接会报错,numpy中是一个inf或者-inf)
In [123]: a = np.inf
In [124]: type(a)
Out[124]: float
In [125]: b = np.nan
In [126]: type(b)
Out[126]: float
In [133]: a = np.nan
In [134]: b = np.nan
In [135]: a == b
Out[135]: False # nan与任何值都不相等(包括它自己),因此可以用 np.count_nonzero(t != t) 统计数组中nan的个数
-
numpy中常用的统计函数
# 求和
In [138]: t
Out[138]:
array([[ 0, 1, 2, 3],
[ 4, 5, 6, 7],
[ 8, 9, 10, 11]])
In [143]: t.sum(axis=0) # 每一列的和
Out[143]: array([12, 15, 18, 21])
# 均值
In [144]: t.mean(axis=0)
Out[144]: array([4., 5., 6., 7.])
# 中值
In [147]: np.median(t,axis=0)
Out[147]: array([4., 5., 6., 7.])
# 最大值
In [149]: t.max(axis=0)
Out[149]: array([ 8, 9, 10, 11])
# 极值--最大值和最小值之差
In [150]: np.ptp(t)
Out[150]: 11
# 标准差:一组数据相对于平均值分散程度的一种度量。一个较大的标准差,代表大部分数值和其平均值之间差异较大;一个较小的标准差,代表这些数值较接近平均值。标准差反映数据波动的稳定情况,越大表示波动越大。
In [151]: t.std()
Out[151]: 3.452052529534663
实例(替换nan)
def fill_ndarray(t1):
    """Replace every NaN in a 2-D float array with the mean of its column.

    The array is modified in place and is also returned for convenience.

    Args:
        t1: 2-D floating-point ndarray that may contain NaN values.

    Returns:
        The same array object. Each NaN is replaced by the mean of the
        non-NaN values in its column; a column with no valid value at all
        is left unchanged because there is nothing to average.
    """
    for i in range(t1.shape[1]):  # walk the columns
        temp_col = t1[:, i]  # basic slice, so writes go straight into t1
        nan_mask = np.isnan(temp_col)
        # Skip clean columns, and columns that are entirely NaN (their mean
        # would itself be NaN and only trigger a RuntimeWarning).
        if nan_mask.any() and not nan_mask.all():
            temp_col[nan_mask] = temp_col[~nan_mask].mean()
    return t1
if __name__ == "__main__":
    # Build a 3x4 float grid holding 0..11, blank out the last two entries
    # of the middle row, then hand the grid to fill_ndarray.
    t1 = np.arange(12).astype('float').reshape((3, 4))
    t1[1, 2:] = np.nan
    fill_ndarray(t1)
标准差反映数据波动的稳定情况,越大表示波动越大。
In [151]: t.std()
Out[151]: 3.452052529534663
## 实例(替换nan)
```python
def fill_ndarray(t1):
    """Replace every NaN in a 2-D float array with the mean of its column.

    The array is modified in place and is also returned for convenience.

    Args:
        t1: 2-D floating-point ndarray that may contain NaN values.

    Returns:
        The same array object. Each NaN is replaced by the mean of the
        non-NaN values in its column; a column with no valid value at all
        is left unchanged because there is nothing to average.
    """
    for i in range(t1.shape[1]):  # walk the columns
        temp_col = t1[:, i]  # basic slice, so writes go straight into t1
        nan_mask = np.isnan(temp_col)
        # Skip clean columns, and columns that are entirely NaN (their mean
        # would itself be NaN and only trigger a RuntimeWarning).
        if nan_mask.any() and not nan_mask.all():
            temp_col[nan_mask] = temp_col[~nan_mask].mean()
    return t1
if __name__ == "__main__":
    # Build a 3x4 float grid holding 0..11, blank out the last two entries
    # of the middle row, then hand the grid to fill_ndarray.
    t1 = np.arange(12).astype('float').reshape((3, 4))
    t1[1, 2:] = np.nan
    fill_ndarray(t1)