001机器学习-numpy
numpy&pandas有什么用?
这两个模块用于数据分析,非常重要。TensorFlow、机器学习、神经网络都会用到数据分析;使用numpy、pandas做数值计算,比python自带的字典、list快得多,因为它们的底层是用C语言实现的,并且采用了矩阵(向量化)运算。
numpy&pandas安装
一种是:从官网下载安装包,或者通过百度搜索下载安装包
一种是:直接通过命令行的方式,
以管理员身份安装:sudo pip3 install numpy pandas
或者(不需要管理员权限时):pip3 install numpy pandas
如果安装时出现提示“You are using pip version 9.0.1, however version 18.0 is available.”,说明pip版本过旧,可以先升级pip(例如:python -m pip install --upgrade pip)再重新安装。
numpy属性
import numpy as np

# Build a small 2x3 integer matrix from a nested Python list.
array = np.array([
    [1, 2, 3],
    [2, 3, 4],
])
print(array)

# Report the basic structural attributes of the array:
#   ndim  -> number of dimensions (a single integer)
#   shape -> size along each dimension (a tuple)
#   size  -> total number of elements
for label, value in (
    ('number of dim:', array.ndim),
    ('shape:', array.shape),
    ('size:', array.size),
):
    print(label, value)
numpy创建array
import numpy as np

# Explicit integer dtype. The builtin ``int`` is used because the ``np.int``
# alias was removed in NumPy 1.24 and now raises AttributeError.
a = np.array([2, 23, 4], dtype=int)
# print(a.dtype)

# Explicit 16-bit float dtype.
b = np.array([2, 23, 4], dtype=np.float16)
# print(b.dtype)

# 3x4 array filled with zeros (float by default).
c = np.zeros((3, 4))
# print(c)
# [[0. 0. 0. 0.]
#  [0. 0. 0. 0.]
#  [0. 0. 0. 0.]]

# 3x4 array filled with ones, stored as 16-bit integers.
d = np.ones((3, 4), dtype=np.int16)
# print(d)
# [[1 1 1 1]
#  [1 1 1 1]
#  [1 1 1 1]]

# np.empty only allocates memory and does NOT initialize it: the contents are
# whatever happened to be in that memory. They often look like tiny numbers
# (as in the sample run below) but must not be relied on to be close to zero.
e = np.empty((3, 4))
# print(e)
# [[3.56043053e-307 1.60219306e-306 2.44763557e-307 1.69119330e-306]
#  [1.33514617e-307 3.56043053e-307 1.37961641e-306 8.06612192e-308]
#  [1.11259940e-306 1.42419394e-306 8.34447260e-308 2.29179042e-312]]

# Evenly stepped integers: start=10, stop=20 (exclusive), step=2.
f = np.arange(10, 20, 2)
# print(f)
# [10 12 14 16 18]

# 0..11 arranged as a 3x4 matrix.
g = np.arange(12).reshape(3, 4)
# print(g)
# [[ 0  1  2  3]
#  [ 4  5  6  7]
#  [ 8  9 10 11]]

# 5 evenly spaced points from 1 to 10, both endpoints included.
h = np.linspace(1, 10, 5)
# print(h)
# [ 1.    3.25  5.5   7.75 10.  ]

# reshape needs exactly rows*cols elements, so a 2x3 result requires 6 points
# (asking for 5 points and reshaping to (2, 3) raises ValueError).
i = np.linspace(1, 10, 6).reshape((2, 3))
# print(i)
# [[ 1.   2.8  4.6]
#  [ 6.4  8.2 10. ]]
numpy基础运算
import numpy as np

# Two 1-D operands used by the element-wise examples below.
a = np.array([10, 20, 30, 40])
b = np.arange(4)
# print(a, b)
# [10 20 30 40] [0 1 2 3]

# Element-wise arithmetic examples (uncomment one, then print(c)):
# c = a - b               # [10 19 28 37]
# c = a + b
# c = a * b
# c = b ** 2              # square; use **3, **4 for higher powers
# c = 10 * np.sin(a)      # np.cos(a) and np.tan(a) work the same way
#                         # [-5.44021111  9.12945251 -9.88031624  7.4511316 ]
# print(c)

# Comparisons are element-wise too and give a boolean array:
# print(b)
# print(b < 3)            # b == 3 also works
# [0 1 2 3]
# [ True  True  True False]

# Two 2x2 matrices: element-wise product versus true matrix product.
a1 = np.array([[1, 1], [0, 1]])
b1 = np.arange(4).reshape(2, 2)

# Multiply matching elements one by one.
c1 = np.multiply(a1, b1)
# Matrix multiplication; np.dot(a1, b1) is the equivalent function form.
c_dot = a1.dot(b1)

# print(a1)
# print(b1)
# print(c1)
# print(c_dot)
# [[1 1]
#  [0 1]]
# [[0 1]
#  [2 3]]
# [[0 1]
#  [0 3]]
# [[2 4]
#  [2 3]]
import numpy as np

# 3x4 matrix holding the integers 2..13.
A = np.arange(2, 14).reshape((3, 4))
print(A)

# Index of the smallest / largest element, counted in the flattened array.
print(np.argmin(A))
print(np.argmax(A))

# Three ways to get the mean of all elements.
print(np.mean(A))
print(A.mean())  # the method must be called; bare A.mean only prints the bound method
print(np.average(A))  # like np.mean, but also accepts a weights= argument
# Note: ndarray has no .average() method, so A.average() is not available.

print(np.median(A))  # median
print(np.cumsum(A))  # cumulative (running) sum over the flattened array
print(np.diff(A))  # difference between neighbouring elements within each row
print(np.nonzero(A))  # indices of the non-zero elements, one index array per axis
# (array([0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2]),
#  array([0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]))
# (the integer dtype of the index arrays is platform dependent)

print(np.sort(A))  # sort each row independently
print(np.transpose(A))  # transpose: swap rows and columns
print(A.T)  # shorthand for the transpose
print((A.T).dot(A))  # matrix product of the transpose with A
print(np.clip(A, 5, 9))  # values below 5 become 5, values above 9 become 9
print(np.mean(A, axis=0))  # mean of each column (axis 0 is collapsed)
numpy的索引
import numpy as np

# 3x4 matrix holding the integers 3..14.
A = np.arange(3, 15).reshape((3, 4))
print(A)

# Single element: chained indexing and tuple indexing give the same value.
print(A[2][1])
print(A[2, 1])

# Slices: a whole row, a whole column, part of a row.
print(A[2, :])
print(A[:, 1])
print(A[1, 1:3])

# Iterating a 2-D array yields its rows.
for row in A:
    print(row)

# Iterating the transpose yields the original columns.
for col in A.T:
    print(col)

# flatten() returns a new 1-D array with all elements.
print(A.flatten())

# .flat is an iterator over the individual elements.
for item in A.flat:
    print(item)
numpy的array合并
import numpy as np

# Two 1-D arrays of length 3 to be combined in different ways.
a = np.array([1, 1, 1])
b = np.array([2, 2, 2])

# Vertical stack: a and b become the rows of a 2x3 array.
c = np.vstack([a, b])
print(c)
print(a.shape, c.shape)

# Horizontal stack: a and b are joined end to end into one 1-D array.
d = np.hstack([a, b])
print(a.shape, d.shape)

# np.newaxis inserts a length-1 axis:
# in front it gives a 1x3 row, at the end it gives a 3x1 column.
as_row = a[np.newaxis, :]
as_column = a[:, np.newaxis]
print(as_row)
print(as_column)

# concatenate joins any number of arrays along an existing axis.
e = np.concatenate([a, b, b, a], axis=0)
print(e)
numpy的array分割
import numpy as np

# Build the merged arrays first, so that every split below can be read as
# the inverse of one merge.
a = np.array([1, 1, 1])
b = np.array([2, 2, 2])
c = np.vstack((a, b))  # shape (2, 3)
d = np.hstack((a, b))  # shape (6,)
e = np.concatenate((a, b, b, a), axis=0)  # shape (12,)
print(c)

# Vertical split: cut c into 2 blocks of rows (same as np.split(c, 2, axis=0)).
rows = np.vsplit(c, 2)
print(rows)
# [array([[1, 1, 1]]), array([[2, 2, 2]])]

# Horizontal split: cut c into 3 blocks of columns (same as np.split(c, 3, axis=1)).
cols = np.hsplit(c, 3)
print(cols)

# Equal split of a 1-D array: recovers a and b from d.
halves = np.split(d, 2)
print(halves)
# [array([1, 1, 1]), array([2, 2, 2])]

# np.split raises ValueError when the pieces cannot be equal in size;
# np.array_split allows unequal pieces (here 12 elements in 5 pieces: 3, 3, 2, 2, 2).
uneven = np.array_split(e, 5)
print(uneven)
numpy的copy&deep copy
>>> import numpy as np
>>> a = np.arange(4)
>>> a
array([0, 1, 2, 3])
>>> b = a  # plain assignment: b is another name for the same array object, nothing is copied
>>> c = a  # c refers to that same array as well
>>> d = b  # and so does d, because b is a
>>> a[0] = 0.3  # the array holds integers, so 0.3 ends up stored as 0 (see the output below)
>>> a
array([0, 1, 2, 3])
>>> a[0] = 11  # in-place write; it shows up through a, b, c and d below
>>> a
array([11, 1, 2, 3])
>>> b
array([11, 1, 2, 3])
>>> c
array([11, 1, 2, 3])
>>> d
array([11, 1, 2, 3])
>>> b is a  # identity check: both names point to one object
True
>>> d is a
True
>>> d[1:3] = [22,33]  # writing through d changes the shared array, so every name sees it
>>> a
array([11, 22, 33, 3])
>>> b
array([11, 22, 33, 3])
>>> c
array([11, 22, 33, 3])
>>> d
array([11, 22, 33, 3])
>>> b = a.copy() # deep copy: b now has its own data, independent of a
>>> b
array([11, 22, 33, 3])
>>> a[3] = 44  # changes a only
>>> a
array([11, 22, 33, 44])
>>> b  # the copy still holds the old value 3
array([11, 22, 33, 3])