课堂学习笔记
机器学习—数据科学包一
numpy的学习
numpy的属性
import numpy as np
array = np.array([[1, 2],
[3, 4],
[5, 6]])
print(array)
print('num of dim:', array.ndim)
print('shape:', array.shape)
print('size:', array.size)
输出
[[1 2]
[3 4]
[5 6]]
num of dim: 2
shape: (3, 2)
size: 6
numpy的创建array
array()
import numpy as np
array = np.array([2, 3, 4], dtype=np.int64)
print(array.dtype)
输出:
int64
dtype还可以是 int、float(Python 内置类型)或 np.int32、np.int64、np.float32、np.float64 等等(注意:np.int、np.float 这两个别名在 NumPy 1.20 被弃用、1.24 被移除,不要再使用)
zeros()
输入:
a = np.zeros((3,4))
print(a)
输出:
[[0. 0. 0. 0.]
[0. 0. 0. 0.]
[0. 0. 0. 0.]]
np.ones()
np.empty()
arange()
a = np.arange(2,10,2)
print(a)
[2 4 6 8]
arange() reshape()
a = np.arange(12).reshape((3,4))
print(a)
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
linspace()
a = np.linspace(1, 10 ,5)
print(a)
[ 1. 3.25 5.5 7.75 10. ]
numpy的基础运算一
import numpy as np
a = np.array([10,20,30,40])
b = np.arange(4)
print(a, b)
c = a+b
d = a-b
print(c)
print(d)
输出:
[10 20 30 40] [0 1 2 3]
[10 21 32 43]
[10 19 28 37]
平方 b**2
np.sin()
np.cos()
b = np.arange(4)
print(b<3)
输出
[ True True True False]
乘法
a = np.array([[1,1],
[0,1]])
b = np.arange(4).reshape((2,2))
c = a*b  # 逐元素相乘(对应位置的元素相乘,不是矩阵乘法)
d = np.dot(a,b)  # 标准的矩阵乘法 dot()
e = a.dot(b)
print(c)
print(d)
print(e)
输出:
[[0 1]
[0 3]]
[[2 4]
[2 3]]
[[2 4]
[2 3]]
输入:
a = np.random.random([2,4])
print(a)
print(np.sum(a))
print(np.max(a))
print(np.min(a))
输出:
[[0.94885187 0.38031492 0.213684 0.22319111]
[0.0539021 0.47663326 0.26024245 0.71382893]]
3.2706486386349116
0.9488518713445446
0.05390209558116199
行运算
np.sum(a, axis=1)
列运算
np.sum(a, axis=0)
输入:
a = np.random.random([2,4])
print(a)
print(np.sum(a, axis=1))  # 每行求和
print(np.max(a, axis=0))  # 每列最大的
print(np.min(a, axis=1))  # 每行最小的
输出:
[[0.80573928 0.70621011 0.15774035 0.75615217]
[0.47725911 0.62603335 0.09190359 0.97828574]]
[2.42584192 2.1734818 ]
[0.80573928 0.70621011 0.15774035 0.97828574]
[0.15774035 0.09190359]
numpy的基础运算二
最小值 最大值的索引
a = np.arange(2,14).reshape((3,4))
print(a)
print(np.argmin(a))  # 最小值的索引(按展平后的一维顺序计数)
print(np.argmax(a))  # 最大值的索引(按展平后的一维顺序计数)
输出:
[[ 2 3 4 5]
[ 6 7 8 9]
[10 11 12 13]]
0
11
平均值
a = np.arange(2,14).reshape((3,4))
print(np.mean(a))
print(a.mean())
print(np.average(a))
中位数
np.median(a)
累加(cumsum:第 n 项是前 n 个元素之和,默认先展平成一维)
a = np.arange(2,14).reshape((3,4))
print(a)
print(np.cumsum(a))
输出:
[[ 2 3 4 5]
[ 6 7 8 9]
[10 11 12 13]]
[ 2 5 9 14 20 27 35 44 54 65 77 90]
相邻元素之差(diff:默认沿最后一个轴,即每行内后一个数减前一个数)
a = np.arange(2,14).reshape((3,4))
print(a)
print(np.diff(a))
输出:
[[ 2 3 4 5]
[ 6 7 8 9]
[10 11 12 13]]
[[1 1 1]
[1 1 1]
[1 1 1]]
找出非零元素的索引(返回的是各维度上的下标数组,而不是元素本身)
np.nonzero(a)
逐行排序
np.sort(a)
矩阵按对角线反转 转置矩阵
np.transpose(a)
a.T
给定一个矩阵,矩阵里小于5的数置为5,大于9的数置为9,中间的数不变
np.clip(a, 5, 9)
numpy的索引
a = np.arange(3,15).reshape((3,4))
print(a)
print(a[2][1])
print(a[2,1])
print(a[2,:])
print(a[:,1])
print(a[1,1:3])
输出:
[[ 3 4 5 6]
[ 7 8 9 10]
[11 12 13 14]]
12
12
[11 12 13 14]
[ 4 8 12]
[8 9]
迭代
a = np.arange(3,15).reshape((3,4))
print(a)
for row in a:
print(row)
for col in a.T:
print(col)
print(a.flatten())
for item in a.flat:
print(item)
输出:
[[ 3 4 5 6]
[ 7 8 9 10]
[11 12 13 14]]
[3 4 5 6] //行迭代
[ 7 8 9 10]
[11 12 13 14]
[ 3 7 11] //列迭代
[ 4 8 12]
[ 5 9 13]
[ 6 10 14]
[ 3 4 5 6 7 8 9 10 11 12 13 14]
3
4
5
6
7
8
9
10
11
12
13
14
numpy的array合并
合并
输入:
a = np.array([1,1,1])
b = np.array([2,2,2])
print(np.vstack((a,b)))
print(np.hstack((a,b)))
输出:
[[1 1 1]
[2 2 2]]
[1 1 1 2 2 2]
数组 加维度
shape 从 (3,) 变成 (3, 1)
a = np.array([1,1,1])[:,np.newaxis]
b = np.array([2,2,2])[:,np.newaxis]
print(np.vstack((a,b)))
print(np.hstack((a,b)))
多个array合并
a = np.array([1,1,1])[:,np.newaxis]
b = np.array([2,2,2])[:,np.newaxis]
c=np.concatenate((a,b,b,a), axis=0)
print(c)
d=np.concatenate((a,b,b,a), axis=1)
print(d)
numpy的array分割
axis=0:沿行方向分割(把行分成若干组,每块保留全部列)
axis=1:沿列方向分割(把列分成若干组,每块保留全部行)
a = np.arange(12).reshape((3,4))
print(a)
print(np.split(a,3,axis=0))
print(np.split(a,2,axis=1))
结果:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2, 3],
[ 6, 7],
[10, 11]])]
不等分 分割
a = np.arange(12).reshape((3,4))
print(a)
print(np.array_split(a,3,axis=1))
结果:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2],
[ 6],
[10]]), array([[ 3],
[ 7],
[11]])]
另一种分割 垂直 or 水平
a = np.arange(12).reshape((3,4))
print(a)
print(np.vsplit(a,3))
print(np.hsplit(a,2))
结果:
[[ 0 1 2 3]
[ 4 5 6 7]
[ 8 9 10 11]]
[array([[0, 1, 2, 3]]), array([[4, 5, 6, 7]]), array([[ 8, 9, 10, 11]])]
[array([[0, 1],
[4, 5],
[8, 9]]), array([[ 2, 3],
[ 6, 7],
[10, 11]])]
numpy的copy & deep copy
输入:
a = np.arange(4)
b=a
d=b
a[0]=99
print(a)
print(b)
print(d)
d[0]=99999
print(d)
print(a)
输出:
[99 1 2 3]
[99 1 2 3]
[99 1 2 3]
[99999 1 2 3]
[99999 1 2 3]
直接赋值(b=a)不会复制数据,a、b、d 指向同一个数组对象,修改其中任意一个都会影响其余变量
deep copy
copy() 只在调用时复制一次当前的值,得到独立的新数组;之后修改 a 不会影响 b
a = np.arange(4)
b=a.copy()
a[0]=99999
print(a)
print(b)
输出:
[99999 1 2 3]
[0 1 2 3]