Machine Learning1: Numpy Tutorial

Machine Learning1: Numpy Tutorial

Learning material: 唐宇迪《python数据分析与机器学习实战》
如果有一起学习机器学习的小伙伴们可以留言,人多的话,可以拉群一起讨论!

import numpy
from numpy import pi
# 1. numpy.array()
# Build a 1-D array and a 2-D array from (nested) Python lists.
vector = numpy.array([5, 10, 15, 20])
matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
# One print call, one array per line.
print(vector, matrix, sep="\n")
[ 5 10 15 20]
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]
# 2. ndarray.shape
# The shape attribute is a tuple with the array's length along each axis.

vector = numpy.array([5, 10, 15, 20])
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(vector.shape)  # 1-D array with 4 elements
print(matrix.shape)  # 2 rows, 3 columns
(4,)
(2, 3)
# 3. All elements of a numpy array share a single dtype.
# If the input mixes types, numpy converts every element to one common type.

# 4. ndarray.dtype shows the element type.

numbers = numpy.array([1, 2, 3, 4])       # integers only
numbers1 = numpy.array([1, 2, 3, 4.0])    # one float among the inputs
numbers2 = numpy.array([1, 2, 3, '4'])    # one string among the inputs
print(numbers, numbers.dtype, sep="\n")
print(numbers1, numbers1.dtype, sep="\n")
print(numbers2, numbers2.dtype, sep="\n")
[1 2 3 4]
int64
[1. 2. 3. 4.]
float64
['1' '2' '3' '4']
<U21
# Load the data set: comma-separated text, every field read as a string
# of up to 75 characters, first (header) line skipped.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)
print(world_alcohol)
[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]
# 5. Reading single values from a 2-D array with [row, column]
uruguay_other_1986 = world_alcohol[1, 4]  # second row, fifth column
third_country = world_alcohol[2, 2]       # third row, third column
print(uruguay_other_1986, third_country, sep="\n")
0.5
Cte d'Ivoire
# 6. Slicing a numpy array
vector = numpy.array([5, 10, 15, 20])
# Elements at indices 0, 1 and 2; the stop index 3 is excluded.
print(vector[:3])
[ 5 10 15]
# 7. Selecting a column
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# ":" keeps every row; 1 picks the second column.
print(matrix[:, 1])
[10 25 40]
# 8. Selecting a row
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# 0 picks the first row; ":" keeps every column.
print(matrix[0, :])
[ 5 10 15]
# 9. Comparing a numpy array with a value is element-wise and yields an array with dtype=bool

vector = numpy.array([5, 10, 15, 20])
# Bare expression: its value is only displayed by an interactive session / notebook.
vector == 10
array([False,  True, False, False])
# 10. Indexing with a boolean array returns the elements where the mask is True
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = numpy.equal(vector, 10)  # same element-wise test as vector == 10
print(equal_to_ten, vector[equal_to_ten], sep="\n")
[False  True False False]
[10]
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Boolean mask over the rows: True where the second column equals 25.
second_column_25 = matrix[:, 1] == 25
print(second_column_25)
# A row mask on its own keeps the selected rows with all their columns.
print(matrix[second_column_25])
[False  True False]
[[20 25 30]]
vector = numpy.array([5, 10, 15, 20])
# Element-wise OR of two masks: True where the value is 10 or 5.
equal_to_ten_or_five = numpy.logical_or(vector == 10, vector == 5)
print(equal_to_ten_or_five)
# Assigning through a boolean mask overwrites only the selected elements.
vector[equal_to_ten_or_five] = 50
print(vector)
[ True  True False False]
[50 50 15 20]
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Rows whose second column equals 25.
second_column_25 = (matrix[:, 1] == 25)
print(second_column_25)
# Overwrite the second column, but only in the selected rows.
matrix[second_column_25, 1] = 10
print(matrix)
[False  True False]
[[ 5 10 15]
 [20 10 30]
 [35 40 45]]
# 11. vector.astype(float) converts the elements of a numpy array to another dtype
vector = numpy.array(["1", "2", "3"])
print(vector.dtype, vector, sep="\n")
# astype returns the converted array, so rebind the name to keep it.
vector = vector.astype(float)
print(vector.dtype, vector, sep="\n")
<U1
['1' '2' '3']
float64
[1. 2. 3.]
# 12. Extreme values
vector = numpy.array([5, 10, 15, 20])

# vector.min() / vector.max(): smallest and largest element
print(vector.min(), vector.max(), sep="\n")

matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Without an axis argument the whole matrix is reduced to a single value.
print(matrix.min(), matrix.max(), sep="\n")
5
20
5
45
# 13. Sums
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# axis=1 adds up each row; axis=0 adds up each column.
print(matrix.sum(axis=1), matrix.sum(axis=0), sep="\n")
[ 30  75 120]
[60 75 90]
# 14. numpy.arange(15) creates a 1-D numpy array holding 0..14

# 15. Reshaping to (3, 5) arranges the same elements in 3 rows and 5 columns

a = numpy.reshape(numpy.arange(15), (3, 5))
print(a)
a = numpy.reshape(a, (5, 3))
print(a)

# 16. a.ndim: number of dimensions of the array
# 17. a.size: total number of elements
# 18. a.dtype.name: name of the element type
print(a.ndim, a.size, a.dtype.name, sep="\n")
[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
2
15
int64
# 16. numpy.zeros((3, 4)) creates a 3x4 matrix filled with 0. The default dtype is float; another can be requested, e.g. numpy.zeros((2, 3, 4), dtype=numpy.int32)
print(numpy.zeros((3, 4)))

# 17. numpy.ones((3, 4)) creates a 3x4 matrix filled with 1. The default dtype is float; another can be requested, e.g. numpy.ones((2, 3, 4), dtype=numpy.int32)
print(numpy.ones((3, 4)))
[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]
# 18. numpy.arange(10, 30, 5) creates an array starting at 10 with step 5; the stop value 30 itself is not included
print(numpy.arange(10, 30, 5))
[10 15 20 25]
# 19. numpy.random.random((2, 3)) creates a 2x3 matrix of random floats drawn from the half-open interval [0.0, 1.0) (never negative)
# The first "random" is the module, the second "random" is the function
print(numpy.random.random((2, 3)))
[[0.63130971 0.70306379 0.7532219 ]
 [0.46562968 0.78682549 0.3383525 ]]
# 20. numpy.linspace(0, 2*pi, 100) creates 100 evenly spaced values from 0 to 2*pi, both endpoints included
print(numpy.linspace(0, 2*pi, 100))
[0.         0.06346652 0.12693304 0.19039955 0.25386607 0.31733259
 0.38079911 0.44426563 0.50773215 0.57119866 0.63466518 0.6981317
 0.76159822 0.82506474 0.88853126 0.95199777 1.01546429 1.07893081
 1.14239733 1.20586385 1.26933037 1.33279688 1.3962634  1.45972992
 1.52319644 1.58666296 1.65012947 1.71359599 1.77706251 1.84052903
 1.90399555 1.96746207 2.03092858 2.0943951  2.15786162 2.22132814
 2.28479466 2.34826118 2.41172769 2.47519421 2.53866073 2.60212725
 2.66559377 2.72906028 2.7925268  2.85599332 2.91945984 2.98292636
 3.04639288 3.10985939 3.17332591 3.23679243 3.30025895 3.36372547
 3.42719199 3.4906585  3.55412502 3.61759154 3.68105806 3.74452458
 3.8079911  3.87145761 3.93492413 3.99839065 4.06185717 4.12532369
 4.1887902  4.25225672 4.31572324 4.37918976 4.44265628 4.5061228
 4.56958931 4.63305583 4.69652235 4.75998887 4.82345539 4.88692191
 4.95038842 5.01385494 5.07732146 5.14078798 5.2042545  5.26772102
 5.33118753 5.39465405 5.45812057 5.52158709 5.58505361 5.64852012
 5.71198664 5.77545316 5.83891968 5.9023862  5.96585272 6.02931923
 6.09278575 6.15625227 6.21971879 6.28318531]
# 21. Element-wise arithmetic and comparison

a = numpy.array([20, 30, 40, 50])
b = numpy.arange(4)
print(a, b, sep="\n")
# Subtraction, power and comparison are all applied element by element.
print(a - b, b ** 2, a < 35, sep="\n")
[20 30 40 50]
[0 1 2 3]
[20 29 38 47]
[0 1 4 9]
[ True  True False False]
# 22. Element-wise product versus matrix product
A = numpy.array([[1, 1], [0, 1]])
B = numpy.array([[2, 0], [3, 4]])
print(A, B, sep="\n")
# "*" multiplies elements at matching positions.
print(A * B)
# dot() is the matrix product (row times column, summed);
# the method form and the function form give the same result.
print(A.dot(B), numpy.dot(A, B), sep="\n")
[[1 1]
 [0 1]]
[[2 0]
 [3 4]]
[[2 0]
 [0 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]
# 23. Element-wise exponential (e**x) and square root
import numpy as np
B = np.arange(3)
print(B, np.exp(B), np.sqrt(B), sep="\n")
[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]
# 24. Rounding down, flattening, reshaping and transposing

# np.floor(...) rounds every element down to a whole number (still stored as float)
a = np.floor(np.random.random((3, 4)) * 10)
print(a)

# a.ravel() flattens the matrix into a single 1-D vector
print(a.ravel())

print(a.reshape(6, 2))
# a.T swaps rows and columns (transpose)
print(a.T)

# a.reshape(3, -1): with 3 rows given, -1 lets numpy work out the column count
print(a.reshape((3, -1)))
[[1. 3. 4. 8.]
 [6. 1. 5. 6.]
 [6. 7. 2. 3.]]
[1. 3. 4. 8. 6. 1. 5. 6. 6. 7. 2. 3.]
[[1. 3.]
 [4. 8.]
 [6. 1.]
 [5. 6.]
 [6. 7.]
 [2. 3.]]
[[1. 6. 6.]
 [3. 1. 7.]
 [4. 5. 2.]
 [8. 6. 3.]]
[[1. 3. 4. 8.]
 [6. 1. 5. 6.]
 [6. 7. 2. 3.]]
# 25. Joining matrices

a = np.floor(np.random.random((2, 2)) * 10)
b = np.floor(np.random.random((2, 2)) * 10)
print(a, '---', b, '---', sep="\n")

# hstack joins side by side (more columns)
print(np.hstack((a, b)))

# vstack joins one on top of the other (more rows)
print(np.vstack((a, b)))
[[2. 1.]
 [6. 5.]]
---
[[4. 1.]
 [3. 3.]]
---
[[2. 1. 4. 1.]
 [6. 5. 3. 3.]]
[[2. 1.]
 [6. 5.]
 [4. 1.]
 [3. 3.]]
# 26. Splitting matrices

a = np.floor(np.random.random((2, 12)) * 10)
print(a)

# Split along the columns into 3 equal parts
print(np.hsplit(a, 3))

# Split along the columns with one cut before column 3 and one before column 4
print(np.hsplit(a, (3, 4)))
a = np.floor(np.random.random((12, 2)) * 10)
print(a)
# Split along the rows into 3 equal parts
print(np.vsplit(a, 3))
[[2. 3. 4. 0. 2. 1. 5. 2. 9. 5. 2. 5.]
 [4. 9. 7. 4. 9. 8. 1. 3. 4. 2. 1. 8.]]
[array([[2., 3., 4., 0.],
       [4., 9., 7., 4.]]), array([[2., 1., 5., 2.],
       [9., 8., 1., 3.]]), array([[9., 5., 2., 5.],
       [4., 2., 1., 8.]])]
[array([[2., 3., 4.],
       [4., 9., 7.]]), array([[0.],
       [4.]]), array([[2., 1., 5., 2., 9., 5., 2., 5.],
       [9., 8., 1., 3., 4., 2., 1., 8.]])]
[[5. 2.]
 [3. 0.]
 [2. 0.]
 [2. 8.]
 [8. 0.]
 [6. 3.]
 [1. 4.]
 [0. 9.]
 [3. 1.]
 [8. 2.]
 [1. 3.]
 [7. 4.]]
[array([[5., 2.],
       [3., 0.],
       [2., 0.],
       [2., 8.]]), array([[8., 0.],
       [6., 3.],
       [1., 4.],
       [0., 9.]]), array([[3., 1.],
       [8., 2.],
       [1., 3.],
       [7., 4.]])]
# 27. Copies

# Plain assignment with "=" does not copy anything
a = np.arange(12)
b = a
# a and b are two names for the same ndarray object
print(b is a)
b.shape = 3,4
print (a.shape)

# a and b refer to the same object, so a change made through one is visible through the other
print (id(a))
print (id(b))

# Shallow copy (view): c is a different object from a, but both share the same data
c = a.view()
print(c is a)
# Reshaping the view leaves a's own shape untouched ...
c.shape = 2,6
print (a.shape)
# ... but writing an element through the view changes a as well
c[0,4] = 1234
print(a)

# Deep copy: d has its own data, independent of a
d = a.copy() 
print(d is a)
d[0,0] = 9999
print (d) 
print (a)
True
(3, 4)
4517211552
4517211552
False
(3, 4)
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
False
[[9999    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
# 28. Index of the maximum in each column

import numpy as np
data = np.sin(np.arange(20)).reshape(5, 4)
print(data)

# argmax(axis=0): for every column, the row index of its largest value
ind = data.argmax(axis=0)
print(ind)

# Pair each row index in ind with its column number to pick out the maxima
data_max = data[ind, np.arange(data.shape[1])]
print(data_max)
# Reduce the element-wise comparison with ndarray.all() instead of the
# builtin all(), which iterates in Python and only works for 1-D results.
print((data_max == data.max(axis=0)).all())
[[ 0.          0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
 [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
 [-0.53657292  0.42016704  0.99060736  0.65028784]
 [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
[2 0 3 1]
[0.98935825 0.84147098 0.99060736 0.6569866 ]
True
# 29. Tiling an array
a = np.arange(0, 40, 10)
print(a)

# np.tile(a, (2, 3)) repeats a: 2 copies along the rows and 3 copies along the columns
b = np.tile(a, (2, 3)) 
print (b)
[ 0 10 20 30]
[[ 0 10 20 30  0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30  0 10 20 30]]
# 30. Sorting

a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)

# axis=1 sorts within each row; np.sort returns the sorted result as a new array
b = np.sort(a, axis=1)
print(b)
# The sort method sorts a itself, in place
a.sort(axis=1)
print(a)

# axis=0 sorts within each column
a.sort(axis=0)
print(a)

# argsort returns the indices that would put the array in ascending order
a = np.array([4, 3, 1, 2])
j = a.argsort()
print(j)
# Indexing with those indices yields the values sorted from smallest to largest
print(a[j])
[[4 3 5]
 [1 2 1]]
[[3 4 5]
 [1 1 2]]
[[3 4 5]
 [1 1 2]]
[[1 1 2]
 [3 4 5]]
[2 3 1 0]
[1 2 3 4]

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值