Machine Learning1: Numpy Tutorial

最新推荐文章于 2024-10-15 11:02:22 发布

中二的IT少女腻

最新推荐文章于 2024-10-15 11:02:22 发布

阅读量203

点赞数

分类专栏：机器学习　文章标签：机器学习 numpy 大数据 数据分析 python

本文链接：https://blog.csdn.net/ZoeyXu/article/details/105908588

版权

机器学习专栏收录该内容

1 篇文章 0 订阅

订阅专栏

Machine Learning1: Numpy Tutorial

Learning material: 唐宇迪《python数据分析与机器学习实战》
如果有一起学习机器学习的小伙伴们可以留言，人多的话，可以拉群一起讨论！

import numpy
from numpy import pi

# 1. numpy.array()
# Build a 1-D array (vector) and a 2-D array (matrix) from Python lists.
vector = numpy.array([5, 10, 15, 20])
matrix = numpy.array(
    [
        [5, 10, 15],
        [20, 25, 30],
        [35, 40, 45],
    ]
)
for _arr in (vector, matrix):
    print(_arr)

[ 5 10 15 20]
[[ 5 10 15]
 [20 25 30]
 [35 40 45]]

# 2. vector.shape
# The .shape attribute is a tuple giving the array's length along each axis.

vector = numpy.array([5, 10, 15, 20])
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(vector.shape)  # 1-D array: a one-element tuple
print(matrix.shape)  # 2-D array: (rows, columns)

(4,)
(2, 3)

# 3. All elements in a NumPy array must share one type.
# If a single element has a different type, every element is converted
# to a common type.

# 4. numbers.dtype shows the element type

numbers = numpy.array([1, 2, 3, 4])
numbers1 = numpy.array([1, 2, 3, 4.0])   # one float -> all floats
numbers2 = numpy.array([1, 2, 3, '4'])   # one string -> all strings
for _arr in (numbers, numbers1, numbers2):
    print(_arr)
    print(_arr.dtype)

[1 2 3 4]
int64
[1. 2. 3. 4.]
float64
['1' '2' '3' '4']
<U21

# Load the data: read the comma-separated file as strings, skipping the
# header line.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)
print(world_alcohol)

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ...
 ['1987' 'Africa' 'Malawi' 'Other' '0.75']
 ['1989' 'Americas' 'Bahamas' 'Wine' '1.5']
 ['1985' 'Africa' 'Malawi' 'Spirits' '0.31']]

# 5. Reading a single element with array[row, column]
uruguay_other_1986, third_country = world_alcohol[1, 4], world_alcohol[2, 2]
print(uruguay_other_1986, third_country, sep="\n")

0.5
Cte d'Ivoire

# 6. Slicing: indices 0, 1 and 2 are returned; the stop index 3 is excluded
vector = numpy.array([5, 10, 15, 20])
_first_three = vector[:3]
print(_first_three)

[ 5 10 15]

# 7. Selecting a column: ":" keeps every row, 1 picks the second column
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
_second_column = matrix[:, 1]
print(_second_column)

[10 25 40]

# 8. Selecting a row: 0 picks the first row, ":" keeps every column
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
_first_row = matrix[0, :]
print(_first_row)

[ 5 10 15]

# 9. Comparing an array with a scalar is done element by element and
#    returns a NumPy array with dtype=bool.

vector = numpy.array([5, 10, 15, 20])
vector == 10  # bare expression: its value is only shown in an interactive session

array([False,  True, False, False])

# 10. Indexing with a boolean array returns the elements whose mask entry
#     is True.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = vector == 10
for _shown in (equal_to_ten, vector[equal_to_ten]):
    print(_shown)

[False  True False False]
[10]

# Build a mask from the second column, then use it to select whole rows.
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
_second_column = matrix[:, 1]
second_column_25 = _second_column == 25
print(second_column_25)
_matching_rows = matrix[second_column_25, :]
print(_matching_rows)

[False  True False]
[[20 25 30]]

# Combine two masks with "|" (element-wise OR), then overwrite the
# selected elements in place.
vector = numpy.array([5, 10, 15, 20])
_is_ten = vector == 10
_is_five = vector == 5
equal_to_ten_or_five = _is_ten | _is_five
print(equal_to_ten_or_five)
vector[equal_to_ten_or_five] = 50
print(vector)

[ True  True False False]
[50 50 15 20]

# Use a row mask together with a column index to overwrite a single cell.
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
_column = 1
second_column_25 = matrix[:, _column] == 25
print(second_column_25)
matrix[second_column_25, _column] = 10
print(matrix)

[False  True False]
[[ 5 10 15]
 [20 10 30]
 [35 40 45]]

# 11. vector.astype(float) converts the array's elements to another dtype
vector = numpy.array(["1", "2", "3"])
print(vector.dtype, vector, sep="\n")   # before: strings
vector = vector.astype(float)
print(vector.dtype, vector, sep="\n")   # after: floats

<U1
['1' '2' '3']
float64
[1. 2. 3.]

# 12. Extreme values
# .min() returns the smallest element and .max() the largest; for a 2-D
# array they are taken over all elements.
vector = numpy.array([5, 10, 15, 20])
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
for _arr in (vector, matrix):
    print(_arr.min())
    print(_arr.max())

# 13. Sums
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])

# axis=1: one total per row
_row_totals = matrix.sum(axis=1)
# axis=0: one total per column
_column_totals = matrix.sum(axis=0)

print(_row_totals)
print(_column_totals)

[ 30  75 120]
[60 75 90]

# 14. numpy.arange(15) creates a 1-D array holding 0 through 14

# 15. a.reshape(3, 5) rearranges the same elements into 3 rows and 5 columns

a = numpy.reshape(numpy.arange(15), (3, 5))
print(a)
a = numpy.reshape(a, (5, 3))
print(a)

# 16. a.ndim is the number of dimensions
# 17. a.size is the total number of elements
# 18. a.dtype.name is the name of the element type
for _attribute in (a.ndim, a.size, a.dtype.name):
    print(_attribute)

[[ 0  1  2  3  4]
 [ 5  6  7  8  9]
 [10 11 12 13 14]]
[[ 0  1  2]
 [ 3  4  5]
 [ 6  7  8]
 [ 9 10 11]
 [12 13 14]]
2
15
int64

_shape = (3, 4)

# 16. numpy.zeros((3, 4)) creates a 3-row, 4-column array of zeros. The
#     elements are floats by default; another type can be requested, e.g.
#     numpy.zeros((2, 3, 4), dtype=numpy.int32)
print(numpy.zeros(_shape))

# 17. numpy.ones((3, 4)) creates a 3-row, 4-column array of ones. The
#     elements are floats by default; another type can be requested, e.g.
#     numpy.ones((2, 3, 4), dtype=numpy.int32)
print(numpy.ones(_shape))

[[0. 0. 0. 0.]
 [0. 0. 0. 0.]
 [0. 0. 0. 0.]]
[[1. 1. 1. 1.]
 [1. 1. 1. 1.]
 [1. 1. 1. 1.]]

# 18. numpy.arange(10, 30, 5) creates a 1-D array that starts at 10 and
#     advances in steps of 5, stopping before 30 (the stop value itself is
#     not included)
_start, _stop, _step = 10, 30, 5
print(numpy.arange(_start, _stop, _step))

[10 15 20 25]

# 19. numpy.random.random((2, 3)) creates a 2-row, 3-column array of random
#     floats drawn from the half-open interval [0.0, 1.0); the values are
#     never negative.
# The first "random" is the module, the second "random" is the function.
_sample = numpy.random.random((2, 3))
print(_sample)

[[0.63130971 0.70306379 0.7532219 ]
 [0.46562968 0.78682549 0.3383525 ]]

# 20. numpy.linspace(0, 2*pi, 100) creates 100 evenly spaced values that
#     start at 0 and end at 2*pi (both ends included); 2*pi is the end
#     point, not the step.
_angles = numpy.linspace(0, 2 * pi, num=100)
print(_angles)

[0.         0.06346652 0.12693304 0.19039955 0.25386607 0.31733259
 0.38079911 0.44426563 0.50773215 0.57119866 0.63466518 0.6981317
 0.76159822 0.82506474 0.88853126 0.95199777 1.01546429 1.07893081
 1.14239733 1.20586385 1.26933037 1.33279688 1.3962634  1.45972992
 1.52319644 1.58666296 1.65012947 1.71359599 1.77706251 1.84052903
 1.90399555 1.96746207 2.03092858 2.0943951  2.15786162 2.22132814
 2.28479466 2.34826118 2.41172769 2.47519421 2.53866073 2.60212725
 2.66559377 2.72906028 2.7925268  2.85599332 2.91945984 2.98292636
 3.04639288 3.10985939 3.17332591 3.23679243 3.30025895 3.36372547
 3.42719199 3.4906585  3.55412502 3.61759154 3.68105806 3.74452458
 3.8079911  3.87145761 3.93492413 3.99839065 4.06185717 4.12532369
 4.1887902  4.25225672 4.31572324 4.37918976 4.44265628 4.5061228
 4.56958931 4.63305583 4.69652235 4.75998887 4.82345539 4.88692191
 4.95038842 5.01385494 5.07732146 5.14078798 5.2042545  5.26772102
 5.33118753 5.39465405 5.45812057 5.52158709 5.58505361 5.64852012
 5.71198664 5.77545316 5.83891968 5.9023862  5.96585272 6.02931923
 6.09278575 6.15625227 6.21971879 6.28318531]

# 21. Array arithmetic: subtraction, power and comparison all operate
#     element by element

a = numpy.array([20, 30, 40, 50])
b = numpy.arange(4)
for _result in (a, b, a - b, b ** 2, a < 35):
    print(_result)

[20 30 40 50]
[0 1 2 3]
[20 29 38 47]
[0 1 4 9]
[ True  True False False]

# 22. Array arithmetic, part 2
A = numpy.array([[1, 1], [0, 1]])
B = numpy.array([[2, 0], [3, 4]])
print(A, B, sep="\n")

# "*" multiplies elements at matching positions
print(A * B)

# dot() is the matrix product (rows times columns, summed); the method and
# the function form give the same result
for _product in (A.dot(B), numpy.dot(A, B)):
    print(_product)

[[1 1]
 [0 1]]
[[2 0]
 [3 4]]
[[2 0]
 [0 4]]
[[5 4]
 [3 4]]
[[5 4]
 [3 4]]

# 23. Element-wise exponential (e raised to each element) and square root
import numpy as np
B = np.arange(3)
for _values in (B, np.exp(B), np.sqrt(B)):
    print(_values)

[0 1 2]
[1.         2.71828183 7.3890561 ]
[0.         1.         1.41421356]

# 24. Reshaping operations

# np.floor(array) rounds every element down to a whole number
a = np.floor(np.random.random((3, 4)) * 10)
print(a)

# a.ravel() flattens the 2-D array into a single 1-D vector
_flat = a.ravel()
print(_flat)

_six_by_two = a.reshape((6, 2))
print(_six_by_two)

# a.T swaps rows and columns (transpose)
print(a.T)

# a.reshape(3, -1): with -1 the column count is worked out from the row count
print(a.reshape(3, -1))

[[1. 3. 4. 8.]
 [6. 1. 5. 6.]
 [6. 7. 2. 3.]]
[1. 3. 4. 8. 6. 1. 5. 6. 6. 7. 2. 3.]
[[1. 3.]
 [4. 8.]
 [6. 1.]
 [5. 6.]
 [6. 7.]
 [2. 3.]]
[[1. 6. 6.]
 [3. 1. 7.]
 [4. 5. 2.]
 [8. 6. 3.]]
[[1. 3. 4. 8.]
 [6. 1. 5. 6.]
 [6. 7. 2. 3.]]

# 25. Joining arrays

a = np.floor(np.random.random((2, 2)) * 10)
b = np.floor(np.random.random((2, 2)) * 10)
for _part in (a, b):
    print(_part)
    print('---')

# hstack joins side by side (more columns)
_side_by_side = np.hstack((a, b))
print(_side_by_side)

# vstack joins one on top of the other (more rows)
_stacked = np.vstack((a, b))
print(_stacked)

[[2. 1.]
 [6. 5.]]
---
[[4. 1.]
 [3. 3.]]
---
[[2. 1. 4. 1.]
 [6. 5. 3. 3.]]
[[2. 1.]
 [6. 5.]
 [4. 1.]
 [3. 3.]]

# 26. Splitting arrays

a = np.floor(np.random.random((2, 12)) * 10)
print(a)

# Split along the columns into 3 equal parts
_thirds = np.hsplit(a, 3)
print(_thirds)

# Split along the columns at column indices 3 and 4, giving three pieces:
# columns 0-2, column 3, and columns 4 onward
_cut_at_3_and_4 = np.hsplit(a, (3, 4))
print(_cut_at_3_and_4)

a = np.floor(np.random.random((12, 2)) * 10)
print(a)

# Split along the rows into 3 equal parts
_row_thirds = np.vsplit(a, 3)
print(_row_thirds)

[[2. 3. 4. 0. 2. 1. 5. 2. 9. 5. 2. 5.]
 [4. 9. 7. 4. 9. 8. 1. 3. 4. 2. 1. 8.]]
[array([[2., 3., 4., 0.],
       [4., 9., 7., 4.]]), array([[2., 1., 5., 2.],
       [9., 8., 1., 3.]]), array([[9., 5., 2., 5.],
       [4., 2., 1., 8.]])]
[array([[2., 3., 4.],
       [4., 9., 7.]]), array([[0.],
       [4.]]), array([[2., 1., 5., 2., 9., 5., 2., 5.],
       [9., 8., 1., 3., 4., 2., 1., 8.]])]
[[5. 2.]
 [3. 0.]
 [2. 0.]
 [2. 8.]
 [8. 0.]
 [6. 3.]
 [1. 4.]
 [0. 9.]
 [3. 1.]
 [8. 2.]
 [1. 3.]
 [7. 4.]]
[array([[5., 2.],
       [3., 0.],
       [2., 0.],
       [2., 8.]]), array([[8., 0.],
       [6., 3.],
       [1., 4.],
       [0., 9.]]), array([[3., 1.],
       [8., 2.],
       [1., 3.],
       [7., 4.]])]

# 27. Assignment, views and copies

# Plain "=" does not copy anything: a and b are two names for the same
# ndarray object
a = np.arange(12)
b = a
print(b is a)
b.shape = (3, 4)
print(a.shape)

# Both names refer to the same object, so the ids match and a change made
# through one name is visible through the other
print(id(a), id(b), sep="\n")

# View (shallow copy): c is a different object from a, but both share the
# same underlying data
c = a.view()
print(c is a)
c.shape = (2, 6)      # reshaping the view leaves a's shape untouched
print(a.shape)
c[0, 4] = 1234        # writing through the view changes a's data
print(a)

# Deep copy: d has its own data, so changing d leaves a unchanged
d = a.copy()
print(d is a)
d[0, 0] = 9999
print(d, a, sep="\n")

True
(3, 4)
4517211552
4517211552
False
(3, 4)
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
False
[[9999    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]
[[   0    1    2    3]
 [1234    5    6    7]
 [   8    9   10   11]]

# 28. Index of the maximum value in a matrix

import numpy as np
data = np.sin(np.arange(20)).reshape(5, 4)
print(data)

# Row index of the largest value in each column
ind = data.argmax(axis=0)
print(ind)

# Pair each row index in ind with its column index to pick out the maximum
# of every column
data_max = data[ind, np.arange(data.shape[1])]
print(data_max)

# Compare the two arrays as a whole with NumPy instead of iterating over the
# element-wise result with the builtin all(), which only works for 1-D input
print(np.array_equal(data_max, data.max(axis=0)))

[[ 0.          0.84147098  0.90929743  0.14112001]
 [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
 [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
 [-0.53657292  0.42016704  0.99060736  0.65028784]
 [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
[2 0 3 1]
[0.98935825 0.84147098 0.99060736 0.6569866 ]
True

# 29. Repeating an array
a = np.arange(0, 40, 10)
print(a)

# np.tile(a, (2, 3)) repeats a 2 times along the rows and 3 times along the
# columns, so the 4-element vector becomes a 2 x 12 array
b = np.tile(a, reps=(2, 3))
print(b)

[ 0 10 20 30]
[[ 0 10 20 30  0 10 20 30  0 10 20 30]
 [ 0 10 20 30  0 10 20 30  0 10 20 30]]

# 30. Sorting

a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)

# axis=1 sorts the values within each row.
# np.sort returns a sorted copy; a.sort sorts a itself in place.
b = np.sort(a, axis=1)
print(b)
a.sort(axis=1)
print(a)

# axis=0 sorts the values within each column
a.sort(axis=0)
print(a)

# argsort returns the indices that would put the array in ascending order
# (not just the index of the minimum)
a = np.array([4, 3, 1, 2])
j = a.argsort()
# Indexing with those indices yields the values sorted from small to large
print(j, a[j], sep="\n")

[[4 3 5]
 [1 2 1]]
[[3 4 5]
 [1 1 2]]
[[3 4 5]
 [1 1 2]]
[[1 1 2]
 [3 4 5]]
[2 3 1 0]
[1 2 3 4]

中二的IT少女腻

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫

专栏目录