Python科学计算库Numpy的基本使用

最近在学习基础的基于python的机器学习教程,觉得很有必要记录一下以便后期学习及回顾,本内容均来自此学习视频https://www.bilibili.com/video/av30300809/?p=8,更详细的内容可直接前往此网站学习。(注:以下代码运行环境为python 2.7)

genfromtxt()

import numpy

# Parse the comma-separated text file into an array; the printed type shows
# that genfromtxt() returns a numpy.ndarray.
world_alcohol = numpy.genfromtxt(fname="world_alcohol.txt", delimiter=",")
print(type(world_alcohol))  # <type 'numpy.ndarray'>

numpy.array()

# numpy.array() accepts a list or a list of lists as input.
# A flat list produces a one-dimensional array (a vector).
vector = numpy.array([5, 10, 15, 20])
# A list of lists produces a two-dimensional array (a matrix).
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(vector)  # [ 5 10 15 20]
print(matrix)
# [[ 5 10 15]
#  [20 25 30]
#  [35 40 45]]

ndarray.shape

# ndarray.shape is a tuple holding the length of the array along each dimension.
vector = numpy.array([1, 2, 3, 4])
print(vector.shape)  # (4,)
# For a matrix the tuple has two entries: (number of rows, number of columns).
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)  # (2, 3)

# Only the module name `numpy` is imported, so array() has to be qualified.
c = numpy.array([[1, 1], [1, 2], [1, 3], [1, 4]])
c.shape  # (4, 2)
# shape[0] is the length of the first dimension (the number of rows)
c.shape[0]  # 4
# shape[1] is the length of the second dimension (the number of columns)
c.shape[1]  # 2

ndarray.dtype

# All elements of a NumPy array share one data type. NumPy chooses a suitable
# type on its own when it reads data or converts a list, and the dtype
# attribute reports which type was chosen.
numbers = numpy.array(list(range(1, 5)))
# NOTE(review): the default integer width is platform dependent; 'int32' is
# what the original author observed.
numbers.dtype  # dtype('int32')

genfromtxt()的delimiter、dtype、skip_header参数

# Read every field as a unicode string (dtype="U75") and skip the first line
# of the file (skip_header=1) -- presumably the column-header row.
world_alcohol = numpy.genfromtxt(
    "world_alcohol.txt",
    delimiter=",",
    dtype="U75",
    skip_header=1,
)
print(world_alcohol)
# [[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
#  [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
#  [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
#  ...,
#  [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
#  [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
#  [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]

# A single element is addressed as [row, column], both counted from 0.
uruguay_other_1986 = world_alcohol[1, 4]
third_country = world_alcohol[2, 2]
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(uruguay_other_1986)  # 0.5
print(third_country)  # Cte d'Ivoire

[ : ]

# A slice start:stop selects the elements at indices start .. stop - 1.
vector = numpy.array([5, 10, 15, 20])
print(vector[0:3])  # [ 5 10 15]

matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# A bare ":" keeps every row; the index 1 picks the second column.
print(matrix[:, 1])  # [10 25 40]

matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Every row, restricted to columns 0 and 1.
print(matrix[:, 0:2])
# [[ 5 10]
#  [20 25]
#  [35 40]]

==比较

import numpy
# Comparing an array with a scalar compares the scalar against every element
# and produces a Boolean array: True where the values are equal, False elsewhere.
vector = numpy.array([5, 10, 15, 20])
vector == 10  # array([False,  True, False, False], dtype=bool)

matrix = numpy.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])
matrix == 25
# array([[False, False, False],
#        [False,  True, False],
#        [False, False, False]], dtype=bool)

# Keep the Boolean vector [False, True, False, False] in equal_to_ten and use
# it as a mask: indexing with it returns only the elements marked True.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(equal_to_ten)  # [False  True False False]
print(vector[equal_to_ten])  # [10]

# Several conditions can be combined element-wise with & (and) and | (or).
# No element equals both 10 and 5, so every entry of the result is False.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)  # [False False False False]

vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
# Assigning through a Boolean mask overwrites only the selected elements.
vector[equal_to_ten_or_five] = 50
print(vector)  # [50 50 15 20]

ndarray.astype()

# ndarray.astype() returns the array converted to another data type.
vector = numpy.array(["1", "2", "3"])
# print(...) with a single argument behaves the same under Python 2.7 and 3.
# The sample outputs below are the ones recorded under Python 2.7.
print(vector.dtype)  # |S1
print(vector)  # ['1' '2' '3']
# Convert the numeric strings to floating-point numbers.
vector = vector.astype(float)
print(vector.dtype)  # float64
print(vector)  # [ 1.  2.  3.]

axis参数

# The axis argument selects the dimension along which the operation runs:
# axis=1 produces one result per row, axis=0 one result per column.
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Row sums: 5+10+15, 20+25+30, 35+40+45
matrix.sum(axis=1)  # array([ 30,  75, 120])

matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# Column sums: 5+20+35, 10+25+40, 15+30+45
matrix.sum(axis=0)  # array([60, 75, 90])

arange()、reshape()、ndim、dtype、size 、random.random()

import numpy as np
# arange(15) yields the integers 0..14; reshape(3, 5) arranges them as 3 rows
# of 5 columns.
a = np.arange(15).reshape(3, 5)
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(a)
# [[ 0  1  2  3  4]
#  [ 5  6  7  8  9]
#  [10 11 12 13 14]]

a.shape  # (3, 5)

# ndim is the number of axes (dimensions) of the array.
# np.zeros is used here (rather than numpy.zeros) to match the alias this
# section imports.
b = np.zeros([2, 2, 3, 4])
b.ndim  # 4

# NOTE(review): the default integer width is platform dependent; 'int32' is
# what the original author observed.
a.dtype.name  # 'int32'

# size is the total number of elements of the array.
a.size  # 15

# arange(start, stop, step) creates a sequence of numbers; stop is excluded.
np.arange(10, 30, 5)  # array([10, 15, 20, 25])

# random.random(shape) fills an array of the given shape with random floats;
# the values below are one sample run and differ on every call.
np.random.random((2, 3))
# array([[ 0.40130659,  0.45452825,  0.79776512],
#        [ 0.63220592,  0.74591134,  0.64130737]])

linspace()

# Take 100 evenly spaced samples between 0 and 2*pi; as the sample output
# shows, both end points are part of the result.
from numpy import pi
np.linspace(start=0, stop=2 * pi, num=100)
'''
array([ 0.        ,  0.06346652,  0.12693304,  0.19039955,  0.25386607,
        0.31733259,  0.38079911,  0.44426563,  0.50773215,  0.57119866,
        0.63466518,  0.6981317 ,  0.76159822,  0.82506474,  0.88853126,
        0.95199777,  1.01546429,  1.07893081,  1.14239733,  1.20586385,
        1.26933037,  1.33279688,  1.3962634 ,  1.45972992,  1.52319644,
        1.58666296,  1.65012947,  1.71359599,  1.77706251,  1.84052903,
        1.90399555,  1.96746207,  2.03092858,  2.0943951 ,  2.15786162,
        2.22132814,  2.28479466,  2.34826118,  2.41172769,  2.47519421,
        2.53866073,  2.60212725,  2.66559377,  2.72906028,  2.7925268 ,
        2.85599332,  2.91945984,  2.98292636,  3.04639288,  3.10985939,
        3.17332591,  3.23679243,  3.30025895,  3.36372547,  3.42719199,
        3.4906585 ,  3.55412502,  3.61759154,  3.68105806,  3.74452458,
        3.8079911 ,  3.87145761,  3.93492413,  3.99839065,  4.06185717,
        4.12532369,  4.1887902 ,  4.25225672,  4.31572324,  4.37918976,
        4.44265628,  4.5061228 ,  4.56958931,  4.63305583,  4.69652235,
        4.75998887,  4.82345539,  4.88692191,  4.95038842,  5.01385494,
        5.07732146,  5.14078798,  5.2042545 ,  5.26772102,  5.33118753,
        5.39465405,  5.45812057,  5.52158709,  5.58505361,  5.64852012,
        5.71198664,  5.77545316,  5.83891968,  5.9023862 ,  5.96585272,
        6.02931923,  6.09278575,  6.15625227,  6.21971879,  6.28318531])
'''

*和dot()

# "*" multiplies two arrays element by element; the matrix product is computed
# with the dot() function or the ndarray.dot() method.
A = np.array([[1, 1],
              [0, 1]])
B = np.array([[2, 0],
              [3, 4]])
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(A)
# [[1 1]
#  [0 1]]
print(B)
# [[2 0]
#  [3 4]]
# Element-wise product
print(A * B)
# [[2 0]
#  [0 4]]
# Matrix product, method form
print(A.dot(B))
# [[5 4]
#  [3 4]]
# Matrix product, function form
print(np.dot(A, B))
# [[5 4]
#  [3 4]]

exp()、sqrt()

import numpy as np
B = np.arange(3)
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(B)  # [0 1 2]
# e raised to the power of each element of B
print(np.exp(B))  # [1.         2.71828183 7.3890561 ]
# Square root of each element of B
print(np.sqrt(B))  # [0.         1.         1.41421356]

floor()、ravel()、resize()

# floor() rounds every element down; applied to 10 * random values it yields
# whole numbers stored as floats. All sample outputs below come from one run
# and differ on every execution.
a = np.floor(10 * np.random.random((3, 4)))
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(a)
# [[4. 7. 8. 4.]
#  [6. 1. 6. 2.]
#  [5. 3. 7. 6.]]
a.shape  # (3, 4)
# ravel() flattens the array into one dimension.
print(a.ravel())  # [4. 7. 8. 4. 6. 1. 6. 2. 5. 3. 7. 6.]
# Assigning to shape rearranges the same 12 elements in place.
a.shape = (6, 2)
print(a)
# [[4. 7.]
#  [8. 4.]
#  [6. 1.]
#  [6. 2.]
#  [5. 3.]
#  [7. 6.]]
# .T is the transpose.
print(a.T)
# [[4. 8. 6. 6. 5. 7.]
#  [7. 4. 1. 2. 3. 6.]]
# resize() returns None and changes the array itself, whereas reshape()
# returns the reshaped result and leaves the original array's shape alone.
print(a.resize((2, 6)))  # None
print(a)
# [[4. 7. 8. 4. 6. 1.]
#  [6. 2. 5. 3. 7. 6.]]

# A dimension given as -1 in a reshape is calculated automatically from the
# remaining dimensions and the number of elements.
a.reshape(3, -1)
# [[4. 7. 8. 4.]
#  [6. 1. 6. 2.]
#  [5. 3. 7. 6.]]

hstack()、vstack()、hsplit()、vsplit()

# Two random 2x2 arrays of whole numbers; every sample output in this section
# comes from one run and differs on every execution.
a = np.floor(10 * np.random.random((2, 2)))
b = np.floor(10 * np.random.random((2, 2)))
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(a)
print('---')
print(b)
print('---')
# np.vstack() stacks arrays vertically (one on top of the other).
# np.hstack() lays arrays out horizontally (side by side).
print(np.hstack((a, b)))
# [[ 5.  6.]
#  [ 1.  5.]]
# ---
# [[ 8.  6.]
#  [ 9.  0.]]
# ---
# [[ 5.  6.  8.  6.]
#  [ 1.  5.  9.  0.]]

a = np.floor(10 * np.random.random((2, 12)))
# Horizontal splitting examples (left disabled, as in the original notes):
#   np.hsplit(a, 3)       -> three pieces of equal width
#   np.hsplit(a, (3, 4))  -> split after the third and the fourth column
a = np.floor(10 * np.random.random((12, 2)))
# A string literal placed after a statement is a syntax error, so the sample
# output is kept in comments instead.
print(a)
# [[ 5.  2.]
#  [ 1.  3.]
#  [ 9.  6.]
#  [ 2.  2.]
#  [ 7.  2.]
#  [ 8.  2.]
#  [ 1.  7.]
#  [ 2.  8.]
#  [ 4.  4.]
#  [ 8.  5.]
#  [ 4.  3.]
#  [ 2.  3.]]
# vsplit(a, 3) cuts the 12 rows into three arrays of 4 rows each.
np.vsplit(a, 3)
# [array([[ 5.,  2.],
#         [ 1.,  3.],
#         [ 9.,  6.],
#         [ 2.,  2.]]), array([[ 7.,  2.],
#         [ 8.,  2.],
#         [ 1.,  7.],
#         [ 2.,  8.]]), array([[ 4.,  4.],
#         [ 8.,  5.],
#         [ 4.,  3.],
#         [ 2.,  3.]])]

copy of array

# Plain assignment copies neither the array object nor its data.
a = np.arange(12)
b = a
# a and b are two names for the same ndarray object.
# print(...) with a single argument behaves the same under Python 2.7 and 3.
print(b is a)  # True
# Changing the shape through b is therefore visible through a as well.
b.shape = 3, 4
print(a.shape)  # (3, 4)
# Both names report the same identity; the number itself varies from run to run.
print(id(a))  # 82691200
print(id(b))  # 82691200

view()浅复制和copy()深复制

# This section continues with the array `a` defined earlier in the file
# (the values 0..11 with shape (3, 4)).
# view() creates a new array object that looks at the same data.
c = a.view()
c is a  # False
# Reshaping the view leaves the shape of the original array untouched ...
c.shape = 2, 6
print(a.shape)  # (3, 4)
# ... but writing through the view changes the shared data, so a sees it too.
c[0, 4] = 1234
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(a)
# [[   0    1    2    3]
#  [1234    5    6    7]
#  [   8    9   10   11]]

# copy() makes a complete copy of the array and its data.
d = a.copy()
d is a  # False
# Writing to the copy does not affect the original.
d[0, 0] = 9999
print(d)
# [[9999    1    2    3]
#  [1234    5    6    7]
#  [   8    9   10   11]]
print(a)
# [[   0    1    2    3]
#  [1234    5    6    7]
#  [   8    9   10   11]]

先找索引,然后通过索引找对应值、argmax()

import numpy as np
data = np.sin(np.arange(20)).reshape(5, 4)
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(data)
# [[ 0.          0.84147098  0.90929743  0.14112001]
#  [-0.7568025  -0.95892427 -0.2794155   0.6569866 ]
#  [ 0.98935825  0.41211849 -0.54402111 -0.99999021]
#  [-0.53657292  0.42016704  0.99060736  0.65028784]
#  [-0.28790332 -0.96139749 -0.75098725  0.14987721]]
# argmax(axis=0) does not sort anything: for every column it returns the row
# index at which that column's largest value is found.
ind = data.argmax(axis=0)
print(ind)  # [2 0 3 1]
# Pair each row index with its column index to pick out the maxima themselves.
data_max = data[ind, range(data.shape[1])]
print(data_max)  # [0.98935825 0.84147098 0.99060736 0.6569866 ]
# The values found through the indices equal the per-column maxima.
all(data_max == data.max(axis=0))  # True

tile()

import numpy as np
a = np.arange(0, 40, 10)
# tile() builds a larger array by repeating a: with (3, 5) the 4-element
# vector is repeated 5 times along each row and the row is repeated 3 times.
b = np.tile(a, (3, 5))
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(b)
# [[ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
#  [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]
#  [ 0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30  0 10 20 30]]

sort()、argsort()

a = np.array([[4, 3, 5], [1, 2, 1]])
# A string literal placed after a print statement is a syntax error in every
# Python version, so the sample output is kept in comments instead.
print(a)
# [[4 3 5]
#  [1 2 1]]
# np.sort() returns the sorted result; axis=1 sorts the values within each row.
b = np.sort(a, axis=1)
print(b)
# [[3 4 5]
#  [1 1 2]]
# ndarray.sort() sorts the array itself, in place.
a.sort(axis=1)
print(a)
# [[3 4 5]
#  [1 1 2]]
a = np.array([4, 3, 1, 2])
# argsort() returns the indices that would put the array in ascending order.
j = np.argsort(a)
print(j)  # [2 3 1 0]
# Indexing with those indices yields the sorted values.
print(a[j])  # [1 2 3 4]
  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值