最近在学习基于 Python 的机器学习基础教程,觉得很有必要把内容记录下来,便于后期学习和回顾。本文内容均来自学习视频 https://www.bilibili.com/video/av30300809/?p=8 ,更详细的内容可直接前往该网站学习。(注:以下代码的运行环境为 Python 2.7)
genfromtxt()
import numpy
# Load the comma-delimited text file into a NumPy array (no dtype is requested here).
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",")
# The loaded object is a numpy.ndarray.
print(type(world_alcohol)) # <type 'numpy.ndarray'>
numpy.array()
# numpy.array() accepts a list or a list of lists.
# A flat list produces a one-dimensional array (a vector).
vector = numpy.array([5, 10, 15, 20])
# A list of lists produces a two-dimensional array (a matrix).
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
# print() with a single argument behaves the same on Python 2.7 and Python 3.
print(vector)  # [ 5 10 15 20]
print(matrix)
# [[ 5 10 15]
#  [20 25 30]
#  [35 40 45]]
ndarray.shape
# ndarray.shape is a tuple holding the length of the array along each dimension.
vector = numpy.array([1, 2, 3, 4])
print(vector.shape)  # (4,)
# For a matrix the tuple has two entries: (rows, columns).
matrix = numpy.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)  # (2, 3)
# Only the `numpy` module is imported, so the constructor must be qualified;
# a bare array(...) raises NameError.
c = numpy.array([[1, 1], [1, 2], [1, 3], [1, 4]])
c.shape  # (4, 2)
# shape[0] is the length of the first dimension (the number of rows).
c.shape[0]  # 4
# shape[1] is the length of the second dimension (the number of columns).
c.shape[1]  # 2
ndarray.dtype
# Every value in a NumPy array shares one data type.
# NumPy picks a suitable data type automatically when reading data or converting lists to arrays.
# The dtype property reports the data type of an array.
numbers = numpy.array([1, 2, 3, 4])
# NOTE(review): 'int32' is what the recorded run showed; the default integer width is
# presumably platform-dependent (e.g. int64 elsewhere) -- confirm on the target machine.
numbers.dtype # dtype('int32')
delimiter参数
# Read every field as a unicode string (dtype "U75") and skip the first line (the header).
world_alcohol = numpy.genfromtxt("world_alcohol.txt", delimiter=",", dtype="U75", skip_header=1)
print(world_alcohol)
# [[u'1986' u'Western Pacific' u'Viet Nam' u'Wine' u'0']
#  [u'1986' u'Americas' u'Uruguay' u'Other' u'0.5']
#  [u'1985' u'Africa' u"Cte d'Ivoire" u'Wine' u'1.62']
#  ...,
#  [u'1987' u'Africa' u'Malawi' u'Other' u'0.75']
#  [u'1989' u'Americas' u'Bahamas' u'Wine' u'1.5']
#  [u'1985' u'Africa' u'Malawi' u'Spirits' u'0.31']]
# A single element is addressed as [row, column]; both indices start at 0.
uruguay_other_1986 = world_alcohol[1, 4]
third_country = world_alcohol[2, 2]
# print() with a single argument behaves the same on Python 2.7 and Python 3.
print(uruguay_other_1986)  # 0.5
print(third_country)  # Cte d'Ivoire
[ : ]
# A slice start:stop selects the elements at indices start .. stop-1.
vector = numpy.array([5, 10, 15, 20])
print(vector[:3])  # [ 5 10 15]
# A bare ":" in the first position keeps every row; the second index picks the column.
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print(matrix[:, 1])  # [10 25 40]
# A slice in the second position keeps a range of columns (here columns 0 and 1).
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
print(matrix[:, :2])
# [[ 5 10]
#  [20 25]
#  [35 40]]
==比较
import numpy
# Comparing an array with a scalar compares every element with that scalar
# and yields a Boolean array of the same shape (True where the values are equal).
vector = numpy.array([5, 10, 15, 20])
vector == 10  # array([False, True, False, False], dtype=bool)
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
matrix == 25
# array([[False, False, False],
#        [False, True, False],
#        [False, False, False]], dtype=bool)
# Keep the Boolean vector in equal_to_ten and use it as a mask:
# indexing with it returns only the elements where the mask is True.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten = (vector == 10)
print(equal_to_ten)  # [False True False False]
print(vector[equal_to_ten])  # [10]
# Several conditions are combined element-wise with & (and) and | (or).
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_and_five = (vector == 10) & (vector == 5)
print(equal_to_ten_and_five)  # [False False False False]
# A Boolean mask can also select the elements to overwrite.
vector = numpy.array([5, 10, 15, 20])
equal_to_ten_or_five = (vector == 10) | (vector == 5)
vector[equal_to_ten_or_five] = 50
print(vector)  # [50 50 15 20]
ndarray.astype()
# ndarray.astype() returns the array converted to another data type.
vector = numpy.array(["1", "2", "3"])
# The recorded run (Python 2.7) shows |S1, a one-byte string type;
# on Python 3 the same literal produces a unicode dtype (<U1).
print(vector.dtype)  # |S1
print(vector)  # ['1' '2' '3']
vector = vector.astype(float)
print(vector.dtype)  # float64
print(vector)  # [ 1. 2. 3.]
axis参数
# The axis argument chooses the dimension that the operation runs along:
# axis=1 produces one result per row, axis=0 one result per column.
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
matrix.sum(axis=1)  # array([ 30, 75, 120])
matrix = numpy.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
matrix.sum(axis=0)  # array([60, 75, 90])
arange()、reshape()、ndim、dtype、size 、random.random()
import numpy as np
# arange(15) yields 0..14; reshape(3, 5) arranges them as 3 rows of 5.
a = np.arange(15).reshape(3, 5)
print(a)
# array([[ 0, 1, 2, 3, 4],
#        [ 5, 6, 7, 8, 9],
#        [10, 11, 12, 13, 14]])
a.shape  # (3, 5)
# ndim: the number of axes (dimensions) of the array.
b = np.zeros([2, 2, 3, 4])
b.ndim  # 4
# NOTE(review): 'int32' is the value from the recorded run; the default integer
# width is presumably platform-dependent -- confirm on the target machine.
a.dtype.name  # 'int32'
# size: the total number of elements of the array.
a.size  # 15
# arange(start, stop, step) builds a sequence; the sample output stops before 30.
np.arange(10, 30, 5)  # array([10, 15, 20, 25])
# random.random(shape) fills an array of the given shape with random floats.
np.random.random((2, 3))
# Sample output (values differ on every run):
# array([[ 0.40130659, 0.45452825, 0.79776512],
#        [ 0.63220592, 0.74591134, 0.64130737]])
linspace()
# Take 100 evenly spaced values from 0 to 2*pi; the sample output below includes both endpoints.
from numpy import pi
np.linspace( 0, 2*pi, 100 )
'''
array([ 0. , 0.06346652, 0.12693304, 0.19039955, 0.25386607,
0.31733259, 0.38079911, 0.44426563, 0.50773215, 0.57119866,
0.63466518, 0.6981317 , 0.76159822, 0.82506474, 0.88853126,
0.95199777, 1.01546429, 1.07893081, 1.14239733, 1.20586385,
1.26933037, 1.33279688, 1.3962634 , 1.45972992, 1.52319644,
1.58666296, 1.65012947, 1.71359599, 1.77706251, 1.84052903,
1.90399555, 1.96746207, 2.03092858, 2.0943951 , 2.15786162,
2.22132814, 2.28479466, 2.34826118, 2.41172769, 2.47519421,
2.53866073, 2.60212725, 2.66559377, 2.72906028, 2.7925268 ,
2.85599332, 2.91945984, 2.98292636, 3.04639288, 3.10985939,
3.17332591, 3.23679243, 3.30025895, 3.36372547, 3.42719199,
3.4906585 , 3.55412502, 3.61759154, 3.68105806, 3.74452458,
3.8079911 , 3.87145761, 3.93492413, 3.99839065, 4.06185717,
4.12532369, 4.1887902 , 4.25225672, 4.31572324, 4.37918976,
4.44265628, 4.5061228 , 4.56958931, 4.63305583, 4.69652235,
4.75998887, 4.82345539, 4.88692191, 4.95038842, 5.01385494,
5.07732146, 5.14078798, 5.2042545 , 5.26772102, 5.33118753,
5.39465405, 5.45812057, 5.52158709, 5.58505361, 5.64852012,
5.71198664, 5.77545316, 5.83891968, 5.9023862 , 5.96585272,
6.02931923, 6.09278575, 6.15625227, 6.21971879, 6.28318531])
'''
*和dot()
# "*" multiplies element by element; the matrix product is computed with
# the dot method or the np.dot function.
A = np.array([[1, 1],
              [0, 1]])
B = np.array([[2, 0],
              [3, 4]])
print(A)
# [[1 1]
#  [0 1]]
print(B)
# [[2 0]
#  [3 4]]
# Element-wise product.
print(A * B)
# [[2 0]
#  [0 4]]
# Matrix product, method form.
print(A.dot(B))
# [[5 4]
#  [3 4]]
# Matrix product, function form (same result).
print(np.dot(A, B))
# [[5 4]
#  [3 4]]
exp()、sqrt()
import numpy as np
B = np.arange(3)
print(B)  # [0 1 2]
# exp: e raised to the power of each element of B.
print(np.exp(B))  # [1. 2.71828183 7.3890561 ]
# sqrt: the square root of each element of B.
print(np.sqrt(B))  # [0. 1. 1.41421356]
floor()、ravel()、resize()
# np.floor rounds every element down to a whole number.
a = np.floor(10 * np.random.random((3, 4)))
print(a)
# Sample output (the values are random, so they differ on every run):
# [[4. 7. 8. 4.]
#  [6. 1. 6. 2.]
#  [5. 3. 7. 6.]]
a.shape  # (3, 4)
# ravel() flattens the array to one dimension.
print(a.ravel())  # [4. 7. 8. 4. 6. 1. 6. 2. 5. 3. 7. 6.]
# Assigning to .shape changes the shape of the array in place.
a.shape = (6, 2)
print(a)
# [[4. 7.]
#  [8. 4.]
#  [6. 1.]
#  [6. 2.]
#  [5. 3.]
#  [7. 6.]]
# .T is the transpose.
print(a.T)
# [[4. 8. 6. 6. 5. 7.]
#  [7. 4. 1. 2. 3. 6.]]
# resize() changes the array in place and returns None, whereas
# reshape() returns the reshaped array and leaves the original's shape alone.
print(a.resize((2, 6)))  # None
print(a)
# [[4. 7. 8. 4. 6. 1.]
#  [6. 2. 5. 3. 7. 6.]]
# A dimension given as -1 in a reshape is computed from the other dimensions.
a.reshape(3, -1)
# [[4. 7. 8. 4.]
#  [6. 1. 6. 2.]
#  [5. 3. 7. 6.]]
hstack()、vstack()、hsplit()、vsplit()
a = np.floor(10 * np.random.random((2, 2)))
b = np.floor(10 * np.random.random((2, 2)))
print(a)
print('---')
print(b)
print('---')
# np.vstack() stacks arrays on top of each other (vertically);
# np.hstack() places arrays side by side (horizontally).
print(np.hstack((a, b)))
# Sample output (the values are random, so they differ on every run):
# [[ 5. 6.]
#  [ 1. 5.]]
# ---
# [[ 8. 6.]
#  [ 9. 0.]]
# ---
# [[ 5. 6. 8. 6.]
#  [ 1. 5. 9. 0.]]
a = np.floor(10 * np.random.random((2, 12)))
# hsplit examples, left disabled as in the original notes:
# print(a)
# print(np.hsplit(a, 3))
# print(np.hsplit(a, (3, 4)))  # Split a after the third and the fourth column
a = np.floor(10 * np.random.random((12, 2)))
print(a)
# [[ 5. 2.]
#  [ 1. 3.]
#  [ 9. 6.]
#  [ 2. 2.]
#  [ 7. 2.]
#  [ 8. 2.]
#  [ 1. 7.]
#  [ 2. 8.]
#  [ 4. 4.]
#  [ 8. 5.]
#  [ 4. 3.]
#  [ 2. 3.]]
# vsplit(a, 3) cuts the rows into 3 equal groups and returns them as a list.
np.vsplit(a, 3)
# [array([[ 5., 2.],
#         [ 1., 3.],
#         [ 9., 6.],
#         [ 2., 2.]]), array([[ 7., 2.],
#         [ 8., 2.],
#         [ 1., 7.],
#         [ 2., 8.]]), array([[ 4., 4.],
#         [ 8., 5.],
#         [ 4., 3.],
#         [ 2., 3.]])]
copy of array
# Plain assignment copies neither the array object nor its data.
a = np.arange(12)
b = a
# a and b are two names for the same ndarray object.
print(b is a)  # True
# Reshaping through one name is therefore visible through the other.
b.shape = 3, 4
print(a.shape)  # (3, 4)
# Identical ids confirm there is a single object (the number itself varies per run).
print(id(a))  # 82691200
print(id(b))  # 82691200
view()浅复制和copy()深复制
# view() creates a new array object that looks at the same data.
c = a.view()
c is a  # False
# Reshaping the view leaves the shape of the original untouched...
c.shape = 2, 6
print(a.shape)  # (3, 4)
# ...but writing through the view changes the shared data, so a changes too.
c[0, 4] = 1234
print(a)
# [[ 0 1 2 3]
#  [1234 5 6 7]
#  [ 8 9 10 11]]
# copy() makes a complete copy of the array and its data.
d = a.copy()
d is a  # False
d[0, 0] = 9999
print(d)
# [[9999 1 2 3]
#  [1234 5 6 7]
#  [ 8 9 10 11]]
# The write to d did not reach a.
print(a)
# [[ 0 1 2 3]
#  [1234 5 6 7]
#  [ 8 9 10 11]]
先找索引,然后通过索引找对应值、argmax()
import numpy as np
data = np.sin(np.arange(20)).reshape(5, 4)
print(data)
# [[ 0. 0.84147098 0.90929743 0.14112001]
#  [-0.7568025 -0.95892427 -0.2794155 0.6569866 ]
#  [ 0.98935825 0.41211849 -0.54402111 -0.99999021]
#  [-0.53657292 0.42016704 0.99060736 0.65028784]
#  [-0.28790332 -0.96139749 -0.75098725 0.14987721]]
# argmax(axis=0) returns, for every column, the row index of that column's
# largest value (nothing is sorted).
ind = data.argmax(axis=0)
print(ind)  # [2 0 3 1]
# Pair each row index with its column number to fetch the maxima themselves.
data_max = data[ind, range(data.shape[1])]
print(data_max)  # [0.98935825 0.84147098 0.99060736 0.6569866 ]
# The values found through the indices equal the column maxima.
all(data_max == data.max(axis=0))  # True
tile()
import numpy as np
a = np.arange(0, 40, 10)
# tile(a, (3, 5)) repeats a to build a larger array: 3 copies down the rows
# and 5 copies across the columns.
b = np.tile(a, (3, 5))
print(b)
# [[ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]
#  [ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]
#  [ 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30 0 10 20 30]]
sort()、argsort()
a = np.array([[4, 3, 5], [1, 2, 1]])
print(a)
# [[4 3 5]
#  [1 2 1]]
# np.sort returns a sorted copy; axis=1 sorts within each row.
b = np.sort(a, axis=1)
print(b)
# [[3 4 5]
#  [1 1 2]]
# The sort method sorts the array itself, in place.
a.sort(axis=1)
print(a)
# [[3 4 5]
#  [1 1 2]]
# argsort returns the indices that would put the array in ascending order.
a = np.array([4, 3, 1, 2])
j = np.argsort(a)
print(j)  # [2 3 1 0]
# Indexing with those indices yields the sorted values.
print(a[j])  # [1 2 3 4]