数据分析—numpy
import pandas as pd
import numpy as np
创建普通数组
# A 1-D array from a flat list, and a 3x3 array from nested lists
# (each inner list becomes one row).
a = np.array([1, 2, 3])
b = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
# Element assignment mutates the array in place: row 1, column 1 becomes 10.
b[1, 1] = 10
# Print the shape of each array, the element type of `a`, then the modified `b`,
# one value per line.
print(a.shape, b.shape, a.dtype, b, sep="\n")
(3,) (3, 3) int64 [[ 1  2  3] [ 4 10  6] [ 7  8  9]]
创建结构数组
# Structured dtype describing one student record. 'S25' is a byte string of up
# to 25 bytes (which is why names print as b'...'), 'i' is a C int and 'f' is a
# single-precision float. 'names' and 'formats' must be lists of equal length.
personalType = np.dtype(
    {
        "names": ["name", "age", "chinese", "math", "english"],
        "formats": ["S25", "i", "i", "i", "f"],
    }
)
# Each tuple is one record; its values are matched to the fields by position.
students = np.array(
    [
        ("huzai", 22, 99, 99, 99.5),
        ("huzai", 22, 99, 99, 99.5),
    ],
    dtype=personalType,
)
# Indexing a structured array by field name selects that field for every
# record, so no leading [:] slice is needed.
age = students["age"]
print(np.mean(age))
22.0
# Show every record; the name field is displayed as bytes because its format is 'S25'.
print(students)
[(b'huzai', 22, 99, 99, 99.5) (b'huzai', 22, 99, 99, 99.5)]
创建连续数组
# arange(start, stop, step): the stop value 11 is excluded, giving 1, 3, 5, 7, 9.
x1 = np.arange(1, 11, 2)
# linspace(start, stop, num): 5 evenly spaced floats from 1 to 9, both ends included.
x2 = np.linspace(1, 9, num=5)
print(x1, x2, sep="\n")
[1 3 5 7 9] [1. 3. 5. 7. 9.]
数组间的算数运算
# Element-wise arithmetic between x1 and x2, one result per line, in the order
# add, subtract, multiply, divide.
print(
    *(ufunc(x1, x2) for ufunc in (np.add, np.subtract, np.multiply, np.divide)),
    sep="\n",
)
[ 2. 6. 10. 14. 18.]
[0. 0. 0. 0. 0.]
[ 1. 9. 25. 49. 81.]
[1. 1. 1. 1. 1.]
统计函数
数组中的最值 np.amin() amax()
# 3x3 sample matrix; note that this rebinds the name `a`.
a = np.array([[1, 3, 7], [2, 5, 8], [6, 4, 9]])
# No axis: the minimum over every element.
print(np.amin(a))
# axis=0: the minimum of each column.
print(np.amin(a, 0))
# axis=1: the minimum of each row.
print(np.amin(a, 1))
1 [1 3 7] [1 2 4]
统计最大值与最小值之差 ptp()
# Peak-to-peak range (maximum minus minimum).
# No axis: over every element of `a`.
print(np.ptp(a))
# axis=0: the range of each column.
print(np.ptp(a, 0))
# axis=1: the range of each row.
print(np.ptp(a, 1))
8 [5 2 2] [6 6 5]
统计数组的百分位数 percentile(a, q, axis) a:输入数组 q:要计算的百分位(0–100) axis:沿哪个轴计算(0 表示按列,1 表示按行,省略则对全部元素计算)
# 50th percentile of `a`: over all elements, then per column (axis=0),
# then per row (axis=1).
print(np.percentile(a, 50))
print(np.percentile(a, 50, axis=0))
print(np.percentile(a, 50, axis=1))
5.0 [2. 4. 8.] [3. 5. 6.]
统计数组中的中位数以及平均数 median() mean()
# Median of `a`: over all elements, then per column (axis=0), then per row (axis=1).
print(
    np.median(a),
    np.median(a, axis=0),
    np.median(a, axis=1),
    sep="\n",
)
5.0 [2. 4. 8.] [3. 5. 6.]
数组中的加权平均值 average(a,weights)
# Sample values and one weight per value; note that this rebinds the name `b`.
b = np.array([1, 2, 3, 4])
wts = np.array([1, 2, 3, 4])
# Without weights, average() is the plain arithmetic mean.
print(np.average(b))
# With weights, the result is sum(b * wts) / sum(wts).
print(np.average(b, weights=wts))
2.5 3.0
统计数组中的标准差(std())与方差(var())
# Standard deviation and variance of `b`, using NumPy's default of dividing by N
# (the population form, ddof=0).
print(np.std(b), np.var(b), sep="\n")
1.118033988749895 1.25
Numpy排序
# np.sort returns a sorted copy, so `a` itself is left unchanged.
print(a)
# Default axis (-1): sort the values within each row.
print(np.sort(a))
# axis=0: sort the values within each column.
print(np.sort(a, axis=0))
[[1 3 7] [2 5 8] [6 4 9]] [[1 3 7] [2 5 8] [4 6 9]] [[1 3 7] [2 4 8] [6 5 9]]