import numpy as np
import matplotlib. pyplot as plt
import pandas as pd
import seaborn as sns
import statsmodels as sm
# 4.1 Notebook 的使用
# Sample list for trying out add_sum.
a = [1, 2, 3]


def add_sum(list, sum=0):
    """Return ``sum`` plus every item of ``list``, accumulated left to right.

    The parameter names shadow the ``list`` and ``sum`` builtins inside this
    function; they are kept unchanged so existing keyword callers
    (``add_sum(list=..., sum=...)``) continue to work.

    Args:
        list: Iterable of values that support ``+``.
        sum: Starting value of the accumulation (default ``0``).

    Returns:
        The starting value with each item added in iteration order. An empty
        iterable returns the starting value unchanged.
    """
    total = sum
    for num in list:
        # Rebind rather than use ``+=`` so that a mutable starting value
        # supplied by the caller is never modified in place.
        total = total + num
    return total
# 4.2 NumPy 基础知识
# Compare doubling one million integers with a vectorised NumPy operation
# against the equivalent pure-Python list comprehension. Wrap either loop in a
# timer (e.g. IPython's %time) to measure the difference.
my_arr = np.arange(1000000)
my_list = list(range(1000000))

# NumPy: the whole array is multiplied by one vectorised expression.
for _ in range(100):
    my_arr2 = my_arr * 2

# Pure Python: each element is multiplied individually by the interpreter.
for _ in range(100):
    mylist2 = [x * 2 for x in my_list]
# A 2x3 array of standard-normal samples, used to show element-wise arithmetic.
data = np.random.randn(2, 3)
print(data)
print(data * 10)    # every element multiplied by 10
print(data + data)  # element-wise sum of the array with itself
print(data.shape)
print(data.dtype)
# Bare expression: its value is only echoed in an interactive session.
np.arange(15)
# A flat list becomes a one-dimensional array.
series1 = [1, 2, 3, 5, 6, 7]
arr1 = np.array(series1)
print(arr1)

# Nested lists of equal length become a two-dimensional array.
series2 = [[1, 2, 3, 4], [5, 6, 7, 8]]
arr2 = np.array(series2)
print(arr2)
print(arr2.ndim)
print(arr2.shape)
print(arr2.dtype)

# Pre-allocated arrays: zeros, ones, and an uninitialised buffer.
arr3 = np.zeros(10)
arr4 = np.zeros((2, 3))
arr5 = np.ones((3, 5))
arr6 = np.empty((2, 3, 3))  # np.empty does not initialise the values
print(arr3)
print(arr4)
print(arr5)
print(arr6)
# Explicit dtype conversion with astype: integers -> float64 -> int32.
arr = np.array([1, 2, 3, 4, 5])
print(arr.dtype)
arr1 = arr.astype(np.float64)
print(arr1.dtype)
arr2 = arr1.astype(np.int32)
print(arr2.dtype)

# Strings that hold numbers can be converted to floats the same way.
arr = np.array(['1', '2', '3', '4', '5'])
print(arr.dtype)
print(arr.astype('float64').dtype)
print(arr)
print(arr.astype('float64'))

# The target dtype may be taken from another array ...
arr1 = np.arange(10)
print(arr1.dtype)
arr2 = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
print(arr2.dtype)
arr3 = arr1.astype(arr2.dtype)
print(arr3.dtype)
# ... or given as a short type code ('u4' is an unsigned 32-bit integer).
arr4 = arr3.astype('u4')
print(arr4)
# Arithmetic between arrays and scalars is applied element by element.
arr = np.array([[1, 2, 3], [4, 5, 6]])
print(arr)

arr1 = 1 / arr        # reciprocal of every element
print(arr1)

arr2 = arr * arr      # element-wise square (not a matrix product)
print(arr2)

arrsqrt = arr ** 0.5  # element-wise square root
print(arrsqrt)

# Comparison yields a boolean array; as a bare expression it is only echoed
# in an interactive session.
arr2 > arr
# 4.3 NumPy 的索引和切片
# Basic indexing and slicing of a one-dimensional array.
arr = np.arange(10)
print(arr)
arr[4]      # bare expressions: echoed only in an interactive session
arr[5:8]

# Assigning a scalar to a slice broadcasts it over the whole selection.
arr[5:8] = 12
print(arr)

# A slice is a view: writing through it changes the original array.
slice_arr = arr[5:8]
slice_arr[1] = 10
print(arr)
slice_arr[:] = 10
print(arr)

# An explicit copy is independent, so arr is left untouched here.
slice_arr = arr[5:8].copy()
slice_arr[:] = 111
print(arr)
# Two-dimensional array: a single element is addressed as [row, column].
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr)
print(arr[0, 1])

# Three-dimensional array: indexing the first axis yields a 2-D sub-array.
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
print(arr3d)
print(arr3d[0])
print(arr3d[1])

# Keep a copy of the first sub-array, overwrite it with a scalar, then
# restore the saved values.
slice_arr3d = arr3d[0].copy()
arr3d[0] = 23
print(arr3d)
arr3d[0] = slice_arr3d
print(arr3d)

print(arr3d[0, 1, 2])
# Slicing a two-dimensional array along one or both axes.
arr2d = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr2d[:2])       # first two rows
print(arr2d[:2, 1:])   # first two rows, columns from index 1 onwards
print(arr2d[1, :2])    # second row, first two columns
print(arr2d[:, :2])    # every row, first two columns

# Assigning to a slice writes into the selected region of arr2d.
arr2d[:2, 1:] = 0
print(arr2d)
# Boolean indexing: one name per row of data.
names = np.array(['zhao', 'qian', 'sun', 'li', 'fen', 'chen', 'chu', 'wei'])
data = np.random.randn(8, 4)
print(names)
print(data)

# Element-wise comparison gives a boolean mask (echoed only interactively).
names == 'sun'
print('\n')

# The mask selects rows; it can be combined with a column slice.
print(data[names == 'sun'])
print(data[names == 'sun', :2])
print(data[names != 'sun', :2])
print('\n')

# Masks can be stored, negated with ~ and combined with |.
cond = names == 'sun'
print(data[~cond, :2])
print(data[(names == 'sun') | (names == 'zhao'), :2])

# Assignment through a mask: set negative entries to zero ...
data[data < 0] = 0
print(data)
# ... then overwrite every row whose name is not 'sun'.
data[names != 'sun'] = 100
print(data)
# Fancy indexing: fill an 8x4 array so that every entry of row i equals i.
arr = np.empty((8, 4))
for i in range(8):
    arr[i] = i

# A list of row indices selects those rows in the given order.
print(arr[[2, 3, 1, 5]])
# Negative indices count from the end.
print(arr[[-1, -2, -3]])
# Two index lists are paired up: elements (1, 0), (2, 1), (3, 2), (4, 3).
print(arr[[1, 2, 3, 4], [0, 1, 2, 3]])
# Select rows first, then columns, to get the rectangular sub-array instead.
print(arr[[1, 2, 3, 4]][:, [0, 1, 2, 3]])
# 4.4 NumPy 的基本运算
# 转置：.T、transpose()、swapaxes()；元素级别的运算：根号、指数、maximum
# Transposing a 2-D array and forming the matrix product arr.T @ arr.
arr = np.arange(15).reshape((3, 5))
print(arr)
print(arr.T)
print(np.dot(arr.T, arr))

# For higher-dimensional arrays the axes can be permuted or swapped pairwise.
arr = np.arange(16).reshape((2, 2, 4))
print(arr)
print(arr.transpose((1, 0, 2)))
print(arr.swapaxes(1, 2))

# Unary ufuncs operate on every element.
arr = np.arange(10)
print(np.sqrt(arr))
print(np.exp(arr))

# Binary ufunc: element-wise maximum of two arrays (echoed only interactively).
x = np.random.randn(8)
y = np.random.randn(8)
np.maximum(x, y)

# modf returns two arrays: the fractional parts and the integral parts.
arr = np.random.randn(7) * 5
remainder, whole_part = np.modf(arr)
print(remainder)
print(whole_part)
# 4.5 NumPy 的数据处理
# 随机数组的生成；可视化；条件逻辑对数组进行处理；描述性统计；排序；集合、唯一化；存储和加载
# Evaluate sqrt(x^2 + y^2) on a square grid covering [-5, 5) in steps of 0.01.
points = np.arange(-5, 5, 0.01)
# meshgrid expands the 1-D coordinates into two 2-D coordinate matrices.
xs, ys = np.meshgrid(points, points)
z = np.sqrt(xs ** 2 + ys ** 2)
print(z)
# Show z as a greyscale image with a colour bar.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence, so an ordinary literal
# triggers an invalid-escape warning. The text passed to matplotlib is the same.
plt.title(r'Image plot of $\sqrt{x^2+y^2}$ for a grid of values')
# Conditional selection: take the value from xarr where cond is True,
# otherwise from yarr.
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
cond = np.array([True, False, True, True, False])

# Pure-Python version using a list comprehension.
result = [(x if c else y) for x, y, c in zip(xarr, yarr, cond)]
print(result)

# Vectorised equivalent with np.where.
result = np.where(cond, xarr, yarr)
print(result)

# np.where also accepts scalars for either branch.
arr = np.random.randn(3, 4)
print(arr > 0)
result = np.where(arr > 0, 2, -2)   # 2 for positive entries, -2 otherwise
print(result)
result = np.where(arr > 0, 2, arr)  # replace only the positive entries by 2
print(result)
# Descriptive statistics: over the whole array, or along a chosen axis.
arr = np.random.randn(5, 4)
print(arr)
print(arr.mean())
print(np.mean(arr))
print(arr.mean(axis=1))  # one mean per row
print(arr.mean(axis=0))  # one mean per column
print(arr.sum(axis=1))   # one sum per row

# Cumulative sum of a one-dimensional array.
arr = np.array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
print(arr.cumsum())

# Cumulative sum down the rows and cumulative product across the columns.
arr = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
print(arr.cumsum(axis=0))
print(arr.cumprod(axis=1))

# Summing a boolean mask counts the True entries.
arr = np.random.randn(100)
print((arr > 0).sum())

# any(): at least one True; all(): every value True.
bools = np.array([False, False, True, False])
print(bools.any())
print(bools.all())
# ndarray.sort() sorts in place.
arr = np.random.randn(6)
print(arr)
arr.sort()
print(arr)

# For a 2-D array, pass an axis: here each row is sorted independently.
arr = np.random.randn(3, 4)
print(arr)
arr.sort(axis=1)
print(arr)

# After sorting, the element at 5% of the length approximates the 5% quantile
# (bare expression: echoed only in an interactive session).
large_arr = np.random.randn(1000)
large_arr.sort()
large_arr[int(0.05 * len(large_arr))]
# Unique values and set-style operations on one-dimensional arrays.
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
print(np.unique(names))     # sorted unique values
print(sorted(set(names)))   # pure-Python equivalent

values = np.array([1, 2, 3, 4, 5, 6, 7, 2])
# Membership test per element. np.isin replaces the deprecated np.in1d and
# returns the same boolean array for one-dimensional input.
print(np.isin(values, [2, 3, 6]))
print(np.intersect1d(values, [2, 3, 5]))    # values present in both
print(np.union1d(values, [2, 3, 5, 8]))     # values present in either
print(np.setdiff1d(values, [2, 3, 5, 8]))   # in values but not in the list
print(np.setxor1d(values, [2, 3, 9, 0]))    # in exactly one of the two
# Store an array on disk and read it back.
arr = np.random.randn(3, 4)
np.save('some_array', arr)            # np.save appends the .npy extension
arr_load = np.load('some_array.npy')
print(arr_load)

# savez stores several arrays in one archive, each under its keyword name.
np.savez('arr_group.npz', a=arr, b=arr_load)
# Loading an .npz archive keeps the file open; the with-block closes it once
# the requested array has been read.
with np.load('arr_group.npz') as group:
    print(group['a'])
# 4.6 NumPy 线性代数
# 矩阵乘法；矩阵分解（QR、SVD）；逆、行列式、特征值；方程组
from numpy.linalg import inv, qr, det

# Matrix product of a 2x3 and a 3x3 matrix, as a function and as a method.
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[2, 3, 4], [5, 6, 7], [6, 7, 8]])
print(np.dot(x, y))
print(x.dot(y))

# Build the symmetric matrix X^T X and invert it.
X = np.random.randn(4, 4)
mat = X.T.dot(X)
A = inv(mat)
Q = A.dot(mat)  # inverse times the matrix itself
P = A.dot(X.T)
print(P)
print(Q)

# QR decomposition of mat.
q, r = qr(mat)
print(q)
print(r)

# Trace and determinant.
print(mat.trace())
print(det(mat))
# 4.7 伪随机数的生成
from random import normalvariate

# A 4x4 array of samples from the standard normal distribution.
data = np.random.normal(size=(4, 4))
print(data)

# Draw one million normal samples, first one at a time with the standard
# library and then in a single NumPy call (which replaces the list).
N = 1000000
samples = [normalvariate(0, 1) for _ in range(N)]
samples = np.random.normal(size=N)

# Seeding the global generator makes the following draws reproducible.
np.random.seed(1234)
arr = np.random.randn(10)
print(arr)

# A different seed gives a different sequence.
np.random.seed(1233)
arr = np.random.randn(10)
print(arr)
# 4.8 随机漫步实例
# 纯 Python 语法；NumPy 累计求和；模拟随机游走的多次实现
import random

# Simple random walk in pure Python: start at 0 and move +1 or -1 on every
# step, recording the position after each move.
position = 0
walk = [position]
steps = 1000
for i in range(steps):
    # randint(0, 1) returns 0 or 1; map 1 -> +1 and 0 -> -1.
    step = 1 if random.randint(0, 1) else -1
    position += step
    walk.append(position)
# Plot the first 100 positions of the walk computed so far.
plt.plot(walk[:100])

# Same simulation with NumPy: draw all coin flips at once, map them to
# +1 / -1 steps and take the cumulative sum to get the positions.
nsteps = 1000
draws = np.random.randint(0, 2, size=nsteps)
steps = np.where(draws > 0, 1, -1)
walk = steps.cumsum()
print(walk[:100])
plt.plot(walk[:100])

# Extreme positions reached along the walk.
minwalk = walk.min()
maxwalk = walk.max()
print(minwalk, maxwalk)

# argmax on a boolean array gives the index of the first True, i.e. the first
# step at which the walk is at least 10 away from the origin.
t = (np.abs(walk) >= 10).argmax()
print(t)
# Simulate many random walks at once: one walk per row, one step per column.
nwalks = 5000
nsteps = 1000
steps = np.random.randint(0, 2, size=(nwalks, nsteps))
steps = np.where(steps > 0, 1, -1)
walk = np.cumsum(steps, axis=1)
print(walk[:100])

# Extreme positions over all walks.
min_walk = walk.min()
max_walk = walk.max()
print(min_walk, max_walk)

# Boolean mask: which walks ever get at least 30 away from the origin.
hits30 = (np.abs(walk) >= 30).any(axis=1)
print(hits30)
# Number of such walks (was misspelled ``hist30``, which raised NameError).
# As a bare expression it is only echoed in an interactive session.
hits30.sum()

# For the walks that did reach 30, the index of the first step at which
# they did so, and the average of those indices.
crossing_times = (np.abs(walk[hits30]) >= 30).argmax(axis=1)
print(crossing_times)
print(crossing_times.mean())