《利用python进行数据分析》第四章numpy

最新推荐文章于 2023-02-20 20:38:25 发布
linph174
最新推荐文章于 2023-02-20 20:38:25 发布
阅读量256
点赞数
分类专栏： python
本文链接：https://blog.csdn.net/linph174/article/details/106087964
版权
python 专栏收录该内容
8 篇文章 0 订阅
订阅专栏

import numpy as np
import datetime
import matplotlib.pyplot as plt
import pandas as pd
def numpy_time_low():
    """Compare the speed of NumPy and pure Python when doubling 10,000 integers.

    Prints the type of the ndarray and of the list, then the elapsed time in
    whole microseconds for ten vectorised ``ndarray * 2`` operations, then the
    elapsed time in whole microseconds for ten equivalent list comprehensions.
    """
    # Imported locally so this function does not need a new file-level import.
    from time import perf_counter

    repeats = 10

    my_arr = np.arange(10000)       # ndarray
    my_list = list(range(10000))    # plain Python list

    print(type(my_arr))
    print(type(my_list))

    # perf_counter() is a high-resolution clock meant for measuring short
    # durations. timedelta.microseconds is only the sub-second field of the
    # duration, so whole seconds would be dropped from the reported time.
    start = perf_counter()
    for _ in range(repeats):
        doubled_arr = my_arr * 2
    elapsed = perf_counter() - start
    print(int(elapsed * 1e6))

    start = perf_counter()
    for _ in range(repeats):
        doubled_list = [x * 2 for x in my_list]
    elapsed = perf_counter() - start
    print(int(elapsed * 1e6))

# 运行结果
'''
<class 'numpy.ndarray'>
<class 'list'>
0
7579
'''

def multi_dimention_arr():
    """Print a random 2x3 array, two element-wise results, and its metadata.

    In order, the function prints: the array itself, every element times 10,
    the array added to itself element by element, the shape, and the dtype.

    Sample output from one run (the numbers are random and differ each time)::

        [[-0.57178027  0.36181498  0.42434348]
        [-0.25002345 -0.06467572  2.14996391]]
        [[-5.71780266  3.6181498   4.24343484]
        [-2.50023447 -0.64675716 21.49963914]]
        [[-1.14356053  0.72362996  0.84868697]
        [-0.50004689 -0.12935143  4.29992783]]
        (2, 3)
        float64
    """
    samples = np.random.randn(2, 3)
    scaled = samples * 10          # scalar is applied to every element
    doubled = samples + samples    # element-wise addition

    for item in (samples, scaled, doubled, samples.shape, samples.dtype):
        print(item)

def product_ndarray():
    """Show several ways of creating ndarrays and print their attributes.

    Builds a 1-D array and a 2-D array from Python lists, prints both, then
    prints ``ndim``, ``shape`` and ``dtype`` of each, followed by arrays
    produced by ``np.zeros``, ``np.empty`` and ``np.arange``.
    """
    flat_values = [6, 7.5, 8, 0, 1]
    nested_values = [[1, 2, 3, 4],
                     [5, 6, 7, 8]]

    # np.array converts a (possibly nested) list into an ndarray.
    flat = np.array(flat_values)
    grid = np.array(nested_values)

    print(flat)
    print(grid)

    # Number of dimensions, shape and element type of each array in turn.
    for array in (flat, grid):
        for attribute in ("ndim", "shape", "dtype"):
            print(getattr(array, attribute))

    print(np.zeros(10))
    print(np.zeros((3, 6)))
    # np.empty does not initialise its memory, so the printed values are arbitrary.
    print(np.empty((2, 3, 2)))

    print(np.arange(15))

def ndarray_data_type():
    """Demonstrate explicit dtypes and type conversion with ``astype``.

    Prints the dtype of a float64 array and of an int32 array, the dtype of
    the int32 array after conversion to float64, and finally an array of
    numeric byte strings converted to float64.
    """
    # Force the element type instead of letting NumPy infer it.
    arr1 = np.array([1, 2, 3], dtype=np.float64)
    arr2 = np.array([1, 2, 3], dtype=np.int32)

    print(arr1.dtype)
    print(arr2.dtype)

    # astype returns a new array converted to the requested dtype.
    float_arr = arr2.astype(np.float64)
    print(float_arr.dtype)

    # np.bytes_ is the byte-string scalar type. The old alias np.string_ was
    # removed in NumPy 2.0, so using it raises AttributeError there.
    numeric_strings = np.array(['1.25', '-9.6', '42'], dtype=np.bytes_)
    print(numeric_strings.astype(np.float64))


"""
8位的无符号整形： 
0000 0000   ：  0
1111 1111   ：  255

8位的有符号整形： 
1 000 0000 ：   -0（-128）  
1 111 1111
0 000 0000 ：   +0
0 111 1111：    +127

"""

'''
float64
int32
float64
[ 1.25 -9.6  42.  ]
'''

def numpy_4_operator():
    """Print the results of element-wise arithmetic and comparison on 2x3 arrays."""
    # The rows must be wrapped in one outer list: np.array takes the data as
    # its first argument, and a second positional argument is read as dtype.
    base = np.array([[1., 2., 3.], [4., 5., 6.]])

    # Every operator below works element by element.
    element_wise_results = (
        base,
        base * base,
        base - base,
        1 / base,
        base ** 0.5,
    )
    for result in element_wise_results:
        print(result)

    other = np.array([[0., 4., 1.], [7., 2., 12.]])
    print(other)
    # Comparing two arrays of the same shape yields a boolean array.
    print(other > base)

'''
[[1. 2. 3.]
 [4. 5. 6.]]
[[ 1.  4.  9.]
 [16. 25. 36.]]
[[0. 0. 0.]
 [0. 0. 0.]]
[[1.         0.5        0.33333333]
 [0.25       0.2        0.16666667]]
[[1.         1.41421356 1.73205081]
 [2.         2.23606798 2.44948974]]
[[ 0.  4.  1.]
 [ 7.  2. 12.]]
[[False  True False]
 [ True False  True]]
'''
"""
0 1 2 3 4 12 12 12 8 9(内存里面的)
    arr->
        arr_slice->
                12 12 12
"""
def base_index_slice():
    """Walk through basic indexing and slicing on 1-D, 2-D and 3-D arrays.

    Everything is printed; nothing is returned.
    """
    seq = np.arange(10)
    print(seq)

    print(seq[5])
    print(seq[5:8])

    # Assigning a scalar to a slice writes it into every selected element.
    seq[5:8] = 12
    print(seq)

    # A slice is a view on the same data, so writing through it changes seq.
    window = seq[5:8]
    print(window)
    window[1] = 12345
    print(seq)

    # 2-D array
    grid = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    print(grid)
    print(grid[2])
    # Chained indexing and comma-separated indexing pick the same element.
    print(grid[0][2])
    print(grid[0, 2])

    # 3-D array (e.g. as used in image processing)
    cube = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])
    print(cube)
    print(cube[0])

    # Copy of the first 2x3 block taken before it is overwritten; the copy is
    # not used again in this function.
    snapshot = cube[0].copy()
    cube[0] = 42
    print(cube)
    print(cube[1, 0])

    for selection in (grid[:2], grid[:2, 1:], grid[1, :2]):
        print(selection)

    # Mixing a slice with an integer index drops that axis; reshape then
    # turns the 1-D result into a column and afterwards into a row.
    column = grid[:2, 2]
    for new_shape in (None, (2, 1), (1, 2)):
        if new_shape is not None:
            column = column.reshape(*new_shape)
        print(column)
        print(column.ndim)
        print(column.shape)

    # A slice keeps the axis (2-D result); an integer index removes it (1-D).
    print(grid[:, :1])
    print(grid[:, 0])
'''
[0 1 2 3 4 5 6 7 8 9]
5
[5 6 7]
[ 0  1  2  3  4 12 12 12  8  9]
[12 12 12]
[    0     1     2     3     4    12 12345    12     8     9]
[[1 2 3]
 [4 5 6]
 [7 8 9]]
[7 8 9]
3
3
[[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]
[[1 2 3]
 [4 5 6]]
[[[42 42 42]
  [42 42 42]]

 [[ 7  8  9]
  [10 11 12]]]
[7 8 9]
[[1 2 3]
 [4 5 6]]
[[2 3]
 [5 6]]
[4 5]
[3 6]
1
(2,)
[[3]
 [6]]
2
(2, 1)
[[3 6]]
2
(1, 2)
[[1]
 [4]
 [7]]
[1 4 7]
'''
def bool_index():
    """Demonstrate boolean-mask indexing using a names array and a 7x4 data array.

    Each row of the random data array is paired with the name at the same
    position. All intermediate masks and selections are printed.
    """
    people = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
    print(people)

    values = np.random.randn(7, 4)
    print(values)

    # Comparing the array with a string gives one boolean per name.
    is_bob = people == 'Bob'
    print(is_bob)
    print(values[is_bob])

    # A boolean row mask can be combined with a column slice or index.
    print(values[is_bob, 2:])
    print(values[is_bob, 3])

    # Two ways of selecting everything except 'Bob': != and ~ (negation).
    print(people != 'Bob')
    print(values[~is_bob])

    print(type(is_bob))
    print(is_bob)

    print(values[~is_bob])

    # Masks are combined with | (or); the keyword "or" does not work on arrays.
    bob_or_will = is_bob | (people == 'Will')
    print(bob_or_will)
    print(values[bob_or_will])

    # Assignment through a mask: clip negatives to zero ...
    values[values < 0] = 0
    print(values)

    # ... then overwrite every row that does not belong to 'Joe'.
    values[people != 'Joe'] = 7
    print(values)

'''
结果
['Bob' 'Joe' 'Will' 'Bob' 'Will' 'Joe' 'Joe']
[[-0.18702161 -0.95522846 -0.99339457 -1.56085347]
 [-0.31860902  0.20224305 -0.07895    -0.62026157]
 [-0.36047727 -1.4156936   1.52685842  0.30149966]
 [-1.0082008   0.52368675  0.03219765  2.25731933]
 [ 0.23101367 -0.15218163 -0.40195011  0.9762289 ]
 [ 1.39076376  1.01490761 -0.24728861  0.73721141]
 [ 0.44098812  0.10128918  1.41775841 -0.82299303]]
[ True False False  True False False False]
[[-0.18702161 -0.95522846 -0.99339457 -1.56085347]
 [-1.0082008   0.52368675  0.03219765  2.25731933]]
[[-0.99339457 -1.56085347]
 [ 0.03219765  2.25731933]]
[-1.56085347  2.25731933]
[False  True  True False  True  True  True]
[[-0.31860902  0.20224305 -0.07895    -0.62026157]
 [-0.36047727 -1.4156936   1.52685842  0.30149966]
 [ 0.23101367 -0.15218163 -0.40195011  0.9762289 ]
 [ 1.39076376  1.01490761 -0.24728861  0.73721141]
 [ 0.44098812  0.10128918  1.41775841 -0.82299303]]
<class 'numpy.ndarray'>
[ True False False  True False False False]
[[-0.31860902  0.20224305 -0.07895    -0.62026157]
 [-0.36047727 -1.4156936   1.52685842  0.30149966]
 [ 0.23101367 -0.15218163 -0.40195011  0.9762289 ]
 [ 1.39076376  1.01490761 -0.24728861  0.73721141]
 [ 0.44098812  0.10128918  1.41775841 -0.82299303]]
[ True False  True  True  True False False]
[[-0.18702161 -0.95522846 -0.99339457 -1.56085347]
 [-0.36047727 -1.4156936   1.52685842  0.30149966]
 [-1.0082008   0.52368675  0.03219765  2.25731933]
 [ 0.23101367 -0.15218163 -0.40195011  0.9762289 ]]
[[0.         0.         0.         0.        ]
 [0.         0.20224305 0.         0.        ]
 [0.         0.         1.52685842 0.30149966]
 [0.         0.52368675 0.03219765 2.25731933]
 [0.23101367 0.         0.         0.9762289 ]
 [1.39076376 1.01490761 0.         0.73721141]
 [0.44098812 0.10128918 1.41775841 0.        ]]
[[7.         7.         7.         7.        ]
 [0.         0.20224305 0.         0.        ]
 [7.         7.         7.         7.        ]
 [7.         7.         7.         7.        ]
 [7.         7.         7.         7.        ]
 [1.39076376 1.01490761 0.         0.73721141]
 [0.44098812 0.10128918 1.41775841 0.        ]]

'''
def magical_index():
    """Demonstrate fancy indexing, i.e. indexing with lists of integers."""
    # 8x4 array in which every element of row i equals i.
    rows = np.empty((8, 4))
    rows[...] = np.arange(8).reshape(8, 1)

    print(rows)

    # A list of row numbers selects those rows in the given order;
    # negative numbers count from the end.
    print(rows[[4, 3, 0, 6]])
    print(rows[[-3, -5, -7]])

    table = np.arange(32).reshape((8, 4))
    print(table)

    row_order = [1, 5, 7, 2]
    col_order = [0, 3, 1, 2]

    picked = table[row_order]
    print(picked)
    # Two index lists are paired up: elements (1,0), (5,3), (7,1) and (2,2).
    print(table[row_order, col_order])
    # Select the rows first, then reorder the columns of the result.
    print(picked[:, col_order])
    # Select the rows first, then reorder the rows of the result again.
    print(picked[col_order])
'''
[[0. 0. 0. 0.]
 [1. 1. 1. 1.]
 [2. 2. 2. 2.]
 [3. 3. 3. 3.]
 [4. 4. 4. 4.]
 [5. 5. 5. 5.]
 [6. 6. 6. 6.]
 [7. 7. 7. 7.]]
[[4. 4. 4. 4.]
 [3. 3. 3. 3.]
 [0. 0. 0. 0.]
 [6. 6. 6. 6.]]
[[5. 5. 5. 5.]
 [3. 3. 3. 3.]
 [1. 1. 1. 1.]]
[[ 4  5  6  7]
 [ 8  9 10 11]
 [20 21 22 23]
 [28 29 30 31]]
'''
'''
数组的切片索引
'''
def arr_slice_index():
    """Show that writing through a slice changes the array it was taken from."""
    numbers = np.arange(10)
    # view covers elements 5, 6 and 7 of numbers and shares their storage.
    view = numbers[5:8]
    view[:] = 64
    print(numbers)
    print(numbers[1:6])

'''
[ 0  1  2  3  4 64 64 64  8  9]
[ 1  2  3  4 64]
'''
'''
数组的转置和换轴
'''
def arr_reverse_alter_axies():
    """Demonstrate transposing arrays and swapping their axes."""
    grid = np.arange(15).reshape(3, 5)
    print(grid)

    # Transposing a 2-D array swaps its rows and columns.
    print(np.transpose(grid))

    samples = np.random.randn(6, 3)
    print(samples)

    # Matrix product of the transposed array with the array itself (3x3 result).
    print(np.dot(samples.T, samples))

    cube = np.arange(16).reshape(2, 2, 4)
    print(cube)

    # New axis order (1, 0, 2): the first two axes trade places.
    print(np.transpose(cube, (1, 0, 2)))
    # swapaxes exchanges exactly the two axes it is given.
    print(np.swapaxes(cube, 1, 2))
'''
通用函数
'''
def common_function():
    """Demonstrate unary and binary universal functions (ufuncs)."""
    ints = np.arange(10)
    print(ints)

    # Unary ufuncs transform each element independently.
    for ufunc in (np.sqrt, np.exp):
        print(ufunc(ints))

    first = np.random.randn(8)
    second = np.random.randn(8)
    print(first)
    print(second)

    # Binary ufunc: element-wise maximum of the two arrays.
    print(np.maximum(first, second))

    scaled = np.random.randn(7) * 5
    print(scaled)

    # np.modf returns two arrays: the fractional parts and the integral parts.
    fractional, integral = np.modf(scaled)
    print(fractional)
    print(integral)
'''
结果
[0 1 2 3 4 5 6 7 8 9]
[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]
[1.00000000e+00 2.71828183e+00 7.38905610e+00 2.00855369e+01
 5.45981500e+01 1.48413159e+02 4.03428793e+02 1.09663316e+03
 2.98095799e+03 8.10308393e+03]
[-0.38198195 -0.41349334 -0.22681757  1.07143114  0.81742682  0.20248671
 -0.70789151 -1.43965095]
[-0.49113055  0.40064256  1.1547515  -0.92835289  0.61822386 -0.76110323
  0.44788687 -0.00267016]
[-0.38198195  0.40064256  1.1547515   1.07143114  0.81742682  0.20248671
  0.44788687 -0.00267016]
[ 3.93518567  2.70134773  4.04039374 -0.55103264 -1.49523371 -3.24249544
 -3.59989243]
[ 0.93518567  0.70134773  0.04039374 -0.55103264 -0.49523371 -0.24249544
 -0.59989243]
[ 3.  2.  4. -0. -1. -3. -3.]
'''
'''
面向数组编程
'''
def face_to_array():
    """Evaluate sqrt(x^2 + y^2) on a small grid and display it as an image.

    Prints the 1-D points, the two coordinate matrices from ``np.meshgrid``
    and the resulting distance matrix, then shows the matrix as a grey-scale
    image with a colour bar.
    """
    points = np.arange(-2, 2, 1)
    print(points)

    # meshgrid expands the 1-D points into two 2-D coordinate matrices.
    xs, ys = np.meshgrid(points, points)
    print(xs)
    print(ys)

    z = np.sqrt(xs ** 2 + ys ** 2)
    print(z)

    plt.imshow(z, cmap=plt.cm.gray)
    plt.colorbar()
    # Raw string: in a normal string "\s" is an invalid escape sequence, which
    # modern Python reports with a warning. The title text itself is unchanged.
    plt.title(r"Image plot of $\sqrt{x^2+y^2}$ for a grid of values")
    # When run from a terminal the window closes immediately unless
    # plt.show() is called to display the figure.
    plt.show()
'''
结果
[-2 -1  0  1]
[[-2 -1  0  1]
 [-2 -1  0  1]
 [-2 -1  0  1]
 [-2 -1  0  1]]
[[-2 -2 -2 -2]
 [-1 -1 -1 -1]
 [ 0  0  0  0]
 [ 1  1  1  1]]
[[2.82842712 2.23606798 2.         2.23606798]
 [2.23606798 1.41421356 1.         1.41421356]
 [2.         1.         0.         1.        ]
 [2.23606798 1.41421356 1.         1.41421356]]
'''
'''
将条件逻辑作为数组操作
'''
def condition_array_op():
    """Express conditional logic as array operations with ``np.where``."""
    xarray = np.array([1.1, 1.2, 1.3, 1.4, 1.5])
    yarray = np.array([2.1, 2.2, 2.3, 2.4, 2.5])
    cond = np.array([True, False, True, True, False])

    # Pure-Python version: take from xarray where cond is true, else from yarray.
    chosen = []
    for x_val, y_val, flag in zip(xarray, yarray, cond):
        if flag:
            chosen.append(x_val)
        else:
            chosen.append(y_val)
    print(chosen)

    # The same selection as a single vectorised call.
    chosen = np.where(cond, xarray, yarray)
    print(chosen)

    samples = np.random.randn(4, 4)
    print(samples)

    positive = samples > 0
    print(positive)

    # Both branches may be scalars, or a scalar may be mixed with an array.
    print(np.where(positive, 2, -2))
    print(np.where(positive, 2, samples))
'''
结果
[1.1, 2.2, 1.3, 1.4, 2.5]
[1.1 2.2 1.3 1.4 2.5]
[[ 1.39585472  1.53783618  1.04469459 -0.22208552]
 [-0.17490372 -1.18951682  0.06921047  0.3814394 ]
 [-0.77861717 -1.49888853  0.31418079  0.06617863]
 [-0.9405033   0.04861186 -0.51911295 -0.75386056]]
[[ True  True  True False]
 [False False  True  True]
 [False False  True  True]
 [False  True False False]]
[[ 2  2  2 -2]
 [-2 -2  2  2]
 [-2 -2  2  2]
 [-2  2 -2 -2]]
[[ 2.          2.          2.         -0.22208552]
 [-0.17490372 -1.18951682  2.          2.        ]
 [-0.77861717 -1.49888853  2.          2.        ]
 [-0.9405033   2.         -0.51911295 -0.75386056]]
'''
'''
数学和统计方法
'''
def math_statistic_method():
    """Demonstrate aggregation methods, cumulative methods and boolean reductions.

    Everything is printed; nothing is returned.
    """
    arr = np.random.randn(5, 4)
    print(arr)

    # The mean is available both as a method and as a top-level function.
    print(arr.mean())
    print(np.mean(arr))
    # sum must be called; printing arr.sum without parentheses would show the
    # bound method object instead of the total.
    print(arr.sum())

    # axis=1 aggregates across the columns (one value per row),
    # axis=0 aggregates down the rows (one value per column).
    print(arr.mean(axis=1))
    print(arr.sum(axis=0))

    arr = np.array([0, 1, 2, 3, 4, 5, 6, 7])
    print(arr)
    # Running sum and running product along the only axis.
    print(arr.cumsum(axis=0))
    print(arr.cumprod(axis=0))

    # Methods for boolean arrays
    arr = np.random.randn(10)
    print(arr)
    # True counts as 1, so summing a mask counts the positive values.
    print((arr > 0).sum())

    bools = np.array([False, False, True, False])
    # True if at least one element is True.
    print(bools.any())
    # True only if every element is True.
    print(bools.all())

def sort():
    """Demonstrate in-place sorting of a 1-D array and of the rows of a 2-D array."""
    values = np.random.randn(5)
    print(values)
    # ndarray.sort sorts the array in place.
    values.sort()
    print(values)

    # Element located 20% of the way into the sorted data.
    cut = int(0.2 * len(values))
    print(values[cut])

    table = np.random.randn(5, 3)
    print(table)
    # axis=1 sorts the values inside each row independently.
    table.sort(axis=1)
    print(table)

'''
集合操作
'''
def set_operate():
    """Demonstrate set-like operations on 1-D arrays.

    Prints the sorted unique names from ``np.unique``, the same result built
    with plain Python, and a boolean membership mask from ``np.isin``.
    """
    names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
    # np.unique returns the sorted distinct values.
    print(np.unique(names))

    # Pure-Python equivalent. tolist() converts the elements to built-in str
    # first, so the printed list shows plain strings on every NumPy version.
    print(sorted(set(names.tolist())))

    values = np.array([6, 0, 0, 3, 2, 5, 6])
    # np.isin tests each element of values for membership in the second
    # argument and returns a boolean array. It replaces np.in1d, which is
    # deprecated in NumPy 2.0.
    print(np.isin(values, [2, 3, 6]))
'''
['Bob' 'Joe' 'Will']
['Bob', 'Joe', 'Will']
[ True False False  True  True False  True]
'''
'''
文件操作
'''
def file_operate():
    """Save arrays to disk with ``np.save`` / ``np.savez`` and load them back.

    Writes ``some_arr.npy`` and ``arr_archive.npz`` into the current working
    directory and prints what is read back from them.
    """
    arr = np.arange(10)
    # np.save writes one array in NumPy's binary .npy format (not compressed)
    # and appends the ".npy" suffix when the file name does not have it.
    np.save('some_arr', arr)

    print(np.load('some_arr.npy'))

    # np.savez stores several arrays in one archive, keyed by keyword name.
    np.savez('arr_archive.npz', a=arr, b=arr)
    # Loading an archive returns a dict-like object that keeps the file open,
    # so it is used as a context manager to make sure the file gets closed.
    with np.load('arr_archive.npz') as arch:
        print(arch)
        # Key/value access:
        # {'a': [0 1 2 3 4 5 6 7 8 9],
        #  'b': [0 1 2 3 4 5 6 7 8 9]}
        print(arch['b'])
        print(arch['a'])
'''
[0 1 2 3 4 5 6 7 8 9]
<numpy.lib.npyio.NpzFile object at 0x00000182B20A6EF0>
[0 1 2 3 4 5 6 7 8 9]
[0 1 2 3 4 5 6 7 8 9]
'''
    
'''
线性代数
'''
def liner_daishu():
    x = np.array([[1.,2.,3.],[4.,5.,6]])
    y = np.array([[6.,23.],[-1,7],[8,9]])

    # 矩阵乘法
    # *
    print(x.dot(y))   
    print(np.dot(x,y))

    from numpy.linalg import inv,qr

    x= np.random.randn(5,5)
    mat =x.T.dot(x)

    print(mat)

    # inv 求逆矩阵
    print(inv(mat))
    print()
    # E矩阵
    # 数值解：3.33333333333333333333333333333333
    # 分析解：10/3
    print(mat.dot(inv(mat)))

    q,r = qr(mat)
    print(q)
    print(r)

'''
伪随机数
'''
def random_num():
    """Print a 4x4 array of normal samples from NumPy and one sample from ``random``."""
    import random

    # Normal distribution: NumPy draws the whole 4x4 array in one call.
    draws = np.random.normal(size=(4, 4))
    print(draws)

    # The standard library produces one value per call (mean 0, std dev 1).
    print(random.normalvariate(0, 1))

'''
随机漫步
'''
def random_walk():
    """Simulate random walks in pure Python and with NumPy.

    1. Builds a 1000-step walk with a Python loop, prints its length and
       plots the first 100 positions.
    2. Builds a 1000-step walk with NumPy, prints its length, minimum and
       maximum, and the index of the first step at which the walk is more
       than 10 away from the origin.
    3. Simulates 5000 walks of 1000 steps at once and prints the largest and
       smallest position reached by any of them.
    """
    import random

    nsteps = 1000

    # --- Pure-Python walk ---------------------------------------------
    position = 0
    walk = [position]
    for _ in range(nsteps):
        # randint(0, 1) is 0 or 1: step forward on 1, backward on 0.
        position += 1 if random.randint(0, 1) else -1
        walk.append(position)

    print(len(walk))
    plt.plot(walk[:100])
    plt.show()

    # --- Same idea, vectorised ----------------------------------------
    draws = np.random.randint(0, 2, size=nsteps)
    steps = np.where(draws > 0, 1, -1)
    walk = steps.cumsum()
    print(len(walk))

    print(walk.min())
    print(walk.max())

    # Index of the first step at which the walk is more than 10 away from
    # the origin. argmax returns the position of the first True; it also
    # returns 0 when the mask contains no True at all.
    print((np.abs(walk) > 10).argmax())

    # --- Many walks at once: one row per walk --------------------------
    nwalks = 5000
    draws = np.random.randint(0, 2, size=(nwalks, nsteps))
    steps = np.where(draws > 0, 1, -1)
    # Cumulative sum along axis 1 accumulates the steps inside each walk.
    walks = steps.cumsum(1)
    print(walks.max())
    print(walks.min())












if __name__ == "__main__":
    # Script entry point. Each demo function is self-contained; uncomment the
    # call(s) below to run the corresponding demo. Only random_walk() is
    # currently enabled.
    # numpy_time_low()
    # multi_dimention_arr()
    # product_ndarray()
    # ndarray_data_type()
    # numpy_4_operator()
    # base_index_slice()
    # bool_index()
    # magical_index()
    # arr_slice_index()
    # arr_reverse_alter_axies()
    # common_function()
    # face_to_array()
    # condition_array_op()
    # math_statistic_method()
    # sort()
    # set_operate()
    # file_operate()
    # liner_daishu()
    # random_num()
    random_walk()
linph174
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
《利用python进行数据分析》第四章numpy

向某位大佬学习如下：import numpy as npimport datetimeimport matplotlib.pyplot as pltimport pandas as pddef numpy_time_low(): # 创建了ndarray my_arr = np.arange(10000) # 创建了list my_list = list(range(10000)) # 打印两个变量的类型 print(type(my_arr))
复制链接

扫一扫
专栏目录