python数据分析第3天

python数据分析第3天

numpy的应用

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'
# Three samples that share the same mean (52.0) but differ in spread,
# so the variance is what actually distinguishes them.
array1 = np.array([42, 45, 62, 56, 35, 79, 67, 74, 30, 28, 54])
array2 = np.array([65, 36, 123, 25, 45, 32, 26, 78, 57, 51, 34])
array3 = np.array([82, 36, 21, 23, 25, 43, 52, 59, 60, 76, 95])

# ndarray.var is a method: it has to be called, otherwise print() shows
# the bound-method repr instead of the variance value.
print('均值', array1.mean())
print('方差', array1.var())

print('均值', array2.mean())
print('方差', array2.var())

print('均值', array3.mean())
print('方差', array3.var())

array1.size
array1.shape
array1.ndim
array2.dtype
array2.itemsize
array2.nbytes

array3[0],array3[1],array3[-2]
out:
   (82, 36, 76)


array3[[0, 1, -2]]
out:
    array([82, 36, 76])
array3.take([0, 1, -2])
out:
    array([82, 36, 76])
 array3[array3 > 50]
out:
 array([82, 52, 59, 60, 76, 95])

array3[(array3>50)|(array3 % 2 == 0)]
out:
    array([82, 36, 52, 59, 60, 76, 95])
    
 np.append(array3,1000)    
out:
    array([  82,   36,   21,   23,   25,   43,   52,   59,   60,   76,   95,
       1000])
    

np.insert(array3, 0, 1000)#在0的位置插入1000
out:
    array([1000,   82,   36,   21,   23,   25,   43,   52,   59,   60,   76,
         95])

数组与标量的运算
array4 = np.array([[1, 1, 1], [2, 3, 4], [5, 5, 6]])
array4
out:
array([[1, 1, 1],
       [2, 3, 4],
       [5, 5, 6]])


array4 + 5
out:
 array([[ 6,  6,  6],
       [ 7,  8,  9],
       [10, 10, 11]])

array4 * 5
out:
array([[ 5,  5,  5],
       [10, 15, 20],
       [25, 25, 30]])

5*array4
out:
 array([[ 5,  5,  5],
       [10, 15, 20],
       [25, 25, 30]])

array4 / 5
out:
array([[0.2, 0.2, 0.2],
       [0.4, 0.6, 0.8],
       [1. , 1. , 1.2]])    


array4 ** 5
out:
 array([[   1,    1,    1],
       [  32,  243, 1024],
       [3125, 3125, 7776]], dtype=int32)
   
 5 ** array4
array([[    5,     5,     5],
       [   25,   125,   625],
       [ 3125,  3125, 15625]], dtype=int32)
    
数组与数组的运算
array5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
array6 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
array5


array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

array([[1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])

array5 + array6
out:
  array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12]])

array5 ** array6
out:
array([[  1,   2,   3],
       [ 16,  25,  36],
       [343, 512, 729]], dtype=int32)

array7 = np.array([4, 4, 4])
array7
array5 + array7
out:
  array([[ 5,  6,  7],
       [ 8,  9, 10],
       [11, 12, 13]])

array8 = np.array([[4], [3], [2]])
array8
out:
    array([[4],
       [3],
       [2]])
 array5 + array8 
out:
    array([[ 5,  6,  7],
       [ 7,  8,  9],
       [ 9, 10, 11]])

array9 = np.random.randint(1, 10, (4, 4))#随机生成元素取值为1~9(不含10)的二维数组,4行4列
array9
array([[9, 1, 3, 8],
       [7, 6, 5, 5],
       [4, 2, 2, 2],
       [7, 3, 7, 2]])

当两个数组形状不一致时,如果两个数组的后缘维度(shape属性从后往前看)相同或者其中一个的后缘维度为1,那么这个时候可以通过广播机制让两个数组的形状趋于一致,这种情况是可以进行运算的;如果不能应用广播机制,那么两个数组没有办法进行运算。

# nan - not a number,是一个空值
array10 = np.array([1, 2, 3, np.nan, 4, np.nan, np.inf])
array10

out:
array([ 1.,  2.,  3., nan,  4., nan, inf])

# 判断空值
np.isnan(array10)
out:
array([False, False, False,  True, False,  True, False])

array10[~np.isnan(array10)]
out:
array([ 1.,  2.,  3.,  4., inf])

# 判断无穷大值
np.isinf(array10)
out:
array([False, False, False, False, False, False,  True])


# 正弦和余弦函数
x = np.linspace(-2 * np.pi, 2 * np.pi, 60)
y1, y2 = np.sin(x),np.cos(x)
plt.figure(figsize=(8, 4))
plt.plot(x, y1, color='#0000ff', marker='x')
plt.plot(x, y2, color='coral', marker='o')

# 判断每个元素是否在可容忍误差范围内相近
np.allclose(a, b)

array11 = np.array([1, 4, 9])
array12 = np.array([2, 3, 5])
np.maximum(array11, array12)
np.minimum(array11, array12)

v1 = np.array([3, 1])
v2 = np.array([-1, 2])
v1 + v2

# 通过内积计算向量夹角的余弦值
np.inner(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
常用函数
array13 = np.array([12, 13, 12, 15, 20, 20, 17])
array13
array([12, 13, 12, 15, 20, 20, 17])

# 去重
np.unique(array13)
array([12, 13, 15, 17, 20])


# 堆叠和拼接
np.hstack((array5, array6))
array([[1, 2, 3, 1, 1, 1],
       [4, 5, 6, 2, 2, 2],
       [7, 8, 9, 3, 3, 3]])

np.vstack((array5, array6))
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])
       
np.stack((array5, array6), axis=0)
array([[[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]],

       [[1, 1, 1],
        [2, 2, 2],
        [3, 3, 3]]])
       
 np.stack((array5, array6), axis=1)
 array([[[1, 2, 3],
        [1, 1, 1]],

       [[4, 5, 6],
        [2, 2, 2]],

       [[7, 8, 9],
        [3, 3, 3]]])
        
 np.concatenate((array5, array6))
 array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9],
       [1, 1, 1],
       [2, 2, 2],
       [3, 3, 3]])
np.concatenate((array5, array6), axis=1)
array([[1, 2, 3, 1, 1, 1],
       [4, 5, 6, 2, 2, 2],
       [7, 8, 9, 3, 3, 3]])
       
 array5[1, 1] = 0
array5
array([[1, 2, 3],
       [4, 0, 6],
       [7, 8, 9]])
       
       
# 获取非零元素的索引
np.argwhere(array5)
out:
array([[0, 0],
       [0, 1],
       [0, 2],
       [1, 0],
       [1, 2],
       [2, 0],
       [2, 1],
       [2, 2]], dtype=int64)
 

array13[-1] = 0
array13[2] = 0
array13
array([12, 13,  0, 15, 20, 20,  0])

np.argwhere(array13)
out:
array([[0],
       [1],
       [3],
       [4],
       [5]], dtype=int64)   


array14 = np.arange(1, 10)

array14
array([1, 2, 3, 4, 5, 6, 7, 8, 9])

# 抽取元素
np.extract(array14 % 3 == 0, array14)
array([3, 6, 9])

array14[array14 % 3 == 0]
array([3, 6, 9])

np.select([array14 < 3, array14 > 5], [array14, array14 ** 2])
array([ 1,  2,  0,  0,  0, 36, 49, 64, 81])

np.where(array14 < 5, array14, array14 * 10)
array([ 1,  2,  3,  4, 50, 60, 70, 80, 90])

# 翻转
np.flip(array5)
array([[9, 8, 7],
       [6, 0, 4],
       [3, 2, 1]])

np.flip(array5, axis=0)
array([[7, 8, 9],
       [4, 0, 6],
       [1, 2, 3]])

np.flip(array5, axis=1)
array([[3, 2, 1],
       [6, 0, 4],
       [9, 8, 7]])

def fib(counts):
    """Generate the first ``counts`` Fibonacci numbers: 1, 1, 2, 3, 5, ...

    Values are produced lazily, one per iteration; nothing is yielded
    when ``counts`` is zero.
    """
    current, upcoming = 1, 1
    for _ in range(counts):
        yield current
        # Slide the window forward: the next term is the sum of the pair.
        current, upcoming = upcoming, current + upcoming
        

iter_obj = fib(20)
iter_obj

# 通过迭代器创建数组
array15 = np.fromiter(iter_obj, dtype=np.int64)
array15

array([   1,    1,    2,    3,    5,    8,   13,   21,   34,   55,   89,
        144,  233,  377,  610,  987, 1597, 2584, 4181, 6765], dtype=int64)

# 通过重复指定的元素创建数组
array16 = np.repeat([3, 4], 10)
array16

array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])

array17 = np.repeat([[3, 4]], 3, axis=0)
array17
array([[3, 4],
       [3, 4],
       [3, 4]])

array18 = np.repeat([[3, 4]], [2, 3], axis=1)
array18
array([[3, 3, 4, 4, 4]])

# 滚动
np.roll(array14, -3)
array([4, 5, 6, 7, 8, 9, 1, 2, 3])

np.roll(array14, 3)
array([7, 8, 9, 1, 2, 3, 4, 5, 6])

# 调整大小
array19 = np.random.randint(60, 101, 15)
array20 = np.resize(array19, (4, 4))
array20

# 替换(原地替换,不返回新的数组对象)
np.place(array20, array20 < 80, 59)



##以排序后下标为5(即第6小)的元素为轴对数组进行划分

##轴前面的元素都不大于轴(但未必有序),轴后面的元素都不小于轴(但未必有序)

np.partition(array19, 5)

np.partition(array19, 7)
array([63, 65, 68, 74, 66, 70, 77, 80, 81, 82, 83, 88, 87, 93, 99])


np.partition(array19, 10)


# Linear Algebra ---> Matrix
np.linalg?

m1 = np.matrix(array5)
m1

m1.T

np.linalg.det(m1)

m1.I
m2 = np.matrix('1 1 1; 2 2 2; 3 3 3')
m2
np.linalg.det(m2)
m2.A1

m3 = np.array([[1, 0, 2], [-1, 3, 1]])
m4 = np.array([[3, 1], [2, 1], [1, 0]])
m3
m4
m5 = m3 @ m4
m5
np.linalg.inv(m5)
m5.T
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值