python数据分析第3天
numpy的应用
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'
# Three samples with the same mean (52.0) but different spread, used to
# show that the mean alone does not describe a data set.
array1 = np.array([42, 45, 62, 56, 35, 79, 67, 74, 30, 28, 54])
array2 = np.array([65, 36, 123, 25, 45, 32, 26, 78, 57, 51, 34])
array3 = np.array([82, 36, 21, 23, 25, 43, 52, 59, 60, 76, 95])
# NOTE: `var` is a method and must be called; `array.var` without the
# parentheses prints the bound-method object rather than the variance.
print('均值', array1.mean())
print('方差', array1.var())
print('均值', array2.mean())
print('方差', array2.var())
print('均值', array3.mean())
print('方差', array3.var())
array1.size
array1.shape
array1.ndim
array2.dtype
array2.itemsize
array2.nbytes
array3[0],array3[1],array3[-2]
out:
(82, 36, 76)
array3[[0, 1, -2]]
out:
array([82, 36, 76])
array3.take([0, 1, -2])
out:
array([82, 36, 76])
array3[array3 > 50]
out:
array([82, 52, 59, 60, 76, 95])
array3[(array3>50)|(array3 % 2 == 0)]
out:
array([82, 36, 52, 59, 60, 76, 95])
np.append(array3,1000)
out:
array([ 82, 36, 21, 23, 25, 43, 52, 59, 60, 76, 95,
1000])
np.insert(array3, 0, 1000)#在0的位置插入1000
out:
array([1000, 82, 36, 21, 23, 25, 43, 52, 59, 60, 76,
95])
数组与标量的运算
array4 = np.array([[1, 1, 1], [2, 3, 4], [5, 5, 6]])
array4
out:
array([[1, 1, 1],
[2, 3, 4],
[5, 5, 6]])
array4 + 5
out:
array([[ 6, 6, 6],
[ 7, 8, 9],
[10, 10, 11]])
array4 * 5
out:
array([[ 5, 5, 5],
[10, 15, 20],
[25, 25, 30]])
5*array4
out:
array([[ 5, 5, 5],
[10, 15, 20],
[25, 25, 30]])
array4 / 5
out:
array([[0.2, 0.2, 0.2],
[0.4, 0.6, 0.8],
[1. , 1. , 1.2]])
array4 ** 5
out:
array([[ 1, 1, 1],
[ 32, 243, 1024],
[3125, 3125, 7776]], dtype=int32)
5 ** array4
array([[ 5, 5, 5],
[ 25, 125, 625],
[ 3125, 3125, 15625]], dtype=int32)
数组与数组的运算
array5 = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
array6 = np.array([[1, 1, 1], [2, 2, 2], [3, 3, 3]])
array5
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9]])
array([[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
array5 + array6
out:
array([[ 2, 3, 4],
[ 6, 7, 8],
[10, 11, 12]])
array5 ** array6
out:
array([[ 1, 2, 3],
[ 16, 25, 36],
[343, 512, 729]], dtype=int32)
array7 = np.array([4, 4, 4])
array7
array5 + array7
out:
array([[ 5, 6, 7],
[ 8, 9, 10],
[11, 12, 13]])
array8 = np.array([[4], [3], [2]])
array8
out:
array([[4],
[3],
[2]])
array5 + array8
out:
array([[ 5, 6, 7],
[ 7, 8, 9],
[ 9, 10, 11]])
array9 = np.random.randint(1, 10, (4, 4))  # 4x4 array of random integers in [1, 10); the upper bound 10 is excluded
array9
array([[9, 1, 3, 8],
[7, 6, 5, 5],
[4, 2, 2, 2],
[7, 3, 7, 2]])
当两个数组形状不一致时,如果两个数组的后缘维度(shape属性从后往前看)相同或者其中一个的后缘维度为1,那么这个时候可以通过广播机制让两个数组的形状趋于一致,这种情况是可以进行运算的;如果不能应用广播机制,那么两个数组没有办法进行运算。
# nan - not a number,是一个空值
array10 = np.array([1, 2, 3, np.nan, 4, np.nan, np.inf])
array10
out:
array([ 1., 2., 3., nan, 4., nan, inf])
# 判断空值
np.isnan(array10)
out:
array([False, False, False, True, False, True, False])
array10[~np.isnan(array10)]
out:
array([ 1., 2., 3., 4., inf])
# 判断无穷大值
np.isinf(array10)
out:
array([False, False, False, False, False, False, True])
# 正弦和余弦函数
x = np.linspace(-2 * np.pi, 2 * np.pi, 60)
y1, y2 = np.sin(x),np.cos(x)
plt.figure(figsize=(8, 4))
plt.plot(x, y1, color='#0000ff', marker='x')
plt.plot(x, y2, color='coral', marker='o')
# Check whether every pair of elements is equal within the tolerance.
# `a` and `b` are not defined anywhere else in these notes, so small
# illustrative inputs are created here to keep the cell runnable.
a = np.array([1.0, 2.0, 3.0])
b = a + 1e-9
np.allclose(a, b)
array11 = np.array([1, 4, 9])
array12 = np.array([2, 3, 5])
np.maximum(array11, array12)
np.minimum(array11, array12)
v1 = np.array([3, 1])
v2 = np.array([-1, 2])
v1 + v2
# 通过内积计算向量夹角的余弦值
np.inner(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2))
常用函数
array13 = np.array([12, 13, 12, 15, 20, 20, 17])
array13
array([12, 13, 12, 15, 20, 20, 17])
# 去重
np.unique(array13)
array([12, 13, 15, 17, 20])
# 堆叠和拼接
np.hstack((array5, array6))
array([[1, 2, 3, 1, 1, 1],
[4, 5, 6, 2, 2, 2],
[7, 8, 9, 3, 3, 3]])
np.vstack((array5, array6))
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
np.stack((array5, array6), axis=0)
array([[[1, 2, 3],
[4, 5, 6],
[7, 8, 9]],
[[1, 1, 1],
[2, 2, 2],
[3, 3, 3]]])
np.stack((array5, array6), axis=1)
array([[[1, 2, 3],
[1, 1, 1]],
[[4, 5, 6],
[2, 2, 2]],
[[7, 8, 9],
[3, 3, 3]]])
np.concatenate((array5, array6))
array([[1, 2, 3],
[4, 5, 6],
[7, 8, 9],
[1, 1, 1],
[2, 2, 2],
[3, 3, 3]])
np.concatenate((array5, array6), axis=1)
array([[1, 2, 3, 1, 1, 1],
[4, 5, 6, 2, 2, 2],
[7, 8, 9, 3, 3, 3]])
array5[1, 1] = 0
array5
array([[1, 2, 3],
[4, 0, 6],
[7, 8, 9]])
# 获取非零元素的索引
np.argwhere(array5)
out:
array([[0, 0],
[0, 1],
[0, 2],
[1, 0],
[1, 2],
[2, 0],
[2, 1],
[2, 2]], dtype=int64)
array13[-1] = 0
array13[2] = 0
array13
array([12, 13, 0, 15, 20, 20, 0])
np.argwhere(array13)
out:
array([[0],
[1],
[3],
[4],
[5]], dtype=int64)
array14 = np.arange(1, 10)
array14
array([1, 2, 3, 4, 5, 6, 7, 8, 9])
# 抽取元素
np.extract(array14 % 3 == 0, array14)
array([3, 6, 9])
array14[array14 % 3 == 0]
array([3, 6, 9])
np.select([array14 < 3, array14 > 5], [array14, array14 ** 2])
array([ 1, 2, 0, 0, 0, 36, 49, 64, 81])
np.where(array14 < 5, array14, array14 * 10)
array([ 1, 2, 3, 4, 50, 60, 70, 80, 90])
# 翻转
np.flip(array5)
array([[9, 8, 7],
[6, 0, 4],
[3, 2, 1]])
np.flip(array5, axis=0)
array([[7, 8, 9],
[4, 0, 6],
[1, 2, 3]])
np.flip(array5, axis=1)
array([[3, 2, 1],
[6, 0, 4],
[9, 8, 7]])
def fib(counts):
    """Yield the first ``counts`` Fibonacci numbers: 1, 1, 2, 3, 5, ...

    Args:
        counts: How many terms to produce. ``range(counts)`` drives the
            loop, so zero or a negative value yields nothing.

    Yields:
        The next Fibonacci number on each iteration.
    """
    a, b = 0, 1
    for _ in range(counts):
        # Advance the pair first so the sequence starts at 1 rather than 0.
        a, b = b, a + b
        yield a
iter_obj = fib(20)
iter_obj
# 通过迭代器创建数组
array15 = np.fromiter(iter_obj, dtype=np.int64)
array15
array([ 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89,
144, 233, 377, 610, 987, 1597, 2584, 4181, 6765], dtype=int64)
# 通过重复指定的元素创建数组
array16 = np.repeat([3, 4], 10)
array16
array([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4])
array17 = np.repeat([[3, 4]], 3, axis=0)
array17
array([[3, 4],
[3, 4],
[3, 4]])
array18 = np.repeat([[3, 4]], [2, 3], axis=1)
array18
array([[3, 3, 4, 4, 4]])
# 滚动
np.roll(array14, -3)
array([4, 5, 6, 7, 8, 9, 1, 2, 3])
np.roll(array14, 3)
array([7, 8, 9, 1, 2, 3, 4, 5, 6])
# 调整大小
array19 = np.random.randint(60, 101, 15)
array20 = np.resize(array19, (4, 4))
array20
# 替换(原地替换,不返回新的数组对象)
np.place(array20, array20 < 80, 59)
##以排第5的元素为轴对数组进行划分
##轴前面的元素都比轴小(但未必有序),轴后面的元素都比轴大(但未必有序)
np.partition(array19, 5)
np.partition(array19, 7)
array([63, 65, 68, 74, 66, 70, 77, 80, 81, 82, 83, 88, 87, 93, 99])
np.partition(array19, 10)
# Linear Algebra ---> Matrix
np.linalg?
m1 = np.matrix(array5)
m1
m1.T
np.linalg.det(m1)
m1.I
m2 = np.matrix('1 1 1; 2 2 2; 3 3 3')
m2
np.linalg.det(m2)
m2.A1
m3 = np.array([[1, 0, 2], [-1, 3, 1]])
m4 = np.array([[3, 1], [2, 1], [1, 0]])
m3
m4
m5 = m3 @ m4
m5
np.linalg.inv(m5)
m5.T