Numpy基础2

最新推荐文章于 2024-08-26 18:56:03 发布

陶柏聿

最新推荐文章于 2024-08-26 18:56:03 发布

阅读量90

点赞数 1

分类专栏：机器学习 | 文章标签：python

本文链接：https://blog.csdn.net/qq_45099933/article/details/124462250

版权

机器学习专栏收录该内容

2 篇文章 0 订阅

订阅专栏

继上一篇numpy基础，初次学，欢迎指正

聚合操作

import numpy as np
L=np.random.random(100) # 100 random floats drawn uniformly from the half-open interval [0, 1)
L

一百个随机数
求和

sum(L)  #求和
#48.86650528484489

numpy也提供了一个求和函数

np.sum(L)  
#48.86650528484489

来看看哪个效率高

big_array=np.random.random(1000000)
%timeit sum(big_array)
%timeit np.sum(big_array)        #对numpy中的向量或者矩阵运算是非常快的

#111 ms ± 38.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
#1.43 ms ± 523 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

内置的 sum 每次约 111 ms，np.sum 每次约 1.43 ms，NumPy 快了将近 80 倍，差距非常明显

求最大最小值

np.min(big_array)
np.max(big_array)

也可以像下面这样直接调用数组对象自身的方法。不过并不是所有聚合运算都有对应的数组方法（例如中位数 np.median、百分位数 np.percentile 就只能通过 np. 来调用），所以更推荐统一使用 np.函数名(数组) 的写法，它覆盖的运算更全面

big_array.min()
big_array.max()
big_array.sum()

再来看看二维数组

# Aggregations over a 2-D array: a 4x4 matrix holding the integers 0..15.
X = np.arange(16).reshape(4, 4)
X
# array([[ 0,  1,  2,  3],
#        [ 4,  5,  6,  7],
#        [ 8,  9, 10, 11],
#        [12, 13, 14, 15]])
np.sum(X)  # sum of all elements, 0 + 1 + ... + 15
# 120
np.sum(X, axis=1)  # collapse the columns: one sum per row
# array([ 6, 22, 38, 54])
np.sum(X, axis=0)  # collapse the rows: one sum per column
# array([24, 28, 32, 36])
np.prod(X)  # product of all elements
# 0, because one of the elements is 0
# The product of 1..16 is 16! = 20922789888000, which does not fit in 32 bits.
# Where NumPy's default integer is int32 the result silently wraps around to
# 2004189184, so request a 64-bit accumulator explicitly.
np.prod(X + 1, dtype=np.int64)
# 20922789888000
np.mean(X)  # arithmetic mean
# 7.5
np.median(X)  # median
# 7.5
v = np.array([1, 1, 2, 2, 10])  # 10 is far away from the other samples 1, 1, 2, 2
np.mean(v)  # the mean is pulled up by the outlier
# 3.2
np.median(v)  # the median is 2, which describes the sample better
# 2.0
np.percentile(big_array,q=50)   #在这个数组中，50%的数都是小于0.5000250899438448的,q代表百分之多少
#0.5000250899438448
np.percentile(big_array,q=100)  #q=100的话，其实就是big_array的最大值
#0.9999985686203592
np.var(big_array)  #方差
#0.08339493861871637
np.std(big_array)  #标准差
#0.2887818183659012
x=np.random.normal(0,1,1000000)   #均值为0，标准差为1
np.mean(x)
#0.00026999274400390757
np.var(x)
#1.0004508192016957
np.std(x)
#1.0002253842018287

索引

np.min(x) #最小值
#-4.938467164255635
np.argmin(x)  #索引值是933851的这个位置就是最小值所在的位置
#933851
x[933851]   #验证一下
#-4.938467164255635
np.argmax(x)  #最大值所在的索引
#733742

排序和使用索引

一维数组

# Sorting a 1-D array, and sorting by index.
x = np.arange(16)
np.random.shuffle(x)  # shuffle x in place
x
# e.g. array([ 4,  9,  6,  3, 14,  0, 13,  1,  5,  2, 10, 11, 15,  8,  7, 12])
np.sort(x)  # returns a sorted copy; x itself is unchanged
x.sort()  # sorts in place; x itself is now sorted
x
# array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])
# x is sorted at this point, so argsort would just return 0..15. Shuffle again
# before the remaining demos; the example outputs below therefore come from a
# different random order than the one shown above.
np.random.shuffle(x)
np.argsort(x)  # indices that would sort x, i.e. x[np.argsort(x)] is ascending
# e.g. array([ 6,  4, 10, 14,  7,  2,  8, 13,  1, 15, 12,  9, 11,  0,  5,  3], dtype=int64)
# Spot-check against that example: the first three indices hold the three
# smallest values and the last index holds the largest.
x[6], x[4], x[10], x[3]
# (0, 1, 2, 15)
# The second argument is a position, not a value: the element that belongs at
# sorted position 3 is placed at index 3, everything smaller goes before it and
# everything larger after it; neither side is itself sorted. Because x is a
# permutation of 0..15, that element happens to be the value 3.
np.partition(x, 3)
# e.g. array([ 0,  1,  2,  3,  4,  5, 15, 11,  8, 14, 12,  6,  7, 10,  9, 13])

二维数组

X=np.random.randint(10,size=(4,4))  # 4x4 matrix of random integers from the half-open range [0, 10), i.e. 0..9
# Example value (random, differs on every run); the outputs below refer to it:
#array([[9, 3, 5, 8],
#      [0, 5, 6, 2],
#      [5, 7, 2, 4],
#      [2, 3, 5, 5]])
np.sort(X,axis=1)   # sort within each row; the default is axis=-1 (the last axis), which for a 2-D array is the same as axis=1
#array([[3, 5, 8, 9],
#      [0, 2, 5, 6],
#      [2, 4, 5, 7],
#      [2, 3, 5, 5]])
np.sort(X,axis=0)   # axis=0: sort within each column
#array([[0, 3, 2, 2],
#      [2, 3, 5, 4],
#      [5, 5, 5, 5],
#      [9, 7, 6, 8]])
np.argsort(X,axis=1)  # per row; the "arg" variant returns the sorting indices instead of the values
#array([[1, 2, 3, 0],
#      [0, 3, 1, 2],
#      [2, 3, 0, 1],
#      [0, 1, 2, 3]], dtype=int64)
np.argsort(X,axis=0) # likewise, per column
#array([[1, 0, 2, 1],
#      [3, 3, 0, 2],
#      [2, 1, 3, 3],
#      [0, 2, 1, 0]], dtype=int64)

Fancy Indexing

一维数组

# Fancy indexing on a 1-D array.
x = np.arange(16)
np.random.shuffle(x)
x
# e.g. array([ 9, 13,  0,  5,  6,  8,  4, 10, 11,  7,  1,  2, 14, 12, 15,  3])
# All example outputs below refer to the shuffled x shown above.
x[3:9]  # contiguous slice: positions 3..8
# array([ 5,  6,  8,  4, 10, 11])
x[3:9:2]  # strided slice: positions 3, 5, 7
# array([ 5,  8, 10])
# Positions 3, 5 and 8 are not evenly spaced, so no slice can select them;
# index with a list of positions instead.
ind = [3, 5, 8]
x[ind]  # 1-D array made of the elements at positions 3, 5 and 8
# array([ 5,  8, 11])
ind = np.array([[0, 2],
                [1, 3]])
x[ind]  # the result takes the shape of the index array: [[x[0], x[2]], [x[1], x[3]]]
# array([[ 9,  0],
#        [13,  5]])

二维数组

X=x.reshape(4,-1)
X
#array([[ 9, 13,  0,  5],
#      [ 6,  8,  4, 10],
#      [11,  7,  1,  2],
#      [14, 12, 15,  3]])
row=np.array([0,1,2])  #行坐标
col=np.array([1,2,3])  #列坐标
X[row,col]             #row=0,col=1意思就是第0行下标为1的元素，即13
#array([13,  4,  2])
X[0,col]   #第0行 的下标为123的元素
#array([13,  0,  5])
X[:2,col]  #前两行的下标为123的元素
#array([[13,  0,  5],
#      [ 8,  4, 10]])
col=[True,False,True,True]
X[0,col]      #True表示取，False表示不取
#array([9, 0, 5])
X[1:3,col]
#array([[ 6,  4, 10],
#      [11,  1,  2]])

Matplotlib基础

折线图

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
x=np.linspace(0,10,100)  # 100 evenly spaced sample points from 0 to 10 (both ends included), i.e. 99 intervals
x

在这里插入图片描述

y=np.sin(x)  #求sin值
y

在这里插入图片描述
只看数字什么也看不出来

plt.plot(x,y)   #传入横纵坐标。绘制从0-10的正弦函数
plt.show()

sinx
可以放两个

cosy=np.cos(x)
siny=y.copy()
plt.plot(x,siny)
plt.plot(x,cosy)
plt.show()

sinxcosx

plt.plot(x,siny)
plt.plot(x,cosy,color="red",linestyle="--")# 修改线条为红色，以及----条
plt.xlim(-5,15)  #x轴的范围
plt.ylim(-2,2)   #y轴的范围
plt.show()

在这里插入图片描述
再详细一点

plt.plot(x,siny,label="sin(x)")  #这条线是sinx
plt.plot(x,cosy,color="red",linestyle="--",label="cos(x)")  #这条线的cosx
plt.axis([-1,11,-2,2])  #前两个是x轴的范围，后两个是y轴的范围
plt.xlabel("x axis")   #x方向的图解
plt.ylabel("y axis")   #y方向的图解
plt.legend()   #加上图示label
plt.title("Welcome to the ML World!")   #整个图的title
plt.show()

all

散点图

plt.scatter(x,siny)  #散点
plt.show()

在这里插入图片描述

plt.scatter(x,siny)  #散点
plt.scatter(x,cosy,color="red")
plt.show()

在这里插入图片描述

x=np.random.normal(0,1,10000)  # 10000 samples from a normal distribution with mean 0 and standard deviation 1
y=np.random.normal(0,1,10000)
plt.scatter(x,y,alpha=0.1)  # alpha sets the opacity; the centre looks darker because many points pile up there
plt.show()

在这里插入图片描述

读取数据和简单的数据探索

import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from sklearn import datasets #数据集
iris=datasets.load_iris()   #鸢尾花数据集
iris.keys()     #数据集包含哪些内容:数据，y代表的label，y_label的名字，数据集的文档
print(iris.DESCR) #数据集的文档，可以仔细看看

iris.data #读取数据
iris.data.shape   #150行4列
#(150, 4)
iris.feature_names  #萼片的长和宽，花瓣的长和宽
#['sepal length (cm)',
#'sepal width (cm)',
#'petal length (cm)',
#'petal width (cm)']

在这里插入图片描述

iris.target     #0，1，2表示三种不同的花
#array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])
iris.target_names  #对应012三种鸢尾花的类型
#array(['setosa', 'versicolor', 'virginica'], dtype='<U10')
X=iris.data[:,:2]  #取所有的行以及前两列
y = iris.target
plt.scatter(X[y==0,0],X[y==0,1],color="red")   #y为0时，
plt.scatter(X[y==1,0],X[y==1,1],color="blue")  #y为1时
plt.scatter(X[y==2,0],X[y==2,1],color="green") #y为2时
plt.show()

在这里插入图片描述

X=iris.data[:,2:]  #花瓣长度和宽度。
plt.scatter(X[y==0,0],X[y==0,1],color="red",marker="o")   #y为0时，
plt.scatter(X[y==1,0],X[y==1,1],color="blue",marker="+")  #y为1时
plt.scatter(X[y==2,0],X[y==2,1],color="green",marker="x") #y为2时
plt.show()