python numpy array倒序_Numpy & Pandas全集

Numpy & Pandas

Numpy:数值计算的扩展包,能高效处理N维数组、复杂函数和线性代数运算。

Pandas:用于数据处理,是Python的一个数据分析包。

01

aad811a6a25c9ed7427c337eba7b9a46.png

# numpy和pandas有什么用?

它们是Python科学运算中最为重要的两个模块,做数据分析必不可少。

简言之:用了会使得计算变得特别快

02

aad811a6a25c9ed7427c337eba7b9a46.png

# 安装numpy和pandas

可以使用Anaconda,也可以直接运行 pip install numpy 和 pip install pandas 进行安装。

03

aad811a6a25c9ed7427c337eba7b9a46.png

# numpy的简单属性

基本属性

import numpy as np

# A 2-D array built from a nested list: 2 rows by 3 columns.
array = np.array([[1, 2, 3],
                  [4, 5, 6]])
print(array)

# Report the basic attributes: ndim is the number of axes, shape is the
# length along each axis, and size is the total number of elements.
for label, value in (
    ('number of dim:', array.ndim),
    ('shape:', array.shape),
    ('size', array.size),
):
    print(label, value)

61e98d35988de1c14454db93933d910f.png

numpy创建的array

import numpy as np

# 1-D array with an explicit element type; other dtypes such as
# np.float32 or np.float16 can be requested the same way.
a = np.array([2, 3, 4], dtype=np.int32)
print(a.dtype)

# 2-D array from a nested list (one inner list per row).
b = np.array([[2, 3, 4], [4, 5, 6]])

# 3 rows x 4 columns filled with zeros.
c = np.zeros((3, 4))
print(c)

# 3 rows x 4 columns filled with ones (an all-ones array, not an
# identity matrix), stored as int32.
d = np.ones((3, 4), dtype=np.int32)
print(d)

# 3 rows x 4 columns of uninitialised memory: the values are arbitrary
# and must be overwritten before use.
e = np.empty((3, 4))
print(e)

# Integers 10..21 in steps of 1 (the stop value 22 is excluded),
# arranged as 3 rows x 4 columns.
f = np.arange(10, 22, 1).reshape((3, 4))
print(f)

# Six evenly spaced values from 1 to 10 inclusive, as 2 rows x 3 columns.
g = np.linspace(1, 10, 6).reshape((2, 3))
print(g)

61e98d35988de1c14454db93933d910f.png

numpy的基础运算

import numpy as np

a = np.array([10, 20, 30, 40])
b = np.arange(4)
print(a, b)

# Arithmetic operators work element by element on equally shaped arrays.
c = a - b  # element-wise difference
print(c)
d = a + b  # element-wise sum
print(d)
e = b ** 2  # element-wise square
print(e)
f = 10 * np.sin(a)  # sine of every element, scaled by 10
print(f)
print(b < 3)  # comparison yields a boolean array

# Matrix operations.
p = np.array([[1, 1],
              [0, 1]])
q = np.arange(4).reshape((2, 2))
print(p)
print(q)

r = p * q              # element-wise product
r_dot = np.dot(p, q)   # true matrix product
r_dot_2 = p.dot(q)     # method spelling of the same matrix product
print(r)
print(r_dot)

# np.random is the sub-module, random() the function: a 2x4 array of
# uniform samples from [0, 1).
w = np.random.random((2, 4))
print(w)

# axis=0 reduces down the rows and yields one value per column;
# axis=1 would yield one value per row instead.
w1 = w.sum(axis=0)
print(w1)
w2 = w.min(axis=0)
print(w2)
w3 = w.max(axis=0)
print(w3)

import numpy as np

A = np.arange(2, 14).reshape((3, 4))
print(A)

print(np.argmin(A))    # position of the minimum in the flattened array
print(np.argmax(A))    # position of the maximum in the flattened array
print(A.mean())        # mean of all elements (method form)
print(np.average(A))   # mean of all elements (function form)
print(np.median(A))    # median of all elements
print(np.cumsum(A))    # running total over the flattened array
print(np.diff(A))      # first-order difference between neighbours in each row
print(np.nonzero(A))   # one index array per axis: coordinates of non-zero entries

# Descending values need an explicit negative step.
B = np.arange(14, 2, -1).reshape((3, 4))
print(B)

print(np.sort(B))          # sorts each row independently
print(np.transpose(B))     # transpose
print((B.T).dot(B))        # matrix product B^T B (4x4), not an element-wise square
print(np.clip(B, 5, 9))    # values below 5 become 5, above 9 become 9, others unchanged

# Reductions accept an axis argument: axis=0 gives one result per column,
# axis=1 one per row. (The keyword was misspelled "aixs", which raised
# TypeError.)
col_mean = np.mean(A, axis=0)
print(col_mean)

61e98d35988de1c14454db93933d910f.png

# numpy的索引

import numpy as np

A = np.arange(3, 15).reshape((3, 4))
print(A)

# Indices are zero-based, so index 1 is the second row / second column.
print(A[1][1])     # chained indexing: row 1, then element 1 of that row
print(A[1, 1])     # same element using a single index tuple
print(A[1, :])     # every element of row 1
print(A[1, 1:3])   # row 1, columns 1 and 2 (the stop index 3 is excluded)

print('==' * 30)

print(A.flatten())  # 1-D copy of all elements

# Iterating over a 2-D array yields its rows.
for row_values in A:
    print(row_values)

# Iterating over the transpose yields the columns of A.
for column_values in A.T:
    print(column_values)

# .flat is an iterator over every element in order.
for element in A.flat:
    print(element)

61e98d35988de1c14454db93933d910f.png

numpy的array合并

import numpy as np

A = np.array([1, 1, 1])  # 1-D array
B = np.array([2, 2, 2])  # 1-D array

C = np.vstack([A, B])  # vertical stack: A and B become the rows of a 2x3 array
D = np.hstack([A, B])  # horizontal stack: one 1-D array of length 6
print(C)
print(D)

print(A.shape, C.shape)
print(A.shape, D.shape)

# Transposing a 1-D array leaves its shape unchanged; it does not
# produce a column vector.
print(A.T.shape)

# Adding an axis turns the length-3 array into a 3x1 column.
print(A[:, np.newaxis])

# Join several arrays along the first axis.
E = np.concatenate((A, B, B, A), axis=0)
print(E)

F = np.array([1, 1, 1])[:, np.newaxis]
print(F)
G = np.array([2, 2, 2])[:, np.newaxis]

# Join the 3x1 columns side by side along the second axis.
print(np.concatenate((F, G, F, G), axis=1))

61e98d35988de1c14454db93933d910f.png

 numpy的分割

import numpy as np

A = np.arange(12).reshape((3, 4))
print(A)

# Equal split: the 4 columns are divided into 2 pieces of 2 columns each.
print(np.split(A, 2, axis=1))

# array_split allows unequal pieces: 4 columns divided into 3 blocks.
print(np.array_split(A, 3, axis=1))

# vsplit cuts along the rows (3 pieces of one row each).
print(np.vsplit(A, 3))

# hsplit cuts along the columns (2 pieces of two columns each).
print(np.hsplit(A, 2))

61e98d35988de1c14454db93933d910f.png

numpy的copy&deep copy

对于numpy的赋值,看下边的例子:将a赋值给b和c,再将b赋值给d,这时a、b、c、d指向的是同一个数组,改变其中任何一个的元素,a、b、c、d都会同时改变。

import numpy as np

a = np.arange(4)
print(a)

# Plain assignment only binds another name to the same array object.
b = a
c = a
d = b

a[0] = 11
print(a)

# `is` tests object identity: every name still refers to the one array,
# so the change made through `a` is visible through all of them.
print(b is a)
print(b)
print(c is a)
print(c)
print(d is a)
print(d)

# Writing through `d` therefore changes `a` as well.
d[1:3] = [22, 33]
print(d)
print(a)

# An independent copy: copy() must be *called*. Without the parentheses
# `b` would be bound to the method object and no data would be copied.
b = a.copy()
print(b)

# Later changes to `a` no longer affect `b`.
a[3] = 44
print(a)
print(b)

61e98d35988de1c14454db93933d910f.png

Part4

Pandas

Pandas的基本介绍

bc395b7c2252e63f66e02e2143eebfb1.png

import pandas as pd
import numpy as np

# A Series: one-dimensional labelled data; np.nan marks a missing value.
s = pd.Series([1, 3, 6, np.nan, 44, 1])
print(s)

# Six consecutive days, used below as the row index.
dates = pd.date_range('20200820', periods=6)
print(dates)

# DataFrame with an explicit row index (dates) and column labels.
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=('a', 'b', 'c', 'd'))
print(df)

# Without explicit labels, rows and columns are numbered from 0.
df1 = pd.DataFrame(np.arange(12).reshape((3, 4)))
print(df1)

# A DataFrame can also be built from a dict: one entry per column,
# with scalar values repeated for every row.
df2 = pd.DataFrame({
    'A': 1,
    'B': pd.Timestamp('20130102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
})
print(df2)

print(df2.dtypes)    # data type of each column
print(df2.index)     # row labels
print(df2.columns)   # column labels
print(df2.values)    # underlying values as an array

# Summary statistics of the columns. The result has to be printed
# explicitly: in a script a bare expression displays nothing.
summary = df2.describe()
print(summary)

print(df2.T)  # transpose of the frame

# Sort by the column labels (axis=1) in descending order.
print(df2.sort_index(axis=1, ascending=False))

# Sort the rows by the values of column 'E'.
print(df2.sort_values(by='E'))

# 指定按值排列,指定了第E的列

●pandas的数据选取

import pandas as pd
import numpy as np

dates = pd.date_range('20200820', periods=6)
# `columns=` must be passed as a keyword argument; the original
# `columns[...]` (missing "=") raised NameError.
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# df['A'] and df.A select the same column.
print(df['A'], df.A)

# Row slices: by position, or by index label (a label slice includes
# both end points).
print(df[0:3], df['20200821':'20200822'])

# A more advanced way - select by label: loc
print(df.loc['20200821'])
print(df.loc['20200821', ['A', 'B']])

# Select by position: iloc (zero-based)
print(df.iloc[3, 1])               # row position 3, column position 1
print(df.iloc[[1, 3, 5], 1:3])     # rows 1, 3, 5; column positions 1 and 2

# Mixed selection with .ix is deprecated, so the example stays disabled.
# print(df.ix[:3,['A','C']])

# Boolean indexing: keep the rows where column A is greater than 8.
print(df)
print(df[df.A > 8])

● pandas设置值

如何在特定的位置修改值

import pandas as pd
import numpy as np

dates = pd.date_range('20200822', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])
print(df)

# Assign by position.
df.iloc[2, 2] = 1111
print(df)

# Assign by label.
df.loc['20200824', 'B'] = 2222
print(df)

# A boolean row mask assigns to every column of the matching rows.
df[df.A > 4] = 88
print(df)

# Change column A only. A single .loc call is used instead of the chained
# form df.A[mask] = ..., which may write to a temporary object and leave
# df unchanged.
df.loc[df.A > 4, 'A'] = 99
print(df)

# Add a new column F filled with NaN.
df['F'] = np.nan
print(df)

# Add a new column from a Series; values are aligned on the index labels.
df['E'] = pd.Series([1, 2, 3, 4, 5, 6], index=pd.date_range('20200822', periods=6))
print(df)

print(df)

●pandas处理丢失数据

import pandas as pd
import numpy as np

dates = pd.date_range('20200820', periods=6)
df = pd.DataFrame(np.arange(24).reshape((6, 4)), index=dates, columns=['A', 'B', 'C', 'D'])

# NaN cannot be stored in an integer column. Convert the two columns that
# will receive missing values to float explicitly instead of relying on
# pandas to upcast them silently during assignment (deprecated behaviour).
df = df.astype({'B': 'float64', 'C': 'float64'})
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
print(df)

# Drop every row (axis=0) that contains at least one NaN. how accepts
# 'any' or 'all'; with 'all' a row/column is dropped only when every
# value in it is NaN.
print(df.dropna(axis=0, how='any'))

# Replace every missing value with 0.
print(df.fillna(value=0))

# Boolean mask of the missing values.
print(df.isnull())

# Single True/False answer: does the frame contain any missing value?
print(df.isnull().values.any())

● pandas导入导出

import pandas as pd
import numpy as np

# Input and output locations, relative to the working directory.
csv_path = '1.csv'
pickle_path = '1_.pickle'

# read_csv adds a default integer row index to the loaded table.
data = pd.read_csv(csv_path)
print(data)

# Persist the frame in pickle format.
data.to_pickle(pickle_path)

●  pandas合并concat

import pandas as pd
import numpy as np

# --- concatenating ---
# Three frames with identical columns, filled with 0, 1 and 2.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['A', 'B', 'C', 'D'])
print(df1)
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['A', 'B', 'C', 'D'])
print(df2)
df3 = pd.DataFrame(np.ones((3, 4)) * 2, columns=['A', 'B', 'C', 'D'])
print(df3)

# Stack the rows (axis=0); ignore_index=True discards the original row
# labels and renumbers the result from 0.
res = pd.concat([df1, df2, df3], axis=0, ignore_index=True)
print(res)

print('==' * 30)

# --- join: 'inner' / 'outer' ---
# Controls how columns present in only one frame are handled
# (intersection versus union of the column sets).
df4 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['A', 'B', 'C', 'D'], index=[1, 2, 3])
print(df4)
df5 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['A', 'B', 'C1', 'D1'], index=[2, 3, 4])
print(df5)

# 'outer' (the default) keeps all columns and fills the gaps with NaN.
res1 = pd.concat([df4, df5], join='outer')
print(res1)

# 'inner' keeps only the columns shared by both frames.
res1 = pd.concat([df4, df5], join='inner', ignore_index=True)
print(res1)

# --- replacement for the removed join_axes argument ---
print('==' * 30)
df4 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['A', 'B', 'C', 'D'], index=[1, 2, 3])
print(df4)
df5 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['A', 'B', 'C1', 'D1'], index=[2, 3, 4])
print(df5)

# Old form: pd.concat([df4, df5], axis=1, join_axes=[df4.index]).
# Align df5 to df4's *row index only*. reindex_like(df4) would also force
# df4's column labels onto df5 and throw away its C1/D1 data.
res2 = pd.concat([df4, df5.reindex(df4.index)], axis=1)
print(res2)

# --- appending rows ---
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
print('==' * 30)
df4 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['A', 'B', 'C', 'D'])
print(df4)
df5 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['A', 'B', 'C', 'D'])
print(df5)
df6 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['B', 'C', 'D', 'E'], index=[2, 3, 4])

# Append one frame and renumber the rows.
res3 = pd.concat([df4, df5], ignore_index=True)
print(res3)

# Append several frames at once, keeping their row labels.
res4 = pd.concat([df4, df5, df6])
print(res4)

# Append a single row. The Series labels must match the frame's column
# names (upper case here), otherwise new columns would be created.
s1 = pd.Series([1, 2, 3, 4], index=['A', 'B', 'C', 'D'])
res5 = pd.concat([df4, s1.to_frame().T], ignore_index=True)
print(res5)

●  pandas合并merge

import pandas as pd
import numpy as np

# Merging two DataFrames by one or more keys (similar to a database join).

# --- simple example: a single key column ---
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

res = pd.merge(left, right, on='key')  # join on the 'key' column
print(res)

# --- two key columns ---
print('==' * 30)
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# inner: only key pairs present in both frames.
res1 = pd.merge(left, right, on=['key1', 'key2'], how='inner')
print(res1)

# outer: every key pair from either frame.
res2 = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res2)

# right: every key pair of the right frame.
res3 = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res3)

# --- indicator: show where each row came from ---
print('==' * 30)
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [0, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)

# indicator=True adds a column describing the source of each row.
res1 = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
# Passing a string gives that indicator column a custom name
# (the label was misspelled 'incicator_column').
res2 = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
print(res1)
print(res2)

# --- merge on the row index instead of a column ---
print('==' * 30)
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']}, index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'], 'B': ['D0', 'B2', 'B3']}, index=['K0', 'K2', 'K3'])
print(left)
print(right)

res1 = pd.merge(left, right, left_index=True, right_index=True, how='outer')
res2 = pd.merge(left, right, left_index=True, right_index=True, how='inner')
print(res1)
print(res2)

# --- suffixes: tell apart columns that share a name ---
print('==' * 30)
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
print(boys)
print(girls)

# Both frames have an 'age' column; the suffixes label the two copies
# (the second suffix was misspelled '_gilr').
res1 = pd.merge(boys, girls, on='k', suffixes=('_boy', '_girl'), how='inner')
print(res1)

● pandas plot画图

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# --- Series ---
# 1000 standard-normal draws, accumulated into a running sum and plotted
# as a line.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
data.plot()
plt.show()

# --- DataFrame ---
# Four columns of random draws; each column becomes one line.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=['A', 'B', 'C', 'D'],
)
print(data.head())
data = data.cumsum()
data.plot()
plt.show()

# Other plot methods: 'bar', 'hist', 'kde', 'area', 'scatter', 'hexbin', 'pie'
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=['A', 'B', 'C', 'D'],
)
print(data.head())
data = data.cumsum()

# x and y name the columns used for the horizontal and vertical axes.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
# Passing ax=ax draws the second scatter onto the same axes as the first.
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class 2', ax=ax)
plt.show()

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值