pandas

这篇博客详细介绍了Pandas库中`concat`和`merge`函数的使用,包括如何进行数据的纵向和横向合并,以及不同类型的连接方式(`inner`、`outer`、`left`、`right`)。同时,展示了如何用`suffixes`参数处理重叠的列名,以及利用`indicator`参数观察合并结果中每一行的来源。最后,讨论了如何结合matplotlib进行数据可视化,如绘制Series和DataFrame的累计和(`cumsum`)曲线。
摘要由CSDN通过智能技术生成

操作文件

![在这里插入图片描述](https://img-blog.csdnimg.cn/20210523115728318.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzUzNzM5NTQ4,size_16,color_FFFFFF,t_70)

在这里插入图片描述

concat参数ignore_index

import numpy as np
import pandas as pd
# concat: the ignore_index parameter
# Build three 3x4 DataFrames filled with 0.0, 1.0 and 2.0 respectively.
column_names = ['a', 'b', 'c', 'd']
df1 = pd.DataFrame(np.zeros((3, 4)), columns=column_names)
df2 = pd.DataFrame(np.ones((3, 4)), columns=column_names)
df3 = pd.DataFrame(np.full((3, 4), 2.0), columns=column_names)
for frame in (df1, df2, df3):
    print(frame)

frames = [df1, df2, df3]
# Stack the frames on top of each other (axis=0); ignore_index=True replaces
# the repeated 0..2 row labels with a fresh 0..8 range.
df4 = pd.concat(frames, axis=0, ignore_index=True)
# Place the frames side by side (axis=1); ignore_index=True replaces the
# repeated 'a'..'d' column labels with a fresh 0..11 range.
df5 = pd.concat(frames, axis=1, ignore_index=True)
print(df4)
print(df5)

concat的参数 join,可选值为 'inner' 或 'outer'

# concat: the join parameter ('inner' or 'outer')
# Two frames whose row labels and column labels only partially overlap.
block_shape = (3, 4)
df1 = pd.DataFrame(np.zeros(block_shape), index=[1, 2, 3], columns=list('abcd'))
df2 = pd.DataFrame(np.ones(block_shape), index=[2, 3, 4], columns=list('bcde'))
for frame in (df1, df2):
    print(frame)

pair = [df1, df2]
# join defaults to 'outer': the union of columns is kept and cells that one
# frame does not have are filled with NaN.
df3 = pd.concat(pair)
df4 = pd.concat(pair, join='outer')
print(df3)
print(df4)
# join='inner' keeps only the columns shared by both frames.
df5 = pd.concat(pair, join='inner', ignore_index=True)
print(df5)

merge

# merge on two key columns
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

join_keys = ['key1', 'key2']
# how may be 'left', 'right', 'outer' or 'inner'; 'inner' is the default and
# keeps only key pairs present in both frames.
res = left.merge(right, on=join_keys, how='inner')
print(res)
# how='left' keeps every row of `left`; columns coming from `right` are NaN
# where no matching key pair exists.
res1 = left.merge(right, on=join_keys, how='left')
print(res1)

# indicator: adds a column telling which side each merged row came from
# ('left_only', 'right_only' or 'both'); the column is named '_merge' by default.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)
outer_on_col1 = {'on': 'col1', 'how': 'outer'}
res = pd.merge(df1, df2, **outer_on_col1)
res1 = pd.merge(df1, df2, indicator=True, **outer_on_col1)
print(res)
print(res1)
# Passing a string instead of True gives the indicator column a custom name.
res2 = pd.merge(df1, df2, indicator='indicator_column', **outer_on_col1)
print(res2)

merged by index:通过 index 进行合并

# Two frames that share some, but not all, row labels.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
print(left)
print(right)

# left_index / right_index: merge does not join on the index by default;
# setting both flags makes the row labels the join key. `how` still accepts
# 'left', 'right', 'outer' and 'inner'.
by_index = {'left_index': True, 'right_index': True}
# 'outer' keeps every label from either frame, filling the gaps with NaN.
res = pd.merge(left, right, how='outer', **by_index)
# 'inner' keeps only the labels present in both frames.
res1 = pd.merge(left, right, how='inner', **by_index)
print(res)
print(res1)

# Overlapping column names: both frames have an 'age' column, so `suffixes`
# is used to tell the left and right versions apart in the result.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
res = boys.merge(girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(res)

pandas和matplotlib组合绘图

# Plotting pandas objects with matplotlib.
# `plt.show()` below needs pyplot in scope; without this import the snippet
# fails with NameError because nothing else in the file imports it.
import matplotlib.pyplot as plt

# Series: cumulative sum of 1000 standard-normal samples.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
# data.plot()  # uncomment to draw the Series; `data` is replaced just below

# DataFrame: four columns of standard-normal samples, accumulated per column.
data = pd.DataFrame(np.random.randn(1000, 4), index=np.arange(1000), columns=list("ABCD"))
data = data.cumsum()
data.plot()
plt.show()
# Other plot kinds offered by the .plot accessor:
# 'bar', 'hist', 'box', 'kde', 'area', 'scatter', 'hexbin', 'pie'
# Example: draw two scatter series on the same axes by passing `ax` along.
# ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label="Class 1")
# data.plot.scatter(x='A', y='C', color='LightGreen', label='Class 2', ax=ax)
#
# plt.show()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值