import numpy as np
import pandas as pd
# --- Merge two frames on a single shared key column -------------------------
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# on='key' joins the rows of both frames whose 'key' values match.
res = pd.merge(left, right, on='key')
# A bare `res` expression only displays inside a notebook/REPL; print it so
# the merged frame is also shown when this file runs as a script.
print(res)
# --- Merge on two key columns, comparing the different `how` modes ----------
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K3'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)

# on=['key1', 'key2'] treats the (key1, key2) pair as one composite join key.
# `how` accepts 'left', 'right', 'inner' or 'outer' and defaults to 'inner'.
# Bare `res` expressions only display in a notebook, so every result below is
# printed explicitly to make it visible when the file runs as a script.

# outer: keep every key pair that appears in either frame.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res)

# inner: keep only the key pairs that appear in both frames.
res = pd.merge(left, right, on=['key1', 'key2'], how='inner')
print(res)

# left: keep only the key pairs found in the left frame.
res = pd.merge(left, right, on=['key1', 'key2'], how='left')
print(res)

# right: keep only the key pairs found in the right frame.
res = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res)

# indicator=True adds a column describing where each merged row came from.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer',
               indicator=True)
print(res)

# Passing a string instead of True uses that string as the indicator
# column's name.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer',
               indicator='indicator_column')
print(res)
# --- Merge using the row index of both frames as the join key ---------------
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'],
     'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'],
     'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
print(left)
print(right)

# left_index/right_index=True join on the index labels instead of a column;
# how='outer' keeps labels that appear in either frame.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
# A bare `res` expression only displays inside a notebook/REPL; print it so
# the merged frame is also shown when this file runs as a script.
print(res)
# --- Merge frames that share a non-key column name ('age') ------------------
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K2'], 'age': [4, 5, 6]})
print(boys)
print(girls)

# Join on 'k'. Both frames have an 'age' column, so `suffixes` renames them to
# 'age_boy' / 'age_girl' in the result. Use this whenever two tables share a
# column name but the values must stay separate after merging.
res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='outer')
# A bare `res` expression only displays inside a notebook/REPL; print it so
# the merged frame is also shown when this file runs as a script.
print(res)
# --- pandas plot ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# Line plot of a single series: 1000 draws from np.random.randn (standard
# normal samples), accumulated with cumsum() and then plotted.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data.plot()
plt.show()

# Same idea with a DataFrame: four columns (A-D) of 1000 standard-normal
# draws each, accumulated column by column.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=['A', 'B', 'C', 'D'],
).cumsum()
print(data.head())  # head() with no argument shows the first 5 rows
data.plot()
plt.show()

# Scatter plot: draw A-vs-B first, then pass its axes via `ax` so A-vs-C is
# drawn onto the same axes.
ax = data.plot.scatter(
    x='A', y='B',
    color='Blue', label='class 1',
)
data.plot.scatter(
    x='A', y='C',
    color='Red', label='class 2',
    ax=ax,
)
plt.show()