# Merge two DataFrames by one or more keys (similar to a database join).

# Simple example: merge on a single shared column.
left = pd.DataFrame({
    'key': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['K1', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)
res = pd.merge(left, right, on='key')
print(res)

# Consider two keys.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
print(left)
print(right)
# how = ['left', 'right', 'outer', 'inner']
res = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res)

# indicator: record in an extra column which input each row came from.
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
print(res)
# Give the indicator a custom name.
res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
print(res)

# Merged by index.
left = pd.DataFrame(
    {'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
    index=['K0', 'K1', 'K2'],
)
right = pd.DataFrame(
    {'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
    index=['K0', 'K2', 'K3'],
)
print(left)
print(right)
# left_index and right_index: use the row labels as the join keys.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
print(res)
res = pd.merge(left, right, left_index=True, right_index=True, how='inner')
print(res)

# Handle overlapping column names: both frames have an 'age' column, so
# suffixes are appended to tell the two apart in the result.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
res = pd.merge(boys, girls, on='k', suffixes=('_boy', '_girl'), how='outer')
print(boys)
print(girls)
print(res)
# join works similarly to merge.
# 8. pandas plot — plotting
# Plot data.

# Series: accumulate 1000 random samples into a running sum and plot it.
data = pd.Series(np.random.randn(1000), index=np.arange(1000))
data = data.cumsum()
data.plot()
plt.show()

# DataFrame: the same idea with four columns, A through D.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list('ABCD'),
)
print(data.head(3))
data = data.cumsum()
data.plot()
plt.show()

# plot methods:
# 'bar' 'hist' 'box' 'kde' 'area' 'scatter' 'hexbin' 'pie'
# Draw two scatter series on the same axes by passing the first call's
# return value as `ax` to the second call.
ax = data.plot.scatter(x='A', y='B', color='DarkBlue', label='Class 1')
data.plot.scatter(x='A', y='C', color='DarkGreen', label='Class2', ax=ax)
plt.show()