# Outer join: stack the rows and keep the union of both frames' columns.
res = pd.concat([df1, df2], join='outer')
res
# Inner join: keep only the columns both frames share, and renumber the rows.
res = pd.concat([df1, df2], join='inner', ignore_index=True)
res
# 10. Merging with append
# append
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat is the supported way to stack frames or add rows.
df1 = pd.DataFrame(np.ones((3, 4)) * 0, columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'])
df3 = pd.DataFrame(np.ones((3, 4)) * 1, columns=['a', 'b', 'c', 'd'], index=[2, 3, 4])
# Stack the three frames vertically; ignore_index renumbers the rows from 0.
res = pd.concat([df1, df2, df3], ignore_index=True)
res
s1 = pd.Series([1, 2, 3, 4], index=['a', 'b', 'c', 'd'])
# Add the Series as one extra row. It is first turned into a one-row frame so
# that its index labels line up with the column names of df1.
res = pd.concat([df1, s1.to_frame().T], ignore_index=True)
res
# merge
left = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                     'A': ['A0', 'A1', 'A2', 'A3'],
                     'B': ['B0', 'B1', 'B2', 'B3']})
right = pd.DataFrame({'key': ['K0', 'K1', 'K2', 'K3'],
                      'C': ['C0', 'C1', 'C2', 'C3'],
                      'D': ['D0', 'D1', 'D2', 'D3']})
# Show both inputs before merging (each statement on its own line so the
# script parses).
print(left)
print(right)
# Join the two frames on their shared 'key' column.
res = pd.merge(left, right, on='key')
res
# Merge on a composite key: rows pair up only when both 'key1' and 'key2' match.
left = pd.DataFrame(
    {
        'key1': ['K0', 'K0', 'K1', 'K2'],
        'key2': ['K0', 'K1', 'K0', 'K1'],
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    }
)
right = pd.DataFrame(
    {
        'key1': ['K0', 'K1', 'K1', 'K2'],
        'key2': ['K0', 'K0', 'K0', 'K0'],
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    }
)
res = left.merge(right, on=['key1', 'key2'])
res
# 11. Merging with merge
# indicator
df1 = pd.DataFrame({'col1': [0, 1], 'col_left': ['a', 'b']})
df2 = pd.DataFrame({'col1': [1, 2, 2], 'col_right': [2, 2, 2]})
print(df1)
print(df2)
# indicator=True adds a '_merge' column saying whether each row came from
# the left frame only, the right frame only, or both.
res = pd.merge(df1, df2, on='col1', how='outer', indicator=True)
# give the indicator a custom name
res = pd.merge(df1, df2, on='col1', how='outer', indicator='indicator_column')
res
# merged by index
left = pd.DataFrame({'A': ['A0', 'A1', 'A2'], 'B': ['B0', 'B1', 'B2']},
                    index=['K0', 'K1', 'K2'])
right = pd.DataFrame({'C': ['C0', 'C2', 'C3'], 'D': ['D0', 'D2', 'D3']},
                     index=['K0', 'K2', 'K3'])
print(left)
print(right)
# left_index and right_index: join on the row labels instead of a column.
# how='outer' keeps every label from either frame and fills the gaps with NaN.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
# Passing how='inner' instead would keep only K0 and K2, the labels present
# in both indexes.
res
# handle overlapping: both frames have an 'age' column with a different
# meaning, so suffixes are used to tell the two apart after merging.
boys = pd.DataFrame({'k': ['K0', 'K1', 'K2'], 'age': [1, 2, 3]})
girls = pd.DataFrame({'k': ['K0', 'K0', 'K3'], 'age': [4, 5, 6]})
# The inner join on 'k' keeps only K0; 'age' becomes 'age_boy' and 'age_girl'.
res = pd.merge(boys, girls, on='k', suffixes=['_boy', '_girl'], how='inner')
print(res)
# 12. Plotting with pandas
import matplotlib.pyplot as plt
# Plot the running total of 1000 standard-normal draws, indexed 0..999.
data = pd.Series(np.random.randn(1000), index=np.arange(1000)).cumsum()
data
data.plot()
plt.show()
# Plot the running totals of four columns (A-D) of standard-normal draws;
# DataFrame.plot draws one line per column.
data = pd.DataFrame(
    np.random.randn(1000, 4),
    index=np.arange(1000),
    columns=list('ABCD'),
).cumsum()
data.plot()
plt.show()