"""Walk-through of ``pandas.merge``.

Covers merging on one or several key columns, the ``how`` join types,
the ``indicator`` flag, merging on the index, and ``suffixes`` for
overlapping column names.

This is a flat demonstration script: each section rebinds the module-level
names ``left``, ``right`` and ``res`` and prints frames to stdout.  The
"Recorded output" comments were captured from an earlier run; exact row
order of outer joins may differ between pandas versions.
"""
import numpy as np
import pandas as pd

# ---------------------------------------------------------------------------
# 1. Merge on a single key column
# ---------------------------------------------------------------------------
left = pd.DataFrame({
    'key': ['k0', 'k1', 'k2', 'k3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key': ['k0', 'k1', 'k2', 'k3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
# Rows are paired up by matching values in the 'key' column.
res = pd.merge(left, right, on='key')

# ---------------------------------------------------------------------------
# 2. Merge on two key columns (default join type is 'inner')
# ---------------------------------------------------------------------------
left = pd.DataFrame({
    'key1': ['k0', 'k1', 'k2', 'k3'],
    'key2': ['k0', 'k0', 'k1', 'k2'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['k0', 'k1', 'k2', 'k3'],
    'key2': ['k0', 'k1', 'k1', 'k2'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
# Both columns must match for a row pair to survive the inner join.
res = pd.merge(left, right, on=['key1', 'key2'])
print(left)
print(right)
print(res)
# Recorded output:
#   key1 key2   A   B
# 0   k0   k0  A0  B0
# 1   k1   k0  A1  B1
# 2   k2   k1  A2  B2
# 3   k3   k2  A3  B3
#   key1 key2   C   D
# 0   k0   k0  C0  D0
# 1   k1   k1  C1  D1
# 2   k2   k1  C2  D2
# 3   k3   k2  C3  D3
#   key1 key2   A   B   C   D
# 0   k0   k0  A0  B0  C0  D0
# 1   k2   k1  A2  B2  C2  D2
# 2   k3   k2  A3  B3  C3  D3

# ---------------------------------------------------------------------------
# 3. Choosing the join type with ``how``
# ---------------------------------------------------------------------------
# 'outer' keeps key pairs from either side; missing cells become NaN.
res = pd.merge(left, right, on=['key1', 'key2'], how='outer')
print(res)
# Recorded output (row order may vary with the pandas version):
#   key1 key2    A    B    C    D
# 0   k0   k0   A0   B0   C0   D0
# 1   k1   k0   A1   B1  NaN  NaN
# 2   k2   k1   A2   B2   C2   D2
# 3   k3   k2   A3   B3   C3   D3
# 4   k1   k1  NaN  NaN   C1   D1

# 'right' keeps every row of ``right`` and fills unmatched left columns
# with NaN.
res = pd.merge(left, right, on=['key1', 'key2'], how='right')
print(res)
# Recorded output:
#   key1 key2    A    B   C   D
# 0   k0   k0   A0   B0  C0  D0
# 1   k1   k1  NaN  NaN  C1  D1
# 2   k2   k1   A2   B2  C2  D2
# 3   k3   k2   A3   B3  C3  D3

# ---------------------------------------------------------------------------
# 4. Tracking row provenance with ``indicator``
# ---------------------------------------------------------------------------
# indicator=True appends a '_merge' column telling where each row came from
# (both / left_only / right_only).  The default is indicator=False.
# No ``on`` is given, so the columns shared by both frames are used as keys.
res = pd.merge(left, right, how='outer', indicator=True)
print(res)
# Recorded output (row order may vary with the pandas version):
#   key1 key2    A    B    C    D      _merge
# 0   k0   k0   A0   B0   C0   D0        both
# 1   k1   k0   A1   B1  NaN  NaN   left_only
# 2   k2   k1   A2   B2   C2   D2        both
# 3   k3   k2   A3   B3   C3   D3        both
# 4   k1   k1  NaN  NaN   C1   D1  right_only

# Passing a string instead of True gives the indicator column a custom name.
# NOTE: the name below is spelled 'indictaor_column' on purpose-preserved
# basis; it is a runtime column label and is kept exactly as written.
res = pd.merge(left, right, how="outer", indicator='indictaor_column')
# Show the result so the recorded output below is actually produced.
print(res)
# Recorded output (row order may vary with the pandas version):
#   key1 key2    A    B    C    D indictaor_column
# 0   k0   k0   A0   B0   C0   D0             both
# 1   k1   k0   A1   B1  NaN  NaN        left_only
# 2   k2   k1   A2   B2   C2   D2             both
# 3   k3   k2   A3   B3   C3   D3             both
# 4   k1   k1  NaN  NaN   C1   D1       right_only

# ---------------------------------------------------------------------------
# 5. Merging on the index instead of on columns
# ---------------------------------------------------------------------------
left = pd.DataFrame(
    {
        'A': ['A0', 'A1', 'A2', 'A3'],
        'B': ['B0', 'B1', 'B2', 'B3'],
    },
    index=['k0', 'k1', 'k2', 'k3'],
)
right = pd.DataFrame(
    {
        'C': ['C0', 'C1', 'C2', 'C3'],
        'D': ['D0', 'D1', 'D2', 'D3'],
    },
    index=['k0', 'k1', 'k2', 'k3'],
)
print(left)
print(right)
# left_index/right_index make the row labels act as the join keys.
res = pd.merge(left, right, left_index=True, right_index=True, how='outer')
print(res)
# Recorded output:
#      A   B
# k0  A0  B0
# k1  A1  B1
# k2  A2  B2
# k3  A3  B3
#      C   D
# k0  C0  D0
# k1  C1  D1
# k2  C2  D2
# k3  C3  D3
#      A   B   C   D
# k0  A0  B0  C0  D0
# k1  A1  B1  C1  D1
# k2  A2  B2  C2  D2
# k3  A3  B3  C3  D3

# ---------------------------------------------------------------------------
# 6. Disambiguating overlapping column names with ``suffixes``
# ---------------------------------------------------------------------------
boys = pd.DataFrame({'key': ['k0', 'k1', 'k2', 'k3'], 'Age': [1, 2, 3, 4]})
girls = pd.DataFrame({'key': ['k0', 'k1', 'k2', 'k3'], 'Age': [5, 6, 7, 8]})
# Both frames carry an 'Age' column; the suffixes are appended to tell the
# left and right copies apart in the merged frame.
res = pd.merge(boys, girls, on='key', how='inner', suffixes=('_boy', '_girl'))
print(res)
# Recorded output:
#   key  Age_boy  Age_girl
# 0  k0        1         5
# 1  k1        2         6
# 2  k2        3         7
# 3  k3        4         8
关于Pandas的基本用法_06
最新推荐文章于 2024-09-17 23:15:58 发布