DataFrame的合并

merge
按指定列进行合并

import pandas as pd
df1 = pd.read_csv('arr.csv')
print(df1)
#     date     s1     s2     s3     s4     s5
# 0  05-21  27.93  28.18  29.39  40.52  26.26
# 1  05-22  58.08  50.61  51.62  48.55  54.03
# 2  05-23  38.67  31.73  57.91  59.23  49.08
# 3  05-24  45.83  31.48  45.94  71.21  46.53
# 4  05-25  70.26  55.96  53.81  58.48  43.23
# 5  05-26  46.61  22.73  45.77  63.63  56.79
# 6  05-27  49.73  40.47  69.13  55.16  58.71
# 7  05-28  34.02  42.02  28.75  34.90  26.43
# 8  05-29  56.64  31.39  43.43  54.65  44.97
# 9  05-30  57.28  64.21  55.79  68.03  54.16


df2 = pd.read_csv('weather.csv')
print(df2)
#     date weather
# 0  05-21   rainy
# 1  05-22   sunny
# 2  05-23   sunny
# 3  05-24  cloudy
# 4  05-25   sunny
# 5  05-26  cloudy
# 6  05-27  cloudy
# 7  05-28   rainy
# 8  05-29   sunny
# 9  05-30   sunny

df_new = pd.DataFrame([['05-30',57.28,64.21,55.70,0,0]],columns=['date','s1','s2','s3','s4','s5'])
df_appended = df1.append(df_new,ignore_index=True)
print(df_appended)
#      date     s1     s2     s3     s4     s5
# 0   05-21  27.93  28.18  29.39  40.52  26.26
# 1   05-22  58.08  50.61  51.62  48.55  54.03
# 2   05-23  38.67  31.73  57.91  59.23  49.08
# 3   05-24  45.83  31.48  45.94  71.21  46.53
# 4   05-25  70.26  55.96  53.81  58.48  43.23
# 5   05-26  46.61  22.73  45.77  63.63  56.79
# 6   05-27  49.73  40.47  69.13  55.16  58.71
# 7   05-28  34.02  42.02  28.75  34.90  26.43
# 8   05-29  56.64  31.39  43.43  54.65  44.97
# 9   05-30  57.28  64.21  55.79  68.03  54.16
# 10  05-30  57.28  64.21  55.70   0.00   0.00

print(df_appended.merge(df2,on='date'))
#      date     s1     s2     s3     s4     s5 weather
# 0   05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1   05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2   05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3   05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4   05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5   05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6   05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7   05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8   05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9   05-30  57.28  64.21  55.79  68.03  54.16   sunny
# 10  05-30  57.28  64.21  55.70   0.00   0.00   sunny

df_new = pd.DataFrame([['06-01',57.28,64.21,55.70,0,0]],columns = ['date','s1','s2','s3','s4','s5'])
df_appended2 = df1.append(df_new,ignore_index=True)
print(df_appended2)
#      date     s1     s2     s3     s4     s5
# 0   05-21  27.93  28.18  29.39  40.52  26.26
# 1   05-22  58.08  50.61  51.62  48.55  54.03
# 2   05-23  38.67  31.73  57.91  59.23  49.08
# 3   05-24  45.83  31.48  45.94  71.21  46.53
# 4   05-25  70.26  55.96  53.81  58.48  43.23
# 5   05-26  46.61  22.73  45.77  63.63  56.79
# 6   05-27  49.73  40.47  69.13  55.16  58.71
# 7   05-28  34.02  42.02  28.75  34.90  26.43
# 8   05-29  56.64  31.39  43.43  54.65  44.97
# 9   05-30  57.28  64.21  55.79  68.03  54.16
# 10  06-01  57.28  64.21  55.70   0.00   0.00


print(df_appended2.merge(df2,on='date',how='left'))
#      date     s1     s2     s3     s4     s5 weather
# 0   05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1   05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2   05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3   05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4   05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5   05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6   05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7   05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8   05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9   05-30  57.28  64.21  55.79  68.03  54.16   sunny
# 10  06-01  57.28  64.21  55.70   0.00   0.00     NaN

print(df_appended2.merge(df2,on='date',how='right'))
#     date     s1     s2     s3     s4     s5 weather
# 0  05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1  05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2  05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3  05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4  05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5  05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6  05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7  05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8  05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9  05-30  57.28  64.21  55.79  68.03  54.16   sunny

print(df_appended2.merge(df2,on='date',how='outer'))
#      date     s1     s2     s3     s4     s5 weather
# 0   05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1   05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2   05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3   05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4   05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5   05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6   05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7   05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8   05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9   05-30  57.28  64.21  55.79  68.03  54.16   sunny
# 10  06-01  57.28  64.21  55.70   0.00   0.00     NaN


#how不写 默认是inner
print(df_appended2.merge(df2,on='date',how='inner'))
#     date     s1     s2     s3     s4     s5 weather
# 0  05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1  05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2  05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3  05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4  05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5  05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6  05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7  05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8  05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9  05-30  57.28  64.21  55.79  68.03  54.16   sunny

print(df_appended2.merge(df1,on=['date','s1']))
#     date     s1   s2_x   s3_x   s4_x   s5_x   s2_y   s3_y   s4_y   s5_y
# 0  05-21  27.93  28.18  29.39  40.52  26.26  28.18  29.39  40.52  26.26
# 1  05-22  58.08  50.61  51.62  48.55  54.03  50.61  51.62  48.55  54.03
# 2  05-23  38.67  31.73  57.91  59.23  49.08  31.73  57.91  59.23  49.08
# 3  05-24  45.83  31.48  45.94  71.21  46.53  31.48  45.94  71.21  46.53
# 4  05-25  70.26  55.96  53.81  58.48  43.23  55.96  53.81  58.48  43.23
# 5  05-26  46.61  22.73  45.77  63.63  56.79  22.73  45.77  63.63  56.79
# 6  05-27  49.73  40.47  69.13  55.16  58.71  40.47  69.13  55.16  58.71
# 7  05-28  34.02  42.02  28.75  34.90  26.43  42.02  28.75  34.90  26.43
# 8  05-29  56.64  31.39  43.43  54.65  44.97  31.39  43.43  54.65  44.97
# 9  05-30  57.28  64.21  55.79  68.03  54.16  64.21  55.79  68.03  54.16



join
按索引进行合并

import pandas as pd
df1 = pd.read_csv('arr.csv',index_col='date')
df2 = pd.read_csv('weather.csv',index_col='date')
print(df1.join(df2))
#           s1     s2     s3     s4     s5 weather
# date
# 05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 05-30  57.28  64.21  55.79  68.03  54.16   sunny


df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
print(df1.join(df2,lsuffix='A',rsuffix='B'))
#    dateA     s1     s2     s3     s4     s5  dateB weather
# 0  05-21  27.93  28.18  29.39  40.52  26.26  05-21   rainy
# 1  05-22  58.08  50.61  51.62  48.55  54.03  05-22   sunny
# 2  05-23  38.67  31.73  57.91  59.23  49.08  05-23   sunny
# 3  05-24  45.83  31.48  45.94  71.21  46.53  05-24  cloudy
# 4  05-25  70.26  55.96  53.81  58.48  43.23  05-25   sunny
# 5  05-26  46.61  22.73  45.77  63.63  56.79  05-26  cloudy
# 6  05-27  49.73  40.47  69.13  55.16  58.71  05-27  cloudy
# 7  05-28  34.02  42.02  28.75  34.90  26.43  05-28   rainy
# 8  05-29  56.64  31.39  43.43  54.65  44.97  05-29   sunny
# 9  05-30  57.28  64.21  55.79  68.03  54.16  05-30   sunny

df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
#date由数据列变成索引列了
print(df1.set_index('date').join(df2.set_index('date')))
#           s1     s2     s3     s4     s5 weather
# date
# 05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 05-30  57.28  64.21  55.79  68.03  54.16   sunny

df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
print(df1.join(df2.set_index('date'),on = 'date'))
#     date     s1     s2     s3     s4     s5 weather
# 0  05-21  27.93  28.18  29.39  40.52  26.26   rainy
# 1  05-22  58.08  50.61  51.62  48.55  54.03   sunny
# 2  05-23  38.67  31.73  57.91  59.23  49.08   sunny
# 3  05-24  45.83  31.48  45.94  71.21  46.53  cloudy
# 4  05-25  70.26  55.96  53.81  58.48  43.23   sunny
# 5  05-26  46.61  22.73  45.77  63.63  56.79  cloudy
# 6  05-27  49.73  40.47  69.13  55.16  58.71  cloudy
# 7  05-28  34.02  42.02  28.75  34.90  26.43   rainy
# 8  05-29  56.64  31.39  43.43  54.65  44.97   sunny
# 9  05-30  57.28  64.21  55.79  68.03  54.16   sunny

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值