merge
按指定列进行合并
import pandas as pd
df1 = pd.read_csv('arr.csv')
print(df1)
# date s1 s2 s3 s4 s5
# 0 05-21 27.93 28.18 29.39 40.52 26.26
# 1 05-22 58.08 50.61 51.62 48.55 54.03
# 2 05-23 38.67 31.73 57.91 59.23 49.08
# 3 05-24 45.83 31.48 45.94 71.21 46.53
# 4 05-25 70.26 55.96 53.81 58.48 43.23
# 5 05-26 46.61 22.73 45.77 63.63 56.79
# 6 05-27 49.73 40.47 69.13 55.16 58.71
# 7 05-28 34.02 42.02 28.75 34.90 26.43
# 8 05-29 56.64 31.39 43.43 54.65 44.97
# 9 05-30 57.28 64.21 55.79 68.03 54.16
df2 = pd.read_csv('weather.csv')
print(df2)
# date weather
# 0 05-21 rainy
# 1 05-22 sunny
# 2 05-23 sunny
# 3 05-24 cloudy
# 4 05-25 sunny
# 5 05-26 cloudy
# 6 05-27 cloudy
# 7 05-28 rainy
# 8 05-29 sunny
# 9 05-30 sunny
df_new = pd.DataFrame([['05-30',57.28,64.21,55.70,0,0]],columns=['date','s1','s2','s3','s4','s5'])
df_appended = df1.append(df_new,ignore_index=True)
print(df_appended)
# date s1 s2 s3 s4 s5
# 0 05-21 27.93 28.18 29.39 40.52 26.26
# 1 05-22 58.08 50.61 51.62 48.55 54.03
# 2 05-23 38.67 31.73 57.91 59.23 49.08
# 3 05-24 45.83 31.48 45.94 71.21 46.53
# 4 05-25 70.26 55.96 53.81 58.48 43.23
# 5 05-26 46.61 22.73 45.77 63.63 56.79
# 6 05-27 49.73 40.47 69.13 55.16 58.71
# 7 05-28 34.02 42.02 28.75 34.90 26.43
# 8 05-29 56.64 31.39 43.43 54.65 44.97
# 9 05-30 57.28 64.21 55.79 68.03 54.16
# 10 05-30 57.28 64.21 55.70 0.00 0.00
print(df_appended.merge(df2,on='date'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny
# 10 05-30 57.28 64.21 55.70 0.00 0.00 sunny
df_new = pd.DataFrame([['06-01',57.28,64.21,55.70,0,0]],columns = ['date','s1','s2','s3','s4','s5'])
df_appended2 = df1.append(df_new,ignore_index=True)
print(df_appended2)
# date s1 s2 s3 s4 s5
# 0 05-21 27.93 28.18 29.39 40.52 26.26
# 1 05-22 58.08 50.61 51.62 48.55 54.03
# 2 05-23 38.67 31.73 57.91 59.23 49.08
# 3 05-24 45.83 31.48 45.94 71.21 46.53
# 4 05-25 70.26 55.96 53.81 58.48 43.23
# 5 05-26 46.61 22.73 45.77 63.63 56.79
# 6 05-27 49.73 40.47 69.13 55.16 58.71
# 7 05-28 34.02 42.02 28.75 34.90 26.43
# 8 05-29 56.64 31.39 43.43 54.65 44.97
# 9 05-30 57.28 64.21 55.79 68.03 54.16
# 10 06-01 57.28 64.21 55.70 0.00 0.00
print(df_appended2.merge(df2,on='date',how='left'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny
# 10 06-01 57.28 64.21 55.70 0.00 0.00 NaN
print(df_appended2.merge(df2,on='date',how='right'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny
print(df_appended2.merge(df2,on='date',how='outer'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny
# 10 06-01 57.28 64.21 55.70 0.00 0.00 NaN
#how不写 默认是inner
print(df_appended2.merge(df2,on='date',how='inner'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny
print(df_appended2.merge(df1,on=['date','s1']))
# date s1 s2_x s3_x s4_x s5_x s2_y s3_y s4_y s5_y
# 0 05-21 27.93 28.18 29.39 40.52 26.26 28.18 29.39 40.52 26.26
# 1 05-22 58.08 50.61 51.62 48.55 54.03 50.61 51.62 48.55 54.03
# 2 05-23 38.67 31.73 57.91 59.23 49.08 31.73 57.91 59.23 49.08
# 3 05-24 45.83 31.48 45.94 71.21 46.53 31.48 45.94 71.21 46.53
# 4 05-25 70.26 55.96 53.81 58.48 43.23 55.96 53.81 58.48 43.23
# 5 05-26 46.61 22.73 45.77 63.63 56.79 22.73 45.77 63.63 56.79
# 6 05-27 49.73 40.47 69.13 55.16 58.71 40.47 69.13 55.16 58.71
# 7 05-28 34.02 42.02 28.75 34.90 26.43 42.02 28.75 34.90 26.43
# 8 05-29 56.64 31.39 43.43 54.65 44.97 31.39 43.43 54.65 44.97
# 9 05-30 57.28 64.21 55.79 68.03 54.16 64.21 55.79 68.03 54.16
join
按索引进行合并
import pandas as pd
df1 = pd.read_csv('arr.csv',index_col='date')
df2 = pd.read_csv('weather.csv',index_col='date')
print(df1.join(df2))
# s1 s2 s3 s4 s5 weather
# date
# 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 05-30 57.28 64.21 55.79 68.03 54.16 sunny
df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
print(df1.join(df2,lsuffix='A',rsuffix='B'))
# dateA s1 s2 s3 s4 s5 dateB weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 05-21 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 05-22 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 05-23 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 05-24 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 05-25 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 05-26 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 05-27 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 05-28 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 05-29 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 05-30 sunny
df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
#date由数据列变成索引列了
print(df1.set_index('date').join(df2.set_index('date')))
# s1 s2 s3 s4 s5 weather
# date
# 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 05-30 57.28 64.21 55.79 68.03 54.16 sunny
df1 = pd.read_csv('arr.csv')
df2 = pd.read_csv('weather.csv')
print(df1.join(df2.set_index('date'),on = 'date'))
# date s1 s2 s3 s4 s5 weather
# 0 05-21 27.93 28.18 29.39 40.52 26.26 rainy
# 1 05-22 58.08 50.61 51.62 48.55 54.03 sunny
# 2 05-23 38.67 31.73 57.91 59.23 49.08 sunny
# 3 05-24 45.83 31.48 45.94 71.21 46.53 cloudy
# 4 05-25 70.26 55.96 53.81 58.48 43.23 sunny
# 5 05-26 46.61 22.73 45.77 63.63 56.79 cloudy
# 6 05-27 49.73 40.47 69.13 55.16 58.71 cloudy
# 7 05-28 34.02 42.02 28.75 34.90 26.43 rainy
# 8 05-29 56.64 31.39 43.43 54.65 44.97 sunny
# 9 05-30 57.28 64.21 55.79 68.03 54.16 sunny