实际背景:将如第一图所示的两个 df(DataFrame)合并成如第二图所示的数据格式。
解决方法:
import pandas as pd
# Source table 1: one 'rate' per (item, subject) pair.
# Every column is a list of single-character strings.
data = {
    'item': list("111122223333"),
    "subject": list("123412341234"),
    "rate": list("qazwsxedcrfv"),
}
# Source table 2: one 'mean' per (user, item, subject) triple; each
# (user, item) pair lists only three of the subjects found in table 1.
data1 = {
    "user": list('111111111222222222'),
    'item': list("111222333111222333"),
    "subject": list("123124234123124234"),
    "mean": list("123456789123456789"),
}
df1, df2 = pd.DataFrame(data), pd.DataFrame(data1)
for frame in (df1, df2):
    print(frame)

# Step 1: repeat df1's rows once for every distinct (user, item) pair that
# occurs in df2, so each user gets the full subject list of each item.
user_item_pairs = df2[['user', 'item']].drop_duplicates()
df = pd.merge(df1, user_item_pairs, on='item')
print(df)

# Step 2: bring in 'mean' from df2 on the full key. The outer join keeps
# rows without a match in df2; their 'mean' is left missing.
df = pd.merge(df, df2, how='outer', on=['user', 'item', 'subject'])
print(df)

# Final view: index the merged table by (user, item, subject).
grouped = df.groupby(['user', 'item', 'subject'])
print(grouped.first())