import pandas as pd
# Pair every `txt` value with the `txt` of the following row that shares the
# same `id`, then dump the result as a headerless, tab-separated text file.

# The input has no header row, so the column names are supplied explicitly.
# NOTE(review): `xuhao` is presumably the sequence number of the row within
# its `id` group -- confirm against the data producer.
train_data = pd.read_csv(
    'E:\\tmp\\2\\xuhao',
    sep='\t',
    encoding='utf-8',
    names=['id', 'xuhao', 'txt'],
)

# read_csv already returns a DataFrame, so no re-wrapping is needed; `df` is
# kept as a second name for the same table.
df = train_data

# shift(-1) inside each `id` group pulls the next row's `txt` up by one
# position (in file order). The last row of every group has no successor and
# therefore gets NaN.
df['new_data'] = df.groupby('id')['txt'].shift(-1)

# Quick visual sanity check of the first rows.
print(df.head(10))

# `header=False` is the documented way to suppress the column-name row.
# The row index is still written as the first column (to_csv default).
df.to_csv(
    'E:\\tmp\\2\\202008182_3.txt',
    sep='\t',
    encoding='utf-8',
    header=False,
)
# Sample output of print(df.head(10)):
#
#      id  xuhao  txt  new_data
# 0 104 1 start 嗯
# 1 104 2 嗯 嗯
# 2 104 3 嗯 NaN
# 3 181 1 start 嗯是的
# 4 181 2 嗯是的 好的,好的嗯
# 5 181 3 好的,好的嗯 NaN
# 6 158 1 start 那是
# 7 158 2 那是 呃我是
# 8 158 3 呃我是 嗯,那我我我我,我能
# 9 158 4 嗯,那我我我我,我能 NaN