持续更新中
1.删除某列数据
del s['to_stamp_s_x']
2.保存为utf-8 数据csv格式,确保不乱码
d.to_csv('E:/data/franchisee/s2.csv', encoding='utf_8_sig')
3.合并数据集 inner、left、right、outer
s = pd.merge(s, d, how='inner', on='brand_fee_group_id')
s = pd.merge(df_new_to, df_new_add, how='outer', on='brand_fee_group_id')
df_new_to = pd.concat([df_new_to, df_new_add], axis=0)
4.填充某列数值
s['to_stamp_x'] = s['to_stamp_x'].fillna(0)
5.解析格式:object 转为数值类型(errors='coerce' 会把无法解析的值置为 NaN,再用 0 填充;如需 int 类型可再接 .astype(int))
df_new_add['id'] = pd.to_numeric(df_new_add['id'], errors='coerce').fillna(0)
6.平移数据列 -1 表示同列下一行上移
df_new_add['to_stamp_s'] = df_new_add.groupby([df_new_add['brand_fee_group_id']])['to_stamp'].shift(-1)
7.重命名列名称
df_new_add.rename(columns={0: 'brand_fee_group_id', 1: 'id', 2: 'to_stamp'}, inplace=True)
8.去除文件中完全重复的行
df_new_to = df_new_to.drop_duplicates()
9.切割数据
df_new_to = df_new_to[0].str.split(',', expand=True)
10.list 转dataframe
df_new_to = DataFrame(list_to)
11.zip 灵活拼接dataframe
list_to = []
list_from = []
list_add = []
list_all = []
for from_shop_id, from_stamp, from_id, shop_id, to_stamp, to_id in zip(df_from['shop_id'], df_from['from_stamp'], df_from['from_id'], df_to['shop_id'], df_to['to_stamp'], df_to['to_id']):
    # print(from_shop_id)
    for brand_fee_group_id, id, adjust_tp, update_tp in zip(df['brand_fee_group_id'], df['id'], df['adjust_tp'], df['update_tp']):
        ...
12.lambda
df['adjust_tp'] = df['adjust_date'].apply( lambda x: time.mktime(time.strptime(str(x), '%Y-%m-%d %H:%M:%S')))
df_result['o_half_standard'] = df_result['franch_count'].apply( lambda x: 15000 if x == 1 else (14000 if x == 2 else (13000 if x == 3 else ( 12000 if x == 4 else (11000 if x == 5 else (10000 if x == 6 else (9000 if x == 7 else 8000)))))))
13.分组统计次数,增加列名
df_counts = x.groupby('brand_fee_group_id')['brand_fee_group_id'].count().reset_index(name='franch_count')
14.lambda 调用自定义函数的方式(axis=1 表示按行传入)
df_result['n_sap_system'] = df_result.apply( lambda row: second_half_year(row['adjust_tp'], row['o_half_standard'], row['n_half_standard'], row['update_tp']), axis=1)