删除 ad_position_id 这一列中只出现一次的值:先把只出现一次的 ad_position_id 放到一个列表中,再删除这些值对应的行。
# Load the preprocessed data set and report its row count before filtering.
train_flle_path = 'data/pre_data3.csv'
source_data = pd.read_csv(train_flle_path)
print(source_data.shape[0])

# Frequency of each ad_position_id, held as a one-column frame named 'nums'.
position_counts = source_data.ad_position_id.value_counts()
d = position_counts.to_frame('nums')

# Keep only the ids that occur exactly once; these are the ones to remove.
d = d.loc[d['nums'].eq(1)]
delindexs = d.index
print(len(delindexs))

# Retain the rows whose ad_position_id is not one of the single-occurrence
# ids, then report the row count after filtering.
is_singleton = source_data['ad_position_id'].isin(delindexs)
source_data = source_data.loc[~is_singleton]
print(source_data.shape[0])