Pandas使用实例
需求: 使用pandas读取excel, 根据类目分类排序,然后对于每个类目的前15%添加标记
import pandas as pd
import os
file_names = os.listdir('./')
for each in file_names:
if each.endswith('.xlsx'):
file_name = each
break
print('解析数据中', file_name)
df = pd.read_excel(file_name)
categories = list(set(df['类目']))
final_df = None
for name in categories:
cat_df = df[df['类目'] == name]
cat_df = cat_df.sort_values(by='成交金额', ascending=False)
df_length = cat_df.shape[0]
first_15_percent = int(df_length * 0.15)
df_1 = cat_df[0:first_15_percent]
df_1['标记'] = 1
df_2 = cat_df[first_15_percent:df_length]
df_2['标记'] = 0
new_df = pd.concat([df_1, df_2])
final_df = pd.concat([final_df, new_df])
new_file = file_name.split('.')[0] + '.csv'
final_df.to_csv(new_file)
print(df.head(100))