list(untracked = "jinxiaosong/analysis/some_join.ipynb",
untracked = "jinxiaosong/data/pivot_help.xlsx")
# add @slsongge data
import pandas as pd
# Load the pivot helper workbook (relative path, resolved against the current working directory).
df_raw = pd.read_excel(io="../data/pivot_help.xlsx")
# Optional preview of the first rows: df_raw.head()
# Print the frame summary: index range, columns, non-null counts and dtypes.
df_raw.info()
RangeIndex: 276 entries, 0 to 275
Data columns (total 4 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 cut 276 non-null object
1 color 276 non-null object
2 clarity 276 non-null object
3 n 276 non-null int64
dtypes: int64(1), object(3)
memory usage: 8.8+ KB
#### Derived proportions [this code uses two aggregations and one merge, which is too cumbersome; I want to optimize it]
# First aggregation: total of `n` within each `cut`. agg(['sum']) names the
# result column `sum`, and reset_index() turns `cut` back into a regular column.
df_gb_cut_sum = df_raw.groupby('cut')['n'].agg(['sum']).reset_index()
df_gb_cut_color_sum = df_raw.groupby(['cut','color'])[