利用group by 计算分类占比:对比 sql 和 python
数据有col_1,col_2,col_3如下:
SELECT A.*,ROUND(COL3/TOTAL1,2) AS RATIO FROM
(SELECT * FROM TTEST
LEFT JOIN
(SELECT COL1 AS TYPEB,SUM(COL3) AS TOTAL1
FROM TTEST
GROUP BY COL1) as B
ON ttest.COL1 = B.TYPEB) A
import pandas as pd
df = pd.read_excel(r'C:\Users\xiaoxiannv\Desktop\t_test.xlsx')
ratio_0 = df.groupby('col_1').agg({'col_3':'sum'})
ratio_0.rename(columns={'col_3': 'sum'}, inplace=True)ratio_0.reset_index()
# 如果没有指明列名会将重叠列的列名作为键
df1 = pd.merge(df, ratio_0, on='col_1')
# 若两df所要连接的键不同,则可以分别制定,默认也是inner
df1['ratio'] = round(df1["col_3"]/df1["sum"],2)
print(df1)