# groupBy aggregation: minimum, maximum, median, skewness and kurtosis per group.
# This is the approach that finally worked after several attempts.
from pyspark.sql.functions import first, collect_list, mean,stddev, skewness, kurtosis,min,max,expr,skewness,kurtosis
# Per-group summary of ``pts_num``: one result row per ``pass_stop_id``.
# NOTE: ``min`` and ``max`` below are the pyspark.sql.functions aggregates
# brought in by the import above, not the Python builtins.
(
    df.groupBy("pass_stop_id")
    .agg(
        min("pts_num"),
        max("pts_num"),
        mean("pts_num"),
        # Median, obtained as the approximate 50th percentile via a SQL expression.
        expr("percentile_approx(pts_num, 0.5)"),
        skewness("pts_num"),
        kurtosis("pts_num"),
        # First value of each column within the group.
        # NOTE(review): Spark documents ``first`` as order-dependent; confirm the
        # input ordering is what you expect before relying on these two values.
        first("pts_num"),
        first("pts_split_num"),
    )
    # Bring the aggregated rows back to the driver as a list of Row objects.
    .collect()
)
# Tag: pyspark
# Blog footer: latest recommended article published on 2024-07-25 23:12:39