首先查询每一个学生的全部成绩平均值
SELECT
CONCAT(SUBSTRING(student, 1, 1), '**') as student,
COUNT(*) as count,
AVG(score) AS average_score
FROM
student_scores
GROUP BY
student
ORDER BY
student;
结果:
利用子查询,将每个学生的全部成绩降序按列编号,并筛选出前50%
SELECT
CONCAT(SUBSTRING(student, 1, 1), '**') as student,
COUNT(*) AS score_count,
total_count AS total_score_count,
AVG(score) AS average_score
FROM
(
SELECT
student,
score,
ROW_NUMBER() OVER (PARTITION BY student ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student) AS total_count
FROM
student_scores
) subquery
WHERE
row_num <= CEIL(total_count * 0.5)
GROUP BY
student;
结果:
将全部成绩的平均值也显示出来
SELECT
CONCAT(SUBSTRING(student, 1, 1), '**') as student,
AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'average_score_top_50%',
AVG(score) AS average_score_all,
total_count,
COUNT(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'count_top_50%'
FROM
(
SELECT
student,
score,
ROW_NUMBER() OVER (PARTITION BY student ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student) AS total_count
FROM
student_scores
) subquery
GROUP BY
student;
结果:
继续增加功能,将成绩超过80条的显示前50%成绩平均值,未超过的计算全部成绩平均值,以及进行部分优化。
使用ROW_NUMBER()函数为每个学生及其分数分配一个行号,按降序排列分数。 使用COUNT()函数计算每个学生的总分。
使用CONCAT()函数将学生的首字母与**连接,进行姓名脱敏。
使用CASE语句根据学生的总分判断是否大于80。如果是,则计算前50%分数的平均值;否则,计算所有分数的平均值。
完整SQL语句:
WITH student_percentile_scores AS (
SELECT
student,
score,
ROW_NUMBER() OVER (PARTITION BY student ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student) AS total_count
FROM
student_scores
)
SELECT
CONCAT(SUBSTRING(student, 1, 1), '**') as student,
CASE
WHEN total_count > 80 THEN ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2)
ELSE ROUND(AVG(score), 2)
END AS average_score,
ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2) AS 'average_score_top_50%',
ROUND(AVG(score), 2) AS average_score_all,
total_count,
COUNT(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'count_top_50%'
FROM
student_percentile_scores
GROUP BY
student;
结果:
添加名称长度判断
WITH student_percentile_scores AS (
SELECT
student,
score,
ROW_NUMBER() OVER (PARTITION BY student ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student) AS total_count
FROM
student_scores
)
SELECT
CONCAT(
SUBSTRING(student, 1, 1),
CASE
WHEN LENGTH(student) > 6 THEN '**'
ELSE '*'
END
) AS student,
CASE
WHEN total_count > 80 THEN ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2)
ELSE ROUND(AVG(score), 2)
END AS average_score,
ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2) AS 'average_score_top_50%',
ROUND(AVG(score), 2) AS average_score_all,
total_count,
COUNT(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'count_top_50%'
FROM
student_percentile_scores
GROUP BY
student;
分组修改为学院和学生
WITH student_percentile_scores AS (
SELECT
student,
college,
score,
ROW_NUMBER() OVER (PARTITION BY student, college ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student, college) AS total_count
FROM
student_scores
)
SELECT
CONCAT(
SUBSTRING(student, 1, 1),
CASE
WHEN LENGTH(student) > 6 THEN '**'
ELSE '*'
END
) AS student,
college,
CASE
WHEN total_count > 80 THEN ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2)
ELSE ROUND(AVG(score), 2)
END AS average_score,
ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2) AS 'average_score_top_50%',
ROUND(AVG(score), 2) AS average_score_all,
total_count,
COUNT(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'count_top_50%'
FROM
student_percentile_scores
GROUP BY
student,
college;
结果:
继续添加每个分数数量的统计
WITH student_percentile_scores AS (
SELECT
student,
college,
score,
ROW_NUMBER() OVER (PARTITION BY student, college ORDER BY score DESC) AS row_num,
COUNT(*) OVER (PARTITION BY student, college) AS total_count
FROM
student_scores
)
SELECT
CONCAT(
SUBSTRING(student, 1, 1),
CASE
WHEN LENGTH(student) > 6 THEN '**'
ELSE '*'
END
) AS student,
college,
CASE
WHEN total_count > 80 THEN ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2)
ELSE ROUND(AVG(score), 2)
END AS average_score,
ROUND(AVG(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END), 2) AS 'average_score_top_50%',
ROUND(AVG(score), 2) AS average_score_all,
total_count,
COUNT(CASE WHEN row_num <= CEIL(total_count * 0.5) THEN score END) AS 'count_top_50%',
COUNT(CASE WHEN score = 5 THEN score END) as s5,
COUNT(CASE WHEN score = 4 THEN score END) as s4,
COUNT(CASE WHEN score = 3 THEN score END) as s3,
COUNT(CASE WHEN score = 2 THEN score END) as s2,
COUNT(CASE WHEN score = 1 THEN score END) as s1
FROM
student_percentile_scores
GROUP BY
student,
college;