转载：Hive sql语句必练50题-入门到精通(2)

天长地久长久

已于 2023-02-17 19:54:11 修改

阅读量64

点赞数

分类专栏： bigdata 数据库文章标签： sql hive 数据库

于 2022-09-04 10:56:11 首次发布

原文链接：https://blog.csdn.net/elizabethxxy/article/details/108816295?spm=1001.2014.3001.5502

版权

bigdata 同时被 2 个专栏收录

8 篇文章 0 订阅

订阅专栏

数据库

8 篇文章 0 订阅

订阅专栏

原始链接：https://blog.csdn.net/Thomson617/article/details/83280617

Hive sql语句必练50题-入门到精通(2)

承接: Hive sql语句必练50题-入门到精通(1)

– 21、查询不同老师所教不同课程平均分从高到低显示:
– 方法1


   
   
     
     
      
      
     
     
     
     
      
      
       
       select course.c_id,course.t_id,t_name,round(avg(s_score),
       
       2)
       
       as avgscore 
       
       from course
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join teacher 
       
       on teacher.t_id=course.t_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join score 
       
       on course.c_id=score.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       group 
       
       by course.c_id,course.t_id,t_name 
       
       order 
       
       by avgscore desc;

– 方法2


   
   
     
     
      
      
     
     
     
     
      
      
       
       select course.c_id,course.t_id,t_name,round(avg(s_score),
       
       2)
       
       as avgscore 
       
       from course,teacher,score
      
      
     
     

     
     
      
      
     
     
     
     
      
         
       
       where teacher.t_id=course.t_id 
       
       and course.c_id=score.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       group 
       
       by course.c_id,course.t_id,t_name 
       
       order 
       
       by avgscore desc;

– 22、查询所有课程的成绩第2名到第3名的学生信息及该课程成绩:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select tmp1.
       
       * 
       
       from
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
           (
       
       select 
       
       * 
       
       from score 
       
       where c_id
       
       =
       
       '01' 
       
       order 
       
       by s_score 
       
       desc limit 
       
       3)tmp1
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       order 
       
       by s_score 
       
       asc limit 
       
       2
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       union 
       
       all 
       
       select tmp2.
       
       * 
       
       from
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
           (
       
       select 
       
       * 
       
       from score 
       
       where c_id
       
       =
       
       '02' 
       
       order 
       
       by s_score 
       
       desc limit 
       
       3)tmp2
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       order 
       
       by s_score 
       
       asc limit 
       
       2
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       union 
       
       all 
       
       select tmp3.
       
       * 
       
       from
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
           (
       
       select 
       
       * 
       
       from score 
       
       where c_id
       
       =
       
       '03' 
       
       order 
       
       by s_score 
       
       desc limit 
       
       3)tmp3
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       order 
       
       by s_score 
       
       asc limit 
       
       2;

– 23、统计各科成绩各分数段人数：课程编号,课程名称,[100-85],[85-70],[70-60],[0-60]及所占百分比


   
   
     
     
      
      
     
     
     
     
      
      
       
       select c.c_id,c.c_name,tmp1.s0_60, tmp1.percentum,tmp2.s60_70, tmp2.percentum,tmp3.s70_85, tmp3.percentum,tmp4.s85_100, tmp4.percentum
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       from course c
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join(
       
       select c_id,sum(
       
       case 
       
       when s_score<
       
       60 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )
       
       as s0_60,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                      round(
       
       100*sum(
       
       case 
       
       when s_score<
       
       60 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )/count(c_id),
       
       2)
       
       as percentum
      
      
     
     

     
     
      
      
     
     
     
     
      
           
       
       from score 
       
       group 
       
       by c_id)tmp1 
       
       on tmp1.c_id =c.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       left 
       
       join(
       
       select c_id,sum(
       
       case 
       
       when s_score<
       
       70 
       
       and s_score>=
       
       60 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )
       
       as s60_70,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                      round(
       
       100*sum(
       
       case 
       
       when s_score<
       
       70 
       
       and s_score>=
       
       60 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )/count(c_id),
       
       2)
       
       as percentum
      
      
     
     

     
     
      
      
     
     
     
     
      
           
       
       from score 
       
       group 
       
       by c_id)tmp2 
       
       on tmp2.c_id =c.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       left 
       
       join(
       
       select c_id,sum(
       
       case 
       
       when s_score<
       
       85 
       
       and s_score>=
       
       70 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )
       
       as s70_85,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                      round(
       
       100*sum(
       
       case 
       
       when s_score<
       
       85 
       
       and s_score>=
       
       70 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )/count(c_id),
       
       2)
       
       as percentum
      
      
     
     

     
     
      
      
     
     
     
     
      
           
       
       from score 
       
       group 
       
       by c_id)tmp3 
       
       on tmp3.c_id =c.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       left 
       
       join(
       
       select c_id,sum(
       
       case 
       
       when s_score>=
       
       85 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )
       
       as s85_100,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                      round(
       
       100*sum(
       
       case 
       
       when s_score>=
       
       85 
       
       then 
       
       1 
       
       else 
       
       0 
       
       end )/count(c_id),
       
       2)
       
       as percentum
      
      
     
     

     
     
      
      
     
     
     
     
      
           
       
       from score 
       
       group 
       
       by c_id)tmp4 
       
       on tmp4.c_id =c.c_id;

– 24、查询学生平均成绩及其名次:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select tmp.
       
       *,
       
       row_number()
       
       over(
       
       order 
       
       by tmp.avgScore 
       
       desc) Ranking 
       
       from
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
         (
       
       select student.s_id,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                 student.s_name,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
                 round(
       
       avg(score.s_score),
       
       2) 
       
       as avgScore
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       from student 
       
       join score
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       on student.s_id
       
       =score.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       group 
       
       by student.s_id,student.s_name)tmp
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       order 
       
       by avgScore 
       
       desc;

– 25、查询各科成绩前三名的记录

–课程id为01的前三名


   
   
     
     
      
      
     
     
     
     
      
      
       
       select score.c_id,course.c_name,student.s_name,s_score 
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join student 
       
       on student.s_id=score.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join course 
       
       on  score.c_id=
       
       '01' and course.c_id=score.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       order 
       
       by s_score desc limit 
       
       3;

–课程id为02的前三名


   
   
     
     
      
      
     
     
     
     
      
      
       
       select score.c_id,course.c_name,student.s_name,s_score 
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join student 
       
       on student.s_id=score.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join course 
       
       on  score.c_id=
       
       '02' and course.c_id=score.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       order 
       
       by s_score desc limit 
       
       3;

–课程id为03的前三名


   
   
     
     
      
      
     
     
     
     
      
      
       
       select score.c_id,course.c_name,student.s_name,s_score 
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join student 
       
       on student.s_id=score.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       join course 
       
       on  score.c_id=
       
       '03' and course.c_id=score.c_id  
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       order 
       
       by s_score desc limit 
       
       3;

– 26、查询每门课程被选修的学生数:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select c.c_id,c.c_name,tmp.number 
       
       from course c
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join (
       
       select c_id,count(
       
       1) 
       
       as number 
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
              
       
       where score.s_score<
       
       60 
       
       group 
       
       by score.c_id)tmp
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       on tmp.c_id=c.c_id;

– 27、查询出只有两门课程的全部学生的学号和姓名:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select st.s_id,st.s_name 
       
       from student st
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       join (
       
       select s_id 
       
       from score 
       
       group 
       
       by s_id 
       
       having 
       
       count(c_id) 
       
       =
       
       2)tmp
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       on st.s_id
       
       =tmp.s_id;

– 28、查询男生、女生人数:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select tmp1.man,tmp2.
       
       women from
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
           (
       
       select count(1) 
       
       as man 
       
       from student 
       
       where s_sex=
       
       '男')tmp1,
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
           (
       
       select count(1) as women from student where s_sex=
       
       '女')tmp2;

– 29、查询名字中含有"风"字的学生信息:

select * from student where s_name like '%风%';

– 30、查询同名同性学生名单，并统计同名人数:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select s1.s_id,s1.s_name,s1.s_sex,count(*) 
       
       as sameName
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       from student s1,student s2
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       where s1.s_name=s2.s_name 
       
       and s1.s_id<>s2.s_id 
       
       and s1.s_sex=s2.s_sex
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       group 
       
       by s1.s_id,s1.s_name,s1.s_sex;

– 31、查询1990年出生的学生名单:

select * from student where s_birth like '1990%';

– 32、查询每门课程的平均成绩，结果按平均成绩降序排列，平均成绩相同时，按课程编号升序排列:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select score.c_id,c_name,round(avg(s_score),
       
       2) 
       
       as avgScore 
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       join course 
       
       on score.c_id=course.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       group 
       
       by score.c_id,c_name 
       
       order 
       
       by avgScore desc,score.c_id asc;

– 33、查询平均成绩大于等于85的所有学生的学号、姓名和平均成绩:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select score.s_id,s_name,round(avg(s_score),
       
       2)
       
       as avgScore 
       
       from score
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join student 
       
       on student.s_id=score.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       group 
       
       by score.s_id,
       
       s_name having avg(s_score) >= 
       
       85;

– 34、查询课程名称为"数学"，且分数低于60的学生姓名和分数:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select s_name,s_score 
       
       as mathScore 
       
       from student
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join (
       
       select s_id,s_score
      
      
     
     

     
     
      
      
     
     
     
     
      
                  
       
       from score,course
      
      
     
     

     
     
      
      
     
     
     
     
      
                  
       
       where score.c_id=course.c_id 
       
       and c_name=
       
       '数学')tmp
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       on tmp.s_score < 
       
       60 
       
       and student.s_id=tmp.s_id;

– 35、查询所有学生的课程及分数情况:


   
   
     
     
      
      
     
     
     
     
      
      
       
       select a.s_name,
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       SUM(
       
       case c.c_name 
       
       when 
       
       '语文' 
       
       then b.s_score 
       
       else 
       
       0 
       
       end ) 
       
       as chainese,
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       SUM(
       
       case c.c_name 
       
       when 
       
       '数学' 
       
       then b.s_score 
       
       else 
       
       0 
       
       end ) 
       
       as math,
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       SUM(
       
       case c.c_name 
       
       when 
       
       '英语' 
       
       then b.s_score 
       
       else 
       
       0 
       
       end ) 
       
       as english,
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       SUM(b.s_score) 
       
       as sumScore
      
      
     
     

     
     
      
      
     
     
     
     
      
        
       
       from student a
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join score b 
       
       on a.s_id
       
       =b.s_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       join course c 
       
       on b.c_id
       
       =c.c_id
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       group 
       
       by s_name,a.s_id;

后续部分参见:
https://blog.csdn.net/Thomson617/article/details/83281254

Hive下的SQL经验总结：


   
   
     
     
      
      
     
     
     
     
      
      
       
       (
       
       1).不支持非等值连接，一般使用
       
       left 
       
       join、
       
       right 
       
       join 或者
       
       inner 
       
       join替代。
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	•
       
       SQL中对两表内联可以写成：
      
      
     
     

     
     
      
      
     
     
     
     
      
      		
       
       select 
       
       * 
       
       from dual a,dual b 
       
       where a.key 
       
       = b.key;
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	•Hive中应为:
      
      
     
     

     
     
      
      
     
     
     
     
      
      		
       
       select 
       
       * 
       
       from dual a 
       
       join dual b 
       
       on a.key 
       
       = b.key; 
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	而不是传统的格式：
      
      
     
     

     
     
      
      
     
     
     
     
      
      		
       
       SELECT t1.a1 
       
       as c1, t2.b1 
       
       as c2 
       
       FROM t1, t2 
       
       WHERE t1.a2 
       
       = t2.b2	
      
      
     
     

     
     
      
      
     
     
     
     
      
      		
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       2).分号字符:不能智能识别concat(‘;’,key)，只会将‘；’当做
       
       SQL结束符号。
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	•分号是
       
       SQL语句结束标记，在HiveQL中也是，但是在HiveQL中，对分号的识别没有那么智慧，例如：
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       		•
       
       select concat(key,concat(
       
       ';',key)) 
       
       from dual;
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	•但HiveQL在解析语句时提示：
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
               FAILED: Parse Error: line 
       
       0:
       
       -1 mismatched input 
       
       '<EOF>' expecting ) 
       
       in 
       
       function specification
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	•解决的办法是，使用分号的八进制的ASCII码进行转义，那么上述语句应写成：
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       		•
       
       select concat(key,concat(
       
       '\073',key)) 
       
       from dual;
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       3).不支持
       
       INSERT 
       
       INTO 表 
       
       Values（）, 
       
       UPDATE, 
       
       DELETE等操作.这样的话，就不要很复杂的锁机制来读写数据。
      
      
     
     

     
     
      
      
     
     
     
     
      
      	
       
       INSERT 
       
       INTO syntax 
       
       is 
       
       only available starting 
       
       in version 
       
       0.8。
       
       INSERT 
       
       INTO就是在表或分区中追加数据。
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       4).HiveQL中String类型的字段若是空(
       
       empty)字符串, 即长度为
       
       0, 那么对它进行
       
       IS 
       
       NULL的判断结果是
       
       False，使用
       
       left 
       
       join可以进行筛选行。
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       5).不支持 ‘
       
       < dt 
       
       <’这种格式的范围查找，可以用dt 
       
       in(”,”)或者
       
       between替代。
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       6).Hive不支持将数据插入现有的表或分区中，仅支持覆盖重写整个表，示例如下：
      
      
     
     

     
     
      
      
     
     
     
     
      
          
       
       INSERT OVERWRITE 
       
       TABLE t1 
       
       SELECT 
       
       * 
       
       FROM t2;
      
      
     
     

     
     
      
      
     
     
     
     
      
      	
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       7).
       
       group 
       
       by的字段,必须是
       
       select后面的字段，
       
       select后面的字段不能比
       
       group 
       
       by的字段多.
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	如果
       
       select后面有聚合函数,则该
       
       select语句中必须有
       
       group 
       
       by语句;
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	而且
       
       group 
       
       by后面不能使用别名;
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	有聚合函数存在就必须有
       
       group by.
      
      
     
     

     
     
      
      
     
     
     
     
      
      	
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       8).
       
       select , 
       
       where 及 
       
       having 之后不能跟子查询语句(一般使用
       
       left 
       
       join、
       
       right 
       
       join 或者
       
       inner 
       
       join替代)
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       9).先
       
       join(及
       
       inner 
       
       join) 然后
       
       left 
       
       join或
       
       right 
       
       join
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       10).hive不支持group_concat方法,可用 concat_ws(
       
       '|', collect_set(str)) 实现
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       11).
       
       not 
       
       in 和 
       
       <> 不起作用,可用
       
       left 
       
       join tmp 
       
       on tableName.id 
       
       = tmp.id 
       
       where tmp.id 
       
       is 
       
       null 替代实现
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       12).hive 中‘不等于’不管是用！ 或者
       
       <>符号实现，都会将空值即
       
       null过滤掉，此时要用
      
      
     
     

     
     
      
      
     
     
     
     
      
      		
       
       where （white_level
       
       <>
       
       '3' 
       
       or  white_level 
       
       is 
       
       null） 
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	或者 
       
       where (white_level
       
       !=
       
       '3' 
       
       or white_level 
       
       is 
       
       null )  来保留
       
       null 的情况。
      
      
     
     

     
     
      
      
     
     
     
     
      
       
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       (
       
       13).
       
       union 
       
       all 后面的表不加括号,不然执行报错;
      
      
     
     

     
     
      
      
     
     
     
     
      
      
       
       	hive也不支持顶层的
       
       union 
       
       all，使用子查询来解决;
      
      
     
     

     
     
      
      
     
     
     
     
      
      	
       
       union 
       
       all 之前不能有DISTRIBUTE 
       
       BY 
       
       | SORT 
       
       BY
       
       | 
       
       ORDER 
       
       BY 
       
       | LIMIT 等查询条件