Chapter 7 HIVE

7.1 HIVE Concepts

HIVE is a data warehouse infrastructure built on top of Hadoop. Hive storage relies on HDFS, and its computation relies on MapReduce.

Hive provides SQL on Hadoop; MapReduce is Hive's default execution engine.
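
The engine is configurable; a minimal sketch of checking and switching it (tez and spark must be installed separately):

    -- show the current value
    set hive.execution.engine;
    -- switch engines (accepted values: mr, tez, spark)
    set hive.execution.engine=tez;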

7.2 HIVE Architecture and Principles

  • HIVE architecture

    (figure: Hive architecture diagram)

    1. User interfaces: CLI, JDBC/ODBC, and WebUI
    CLI is the shell command line; it lets us operate Hive from a shell.
    JDBC/ODBC is Hive's Java access path, similar to connecting to a traditional database via JDBC.
    WebUI means accessing Hive through a browser.
    2. Metadata store (Metastore); note that "store" here is a noun: the Metastore is a storage system.
    Hive's metadata includes information about tables; Hive keeps this metadata in the Metastore. Currently the Metastore supports only MySQL and Derby as backing databases.
    3. Driver: contains the compiler, optimizer, and executor
    Together they take a Hive query through lexical analysis, parsing, compilation, optimization, and query-plan generation. The generated plan is stored in HDFS and subsequently executed by MapReduce.
    4. Hadoop: Hive uses HDFS for storage and MapReduce for computation
    Hive's data lives in HDFS, and most queries are executed as MapReduce jobs. (Exception: select * from table does not generate a MapReduce job, but adding a WHERE filter to the statement does, as the example below shows.)
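
    A quick illustration (t1 is a hypothetical table):

      -- served by a simple fetch task, no MapReduce job:
      select * from t1;
      -- the added filter triggers a MapReduce job:
      select * from t1 where id > 100;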

  • HIVE execution flow

    (figure: Hive execution flow diagram)

7.3 HIVE Installation and Connection

  • Installation

    1. Download, upload, and extract

      [root@bigdata04 soft]# ll apache-hive*
      -rw-r--r--.  1 root root 278813748 Aug 29 11:29 apache-hive-3.1.2-bin.tar.gz
      [root@bigdata04 soft]# tar -zxvf apache-hive-3.1.2-bin.tar.gz
      
    2. Rename the configuration files

      [root@bigdata04 soft]# cd apache-hive-3.1.2-bin/conf/
      [root@bigdata04 conf]# mv hive-env.sh.template  hive-env.sh
      [root@bigdata04 conf]# mv hive-default.xml.template  hive-site.xml
      
    3. Edit the configuration files

      # 1. Edit hive-env.sh and add environment variables
      [root@bigdata04 soft]# cd apache-hive-3.1.2-bin/conf/
      [root@bigdata04 conf]# vi hive-env.sh 
      .....
      export JAVA_HOME=/data/soft/jdk1.8
      export HIVE_HOME=/data/soft/apache-hive-3.1.2-bin
      export HADOOP_HOME=/data/soft/hadoop-3.2.0
      # 2. Edit hive-site.xml
      [root@bigdata04 conf]# vi hive-site.xml
      <property>
          <name>javax.jdo.option.ConnectionURL</name>
          <value>jdbc:mysql://172.18.32.39:3306/hive?serverTimezone=Asia/Shanghai</value>
      </property>
      <property>
          <name>javax.jdo.option.ConnectionDriverName</name>
          <value>com.mysql.cj.jdbc.Driver</value>
      </property>
      <property>
          <name>javax.jdo.option.ConnectionUserName</name>
          <value>root</value>
      </property>
      <property>
          <name>javax.jdo.option.ConnectionPassword</name>
          <value>root</value>
      </property>
      <property>
          <name>hive.querylog.location</name>
          <value>/data/hive_repo/querylog</value>
      </property>
      <property>
          <name>hive.exec.local.scratchdir</name>
          <value>/data/hive_repo/scratchdir</value>
      </property>
      <property>
          <name>hive.downloaded.resources.dir</name>
          <value>/data/hive_repo/resources</value>
      </property>
      <property>
          <name>hive.txn.xlock.iow</name>
          <value>true</value>
          <description>
          </description>
        </property>
      

      In jdbc:mysql://172.18.32.39:3306/hive?serverTimezone=Asia/Shanghai, the IP is the address of the machine where MySQL is installed.

      The description body of hive.txn.xlock.iow must be emptied as shown above; otherwise schema initialization will fail later (the template's description text contains a character the XML parser rejects).

    4. Install MySQL 8.0

      Install MySQL 8 on the host that will store the metastore (either Windows or CentOS 7 works; see a separate MySQL installation guide for the OS you use).

    5. Upload the MySQL driver jar

      [root@bigdata04 lib]# pwd
      /data/soft/apache-hive-3.1.2-bin/lib
      [root@bigdata04 lib]# ll mysql*
      -rw-r--r--. 1 root root 2321813 Aug  9  2021 mysql-connector-java-8.0.17.jar
      -rw-r--r--. 1 root root   10476 Nov 16  2018 mysql-metadata-storage-0.12.0.jar
      
    6. Modify core-site.xml on the Hadoop cluster

      [root@bigdata01 hadoop]# vi core-site.xml
          <property>
              <name>hadoop.proxyuser.root.hosts</name>
              <value>*</value>
          </property>
          <property>
              <name>hadoop.proxyuser.root.groups</name>
              <value>*</value>
      </property>
      [root@bigdata01 hadoop]# scp -rq etc/hadoop/core-site.xml  bigdata02:/data/soft/hadoop-3.2.0/etc/hadoop/
      [root@bigdata01 hadoop]# scp -rq etc/hadoop/core-site.xml  bigdata03:/data/soft/hadoop-3.2.0/etc/hadoop/
      
    7. Configure system environment variables

      [root@bigdata04 lib]# cat /etc/profile
      export JAVA_HOME=/data/soft/jdk1.8
      export HADOOP_HOME=/data/soft/hadoop-3.2.0
      export FLUME_HOME=/data/soft/apache-flume-1.9.0-bin
      export HIVE_HOME=/data/soft/apache-hive-3.1.2-bin
      export PATH=.:$JAVA_HOME/bin:$HADOOP_HOME/bin:$FLUME_HOME/bin:$HIVE_HOME/bin:$PATH
      

      Reload the profile so the changes take effect: source /etc/profile

    8. Restart the Hadoop cluster

      [root@bigdata01 hadoop-3.2.0]# sbin/stop-all.sh
      [root@bigdata01 hadoop-3.2.0]# sbin/start-all.sh
      
    9. Update the hosts files on all nodes

      [root@bigdata01 ~]# vi /etc/hosts
      192.168.35.100 bigdata01
      192.168.35.101 bigdata02
      192.168.35.102 bigdata03
      192.168.35.103 bigdata04
      [root@bigdata02 ~]# vi /etc/hosts
      192.168.35.100 bigdata01
      192.168.35.101 bigdata02
      192.168.35.102 bigdata03
      192.168.35.103 bigdata04
      [root@bigdata03 ~]# vi /etc/hosts
      192.168.35.100 bigdata01
      192.168.35.101 bigdata02
      192.168.35.102 bigdata03
      192.168.35.103 bigdata04
      [root@bigdata04 ~]# vi /etc/hosts
      192.168.35.100 bigdata01
      192.168.35.101 bigdata02
      192.168.35.102 bigdata03
      192.168.35.103 bigdata04
      
    10. Initialize the Hive Metastore

      [root@bigdata04 apache-hive-3.1.2-bin]# bin/schematool -dbType mysql -initSchema
      

      Remember to create the hive database in MySQL first.
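
      A minimal sketch of that step, run in MySQL (the database name must match the one in javax.jdo.option.ConnectionURL):

      create database hive;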

  • Connection methods

    • hive

      [root@bigdata04 lib]# hive
      which: no hbase in (.:/data/soft/jdk1.8/bin:/data/soft/hadoop-3.2.0/bin:/data/soft/hadoop-3.2.0/bin:/data/soft/apache-flume-1.9.0-bin/bin:/data/soft/apache-hive-3.1.2-bin/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin)
      Hive Session ID = 6b6e917f-86a7-48a8-aa2f-c1dec6150021
      
      Logging initialized using configuration in file:/data/soft/apache-hive-3.1.2-bin/conf/hive-log4j2.properties Async: true
      Hive Session ID = c07e9a61-c4e0-486c-a0a0-d60a797f7798
      Hive-on-MR is deprecated in Hive 2 and may not be available in the future versions. Consider using a different execution engine (i.e. spark, tez) or using Hive 1.X releases.
      hive (default)> show databases;
      OK
      database_name
      default
      hive_test
      Time taken: 0.566 seconds, Fetched: 2 row(s)
      hive (default)> 
      
    • beeline

      [root@bigdata04 lib]# hiveserver2
      which: no hbase in (.:/data/soft/jdk1.8/bin:/data/soft/hadoop-3.2.0/bin:/data/soft/hadoop-3.2.0/bin:/data/soft/apache-flume-1.9.0-bin/bin:/data/soft/apache-hive-3.1.2-bin/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/root/bin)
      2022-09-03 13:36:35: Starting HiveServer2
      Hive Session ID = 5e9104e0-841c-49b5-8714-fb9090809344
      Hive Session ID = f7404843-9851-477f-a373-8185f35b9552
      Hive Session ID = ffe7a1bc-8aca-4519-8ecc-48d973ee613f
      Hive Session ID = c4a20683-f49e-4dd9-9116-2c5c6facbb17
      [root@bigdata04 ~]# beeline -u  jdbc:hive2://192.168.35.103:10000 -n root   
      Connecting to jdbc:hive2://192.168.35.103:10000
      Connected to: Apache Hive (version 3.1.2)
      Driver: Hive JDBC (version 3.1.2)
      Transaction isolation: TRANSACTION_REPEATABLE_READ
      Beeline version 3.1.2 by Apache Hive
      0: jdbc:hive2://192.168.35.103:10000> show databases;
      +----------------+
      | database_name  |
      +----------------+
      | default        |
      | hive_test      |
      +----------------+
      2 rows selected (1.367 seconds)
      0: jdbc:hive2://192.168.35.103:10000> 
      

      Note: to use beeline, first start the hiveserver2 service, then connect from a new terminal window.

      Note also that hive accepts the -e option, which lets you put a hive command into a script for scheduled execution, as the sketch below shows.
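
      A minimal sketch of such a script (the path and query are illustrative):

      #!/bin/bash
      # run a HiveQL statement non-interactively so that cron can schedule it
      hive -e "select count(*) from hive_test.dual;" >> /data/hive_repo/log/daily_check.log 2>&1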

    • JDBC connection (third-party tools)

      DataGrip or DBeaver are recommended (note that the hiveserver2 service must be running).


  • Common settings

    • Show the current database name and column headers

      [root@bigdata04 apache-hive-3.1.2-bin]# vi ~/.hiverc
      set hive.cli.print.current.db=true;
      set hive.cli.print.header=true;
      

      Edit ~/.hiverc: each time the Hive CLI starts, it loads the contents of the .hiverc file in the current user's home directory.

    • Remove duplicated log output

      cd /data/soft/apache-hive-3.1.2-bin/lib
      mv log4j-slf4j-impl-2.10.0.jar  log4j-slf4j-impl-2.10.0.jar.bak
      

      Hive's logging jar: /data/soft/apache-hive-3.1.2-bin/lib/log4j-slf4j-impl-2.10.0.jar

      Hadoop's logging jar: /data/soft/hadoop-3.2.0/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar

      The two SLF4J bindings conflict, so rename Hive's jar as shown above.

    • Configure Hive log output

      # Edit the hive-log4j2.properties.template file
      cd /data/soft/apache-hive-3.1.2-bin/conf
      
      mv hive-log4j2.properties.template  hive-log4j2.properties
      
      vi hive-log4j2.properties
      
      Change the following settings to:
      property.hive.log.level = WARN
      property.hive.root.logger = DRFA
      property.hive.log.dir = /data/hive_repo/log
      property.hive.log.file = hive.log
      property.hive.perflogger.log.level = INFO
      # Edit the hive-exec-log4j2.properties.template file
      mv hive-exec-log4j2.properties.template hive-exec-log4j2.properties
      
      vi hive-exec-log4j2.properties
      
      Change the following settings to:
      property.hive.log.level = WARN
      property.hive.root.logger = FA
      property.hive.query.id = hadoop
      property.hive.log.dir = /data/hive_repo/log
      property.hive.log.file = ${sys:hive.query.id}.log
      
    • Create a dual pseudo-table

      echo 'X' > dual.txt
      create table hive_test.dual (dummy string);
      load data local inpath '/data/soft/hivedata/dual.txt' into table hive_test.dual;
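
      With its single row loaded, dual can be used to evaluate expressions and functions without scanning a real table:

      select 1 + 1, current_date from hive_test.dual;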
      
    • Fix column comment encoding

      Chinese column comments may come out garbled. Run the following against the MySQL metastore database:

      alter table COLUMNS_V2 modify column COMMENT varchar(256) character set utf8;
      alter table TABLE_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
      -- for partitioned tables, additionally run these two statements
      alter table PARTITION_PARAMS modify column PARAM_VALUE varchar(4000) character set utf8;
      alter table PARTITION_KEYS modify column PKEY_COMMENT varchar(4000) character set utf8;
      
  • Commonly used Metastore tables

    • Common queries

      -- look up basic table info
      select * from tbls where TBL_NAME = 't2'
      -- input/output formats, storage location, etc.
      select s.* from tbls t,sds s where t.SD_ID=s.SD_ID and t.TBL_NAME = 't2'
      -- table properties
      select p.* from tbls t,table_params p where t.TBL_ID=p.TBL_ID and t.TBL_NAME = 't2'
      -- partition columns of a table
      select p.* from tbls t,partition_keys p where t.TBL_ID=p.TBL_ID and t.TBL_NAME = 't2'
      -- column metadata of Hive tables
      select * from hive.columns_v2
      -- metadata of Hive tables, views, and index tables
      select * from hive.tbls t 
      

      See also: an introduction to the Hive MetaStore 3.1.x metadata schema and its features (linked article).

7.4 Basic HIVE Operations

  • CREATE TABLE syntax

    create table t3_new(
    id int comment 'ID',
    stu_name string comment 'name',
    stu_birthday date comment 'birthday',
    online boolean comment 'is online'
    )row format delimited 
    fields terminated by '\t' 
    lines terminated by '\n';
    # skip the first line of the data file
    create table tableName(col_name col_type, col_name col_type, ....)
    row format delimited fields terminated by ','  -- comma delimiter
    tblproperties("skip.header.line.count"="1")    -- skip the first line of the file
    

    row format delimited fields terminated by '\t' specifies the column delimiter.

    lines terminated by '\n' specifies the row delimiter; it can be omitted, since newline is the default.
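
    To verify what Hive actually stored, two built-in commands help:

    show create table t3_new;   -- the full DDL as Hive recorded it
    desc formatted t3_new;      -- columns plus storage format and HDFS location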

  • LOAD syntax

    # load into a managed (internal) table
    LOAD DATA LOCAL INPATH "/home/hadoopUser/data/test1.txt" INTO TABLE test1;  
    # load into a partitioned table
    load data local inpath '/liguodong/dataext' into table test1 partition(dt='20150717');
    # overwrite existing data
    load data local inpath '/data/soft/hivedata/row_number_demo.data' overwrite into table hive_test.row_number_demo;
    # load from HDFS
    load data inpath '/hivedatas/techer.csv' into table techer;
    # load from HDFS into a specific partition
    load data inpath '/hivedatas/techer.csv' into table techer partition(cur_date=20201210);
    

    LOCAL refers to the local file system of the node where the Hive client runs.

    INTO appends data; to overwrite existing data, use OVERWRITE INTO.
    load data local loads from the local file system; the file is copied up to HDFS.
    load data (without LOCAL) loads from HDFS; the file is moved, not copied, into the Hive table directory, because Hive reasons that the HDFS file already has replicas and another copy is unnecessary.
    If the table is partitioned, a load that does not specify a partition fails with an error.
    If a file with the same name already exists in the target directory, the newly loaded file is renamed automatically.

7.5 Advanced HIVE

7.5.1 Common Data Types

  • Primitive types

    • TINYINT
    • SMALLINT
    • INT/INTEGER
    • BIGINT
    • FLOAT
    • DOUBLE
    • DECIMAL
    • TIMESTAMP
    • DATE
    • STRING
    • VARCHAR
    • CHAR
    • BOOLEAN
  • Complex types

    • array

      -- table DDL
      create table stu(
        id int,
        name string,
        favors array<string>
      )row format delimited
      fields terminated by '\t'
      collection items terminated by ','
      lines terminated by '\n';
      -- load the data
      load data local inpath '/data/soft/hivedata/stu.data' into table stu;
      -- stu.data
      1       zhangsan        swing,sing,coding
      2       lisi    music,football
      
    • map

      -- table DDL
      create table stu2(
      id int,
      name string,
      scores  map<string,int>
      )row format delimited
      fields terminated by '\t'
      collection items terminated by ','
      map keys terminated by ':'
      lines terminated by '\n';
      -- load the data
      load data local inpath '/data/soft/hivedata/stu2.data' into table stu2;
      -- stu2.data
      1       zhangsan        chinese:80,math:90,english:100
      2       lisi    chinese:89,english:70,math:88
      
    • struct

      -- table DDL
      create table stu3(
      id int,
      name string,
      address  struct<home_addr:string,office_addr:string>
      )row format delimited
      fields terminated by '\t'
      collection items terminated by ','
      lines terminated by '\n';
      -- load the data
      load data local inpath '/data/soft/hivedata/stu3.data' into table stu3;
      -- stu3.data
      1       zhangsan        bj,sh
      2       lisi    gz,sz
      
    • Using complex types together

      -- table DDL (lines terminated by is omitted here; newline is the default)
      create table mix_stu(
      id int comment 'id',
      name string comment 'name',
      favors array<string> comment 'hobbies',
      scores map<string,string> comment 'scores',
      address struct<home_addr:string,office_addr:string> comment 'address'
      )row format delimited
      fields terminated by '\t'
      collection items terminated by ','
      map keys terminated by ':';
      -- load the data
      load data local inpath '/data/soft/hivedata/mix_stu.data' into table mix_stu;
      -- mix_stu.data
      1       zhangsan        english,swing,swing     chinese:80,math:90,english:100  bj,sh
      2       lisi    games,coding    chinese:80,math:90,english:100  nj,sz
      
    • map vs. struct

      map is more flexible but takes extra disk space, because it stores each entry's key in addition to its value; struct stores only the values, so it is more space-efficient, but less flexible: you cannot add new key-value pairs later.
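
      Each complex type has its own element-access syntax; a sketch against the mix_stu table defined above:

      select favors[0]            -- array element by index
           , scores['chinese']    -- map value by key
           , address.home_addr    -- struct field by name
      from hive_test.mix_stu;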

7.5.2 Table Types in HIVE

  • Managed (internal) tables

    The default table type: Hive owns both the metadata and the data, and dropping the table also deletes its data from HDFS.

  • External tables

    create external table external_table(
    id int,
    name string
    )row format delimited
    fields terminated by '\t'
    location '/data/external';
    
    select * from external_table
    
    load data local inpath '/data/soft/hivedata/external_data.dat' into table external_table;
    

    External tables are the most commonly used table type. Dropping an external table does not delete the underlying data; it removes only the table's reference (metadata) to that data.

  • Partitioned tables

    -- partitioned table
    create table partition_1 (
    id int,
    name string
    )partitioned by (dt string)
    row format delimited
    fields terminated by '\t';
    
    select * from partition_1;
    
    -- show partition info
    show partitions partition_1;
    load data local inpath '/data/soft/hivedata/partition_1.data' into table partition_1 partition(dt='2022-09-01');
    
    load data local inpath '/data/soft/hivedata/partition_2.dat' into table partition_1 partition(dt='2022-09-02');
    
    -- create a table with multiple partition columns
    create table partition_2(
    id int,
    name string
    )partitioned by (year int,school string)
    row format delimited
    fields terminated by '\t';
    
    select * from partition_2;
    
    
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2020,school='xk');
    
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2021,school='english');
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2022,school='math');
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2023,school='chinese');
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2024,school='xk1');
    load data local inpath '/data/soft/hivedata/partition_3.dat' into table partition_2 partition(year=2025,school='xk2');
    
    show partitions partition_2;
    
    -- external partitioned table
    
    create external table ex_par(
    id int,
    name string
    )partitioned by(dt string) 
     row format delimited  
     fields terminated by '\t'
     location '/data/ex_par';
    
    select * from ex_par;
    load data local inpath '/data/soft/hivedata/ex_par.dat' into table ex_par partition (dt='2020-01-01');
    load data local inpath '/data/soft/hivedata/ex_par.dat' into table ex_par partition (dt='2020-01-02');
    load data local inpath '/data/soft/hivedata/ex_par.dat' into table ex_par partition (dt='2020-01-03');
    
    show partitions ex_par;
    
    -- drop a partition of the external partitioned table
    alter table ex_par drop partition(dt='2020-01-03');
    -- re-attach the partition to its HDFS location
    alter table ex_par add partition(dt='2020-01-03') location '/data/ex_par/dt=2020-01-03';
    
  • Bucketed tables

    -- bucketed table
    create table bucket_tb(
    id int
    )clustered by (id) into 4 buckets;
    
    select * from bucket_tb;
    -- enable bucketing enforcement
    set hive.enforce.bucketing=true;
    
    create table b_source(id int);
    load data local inpath '/data/soft/hivedata/b_source.data' into table b_source;
    select * from b_source;
    
    -- load data into the buckets (must go through insert ... select)
    insert into table bucket_tb select id from b_source where id is not null;
    
    select * from bucket_tb;
    
    -- sampling by bucket
    select * from bucket_tb tablesample(bucket 1 out of 4 on id);
    select * from bucket_tb tablesample(bucket 2 out of 4 on id);
    select * from bucket_tb tablesample(bucket 3 out of 4 on id);
    select * from bucket_tb tablesample(bucket 4 out of 4 on id);
    
  • Views
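
    A view stores only its query definition in the Metastore, no data; a minimal sketch (reusing the stu table from 7.5.1):

    create view v_stu as select id, name from stu where id > 1;
    select * from v_stu;
    drop view if exists v_stu;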

7.5.3 Using HIVE Functions

  • Common functions

    -- 1. list the built-in functions
    show functions;
    -- 2. show a built-in function's usage
    desc function round;
    -- 3. show a built-in function's detailed usage
    desc function extended round;
    
  • Date and time functions

    -- 1. unix_timestamp: current or given time as a Unix timestamp
    select unix_timestamp();
    select unix_timestamp('2022-09-06','yyyy-MM-dd');
    -- 2. from_unixtime: convert a timestamp to a date string
    select from_unixtime(1662422400);
    -- 3. current_date: today's date
    select current_date;
    -- 4. current_timestamp: current date and time
    select current_timestamp;
    -- 5. to_date: extract the date part from a string
    select to_date('2022-09-12 16:00:09');
    -- 6. year
    select year(current_timestamp);
    -- 7. month
    select month(current_timestamp);
    -- 8. day
    select day(current_timestamp);
    -- 9. hour
    select hour(current_timestamp);
    -- 10. minute
    select minute(current_timestamp);
    -- 11. second
    select second(current_timestamp);
    -- 12. week of the year
    select weekofyear(current_timestamp);
    -- 13. day of the month
    select dayofmonth('2008-08-08');
    -- 14. months between two dates
    select months_between('2022-08-08','2008-08-08');
    -- 15. add (or subtract) months
    select add_months('2022-09-06',1);
    -- 16. days between two dates
    select datediff('2022-08-08','2008-08-08');
    -- 17. add days (a negative value subtracts)
    select date_add('2022-09-06',-6);
    -- 18. subtract days
    select date_sub('2022-09-06',6);
    -- 19. last day of the month
    select last_day('2022-09-09');
    -- 20. string to date (in the pattern, MM is month and mm is minute)
    select from_unixtime(unix_timestamp('20220909','yyyyMMdd'),'yyyy-MM-dd');
    -- 21. format a date
    select date_format('2022-09-09 18:00:00','yyyyMMdd HH:mm:ss');
    
  • String functions

    select upper('upper')                         -- to upper case
    select lower('lower')                         -- to lower case
    select length('hello world')                  -- string length
    select trim(' hello world ')                  -- strip leading/trailing spaces
    select lpad('123',9,'0')                      -- left-pad to length 9 with '0'
    select rpad('123',9,'0')                      -- right-pad to length 9 with '0'
    select substr('HELLO WORLD',7)                -- substring starting at position 7
    select regexp_replace('2022-09-06','-','/')   -- regex-based replacement
    
  • Rounding functions

    select round(3.1415)  -- round to the nearest integer => 3
    select ceil(-3.9)     -- smallest integer >= x => -3
    select ceil(3.1)      -- => 4
    select floor(-3.9)    -- largest integer <= x => -4
    
  • Collection functions

    -- 1. size(): number of elements in an array or map
    -- 2. map_keys() / map_values(): a map's keys and values
    select 
    size(favors),
    map_keys(scores),
    map_values(scores),
    favors,
    sort_array(favors) -- ascending order only
    from hive_test.mix_stu
    
  • Null handling

    -- nvl(value, default_value): return default_value when value is null
    select nvl(null, 0);
    
  • Conditionals

    -- case when / if
    select case when 1 < 2 then 'yes' else 'no' end;
    select if(1 < 2, 'yes', 'no');
    
  • Row/column transformations

    -- merge rows into one field: collect_set() (deduplicates; collect_list() keeps duplicates)
    with temp_b as (
    select 1 as id,'a' as col from hive_test.dual
    union all
    select 1 as id,'b' as col from hive_test.dual
    union all
    select 1 as id,'c' as col from hive_test.dual
    union all
    select 1 as id,'d' as col from hive_test.dual
    union all
    select 2 as id,'e' as col from hive_test.dual
    union all
    select 2 as id,'f' as col from hive_test.dual
    union all
    select 2 as id,'g' as col from hive_test.dual
    union all
    select 2 as id,'h' as col from hive_test.dual
    union all
    select 2 as id,'h' as col from hive_test.dual
    union all
    select 3 as id,'i' as col from hive_test.dual
    )
    select id,concat_ws('_',COLLECT_SET(col)) as concatcol1 from temp_b group by id
    -- concatenate columns: concat()
    with temp_c as (
    select 1 as id,'zhangsan' as name,10 as age from hive_test.dual
    union all 
    select 1 as id,'lisi' as name,15 as age from hive_test.dual
    union all
    select 1 as id,'wangwu' as name, 20 as age from hive_test.dual
    )
    select id,name,age,concat(name,age) as name_age,name||age as name_age1 from temp_c 
    -- rows to columns (pivot)
    with temp_d as (
    select 'zhangsan' as name ,'chinese' as subject,90 as score from hive_test.dual
    union all 
    select 'zhangsan' as name ,'math' as subject,87 as score from hive_test.dual
    union all
    select 'zhangsan' as name ,'english' as subject,75 as score from hive_test.dual
    union all
    select 'lisi' as name ,'chinese' as subject,70 as score from hive_test.dual
    union all 
    select 'lisi' as name ,'math' as subject,78 as score from hive_test.dual
    union all
    select 'lisi' as name ,'english' as subject,75 as score from hive_test.dual
    )
    select 
    name,
    max(case when subject ='chinese' then score end) as chinese,
    max(case when subject ='math' then score end) as math,
    max(case when subject ='english' then score end) as english
    from temp_d
    group by name
    
    -- columns to rows (unpivot)
    with temp_e as (
    select 'lisi' as name,70 as chinese,78 as math,75 as english from hive_test.dual
    union all 
    select 'zhangsan' as name,90 as chinese,87 as math,75 as english from hive_test.dual
    )
    -- method 1: union all
    select
    name,
    'chinese' as subject,
    chinese as score
    from temp_e
    union all 
    select
    name,
    'math' as subject,
    math as score
    from temp_e
    union all 
    select
    name,
    'english' as subject,
    english as score
    from temp_e
    
    -- method 2: explode with lateral view
    select t.name,tt2.subject,tt2.score from (
    select 
    name,
    map('chinese',chinese,'math',math,'english',english) as score
    from temp_e
    ) t 
    lateral view explode(score) tt2 as subject,score
    
  • Ranking functions

    -- row_number() over(): sequential ranking (1,2,3,4)
    select id,name,subject,score,row_number() over(partition by subject order by score desc) rn from hive_test.row_number_demo;
    -- rank() over(): ranking with gaps (1,2,2,4)
    select id,name,subject,score,rank() over(partition by subject order by score desc) rn from hive_test.row_number_demo;
    -- dense_rank() over(): ranking without gaps (1,2,2,3)
    select id,name,subject,score,dense_rank() over(partition by subject order by score desc) rn from hive_test.row_number_demo;
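
    A common use of row_number() is taking the top N per group; a sketch against the same demo table:

    -- best score per subject (rn <= N generalizes to top N)
    select id,name,subject,score from (
      select id,name,subject,score,
             row_number() over(partition by subject order by score desc) as rn
      from hive_test.row_number_demo
    ) t where rn = 1;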
    

7.5.4 HIVE Optimization

  • Execution plans (inspect with EXPLAIN)

  • Fetch tasks (serving simple queries without MapReduce)

  • Setting sensible numbers of map and reduce tasks

  • Parallel execution

  • Strict mode

    See also: a separate article on Hive optimization.
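
    A minimal sketch of settings matching the bullets above (the property names are standard Hive/MapReduce ones; the values are illustrative and should be tuned per cluster):

    -- execution plan
    explain select * from t1 where id > 100;
    -- fetch task: serve simple queries without launching MapReduce
    set hive.fetch.task.conversion=more;
    -- reducer count: fixed, or derived from input size
    set mapreduce.job.reduces=3;
    set hive.exec.reducers.bytes.per.reducer=268435456;
    -- run independent stages of one query in parallel
    set hive.exec.parallel=true;
    set hive.exec.parallel.thread.number=8;
    -- strict mode: reject risky queries such as unfiltered scans of partitioned tables
    set hive.mapred.mode=strict;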

7.5.5 HIVE Data Skew Solutions

See also: a separate article on Hive tuning and data-skew solutions.
