目录
1. mkdir /usr/local/soft/hive-3.1.2/data/
4.上传数据至HDFSdfs -put /usr/local/soft/hive-3.1.2/data/ /user/hive/warehouse/filetest.db/students
5.通过查看HDFS路径可以看到数据大小没有发生变化 大小为37M
7.插入数据:insert into table students_rcFile select * from students;
8.通过查看HDFS路径可以看到数据经过压缩大小为26.44 M
10.插入数据:insert into table students_orc select * from students;
11.通过查看HDFS路径可以看到数据经过压缩大小为 220.38 KB,同时插入数据时与RCFile格式时间相差不大
13.insert into table students_parquet select * from students;
14.通过查看HDFS路径可以看到数据经过压缩大小为 3.01 MB 写入数据所花费时间相比ORCFile更少
3.查看服务有没有启动:(hive的JDBC启动非常慢,需要等待一定时间,Hive Session ID出现4个以后基本可以看到端口启动)
1.测试几种存储格式:
1. mkdir /usr/local/soft/hive-3.1.2/data/
2. 上传数据至data目录中
把student.txt放入data中
3.创建TEXTFILE学生信息表
create table students(
id bigint comment '学生id',
name string comment '学生姓名',
age int comment '学生年龄',
gender string comment '学生性别',
clazz string comment '学生班级'
) comment '学生信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
4.上传数据至HDFS
dfs -put /usr/local/soft/hive-3.1.2/data/ /user/hive/warehouse/filetest.db/students
5.通过查看HDFS路径可以看到数据大小没有发生变化 大小为37M
6.创建RCFile格式的学生信息表
create table students_rcFile(
id bigint comment '学生id',
name string comment '学生姓名',
age int comment '学生年龄',
gender string comment '学生性别',
clazz string comment '学生班级'
) comment '学生信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS RCFile;
7.插入数据:
insert into table students_rcFile select * from students;
8.通过查看HDFS路径可以看到数据经过压缩大小为26.44 M
9.创建ORCFile格式的学生信息表
create table students_orc(
id bigint comment '学生id',
name string comment '学生姓名',
age int comment '学生年龄',
gender string comment '学生性别',
clazz string comment '学生班级'
) comment '学生信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS ORC;
10.插入数据:
insert into table students_orc select * from students;
11.通过查看HDFS路径可以看到数据经过压缩大小为 220.38 KB,同时插入数据时与RCFile格式时间相差不大
12.创建Parquet格式的学生信息表
create table students_parquet(
id bigint comment '学生id',
name string comment '学生姓名',
age int comment '学生年龄',
gender string comment '学生性别',
clazz string comment '学生班级'
) comment '学生信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS PARQUET;
13.insert into table students_parquet select * from students;
14.通过查看HDFS路径可以看到数据经过压缩大小为 3.01 MB 写入数据所花费时间相比ORCFile更少
2.开启HIVE JDBC 连接
1.开启命令:
hive --service hiveserver2
2.连接命令:
beeline -u jdbc:hive2://master:10000 -n root
注意: 如果报以下错误:
Error: Could not open client transport with JDBC Uri: jdbc:hive2://master:10000:
Failed to open new session: java.lang.RuntimeException:
org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.authorize.AuthorizationException):
User: root is not allowed to impersonate root (state=08S01,code=0)
解决方法:
先关闭Hadoop集群 :stop-all.sh
配置core-site.xml:添加如下内容:
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
重启集群:start-all.sh
3.查看服务有没有启动:(hive的JDBC启动非常慢,需要等待一定时间,Hive Session ID出现4个以后基本可以看到端口启动)
netstat -nplt | grep 10000
3.HIVE基本语法
CREATE DATABASE [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
1. 创建数据库
CREATE DATABASE IF NOT EXISTS my_database;
DROP DATABASE my_database;
CREATE DATABASE IF NOT EXISTS my_database COMMENT "测试数据库创建";
通过指定的HDFS路径创建数据库
dfs -mkdir /testDatabase;
-- CREATE DATABASE IF NOT EXISTS test_database COMMENT "测试数据库创建" LOCATION "/testDatabase";
创建数据库时增加配置信息
CREATE DATABASE IF NOT EXISTS test_database2 COMMENT "测试数据库创建" WITH DBPROPERTIES("author"="xingtong","application"="save base data");
2. 查看数据库信息
DESC DATABASE my_database;
注意:如果中文显示乱码,请修改MYSQL中hive库的DBS中的DESC字段的字符集为utf8
alter table DBS modify column DESC varchar(4000) character set utf8;
desc database extended test_database2;
查看数据库创建的详细信息,包括配置的参数
SHOW DATABASE;
3. 删除数据库
3.1 DROP DATABASE my_database;
如果数据库不为空 删除会报错: message:Database my_database is not empty.
3.2 DROP DATABASE my_database cascade;
强制删除数据库
4. 创建表
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name -- EXTERNAL表示外部表的标志
[(col_name data_type [COMMENT col_comment], ...)] -- 列名 数据类型 注释信息
[COMMENT table_comment] -- 表的注释信息
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)] -- PARTITIONED BY 构建分区表 包括 列名 数据类型 注释信息
[CLUSTERED BY (col_name, col_name, ...) --CLUSTERED BY 分桶 列名
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS] -- SORTED BY 分桶中的排序 INTO num_buckets BUCKETS 具体分成多少桶
[
[ROW FORMAT row_format] -- 表示数据的分隔信息
[STORED AS file_format] -- 表示存储格式
| STORED BY 'storage.handler.class.name' [ WITH SERDEPROPERTIES (...) ] (Note: only available starting with 0.6.0) --自定义存储类
]
[LOCATION hdfs_path] -- 表的存储位置
[TBLPROPERTIES (property_name=property_value, ...)] (Note: only available starting with 0.6.0) -- 表示表的配置信息
[AS select_statement] (Note: this feature is only available starting with 0.5.0.) -- AS 通过查询表创建(接查询语句)
4.1 ROW FORMAT用法
CREATE TABLE IF NOT EXISTS filetest.total_score(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS TEXTFILE;
load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score;
CREATE TABLE IF NOT EXISTS filetest.total_score2(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY ","
STORED AS TEXTFILE;
load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score2;
注意:如果数据和分隔符不匹配则原始数据中的所有数据会变成新表中的一列数据
4.2 LOCATION用法
dfs -mkdir /testDatabase/total_score;
CREATE TABLE IF NOT EXISTS filetest.total_score3(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS TEXTFILE
LOCATION "/testDatabase/total_score";
load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score3;
4.3 EXTERNAL 用法
dfs -mkdir /testDatabase/total_score;
CREATE EXTERNAL TABLE IF NOT EXISTS filetest.total_score3(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS TEXTFILE
LOCATION "/testDatabase/total_score";
load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score3;
CREATE EXTERNAL TABLE IF NOT EXISTS filetest.total_score4(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS TEXTFILE;
load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score4;
DROP TABLE total_score4; -- 数据依然存在
注: 外部表与普通内部表的区别在于:外部表删除表时不会删除对应的数据 并且和表数据存储位置无关
4.4 STORED AS 用法:设置数据存储格式
使用方法一:
CREATE EXTERNAL TABLE IF NOT EXISTS filetest.total_score5(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS ORC;
--load data local inpath "/usr/local/soft/hive-3.1.2/data/total_score.txt" into table filetest.total_score5;
注意:如果数据存储格式为压缩格式,那么就不能直接将文本数据加载至表中
INSERT INTO TABLE filetest.total_score5 SELECT * FROM filetest.total_score
使用方法二:
通过查询语句结果去创建一个新表
CREATE TABLE IF NOT EXISTS filetest.total_score99
STORED AS ORC
AS SELECT student_id,score FROM filetest.total_score3;
4.5 TBLPROPERTIES 用法
CREATE EXTERNAL TABLE IF NOT EXISTS filetest.total_score8(
student_id STRING COMMENT "学生编号",
score int COMMENT "总分"
)ROW FORMAT DELIMITED FIELDS TERMINATED BY "\t"
STORED AS TEXTFILE
TBLPROPERTIES("author"="xing","application"="学生表,保存学生的总分,如有错误请联系 XXX");
4.6 AS 用法
CREATE TABLE IF NOT EXISTS filetest.total_score8
STORED AS ORC
AS SELECT student_id,score FROM filetest.total_score;
4.7 PARTITIONED BY 分区
-- 将学生信息表中不同性别的学生进行分区保存
CREATE TABLE IF NOT EXISTS filetest.partition_student(
id STRING COMMENT "学生ID",
name STRING COMMENT "学生姓名",
age int COMMENT "年龄",
clazz STRING COMMENT "班级"
) PARTITIONED BY (gender STRING COMMENT "性别分区")
STORED AS TEXTFILE;
INSERT INTO TABLE filetest.partition_student PARTITION(gender="nv")
SELECT id,name,age,clazz FROM filetest.students WHERE gender="男" limit 10;
INSERT INTO TABLE filetest.partition_student PARTITION(gender="女生")
SELECT "1500100002","吕金鹏",24,"文科六班"
5.查看表信息
DESC table_name
查看表的字段信息
DESC FORMATTED table_name
查看表的详细信息
6.删除表
DROP TABLE table_name;
删除表时,HDFS上的数据也会跟着删除掉