CREATE TABLE statements: managed (internal) tables vs. external tables

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name
  -- column names and data types
  [(col_name data_type [COMMENT col_comment], ...)]
  -- attach a comment to the table
  [COMMENT table_comment]
  -- partitioning
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  -- bucketing
  [CLUSTERED BY (col_name, col_name, ...)
  -- sort columns within each bucket, ascending or descending
  [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [
   -- row format, e.g. field and line delimiters
   [ROW FORMAT row_format]
   -- storage format: textfile, rcfile, sequencefile, ...; the default is textfile
   [STORED AS file_format]

   | STORED BY 'storage.handler.class.name' [ WITH SERDEPROPERTIES (...) ]  (Note: only available starting with 0.6.0)
  ]
  -- storage location on HDFS
  [LOCATION hdfs_path]
  -- table properties; often used together with external tables, e.g. mapping an HBase table so its data can be queried with HQL (slowly, admittedly)
  [TBLPROPERTIES (property_name=property_value, ...)]  (Note: only available starting with 0.6.0)
  [AS select_statement]  (Note: this feature is only available starting with 0.5.0.)
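The PARTITIONED BY / CLUSTERED BY / SORTED BY clauses above are not exercised anywhere in the transcripts below; as a minimal sketch (the table name students_pb, the choice of partition column, and the bucket count are illustrative only, not from the transcripts):

create table students_pb
(
    id bigint,
    name string,
    age int,
    gender string
)
-- the partition column is NOT repeated in the column list above
PARTITIONED BY (clazz string)
-- hash rows into 4 buckets by id, sorted by age inside each bucket
CLUSTERED BY (id) SORTED BY (age ASC) INTO 4 BUCKETS
ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

Likewise, the classic use of STORED BY together with TBLPROPERTIES is the Hive-HBase storage handler mentioned in the comment; a sketch, assuming an HBase table stu with a column family info already exists and the hive-hbase-handler jar is on Hive's classpath:

create external table students_hbase
(
    id bigint,   -- mapped to the HBase row key
    name string  -- mapped to info:name
)
STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
WITH SERDEPROPERTIES ("hbase.columns.mapping" = ":key,info:name")
TBLPROPERTIES ("hbase.table.name" = "stu");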

create table students
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','; -- specify the column delimiter; required here because the data file is comma-separated (Hive's default field delimiter is \001)

hive> create database test2;
OK
Time taken: 0.528 seconds
hive> use test2;
OK
Time taken: 0.012 seconds
hive> create table students
    > (
    >     id bigint,
    >     name string,
    >     age int,
    >     gender string,
    >     clazz string
    > );
OK
Time taken: 0.157 seconds
hive> show create table students;
OK
CREATE TABLE `students`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/user/hive/warehouse/test2.db/students'
TBLPROPERTIES (
  'transient_lastDdlTime'='1649226473')
Time taken: 0.027 seconds, Fetched: 16 row(s)
hive> show create table test1.students;
OK
CREATE TABLE `test1.students`(
  `id` bigint COMMENT 'fid', 
  `name` string COMMENT 'f�
', 
  `age` int COMMENT 'ft�', 
  `gender` string COMMENT 'f'+', 
  `clazz` string COMMENT 'f�')
COMMENT 'f�oh'
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/user/hive/warehouse/test1.db/students'
TBLPROPERTIES (
  'transient_lastDdlTime'='1648889522')
Time taken: 0.047 seconds, Fetched: 18 row(s)
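Note: the COMMENT strings on test1.students come back as mojibake (f�...). This is most likely a metastore character-set problem (the comment columns in a MySQL-backed metastore default to latin1 rather than UTF-8, so Chinese comments are mangled on the way in), not a problem with the DDL itself.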

create table students2_1
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/data/stu/input'; -- specify where the table's data is stored on HDFS; typically used when the data is already on HDFS and you want to query it in place. LOCATION is usually paired with external tables; managed (internal) tables generally keep the default location

hive> create table students2_1
    > (
    >     id bigint,
    >     name string,
    >     age int,
    >     gender string,
    >     clazz string
    > )
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    > LOCATION '/data/stu/input'; 
OK
Time taken: 0.046 seconds
hive> show create table students2_1;
OK
CREATE TABLE `students2_1`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/data/stu/input'
TBLPROPERTIES (
  'COLUMN_STATS_ACCURATE'='false', 
  'numFiles'='0', 
  'numRows'='-1', 
  'rawDataSize'='-1', 
  'totalSize'='0', 
  'transient_lastDdlTime'='1649226836')
Time taken: 0.027 seconds, Fetched: 21 row(s)
hive> select * from students2_1 limit 10;
OK
1500100001	施笑槐	22	女	文科六班
1500100002	吕金鹏	24	男	文科六班
1500100003	单乐蕊	22	女	理科六班
1500100004	葛德曜	24	男	理科三班
1500100005	宣谷芹	22	女	理科五班
1500100006	边昂雄	21	男	理科二班
1500100007	尚孤风	23	女	文科六班
1500100008	符半双	22	女	理科六班
1500100009	沈德昌	21	男	理科一班
1500100010	羿彦昌	23	男	理科六班
Time taken: 0.042 seconds, Fetched: 10 row(s)

create external table students2
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/data/stu/input'; -- specify where the table's data is stored on HDFS; typically used when the data is already on HDFS and you want to query it in place. LOCATION is usually paired with external tables; managed (internal) tables generally keep the default location

hive> 
    > create external table students2
    > (
    >     id bigint,
    >     name string,
    >     age int,
    >     gender string,
    >     clazz string
    > )
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    > LOCATION '/data/stu/input'; 
OK
Time taken: 0.034 seconds
hive> select * from students2 limit 10
    > ;
OK
1500100001	施笑槐	22	女	文科六班
1500100002	吕金鹏	24	男	文科六班
1500100003	单乐蕊	22	女	理科六班
1500100004	葛德曜	24	男	理科三班
1500100005	宣谷芹	22	女	理科五班
1500100006	边昂雄	21	男	理科二班
1500100007	尚孤风	23	女	文科六班
1500100008	符半双	22	女	理科六班
1500100009	沈德昌	21	男	理科一班
1500100010	羿彦昌	23	男	理科六班
Time taken: 0.13 seconds, Fetched: 10 row(s)
hive> show create table students2;
OK
CREATE EXTERNAL TABLE `students2`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/data/stu/input'
TBLPROPERTIES (
  'COLUMN_STATS_ACCURATE'='false', 
  'numFiles'='0', 
  'numRows'='-1', 
  'rawDataSize'='-1', 
  'totalSize'='0', 
  'transient_lastDdlTime'='1649226023')
Time taken: 0.034 seconds, Fetched: 21 row(s)
[root@master ~]# cd /usr/local/soft
[root@master soft]# cd data/
[root@master data]# ls
new_db.sql  student.sql         theZen.txt
score.sql   students.txt        wordcount
score.txt   theZenOfPython.txt  words.txt
[root@master data]# hdfs dfs -put students.txt /user/hive/warehouse/test2.db/students
hive> select * from students limit 10;
OK
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
NULL	NULL	NULL	NULL	NULL
Time taken: 0.041 seconds, Fetched: 10 row(s)
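The all-NULL rows are expected: this students table was created above without ROW FORMAT DELIMITED FIELDS TERMINATED BY ',', so Hive falls back to its default \001 field delimiter. Each comma-separated line is then treated as one long field, which cannot be parsed as the bigint id column, and the remaining columns receive nothing at all, so every column reads as NULL.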
hive> show create table students;
OK
CREATE TABLE `students`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT SERDE 
  'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/user/hive/warehouse/test2.db/students'
TBLPROPERTIES (
  'transient_lastDdlTime'='1649226473')
Time taken: 0.024 seconds, Fetched: 16 row(s)
hive> drop table students;
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students' to trash at: hdfs://master:9000/user/root/.Trash/Current
OK
Time taken: 0.197 seconds
hive> show create table students2;
OK
CREATE EXTERNAL TABLE `students2`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://master:9000/data/stu/input'
TBLPROPERTIES (
  'COLUMN_STATS_ACCURATE'='false', 
  'numFiles'='0', 
  'numRows'='-1', 
  'rawDataSize'='-1', 
  'totalSize'='0', 
  'transient_lastDdlTime'='1649230136')
Time taken: 0.021 seconds, Fetched: 21 row(s)
hive> drop table students2;
OK
Time taken: 0.052 seconds

-- Managed (internal) table: as the drop above showed, deleting it removes the data as well (moved to the HDFS trash)
create table students_internal
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/input2';

-- External table: dropping it removes only the metadata; the files under LOCATION stay on HDFS
create external table students_external
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/input3';
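A related point worth knowing (not shown in the transcripts): an existing table can be flipped between managed and external by toggling the EXTERNAL table property; a minimal sketch, assuming the students_internal table above (note the property value is case-sensitive in older Hive versions):

-- convert a managed table to external, so a later drop keeps the data
alter table students_internal set tblproperties ('EXTERNAL'='TRUE');
-- and back to managed
alter table students_internal set tblproperties ('EXTERNAL'='FALSE');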

create table students3
(
    id bigint,
    name string,
    age int,
    gender string,
    clazz string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS rcfile; -- store as RCFile (inputFormat: RCFileInputFormat, outputFormat: RCFileOutputFormat); if unspecified, the default is textfile. Note: for every format other than textfile, data cannot be loaded directly from raw files; it must be written through another table (insert ... select), as demonstrated below.

hive> create table students_orc
    > (
    >     id bigint,
    >     name string,
    >     age int,
    >     gender string,
    >     clazz string
    > )
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    > STORED AS orcfile;
OK
Time taken: 0.057 seconds
hive> create table students_parquet
    > (
    >     id bigint,
    >     name string,
    >     age int,
    >     gender string,
    >     clazz string
    > )
    > ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
    > STORED AS parquetfile;
OK
Time taken: 0.044 seconds
hive> show tables;
OK
students2_1
students_orc
students_parquet
Time taken: 0.012 seconds, Fetched: 3 row(s)
hive> show create table students_orc
    > ;
OK
CREATE TABLE `students_orc`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.ORCFileInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.ORCFileOutputFormat'
LOCATION
  'hdfs://master:9000/user/hive/warehouse/test2.db/students_orc'
TBLPROPERTIES (
  'transient_lastDdlTime'='1649230990')
Time taken: 0.023 seconds, Fetched: 16 row(s)
hive> show create table students_parquet
    > ;
OK
CREATE TABLE `students_parquet`(
  `id` bigint, 
  `name` string, 
  `age` int, 
  `gender` string, 
  `clazz` string)
ROW FORMAT DELIMITED 
  FIELDS TERMINATED BY ',' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
  'hdfs://master:9000/user/hive/warehouse/test2.db/students_parquet'
TBLPROPERTIES (
  'transient_lastDdlTime'='1649231044')
Time taken: 0.02 seconds, Fetched: 16 row(s)

[root@master data]# hdfs dfs -put students.txt /user/hive/warehouse/test2.db/students_orc
[root@master data]# hdfs dfs -put students.txt /user/hive/warehouse/test2.db/students_parquet
hive> select * from students_orc limit 10;
OK
Failed with exception java.io.IOException:java.io.IOException: hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/students.txt not a RCFile and has magic of 150
Time taken: 0.029 seconds
hive> select * from students_parquet  limit 10;
OK
Failed with exception java.io.IOException:java.lang.RuntimeException: hdfs://master:9000/user/hive/warehouse/test2.db/students_parquet/students.txt is not a Parquet file. expected magic number at tail [80, 65, 82, 49] but found [-83, -25, -113, -83]
Time taken: 0.031 seconds
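Both failures are expected: as the error messages show, the ORC and Parquet readers validate a file's magic bytes, so a raw text file dropped into the table directory cannot be read. The fix, shown next, is to remove the text file and populate the table with insert ... select from a textfile-backed table.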
[root@master data]# hdfs dfs -rmr /user/hive/warehouse/test2.db/students_orc/students.txt
rmr: DEPRECATED: Please use 'rm -r' instead.
22/04/06 16:09:27 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/04/06 16:09:27 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/students.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current/user/hive/warehouse/test2.db/students_orc/students.txt1649232567736
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/students.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current
hive> select * from students2_1 limit 2;
OK
1500100001	施笑槐	22	女	文科六班
1500100002	吕金鹏	24	男	文科六班
Time taken: 0.032 seconds, Fetched: 2 row(s)
hive> insert into table students_orc select * from students2_1;
Query ID = root_20220406161318_0b1866bf-21e4-4f93-ab93-18c8e24998c0
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1649211406017_0001, Tracking URL = http://master:8088/proxy/application_1649211406017_0001/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job  -kill job_1649211406017_0001
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2022-04-06 16:13:29,142 Stage-1 map = 0%,  reduce = 0%
2022-04-06 16:13:35,486 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.4 sec
MapReduce Total cumulative CPU time: 1 seconds 400 msec
Ended Job = job_1649211406017_0001
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/.hive-staging_hive_2022-04-06_16-13-18_263_1846120048648544602-1/-ext-10000
Loading data to table test2.students_orc
Table test2.students_orc stats: [numFiles=1, numRows=1000, totalSize=7253, rawDataSize=288000]
MapReduce Jobs Launched: 
Stage-Stage-1: Map: 1   Cumulative CPU: 1.4 sec   HDFS Read: 45760 HDFS Write: 7333 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 400 msec
OK
Time taken: 19.435 seconds
hive> select * from students_orc limit 10;
OK
1500100001	施笑槐	22	女	文科六班
1500100002	吕金鹏	24	男	文科六班
1500100003	单乐蕊	22	女	理科六班
1500100004	葛德曜	24	男	理科三班
1500100005	宣谷芹	22	女	理科五班
1500100006	边昂雄	21	男	理科二班
1500100007	尚孤风	23	女	文科六班
1500100008	符半双	22	女	理科六班
1500100009	沈德昌	21	男	理科一班
1500100010	羿彦昌	23	男	理科六班
Time taken: 0.033 seconds, Fetched: 10 row(s)
hive> select count(*) from students_orc;
Query ID = root_20220406161652_04fc529a-1e88-4dff-8383-ca6f3a83dbf5
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks determined at compile time: 1
In order to change the average load for a reducer (in bytes):
  set hive.exec.reducers.bytes.per.reducer=<number>
In order to limit the maximum number of reducers:
  set hive.exec.reducers.max=<number>
In order to set a constant number of reducers:
  set mapreduce.job.reduces=<number>
Starting Job = job_1649211406017_0002, Tracking URL = http://master:8088/proxy/application_1649211406017_0002/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job  -kill job_1649211406017_0002
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 1
2022-04-06 16:17:02,774 Stage-1 map = 0%,  reduce = 0%
2022-04-06 16:17:07,908 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 0.66 sec
2022-04-06 16:17:14,039 Stage-1 map = 100%,  reduce = 100%, Cumulative CPU 1.62 sec
MapReduce Total cumulative CPU time: 1 seconds 620 msec
Ended Job = job_1649211406017_0002
MapReduce Jobs Launched: 
Stage-Stage-1: Map: 1  Reduce: 1   Cumulative CPU: 1.62 sec   HDFS Read: 14219 HDFS Write: 5 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 620 msec
OK
1000
Time taken: 22.629 seconds, Fetched: 1 row(s)
[root@master data]# hdfs dfs -rmr /user/hive/warehouse/test2.db/students_parquet/students.txt
rmr: DEPRECATED: Please use 'rm -r' instead.
22/04/06 16:21:52 INFO fs.TrashPolicyDefault: Namenode trash configuration: Deletion interval = 1440 minutes, Emptier interval = 0 minutes.
22/04/06 16:21:52 INFO fs.TrashPolicyDefault: Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students_parquet/students.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current/user/hive/warehouse/test2.db/students_parquet/students.txt
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students_parquet/students.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current
hive> insert into table students_parquet select * from students2_1;
Query ID = root_20220406161924_eee5417d-0312-4656-b58c-3e530ae3dec6
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1649211406017_0003, Tracking URL = http://master:8088/proxy/application_1649211406017_0003/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job  -kill job_1649211406017_0003
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2022-04-06 16:19:33,118 Stage-1 map = 0%,  reduce = 0%
2022-04-06 16:19:39,247 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.24 sec
MapReduce Total cumulative CPU time: 1 seconds 240 msec
Ended Job = job_1649211406017_0003
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://master:9000/user/hive/warehouse/test2.db/students_parquet/.hive-staging_hive_2022-04-06_16-19-24_869_8919741910758401213-1/-ext-10000
Loading data to table test2.students_parquet
Table test2.students_parquet stats: [numFiles=2, numRows=1000, totalSize=64819, rawDataSize=5000]
MapReduce Jobs Launched: 
Stage-Stage-1: Map: 1   Cumulative CPU: 1.24 sec   HDFS Read: 46015 HDFS Write: 22903 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 240 msec
OK
Time taken: 15.547 seconds
hive> create table students5 as select * from students2_1;
Query ID = root_20220406162622_37d32527-9077-42c6-850b-c368fcfe3dd1
Total jobs = 3
Launching Job 1 out of 3
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1649211406017_0004, Tracking URL = http://master:8088/proxy/application_1649211406017_0004/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job  -kill job_1649211406017_0004
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2022-04-06 16:26:29,990 Stage-1 map = 0%,  reduce = 0%
2022-04-06 16:26:35,112 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 0.78 sec
MapReduce Total cumulative CPU time: 780 msec
Ended Job = job_1649211406017_0004
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://master:9000/user/hive/warehouse/test2.db/.hive-staging_hive_2022-04-06_16-26-22_678_6947166971297328109-1/-ext-10001
Moving data to: hdfs://master:9000/user/hive/warehouse/test2.db/students5
Table test2.students5 stats: [numFiles=1, numRows=1000, totalSize=41000, rawDataSize=40000]
MapReduce Jobs Launched: 
Stage-Stage-1: Map: 1   Cumulative CPU: 0.78 sec   HDFS Read: 45272 HDFS Write: 41076 SUCCESS
Total MapReduce CPU Time Spent: 780 msec
OK
Time taken: 13.542 seconds
hive> desc students5;
OK
id                  	bigint              	                    
name                	string              	                    
age                 	int                 	                    
gender              	string              	                    
clazz               	string              	                    
Time taken: 0.04 seconds, Fetched: 5 row(s)
hive> create table students4 like students2_1;
OK
Time taken: 0.043 seconds
hive> 
    > select * from students4;
OK
Time taken: 0.052 seconds
hive> create table students6 as select id,name from students2_1;
hive> truncate table students6;
OK
Time taken: 0.048 seconds
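To recap the three patterns just demonstrated, using the same tables:

create table students5 as select * from students2_1;  -- CTAS: copies schema AND data (runs a MapReduce job)
create table students4 like students2_1;              -- LIKE: copies schema only; the new table starts empty
truncate table students6;                             -- deletes all rows but keeps the table definition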
hive> insert overwrite table students_orc select * from students2_1;
Query ID = root_20220406164128_ce9bfa5d-6c7f-4cb1-a569-b2358fbe229a
Total jobs = 1
Launching Job 1 out of 1
Number of reduce tasks is set to 0 since there's no reduce operator
Starting Job = job_1649211406017_0006, Tracking URL = http://master:8088/proxy/application_1649211406017_0006/
Kill Command = /usr/local/soft/hadoop-2.7.6/bin/hadoop job  -kill job_1649211406017_0006
Hadoop job information for Stage-1: number of mappers: 1; number of reducers: 0
2022-04-06 16:41:36,675 Stage-1 map = 0%,  reduce = 0%
2022-04-06 16:41:41,835 Stage-1 map = 100%,  reduce = 0%, Cumulative CPU 1.27 sec
MapReduce Total cumulative CPU time: 1 seconds 270 msec
Ended Job = job_1649211406017_0006
Stage-4 is selected by condition resolver.
Stage-3 is filtered out by condition resolver.
Stage-5 is filtered out by condition resolver.
Moving data to: hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/.hive-staging_hive_2022-04-06_16-41-28_893_8152676924006567408-1/-ext-10000
Loading data to table test2.students_orc
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students_orc/000000_0' to trash at: hdfs://master:9000/user/root/.Trash/Current
Table test2.students_orc stats: [numFiles=1, numRows=1000, totalSize=7253, rawDataSize=288000]
MapReduce Jobs Launched: 
Stage-Stage-1: Map: 1   Cumulative CPU: 1.27 sec   HDFS Read: 46029 HDFS Write: 7333 SUCCESS
Total MapReduce CPU Time Spent: 1 seconds 270 msec
OK
Time taken: 15.11 seconds
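Contrast this with the earlier insert into: insert overwrite first clears the table's existing files (moved to the trash, as the log above shows) and then writes the new data, whereas insert into appends to whatever is already there.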
hive> alter table students2_1 rename to students2;
OK
Time taken: 0.054 seconds
hive> show tables;
OK
students2
students4
students5
students6
students_orc
students_parquet
Time taken: 0.009 seconds, Fetched: 6 row(s)
hive> dfs -ls /
    > ;
Found 4 items
drwxr-xr-x   - lenovo supergroup          0 2022-03-23 10:04 /TestAPI
drwxr-xr-x   - root   supergroup          0 2022-03-27 20:13 /data
drwx------   - root   supergroup          0 2022-04-02 16:34 /tmp
drwx------   - root   supergroup          0 2022-04-02 16:36 /user
hive> dfs -df -h /
    > ;
Filesystem            Size   Used  Available  Use%
hdfs://master:9000  93.9 G  9.4 M     82.9 G    0%

-- Move the data under /input1 on HDFS into the HDFS directory backing the students table. Note: the source file is MOVED, not copied
load data inpath '/input1/students.txt' into table students;

-- empty the table
truncate table students;
-- with the local keyword, a file from the local Linux filesystem is uploaded into the table's HDFS directory; the original file is not deleted
load data local inpath '/usr/local/soft/data/students.txt' into table students;
-- overwrite: replace the table's existing data instead of appending
load data local inpath '/usr/local/soft/data/students.txt' overwrite into table students;

[root@master data]# pwd
/usr/local/soft/data
hive> load data local inpath '/usr/local/soft/data/students.txt' into table students4;
Loading data to table test2.students4
Table test2.students4 stats: [numFiles=1, totalSize=41998]
OK
Time taken: 0.504 seconds
hive> load data inpath '/data/stu/input/students.txt' into table students4;
Loading data to table test2.students4
Table test2.students4 stats: [numFiles=2, totalSize=83996]
OK
Time taken: 0.158 seconds
hive>  load data local inpath '/usr/local/soft/data/students.txt' overwrite into table students4;
Loading data to table test2.students4
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students4/students.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current
Moved: 'hdfs://master:9000/user/hive/warehouse/test2.db/students4/students_copy_1.txt' to trash at: hdfs://master:9000/user/root/.Trash/Current
Table test2.students4 stats: [numFiles=1, numRows=0, totalSize=41998, rawDataSize=0]
OK
Time taken: 0.595 seconds
hive> show databases;
OK
default
test1
test2
Time taken: 0.01 seconds, Fetched: 3 row(s)
hive> drop database test1;
FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask. InvalidOperationException(message:Database test1 is not empty. One or more tables exist.)
hive> drop database test1 cascade;
Moved: 'hdfs://master:9000/user/hive/warehouse/test1.db/score' to trash at: hdfs://master:9000/user/root/.Trash/Current
Moved: 'hdfs://master:9000/user/hive/warehouse/test1.db/students' to trash at: hdfs://master:9000/user/root/.Trash/Current
Moved: 'hdfs://master:9000/user/hive/warehouse/test1.db' to trash at: hdfs://master:9000/user/root/.Trash/Current
OK
Time taken: 0.311 seconds
hive> show databases;
OK
default
test2
Time taken: 0.009 seconds, Fetched: 2 row(s)
hive> desc students2;
OK
id                  	bigint              	                    
name                	string              	                    
age                 	int                 	                    
gender              	string              	                    
clazz               	string              	                    
Time taken: 0.062 seconds, Fetched: 5 row(s)
hive> desc extended students2;
OK
id                  	bigint              	                    
name                	string              	                    
age                 	int                 	                    
gender              	string              	                    
clazz               	string              	                    
	 	 
Detailed Table Information	Table(tableName:students2, dbName:test2, owner:root, createTime:1649226836, lastAccessTime:0, retention:0, sd:StorageDescriptor(cols:[FieldSchema(name:id, type:bigint, comment:null), FieldSchema(name:name, type:string, comment:null), FieldSchema(name:age, type:int, comment:null), FieldSchema(name:gender, type:string, comment:null), FieldSchema(name:clazz, type:string, comment:null)], location:hdfs://master:9000/user/hive/warehouse/test2.db/students2, inputFormat:org.apache.hadoop.mapred.TextInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{field.delim=,, serialization.format=,}), bucketCols:[], sortCols:[], parameters:{}, skewedInfo:SkewedInfo(skewedColNames:[], skewedColValues:[], skewedColValueLocationMaps:{}), storedAsSubDirectories:false), partitionKeys:[], parameters:{last_modified_time=1649234943, totalSize=0, numRows=-1, rawDataSize=-1, COLUMN_STATS_ACCURATE=false, numFiles=0, transient_lastDdlTime=1649234943, last_modified_by=root}, viewOriginalText:null, viewExpandedText:null, tableType:MANAGED_TABLE)	
Time taken: 0.045 seconds, Fetched: 7 row(s)
hive> desc formatted students2;
OK
# col_name            	data_type           	comment             
	 	 
id                  	bigint              	                    
name                	string              	                    
age                 	int                 	                    
gender              	string              	                    
clazz               	string              	                    
	 	 
# Detailed Table Information	 	 
Database:           	test2               	 
Owner:              	root                	 
CreateTime:         	Wed Apr 06 14:33:56 CST 2022	 
LastAccessTime:     	UNKNOWN             	 
Protect Mode:       	None                	 
Retention:          	0                   	 
Location:           	hdfs://master:9000/user/hive/warehouse/test2.db/students2	 
Table Type:         	MANAGED_TABLE       	 
Table Parameters:	 	 
	COLUMN_STATS_ACCURATE	false               
	last_modified_by    	root                
	last_modified_time  	1649234943          
	numFiles            	0                   
	numRows             	-1                  
	rawDataSize         	-1                  
	totalSize           	0                   
	transient_lastDdlTime	1649234943          
	 	 
# Storage Information	 	 
SerDe Library:      	org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe	 
InputFormat:        	org.apache.hadoop.mapred.TextInputFormat	 
OutputFormat:       	org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat	 
Compressed:         	No                  	 
Num Buckets:        	-1                  	 
Bucket Columns:     	[]                  	 
Sort Columns:       	[]                  	 
Storage Desc Params:	 	 
	field.delim         	,                   
	serialization.format	,                   
Time taken: 0.044 seconds, Fetched: 38 row(s)
