hive

最新推荐文章于 2023-11-12 22:31:26 发布

wm_43827516

最新推荐文章于 2023-11-12 22:31:26 发布

阅读量366

点赞数

分类专栏： hive

本文链接：https://blog.csdn.net/qq_43827516/article/details/109458126

版权

Hive访问

--先开启服务
[atguigu@hadoop102 hive]$ nohup hive --service metastore>log.txt 2>&1 &
[atguigu@hadoop102 hive]$ nohup hive --service hiveserver2>log2.txt 2>&1 &

--启动beeline客户端
[atguigu@hadoop102 hive]$ bin/beeline -u jdbc:hive2://hadoop102:10000 -n atguigu
--启动hive客户端
[atguigu@hadoop102 hive]$ bin/hive

Hive常用交互命令

-- 在hive命令行里创建一个表student，并插入1条数据
hive (default)> create table student(id int,name string);
OK

--查看hdfs文件系统（很少用，一般直接网页看）
hive (default)> dfs -ls /;

--查看在hive中输入的所有历史命令
[atguigu@hadoop102 ~]$ cat .hivehistory

脚本中调用hive

--hive -e （不进入hive的交互窗口执行sql语句）
[atguigu@hadoop102 hive]$ hive -e "select * from student"

--hive -f （执行脚本中sql语句）
[atguigu@hadoop102 hive]$ hive -f stu.sql

--执行文件中的sql语句并将结果写入文件中
[atguigu@hadoop102 hive]$ hive -f /opt/module/hive/datas/hivef.sql  > /opt/module/datas/hive_result.txt

DDL数据定义语言

4.1 库的DDL

-- Create a database with a comment, an explicit HDFS location and custom properties.
-- Hive's grammar requires LOCATION to come before WITH DBPROPERTIES, and the
-- statement must be terminated by a single semicolon at the very end.
create database if not exists db_hive
comment "this is my first db"
location '/db_hive'
with dbproperties ("name"="db_hive","owner"="atguigu");

-- List all databases.
show databases;
-- List databases whose name matches a wildcard pattern.
show databases like 'db_hive*';
-- Basic description of a database (数据库名 is a placeholder for the database name).
desc database 数据库名;
-- Extended description: also shows the database properties, which the basic form omits.
desc database extended 数据库名;
-- Switch the current database.
use 数据库名;
-- Modify a database. Per the original note, only the database properties can be changed here.
alter database db_hive set dbproperties('createtime'='20200624');

-- Dropping a database also removes its directory on HDFS, so use it with care.
-- CASCADE forces the drop even when the database is not empty.
DROP DATABASE 数据库名 CASCADE;
-- Stricter variant: IF EXISTS checks that the database exists before dropping it.
DROP DATABASE IF EXISTS 数据库名 CASCADE;

4.2 表的DDL

1 创建表

-- Hive CREATE TABLE syntax template; clauses in square brackets are optional.
CREATE [EXTERNAL] TABLE [IF NOT EXISTS] table_name          -- table name; EXTERNAL creates an external table, otherwise a managed (internal) one
[(col_name data_type [COMMENT col_comment], ...)]           -- column names and types, with an optional comment per column
[COMMENT table_comment]                                     -- table-level comment
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]  -- partition columns of a partitioned table (more than one is allowed)
[CLUSTERED BY (col_name, col_name, ...)     -- bucketing columns of a bucketed table
[SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]  -- sort columns within each bucket, and the number of buckets
[ROW FORMAT DELIMITED      -- layout of the raw data that the table stores on HDFS
 [FIELDS TERMINATED BY char]     -- field separator within a row; default is ^A (Ctrl-A, \001)
 [COLLECTION ITEMS TERMINATED BY char]   -- separator between collection elements; default is ^B (Ctrl-B, \002)
 [MAP KEYS TERMINATED BY char]  -- separator between a map key and its value: ^C (Ctrl-C, \003)
 [LINES TERMINATED BY char]      -- row separator; default is '\n'
]  
[STORED AS file_format]         -- file format used to store the table data on HDFS
[LOCATION hdfs_path]            -- HDFS path where the table data is stored
[TBLPROPERTIES (property_name=property_value, ...)]    -- table properties
[AS select_statement]    -- create the table from the query result: copies both structure and data
[LIKE table_name]     -- create the table with the structure of another table: copies structure only, not data

默认分隔符 ^A 的输入方法：在 shell 里用 vim 编辑时，先按 ctrl+v，出现 ^；再按 ctrl+A，出现 A，合起来就是 ^A。

linux查看默认分隔符，用cat -A …

-- Managed (internal) table whose fields are separated by tabs.
CREATE TABLE student (
    id   INT,
    name STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t';
-- CTAS: create a table from a query result, copying both structure and data.
CREATE TABLE student3 AS
SELECT *
FROM student;
-- LIKE: copy only the structure of an existing table, not its data.
CREATE TABLE student4 LIKE student;

-- Basic table description (表名 is a placeholder for the table name).
DESC 表名;
-- Detailed table description.
DESC FORMATTED 表名;
-- Dropping a managed table also deletes its data directory on HDFS, so use it with care.
DROP TABLE student3;

-- Create an external table over tab-separated data.
-- LOCATION must point to a directory, not to a single file.
create external table if not exists test(
id string,up_area string,down_area string,up_time TIMESTAMP)
row format delimited fields terminated by '\t'
location '/company/test';
-- Dropping an external table deletes only the metadata of test; the data on HDFS is kept.
drop table test;

-- Convert between managed and external tables through the EXTERNAL table property:
-- 'TRUE' makes the table external, 'FALSE' makes it managed (internal) again.
-- 'TRUE/FALSE' is not a valid value, so each direction is its own statement.
alter table student set tblproperties('EXTERNAL'='TRUE');
alter table student set tblproperties('EXTERNAL'='FALSE');
-- Rename a table (table_name / new_table_name are placeholders).
-- Per the original note, for a managed table created with an explicit directory the
-- HDFS directory is renamed as well.
-- NOTE(review): confirm against the Hive docs under which LOCATION conditions the directory is moved.
ALTER TABLE table_name RENAME TO new_table_name;
-- Add columns. This runs before CHANGE COLUMN below, which needs column age to exist.
alter table student add columns(age int);
-- Change a column: the name can be changed freely, but the type may only be widened,
-- never narrowed (it follows the implicit type-conversion rules).
alter table student CHANGE COLUMN age newage int;
-- Replace columns: REPLACE swaps out the table's entire column list.
ALTER TABLE student replace columns(n_name string,n_age double);
-- Remove all data from a table (表名 is a placeholder). Only managed tables can be
-- truncated, not external ones; it amounts to deleting the data under the table's HDFS path.
truncate table 表名;

创建外部表后，还需要把数据放到表对应的 location 目录下，有以下两种方式：

上传数据到HDFS

hive (default)> dfs -put /opt/module/hive/datas/test.txt /company/test;

或者向表中装载数据（Load）

hive (default)> load data local inpath '/opt/module/hive/datas/test.txt' into table test;

DML数据操作(后面再完善)

5.1 数据导入

-- LOAD syntax template; clauses in square brackets are optional.
LOAD DATA [LOCAL] INPATH '数据的path' [OVERWRITE] INTO TABLE student [PARTITION (partcol1=val1,…)];
-- Load a local file into Hive, overwriting the table's existing data.
LOAD DATA LOCAL INPATH '/opt/module/hive/datas/student.txt'
OVERWRITE INTO TABLE student;
-- Load a file from HDFS into Hive, appending to the existing data:
-- upload the file to HDFS first, then load it from there.
dfs -put /opt/module/hive/datas/student.txt /user/atguigu;
LOAD DATA INPATH '/user/atguigu/student.txt'
INTO TABLE student;

-- INSERT ... VALUES: INTO appends rows, OVERWRITE replaces the existing data.
-- "INTO/OVERWRITE" below means "pick one"; this form is rarely used in practice.
INSERT INTO/OVERWRITE TABLE student VALUES (1018, 'ss18'), (1019, 'ss19');
-- INSERT ... SELECT is the common form, typically copying rows from a source table
-- into a temporary table. Do not put AS before SELECT here; that is CTAS syntax.
-- The inserted data must match the target table in format and number of columns.
INSERT INTO/OVERWRITE TABLE student2
SELECT *
FROM student
WHERE id < 1006;
-- For comparison, CREATE TABLE ... AS SELECT does take AS before the query.
CREATE TABLE student3 AS
SELECT *
FROM student;

5.2 数据导出

-- Export query results with INSERT ... DIRECTORY. Only OVERWRITE is allowed here;
-- the target path does not have to exist, Hive creates it.
insert overwrite local directory '/opt/module/hive/datas/export/student'
select * from student;

-- Same export, with the output fields separated by tabs.
insert overwrite local directory '/opt/module/hive/datas/export/student2'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
select * from student;

最低0.47元/天解锁文章

wm_43827516

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
hive

Hive访问--先开启服务[atguigu@hadoop102 hive]$ nohup hive --service metastore>log.txt 2>&1 &[atguigu@hadoop102 hive]$ nohup hive --service hiveserver2>log2.txt 2>&1 &--启动beeline客户端[atguigu@hadoop102 hive]$ bin/beeline -u jdbc:hive
复制链接

扫一扫