hive常用命令

最新推荐文章于 2021-06-02 07:08:05 发布

chiwu9780

最新推荐文章于 2021-06-02 07:08:05 发布

阅读量159

点赞数

文章标签：数据库大数据

原文链接：https://my.oschina.net/chinahufei/blog/3083846

版权

一、连接

后台启动hiveserver2

# Start HiveServer2 as a background job; nohup keeps it running after the launching shell exits.
nohup bin/hive --service hiveserver2  &

连接hiveserver2

bin/beeline
!connect jdbc:hive2://node03.hadoop.com:10000

使用本地模式

-- Allow Hive to decide automatically when a job can run in local mode.
set hive.exec.mode.local.auto=true;

二、数据库增删改查

创建

-- Create the database only if it does not already exist.
create database if not exists myhive;
-- Create a database whose data lives at an explicit HDFS location.
create database myhive2 location '/myhive2';
use  myhive;

删除

-- Drop an empty database.
drop  database  myhive2;
-- Force-drop a database together with all the tables it contains.
drop  database  myhive  cascade;

修改

-- ALTER DATABASE can be used to change some properties of a database.
-- Per the original note, the database metadata itself (its name and its location)
-- cannot be changed. NOTE(review): newer Hive releases document
-- ALTER DATABASE ... SET LOCATION -- verify against the Hive version in use.
alter  database  myhive2  set  dbproperties('createtime'='20180611');

查看

-- List all databases.
show databases;
-- Show basic information about a database.
desc  database  myhive2;
-- Show more detailed information about a database.
desc database extended  myhive2;

三、数据表

内部表和外部表

# 外部表
外部表因为是指定其他的hdfs路径的数据加载到表当中来，所以hive表会认为自己不完全独占这份数据，
所以删除hive表的时候，数据仍然存放在hdfs当中，不会删掉

# 内部表
而内部表删除后，hdfs中的数据也会被删除。

# 使用场景
每天将收集到的网站日志定期流入HDFS文本文件。在外部表（原始日志表）的基础上做大量的统计分析，
用到的中间表、结果表使用内部表存储，数据通过SELECT+INSERT进入内部表。

分区表(partitioned by (month string, day string, hour string))

-- Create a table partitioned by a single column.
create table score(s_id string,c_id string, s_score int)
partitioned by (month string) row format delimited fields terminated by '\t';
-- Create a table partitioned by several columns.
create table score2 (s_id string,c_id string, s_score int)
partitioned by (year string,month string,day string) row format
delimited fields terminated by '\t';
-- Load a local file into one partition.
load data local inpath '/export/servers/hivedatas/score.csv'
into table score partition (month='201806');
-- Load a local file into a multi-column partition.
load data local inpath '/export/servers/hivedatas/score.csv'
into table score2 partition(year='2018',month='06',day='01');
-- List the partitions of a table.
show  partitions  score;
-- Add a partition.
alter table score add partition(month='201805');
-- Drop a partition.
alter table score drop partition(month = '201806');

分桶表(clustered by(c_id) into 3 buckets)

-- Create a table bucketed on c_id into 3 buckets.
create table course (c_id string,c_name string,t_id string) clustered by(c_id) into 3 buckets
row format delimited fields terminated by '\t';
-- Populate the bucketed table from another table
-- (the original note states that only insert overwrite may be used here).
insert overwrite table course select * from course_common cluster by(c_id);

插入数据

-- Insert a row directly into a partition.
insert into table score3 partition(month ='201807') values ('001','002','100');
-- Load a local file into a partition; overwrite replaces the partition's current data.
load data local inpath '/export/servers/hivedatas/score.csv' overwrite into table score partition(month='201806');
-- Fill a partition from a query result; overwrite replaces the partition's current data.
insert overwrite table score4 partition(month = '201806') select s_id,c_id,s_score from score;

创建表

-- Create a basic table.
create table stu(id int,name string);

-- Create a text-file table with an explicit field delimiter ('\t')
-- and an explicit storage location ('/user/stu2').
create  table if not exists stu2(id int ,name string)
row format delimited fields terminated by '\t'
stored as textfile location '/user/stu2';

-- Create a table from the result of a query.
create table stu3 as select * from stu2;

-- Create a table with the same structure as an existing table.
create table stu4 like stu2;

修改表

-- Rename a table.
alter table old_table_name rename to new_table_name;
-- Append two string columns to score5.
alter table score5
    add columns (mycol string, mysco string);

转载于:https://my.oschina.net/chinahufei/blog/3083846

chiwu9780

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
hive常用命令

一、连接后台启动hiveserver2nohup bin/hive --service hiveserver2 &连接hiveserver2bin/beeline!connect jdbc:hive2://node03.hadoop.com:10000使用本...
复制链接

扫一扫