Shell Commands

Hive Shell Commands

一、DDL Operations

  1. Database Operations

    Enter the Hive client: hive
    Create a database: create database dbname;
    Create a database only if it does not already exist: create database if not exists mydb;
    Create a database with a comment:
    create database if not exists dbname comment 'create my db named dbname';
    Create a database with properties:
    create database if not exists dbname with dbproperties ('a'='aaa','b'='bbb');
    
    List databases:
    show databases;
    show databases like 'my*';
    Show detailed database properties: desc database [extended] dbname;
    Show the database currently in use: select current_database();
    Show the full CREATE DATABASE statement: show create database mydb;
    
    Drop a database:
    drop database dbname;
    drop database if exists dbname;
    Drop a database that still contains tables: drop database if exists dbname cascade;
    
    Switch to a database: use mydb;
    
  2. Table Operations

    Show table structure:
    desc table_name;
    desc extended table_name;
    desc formatted table_name;
    List tables:
    show tables;
    show tables in db_name;
    Copy a table definition with the like keyword:
    create table new_table like mytable;
    create external table if not exists new_table like mytable;
    List Hive functions: show functions;
    
    Create a managed (internal) table:
    create table if not exists my_user(id string, name string);
    create table my_user(id string, name string) row format delimited fields terminated by ',';
    create table mytable (id int, name string) row format delimited fields terminated by ',' stored as textfile;
    Create an external table:
    create external table mytable2 (id int, name string) row format delimited fields terminated by ',' location '/user/hive/warehouse/mytable2';
    Create a partitioned table:
    create table mytable3(id int, name string) partitioned by(sex string) row format delimited fields terminated by ',' stored as textfile;
    Create a bucketed table:
    create table stu_buck(Sno int,Sname string,Sex string,Sage int,Sdept string)
    clustered by(Sno) sorted by(Sno DESC) into 4 buckets
    row format delimited fields terminated by ',';
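    Note: LOAD DATA does not redistribute rows into buckets, so a bucketed table is normally populated with insert ... select while bucketing enforcement is on. A minimal sketch, assuming a hypothetical staging table stu_raw with the same columns:
    set hive.enforce.bucketing = true;  -- needed on older Hive versions; Hive 2.x+ always enforces bucketing
    insert overwrite table stu_buck select Sno,Sname,Sex,Sage,Sdept from stu_raw;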
    
    Alter a table:
    Rename a table: ALTER TABLE table_name RENAME TO new_table_name;
    Set table properties: ALTER TABLE table_name SET TBLPROPERTIES ('comment' = 'my new students table');
    Change the field delimiter: ALTER TABLE student SET SERDEPROPERTIES ('field.delim' = '-');
    Add/change/replace columns (a column is removed by listing the surviving columns in REPLACE COLUMNS):
    ALTER TABLE name ADD COLUMNS (col_spec[, col_spec ...])
    ALTER TABLE name CHANGE c_name new_name new_type [FIRST|AFTER c_name]
    ALTER TABLE name REPLACE COLUMNS (col_spec[, col_spec ...])
    List table partitions:
    show partitions mytable3;
    show partitions table_name partition(city='beijing');
    Drop partitions:
    ALTER TABLE student_p DROP IF EXISTS partition(part='aa');
    ALTER TABLE student_p DROP IF EXISTS partition(part='aa'), partition(part='bb');
    Add partitions:
    ALTER TABLE student_p ADD partition(part='a') partition(part='b');
    ALTER TABLE student_p ADD IF NOT EXISTS partition(part='bb') location '/myhive_bb' partition(part='cc') location '/myhive_cc';
    Change a partition's location:
    ALTER TABLE student_p partition (part='bb') SET location '/myhive_bbbbb';
    Protect a partition from being dropped:
    alter table student_p partition (part='aa') enable no_drop;
    Protect a partition from being queried:
    alter table student_p partition (part='aa') enable offline;
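    In Hive versions that support partition protection, it is removed with the matching disable form:
    alter table student_p partition (part='aa') disable no_drop;
    alter table student_p partition (part='aa') disable offline;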
    
    Drop a table:
    drop table if exists mytable;
    
    Truncate a table:
    truncate table student;
    truncate table student_ptn partition(city='beijing');
    
    View Hive command history: cat /home/hadoop/.hivehistory
    Print column headers in query results: set hive.cli.print.header=true;
    

二、DML Operations

  1. Load: Loading Data

    LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION(partcol1=val1, partcol2=val2 ...)]
    
    Insert rows directly:
    insert into table my_user values ('1','huangbo'), ('2','xuzheng'), ('3','wangbaoqiang');
    Load data into the 'boy' partition:
    load data local inpath '/root/hivedata/mingxing.txt' overwrite into table mytable3 partition(sex='boy');
    
    Query the data: select * from my_user;
    
    Import data:
    a) From HDFS (the source file is moved into the table directory): load data inpath '/user.txt' into table my_user;
    b) From the local filesystem (the source file is copied): load data local inpath '/home/hadoop/user.txt' into table my_user;
    
  2. Insert: Inserting Data

    Insert a single row: INSERT INTO TABLE table_name VALUES(XX,YY,ZZ);
    
    Insert query results into a table:
    INSERT {OVERWRITE | INTO} TABLE table_name [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement
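    OVERWRITE replaces the existing contents of the target table or partition, while INTO appends. A minimal sketch, reusing the mingxing/mingxing2 tables from the multi-insert example below:
    insert overwrite table mingxing2 select id,name,sex,age from mingxing;  -- replaces all existing rows
    insert into table mingxing2 select id,name,sex,age from mingxing;       -- appends to the existing rows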
    
    Multi-table insert (the source is scanned once for several inserts):
    from mingxing
    insert into table mingxing2 select id,name,sex,age
    insert into table mingxing select id,name,sex,age,department;
    
    from student
    insert into table ptn_student partition(city='MA') select id,name,sex,age,department where department='MA'
    insert into table ptn_student partition(city='IS') select id,name,sex,age,department where department='IS'
    insert into table ptn_student partition(city='CS') select id,name,sex,age,department where department='CS';
    
    Dynamic partition insert:
    set hive.exec.dynamic.partition=true;
    set hive.exec.dynamic.partition.mode=nonstrict;  -- strict mode requires at least one static partition column
    One dynamic partition column:
    insert into table test2 partition (age) select name,address,school,age from students;
    Multiple partition columns (city is static, zipcode is dynamic):
    insert into table student_ptn2 partition(city='sa',zipcode) 
    select id, name, sex, age, department, department as zipcode from studentss;
    Note: the dynamic partition columns (age and zipcode above) must come last in the SELECT list, in the same order as in the PARTITION clause; otherwise rows land in the wrong partitions.
    
    CTAS(create table … as select …):
    CREATE TABLE mytest AS SELECT name, age FROM test;
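    CTAS can also change the storage format of the copy; a minimal sketch (ORC output, assuming the same test table):
    CREATE TABLE mytest_orc STORED AS ORC AS SELECT name, age FROM test;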
    
  3. Insert: Exporting Data

    Export data to the local filesystem:
    insert overwrite local directory '/home/hadoop/student.txt' select * from studentss;
    Export data to HDFS:
    insert overwrite directory '/student' select * from studentss where age >= 20;
    insert overwrite directory 'hdfs://hadoop02:9000/user/hive/warehouse/mystudent' select * from studentss;
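    By default the exported files use Hive's default field delimiter (\001). Since Hive 0.11 the export can specify its own row format; a minimal sketch:
    insert overwrite local directory '/home/hadoop/student_csv' row format delimited fields terminated by ',' select * from studentss;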
    
  4. Select: Querying Data

    SELECT [ALL | DISTINCT] select_condition, select_condition, ...
    FROM table_name a
    [JOIN table_other b ON a.id = b.id]
    [WHERE where_condition]
    [GROUP BY col_list [HAVING condition]]
    [CLUSTER BY col_list | [DISTRIBUTE BY col_list] [SORT BY col_list | ORDER BY col_list]]
    [LIMIT number]
    
    order by (column): global ordering over all output (single reducer)
    sort by (column): ordering within each reducer only
    distribute by (column): controls which reducer each row goes to
    cluster by (column): distribute by + sort by on the same column
    So when the distribution column and the sort column are the same, cluster by is equivalent to distribute by + sort by; when they differ, cluster by cannot be used, as sketched below.
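    A minimal sketch of the equivalence, reusing the mingxing2 table from the examples below:
    select * from mingxing2 cluster by id;                 -- equivalent to the next line
    select * from mingxing2 distribute by id sort by id;
    select * from mingxing2 distribute by id sort by age;  -- different columns: cluster by cannot express this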
    
    Get the three oldest students:
    select id, age, name from student where stat_date='20140101' order by age desc limit 3;
    Sort students by age in descending order:
    set mapred.reduce.tasks=4;
    select id, age, name from student sort by age desc;
    select id, age, name from student order by age desc;
    select id, age, name from student distribute by age;
    Combined distribution and sorting: distribute by id, sort by age and id descending:
    insert overwrite directory '/root/outputdata6' select * from mingxing2 distribute by id sort by age desc, id desc;
    Distribute by id without sorting:
    insert overwrite directory '/root/outputdata4' select * from mingxing2 distribute by id;
    Distribute by id and sort by id:
    insert overwrite directory '/root/outputdata3' select * from mingxing2 cluster by id;
    Bucketed queries:
    Enable bucketing enforcement:
    set hive.enforce.bucketing = true;  -- needed on older Hive versions
    Set the number of reduce tasks, i.e. the number of buckets:
    set mapreduce.job.reduces=4;
    insert overwrite directory '/root/outputdata3' select * from mingxing2 cluster by id;
    
    Cases where an HQL statement is not converted into a MapReduce job:
    1. select * from student;  -- a plain read of the table's files needs no MapReduce
    2. a WHERE clause that filters only on partition columns
    3. set hive.exec.mode.local.auto=true;  -- Hive tries to run the job in local mode
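    Related: newer Hive can answer more simple queries with a direct fetch instead of MapReduce via hive.fetch.task.conversion; a minimal sketch:
    set hive.fetch.task.conversion=more;    -- 'more' also covers simple projections, filters and LIMIT
    select id, name from student limit 10;  -- served by a fetch task, no MapReduce job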
    
  5. Hive Join Queries

    1) Only equi-joins are supported; AND is allowed in the join condition, OR is not
    2) More than two tables can be joined
    3) The map/reduce logic of a join buffers every table except the last one in memory,
        so in practice put the largest table last (or override this with a hint; see the sketch after this list).
    4) Hive joins come in three kinds: inner join, outer join, semi join
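    As mentioned in point 3, Hive streams the last table of a join and buffers the earlier ones; the /*+ STREAMTABLE(alias) */ hint overrides which table is streamed. A minimal sketch:
    select /*+ STREAMTABLE(a) */ a.id, b.name from tablea a join tableb b on a.id = b.id;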
    
    inner join: returns only the rows that satisfy the join condition on both sides
    HQL: select * from tablea a inner join tableb b on a.id=b.id;
    
    left join (equivalent to left outer join):
    1. The left table drives the match
    2. Unmatched rows get NULL for the right table's columns
    3. Every row of the left table appears in the result (repeated if it matches several right-side rows)
    HQL: select * from tablea a left join tableb b on a.id=b.id;
    
    right join (equivalent to right outer join):
    1. The right table drives the match
    2. Unmatched rows get NULL for the left table's columns
    3. Every row of the right table appears in the result
    HQL: select * from tablea a right join tableb b on a.id=b.id;
    
    left semi join: an efficient replacement for IN/EXISTS subqueries, which early Hive did not support (Hive 1.2.1 added IN); note that only the left table's columns can be selected, so select * returns only tablea's columns
    HQL: select * from tablea a left semi join tableb b on a.id=b.id;
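    On Hive versions that support IN subqueries, the semi join above is equivalent to this form (a minimal sketch):
    select * from tablea a where a.id in (select b.id from tableb b);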
    

HBase Shell Commands

1. Table Operations

List tables in HBase: list
Create a user table with two column families, info and data:
create 'user', 'info', 'data'
Create a user table with a single column family info that keeps 3 versions:
create 'user', {NAME => 'info', VERSIONS => '3'}
Put data into the user table: row key rk0001, column name in family info, value zhangsan
put 'user', 'rk0001', 'info:name', 'zhangsan'
put 'user', 'rk0001', 'info:gender', 'female'
put 'user', 'rk0001', 'info:age', '20'
put 'user', 'rk0001', 'data:pic', 'picture'
Get all data for row key rk0001 in the user table
get 'user', 'rk0001'
get 'user', 'rk0001', 'info'
get 'user', 'rk0001', 'info:name', 'info:age'
Get recent versions for row key rk0001, column family info, in the user table
get 'user', 'rk0001', {COLUMN => 'info', VERSIONS => 2}
get 'user', 'rk0001', {COLUMN => 'info:name', VERSIONS => 5}
get 'user', 'rk0001', {COLUMN => 'info:name', VERSIONS => 5, TIMERANGE => [1392368783980, 1392380169184]}
Get cells in row rk0001 of the user table whose value is zhangsan
get 'user', 'rk0001', {FILTER => "ValueFilter(=, 'binary:zhangsan')"}
Get cells in row rk0001 of the user table whose column qualifier contains the letter a
get 'user', 'rk0001', {FILTER => "(QualifierFilter(=,'substring:a'))"}

2. Scan Operations

Scan all data in the user table
scan 'user'
Scan column family info in the user table
scan 'user', {COLUMNS => 'info'}
scan 'user', {COLUMNS => 'info', RAW => true, VERSIONS => 5}
A scan can enable RAW mode; a raw scan also returns cells that carry a delete marker but have not yet been physically removed.
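A minimal sketch of what RAW mode surfaces, reusing the user table above:
delete 'user', 'rk0001', 'info:gender'
scan 'user'                                # the deleted cell no longer appears
scan 'user', {RAW => true, VERSIONS => 5}  # shows the delete marker and old cells until a major compaction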
Scan column families info and data where the column qualifier contains the letter a
scan 'user', {COLUMNS => ['info', 'data'], FILTER => "(QualifierFilter(=,'substring:a'))"}
Scan column family info for row keys in the range [rk0001, rk0003)
scan 'user', {COLUMNS => 'info', STARTROW => 'rk0001', ENDROW => 'rk0003'}
Scan rows whose row key starts with rk
scan 'user',{FILTER=>"PrefixFilter('rk')"}
Scan the user table within a given time range
scan 'user', {TIMERANGE => [1392368783980, 1392380169184]}

3. Deleting Data

Delete the cell with column info:name from row rk0001 of the user table
delete 'user', 'rk0001', 'info:name'
Delete the cell with column info:name and timestamp 1392383705316 from row rk0001 of the user table
delete 'user', 'rk0001', 'info:name', 1392383705316
Truncate the user table
truncate 'user'

4. Altering a Table

Disable the user table
disable 'user'
Add two column families, f1 and f2
alter 'user', NAME => 'f1'
alter 'user', NAME => 'f2'
Enable the table
enable 'user'
Delete a column family:
alter 'user', NAME => 'f1', METHOD => 'delete'  or  alter 'user', 'delete' => 'f1'
Add column family f1 and delete column family f2 in one statement
alter 'user', {NAME => 'f1'}, {NAME => 'f2', METHOD => 'delete'}
Set the number of versions kept by the f1 column family of the user table to 5
alter 'user', NAME => 'f1', VERSIONS => 5
Delete a record
delete 'user', 'rk0001', 'info:name'