Hive学习笔记(1)基本操作

hive学习官方网站:
General Info
============
For the latest information about Hive, please visit our website at:
  http://hive.apache.org/

Getting Started
===============
- Installation Instructions and a quick tutorial:
  https://cwiki.apache.org/confluence/display/Hive/GettingStarted
- A longer tutorial that covers more features of HiveQL:
  https://cwiki.apache.org/confluence/display/Hive/Tutorial
- The HiveQL Language Manual:
  https://cwiki.apache.org/confluence/display/Hive/LanguageManual

 

1.显示数据库
show databases ;
2.创建数据库
create database db_hive ;
3. 显示表,显示表的详细信息,显示表的详细信息(格式化后),创建表
show tables ;
desc extended student ;
desc formatted student ;
create table student(id int, name string) ROW FORMAT DELIMITED FIELDS
TERMINATED BY '\t' ;
4. 装载数据,最好加上数据库名
hive> load data local inpath '/opt/datas/student.txt' into
    > table db_hive.student ;
5. 查看函数,查看函数使用方法
show functions ;
desc function upper ;
6. hive中操作文件系统
dfs -ls /user/hive/warehouse ;
dfs -rm -R /user/hive/warehouse/emp;
7. 查看hive的交互命令
hive -help
usage: hive
 -d,--define <key=value>          Variable substitution to apply to hive
                                  commands. e.g. -d A=B or --define A=B
    --database <databasename>     Specify the database to use
 -e <quoted-query-string>         SQL from command line
 例如:[root@zhangbk hadoop]# hive -e 'select * from db_hive.student ;'
 -f <filename>                    SQL from files
 例如:vi /opt/datas/hivef.sql
         select * from db_hive.student ;
      [root@zhangbk datas]# hive -f /opt/datas/hivef.sql
  也可将查询结果写入文件:[root@zhangbk datas]# hive -f /opt/datas/hivef.sql > /opt/datas/hivef.txt
 -h <hostname>                    connecting to Hive Server on remote host
 -H,--help                        Print help information
    --hiveconf <property=value>   Use value for given property
    --hivevar <key=value>         Variable substitution to apply to hive
                                  commands. e.g. --hivevar A=B
 -i <filename>                    Initialization SQL file
 例如:与用户自定义的udf相互使用。
 -p <port>                        connecting to Hive Server on port number
 -S,--silent                      Silent mode in interactive shell
 -v,--verbose                     Verbose mode (echo executed SQL to the
                                  console)
8. 交互命令hive cli
exit/quit ;
dfs -ls / ;
!ls ;查看操作系统本地的文件

 

================================================================
第二章 笔记2
1. 创建表
-- Web access log table.
-- NOTE: `user` is a reserved keyword in Hive 1.2+ and must be quoted
-- with backticks, otherwise this DDL fails with a parse error.
create table if not exists db_hive.log_20190512(
ip string comment 'client ip address',
`user` string comment 'user name',
req_url string comment 'user request url'
)
comment 'Web Access Logs'
row format delimited fields terminated by ',' 
 -- collection items terminated by '\n'
STORED AS TEXTFILE
location '/user/hive/warehouse/db_hive.db/log_20190512' ;

复制表部分字段(分表)
create table db_hive.log_20190512_bak as
select ip,user from db_hive.log_20190512 ;

复制表
create table db_hive.log_20190512_bak like db_hive.log_20190512;

2. 删除数据库,表
drop database db_hive cascade ;
drop table student ;
================================================================
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name  
  [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]
  [
   [ROW FORMAT row_format] 
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   
  [AS select_statement];   
 
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];
 
data_type
  : primitive_type
  | array_type  --常用
  | map_type     --常用
  | struct_type
  | union_type  
 
primitive_type
  : TINYINT
  | SMALLINT
  | INT     --常用
  | BIGINT
  | BOOLEAN
  | FLOAT
  | DOUBLE   --常用
  | DOUBLE PRECISION 
  | STRING     --常用
  | BINARY      
  | TIMESTAMP   
  | DECIMAL     
  | DECIMAL(precision, scale)  
  | DATE        
  | VARCHAR     
  | CHAR        
 
array_type
  : ARRAY < data_type >
 
map_type
  : MAP < primitive_type, data_type >
 
struct_type
  : STRUCT < col_name : data_type [COMMENT col_comment], ...>
 
union_type
   : UNIONTYPE < data_type, data_type, ... > 
 
row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
  | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]
 
file_format:
  : SEQUENCEFILE
  | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
  | RCFILE      
  | ORC         
  | PARQUET     
  | AVRO        
  | JSONFILE    
  | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
 
constraint_specification:
  : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
    [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE ]

=================================================================================================
员工表
-- Employee table; rows are loaded from tab-separated text files.
CREATE TABLE IF NOT EXISTS default.emp (
    empno    INT,      -- employee number (key)
    ename    STRING,   -- employee name
    job      STRING,   -- job title
    mgr      INT,      -- manager's empno
    hiredate STRING,   -- hire date kept as text
    sal      DOUBLE,   -- salary
    comm     DOUBLE,   -- commission (may be absent)
    deptno   INT       -- department number
)
COMMENT 'emp table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

部门表
-- Department table; rows are loaded from tab-separated text files.
CREATE TABLE IF NOT EXISTS default.dept (
    deptno INT,    -- department number (key)
    dname  STRING, -- department name
    loc    STRING  -- location
)
COMMENT 'dept table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';

load data local inpath '/opt/datas/emp.txt' overwrite into table default.emp ;
load data local inpath '/opt/datas/dept.txt' overwrite into table default.dept;

create table if not exists default.dept_cats as
select * from dept ;

清除表数据:truncate table dept_cats ;
修改表名:alter table dept_cats rename to dept_sub ;
删除表:drop table if exists dept_sub ;

Hive中表的类型
   内部表(管理表),删除表,会删除表数据和元数据
   外部表(external table),删除表,只会删除元数据,不会删除数据
-- External employee table: DROP TABLE removes only the metadata,
-- the data files under LOCATION are left in place.
CREATE EXTERNAL TABLE IF NOT EXISTS default.ext_emp (
    empno    INT,
    ename    STRING,
    job      STRING,
    mgr      INT,
    hiredate STRING,
    sal      DOUBLE,
    comm     DOUBLE,
    deptno   INT
)
COMMENT 'emp table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/user/hive/warehouse/ext_emp'; -- put the data files in this directory

分区表
分区表实际上就是对应一个HDFS文件系统上的独立的文件夹,该文件夹下是该分区所有的数据文件,
Hive中的分区就是分目录,把一个大的数据集根据业务需要分割成小的数据集。
-- Partitioned employee table: one HDFS subdirectory per (month, day).
-- NOTE: the original DDL misspelled the name as "emp_partiton"; it is
-- corrected to emp_partition so the later LOAD DATA / SELECT statements
-- that reference emp_partition resolve against this table.
create table if not exists default.emp_partition(
empno int,
ename string,
job string,
mgr int,
hiredate string,
sal double,
comm double,
deptno int
)
partitioned by (month string, day string)
row format delimited fields terminated by '\t' ;

load data local inpath '/opt/datas/emp.txt'
overwrite into table emp_partition partition (month='201905',day='13');
select * from emp_partition where month='201905' ;

无论内部表还是外部表,创建好表之后,可直接将数据文件放在表文件夹下即可,对于分区表
需要注意:
dfs -mkdir -p /user/hive/warehouse/dept_part/day=20190513 ;
dfs -put /opt/datas/dept.txt /user/hive/warehouse/dept_part/day=20190513 ;
alter table dept_part add partition (day='20190513') ;
=============================================================================================
导入数据:
1. 加载本地文件到hive表
  load data local inpath '/opt/datas/emp.txt' overwrite into table emp ;
2. 加载hdfs文件到hive表
  load data inpath '/opt/datas/emp.txt' overwrite into table emp ;
3. 创建表,通过insert加载
  insert overwrite table emp
  select * from emp_bak ;
4.将数据文件上传到hive表目录下

=============================================================================================
导出数据
insert overwrite local directory '/opt/datas/hive_emp'
row format delimited fields terminated by '\t' 
collection items terminated by '\n'
select * from emp ;

hive -e "select * from emp ;" > /opt/datas/hive_emp2

==========================================================================
查询分析数据
select * from emp
where 
group by 
limit number ;
=, >=, <=, between and
is null ,is not null, in, not in
max, min, count,sum,avg

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值