Hive - 基础2 DDL&DML& Function

Hive - 基础2 DDL&DML& Function

  • DDL

CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name    -- (Note: TEMPORARY available in Hive 0.14.0 and later)
  [(col_name data_type [COMMENT col_comment], ... [constraint_specification])]
  [COMMENT table_comment]
  [PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
  [CLUSTERED BY (col_name, col_name, ...) [SORTED BY (col_name [ASC|DESC], ...)] INTO num_buckets BUCKETS]
  [SKEWED BY (col_name, col_name, ...)                  -- (Note: Available in Hive 0.10.0 and later)]
     ON ((col_value, col_value, ...), (col_value, col_value, ...), ...)
     [STORED AS DIRECTORIES]
  [
   [ROW FORMAT row_format]
   [STORED AS file_format]
     | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  -- (Note: Available in Hive 0.6.0 and later)
  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)]   -- (Note: Available in Hive 0.6.0 and later)
  [AS select_statement];   -- (Note: Available in Hive 0.5.0 and later; not supported for external tables)

CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];

data_type
  : primitive_type
  | array_type
  | map_type
  | struct_type
  | union_type  -- (Note: Available in Hive 0.7.0 and later)

primitive_type
  : TINYINT
  | SMALLINT
  | INT
  | BIGINT
  | BOOLEAN
  | FLOAT
  | DOUBLE
  | DOUBLE PRECISION -- (Note: Available in Hive 2.2.0 and later)
  | STRING
  | BINARY      -- (Note: Available in Hive 0.8.0 and later)
  | TIMESTAMP   -- (Note: Available in Hive 0.8.0 and later)
  | DECIMAL     -- (Note: Available in Hive 0.11.0 and later)
  | DECIMAL(precision, scale)  -- (Note: Available in Hive 0.13.0 and later)
  | DATE        -- (Note: Available in Hive 0.12.0 and later)
  | VARCHAR     -- (Note: Available in Hive 0.12.0 and later)
  | CHAR        -- (Note: Available in Hive 0.13.0 and later)

array_type
  : ARRAY < data_type >

map_type
  : MAP < primitive_type, data_type >

struct_type
  : STRUCT < col_name : data_type [COMMENT col_comment], ...>

union_type
  : UNIONTYPE < data_type, data_type, ... >  -- (Note: Available in Hive 0.7.0 and later)

row_format
  : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
        [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
        [NULL DEFINED AS char]   -- (Note: Available in Hive 0.13 and later)
  | SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value, ...)]

file_format:
  : SEQUENCEFILE
  | TEXTFILE    -- (Default, depending on hive.default.fileformat configuration)
  | RCFILE      -- (Note: Available in Hive 0.6.0 and later)
  | ORC         -- (Note: Available in Hive 0.11.0 and later)
  | PARQUET     -- (Note: Available in Hive 0.13.0 and later)
  | AVRO        -- (Note: Available in Hive 0.14.0 and later)
  | JSONFILE    -- (Note: Available in Hive 4.0.0 and later)
  | INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname

constraint_specification:
  : [, PRIMARY KEY (col_name, ...) DISABLE NOVALIDATE ]
    [, CONSTRAINT constraint_name FOREIGN KEY (col_name, ...) REFERENCES table_name(col_name, ...) DISABLE NOVALIDATE ]
    
    • 创建表
    # Alter Column
    ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
      [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
    
    -- Plain CREATE TABLE.  Note: HiveQL line comments use '--', not '#'.
    CREATE TABLE IF NOT EXISTS hive_emp (
        EMPNO    int,
        ENAME    string,
        JOB      string,
        MGR      int,
        HIREDATE date,
        SAL      int,
        COMM     int,
        DEPTNO   int
    )
    ROW FORMAT DELIMITED FIELDS TERMINATED BY ',';

    -- Copy the table structure only (LIKE copies schema, not data).
    CREATE TABLE IF NOT EXISTS hive_emp_copy LIKE hive_emp;

    -- CTAS: copy structure AND data (CREATE TABLE ... AS SELECT).
    CREATE TABLE emp_ctas AS SELECT * FROM hive_emp;
    
    • 修改表
    #Alter Table Constraints
    
    ALTER TABLE table_name ADD CONSTRAINT constraint_name PRIMARY KEY (column, ...) DISABLE NOVALIDATE;
    ALTER TABLE table_name ADD CONSTRAINT constraint_name FOREIGN KEY (column, ...) REFERENCES table_name(column, ...) DISABLE NOVALIDATE RELY;
    ALTER TABLE table_name DROP CONSTRAINT constraint_name;
    
    ...
    
    # Alter Column
    
    ALTER TABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
      [COMMENT col_comment] [FIRST|AFTER column_name] [CASCADE|RESTRICT];
    
    -- Rename a table.
    ALTER TABLE hive_emp RENAME TO emp;

    -- Change a column's name, type and comment.
    -- (Terminating semicolon was missing in the original.)
    ALTER TABLE hive_emp_copy CHANGE COLUMN ENAME EMP_NAME string COMMENT '员工姓名';
    
    • 删除表
    -- Remove all rows but keep the table definition.
    TRUNCATE TABLE hive_emp_copy;

    -- Drop both the table definition and its data
    -- (for an EXTERNAL table only the metadata is removed).
    DROP TABLE IF EXISTS hive_emp_copy;
    

Hive外部表和内部表

Hive 默认创建的表为内部表(管理表):内部表被 drop 后,元数据和 HDFS 上的数据都会被删除。

外部表需要在建表时显式指定 EXTERNAL 关键字;drop 之后只删除元数据,HDFS 上的数据仍然保留。一般创建外部表时会指定 LOCATION 来存放数据。


  • DML

    • Load加载数据
    LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)]
    
    LOAD DATA [LOCAL] INPATH 'filepath' [OVERWRITE] INTO TABLE tablename [PARTITION (partcol1=val1, partcol2=val2 ...)] [INPUTFORMAT 'inputformat' SERDE 'serde'] (3.0 or later)
    
    -- LOCAL reads from the client filesystem; OVERWRITE replaces existing data.
    -- (Terminating semicolon was missing in the original.)
    LOAD DATA LOCAL INPATH '/emp.data' OVERWRITE INTO TABLE emp;
    
    • insert data into table from query 从查询结果集导入数据
    Standard syntax:
    INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1 FROM from_statement;
    INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1 FROM from_statement;
     
    Hive extension (multiple inserts):
    FROM from_statement
    INSERT OVERWRITE TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...) [IF NOT EXISTS]] select_statement1
    [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2]
    [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2] ...;
    FROM from_statement
    INSERT INTO TABLE tablename1 [PARTITION (partcol1=val1, partcol2=val2 ...)] select_statement1
    [INSERT INTO TABLE tablename2 [PARTITION ...] select_statement2]
    [INSERT OVERWRITE TABLE tablename2 [PARTITION ... [IF NOT EXISTS]] select_statement2] ...;
     
    Hive extension (dynamic partition inserts):
    INSERT OVERWRITE TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
    INSERT INTO TABLE tablename PARTITION (partcol1[=val1], partcol2[=val2] ...) select_statement FROM from_statement;
    
    -- OVERWRITE and INTO are mutually exclusive: write either
    -- INSERT OVERWRITE TABLE ... (replace) or INSERT INTO TABLE ... (append).
    -- The original "INSERT OVERWRITE INTO TABLE" is a syntax error.
    INSERT OVERWRITE TABLE emp_ctas SELECT * FROM emp;
    
    • Writing data into the filesystem from queries 导出数据
    
    Standard syntax:
    INSERT OVERWRITE [LOCAL] DIRECTORY directory1
      [ROW FORMAT row_format] [STORED AS file_format] (Note: Only available starting with Hive 0.11.0)
      SELECT ... FROM ...
     
    Hive extension (multiple inserts):
    FROM from_statement
    INSERT OVERWRITE [LOCAL] DIRECTORY directory1 select_statement1
    [INSERT OVERWRITE [LOCAL] DIRECTORY directory2 select_statement2] ...
     
      
    row_format
      : DELIMITED [FIELDS TERMINATED BY char [ESCAPED BY char]] [COLLECTION ITEMS TERMINATED BY char]
            [MAP KEYS TERMINATED BY char] [LINES TERMINATED BY char]
            [NULL DEFINED AS char] (Note: Only available starting with Hive 0.13)
    
    -- Export the query result to a local directory, tab-separated.
    INSERT OVERWRITE LOCAL DIRECTORY '/hive/data/emp'
    ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
    SELECT * FROM emp;
    
    • export/import 导出/导入表结构和数据
    EXPORT TABLE tablename [PARTITION (part_column="value"[, ...])]
      TO 'export_target_path' [ FOR replication('eventid') ]
    
    IMPORT [[EXTERNAL] TABLE new_or_original_tablename [PARTITION (part_column="value"[, ...])]]
      FROM 'source_path'
      [LOCATION 'import_target_path']
    
    -- Export table metadata and data to an HDFS path.
    EXPORT TABLE emp TO '/hive/data/emp';

    -- Import it back as an EXTERNAL table stored at the given LOCATION.
    -- (Terminating semicolon was missing in the original.)
    IMPORT EXTERNAL TABLE emp_import FROM '/hive/data/emp' LOCATION '/hive/db/external/emp_import';
    

  • 内置函数

hive提供了很多的内置函数,具体看官方文档

-- List all registered built-in (and loaded) functions.
show functions;
-- Show the short usage description of one function.
desc function split;
-- Show the extended description, including usage examples.
desc function extended explode;
返回类型 | 签名 | 描述
BIGINT | round(double a) | 返回最接近 a 的 BIGINT 值(四舍五入)。
BIGINT | floor(double a) | 返回小于或等于 a 的最大 BIGINT 值。
BIGINT | ceil(double a) | 返回大于或等于 a 的最小 BIGINT 值。
string | concat(string A, string B, …) | 返回将 B 拼接在 A 之后得到的字符串。
string | substr(string A, int start) | 返回 A 中从 start 位置开始直到结尾的子字符串。
string | substr(string A, int start, int length) | 返回 A 中从 start 位置开始、长度为 length 的子字符串。
string | upper(string A) | 返回将 A 的所有字符转换为大写后的字符串。
<type> | cast(<expr> as <type>) | 将表达式 expr 的结果转换为指定类型,如 cast('1' as BIGINT) 把字符串 '1' 转换为整数;转换不成功则返回 NULL。
string | from_unixtime(int unixtime) | 将自 Unix 纪元(1970-01-01 00:00:00 UTC)以来的秒数转换为当前系统时区下形如 "1970-01-01 00:00:00" 的时间戳字符串。
string | to_date(string timestamp) | 返回时间戳字符串的日期部分:to_date("1970-01-01 00:00:00") = "1970-01-01"
int | year(string date) | 返回日期或时间戳字符串的年份部分:year("1970-01-01 00:00:00") = 1970, year("1970-01-01") = 1970
int | month(string date) | 返回日期或时间戳字符串的月份部分:month("1970-11-01 00:00:00") = 11, month("1970-11-01") = 11
int | day(string date) | 返回日期或时间戳字符串的天数部分:day("1970-11-01 00:00:00") = 1, day("1970-11-01") = 1
string | get_json_object(string json_string, string path) | 按指定的 JSON 路径从 JSON 字符串中提取 JSON 对象,并以字符串形式返回;若输入的 JSON 字符串无效则返回 NULL。
T | explode(ARRAY&lt;T&gt; a) | 将数组展开为多行,返回只含一列(col)的行集,数组中每个元素对应一行。(行转列)
-- Expected result: 2.0
SELECT round(1.535);
-- Expected result: 2
SELECT ceil(1.535);
-- Expected result: 2 (ceiling is a synonym of ceil)
SELECT ceiling(1.535);
-- Non-string arguments are converted to string before concatenation: 'abc12345'
SELECT concat('abc', 12345);
-- Positive start counts from the left (1-based): 'bcdefg12345'
SELECT substr('abcdefg12345', 2);
-- Negative start counts from the right: '45'
SELECT substr('abcdefg12345', -2);
-- Indexes start at 1: 'abcde'
SELECT substr('abcdefg12345', 1, 5);
-- substring is a synonym of substr
SELECT substring('abcdefg12345', 1, 5);

SELECT upper('abCDefG');

SELECT cast('123' as int);

-- Expected result: 2019-07-23 11:23:18 (depends on the session time zone)
SELECT from_unixtime(1563852198);

-- Expected result: 2019-07-23
SELECT to_date('2019-07-23 11:23:18');

-- Date-part extraction: 2019 / 7 / 23
SELECT year('2019-07-23 11:23:18');
SELECT month('2019-07-23 11:23:18');
SELECT day('2019-07-23 11:23:18');


-- Extract a JSON object by path; returns the fruit array as a string:
-- [{"weight":8,"type":"apple"},{"weight":9,"type":"pear"}]
SELECT get_json_object('{
  "store": {
    "fruit": [{
      "weight": 8,
      "type": "apple"
    }, {
      "weight": 9,
      "type": "pear"
    }],
    "bicycle": {
      "price": 19.95,
      "color": "red"
    }
  },
  "email": "amy@only_for_json_udf_test.net",
  "owner": "amy"
}','$.store.fruit') fruit;

-- explode turns each array element into its own row (one column named col).
SELECT explode(split('Explodes an array to multiple rows. Returns a row-set with a single column (col), one row for each element from the array.',' ')) as word;
  • 一个小的wordcount例子
-- Small word-count example: explode the sentence into words, then count each.
-- (Terminating semicolon was missing; keyword case normalized.)
SELECT tmp.word, count(*) AS count_num
FROM (
    SELECT explode(split('Explodes an array to multiple rows. Returns a row-set with a single column (col), one row for each element from the array.',' ')) word
) tmp
GROUP BY tmp.word;
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值