hive常用sql语句

最新推荐文章于 2024-08-20 14:26:08 发布

weixin_33919941

最新推荐文章于 2024-08-20 14:26:08 发布

阅读量111

点赞数

文章标签：大数据数据库 python

原文链接：https://my.oschina.net/u/2000675/blog/856553

版权

2019独角兽企业重金招聘Python工程师标准>>>

数据库

创建数据库

hive> CREATE DATABASE financials;


hive> CREATE DATABASE IF NOT EXISTS financials;

显示现在有的数据库

hive> SHOW DATABASES;
default
financials
hive> CREATE DATABASE human_resources;
hive> SHOW DATABASES;
default
financials
human_resources

条件查询数据库

hive> SHOW DATABASES LIKE 'h.*';
human_resources
hive> ...

创建指定存放文件位置数据库

hive> CREATE DATABASE financials
    > LOCATION '/my/preferred/directory';

创建数据库时添加注释信息

hive> CREATE DATABASE financials
    > COMMENT 'Holds all financial tables';
hive> DESCRIBE DATABASE financials;
financials   Holds all financial tables
  hdfs://master-server/user/hive/warehouse/financials.db

使用数据库

hive> USE financials;

删除数据库

hive> DROP DATABASE IF EXISTS financials;

当数据库存在表时，先要删除表再能删除数据库

当数据库中存在表时无法直接删除，会提示数据库非空，存在表，这时可以使用CASCADE关键字

hive> DROP DATABASE IF EXISTS financials CASCADE;

数据表

创建表

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [COMMENT col_comment], ...)]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], ...)]
[CLUSTERED BY (col_name, col_name, ...) [SORTED BY(col_name[ASC|DESC], ...)] INTO num_buckets BUCKETS]
[SKEWED BY (col_name, col_name, ...) ON ([(col_value, col_value, ...), ...|col_value, col_value, ...]) [STORED AS DIRECTORIES]
  [ [ROW FORMAT row_format] [STORED AS file_format]
   | STORED BY 'storage.handler.class.name' [WITH SERDEPROPERTIES (...)]  ]
  [LOCATION hdfs_path]
  [TBLPROPERTIES (property_name=property_value, ...)] 
  [AS select_statement] 


例：
CREATE TABLE IF NOT EXISTS mydb.employees (
  name         STRING COMMENT 'Employee name',
  salary       FLOAT  COMMENT 'Employee salary',
  subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
  deductions   MAP<STRING, FLOAT>
               COMMENT 'Keys are deductions names, values are percentages
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
               COMMENT 'Home address')
COMMENT 'Description of the table'
TBLPROPERTIES ('creator'='me', 'created_at'='2012-01-02 10:00:00', ...)
LOCATION '/user/hive/warehouse/mydb.db/employees';

CREATE [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
  LIKE existing_table_or_view_name
  [LOCATION hdfs_path];


例：
CREATE TABLE IF NOT EXISTS mydb.employees2
LIKE mydb.employees;

显示某个数据库中的表

hive> USE mydb;
hive> SHOW TABLES;
employees
table1
table2

显示指定筛选条件表名

hive> USE mydb;
hive> SHOW TABLES 'empl.*';
employees

显示表扩展信息

hive> DESCRIBE EXTENDED mydb.employees;
name    string  Employee name
salary  float   Employee salary
subordinates    array<string>   Names of subordinates
deductions      map<string,float> Keys are deductions names, values are percentages
address struct<street:string,city:string,state:string,zip:int>  Home address
Detailed Table Information      Table(tableName:employees, dbName:mydb, owner:me,
...
location:hdfs://master-server/user/hive/warehouse/mydb.db/employees,
parameters:{creator=me, created_at='2012-01-02 10:00:00',
            last_modified_user=me, last_modified_time=1337544510,
            comment:Description of the table, ...}, ...)

指定显示某个字段的信息

hive> DESCRIBE mydb.employees.salary;
salary  float   Employee salary

外部表，删除表不删除数据

CREATE EXTERNAL TABLE IF NOT EXISTS stocks (
  exchange        STRING,
  symbol          STRING,
  ymd             STRING,
  price_open      FLOAT,
  price_high      FLOAT,
  price_low       FLOAT,
  price_close     FLOAT,
  volume          INT,
  price_adj_close FLOAT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/data/stocks';

复制表结构仓库外部表

CREATE EXTERNAL TABLE IF NOT EXISTS mydb.employees3
LIKE mydb.employees
LOCATION '/path/to/data';

分区表

CREATE TABLE employees (
  name         STRING,
  salary       FLOAT,
  subordinates ARRAY<STRING>,
  deductions   MAP<STRING, FLOAT>,
  address      STRUCT<street:STRING, city:STRING, state:STRING, zip:INT>
)
PARTITIONED BY (country STRING, state STRING);

HIVE 可以将分区表设置成“严格”模式,禁止分区表的查询没有一个WHERE子句

hive> set hive.mapred.mode=strict;
hive> SELECT e.name, e.salary FROM employees e LIMIT 100;
FAILED: Error in semantic analysis: No partition predicate found for
 Alias "e" Table "employees"
hive> set hive.mapred.mode=nonstrict;
hive> SELECT e.name, e.salary FROM employees e LIMIT 100;

查看现有分区

hive> SHOW PARTITIONS employees;
...
Country=CA/state=AB
country=CA/state=BC
...
country=US/state=AL
country=US/state=AK

查看分区详细分区键

hive> SHOW PARTITIONS employees PARTITION(country='US');
country=US/state=AL
country=US/state=AK
...
hive> SHOW PARTITIONS employees PARTITION(country='US', state='AK');
country=US/state=AK

通过 DESC 显示分区键

hive> DESCRIBE EXTENDED employees;
name         string,
salary       float,
...
address      struct<...>,
country      string,
state        string
Detailed Table Information...
partitionKeys:[FieldSchema(name:country, type:string, comment:null),
FieldSchema(name:state, type:string, comment:null)],
...

查看建表语句

hive> show create table score;
OK
CREATE  TABLE `score`(
  `num` string,
  `name` string,
  `score` float)
PARTITIONED BY (
  `year` string,
  `month` string,
  `day` string)
ROW FORMAT DELIMITED
  FIELDS TERMINATED BY '\t'
  LINES TERMINATED BY '\n'
STORED AS INPUTFORMAT
  'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  'hdfs://hdpcls1/user/hive/warehouse/score'
TBLPROPERTIES (
  'transient_lastDdlTime'='1489117674')
Time taken: 0.35 seconds, Fetched: 19 row(s)

从表读入数据到表中

INSERT OVERWRITE TABLE employees
PARTITION (country = 'US', state = 'OR')
SELECT * FROM staged_employees se
WHERE se.cnty = 'US' AND se.st = 'OR';

从文件读入分区表

从本地
LOAD DATA LOCAL INPATH '${env:HOME}/california-employees'
INTO TABLE employees
PARTITION (country = 'US', state = 'CA');

从hdfs
LOAD DATA  INPATH '/california-employees'
INTO TABLE employees
PARTITION (country = 'US', state = 'CA');

为外部表增加指定分区

ALTER TABLE log_messages ADD PARTITION(year = 2012, month = 1, day = 2)
LOCATION 'hdfs://master_server/data/log_messages/2012/01/02';

导出数据

INSERT OVERWRITE LOCAL DIRECTORY '/tmp/ca_employees'
SELECT name, salary, address
FROM employees
WHERE se.state = 'CA';

桶表

CREATE TABLE par_table(viewTime INT, userid BIGINT,
     page_url STRING, referrer_url STRING,
     ip STRING COMMENT 'IP Address of the User')
 COMMENT 'This is the page view table'
 PARTITIONED BY(date STRING, pos STRING)
 CLUSTERED BY(userid) SORTED BY(viewTime) INTO 32 BUCKETS
 ROW FORMAT DELIMITED ‘\t’
   FIELDS TERMINATED BY '\n'
STORED AS SEQUENCEFILE;

删除表

DROP TABLE IF EXISTS employees;

修改表结构

ALTER TABLE modifies table metadata only. The data for the table is
untouched. It’s up to you to ensure that any modifications are consistent
with the actual data.

修改表名

ALTER TABLE log_messages RENAME TO logmsgs;

增加，修改，删除表分区

ALTER TABLE log_messages ADD IF NOT EXISTS
PARTITION (year = 2011, month = 1, day = 1) LOCATION '/logs/2011/01/01'
PARTITION (year = 2011, month = 1, day = 2) LOCATION '/logs/2011/01/02'
PARTITION (year = 2011, month = 1, day = 3) LOCATION '/logs/2011/01/03'

ALTER TABLE log_messages PARTITION(year = 2011, month = 12, day = 2)
SET LOCATION 's3n://ourbucket/logs/2011/01/02';

ALTER TABLE log_messages DROP IF EXISTS PARTITION(year = 2011, month = 12, day = 2);

修改列

ALTER TABLE log_messages
CHANGE COLUMN hms hours_minutes_seconds INT
COMMENT 'The hours, minutes, and seconds part of the timestamp'
AFTER severity;

这个命令可以允许改变列名、数据类型、注释、列位置或者它们的任意组合

增加列

ALTER TABLE log_messages ADD COLUMNS (
 app_name   STRING COMMENT 'Application name',
 session_id LONG   COMMENT 'The current session id');

字段位置在所有列后面(partition列前)

删除替换列

ALTER TABLE log_messages REPLACE COLUMNS (
 hours_mins_secs INT    COMMENT 'hour, minute, seconds from timestamp',
 severity        STRING COMMENT 'The message severity'
 message         STRING COMMENT 'The rest of the message');

转载于:https://my.oschina.net/u/2000675/blog/856553