ALTER(DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value,...);-- (Note: SCHEMA added in Hive 0.14.0)ALTER(DATABASE|SCHEMA) database_name SET OWNER [USER|ROLE] user_or_role;-- (Note: Hive 0.13.0 and later; SCHEMA added in Hive 0.14.0)ALTER(DATABASE|SCHEMA) database_name SET LOCATION hdfs_path;-- (Note: Hive 2.2.1, 2.4.0 and later)ALTER(DATABASE|SCHEMA) database_name SET MANAGEDLOCATION hdfs_path;-- (Note: Hive 4.0.0 and later)
CREATE[TEMPORARY][EXTERNAL]TABLE[IFNOTEXISTS][db_name.]table_name -- (Note: TEMPORARY available in Hive 0.14.0 and later)[(col_name data_type [column_constraint_specification][COMMENT col_comment],...[constraint_specification])][COMMENT table_comment][PARTITIONED BY(col_name data_type [COMMENT col_comment],...)][CLUSTEREDBY(col_name, col_name,...)[SORTED BY(col_name [ASC|DESC],...)]INTO num_buckets BUCKETS][SKEWED BY(col_name, col_name,...)-- (Note: Available in Hive 0.10.0 and later)]ON((col_value, col_value,...),(col_value, col_value,...),...)[STORED AS DIRECTORIES][[ROW FORMAT row_format][STORED AS file_format]| STORED BY'storage.handler.class.name'[WITH SERDEPROPERTIES (...)]-- (Note: Available in Hive 0.6.0 and later)][LOCATION hdfs_path][TBLPROPERTIES (property_name=property_value,...)]-- (Note: Available in Hive 0.6.0 and later)[AS select_statement];-- (Note: Available in Hive 0.5.0 and later; not supported for external tables)CREATE[TEMPORARY][EXTERNAL]TABLE[IFNOTEXISTS][db_name.]table_name
LIKE existing_table_or_view_name
[LOCATION hdfs_path];
data_type
: primitive_type
| array_type
| map_type
| struct_type
| union_type -- (Note: Available in Hive 0.7.0 and later)
primitive_type
: TINYINT|SMALLINT|INT|BIGINT|BOOLEAN|FLOAT|DOUBLE|DOUBLEPRECISION-- (Note: Available in Hive 2.2.0 and later)| STRING
|BINARY-- (Note: Available in Hive 0.8.0 and later)|TIMESTAMP-- (Note: Available in Hive 0.8.0 and later)|DECIMAL-- (Note: Available in Hive 0.11.0 and later)|DECIMAL(precision, scale)-- (Note: Available in Hive 0.13.0 and later)|DATE-- (Note: Available in Hive 0.12.0 and later)|VARCHAR-- (Note: Available in Hive 0.12.0 and later)|CHAR-- (Note: Available in Hive 0.13.0 and later)
array_type
: ARRAY < data_type >
map_type
: MAP < primitive_type, data_type >
struct_type
: STRUCT < col_name : data_type [COMMENT col_comment],...>
union_type
: UNIONTYPE < data_type, data_type,...>-- (Note: Available in Hive 0.7.0 and later)
row_format
: DELIMITED [FIELDSTERMINATEDBYchar[ESCAPEDBYchar]][COLLECTION ITEMS TERMINATEDBYchar][MAP KEYSTERMINATEDBYchar][LINESTERMINATEDBYchar][NULL DEFINED ASchar]-- (Note: Available in Hive 0.13 and later)| SERDE serde_name [WITH SERDEPROPERTIES (property_name=property_value, property_name=property_value,...)]
file_format:
: SEQUENCEFILE
| TEXTFILE -- (Default, depending on hive.default.fileformat configuration)| RCFILE -- (Note: Available in Hive 0.6.0 and later)| ORC -- (Note: Available in Hive 0.11.0 and later)| PARQUET -- (Note: Available in Hive 0.13.0 and later)| AVRO -- (Note: Available in Hive 0.14.0 and later)| JSONFILE -- (Note: Available in Hive 4.0.0 and later)| INPUTFORMAT input_format_classname OUTPUTFORMAT output_format_classname
column_constraint_specification:
: [PRIMARYKEY|UNIQUE|NOTNULL|DEFAULT[default_value]|CHECK[check_expression]ENABLE|DISABLE NOVALIDATE RELY/NORELY ]
default_value:
: [ LITERAL|CURRENT_USER()|CURRENT_DATE()|CURRENT_TIMESTAMP()|NULL]
constraint_specification:
: [,PRIMARYKEY(col_name,...)DISABLE NOVALIDATE RELY/NORELY ][,PRIMARYKEY(col_name,...)DISABLE NOVALIDATE RELY/NORELY ][,CONSTRAINT constraint_name FOREIGNKEY(col_name,...)REFERENCES table_name(col_name,...)DISABLE NOVALIDATE
[,CONSTRAINT constraint_name UNIQUE(col_name,...)DISABLE NOVALIDATE RELY/NORELY ][,CONSTRAINT constraint_name CHECK[check_expression]ENABLE|DISABLE NOVALIDATE RELY/NORELY ]#内部表createtableifnotexists student
(id int,name string)row format delimited fieldsterminatedby',';#外部表create external tableifnotexists student
(id int,name string)row format delimited fieldsterminatedby','
location '/hive/data/';#分区表createtableifnotexists student
(id int,name string)
partitioned by(age int conmment 'partitioned comment')row format delimited fieldsterminatedby',';#分区字段的字段名称,不能是表中的任意一个字段#创建分桶表createtableifnotexists
student (id int,name string,age int)clusteredby(age) sort by(age desc)into10 buckets
row format delimited fieldsterminatedby',';#分桶字段一定要是表中的属性字段 #like 方式createtable student like t_student ;#复制一个表结构,分区表和分桶表也同样可以复制(分区表只能复制在创建表的时候的信息,之后添加的信息不能复制)#CTAScreatetable student asselect*from t_student #创建表并复制
Drop Table
DROPTABLE[IFEXISTS] table_name [PURGE];-- (Note: PURGE available in Hive 0.14.0 and later)droptableifexists tb_name;// 删除数据和元数据
添加分区:指定分区语句之间不能有逗号!!!看例子
ALTERTABLE table_name ADD[IFNOTEXISTS]PARTITION partition_spec [LOCATION 'location'][,PARTITION partition_spec [LOCATION 'location'],...];
partition_spec:
: (partition_column = partition_col_value, partition_column = partition_col_value,...)
例如:0.8版本后可以一次添加多个partitionALTERTABLE page_view ADDPARTITION(dt='2008-08-08', country='us') location '/path/to/us/part080808'PARTITION(dt='2008-08-09', country='us') location '/path/to/us/part080809';
重命名分区:
ALTERTABLE table_name PARTITION partition_spec RENAMETOPARTITION partition_spec;
删除分区:添加分区的表 必须在表创建之初就创立了分区! 一次删除多个分区 多个分区之间必须加逗号!!!
ALTERTABLE table_name DROP[IFEXISTS]PARTITION partition_spec[,PARTITION partition_spec,...][IGNORE PROTECTION][PURGE];-- (Note: PURGE available in Hive 1.2.0 and later, IGNORE PROTECTION not available 2.0.0 and later)
Alter Column
改变列的 名称/类型/位置/注释 Change Column Name/Type/Position/CommentALTERTABLE table_name [PARTITION partition_spec] CHANGE [COLUMN] col_old_name col_new_name column_type
[COMMENT col_comment][FIRST|AFTER column_name][CASCADE|RESTRICT];
例子:
CREATETABLE test_change (a int, b int, c int);// First change column a's name to a1.ALTERTABLE test_change CHANGE a a1 INT;// Next change column a1's name to a2, its data type to string, and put it after column b.ALTERTABLE test_change CHANGE a1 a2 STRING AFTER b;// The new table's structure is: b int, a2 string, c int.// Then change column c's name to c1, and put it as the first column.ALTERTABLE test_change CHANGE c c1 INTFIRST;// The new table's structure is: c1 int, b int, a2 string.// Add a comment to column a1ALTERTABLE test_change CHANGE a1 a1 INTCOMMENT'this is column a1';
添加列/替换列 Add/ReplaceColumnsALTERTABLE table_name
[PARTITION partition_spec]-- (Note: Hive 0.14.0 and later)ADD|REPLACECOLUMNS(col_name data_type [COMMENT col_comment],...)[CASCADE|RESTRICT]-- (Note: Hive 1.1.0 and later)PartialPartition Specification 部分分区spec,类似动态分区,可以部分的指定列来change:
// 首先设置Hive属性// hive.exec.dynamic.partition needs to be set to true to enable dynamic partitioning with ALTER PARTITIONSET hive.exec.dynamic.partition=true;// 静态changeALTERTABLE foo PARTITION(ds='2008-04-08', hr=11) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);// This will alter all existing partitions in the table with ds='2008-04-08' -- be sure you know what you are doing!ALTERTABLE foo PARTITION(ds='2008-04-08', hr) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);// This will alter all existing partitions in the table -- be sure you know what you are doing!ALTERTABLE foo PARTITION(ds, hr) CHANGE COLUMN dec_column_name dec_column_name DECIMAL(38,18);
SELECT[ALL|DISTINCT] select_expr, select_expr,...FROM table_reference
[WHERE where_condition][GROUPBY col_list][ORDERBY col_list][CLUSTER BY col_list
|[DISTRIBUTE BY col_list][SORT BY col_list]][LIMIT[offset,]rows]
Where
SELECT*FROM sales WHERE amount >10AND region ="US"
ALL and DISTINCT
默认为ALL,返回所有符合的结果
DISTINCT去除重复的rows
Partition Based Queries
SELECT page_views.*FROM page_views
WHERE page_views.date>='2008-03-01'AND page_views.date<='2008-03-31'
#内连接:两个表的交集select a.*,b.*from a innerjoin b on a.id=b.id;#左外链接:以join左侧的表为基础表 左侧的表的所有数据都会显示 右侧可以关联上的就会补全 关联不上 null补充select a.*,b.*from a leftjoin b on a.id=b.id;#右外链接:以join右侧的表为基础select a.*,b.*from a rightjoin b on a.id=b.id;#全外链接:取两个表的并集select a.*,b.*from a fulljoin b on a.id=b.id;#半连接,以一种高效的方式实现了不相关的IN/EXISTS子查询语义。 #取左半表的数据,左表中在右表中出现关联上的数据select*from a left semi join b on a.id=b.id;
相当于:
select*from a where a.id in(select b.id from b)# 多个表链式JOINSELECT a.val, b.val, c.val FROM a JOIN b ON(a.key= b.key1)JOIN c ON(c.key= b.key2)
Union
UNION用于将多个SELECT语句的结果组合为单个结果集。
select中字段须一致,每个select语句返回的列的数量和名字必须相同。
select*from a union selecet *from b;select*from a unionall selecet *from b;union:表示去重连接
unionall :表示不去重连接