Hive_02_数据定义及操作语言
DDL:Data Definition Language
需求:
1.各个部门每年入职的人数
-- Requirement 1: number of employees hired per department per year.
-- Every non-aggregated select expression is repeated in GROUP BY; the grouping
-- expressions must be comma-separated (the comma was missing before).
select
    deptno,
    year(hiredate) as hire_year,
    count(*) as cnt
from emp
group by deptno, year(hiredate);
2.整个公司每年每月的入职人数
year,month函数:
-- Requirement 2 (year/month variant): company-wide hire count for each
-- (year, month) pair derived from hiredate.
SELECT
    year(hiredate),
    month(hiredate),
    count(*)
FROM emp
GROUP BY
    year(hiredate),
    month(hiredate);
date_format函数:
-- Requirement 2 (date_format variant): company-wide hire count per calendar month.
-- The pattern must be 'yyyy-MM': lowercase 'yyyy' is the calendar year, whereas
-- uppercase 'YYYY' is the week-based year and mislabels dates around New Year.
-- String literals use plain ASCII quotes; typographic quotes do not parse.
select
    date_format(hiredate, 'yyyy-MM') as YM,
    count(*) as cnt
from emp
group by date_format(hiredate, 'yyyy-MM');
3.销售部和经理部中,薪资在 1500-2500 之间的员工每年每月的入职人数
4.公司内有绩效的员工每年每月的入职人数
5.销售部和经理部中,薪资在 1500-2500 之间的员工每年每月的入职人数,以及这些员工的信息
Database:
1.Hive 默认自带一个名为 default 的数据库,对应的 HDFS 路径为 /user/hive/warehouse
1.创建数据库
CREATE [REMOTE] (DATABASE|SCHEMA) [IF NOT EXISTS] database_name
[COMMENT database_comment]
[LOCATION hdfs_path]
[MANAGEDLOCATION hdfs_path]
[WITH DBPROPERTIES (property_name=property_value, ...)];
[] 可有可无
(|) 选择其中一个即可
-- Simplest form: no location, comment or properties are given.
CREATE DATABASE bigdata_hive;
思考: 这个数据库在hdfs什么地方?
database 在hdfs上的路径:
默认数据库路径:/user/hive/warehouse
非默认数据库路径:/user/hive/warehouse/dbname.db
-- Variants of CREATE DATABASE. All string literals use ASCII quotes;
-- typographic quotes are not valid HiveQL string delimiters.

-- IF NOT EXISTS: per the grammar, the existence check is optional.
create database if not exists bigdata_hive;
-- LOCATION: place the database at an explicit HDFS path.
create database bigdata_hive2 LOCATION '/data/bigdata_hive2';
-- WITH DBPROPERTIES: attach arbitrary key/value metadata.
create database bigdata_hive3 WITH DBPROPERTIES ('creator'='doublehappy', 'create_dt'='2099-11-29');
-- COMMENT: attach a free-text description.
create database if not exists bigdata_hive4 COMMENT '这是一个数据库4';
2.查看数据库
-- List all databases.
show databases;
-- List databases whose name matches the pattern ('*' is the wildcard).
-- The pattern is an ASCII-quoted string and the statement ends with ';'.
show databases like 'bigdata_hive*';
-- Basic database metadata.
desc database bigdata_hive3;
-- EXTENDED form of the same statement.
desc database EXTENDED bigdata_hive3;
3.更改数据库
ALTER (DATABASE|SCHEMA) database_name SET DBPROPERTIES (property_name=property_value, ...); -- (Note: SCHEMA added in Hive 0.14.0)
-- Set the 'create_dt' property on bigdata_hive3.
-- Key and value are ASCII-quoted; typographic quotes do not parse.
alter database bigdata_hive3 set dbproperties ('create_dt'='2022-11-29');
4.删除数据库
DROP (DATABASE|SCHEMA) [IF EXISTS] database_name [RESTRICT|CASCADE];
-- Drop a database without specifying RESTRICT or CASCADE.
DROP DATABASE bigdata_hive4;
-- WARNING: CASCADE also drops the tables inside the database. This is the
-- destructive "drop everything and run" operation; use with extreme care.
DROP DATABASE bigdata_hive2 CASCADE;
查看table的信息:
DESCRIBE [EXTENDED|FORMATTED]
table_name
-- Show metadata for table `test`, first in the EXTENDED form and then in the
-- FORMATTED form of the DESCRIBE statement (DESC is its short spelling).
DESCRIBE EXTENDED test;
DESCRIBE FORMATTED test;
2.table
CREATE [TEMPORARY] [EXTERNAL] TABLE [IF NOT EXISTS] [db_name.]table_name
[(col_name data_type [column_constraint_specification] [COMMENT col_comment], … [constraint_specification])]
[COMMENT table_comment]
[PARTITIONED BY (col_name data_type [COMMENT col_comment], …)]
[CLUSTERED BY (col_name, col_name, …) [SORTED BY (col_name [ASC|DESC], …)]