1.数据库操作
//创建数据库
CREATE DATABASE financials;
//如果没有同名数据库则创建数据库
CREATE DATABASE IF NOT EXISTS financials;
//显示所有数据库
SHOW DATABASES;
//用正则匹配搜索数据库
SHOW DATABASES LIKE 'd*';
//创建数据库时指定数据库所在目录,否则默认在/user/root/warehouse目录下
CREATE DATABASE financials LOCATION '/my/preferred/directory';
//创建数据库时添加描述信息
CREATE DATABASE financials COMMENT 'Holds all financials tables';
//显示数据库信息
DESCRIBE DATABASE financials;
//切换数据库
USE financials;
//删除数据库
DROP DATABASE IF EXISTS financials;
//删除有表的数据库
DROP DATABASE IF EXISTS financials CASCADE;
2.表操作
//创建表
CREATE TABLE IF NOT EXISTS mydb.employees(
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING,FLOAT> COMMENT 'Keys are deductions names,values are percenttages',
address STRUCT<street:STRING,city:STRING,state:STRING,zip:INT> COMMENT 'Home address')
COMMENT 'Description of the table'
TBLPROPERTIES ('creator'='me','created_at'='2012-01-02 10:00:00');
//拷贝表模式
CREATE TABLE IF NOT EXISTS mydb.employees2 LIKE mydb.employees;
//查看表属性
DESCRIBE EXTENDED/FORMATTED mydb.employees;
//创建分区表,按国家和州对数据进行分区
CREATE TABLE mydb.employees(
name STRING COMMENT 'Employee name',
salary FLOAT COMMENT 'Employee salary',
subordinates ARRAY<STRING> COMMENT 'Names of subordinates',
deductions MAP<STRING,FLOAT> COMMENT 'Keys are deductions names,values are percenttages',
address STRUCT<street:STRING,city:STRING,state:STRING,zip:INT> COMMENT 'Home address')
PARTITIONED BY (country STRING, state STRING);
//查看表分区
SHOW PARTITIONS employees;
//创建外部表并在对应目录下新建文件
CREATE EXTERNAL TABLE student(
id INT,
name STRING,
age INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/usr/student';
//创建外部表分区表并添加一个分区
CREATE EXTERNAL TABLE log_messages(
hms INT,
serverity STRING,
server STRING,
process_id INT,
message STRING)
PARTITIONED BY (year INT,month INT,day INT)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '/t';
ALTER TABLE log_messages ADD PARTITION(year = 2012,month=12,day=1)
LOCATION 'hdfs://hadoop1:9000/data/log_messages/2012/12/1';
//查看分区表的详细信息
DESCRIBE FORMATTED log_messages PARTITION (year=2012,month=12,day=1);
3.向表中插数据
//向分区表中插入数据,文件在hdfs的/user/root目录下
LOAD DATA INPATH 'student.txt'
//OVERWRITE先删除原来数据,然后再加载
OVERWRITE INTO TABLE student
PARTITION (country = 'US',state = 'CA');
//通过查询语句插入
INSERT INTO TABLE student2
SELECT * FROM student s1
WHERE s1.id = 1;
//建表并将查询结果输入表中
CREATE TABLE student3
AS SELECT * FROM student
WHERE id =2;
4.导出数据
INSERT OVERWRITE DIRECTORY 'student2'
row format delimited
fields terminated by ','
SELECT * FROM student;
5.查询语句
create table student(
id int,
name String,
teamMumbers ARRAY<String>,
score MAP<String,int>,
address STRUCT<street:string,city:string,province:string,zip:int>)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ','
MAP KEYS TERMINATED BY ':';
1 张三 张三,李四 数学:100,语文:24,英语:78 甘井子区,大连市,辽宁省,1234565
2 李四 数学:78,语文:36 沙河口区,大连市,辽宁省,145678
3 王五 数学:89,语文:24,英语:99 西岗区,大连市,辽宁省,147852
//查询street类型的某一部分数据
select name,address.street from student;
6.常用函数
parse_url(url, partToExtract[, key]) 解析url地址
concat字符串连接函数,需要是 string型字段
unix_timestamp() 当前系统时间
https://www.cnblogs.com/MOBIN/p/5618747.html
7.视图
CREATE VIEW student1 AS
SELECT * from student
WHERE id =1;
8.索引
索引适用于不更新的静态字段。以免总是重建索引数据。每次建立、更新数据后,都要重建索引以构建索引表。