一
1.创建字段之间用空格
-- 1. Create a table; columns are space-delimited in the data file.
-- Fix: delimiter must use straight ASCII quotes, not smart quotes.
create table student(id int, name string, age int)
row format delimited fields terminated by ' ';
2.将本地文件数据加载到数据库
-- 2. Load a local file into the table.
-- Fix: the path literal was missing its closing quote.
load data local inpath '/home/hivedata/student.txt' into table student;
3.从同一张表根据不同的条件导入数据到不同的表
-- 3. Multi-insert: scan student once, route rows to different tables by condition.
-- Fix: "id ❤️" is a markdown-mangled "id < 3" (the "<3" was turned into an emoji).
from student
insert into table stu2 select * where age >= 15
insert into table stu3 select * where id < 3;
4.把hive上的数据以','分隔的方式导入到本地目录
-- 4. Export query results to a local directory, comma-delimited.
-- Fix: smart quotes replaced with straight ASCII quotes.
insert overwrite local directory '/home/hivedir'
row format delimited fields terminated by ','
select * from student where age < 19;
5.从student表查数据以空格为分隔 放到hdfs student里
-- 5. Export query results to an HDFS directory, space-delimited.
-- Fix: smart quotes replaced; statement terminated with ';'.
insert overwrite directory '/student'
row format delimited fields terminated by ' '
select * from student;
二
通过 show create table 表名称 可以查看在hdfs路径判断
内部表:在/user/hive/warehouse 下
外部表:没有在/user/hive/warehouse
内部表如果drop掉表 hdfs上数据也会删除,外部表hdfs上数据不会删除
三 分区表
1.创建分区
-- Partitioned table: 'country' is a partition column, not a data column.
-- Fix: "name,string" had a stray comma (must be "name string"); smart quotes replaced.
create table city(id int, name string)
partitioned by (country string)
row format delimited fields terminated by ' ';
加载数据
-- Load data into one specific partition.
-- Fix: partition column is 'country' (original misspelled it 'county'); smart quotes replaced.
load data local inpath '/home/hivedata/cncity.txt' into table city partition(country='chinese');
2.手动添加分区修复方式
-- 2.1 Auto-sync partition metadata with partition directories already on HDFS.
msck repair table city;
-- 2.2 Manually register a partition at an explicit HDFS location.
-- Fix: path typo 'warehoust' -> 'warehouse'; smart quotes replaced.
alter table city add partition(country='england') location '/user/hive/warehouse/hivedemo.db/city/country=england';
3.从没有分区的表数据插入到分区表的数据
3.1先开启动态分区
-- 3.1 Enable dynamic partitioning before inserting into a partitioned table.
set hive.exec.dynamic.partition=true;
-- Fix: the valid mode value is 'nonstrict' (original wrote 'nostrict'); added ';'.
set hive.exec.dynamic.partition.mode=nonstrict;
加载数据
-- Dynamic-partition insert: works as-is because the source table's last
-- column is the partition column 'class' (see the note below).
insert into table classes partition(class) select * from tmp distribute by class;
说明 两个表结构相同 分区字段是class 没有分区表的最后一个字段也是class 上面的执行有效
如果没分区的partition字段和分区的partition字段不一样要查出来 下面这样写法
-- Template: when the source table's column order differs, list columns
-- explicitly and put the partition column LAST in the select list.
insert into table classes partition(class)
select col1, col2, class from tmp distribute by class;
四 复杂数据类型
1.array 数组
a,b,c,d a,b,c,d
a,b,c,d,c a,b,c,d,c
a,b,c,d,c,d a,b,c,d,c,d
1.1建表语句
-- 1.1 Array example table. Named 'numbers' to match the load/select statements
-- that follow (original created 'str' but then used 'numbers').
-- Fixes: 'fow format' -> 'row format'; array needs an element type; smart quotes replaced.
create table numbers(str1 array<string>, str2 array<string>)
row format delimited
fields terminated by ' '
collection items terminated by ',';
1.2 加载数据
-- 1.2 Load the array data file. Fix: smart quotes replaced; added ';'.
load data local inpath '/home/hivedata/numbers.txt' into table numbers;
1.3 查询
-- 1.3 Access one element of the array column by index.
select str1[5] from numbers;
1.3.1 非空查询
-- 1.3.1 Skip rows whose array is too short (missing index yields NULL).
select str1[5] from numbers where str1[5] is not null;
五 map映射
1.1数据
1 a:1
2 b:2
3 c:3
4 d:4
1.2建表
-- 1.2 Map example table: key:value pairs separated by ':'.
-- Fix: the column list was missing its closing ')'; smart quotes replaced.
create table person(id int, info map<string,int>)
row format delimited
fields terminated by ' '
map keys terminated by ':';
1.3加载数据
-- 1.3 Load the map data. Fix: directory typo 'hivedta' -> 'hivedata'; added ';'.
-- NOTE(review): file name 'persion.txt' kept as written — confirm actual spelling.
load data local inpath '/home/hivedata/persion.txt' into table person;
1.4非空查询
-- 1.4 Look up key 'a' in the map, skipping rows where it is absent.
select info['a'] from person where info['a'] is not null;
六 struct 结构体:
score 源数据
bob 80 80 90
lucy 90 97 98
-- Struct example: one struct column holding name plus three scores.
-- Fixes: struct type needs angle brackets 'struct<...>'; 'englis' -> 'english';
-- smart quotes replaced; terminated with ';'.
create external table score(info struct<name:string,chinese:int,math:int,english:int>)
row format delimited
collection items terminated by ' '
location '/score';
select * from score 结果
{“name”:“Bob”,“chinese”:90,“math”:64,“english”:92}
{“name”:“Alex”,“chinese”:64,“math”:63,“english”:68}
{“name”:“Grace”,“chinese”:57,“math”:86,“english”:24}
{“name”:“Henry”,“chinese”:39,“math”:79,“english”:78}
{“name”:“Adair”,“chinese”:88,“math”:82,“english”:64}
{“name”:“Chad”,“chinese”:66,“math”:74,“english”:37}
{“name”:“Colin”,“chinese”:64,“math”:86,“english”:74}
{“name”:“Eden”,“chinese”:71,“math”:85,“english”:43}
{“name”:“Grover”,“chinese”:99,“math”:86,“english”:43}
-- Access a struct field with dot notation. Fix: table typo 'socre' -> 'score'.
select info.name from score;
七 explode
将数组中的每一个元素都取出来单独作为一行
1.1建表
-- 1.1 External table over raw text lines. Fix: smart quotes replaced.
create external table words(line string) location '/words';
数据切分:select split(line, ' ') from words;
独立成一行:select explode(split(line, ' ')) from words;
统计单个字符出现的次数
-- Word count: explode each line into tokens, then group and count.
-- Fix: smart quotes replaced with straight quotes.
select w, count(w)
from (select explode(split(line, ' ')) w from words) ws
group by w;
八 SerDe 正则
针对不规则的数据进行提取
-- RegexSerDe: each regex capture group maps, in order, to one declared column.
-- Reconstruction: markdown stripped the '*' out of the '(.*)' groups and
-- smart quotes corrupted the escaped inner quotes; restored below.
-- NOTE(review): confirm '--' vs '- -' against the actual log format.
create table log(
  ip string, time string, timezone string, request string,
  resource string, protocols string, state int)
row format serde 'org.apache.hadoop.hive.serde2.RegexSerDe'
with serdeproperties(
  "input.regex" = "(.*) -- \\[(.*) (.*)\\] \"(.*) (.*) (.*)\" (.*) -"
)
stored as textfile;