建表的基本语法:create table tablename(col,type)
例一:
create table sxtTest(
id int,
name string,
likes array<string>,
address map<string,string>
)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n';
#加载数据的方式
load data local inpath '/home/data/sxt_hive.txt' into table sxtTest;
模拟的数据:
1,penglinag,lol-book-movie,shanghai:pudong
2,peng,lol-book,shanghai:pudong
3,linagliang,lol-book,shanghai:yanyin
4,liang,lol-book-movie,shanghai:yu
例二:外部表
create table sxtTest(
id int,
name string,
likes array<string>,
address map<string,string>
)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n'
location /usr;
注意加上,上面的这句就成为外部表了
分区字段:注意分区字段中的属性不能出现在分区里面
create table a(
id int,
name string,
likes array<string>,
address map<string,string>
)
partitioned by (age int,sex string)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
lines terminated by '\n';
当有几个分区字段时候,加载数据就要给分区指定几个属性
load data local inpath '/home/data/sxt_hive.txt' into table a partition(age=10,sex='boy');
alter table a add partition(age=10,sex='girl');
结论:添加分区的时候,必须在现有的分区基础之上
删除分区的时候,会将所有的存在的分区全部删除
hive案例实践:
1、实现struct例子
create table student(
id int,
info struct<name:string,age:int>
)
row format delimited
fields terminated by ','
collection items terminated by ':';
数据:
2017113828,彭亮:22
2017113822,小明:20
2017113820,流星:22
2、基站掉话率:找出掉线率最高的前10基站
结果表的字段:
record_time:通话时间
imei:基站编号
cell:手机编号
drop_num:掉话的秒数
duration:通话持续总秒数
结果表:
create table call_result(
imei string,
drop_num int,
duration int,
drop_rate double
)row format delimited
fields terminated by ','
hive的语句:
from call_monitor cm
insert into call_result
select cm.imei,sum(cm.drop_num) sdrop,sum(cm.duration) sdura,sum(cm.drop_num)/sum(cm.duration) s_rate group by cm.imei order by s_rate desc
原始数据表:
create table call_monitor(
record_time string,
imei string,
cell string,
ph_num string,
call_num string,
drop_num int,
duration int,
drop_rate double,
net_type string,
erl string
)
row format delimited
fields terminated by ',';
3、使用hive实现wordcount
create table WC(word string);
实现语句
方法一:
select t1.word,count(t1.word) from (select explode(split(word,' '))word from WC)t1 group by t1.word;
方法二:
用一个结果表来存放查出的单词和次数,然后进行统计:
create table WC_result(
word string,
ct int
);
from (select explode(split(word,' ')) word from WC)t1
insert into WC_result
select t1.word count(t1.word) group by t1.word;