Hive1
1、实现struct例子
(1)创建学生struct表
create table student (id int, info struct<name:string, age:int>)
row format delimited fields terminated by ','
collection items terminated by ':';
(2)导入数据
load data local inpath '/root/ test.txt' into table student;
(3)查询struct表中的内容
select info.age from student;
2、基站掉话率:找出掉线率最高的前10基站
record_time:通话时间 imei:基站编号 cell:手机编号
drop_num:掉话的秒数 duration:通话持续总秒数
1、建原始数据表
create table cell_monitor(
record_time string,
imei string,
cell string,
ph_num int,
call_num int,
drop_num int,
duration int,
drop_rate DOUBLE,
net_type string,
erl string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE;
2、结果表
create table cell_drop_monitor(
imei string,
total_call_num int,
total_drop_num int,
d_rate DOUBLE
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
STORED AS TEXTFILE;
3、load数据
LOAD DATA LOCAL INPATH '/opt/data/cdr_summ_imei_cell_info.csv' OVERWRITE INTO TABLE cell_monitor;
4、找出掉线率最高的基站
from cell_monitor cm
insert overwrite table cell_drop_monitor
select cm.imei ,sum(cm.drop_num),sum(cm.duration),sum(cm.drop_num)/sum(cm.duration) d_rate
group by cm.imei
sort by d_rate desc;
3、使用hive实现wordcount
1、建表
create table docs(line string);
create table wc(word string, totalword int);
2、加载数据
load data local inpath '/tmp/wc' into table docs;
3、统计
from (select explode(split(line, ' ')) as word from docs) w
insert into table wc
select word, count(1) as totalword
group by word
order by word;
4、查询结果
select * from wc;