1 实现 struct 例子
创建 student 表
create table student( id int, info struct<name:string,age:int> ) row format delimited fields terminated by ',' collection items terminated by ':'; |
向这个 student 表中插入数据
[root@node4 data]# vim stu.txt 1,tuhao:25 2,diaosi:26 |
将数据加载到表中:
hive> load data local inpath '/opt/data/stu.txt' into table student; hive> select * from student; OK 1 {"name":"tuhao","age":25} 2 {"name":"diaosi","age":26} hive>select id,info.name,info.age from student; OK 1 tuhao 28 2 diaoshi 35 |
2 基站掉话率:找出掉线率最高的前 10 基站
record_time:通话时间
imei:基站编号
cell:手机编号
drop_num:掉话的秒数
duration:通话持续总秒数
创建原始数据表
create table jizhan( record_time string, imei int, cell string, ph_num int, call_num int, drop_num int, duration int, drop_rate double, net_type string, erl int) row format delimited fields terminated by ','; |
上传数据到 node4,改名,然后加载到表
hive> load data local inpath '/root/data/cdr_info.csv' into table jizhan; Loading data to table default.jizhan Table default.jizhan stats: [numFiles=1, totalSize=57400917] OK Time taken: 1.315 seconds hive> select * from jizhan limit 10; OK record_time NULL cell NULL NULL NULL NULL NULL net_type NULL 2011-07-13 00:00:00+08 356966 29448-37062 0 0 0 0 0 2011-07-13 00:00:00+08 352024 29448-51331 0 0 0 0 0 2011-07-13 00:00:00+08 353736 29448-51331 0 0 0 0 0 2011-07-13 00:00:00+08 353736 29448-51333 0 0 0 0 0 2011-07-13 00:00:00+08 351545 29448-51333 0 0 0 0 0 2011-07-13 00:00:00+08 353736 29448-51343 1 0 0 8 0 2011-07-13 00:00:00+08 359681 29448-51462 0 0 0 0 0 2011-07-13 00:00:00+08 354707 29448-51462 0 0 0 0 0 2011-07-13 00:00:00+08 356137 29448-51470 0 0 0 0 0 Time taken: 0.086 seconds, Fetched: 10 row(s) |
创建结果表 jizhan_result
create table jizhan_result( imei string, drop_num int, duration int, drop_rate double ); |
from jizhan
|
3 hive 实现 wordcount
1. 准备数据:
[root@node4 data]# hdfs dfs -mkdir /usr/wordcount [root@node4 data]# hdfs dfs -put wc.txt /usr/wordcount [root@node4 data]# hdfs dfs -ls /usr drwxr-xr-x - root supergroup 0 2020-02-05 11:28 /usr/wordcount [root@node4 data]# hdfs dfs -ls /usr/wordcount Found 1 items -rw-r--r-- 2 root supergroup 265 2020-02-05 11:28 /usr/wordcount/wc.txt |
2. 建表原始数据表 words
hive> create external table words(line string) location '/usr/wordcount'; OK Time taken: 0.053 seconds hive> select * from words; OK hello tom hi lucy hive hadoop node zk |
3. 将结果表 wc_count
create table wc_count(word string,count int);
4. Hive Sql 语句
先将每行内容安装空格拆分
select split(line,' ') from words;
OK ["hello","tom"] ["hi","lucy"] ["hive","hadoop"] |
select explode(split(line,' ')) from words;
hello tom hi lucy |
select word,count(word)
from (select explode(split(line,' ')) word from words) tmp
group by word;
c 1 hadoop 1 hello 3 hi 3 hive 1 java 1 lucy 2 node 1 tom 1 zk 2 |
我们想将结果保存到 wc_count 表中
from (select explode(split(line,' ')) word from words) tmp insert into wc_count select word,count(word) count group by word; |