Lesson 92: Hands-on examples of Hive Array, Map, Struct, and custom data types.

Test data (note that the fields are separated by tab characters):

0    Hadoop    America    2000|8000|9000                male
1    spark     America    4000|8000|12000               female
2    flunk     America    2000|8000|11000               male
3    Hadoop    America    4000|8000|12000               male
4    spark     America    3000|8000|9000                male
5    Hadoop    America    4000|8000|13000               female
6    spark     America    3000|8000|9000                male
7    flunk     America    2000|8000|13000|8000|13000    male

// Create the table

hive> create table employee_array (
          userid int,
          name string,
          address string,
          salaries array<bigint>,
          gender string)
      row format delimited
      fields terminated by '\t'
      collection items terminated by '|'
      lines terminated by '\n'
      stored as textfile;

// Load the data

hive> load data local inpath '/bigdata/learn_data/employee.txt' into table employee_array;


// Query the third element (index 2) of the array — the last one for rows with three salaries

hive> select name, salaries[2] from employee_array;

// How to get the length of the array

hive> select userid , size(salaries) as length from employee_array;
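Note that salaries[2] is the last element only when the array has exactly three entries (row 7 has five). A sketch that combines size() to always fetch the last element, assuming your Hive version accepts an expression as the array index:

hive> select name, salaries[size(salaries)-1] as last_salary from employee_array;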

// How to find employees whose salary ever reached 12000

hive> select * from employee_array where array_contains(salaries,12000);
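To work with the individual salary values, the array can also be flattened; a minimal sketch using Hive's lateral view with the built-in explode():

hive> select userid, name, salary from employee_array lateral view explode(salaries) s as salary;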


Map

Test data (tab-separated fields; map entries separated by '|', key and value by '='):

0    Hadoop    America    1st=2000|2nd=8000|3rd=9000     male
1    spark     America    1st=4000|2nd=8000|3rd=12000    female
2    flunk     America    1st=2000|2nd=8000|3rd=11000    male
3    Hadoop    America    1st=4000|2nd=8000|3rd=12000    male
4    spark     America    1st=3000|2nd=8000|3rd=9000     male
5    Hadoop    America    1st=4000|2nd=8000|3rd=13000    female
6    spark     America    1st=3000|2nd=8000|3rd=9000     male
7    flunk     America    1st=2000|2nd=8000|3rd=13000    male

create table employee_map (
    userid int,
    name string,
    address string,
    salaries map<string,bigint>,
    gender string)
row format delimited
fields terminated by '\t'
collection items terminated by '|'
map keys terminated by '='
lines terminated by '\n'
stored as textfile;


 load data local inpath '/bigdata/learn_data/employeeMap.txt' into table employee_map;

// Query the value whose key is '3rd'

hive> select userid, salaries['3rd'] from employee_map;
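Beyond a direct key lookup, Hive's built-in map functions also work here; a small sketch using map_keys(), map_values(), and explode() over the map:

hive> select userid, map_keys(salaries), map_values(salaries) from employee_map;
hive> select userid, k, v from employee_map lateral view explode(salaries) kv as k, v;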

Struct

create table employee_struct (
    userid int,
    name string,
    address string,
    salariesandlevel struct<s1:bigint,s2:bigint,s3:bigint,level:string>,
    gender string)
row format delimited
fields terminated by '\t'
collection items terminated by '|'
map keys terminated by '='
lines terminated by '\n'
stored as textfile;
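The employeeStruct.txt file is not shown in these notes; given the delimiters above, each line would presumably look like this (tab-separated fields, struct members separated by '|'):

0    Hadoop    America    2000|8000|9000|level8    male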


load data local inpath '/bigdata/learn_data/employeeStruct.txt' into table employee_struct;

hive> select * from employee_struct;

// Query a specific field of the struct (level)

hive> select userid, salariesandlevel.level from employee_struct;
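Struct fields can also be used in filters; a sketch selecting two of the salary fields for one level (the 'level8' value is an assumption, matching the level values in the SerDe data below):

hive> select userid, salariesandlevel.s1, salariesandlevel.s3 from employee_struct where salariesandlevel.level = 'level8';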

Custom-format data types

We use a SerDe to store and query the data below; this involves serialization and deserialization.


0^^Hadoop^^America^^2000|8000|9000|level8^^male
1^^spark^^America^^4000|8000|12000|level9^^female
2^^flunk^^America^^2000|8000|11000|level10^^male
3^^Hadoop^^America^^4000|8000|12000|level8^^male
4^^spark^^America^^3000|8000|9000|level8^^male
5^^Hadoop^^America^^4000|8000|13000|level11^^female
6^^spark^^America^^3000|8000|9000|level8^^male
7^^flunk^^America^^2000|8000|13000|level12^^male


The following approach comes from a student of the 蘑菇云行动 program in Shanghai:

1. Add the JAR (adjust the path to match your own installation):


add jar /usr/local/apache-hive-1.2.1-bin/lib/hive-contrib-1.2.1.jar;


2. Create the employee_serde table using a SerDe:


CREATE TABLE employee_serde (
    userid STRING,
    name STRING,
    address STRING,
    salary STRING,
    gender STRING)
ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe'
WITH SERDEPROPERTIES (
    "input.regex" = "(.*)\\^\\^(.*)\\^\\^(.*)\\^\\^(.*)\\^\\^(.*)",
    "output.format.string" = "%1$s %2$s %3$s %4$s %5$s")
STORED AS TEXTFILE;


3. Load the data:


LOAD DATA LOCAL INPATH '/usr/local/data/salary_serde.txt' INTO TABLE employee_serde;
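To confirm the regex split each row into five columns, a quick check:

hive> select * from employee_serde limit 3;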


4. Create a new table employee_struct3 from the data in employee_serde:


CREATE TABLE employee_struct3 AS
SELECT cast(userid as int) as userid,
       name,
       address,
       struct(cast(split(salary, "\\|")[0] as bigint),
              cast(split(salary, "\\|")[1] as bigint),
              cast(split(salary, "\\|")[2] as bigint),
              split(salary, "\\|")[3]) as salary,
       gender
FROM employee_serde;
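With struct() the field names default to col1 through col4. As an alternative sketch, Hive's named_struct() keeps readable field names (the table name employee_struct4 is only an illustration):

CREATE TABLE employee_struct4 AS
SELECT cast(userid as int) as userid,
       name,
       address,
       named_struct('s1', cast(split(salary, "\\|")[0] as bigint),
                    's2', cast(split(salary, "\\|")[1] as bigint),
                    's3', cast(split(salary, "\\|")[2] as bigint),
                    'level', split(salary, "\\|")[3]) as salary,
       gender
FROM employee_serde;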


5. Use desc to inspect the new table's schema, and select * from … to view the data.
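For example:

hive> desc employee_struct3;
hive> select * from employee_struct3;
hive> select userid, salary.col4 as level from employee_struct3;   -- col4 is the default name of the fourth struct() field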



