Test data (note that the fields are separated by tab characters):
0 Hadoop America 2000|8000|9000 male
1 spark America 4000|8000|12000 famale
2 flunk America 2000|8000|11000 male
3 Hadoop America 4000|8000|12000 male
4 spark America 3000|8000|9000 male
5 Hadoop America 4000|8000|13000 famale
6 spark America 3000|8000|9000 male
7	flunk	America	2000|8000|13000	male
//Create the table
hive> create table employee_array (userid Int ,name String,address String,salaries array<BigInt>,gender String) row format delimited fields terminated by '\t' collection items terminated by '|' lines terminated by '\n' stored as textfile;
//Load the data
hive> load data local inpath '/bigdata/learn_data/employee.txt' into table employee_array;
//Query the last element of the array (index 2)
hive> select name,salaries[2] from employee_array;
//How to get the length of the array
hive> select userid , size(salaries) as length from employee_array;
//Query employees whose salary has ever reached 12000
hive> select * from employee_array where array_contains(salaries,12000);
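As a further illustration (not part of the original notes), an array column can also be flattened with LATERAL VIEW explode so that each salary appears on its own row; salary_view and single_salary below are hypothetical aliases:
hive> select name, single_salary from employee_array lateral view explode(salaries) salary_view as single_salary;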
Map
0	Hadoop	America	1st=2000|2nd=8000|3rd=9000	male
1	spark	America	1st=4000|2nd=8000|3rd=12000	famale
2	flunk	America	1st=2000|2nd=8000|3rd=11000	male
3	Hadoop	America	1st=4000|2nd=8000|3rd=12000	male
4	spark	America	1st=3000|2nd=8000|3rd=9000	male
5	Hadoop	America	1st=4000|2nd=8000|3rd=13000	famale
6	spark	America	1st=3000|2nd=8000|3rd=9000	male
7	flunk	America	1st=2000|2nd=8000|3rd=13000	male
create table employee_map (userid Int ,name String,address String,salaries Map<String,BigInt>,gender String) row format delimited fields terminated by '\t' collection items terminated by '|' map keys terminated by '=' lines terminated by '\n' stored as textfile;
load data local inpath '/bigdata/learn_data/employeeMap.txt' into table employee_map;
//Query the value whose key is '3rd'
hive> select userid ,salaries['3rd'] from employee_map;
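As an additional sketch (not in the original notes), the built-in map_keys, map_values and size functions also work on a map column:
hive> select userid, map_keys(salaries), map_values(salaries), size(salaries) from employee_map;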
Struct
create table employee_struct (userid Int ,name String,address String,salariesandlevel struct<s1:BigInt,s2:BigInt,s3:BigInt,level:String>,gender String) row format delimited fields terminated by '\t' collection items terminated by '|' map keys terminated by '=' lines terminated by '\n' stored as textfile;
load data local inpath '/bigdata/learn_data/employeeStruct.txt' into table employee_struct;
Select * from employee_struct;
//Query the level field of the struct
hive> select userid , salariesandlevel.level from employee_struct;
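A further sketch (not in the original notes): the numeric struct fields can be referenced the same way, for example to add up the three salary figures; total_salary is a hypothetical alias:
hive> select userid, salariesandlevel.s1 + salariesandlevel.s2 + salariesandlevel.s3 as total_salary from employee_struct;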
Custom-format data types
Use a SerDe to store and query the data below; this involves serialization and deserialization.
0^^Hadoop^^America^^2000|8000|9000|level8^^male
1^^spark^^America^^4000|8000|12000|level9^^famale
2^^flunk^^America^^2000|8000|11000|level10^^male
3^^Hadoop^^America^^4000|8000|12000|level8^^male
4^^spark^^America^^3000|8000|9000|level8^^male
5^^Hadoop^^America^^4000|8000|13000|level11^^famale
6^^spark^^America^^3000|8000|9000|level8^^male
7^^flunk^^America^^2000|8000|13000|level12^^male
The following approach comes from a student in the Shanghai cohort of 蘑菇云行动:
1. Add the jar (adjust the path for your own installation)
add jar /usr/local/apache-hive-1.2.1-bin/lib/hive-contrib-1.2.1.jar;
2. Create the employee_serde table using the SERDE
CREATE TABLE employee_serde (userid STRING,name STRING,address STRING,salary STRING,gender STRING) ROW FORMAT SERDE 'org.apache.hadoop.hive.contrib.serde2.RegexSerDe' WITH SERDEPROPERTIES ("input.regex" = "(.*)\\^\\^(.*)\\^\\^(.*)\\^\\^(.*)\\^\\^(.*)","output.format.string" = "%1$s %2$s %3$s %4$s %5$s") STORED AS TEXTFILE;
3. Load the data
LOAD DATA LOCAL INPATH '/usr/local/data/salary_serde.txt' INTO TABLE employee_serde;
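To check that the regex split every field correctly (an optional verification step, not part of the student's original write-up):
hive> select * from employee_serde limit 3;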
4. Create a new table employee_struct3 from the data in the employee_serde table
CREATE TABLE employee_struct3 as SELECT cast(userid as int) as userid,name,address,struct(cast(split(salary,"\\|")[0] as bigint),cast(split(salary,"\\|")[1] as bigint),cast(split(salary,"\\|")[2] as bigint),split(salary,"\\|")[3]) as salary,gender FROM employee_serde;
5. Use desc to inspect the table schema and select * from … to view the data
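For completeness, step 5 would look roughly like this:
hive> desc employee_struct3;
hive> select * from employee_struct3;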