1. Array:数组类型,由一系列相同数据类型的元素组成,通过下标访问元素
创建一张student表
-- student: one row per student; grade holds a variable-length list of scores.
-- Text layout: columns are tab-separated, array elements are comma-separated,
-- e.g. 1<TAB>Mark<TAB>78,81.5,90
CREATE TABLE student (
    sid   INT,
    sname STRING,
    grade ARRAY<FLOAT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY ','
STORED AS TEXTFILE;
> desc student;
+-----------+---------------+----------+--+
| col_name | data_type | comment |
+-----------+---------------+----------+--+
| sid | int | |
| sname | string | |
| grade | array<float> | |
+-----------+---------------+----------+--+
hdfs dfs -put student.txt /user/hive/warehouse/xxx.db/student
> select * from student;
+--------------+----------------+-------------------+--+
| student.sid | student.sname | student.grade |
+--------------+----------------+-------------------+--+
| 1 | Mark | [78.0,81.5,90.0] |
| 2 | john | [67.0,78.5] |
+--------------+----------------+-------------------+--+
student.txt的数据(字段之间用Tab分隔,数组元素之间用逗号分隔):
1 Mark 78,81.5,90
2 john 67,78.5
2. Map:key-value集合,通过key访问元素
-- student1: grade maps a subject name to its score.
-- Text layout: columns are tab-separated, map entries are separated by '|',
-- and each key is separated from its value by ':'.
-- Keys are taken verbatim: quoting a key in the data file makes the quote
-- characters part of the key.
CREATE TABLE student1 (
    sid   INT,
    sname STRING,
    grade MAP<STRING, FLOAT>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '|'
    MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;
> desc student1;
+-----------+--------------------+----------+--+
| col_name | data_type | comment |
+-----------+--------------------+----------+--+
| sid | int | |
| sname | string | |
| grade | map<string,float> | |
+-----------+--------------------+----------+--+
> select * from student1;
+---------------+-----------------+----------------------------------------------+--+
| student1.sid | student1.sname | student1.grade |
+---------------+-----------------+----------------------------------------------+--+
| 1 | Mark | {"\"语文\"":78.0,"\"英语\"":81.5,"\"数学\"":90.0} |
| 2 | john | {"\"语文\"":65.0,"\"英语\"":85.5} |
+---------------+-----------------+----------------------------------------------+--+
student1.txt的数据(字段之间用Tab分隔;注意:key两侧的双引号会被当作key的一部分,见上面的查询结果,实际使用时建议去掉引号):
1 Mark "语文":78|"英语":81.5|"数学":90
2 john "语文":65|"英语":85.5
Array与Map嵌套:array<map<string,float>>,数组中的每个元素都是一个map
-- student2: grade is an array whose elements are maps (subject -> score).
-- NOTE(review): with nested collections the text SerDe assigns delimiters by
-- nesting depth, so here '|' should separate the array elements, ':' would
-- separate the entries inside each map, and the key/value separator falls
-- back to the next default control character rather than ':'.
-- Confirm against the Hive LazySimpleSerDe documentation before loading data.
CREATE TABLE student2 (
    sid   INT,
    sname STRING,
    grade ARRAY<MAP<STRING, FLOAT>>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '|'
    MAP KEYS TERMINATED BY ':'
STORED AS TEXTFILE;
数据格式如:{1,'Mark',[<"高等数学",80>,<"c语言",83>]}
beeline> desc student2;
+-----------+---------------------------+----------+--+
| col_name | data_type | comment |
+-----------+---------------------------+----------+--+
| sid | int | |
| sname | string | |
| grade | array<map<string,float>> | |
+-----------+---------------------------+----------+--+
3. struct:结构类型,类似于C和C++中的结构体。可以包含不同数据类型的元素,这些元素可以通过点语法的方式来得到所需要的元素。
-- student3: info is a struct accessed with dot syntax (info.name, info.age, info.sex).
-- Text layout: columns are tab-separated; the struct members are written
-- positionally, in declaration order, separated by '|' and WITHOUT field names,
-- e.g. 1<TAB>Mark|18|M
-- Writing name:value pairs in the data file does not work: the whole token
-- (such as name:"Mark") is stored as the field value and a non-numeric age
-- token is read as NULL.
-- No MAP KEYS delimiter is declared because this table has no map column;
-- declaring one wrongly suggests that struct data takes key:value pairs.
CREATE TABLE student3 (
    sid  INT,
    info STRUCT<name:STRING, age:INT, sex:STRING>
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY '\t'
    COLLECTION ITEMS TERMINATED BY '|'
STORED AS TEXTFILE;
> desc student3;
+-----------+-----------------------------------------+----------+--+
| col_name | data_type | comment |
+-----------+-----------------------------------------+----------+--+
| sid | int | |
| info | struct<name:string,age:int,sex:string> | |
+-----------+-----------------------------------------+----------+--+
> select * from student3;
+---------------+--------------------------------------------------------+--+
| student3.sid | student3.info |
+---------------+--------------------------------------------------------+--+
| 1 | {"name":"name:\"Mark\"","age":null,"sex":"sex:\"男\""} |
| 2 | {"name":"name:\"Lily\"","age":null,"sex":"sex:\"女\""} |
+---------------+--------------------------------------------------------+--+