1、创建表带有Array的表:
create table t_afan_test
(
info1 array<int>,
info2 array<string>
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
2、插入数据:
insert into t_afan_test values (array(12,23,23,34),array("what","are","this"));
insert into t_afan_test values (array(12,23,23,34,56,32),array("what","are","this","aaa"));
3、查询出的结果如下:
4、再如以下案例:
drop table if exists t_afan_test;
create table t_afan_test
(
info1 string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
insert into t_afan_test values ('[12,23,23,34]');
insert into t_afan_test values ('[22,33,43,54]');
select * from t_afan_test;
+--------------------+
| t_afan_test.info1 |
+--------------------+
| [12,23,23,34] |
| [22,33,43,54] |
+--------------------+
drop table if exists tmp_xxx;
create table tmp_xxx as
select
split(regexp_extract(`info1`,'^\\[(.*)]$',1),',') as key_word_label
from t_afan_test
where `info1` is not null limit 10;
select * from tmp_xxx;
+-------------------------+
| tmp_xxx.key_word_label |
+-------------------------+
| ["22","33","43","54"] |
| ["12","23","23","34"] |
+-------------------------+
select collect_list(cast(array_element as int)) int_array
from (select explode(key_word_label) array_element from tmp_xxx) s;
+----------------------------+
| int_array |
+----------------------------+
| [22,33,43,54,12,23,23,34] |
+----------------------------+
select explode(key_word_label) array_element from tmp_xxx;
+----------------+
| array_element |
+----------------+
| 22 |
| 33 |
| 43 |
| 54 |
| 12 |
| 23 |
| 23 |
| 34 |
+----------------+
5、将’[12,23,23,34]'格式数据转成struct<> , array 的方式:
转成array编写UDF,代码如下:
package com.xxx.stringtoarray;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.ArrayList;
import java.util.List;
/**
* 将数组格式的字符串 转成 整型数组
* @author tzq
*/
public final class StringToArray extends UDF {
private static final String NULL_STRING = "null";
/**
* 如果想最后hive的数据格式是struct<>, 返回值是Integer[]的。
* @param sourceText :源字符串
* @return
*/
public List<Integer> evaluate(String sourceText) {
if (isBlank(sourceText)) {
return null;
}
if (NULL_STRING.equalsIgnoreCase(sourceText)) {
return null;
}
String[] arr1 = sourceText.replace("[","").replace("]","").split(",");
//Integer[] arr2 = new Integer[arr1.length];
List<Integer> list = new ArrayList<>();
for(int i = 0; i < arr1.length; i++) {
list.add(Integer.parseInt(arr1[i]));
}
return list;
}
public static boolean isBlank(String str) {
int strLen;
if (str != null && (strLen = str.length()) != 0) {
for(int i = 0; i < strLen; ++i) {
if (!Character.isWhitespace(str.charAt(i))) {
return false;
}
}
return true;
} else {
return true;
}
}
}
hive中创建临时函数:
hive> add jar /xxx/xxx/xxx/xx.jar;
hive> create temporary function stringToArray as 'com.xxx.stringtoarray.StringToArray';
使用的时候,类似如下:
drop table if exists t_afan_test;
create table t_afan_test
(
info1 string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t'
COLLECTION ITEMS TERMINATED BY ',';
insert into t_afan_test values ('[12,23,23,34]');
insert into t_afan_test values ('[22,33,43,54]');
drop table if exists tmp_xxx;
create table tmp_xxx as
select
stringToArray(`info1`) as key_word_label
from t_afan_test;
查看结果: