常用配置参数
--针对小数据集,开启本地化
set hive.exec.mode.local.auto=true;
--打印表头;
--单独显式字段,不带表名;
set hive.cli.print.header=true;
set hive.resultset.use.unique.column.names=false;
正则表达式:regexp_extract、regexp_replace等,补充中
create_union
-- 用 regexp_extract 函数匹配出 data字段中,第二个正则表达式包含的内容 --------------------
------ 匹配规则: regexp_extract(data,'(\\\\[")(.*)(\\\\"])',2) 效果如下 --------------
select
*,
regexp_extract( data, '(\\{)(.*)(\\})', 2) as new_data
from test_map_1_to_string;
---- 效果如下:注意在hive客户端用2个转义符 \\, 如果需要在脚本运行,应用四个 \\\\ 做转义 ---
uid data new_data
1 {"key1":"value1,key2":"value2"} "key1":"value1,key2":"value2"
2 {"key3":"value3,key4":"value4"} "key3":"value3,key4":"value4"
表结构调整
--字段重命名
alter table [tableName] change column [oldColumn] [newColumn] [columnType];
复合结构及相关函数
-- 是否含有某元素
hive> select array_contains( split('1,2,3,4,5', ','), cast(1 as string)) as isFlag;
OK
isflag
true
json格式字符串的解析
hive> SELECT
> get_json_object('{"name":"zs","age":18}', '$.name') as name,
> get_json_object('{"name":"zs","age":18}', '$.age') age;
OK
name age
zs 18
select
get_json_object(line, '$.name') as name,
get_json_object(line, '$.age') as age
from(
select '{"name":"zs","age":18}' as line
)t;
OK
name age
zs 18