工作中使用阿里云 DataWorks 的 HiveSQL 笔记
分割
普通字符分割
-- First element of industry_list after splitting it on ':'.
SPLIT(industry_list, ':')[0] AS industry
竖线字符 '|' 分割(split 的分隔符按正则表达式解析,'|' 需要转义)
-- First element of industry_list after splitting it on a literal '|'.
-- SPLIT treats its second argument as a regular expression, hence the escaped pipe.
SPLIT(industry_list, "\\|")[0] AS industry_
一行转多行(按分隔符拆分)
-- One row to many rows: emit one output row per '|'-separated element of industry_list.
-- The exploded element is exposed as `industry` so it does not collide with the
-- raw industry_list column that is also selected.
-- Note: with a plain LATERAL VIEW, rows for which EXPLODE produces nothing
-- (e.g. a NULL industry_list) are dropped; LATERAL VIEW OUTER would keep them.
SELECT  a
        ,industry_list                  -- original, unsplit value
        ,ss.industry_ AS industry       -- a single exploded element
FROM    mi_ads_dmp_dev.tmp_outer_profile_predictset
LATERAL VIEW EXPLODE(SPLIT(industry_list, "\\|")) ss AS industry_
分位数
-- Approximate 10th..90th percentiles of bidding_price; passing an array of
-- fractions yields an array with one approximate value per fraction.
PERCENTILE_APPROX(
    a.bidding_price
    ,ARRAY(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
) AS price_distribution
覆盖率(非空占比,用于评估字段缺失情况)
-- Coverage per dt: the share of a_table rows whose joined b_table value is non-NULL.
-- a_table / b_table / `column` / key are placeholders for the real names.
-- The tables are aliased as a / b so that the a.* and b.* references resolve.
SELECT  COUNT(a.id) AS cnt
        -- COUNT(expr) ignores NULLs, so the numerator counts only rows where the
        -- b-side value is present (unmatched LEFT JOIN rows contribute NULL).
        -- `column` is backquoted because COLUMN is a reserved word.
        ,COUNT(b.`column`) / COUNT(a.id) AS column_not_null_per
        ,a.dt
FROM    a_table a
LEFT JOIN b_table b
ON      a.key = b.key
-- NOTE(review): assumes dt is a column of a_table — confirm against the real schema.
GROUP BY a.dt