统计优惠券覆盖了多少的商品sku
-- 优惠券下,sku数量分布
SELECT
percentile_approx(CAST(amount AS DOUBLE), 0.95, 9999) AS q095,
percentile_approx(CAST(amount AS DOUBLE), 0.94, 9999) AS q094,
percentile_approx(CAST(amount AS DOUBLE), 0.93, 9999) AS q093,
percentile_approx(CAST(amount AS DOUBLE), 0.92, 9999) AS q092,
percentile_approx(CAST(amount AS DOUBLE), 0.91, 9999) AS q091,
percentile_approx(CAST(amount AS DOUBLE), 0.90, 9999) AS q090,
percentile_approx(CAST(amount AS DOUBLE), 0.85, 9999) AS q085,
percentile_approx(CAST(amount AS DOUBLE), 0.80, 9999) AS q080
FROM
(
SELECT
batch_id,
COUNT( *) AS amount
FROM
(
SELECT
batch_id,
item_sku_id
FROM
你的表
GROUP BY
batch_id,
item_sku_id
)
a
GROUP BY
batch_id
)
c
q090分位数(覆盖了90%的商品sku)
这个还要再理解-_-||
SELECT size(split('a#b#c', '#'))
# 3
ht.exec_sql(schema_name='gdm', table_name='test_gdm_job_status_sum', sql=sql, exec_engine='hive',
merge_flag=True, merge_part_dir =['dt='+ date_str], merge_type='mr')
#==========================HiveTask使用说明==========================
# schema_name: 必选,目标库名;
# table_name: 可选,目标表名;
# sql: 必选,执行的sql语句;
# merge_flag: False (default),是否合并文件;
# lzo_compress: 可选 False (default) 是否lzo模式压缩;
#merge_part_dir: 可选,合并目录,建议为当前时间分区;
# merge_type:可选,合并方式(streaming、mr,默认为streaming)
# exec_engine:可选,执行引擎,exec_engine='hive'或不填写时,为hive引擎;如需使用spark引擎,需增加exec_engine='spark',spark_resource_level='low'
#====================================================================