1.需求
2.分析
计算连涨或者连跌天数 涨跌幅
拆分为下面的几种情况进行分析
涨幅 | 连板 | |||
2023/6/1 | 10 | 0% | 0 | |
2023/6/2 | 20 | |||
2023/6/3 | 30 | |||
2023/6/4 | 无 | |||
2023/6/5 | 20 | |||
2023/6/6 | 10 | |||
2023/6/7 | 无 | |||
涨幅 | 连板 | |||
2023/6/1 | 10 | 100% | 1 | |
2023/6/2 | 20 | |||
2023/6/3 | 30 | |||
2023/6/4 | 无 | |||
2023/6/5 | 20 | |||
2023/6/6 | 10 | |||
2023/6/7 | 20 | |||
涨幅 | 连板 | |||
2023/6/1 | 10 | -75% | -2 | |
2023/6/2 | 20 | |||
2023/6/3 | 30 | |||
2023/6/4 | 无 | |||
2023/6/5 | 20 | |||
2023/6/6 | 10 | |||
2023/6/7 | 5 | |||
涨幅 | 连板 | |||
2023/6/1 | 10 | 0% | 0 | |
2023/6/2 | 20 | |||
2023/6/3 | 30 | |||
2023/6/4 | 无 | |||
2023/6/5 | 20 | |||
2023/6/6 | 无 | |||
2023/6/7 | 5 | |||
涨幅 | 连板 | |||
2023/6/1 | 10 | 600% | 6 | |
2023/6/2 | 20 | |||
2023/6/3 | 30 | |||
2023/6/4 | 40 | |||
2023/6/5 | 50 | |||
2023/6/6 | 60 | |||
2023/6/7 | 70 | |||
涨幅 | 连板 | |||
2023/6/1 | 70 | -85% | -6 | |
2023/6/2 | 60 | |||
2023/6/3 | 50 | |||
2023/6/4 | 40 | |||
2023/6/5 | 30 | |||
2023/6/6 | 20 | |||
2023/6/7 | 10 |
3. hive sql 求解
1) 模拟计算数据
drop table if exists dwd_db_tmp.dwd_rfq_analysis_tmp;
create table if not exists dwd_db_tmp.dwd_rfq_analysis_tmp as
select
'pntest' as pn
,'2023-06-01' as dt
,100 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-02' as dt
,120 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-03' as dt
,130 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-04' as dt
,145 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-05' as dt
,120 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-06' as dt
,100 as quote_cnt
union all
select
'pntest' as pn
,'2023-06-07' as dt
,100 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-01' as dt
,100 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-03' as dt
,130 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-04' as dt
,145 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-05' as dt
,146 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-06' as dt
,150 as quote_cnt
union all
select
'pntest2' as pn
,'2023-06-07' as dt
,140 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-01' as dt
,100 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-02' as dt
,120 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-03' as dt
,130 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-04' as dt
,128 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-05' as dt
,126 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-06' as dt
,120 as quote_cnt
union all
select
'pntest3' as pn
,'2023-06-07' as dt
,130 as quote_cnt
-- 计算连板次数 日期中断则连板中断 再看涨幅 跌幅 求最大连续日期 最后连续日期 寻找变换点
select
pn
,sum(if(rn=1,quote_cnt,0)) as start_quote_cnt
,sum(if(dt_rn=1,quote_cnt,0)) as end_quote_cnt
,(sum(if(dt_rn=1,quote_cnt,0)) - sum(if(rn=1,quote_cnt,0))) / greatest(sum(if(rn=1,quote_cnt,0)),1.0) as rate -- 计算涨跌幅
,max(if(rn=1,dt,null)) as start_dt
,max(if(dt_rn=1,dt,null)) as end_dt
,datediff(max(if(dt_rn=1,dt,null)),max(if(rn=1,dt,null))) as day_cnt -- 计算连板次数
from(
-- 在这里直接过滤数据? 保留 rn =1 与dt_rn = 1 的记录 减少后续计算量
select
pn
,dt
,row_number() over (partition by pn order by ischange desc ,dt desc) as rn -- 找出开始连板日期
,row_number() over (partition by pn order by dt desc) as dt_rn -- 找出结束连板日期
,ischange
,quote_cnt
from
(
select
pn
,cnt_diff
,last_cnt_diff
,df
,if(df!=1 or cnt_diff = 0 or cnt_diff*last_cnt_diff < 0,1,0 ) as ischange -- 变化标志
,dt
,quote_cnt
from(
select
pn
,nvl(datediff(dt,last_dt),0) as df -- 求变换天数判断是否连续
,nvl((quote_cnt-last_quote_cnt),1) as cnt_diff -- 求变化量
,nvl(lead(quote_cnt-last_quote_cnt,1) over( partition by pn order by dt),0) as last_cnt_diff -- 求下一个时间窗口的变化量 最后一天默认为0
,dt
,quote_cnt
from(
select
d.pn
,d.quote_cnt
,d.dt
,lag(d.dt,1) over( partition by pn order by dt) as last_dt -- 求上个时间窗口日期
,lag(d.quote_cnt,1) over( partition by pn order by dt) as last_quote_cnt -- 求上个时间窗口值
from dwd_db_tmp.dwd_rfq_analysis_tmp d
) a
) b
) c
) d
where d.rn = 1 or d.dt_rn = 1
group by pn;
4. udf 函数求解
package org.xxx.com;
import org.apache.hadoop.hive.ql.exec.UDF;
import java.util.*;
public class IncreaseRateUdf extends UDF {
public String evaluate(String dtValStr, String lastDate) {
List<Integer> dateList = new ArrayList<>(8);
List<Double> valueList = new ArrayList<>(8);
// 进行参数解析
this.str2SortArray(dtValStr, valueList, dateList);
int size = dateList.size();
int i = size - 1;
// 1.列表为空直接返回
if (valueList == null || valueList.size() <= 0) {
return "0.0:0";
}
// 2.判断最后日期是否是计算日期 不是直接返回
if (dateList.get(size - 1) < Integer.valueOf(lastDate.replaceAll("-", ""))) {
return "0.0:0";
}
// 3.截取开始连续日期
while (true) {
if (i > 0 && (dateList.get(i) - dateList.get(i - 1) == 1)) {
i--;
} else {
break;
}
}
// i为连续日期开始index
if (i == size - 1) {
// 4.截取后如果只有一个元素 表示无涨幅,直接返回
return "0.0:0";
}
Double startVal = 0.0; // 连涨或连跌开始值
Double endVal = valueList.get(size - 1);// 结束值
int increaseDay = 0; // 连板数
int increaseVal = 0;
for (int j = valueList.size() - 2; j >= i; j--) {
Double curVal = valueList.get(j + 1);
Double preVal = valueList.get(j);
if (curVal > preVal) {
increaseVal++;
} else if (curVal < preVal) {
increaseVal--;
}
// 表示从后往前推 到当前为止已经不满足连涨或连跌条件 取往后一天作为开始值
if (Math.abs(increaseVal) + j != valueList.size() - 1) {
startVal = valueList.get(j + 1);
increaseDay = valueList.size() - 2 - j;
break;
} else {
startVal = valueList.get(j);
increaseDay = valueList.size() - 1 - j;
}
}
double increase = startVal == 0.0 ? 1.0 : (endVal - startVal) / startVal;
return increase + ":" + (increase > 0 ? increaseDay : (-1 * increaseDay));
}
// 传进来的日期:值 数组转换成日期 值列表并且按照日期进行排序
public void str2SortArray(String dtValueStr, List<Double> valueList, List<Integer> dateList) {
String[] arr = dtValueStr.split(",");
int rows = arr.length;
Map<Integer, Double> sortMap = new HashMap<>(8);
for (int i = 0; i < rows; i++) {
String[] temp = arr[i].split(":");
int dt = Integer.parseInt(temp[0].replaceAll("-", ""));
double val = Double.parseDouble(temp[1]);
dateList.add(dt);
sortMap.put(dt, val);
}
dateList.sort(Comparator.naturalOrder());
dateList.forEach(ele -> valueList.add(sortMap.get(ele)));
}
public static void main(String[] args) {
IncreaseRateUdf increaseRateUdf = new IncreaseRateUdf();
String dtValStr = "2023-06-13:213.02,2023-06-11:113.02,2023-06-12:21.02,2023-06-10:213.22,2023-06-09:22.02,2023-06-08:2.02,2023-06-07:23.02";
String lastDate = "2023-06-13";
String evaluate = increaseRateUdf.evaluate(dtValStr, lastDate);
System.out.println("result ==> " + evaluate);
}
}
打包并且上传 注册函数
注册udf函数
create function dwd_db.increase_cal as 'org.xxxx.com.IncreaseRateUdf' using jar 'hdfs:///dolphinscheduler/hdfs/udfs/increase-udf-1.0-SNAPSHOT.jar';
udf函数查询示例
select
pn
,cast(split(increase_res,':')[0] as decimal(19,6)) as increase_rate
,cast(split(increase_res,':')[1] as int) as increase_days
from (
select pn,dwd_db.increase_cal(dtValArray,'${startDay}') as increase_res from (
select
pn
,trim(concat_ws(',',collect_set(concat(a.dt,':',cast(a.quote_cnt as string))))) as dtValArray
from dwd_db_tmp.dwd_rfq_analysis_tmp a
group by
a.pn
) a
) b;