hive实战训练-求连涨/跌次数

1.需求

 2.分析

计算连涨或者连跌天数 涨跌幅

拆分为下面的几种情况进行分析

涨幅

连板

2023/6/1

10

0%

0

2023/6/2

20

2023/6/3

30

2023/6/4

2023/6/5

20

2023/6/6

10

2023/6/7

涨幅

连板

2023/6/1

10

100%

1

2023/6/2

20

2023/6/3

30

2023/6/4

2023/6/5

20

2023/6/6

10

2023/6/7

20

涨幅

连板

2023/6/1

10

-75%

-2

2023/6/2

20

2023/6/3

30

2023/6/4

2023/6/5

20

2023/6/6

10

2023/6/7

5

涨幅

连板

2023/6/1

10

0%

0

2023/6/2

20

2023/6/3

30

2023/6/4

2023/6/5

20

2023/6/6

2023/6/7

5

涨幅

连板

2023/6/1

10

600%

6

2023/6/2

20

2023/6/3

30

2023/6/4

40

2023/6/5

50

2023/6/6

60

2023/6/7

70

涨幅

连板

2023/6/1

70

-85%

-6

2023/6/2

60

2023/6/3

50

2023/6/4

40

2023/6/5

30

2023/6/6

20

2023/6/7

10

 3. hive sql 求解

1) 模拟计算数据
drop table if exists dwd_db_tmp.dwd_rfq_analysis_tmp;
create table if not exists dwd_db_tmp.dwd_rfq_analysis_tmp as
select 
    'pntest' as pn
    ,'2023-06-01' as dt 
    ,100 as quote_cnt
union all     
select 
    'pntest' as pn
    ,'2023-06-02' as dt 
    ,120 as quote_cnt
union all     
select 
    'pntest' as pn
    ,'2023-06-03' as dt 
    ,130 as quote_cnt
union all     
select 
    'pntest' as pn
    ,'2023-06-04' as dt 
    ,145 as quote_cnt
union all     
select 
    'pntest' as pn
    ,'2023-06-05' as dt 
    ,120 as quote_cnt
union all     
select 
    'pntest' as pn
    ,'2023-06-06' as dt 
    ,100 as quote_cnt
union all  
select 
    'pntest' as pn
    ,'2023-06-07' as dt 
    ,100 as quote_cnt
    
    

union all     
select 
    'pntest2' as pn
    ,'2023-06-01' as dt 
    ,100 as quote_cnt
union all     
select 
    'pntest2' as pn
    ,'2023-06-03' as dt 
    ,130 as quote_cnt
union all     
select 
    'pntest2' as pn
    ,'2023-06-04' as dt 
    ,145 as quote_cnt
union all     
select 
    'pntest2' as pn
    ,'2023-06-05' as dt 
    ,146 as quote_cnt
union all     
select 
    'pntest2' as pn
    ,'2023-06-06' as dt 
    ,150 as quote_cnt
union all  
select 
    'pntest2' as pn
    ,'2023-06-07' as dt 
    ,140 as quote_cnt 
    
    

union all     
select 
    'pntest3' as pn
    ,'2023-06-01' as dt 
    ,100 as quote_cnt
union all     
select 
    'pntest3' as pn
    ,'2023-06-02' as dt 
    ,120 as quote_cnt
union all     
select 
    'pntest3' as pn
    ,'2023-06-03' as dt 
    ,130 as quote_cnt
union all     
select 
    'pntest3' as pn
    ,'2023-06-04' as dt 
    ,128 as quote_cnt
union all     
select 
    'pntest3' as pn
    ,'2023-06-05' as dt 
    ,126 as quote_cnt
union all     
select 
    'pntest3' as pn
    ,'2023-06-06' as dt 
    ,120 as quote_cnt
union all  
select 
    'pntest3' as pn
    ,'2023-06-07' as dt 
    ,130 as quote_cnt     
    
    
-- 计算连板次数  日期中断则连板中断  再看涨幅 跌幅  求最大连续日期 最后连续日期 寻找变换点
select 
    pn
    ,sum(if(rn=1,quote_cnt,0)) as start_quote_cnt
    ,sum(if(dt_rn=1,quote_cnt,0)) as end_quote_cnt 
    ,(sum(if(dt_rn=1,quote_cnt,0)) - sum(if(rn=1,quote_cnt,0))) / greatest(sum(if(rn=1,quote_cnt,0)),1.0) as rate   -- 计算涨跌幅
    ,max(if(rn=1,dt,null)) as start_dt
    ,max(if(dt_rn=1,dt,null)) as end_dt 
    ,datediff(max(if(dt_rn=1,dt,null)),max(if(rn=1,dt,null))) as day_cnt                                        -- 计算连板次数
from(    
    -- 在这里直接过滤数据? 保留 rn =1 与dt_rn = 1 的记录 减少后续计算量
    select 
        pn
        ,dt 
        ,row_number() over (partition by pn order by ischange desc ,dt desc) as rn                              -- 找出开始连板日期
        ,row_number() over (partition by pn order by dt desc) as dt_rn                                          -- 找出结束连板日期
        ,ischange
        ,quote_cnt
    from 
    (
        select 
            pn
            ,cnt_diff 
            ,last_cnt_diff
            ,df
            ,if(df!=1 or cnt_diff = 0 or cnt_diff*last_cnt_diff < 0,1,0 ) as ischange                           -- 变化标志
            ,dt
            ,quote_cnt
        from(    
            select 
                pn
                ,nvl(datediff(dt,last_dt),0) as df                                                              -- 求变换天数判断是否连续
                ,nvl((quote_cnt-last_quote_cnt),1) as cnt_diff                                                  -- 求变化量
                ,nvl(lead(quote_cnt-last_quote_cnt,1) over( partition by pn order by dt),0) as last_cnt_diff    -- 求下一个时间窗口的变化量 最后一天默认为0
                ,dt 
                ,quote_cnt
            from(    
                select 
                    d.pn
                    ,d.quote_cnt
                    ,d.dt
                    ,lag(d.dt,1) over( partition by pn order by dt) as last_dt                                  -- 求上个时间窗口日期
                    ,lag(d.quote_cnt,1) over( partition by pn order by dt) as last_quote_cnt                    -- 求上个时间窗口值
                from dwd_db_tmp.dwd_rfq_analysis_tmp d   
            ) a 
        ) b
    ) c 
) d 
where d.rn = 1 or d.dt_rn = 1 
group by pn;

4. udf 函数求解

package org.xxx.com;

import org.apache.hadoop.hive.ql.exec.UDF;

import java.util.*;

public class IncreaseRateUdf extends UDF {

   public String evaluate(String dtValStr, String lastDate) {
      List<Integer> dateList = new ArrayList<>(8);
      List<Double> valueList = new ArrayList<>(8);
      // 进行参数解析
      this.str2SortArray(dtValStr, valueList, dateList);

      int size = dateList.size();
      int i = size - 1;

      // 1.列表为空直接返回
      if (valueList == null || valueList.size() <= 0) {
         return "0.0:0";
      }

      // 2.判断最后日期是否是计算日期 不是直接返回
      if (dateList.get(size - 1) < Integer.valueOf(lastDate.replaceAll("-", ""))) {
         return "0.0:0";
      }

      // 3.截取开始连续日期
      while (true) {
         if (i > 0 && (dateList.get(i) - dateList.get(i - 1) == 1)) {
            i--;
         } else {
            break;
         }
      }
      // i为连续日期开始index
      if (i == size - 1) {
         // 4.截取后如果只有一个元素 表示无涨幅,直接返回
         return "0.0:0";
      }

      Double startVal = 0.0; // 连涨或连跌开始值
      Double endVal = valueList.get(size - 1);// 结束值
      int increaseDay = 0;  // 连板数
      int increaseVal = 0;

      for (int j = valueList.size() - 2; j >= i; j--) {
         Double curVal = valueList.get(j + 1);
         Double preVal = valueList.get(j);
         if (curVal > preVal) {
            increaseVal++;
         } else if (curVal < preVal) {
            increaseVal--;
         }
         // 表示从后往前推 到当前为止已经不满足连涨或连跌条件 取往后一天作为开始值
         if (Math.abs(increaseVal) + j != valueList.size() - 1) {
            startVal = valueList.get(j + 1);
            increaseDay = valueList.size() - 2 - j;
            break;
         } else {
            startVal = valueList.get(j);
            increaseDay = valueList.size() - 1 - j;
         }
      }
      double increase = startVal == 0.0 ? 1.0 : (endVal - startVal) / startVal;
      return increase + ":" + (increase > 0 ? increaseDay : (-1 * increaseDay));
   }

   // 传进来的日期:值 数组转换成日期 值列表并且按照日期进行排序
   public void str2SortArray(String dtValueStr, List<Double> valueList, List<Integer> dateList) {
      String[] arr = dtValueStr.split(",");
      int rows = arr.length;
      Map<Integer, Double> sortMap = new HashMap<>(8);
      for (int i = 0; i < rows; i++) {
         String[] temp = arr[i].split(":");
         int dt = Integer.parseInt(temp[0].replaceAll("-", ""));
         double val = Double.parseDouble(temp[1]);
         dateList.add(dt);
         sortMap.put(dt, val);
      }
      dateList.sort(Comparator.naturalOrder());
      dateList.forEach(ele -> valueList.add(sortMap.get(ele)));
   }

   public static void main(String[] args) {
      IncreaseRateUdf increaseRateUdf = new IncreaseRateUdf();
      String dtValStr = "2023-06-13:213.02,2023-06-11:113.02,2023-06-12:21.02,2023-06-10:213.22,2023-06-09:22.02,2023-06-08:2.02,2023-06-07:23.02";
      String lastDate = "2023-06-13";
      String evaluate = increaseRateUdf.evaluate(dtValStr, lastDate);
      System.out.println("result ==> " + evaluate);
   }

}

打包并且上传 注册函数

注册udf函数
create function dwd_db.increase_cal as 'org.xxxx.com.IncreaseRateUdf' using jar 'hdfs:///dolphinscheduler/hdfs/udfs/increase-udf-1.0-SNAPSHOT.jar';

udf函数查询示例

select 
    pn
    ,cast(split(increase_res,':')[0] as decimal(19,6)) as increase_rate
    ,cast(split(increase_res,':')[1] as int) as increase_days 
from (
    select pn,dwd_db.increase_cal(dtValArray,'${startDay}') as increase_res from (
        select 
            pn
            ,trim(concat_ws(',',collect_set(concat(a.dt,':',cast(a.quote_cnt as string))))) as dtValArray 
        from dwd_db_tmp.dwd_rfq_analysis_tmp a 
        group by 
            a.pn 
    ) a
) b; 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值